Index: stable/11/sys/ofed/drivers/infiniband/core/agent.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/agent.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/agent.h (revision 331772) @@ -1,51 +1,53 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef __AGENT_H_ #define __AGENT_H_ #include #include extern int ib_agent_port_open(struct ib_device *device, int port_num); extern int ib_agent_port_close(struct ib_device *device, int port_num); extern void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, const struct ib_wc *wc, const struct ib_device *device, int port_num, int qpn, size_t resp_mad_len, bool opa); #endif /* __AGENT_H_ */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/agent.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/cm_msgs.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/cm_msgs.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/cm_msgs.h (revision 331772) @@ -1,836 +1,841 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING the madirectory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use source and binary forms, with or * withmodification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retathe above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE * SOFTWARE. + * + * $FreeBSD$ */ + #if !defined(CM_MSGS_H) #define CM_MSGS_H #include #include /* * Parameters to routines below should be in network-byte order, and values * are returned in network-byte order. */ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, CM_MSG_SEQUENCE_LAP, CM_MSG_SEQUENCE_DREQ, CM_MSG_SEQUENCE_SIDR }; struct cm_req_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 rsvd4; __be64 service_id; __be64 local_ca_guid; __be32 rsvd24; __be32 local_qkey; /* local QPN:24, responder resources:8 */ __be32 offset32; /* local EECN:24, initiator depth:8 */ __be32 offset36; /* * remote EECN:24, remote CM response timeout:5, * transport service type:2, end-to-end flow control:1 */ __be32 offset40; /* starting PSN:24, local CM response timeout:5, retry count:3 */ __be32 offset44; __be16 pkey; /* path MTU:4, RDC exists:1, RNR retry count:3. */ u8 offset50; /* max CM Retries:4, SRQ:1, extended transport type:3 */ u8 offset51; __be16 primary_local_lid; __be16 primary_remote_lid; union ib_gid primary_local_gid; union ib_gid primary_remote_gid; /* flow label:20, rsvd:6, packet rate:6 */ __be32 primary_offset88; u8 primary_traffic_class; u8 primary_hop_limit; /* SL:4, subnet local:1, rsvd:3 */ u8 primary_offset94; /* local ACK timeout:5, rsvd:3 */ u8 primary_offset95; __be16 alt_local_lid; __be16 alt_remote_lid; union ib_gid alt_local_gid; union ib_gid alt_remote_gid; /* flow label:20, rsvd:6, packet rate:6 */ __be32 alt_offset132; u8 alt_traffic_class; u8 alt_hop_limit; /* SL:4, subnet local:1, rsvd:3 */ u8 alt_offset138; /* local ACK timeout:5, rsvd:3 */ u8 alt_offset139; u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg) { return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8); } static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn) { req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(req_msg->offset32) & 0x000000FF)); } static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg) { return (u8) be32_to_cpu(req_msg->offset32); } static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res) { req_msg->offset32 = cpu_to_be32(resp_res | (be32_to_cpu(req_msg->offset32) & 0xFFFFFF00)); } static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg) { return (u8) be32_to_cpu(req_msg->offset36); } static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg, u8 init_depth) { req_msg->offset36 = cpu_to_be32(init_depth | (be32_to_cpu(req_msg->offset36) & 0xFFFFFF00)); } static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg) { return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3); } static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg, u8 resp_timeout) { req_msg->offset40 = cpu_to_be32((resp_timeout << 3) | (be32_to_cpu(req_msg->offset40) & 0xFFFFFF07)); } static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg) { u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1; switch(transport_type) { case 0: return IB_QPT_RC; case 1: return IB_QPT_UC; case 3: switch (req_msg->offset51 & 0x7) { case 1: return IB_QPT_XRC_TGT; default: return 0; } default: return 0; } } static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg, enum ib_qp_type qp_type) { switch(qp_type) { case IB_QPT_UC: req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x2); break; case IB_QPT_XRC_INI: req_msg->offset40 = cpu_to_be32((be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9) | 0x6); req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; break; default: req_msg->offset40 = cpu_to_be32(be32_to_cpu( req_msg->offset40) & 0xFFFFFFF9); } } static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg) { return be32_to_cpu(req_msg->offset40) & 0x1; } static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg, u8 flow_ctrl) { req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) | (be32_to_cpu(req_msg->offset40) & 0xFFFFFFFE)); } static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg) { return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8); } static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg, __be32 starting_psn) { req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | (be32_to_cpu(req_msg->offset44) & 0x000000FF)); } static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg) { return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3); } static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg, u8 resp_timeout) { req_msg->offset44 = cpu_to_be32((resp_timeout << 3) | (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07)); } static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg) { return (u8) (be32_to_cpu(req_msg->offset44) & 0x7); } static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg, u8 retry_count) { req_msg->offset44 = cpu_to_be32((retry_count & 0x7) | (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8)); } static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg) { return req_msg->offset50 >> 4; } static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu) { req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4)); } static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg) { return req_msg->offset50 & 0x7; } static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg, u8 rnr_retry_count) { req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) | (rnr_retry_count & 0x7)); } static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg) { return req_msg->offset51 >> 4; } static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg, u8 retries) { req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4)); } static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg) { return (req_msg->offset51 & 0x8) >> 3; } static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq) { req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) | ((srq & 0x1) << 3)); } static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg) { return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12); } static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg, __be32 flow_label) { req_msg->primary_offset88 = cpu_to_be32( (be32_to_cpu(req_msg->primary_offset88) & 0x00000FFF) | (be32_to_cpu(flow_label) << 12)); } static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg) { return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F); } static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg, u8 rate) { req_msg->primary_offset88 = cpu_to_be32( (be32_to_cpu(req_msg->primary_offset88) & 0xFFFFFFC0) | (rate & 0x3F)); } static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg) { return (u8) (req_msg->primary_offset94 >> 4); } static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl) { req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) | (sl << 4)); } static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg) { return (u8) ((req_msg->primary_offset94 & 0x08) >> 3); } static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg, u8 subnet_local) { req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) | ((subnet_local & 0x1) << 3)); } static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg) { return (u8) (req_msg->primary_offset95 >> 3); } static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg, u8 local_ack_timeout) { req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) | (local_ack_timeout << 3)); } static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg) { return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12); } static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg, __be32 flow_label) { req_msg->alt_offset132 = cpu_to_be32( (be32_to_cpu(req_msg->alt_offset132) & 0x00000FFF) | (be32_to_cpu(flow_label) << 12)); } static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg) { return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F); } static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg, u8 rate) { req_msg->alt_offset132 = cpu_to_be32( (be32_to_cpu(req_msg->alt_offset132) & 0xFFFFFFC0) | (rate & 0x3F)); } static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg) { return (u8) (req_msg->alt_offset138 >> 4); } static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl) { req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) | (sl << 4)); } static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg) { return (u8) ((req_msg->alt_offset138 & 0x08) >> 3); } static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg, u8 subnet_local) { req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) | ((subnet_local & 0x1) << 3)); } static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg) { return (u8) (req_msg->alt_offset139 >> 3); } static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg, u8 local_ack_timeout) { req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) | (local_ack_timeout << 3)); } /* Message REJected or MRAed */ enum cm_msg_response { CM_MSG_RESPONSE_REQ = 0x0, CM_MSG_RESPONSE_REP = 0x1, CM_MSG_RESPONSE_OTHER = 0x2 }; struct cm_mra_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; /* message MRAed:2, rsvd:6 */ u8 offset8; /* service timeout:5, rsvd:3 */ u8 offset9; u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg) { return (u8) (mra_msg->offset8 >> 6); } static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg) { mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6)); } static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg) { return (u8) (mra_msg->offset9 >> 3); } static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg, u8 service_timeout) { mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) | (service_timeout << 3)); } struct cm_rej_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; /* message REJected:2, rsvd:6 */ u8 offset8; /* reject info length:7, rsvd:1. */ u8 offset9; __be16 reason; u8 ari[IB_CM_REJ_ARI_LENGTH]; u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg) { return (u8) (rej_msg->offset8 >> 6); } static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg) { rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6)); } static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg) { return (u8) (rej_msg->offset9 >> 1); } static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg, u8 len) { rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1)); } struct cm_rep_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; __be32 local_qkey; /* local QPN:24, rsvd:8 */ __be32 offset12; /* local EECN:24, rsvd:8 */ __be32 offset16; /* starting PSN:24 rsvd:8 */ __be32 offset20; u8 resp_resources; u8 initiator_depth; /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */ u8 offset26; /* RNR retry count:3, SRQ:1, rsvd:5 */ u8 offset27; __be64 local_ca_guid; u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8); } static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn) { rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); } static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); } static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) { rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); } static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) { return (qp_type == IB_QPT_XRC_INI) ? cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); } static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) { return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); } static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg, __be32 starting_psn) { rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) | (be32_to_cpu(rep_msg->offset20) & 0x000000FF)); } static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg) { return (u8) (rep_msg->offset26 >> 3); } static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg, u8 target_ack_delay) { rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) | (target_ack_delay << 3)); } static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg) { return (u8) ((rep_msg->offset26 & 0x06) >> 1); } static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover) { rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) | ((failover & 0x3) << 1)); } static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg) { return (u8) (rep_msg->offset26 & 0x01); } static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg, u8 flow_ctrl) { rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) | (flow_ctrl & 0x1)); } static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg) { return (u8) (rep_msg->offset27 >> 5); } static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg, u8 rnr_retry_count) { rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) | (rnr_retry_count << 5)); } static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg) { return (u8) ((rep_msg->offset27 >> 4) & 0x1); } static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq) { rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) | ((srq & 0x1) << 4)); } struct cm_rtu_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); struct cm_dreq_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; /* remote QPN/EECN:24, rsvd:8 */ __be32 offset8; u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg) { return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8); } static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn) { dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(dreq_msg->offset8) & 0x000000FF)); } struct cm_drep_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); struct cm_lap_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; __be32 rsvd8; /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */ __be32 offset12; __be32 rsvd16; __be16 alt_local_lid; __be16 alt_remote_lid; union ib_gid alt_local_gid; union ib_gid alt_remote_gid; /* flow label:20, rsvd:4, traffic class:8 */ __be32 offset56; u8 alt_hop_limit; /* rsvd:2, packet rate:6 */ u8 offset61; /* SL:4, subnet local:1, rsvd:3 */ u8 offset62; /* local ACK timeout:5, rsvd:3 */ u8 offset63; u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg) { return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8); } static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn) { lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(lap_msg->offset12) & 0x000000FF)); } static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg) { return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3); } static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg, u8 resp_timeout) { lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) | (be32_to_cpu(lap_msg->offset12) & 0xFFFFFF07)); } static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg) { return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12); } static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg, __be32 flow_label) { lap_msg->offset56 = cpu_to_be32( (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) | (be32_to_cpu(flow_label) << 12)); } static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg) { return (u8) be32_to_cpu(lap_msg->offset56); } static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg, u8 traffic_class) { lap_msg->offset56 = cpu_to_be32(traffic_class | (be32_to_cpu(lap_msg->offset56) & 0xFFFFFF00)); } static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg) { return lap_msg->offset61 & 0x3F; } static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg, u8 packet_rate) { lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0); } static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg) { return lap_msg->offset62 >> 4; } static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl) { lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F); } static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg) { return (lap_msg->offset62 >> 3) & 0x1; } static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg, u8 subnet_local) { lap_msg->offset62 = ((subnet_local & 0x1) << 3) | (lap_msg->offset61 & 0xF7); } static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg) { return lap_msg->offset63 >> 3; } static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg, u8 local_ack_timeout) { lap_msg->offset63 = (local_ack_timeout << 3) | (lap_msg->offset63 & 0x07); } struct cm_apr_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; u8 info_length; u8 ap_status; __be16 rsvd; u8 info[IB_CM_APR_INFO_LENGTH]; u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); struct cm_sidr_req_msg { struct ib_mad_hdr hdr; __be32 request_id; __be16 pkey; __be16 rsvd; __be64 service_id; u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)]; } __attribute__ ((packed)); struct cm_sidr_rep_msg { struct ib_mad_hdr hdr; __be32 request_id; u8 status; u8 info_length; __be16 rsvd; /* QPN:24, rsvd:8 */ __be32 offset8; __be64 service_id; __be32 qkey; u8 info[IB_CM_SIDR_REP_INFO_LENGTH]; u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE]; } __attribute__ ((packed)); static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg) { return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8); } static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg, __be32 qpn) { sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) | (be32_to_cpu(sidr_rep_msg->offset8) & 0x000000FF)); } #endif /* CM_MSGS_H */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/cm_msgs.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/core_priv.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/core_priv.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/core_priv.h (revision 331772) @@ -1,149 +1,151 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef _CORE_PRIV_H #define _CORE_PRIV_H #include #include #include #include #ifdef CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS int cma_configfs_init(void); void cma_configfs_exit(void); #else static inline int cma_configfs_init(void) { return 0; } static inline void cma_configfs_exit(void) { } #endif struct cma_device; void cma_ref_dev(struct cma_device *cma_dev); void cma_deref_dev(struct cma_device *cma_dev); typedef bool (*cma_device_filter)(struct ib_device *, void *); struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, void *cookie); int cma_get_default_gid_type(struct cma_device *cma_dev, unsigned int port); int cma_set_default_gid_type(struct cma_device *cma_dev, unsigned int port, enum ib_gid_type default_gid_type); struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev); int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); void ib_device_unregister_sysfs(struct ib_device *device); void ib_cache_setup(void); void ib_cache_cleanup(void); int ib_resolve_eth_dmac(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int *qp_attr_mask); typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_filter filter, void *filter_cookie, roce_netdev_callback cb, void *cookie); void ib_enum_all_roce_netdevs(roce_netdev_filter filter, void *filter_cookie, roce_netdev_callback cb, void *cookie); enum ib_cache_gid_default_mode { IB_CACHE_GID_DEFAULT_MODE_SET, IB_CACHE_GID_DEFAULT_MODE_DELETE }; int ib_cache_gid_parse_type_str(const char *buf); const char *ib_cache_gid_type_str(enum ib_gid_type gid_type); void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, struct net_device *ndev, unsigned long gid_type_mask, enum ib_cache_gid_default_mode mode); int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr); int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr); int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, struct net_device *ndev); void ib_cache_gid_del_all_by_netdev(struct net_device *ndev); int roce_gid_mgmt_init(void); void roce_gid_mgmt_cleanup(void); int roce_rescan_device(struct ib_device *ib_dev); unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, struct net_device *upper) { /* TODO: add support for LAGG */ upper = VLAN_TRUNKDEV(upper); return (dev == upper); } int addr_init(void); void addr_cleanup(void); int ib_mad_init(void); void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); #endif /* _CORE_PRIV_H */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/core_priv.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/ib_addr.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_addr.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_addr.c (revision 331772) @@ -1,819 +1,823 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "core_priv.h" struct addr_req { struct list_head list; struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; struct rdma_addr_client *client; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); unsigned long timeout; int status; }; static void process_req(struct work_struct *work); static DEFINE_MUTEX(lock); static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; int rdma_addr_size(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return sizeof(struct sockaddr_in); case AF_INET6: return sizeof(struct sockaddr_in6); case AF_IB: return sizeof(struct sockaddr_ib); default: return 0; } } EXPORT_SYMBOL(rdma_addr_size); static struct rdma_addr_client self; void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); init_completion(&client->comp); } EXPORT_SYMBOL(rdma_addr_register_client); static inline void put_client(struct rdma_addr_client *client) { if (atomic_dec_and_test(&client->refcount)) complete(&client->comp); } void rdma_addr_unregister_client(struct rdma_addr_client *client) { put_client(client); wait_for_completion(&client->comp); } EXPORT_SYMBOL(rdma_addr_unregister_client); static inline void rdma_copy_addr_sub(u8 *dst, const u8 *src, unsigned min, unsigned max) { if (min > max) min = max; memcpy(dst, src, min); memset(dst + min, 0, max - min); } int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { if (dev->if_type == IFT_INFINIBAND) dev_addr->dev_type = ARPHRD_INFINIBAND; else if (dev->if_type == IFT_ETHER) dev_addr->dev_type = ARPHRD_ETHER; else dev_addr->dev_type = 0; rdma_copy_addr_sub(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen, MAX_ADDR_LEN); rdma_copy_addr_sub(dev_addr->broadcast, dev->if_broadcastaddr, dev->if_addrlen, MAX_ADDR_LEN); if (dst_dev_addr != NULL) { rdma_copy_addr_sub(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen, MAX_ADDR_LEN); } dev_addr->bound_dev_if = dev->if_index; return 0; } EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(const struct sockaddr *addr, struct rdma_dev_addr *dev_addr, u16 *vlan_id) { struct net_device *dev = NULL; int ret = -EADDRNOTAVAIL; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; ret = rdma_copy_addr(dev_addr, dev, NULL); dev_put(dev); return ret; } switch (addr->sa_family) { #ifdef INET case AF_INET: dev = ip_dev_find(dev_addr->net, ((const struct sockaddr_in *)addr)->sin_addr.s_addr); break; #endif #ifdef INET6 case AF_INET6: dev = ip6_dev_find(dev_addr->net, ((const struct sockaddr_in6 *)addr)->sin6_addr); break; #endif default: break; } if (dev != NULL) { ret = rdma_copy_addr(dev_addr, dev, NULL); if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); } return ret; } EXPORT_SYMBOL(rdma_translate_ip); static void set_timeout(unsigned long time) { int delay; /* under FreeBSD ticks are 32-bit */ delay = time - jiffies; if (delay <= 0) delay = 1; mod_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) { struct addr_req *temp_req; mutex_lock(&lock); list_for_each_entry_reverse(temp_req, &req_list, list) { if (time_after_eq(req->timeout, temp_req->timeout)) break; } list_add(&req->list, &temp_req->list); if (req_list.next == &req->list) set_timeout(req->timeout); mutex_unlock(&lock); } #if defined(INET) || defined(INET6) static int addr_resolve_multi(u8 *edst, struct ifnet *ifp, struct sockaddr *dst_in) { struct sockaddr *llsa; struct sockaddr_dl sdl; int error; sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; if (ifp->if_resolvemulti == NULL) { error = EOPNOTSUPP; } else { error = ifp->if_resolvemulti(ifp, &llsa, dst_in); if (error == 0) { rdma_copy_addr_sub(edst, LLADDR((struct sockaddr_dl *)llsa), ifp->if_addrlen, MAX_ADDR_LEN); } } return (error); } #endif #ifdef INET static int addr4_resolve(struct sockaddr_in *src_in, const struct sockaddr_in *dst_in, struct rdma_dev_addr *addr, struct ifnet **ifpp) { struct sockaddr_in dst_tmp = *dst_in; u8 edst[MAX_ADDR_LEN]; in_port_t src_port; struct sockaddr *saddr; struct rtentry *rte; struct ifnet *ifp; int error; int type; /* set VNET, if any */ CURVNET_SET(addr->net); /* set default TTL limit */ addr->hoplimit = V_ip_defttl; type = 0; if (src_in->sin_addr.s_addr == INADDR_ANY) type |= 1; if (dst_tmp.sin_addr.s_addr == INADDR_ANY) type |= 2; /* * Make sure the socket address length field * is set, else rtalloc1() will fail. */ dst_tmp.sin_len = sizeof(dst_tmp); /* Step 1 - lookup destination route if any */ switch (type) { case 0: case 1: /* regular destination route lookup */ rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); if (rte == NULL) { error = EHOSTUNREACH; goto done; } else if (rte->rt_ifp == NULL || rte->rt_ifp == V_loif || RT_LINK_IS_UP(rte->rt_ifp) == 0) { RTFREE_LOCKED(rte); error = EHOSTUNREACH; goto done; } RT_UNLOCK(rte); break; default: error = ENETUNREACH; goto done; } /* Step 2 - find outgoing network interface */ switch (type) { case 0: /* source check */ ifp = ip_dev_find(addr->net, src_in->sin_addr.s_addr); if (ifp == NULL) { error = ENETUNREACH; goto error_rt_free; } else if (ifp != rte->rt_ifp) { error = ENETUNREACH; goto error_put_ifp; } break; case 1: /* get destination network interface from route */ ifp = rte->rt_ifp; dev_hold(ifp); saddr = rte->rt_ifa->ifa_addr; src_port = src_in->sin_port; memcpy(src_in, saddr, rdma_addr_size(saddr)); src_in->sin_port = src_port; /* preserve port number */ break; default: break; } /* * Step 3 - resolve destination MAC address */ if (dst_tmp.sin_addr.s_addr == INADDR_BROADCAST) { rdma_copy_addr_sub(edst, ifp->if_broadcastaddr, ifp->if_addrlen, MAX_ADDR_LEN); } else if (IN_MULTICAST(ntohl(dst_tmp.sin_addr.s_addr))) { error = addr_resolve_multi(edst, ifp, (struct sockaddr *)&dst_tmp); if (error != 0) goto error_put_ifp; } else { bool is_gw = (rte->rt_flags & RTF_GATEWAY) != 0; memset(edst, 0, sizeof(edst)); error = arpresolve(ifp, is_gw, NULL, is_gw ? rte->rt_gateway : (const struct sockaddr *)&dst_tmp, edst, NULL, NULL); if (error != 0) goto error_put_ifp; else if (is_gw != 0) addr->network = RDMA_NETWORK_IPV4; } /* * Step 4 - copy destination and source MAC addresses */ error = -rdma_copy_addr(addr, ifp, edst); if (error != 0) goto error_put_ifp; if (rte != NULL) RTFREE(rte); *ifpp = ifp; goto done; error_put_ifp: dev_put(ifp); error_rt_free: RTFREE(rte); done: CURVNET_RESTORE(); if (error == EWOULDBLOCK || error == EAGAIN) error = ENODATA; return (-error); } #else static int addr4_resolve(struct sockaddr_in *src_in, const struct sockaddr_in *dst_in, struct rdma_dev_addr *addr, struct ifnet **ifpp) { return -EADDRNOTAVAIL; } #endif #ifdef INET6 static int addr6_resolve(struct sockaddr_in6 *src_in, const struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr, struct ifnet **ifpp) { struct sockaddr_in6 dst_tmp = *dst_in; u8 edst[MAX_ADDR_LEN]; in_port_t src_port; struct sockaddr *saddr; struct rtentry *rte; struct ifnet *ifp; int error; int type; /* set VNET, if any */ CURVNET_SET(addr->net); /* set default TTL limit */ addr->hoplimit = V_ip_defttl; type = 0; if (ipv6_addr_any(&src_in->sin6_addr)) type |= 1; if (ipv6_addr_any(&dst_tmp.sin6_addr)) type |= 2; /* * Make sure the socket address length field * is set, else rtalloc1() will fail. */ dst_tmp.sin6_len = sizeof(dst_tmp); /* Step 1 - lookup destination route if any */ switch (type) { case 0: /* sanity check for IPv4 addresses */ if (ipv6_addr_v4mapped(&src_in->sin6_addr) != ipv6_addr_v4mapped(&dst_tmp.sin6_addr)) { error = EAFNOSUPPORT; goto done; } /* FALLTHROUGH */ case 1: /* regular destination route lookup */ rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); if (rte == NULL) { error = EHOSTUNREACH; goto done; } else if (rte->rt_ifp == NULL || rte->rt_ifp == V_loif || RT_LINK_IS_UP(rte->rt_ifp) == 0) { RTFREE_LOCKED(rte); error = EHOSTUNREACH; goto done; } RT_UNLOCK(rte); break; default: error = ENETUNREACH; goto done; } /* Step 2 - find outgoing network interface */ switch (type) { case 0: /* source check */ ifp = ip6_dev_find(addr->net, src_in->sin6_addr); if (ifp == NULL) { error = ENETUNREACH; goto error_rt_free; } else if (ifp != rte->rt_ifp) { error = ENETUNREACH; goto error_put_ifp; } break; case 1: /* get destination network interface from route */ ifp = rte->rt_ifp; dev_hold(ifp); saddr = rte->rt_ifa->ifa_addr; src_port = src_in->sin6_port; memcpy(src_in, saddr, rdma_addr_size(saddr)); src_in->sin6_port = src_port; /* preserve port number */ break; default: break; } /* * Step 3 - resolve destination MAC address */ if (IN6_IS_ADDR_MULTICAST(&dst_tmp.sin6_addr)) { error = addr_resolve_multi(edst, ifp, (struct sockaddr *)&dst_tmp); if (error != 0) goto error_put_ifp; } else { bool is_gw = (rte->rt_flags & RTF_GATEWAY) != 0; memset(edst, 0, sizeof(edst)); error = nd6_resolve(ifp, is_gw, NULL, is_gw ? rte->rt_gateway : (const struct sockaddr *)&dst_tmp, edst, NULL, NULL); if (error != 0) goto error_put_ifp; else if (is_gw != 0) addr->network = RDMA_NETWORK_IPV6; } /* * Step 4 - copy destination and source MAC addresses */ error = -rdma_copy_addr(addr, ifp, edst); if (error != 0) goto error_put_ifp; if (rte != NULL) RTFREE(rte); *ifpp = ifp; goto done; error_put_ifp: dev_put(ifp); error_rt_free: RTFREE(rte); done: CURVNET_RESTORE(); if (error == EWOULDBLOCK || error == EAGAIN) error = ENODATA; return (-error); } #else static int addr6_resolve(struct sockaddr_in6 *src_in, const struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr, struct ifnet **ifpp) { return -EADDRNOTAVAIL; } #endif static int addr_resolve_neigh(struct ifnet *dev, const struct sockaddr *dst_in, struct rdma_dev_addr *addr) { if (dev->if_flags & IFF_LOOPBACK) { int ret; ret = rdma_translate_ip(dst_in, addr, NULL); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); return ret; } /* If the device doesn't do ARP internally */ if (!(dev->if_flags & IFF_NOARP)) return 0; return rdma_copy_addr(addr, dev, NULL); } static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, bool resolve_neigh) { struct net_device *ndev = NULL; int ret; if (dst_in->sa_family != src_in->sa_family) return -EINVAL; if (src_in->sa_family == AF_INET) { ret = addr4_resolve((struct sockaddr_in *)src_in, (const struct sockaddr_in *)dst_in, addr, &ndev); if (ret) return ret; if (resolve_neigh) ret = addr_resolve_neigh(ndev, dst_in, addr); } else { ret = addr6_resolve((struct sockaddr_in6 *)src_in, (const struct sockaddr_in6 *)dst_in, addr, &ndev); if (ret) return ret; if (resolve_neigh) ret = addr_resolve_neigh(ndev, dst_in, addr); } addr->bound_dev_if = ndev->if_index; addr->net = dev_net(ndev); dev_put(ndev); return ret; } static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; struct sockaddr *src_in, *dst_in; struct list_head done_list; INIT_LIST_HEAD(&done_list); mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->status == -ENODATA) { src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, true); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) continue; } list_move_tail(&req->list, &done_list); } if (!list_empty(&req_list)) { req = list_entry(req_list.next, struct addr_req, list); set_timeout(req->timeout); } mutex_unlock(&lock); list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); req->callback(req->status, (struct sockaddr *) &req->src_addr, req->addr, req->context); put_client(req->client); kfree(req); } } int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context) { struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; if (src_addr) { if (src_addr->sa_family != dst_addr->sa_family) { ret = -EINVAL; goto err; } memcpy(src_in, src_addr, rdma_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->client = client; atomic_inc(&client->refcount); req->status = addr_resolve(src_in, dst_in, addr, true); switch (req->status) { case 0: req->timeout = jiffies; queue_req(req); break; case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); break; default: ret = req->status; atomic_dec(&client->refcount); goto err; } return ret; err: kfree(req); return ret; } EXPORT_SYMBOL(rdma_resolve_ip); int rdma_resolve_ip_route(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr) { struct sockaddr_storage ssrc_addr = {}; struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr; if (src_addr) { if (src_addr->sa_family != dst_addr->sa_family) return -EINVAL; memcpy(src_in, src_addr, rdma_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } return addr_resolve(src_in, dst_addr, addr, false); } EXPORT_SYMBOL(rdma_resolve_ip_route); void rdma_addr_cancel(struct rdma_dev_addr *addr) { struct addr_req *req, *temp_req; mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->addr == addr) { req->status = -ECANCELED; req->timeout = jiffies; list_move(&req->list, &req_list); set_timeout(req->timeout); break; } } mutex_unlock(&lock); } EXPORT_SYMBOL(rdma_addr_cancel); struct resolve_cb_context { struct rdma_dev_addr *addr; struct completion comp; int status; }; static void resolve_cb(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context) { if (!status) memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct rdma_dev_addr)); ((struct resolve_cb_context *)context)->status = status; complete(&((struct resolve_cb_context *)context)->comp); } int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, u8 *dmac, u16 *vlan_id, int *if_index, int *hoplimit) { int ret = 0; struct rdma_dev_addr dev_addr; struct resolve_cb_context ctx; struct net_device *dev; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; rdma_gid2ip(&sgid_addr._sockaddr, sgid); rdma_gid2ip(&dgid_addr._sockaddr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); if (if_index) dev_addr.bound_dev_if = *if_index; dev_addr.net = TD_TO_VNET(curthread); ctx.addr = &dev_addr; init_completion(&ctx.comp); ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, &dev_addr, 1000, resolve_cb, &ctx); if (ret) return ret; wait_for_completion(&ctx.comp); ret = ctx.status; if (ret) return ret; memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); dev = dev_get_by_index(dev_addr.net, dev_addr.bound_dev_if); if (!dev) return -ENODEV; if (if_index) *if_index = dev_addr.bound_dev_if; if (vlan_id) *vlan_id = rdma_vlan_dev_vlan_id(dev); if (hoplimit) *hoplimit = dev_addr.hoplimit; dev_put(dev); return ret; } EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) { int ret = 0; struct rdma_dev_addr dev_addr; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } gid_addr; rdma_gid2ip(&gid_addr._sockaddr, sgid); memset(&dev_addr, 0, sizeof(dev_addr)); dev_addr.net = TD_TO_VNET(curthread); ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); if (ret) return ret; memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); return ret; } EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); int addr_init(void) { addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0); if (!addr_wq) return -ENOMEM; rdma_addr_register_client(&self); return 0; } void addr_cleanup(void) { rdma_addr_unregister_client(&self); destroy_workqueue(addr_wq); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_agent.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_agent.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_agent.c (revision 331772) @@ -1,222 +1,225 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * + * $FreeBSD$ */ #include #include #include "agent.h" #include "smi.h" #include "mad_priv.h" #define SPFX "ib_agent: " struct ib_agent_port_private { struct list_head port_list; struct ib_mad_agent *agent[2]; }; static DEFINE_SPINLOCK(ib_agent_port_list_lock); static LIST_HEAD(ib_agent_port_list); static struct ib_agent_port_private * __ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; list_for_each_entry(entry, &ib_agent_port_list, port_list) { if (entry->agent[1]->device == device && entry->agent[1]->port_num == port_num) return entry; } return NULL; } static struct ib_agent_port_private * ib_get_agent_port(const struct ib_device *device, int port_num) { struct ib_agent_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); entry = __ib_get_agent_port(device, port_num); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); return entry; } void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *grh, const struct ib_wc *wc, const struct ib_device *device, int port_num, int qpn, size_t resp_mad_len, bool opa) { struct ib_agent_port_private *port_priv; struct ib_mad_agent *agent; struct ib_mad_send_buf *send_buf; struct ib_ah *ah; struct ib_mad_send_wr_private *mad_send_wr; if (rdma_cap_ib_switch(device)) port_priv = ib_get_agent_port(device, 0); else port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { dev_err(&device->dev, "Unable to find port agent\n"); return; } agent = port_priv->agent[qpn]; ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); if (IS_ERR(ah)) { dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n", PTR_ERR(ah)); return; } if (opa && mad_hdr->base_version != OPA_MGMT_BASE_VERSION) resp_mad_len = IB_MGMT_MAD_SIZE; send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0, IB_MGMT_MAD_HDR, resp_mad_len - IB_MGMT_MAD_HDR, GFP_KERNEL, mad_hdr->base_version); if (IS_ERR(send_buf)) { dev_err(&device->dev, "ib_create_send_mad error\n"); goto err1; } memcpy(send_buf->mad, mad_hdr, resp_mad_len); send_buf->ah = ah; if (rdma_cap_ib_switch(device)) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); mad_send_wr->send_wr.port_num = port_num; } if (ib_post_send_mad(send_buf, NULL)) { dev_err(&device->dev, "ib_post_send_mad error\n"); goto err2; } return; err2: ib_free_send_mad(send_buf); err1: ib_destroy_ah(ah); } static void agent_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { ib_destroy_ah(mad_send_wc->send_buf->ah); ib_free_send_mad(mad_send_wc->send_buf); } int ib_agent_port_open(struct ib_device *device, int port_num) { struct ib_agent_port_private *port_priv; unsigned long flags; int ret; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { dev_err(&device->dev, "No memory for ib_agent_port_private\n"); ret = -ENOMEM; goto error1; } if (rdma_cap_ib_smi(device, port_num)) { /* Obtain send only MAD agent for SMI QP */ port_priv->agent[0] = ib_register_mad_agent(device, port_num, IB_QPT_SMI, NULL, 0, &agent_send_handler, NULL, NULL, 0); if (IS_ERR(port_priv->agent[0])) { ret = PTR_ERR(port_priv->agent[0]); goto error2; } } /* Obtain send only MAD agent for GSI QP */ port_priv->agent[1] = ib_register_mad_agent(device, port_num, IB_QPT_GSI, NULL, 0, &agent_send_handler, NULL, NULL, 0); if (IS_ERR(port_priv->agent[1])) { ret = PTR_ERR(port_priv->agent[1]); goto error3; } spin_lock_irqsave(&ib_agent_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_agent_port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); return 0; error3: if (port_priv->agent[0]) ib_unregister_mad_agent(port_priv->agent[0]); error2: kfree(port_priv); error1: return ret; } int ib_agent_port_close(struct ib_device *device, int port_num) { struct ib_agent_port_private *port_priv; unsigned long flags; spin_lock_irqsave(&ib_agent_port_list_lock, flags); port_priv = __ib_get_agent_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); dev_err(&device->dev, "Port %d not found\n", port_num); return -ENODEV; } list_del(&port_priv->port_list); spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); ib_unregister_mad_agent(port_priv->agent[1]); if (port_priv->agent[0]) ib_unregister_mad_agent(port_priv->agent[0]); kfree(port_priv); return 0; } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_cache.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_cache.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_cache.c (revision 331772) @@ -1,1258 +1,1262 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include "core_priv.h" struct ib_pkey_cache { int table_len; u16 table[0]; }; struct ib_update_work { struct work_struct work; struct ib_device *device; u8 port_num; }; union ib_gid zgid; EXPORT_SYMBOL(zgid); static const struct ib_gid_attr zattr; enum gid_attr_find_mask { GID_ATTR_FIND_MASK_GID = 1UL << 0, GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, }; enum gid_table_entry_props { GID_TABLE_ENTRY_INVALID = 1UL << 0, GID_TABLE_ENTRY_DEFAULT = 1UL << 1, }; enum gid_table_write_action { GID_TABLE_WRITE_ACTION_ADD, GID_TABLE_WRITE_ACTION_DEL, /* MODIFY only updates the GID table. Currently only used by * ib_cache_update. */ GID_TABLE_WRITE_ACTION_MODIFY }; struct ib_gid_table_entry { unsigned long props; union ib_gid gid; struct ib_gid_attr attr; void *context; }; struct ib_gid_table { int sz; /* In RoCE, adding a GID to the table requires: * (a) Find if this GID is already exists. * (b) Find a free space. * (c) Write the new GID * * Delete requires different set of operations: * (a) Find the GID * (b) Delete it. * * Add/delete should be carried out atomically. * This is done by locking this mutex from multiple * writers. We don't need this lock for IB, as the MAD * layer replaces all entries. All data_vec entries * are locked by this lock. **/ struct mutex lock; /* This lock protects the table entries from being * read and written simultaneously. */ rwlock_t rwlock; struct ib_gid_table_entry *data_vec; }; static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) { if (rdma_cap_roce_gid_table(ib_dev, port)) { struct ib_event event; event.device = ib_dev; event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; ib_dispatch_event(&event); } } static const char * const gid_type_str[] = { [IB_GID_TYPE_IB] = "IB/RoCE v1", [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2", }; const char *ib_cache_gid_type_str(enum ib_gid_type gid_type) { if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type]) return gid_type_str[gid_type]; return "Invalid GID type"; } EXPORT_SYMBOL(ib_cache_gid_type_str); int ib_cache_gid_parse_type_str(const char *buf) { unsigned int i; size_t len; int err = -EINVAL; len = strlen(buf); if (len == 0) return -EINVAL; if (buf[len - 1] == '\n') len--; for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i) if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) && len == strlen(gid_type_str[i])) { err = i; break; } return err; } EXPORT_SYMBOL(ib_cache_gid_parse_type_str); /* This function expects that rwlock will be write locked in all * scenarios and that lock will be locked in sleep-able (RoCE) * scenarios. */ static int write_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix, const union ib_gid *gid, const struct ib_gid_attr *attr, enum gid_table_write_action action, bool default_gid) __releases(&table->rwlock) __acquires(&table->rwlock) { int ret = 0; struct net_device *old_net_dev; enum ib_gid_type old_gid_type; /* in rdma_cap_roce_gid_table, this funciton should be protected by a * sleep-able lock. */ if (rdma_cap_roce_gid_table(ib_dev, port)) { table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; write_unlock_irq(&table->rwlock); /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by * RoCE providers and thus only updates the cache. */ if (action == GID_TABLE_WRITE_ACTION_ADD) ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr, &table->data_vec[ix].context); else if (action == GID_TABLE_WRITE_ACTION_DEL) ret = ib_dev->del_gid(ib_dev, port, ix, &table->data_vec[ix].context); write_lock_irq(&table->rwlock); } old_net_dev = table->data_vec[ix].attr.ndev; old_gid_type = table->data_vec[ix].attr.gid_type; if (old_net_dev && old_net_dev != attr->ndev) dev_put(old_net_dev); /* if modify_gid failed, just delete the old gid */ if (ret || action == GID_TABLE_WRITE_ACTION_DEL) { gid = &zgid; attr = &zattr; table->data_vec[ix].context = NULL; } memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); if (default_gid) { table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; if (action == GID_TABLE_WRITE_ACTION_DEL) table->data_vec[ix].attr.gid_type = old_gid_type; } if (table->data_vec[ix].attr.ndev && table->data_vec[ix].attr.ndev != old_net_dev) dev_hold(table->data_vec[ix].attr.ndev); table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID; return ret; } static int add_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix, const union ib_gid *gid, const struct ib_gid_attr *attr, bool default_gid) { return write_gid(ib_dev, port, table, ix, gid, attr, GID_TABLE_WRITE_ACTION_ADD, default_gid); } static int modify_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix, const union ib_gid *gid, const struct ib_gid_attr *attr, bool default_gid) { return write_gid(ib_dev, port, table, ix, gid, attr, GID_TABLE_WRITE_ACTION_MODIFY, default_gid); } static int del_gid(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table, int ix, bool default_gid) { return write_gid(ib_dev, port, table, ix, &zgid, &zattr, GID_TABLE_WRITE_ACTION_DEL, default_gid); } /* rwlock should be read locked */ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, const struct ib_gid_attr *val, bool default_gid, unsigned long mask, int *pempty) { int i = 0; int found = -1; int empty = pempty ? -1 : 0; while (i < table->sz && (found < 0 || empty < 0)) { struct ib_gid_table_entry *data = &table->data_vec[i]; struct ib_gid_attr *attr = &data->attr; int curr_index = i; i++; if (data->props & GID_TABLE_ENTRY_INVALID) continue; if (empty < 0) if (!memcmp(&data->gid, &zgid, sizeof(*gid)) && !memcmp(attr, &zattr, sizeof(*attr)) && !data->props) empty = curr_index; if (found >= 0) continue; if (mask & GID_ATTR_FIND_MASK_GID_TYPE && attr->gid_type != val->gid_type) continue; if (mask & GID_ATTR_FIND_MASK_GID && memcmp(gid, &data->gid, sizeof(*gid))) continue; if (mask & GID_ATTR_FIND_MASK_NETDEV && attr->ndev != val->ndev) continue; if (mask & GID_ATTR_FIND_MASK_DEFAULT && !!(data->props & GID_TABLE_ENTRY_DEFAULT) != default_gid) continue; found = curr_index; } if (pempty) *pempty = empty; return found; } static void addrconf_ifid_eui48(u8 *eui, struct net_device *dev) { if (dev->if_addrlen != ETH_ALEN) return; memcpy(eui, IF_LLADDR(dev), 3); memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); /* NOTE: The scope ID is added by the GID to IP conversion */ eui[3] = 0xFF; eui[4] = 0xFE; eui[0] ^= 2; } static void make_default_gid(struct net_device *dev, union ib_gid *gid) { gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); addrconf_ifid_eui48(&gid->raw[8], dev); } int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; int ix; int ret = 0; struct net_device *idev; int empty; table = ports_table[port - rdma_start_port(ib_dev)]; if (!memcmp(gid, &zgid, sizeof(*gid))) return -EINVAL; if (ib_dev->get_netdev) { idev = ib_dev->get_netdev(ib_dev, port); if (idev && attr->ndev != idev) { union ib_gid default_gid; /* Adding default GIDs in not permitted */ make_default_gid(idev, &default_gid); if (!memcmp(gid, &default_gid, sizeof(*gid))) { dev_put(idev); return -EPERM; } } if (idev) dev_put(idev); } mutex_lock(&table->lock); write_lock_irq(&table->rwlock); ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_NETDEV, &empty); if (ix >= 0) goto out_unlock; if (empty < 0) { ret = -ENOSPC; goto out_unlock; } ret = add_gid(ib_dev, port, table, empty, gid, attr, false); if (!ret) dispatch_gid_change_event(ib_dev, port); out_unlock: write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); return ret; } int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct ib_gid_attr *attr) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; int ix; table = ports_table[port - rdma_start_port(ib_dev)]; mutex_lock(&table->lock); write_lock_irq(&table->rwlock); ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_NETDEV | GID_ATTR_FIND_MASK_DEFAULT, NULL); if (ix < 0) goto out_unlock; if (!del_gid(ib_dev, port, table, ix, false)) dispatch_gid_change_event(ib_dev, port); out_unlock: write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); return 0; } int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, struct net_device *ndev) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; int ix; bool deleted = false; table = ports_table[port - rdma_start_port(ib_dev)]; mutex_lock(&table->lock); write_lock_irq(&table->rwlock); for (ix = 0; ix < table->sz; ix++) if (table->data_vec[ix].attr.ndev == ndev) if (!del_gid(ib_dev, port, table, ix, !!(table->data_vec[ix].props & GID_TABLE_ENTRY_DEFAULT))) deleted = true; write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); if (deleted) dispatch_gid_change_event(ib_dev, port); return 0; } static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, union ib_gid *gid, struct ib_gid_attr *attr) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; table = ports_table[port - rdma_start_port(ib_dev)]; if (index < 0 || index >= table->sz) return -EINVAL; if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) return -EAGAIN; memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); if (attr) { memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); /* make sure network device is valid and attached */ if (attr->ndev != NULL && (attr->ndev->if_flags & IFF_DYING) == 0 && attr->ndev->if_addr != NULL) dev_hold(attr->ndev); else attr->ndev = NULL; } return 0; } static int _ib_cache_gid_table_find(struct ib_device *ib_dev, const union ib_gid *gid, const struct ib_gid_attr *val, unsigned long mask, u8 *port, u16 *index) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; u8 p; int local_index; unsigned long flags; for (p = 0; p < ib_dev->phys_port_cnt; p++) { table = ports_table[p]; read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, val, false, mask, NULL); if (local_index >= 0) { if (index) *index = local_index; if (port) *port = p + rdma_start_port(ib_dev); read_unlock_irqrestore(&table->rwlock, flags); return 0; } read_unlock_irqrestore(&table->rwlock, flags); } return -ENOENT; } static int ib_cache_gid_find(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev, u8 *port, u16 *index) { unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, mask, port, index); } int ib_find_cached_gid_by_port(struct ib_device *ib_dev, const union ib_gid *gid, enum ib_gid_type gid_type, u8 port, struct net_device *ndev, u16 *index) { int local_index; struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; unsigned long mask = GID_ATTR_FIND_MASK_GID | GID_ATTR_FIND_MASK_GID_TYPE; struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; unsigned long flags; if (port < rdma_start_port(ib_dev) || port > rdma_end_port(ib_dev)) return -ENOENT; table = ports_table[port - rdma_start_port(ib_dev)]; if (ndev) mask |= GID_ATTR_FIND_MASK_NETDEV; read_lock_irqsave(&table->rwlock, flags); local_index = find_gid(table, gid, &val, false, mask, NULL); if (local_index >= 0) { if (index) *index = local_index; read_unlock_irqrestore(&table->rwlock, flags); return 0; } read_unlock_irqrestore(&table->rwlock, flags); return -ENOENT; } EXPORT_SYMBOL(ib_find_cached_gid_by_port); /** * ib_find_gid_by_filter - Returns the GID table index where a specified * GID value occurs * @device: The device to query. * @gid: The GID value to search for. * @port_num: The port number of the device where the GID value could be * searched. * @filter: The filter function is executed on any matching GID in the table. * If the filter function returns true, the corresponding index is returned, * otherwise, we continue searching the GID table. It's guaranteed that * while filter is executed, ndev field is valid and the structure won't * change. filter is executed in an atomic context. filter must not be NULL. * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. * * ib_cache_gid_find_by_filter() searches for the specified GID value * of which the filter function returns true in the port's GID table. * This function is only supported on RoCE ports. * */ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, const union ib_gid *gid, u8 port, bool (*filter)(const union ib_gid *, const struct ib_gid_attr *, void *), void *context, u16 *index) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; struct ib_gid_table *table; unsigned int i; unsigned long flags; bool found = false; if (!ports_table) return -EOPNOTSUPP; if (port < rdma_start_port(ib_dev) || port > rdma_end_port(ib_dev) || !rdma_protocol_roce(ib_dev, port)) return -EPROTONOSUPPORT; table = ports_table[port - rdma_start_port(ib_dev)]; read_lock_irqsave(&table->rwlock, flags); for (i = 0; i < table->sz; i++) { struct ib_gid_attr attr; if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) goto next; if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) goto next; memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); if (filter(gid, &attr, context)) found = true; next: if (found) break; } read_unlock_irqrestore(&table->rwlock, flags); if (!found) return -ENOENT; if (index) *index = i; return 0; } static struct ib_gid_table *alloc_gid_table(int sz) { struct ib_gid_table *table = kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); if (!table) return NULL; table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL); if (!table->data_vec) goto err_free_table; mutex_init(&table->lock); table->sz = sz; rwlock_init(&table->rwlock); return table; err_free_table: kfree(table); return NULL; } static void release_gid_table(struct ib_gid_table *table) { if (table) { kfree(table->data_vec); kfree(table); } } static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table) { int i; bool deleted = false; if (!table) return; write_lock_irq(&table->rwlock); for (i = 0; i < table->sz; ++i) { if (memcmp(&table->data_vec[i].gid, &zgid, sizeof(table->data_vec[i].gid))) if (!del_gid(ib_dev, port, table, i, table->data_vec[i].props & GID_ATTR_FIND_MASK_DEFAULT)) deleted = true; } write_unlock_irq(&table->rwlock); if (deleted) dispatch_gid_change_event(ib_dev, port); } void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, struct net_device *ndev, unsigned long gid_type_mask, enum ib_cache_gid_default_mode mode) { struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; union ib_gid gid; struct ib_gid_attr gid_attr; struct ib_gid_attr zattr_type = zattr; struct ib_gid_table *table; unsigned int gid_type; table = ports_table[port - rdma_start_port(ib_dev)]; make_default_gid(ndev, &gid); memset(&gid_attr, 0, sizeof(gid_attr)); gid_attr.ndev = ndev; for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) { int ix; union ib_gid current_gid; struct ib_gid_attr current_gid_attr = {}; if (1UL << gid_type & ~gid_type_mask) continue; gid_attr.gid_type = gid_type; mutex_lock(&table->lock); write_lock_irq(&table->rwlock); ix = find_gid(table, NULL, &gid_attr, true, GID_ATTR_FIND_MASK_GID_TYPE | GID_ATTR_FIND_MASK_DEFAULT, NULL); /* Coudn't find default GID location */ if (WARN_ON(ix < 0)) goto release; zattr_type.gid_type = gid_type; if (!__ib_cache_gid_get(ib_dev, port, ix, ¤t_gid, ¤t_gid_attr) && mode == IB_CACHE_GID_DEFAULT_MODE_SET && !memcmp(&gid, ¤t_gid, sizeof(gid)) && !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr))) goto release; if (memcmp(¤t_gid, &zgid, sizeof(current_gid)) || memcmp(¤t_gid_attr, &zattr_type, sizeof(current_gid_attr))) { if (del_gid(ib_dev, port, table, ix, true)) { pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", ix, gid.raw); goto release; } else { dispatch_gid_change_event(ib_dev, port); } } if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) pr_warn("ib_cache_gid: unable to add default gid %pI6\n", gid.raw); else dispatch_gid_change_event(ib_dev, port); } release: if (current_gid_attr.ndev) dev_put(current_gid_attr.ndev); write_unlock_irq(&table->rwlock); mutex_unlock(&table->lock); } } static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, struct ib_gid_table *table) { unsigned int i; unsigned long roce_gid_type_mask; unsigned int num_default_gids; unsigned int current_gid = 0; roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); num_default_gids = hweight_long(roce_gid_type_mask); for (i = 0; i < num_default_gids && i < table->sz; i++) { struct ib_gid_table_entry *entry = &table->data_vec[i]; entry->props |= GID_TABLE_ENTRY_DEFAULT; current_gid = find_next_bit(&roce_gid_type_mask, BITS_PER_LONG, current_gid); entry->attr.gid_type = current_gid++; } return 0; } static int _gid_table_setup_one(struct ib_device *ib_dev) { u8 port; struct ib_gid_table **table; int err = 0; table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); if (!table) { pr_warn("failed to allocate ib gid cache for %s\n", ib_dev->name); return -ENOMEM; } for (port = 0; port < ib_dev->phys_port_cnt; port++) { u8 rdma_port = port + rdma_start_port(ib_dev); table[port] = alloc_gid_table( ib_dev->port_immutable[rdma_port].gid_tbl_len); if (!table[port]) { err = -ENOMEM; goto rollback_table_setup; } err = gid_table_reserve_default(ib_dev, port + rdma_start_port(ib_dev), table[port]); if (err) goto rollback_table_setup; } ib_dev->cache.gid_cache = table; return 0; rollback_table_setup: for (port = 0; port < ib_dev->phys_port_cnt; port++) { cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), table[port]); release_gid_table(table[port]); } kfree(table); return err; } static void gid_table_release_one(struct ib_device *ib_dev) { struct ib_gid_table **table = ib_dev->cache.gid_cache; u8 port; if (!table) return; for (port = 0; port < ib_dev->phys_port_cnt; port++) release_gid_table(table[port]); kfree(table); ib_dev->cache.gid_cache = NULL; } static void gid_table_cleanup_one(struct ib_device *ib_dev) { struct ib_gid_table **table = ib_dev->cache.gid_cache; u8 port; if (!table) return; for (port = 0; port < ib_dev->phys_port_cnt; port++) cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), table[port]); } static int gid_table_setup_one(struct ib_device *ib_dev) { int err; err = _gid_table_setup_one(ib_dev); if (err) return err; err = roce_rescan_device(ib_dev); if (err) { gid_table_cleanup_one(ib_dev); gid_table_release_one(ib_dev); } return err; } int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *gid_attr) { int res; unsigned long flags; struct ib_gid_table **ports_table = device->cache.gid_cache; struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)]; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&table->rwlock, flags); res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); read_unlock_irqrestore(&table->rwlock, flags); return res; } EXPORT_SYMBOL(ib_get_cached_gid); int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, u16 *index) { return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); } EXPORT_SYMBOL(ib_find_cached_gid); int ib_find_gid_by_filter(struct ib_device *device, const union ib_gid *gid, u8 port_num, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, void *), void *context, u16 *index) { /* Only RoCE GID table supports filter function */ if (!rdma_cap_roce_gid_table(device, port_num) && filter) return -EPROTONOSUPPORT; return ib_cache_gid_find_by_filter(device, gid, port_num, filter, context, index); } EXPORT_SYMBOL(ib_find_gid_by_filter); int ib_get_cached_pkey(struct ib_device *device, u8 port_num, int index, u16 *pkey) { struct ib_pkey_cache *cache; unsigned long flags; int ret = 0; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; if (index < 0 || index >= cache->table_len) ret = -EINVAL; else *pkey = cache->table[index]; read_unlock_irqrestore(&device->cache.lock, flags); return ret; } EXPORT_SYMBOL(ib_get_cached_pkey); int ib_find_cached_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index) { struct ib_pkey_cache *cache; unsigned long flags; int i; int ret = -ENOENT; int partial_ix = -1; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; for (i = 0; i < cache->table_len; ++i) if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { if (cache->table[i] & 0x8000) { *index = i; ret = 0; break; } else partial_ix = i; } if (ret && partial_ix >= 0) { *index = partial_ix; ret = 0; } read_unlock_irqrestore(&device->cache.lock, flags); return ret; } EXPORT_SYMBOL(ib_find_cached_pkey); int ib_find_exact_cached_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index) { struct ib_pkey_cache *cache; unsigned long flags; int i; int ret = -ENOENT; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; *index = -1; for (i = 0; i < cache->table_len; ++i) if (cache->table[i] == pkey) { *index = i; ret = 0; break; } read_unlock_irqrestore(&device->cache.lock, flags); return ret; } EXPORT_SYMBOL(ib_find_exact_cached_pkey); int ib_get_cached_lmc(struct ib_device *device, u8 port_num, u8 *lmc) { unsigned long flags; int ret = 0; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; read_lock_irqsave(&device->cache.lock, flags); *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; read_unlock_irqrestore(&device->cache.lock, flags); return ret; } EXPORT_SYMBOL(ib_get_cached_lmc); static void ib_cache_update(struct ib_device *device, u8 port) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; struct ib_gid_cache { int table_len; union ib_gid table[0]; } *gid_cache = NULL; int i; int ret; struct ib_gid_table *table; struct ib_gid_table **ports_table = device->cache.gid_cache; bool use_roce_gid_table = rdma_cap_roce_gid_table(device, port); if (port < rdma_start_port(device) || port > rdma_end_port(device)) return; table = ports_table[port - rdma_start_port(device)]; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) return; ret = ib_query_port(device, port, tprops); if (ret) { pr_warn("ib_query_port failed (%d) for %s\n", ret, device->name); goto err; } pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * sizeof *pkey_cache->table, GFP_KERNEL); if (!pkey_cache) goto err; pkey_cache->table_len = tprops->pkey_tbl_len; if (!use_roce_gid_table) { gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len * sizeof(*gid_cache->table), GFP_KERNEL); if (!gid_cache) goto err; gid_cache->table_len = tprops->gid_tbl_len; } for (i = 0; i < pkey_cache->table_len; ++i) { ret = ib_query_pkey(device, port, i, pkey_cache->table + i); if (ret) { pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } } if (!use_roce_gid_table) { for (i = 0; i < gid_cache->table_len; ++i) { ret = ib_query_gid(device, port, i, gid_cache->table + i, NULL); if (ret) { pr_warn("ib_query_gid failed (%d) for %s (index %d)\n", ret, device->name, i); goto err; } } } write_lock_irq(&device->cache.lock); old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; if (!use_roce_gid_table) { write_lock(&table->rwlock); for (i = 0; i < gid_cache->table_len; i++) { modify_gid(device, port, table, i, gid_cache->table + i, &zattr, false); } write_unlock(&table->rwlock); } device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; write_unlock_irq(&device->cache.lock); kfree(gid_cache); kfree(old_pkey_cache); kfree(tprops); return; err: kfree(pkey_cache); kfree(gid_cache); kfree(tprops); } static void ib_cache_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); ib_cache_update(work->device, work->port_num); kfree(work); } static void ib_cache_event(struct ib_event_handler *handler, struct ib_event *event) { struct ib_update_work *work; if (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_GID_CHANGE) { work = kmalloc(sizeof *work, GFP_ATOMIC); if (work) { INIT_WORK(&work->work, ib_cache_task); work->device = event->device; work->port_num = event->element.port_num; queue_work(ib_wq, &work->work); } } } int ib_cache_setup_one(struct ib_device *device) { int p; int err; rwlock_init(&device->cache.lock); device->cache.pkey_cache = kzalloc(sizeof *device->cache.pkey_cache * (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); if (!device->cache.pkey_cache || !device->cache.lmc_cache) { pr_warn("Couldn't allocate cache for %s\n", device->name); return -ENOMEM; } err = gid_table_setup_one(device); if (err) /* Allocated memory will be cleaned in the release function */ return err; for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) ib_cache_update(device, p + rdma_start_port(device)); INIT_IB_EVENT_HANDLER(&device->cache.event_handler, device, ib_cache_event); err = ib_register_event_handler(&device->cache.event_handler); if (err) goto err; return 0; err: gid_table_cleanup_one(device); return err; } void ib_cache_release_one(struct ib_device *device) { int p; /* * The release function frees all the cache elements. * This function should be called as part of freeing * all the device's resources when the cache could no * longer be accessed. */ if (device->cache.pkey_cache) for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) kfree(device->cache.pkey_cache[p]); gid_table_release_one(device); kfree(device->cache.pkey_cache); kfree(device->cache.lmc_cache); } void ib_cache_cleanup_one(struct ib_device *device) { /* The cleanup function unregisters the event handler, * waits for all in-progress workqueue elements and cleans * up the GID cache. This function should be called after * the device was removed from the devices list and all * clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. */ ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); } void __init ib_cache_setup(void) { roce_gid_mgmt_init(); } void __exit ib_cache_cleanup(void) { roce_gid_mgmt_cleanup(); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_cm.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_cm.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_cm.c (revision 331772) @@ -1,4141 +1,4145 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004-2007 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cm_msgs.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); MODULE_LICENSE("Dual BSD/GPL"); static void cm_add_one(struct ib_device *device); static void cm_remove_one(struct ib_device *device, void *client_data); static struct ib_client cm_client = { .name = "cm", .add = cm_add_one, .remove = cm_remove_one }; static struct ib_cm { spinlock_t lock; struct list_head device_list; rwlock_t device_lock; struct rb_root listen_service_table; u64 listen_service_id; /* struct rb_root peer_service_table; todo: fix peer to peer */ struct rb_root remote_qp_table; struct rb_root remote_id_table; struct rb_root remote_sidr_table; struct idr local_id_table; __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; /* Sync on cm change port state */ spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ enum { CM_REQ_COUNTER, CM_MRA_COUNTER, CM_REJ_COUNTER, CM_REP_COUNTER, CM_RTU_COUNTER, CM_DREQ_COUNTER, CM_DREP_COUNTER, CM_SIDR_REQ_COUNTER, CM_SIDR_REP_COUNTER, CM_LAP_COUNTER, CM_APR_COUNTER, CM_ATTR_COUNT, CM_ATTR_ID_OFFSET = 0x0010, }; enum { CM_XMIT, CM_XMIT_RETRIES, CM_RECV, CM_RECV_DUPLICATES, CM_COUNTER_GROUPS }; static char const counter_group_names[CM_COUNTER_GROUPS] [sizeof("cm_rx_duplicates")] = { "cm_tx_msgs", "cm_tx_retries", "cm_rx_msgs", "cm_rx_duplicates" }; struct cm_counter_group { struct kobject obj; atomic_long_t counter[CM_ATTR_COUNT]; }; struct cm_counter_attribute { struct attribute attr; int index; }; #define CM_COUNTER_ATTR(_name, _index) \ struct cm_counter_attribute cm_##_name##_counter_attr = { \ .attr = { .name = __stringify(_name), .mode = 0444 }, \ .index = _index \ } static CM_COUNTER_ATTR(req, CM_REQ_COUNTER); static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER); static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER); static CM_COUNTER_ATTR(rep, CM_REP_COUNTER); static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER); static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER); static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER); static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER); static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER); static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER); static CM_COUNTER_ATTR(apr, CM_APR_COUNTER); static struct attribute *cm_counter_default_attrs[] = { &cm_req_counter_attr.attr, &cm_mra_counter_attr.attr, &cm_rej_counter_attr.attr, &cm_rep_counter_attr.attr, &cm_rtu_counter_attr.attr, &cm_dreq_counter_attr.attr, &cm_drep_counter_attr.attr, &cm_sidr_req_counter_attr.attr, &cm_sidr_rep_counter_attr.attr, &cm_lap_counter_attr.attr, &cm_apr_counter_attr.attr, NULL }; struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; struct list_head cm_priv_prim_list; struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; struct cm_device { struct list_head list; struct ib_device *ib_device; struct device *device; u8 ack_delay; int going_down; struct cm_port *port[0]; }; struct cm_av { struct cm_port *port; union ib_gid dgid; struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; }; struct cm_work { struct delayed_work work; struct list_head list; struct cm_port *port; struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */ __be32 local_id; /* Established / timewait */ __be32 remote_id; struct ib_cm_event cm_event; struct ib_sa_path_rec path[0]; }; struct cm_timewait_info { struct cm_work work; /* Must be first. */ struct list_head list; struct rb_node remote_qp_node; struct rb_node remote_id_node; __be64 remote_ca_guid; __be32 remote_qpn; u8 inserted_remote_qp; u8 inserted_remote_id; }; struct cm_id_private { struct ib_cm_id id; struct rb_node service_node; struct rb_node sidr_id_node; spinlock_t lock; /* Do not acquire inside cm.lock */ struct completion comp; atomic_t refcount; /* Number of clients sharing this ib_cm_id. Only valid for listeners. * Protected by the cm.lock spinlock. */ int listen_sharecount; struct ib_mad_send_buf *msg; struct cm_timewait_info *timewait_info; /* todo: use alternate port on send failure */ struct cm_av av; struct cm_av alt_av; void *private_data; __be64 tid; __be32 local_qpn; __be32 remote_qpn; enum ib_qp_type qp_type; __be32 sq_psn; __be32 rq_psn; int timeout_ms; enum ib_mtu path_mtu; __be16 pkey; u8 private_data_len; u8 max_cm_retries; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; u8 retry_count; u8 rnr_retry_count; u8 service_timeout; u8 target_ack_delay; struct list_head prim_list; struct list_head altr_list; /* Indicates that the send port mad is registered and av is set */ int prim_send_port_not_ready; int altr_send_port_not_ready; struct list_head work_list; atomic_t work_count; }; static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) { if (atomic_dec_and_test(&cm_id_priv->refcount)) complete(&cm_id_priv->comp); } static int cm_alloc_msg(struct cm_id_private *cm_id_priv, struct ib_mad_send_buf **msg) { struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; struct cm_av *av; unsigned long flags, flags2; int ret = 0; /* don't let the port to be released till the agent is down */ spin_lock_irqsave(&cm.state_lock, flags2); spin_lock_irqsave(&cm.lock, flags); if (!cm_id_priv->prim_send_port_not_ready) av = &cm_id_priv->av; else if (!cm_id_priv->altr_send_port_not_ready && (cm_id_priv->alt_av.port)) av = &cm_id_priv->alt_av; else { pr_info("%s: not valid CM id\n", __func__); ret = -ENODEV; spin_unlock_irqrestore(&cm.lock, flags); goto out; } spin_unlock_irqrestore(&cm.lock, flags); /* Make sure the port haven't released the mad yet */ mad_agent = cm_id_priv->av.port->mad_agent; if (!mad_agent) { pr_info("%s: not a valid MAD agent\n", __func__); ret = -ENODEV; goto out; } ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto out; } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, av->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); ret = PTR_ERR(m); goto out; } /* Timeout set by caller if response is expected. */ m->ah = ah; m->retries = cm_id_priv->max_cm_retries; atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; out: spin_unlock_irqrestore(&cm.state_lock, flags2); return ret; } static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, struct ib_mad_send_buf **msg) { struct ib_mad_send_buf *m; struct ib_ah *ah; ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, port->port_num); if (IS_ERR(ah)) return PTR_ERR(ah); m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); return PTR_ERR(m); } m->ah = ah; *msg = m; return 0; } static void cm_free_msg(struct ib_mad_send_buf *msg) { ib_destroy_ah(msg->ah); if (msg->context[0]) cm_deref_id(msg->context[0]); ib_free_send_mad(msg); } static void * cm_copy_private_data(const void *private_data, u8 private_data_len) { void *data; if (!private_data || !private_data_len) return NULL; data = kmemdup(private_data, private_data_len, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); return data; } static void cm_set_private_data(struct cm_id_private *cm_id_priv, void *private_data, u8 private_data_len) { if (cm_id_priv->private_data && cm_id_priv->private_data_len) kfree(cm_id_priv->private_data); cm_id_priv->private_data = private_data; cm_id_priv->private_data_len = private_data_len; } static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { av->port = port; av->pkey_index = wc->pkey_index; ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc, grh, &av->ah_attr); } static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, struct cm_id_private *cm_id_priv) { struct cm_device *cm_dev; struct cm_port *port = NULL; unsigned long flags; int ret; u8 p; struct net_device *ndev = ib_get_ndev_from_path(path); read_lock_irqsave(&cm.device_lock, flags); list_for_each_entry(cm_dev, &cm.device_list, list) { if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, path->gid_type, ndev, &p, NULL)) { port = cm_dev->port[p-1]; break; } } read_unlock_irqrestore(&cm.device_lock, flags); if (ndev) dev_put(ndev); if (!port) return -EINVAL; ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num, be16_to_cpu(path->pkey), &av->pkey_index); if (ret) return ret; av->port = port; ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); if (ret) return ret; av->timeout = path->packet_life_time + 1; spin_lock_irqsave(&cm.lock, flags); if (&cm_id_priv->av == av) list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); else if (&cm_id_priv->alt_av == av) list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); else ret = -EINVAL; spin_unlock_irqrestore(&cm.lock, flags); return ret; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; int id; idr_preload(GFP_KERNEL); spin_lock_irqsave(&cm.lock, flags); id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); spin_unlock_irqrestore(&cm.lock, flags); idr_preload_end(); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; return id < 0 ? id : 0; } static void cm_free_id(__be32 local_id) { spin_lock_irq(&cm.lock); idr_remove(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); spin_unlock_irq(&cm.lock); } static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; cm_id_priv = idr_find(&cm.local_id_table, (__force int) (local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } return cm_id_priv; } static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id) { struct cm_id_private *cm_id_priv; spin_lock_irq(&cm.lock); cm_id_priv = cm_get_id(local_id, remote_id); spin_unlock_irq(&cm.lock); return cm_id_priv; } /* * Trivial helpers to strip endian annotation and compare; the * endianness doesn't actually matter since we just need a stable * order for the RB tree. */ static int be32_lt(__be32 a, __be32 b) { return (__force u32) a < (__force u32) b; } static int be32_gt(__be32 a, __be32 b) { return (__force u32) a > (__force u32) b; } static int be64_lt(__be64 a, __be64 b) { return (__force u64) a < (__force u64) b; } static int be64_gt(__be64 a, __be64 b) { return (__force u64) a > (__force u64) b; } static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.listen_service_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; __be64 service_mask = cm_id_priv->id.service_mask; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); if ((cur_cm_id_priv->id.service_mask & service_id) == (service_mask & cur_cm_id_priv->id.service_id) && (cm_id_priv->id.device == cur_cm_id_priv->id.device)) return cur_cm_id_priv; if (cm_id_priv->id.device < cur_cm_id_priv->id.device) link = &(*link)->rb_left; else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) link = &(*link)->rb_right; else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_left; else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) link = &(*link)->rb_right; else link = &(*link)->rb_right; } rb_link_node(&cm_id_priv->service_node, parent, link); rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table); return NULL; } static struct cm_id_private * cm_find_listen(struct ib_device *device, __be64 service_id) { struct rb_node *node = cm.listen_service_table.rb_node; struct cm_id_private *cm_id_priv; while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); if ((cm_id_priv->id.service_mask & service_id) == cm_id_priv->id.service_id && (cm_id_priv->id.device == device)) return cm_id_priv; if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) node = node->rb_right; else if (be64_lt(service_id, cm_id_priv->id.service_id)) node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; else node = node->rb_right; } return NULL; } static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info *timewait_info) { struct rb_node **link = &cm.remote_id_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; __be64 remote_ca_guid = timewait_info->remote_ca_guid; __be32 remote_id = timewait_info->work.remote_id; while (*link) { parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_id_node); if (be32_lt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_left; else if (be32_gt(remote_id, cur_timewait_info->work.remote_id)) link = &(*link)->rb_right; else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; } timewait_info->inserted_remote_id = 1; rb_link_node(&timewait_info->remote_id_node, parent, link); rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table); return NULL; } static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid, __be32 remote_id) { struct rb_node *node = cm.remote_id_table.rb_node; struct cm_timewait_info *timewait_info; while (node) { timewait_info = rb_entry(node, struct cm_timewait_info, remote_id_node); if (be32_lt(remote_id, timewait_info->work.remote_id)) node = node->rb_left; else if (be32_gt(remote_id, timewait_info->work.remote_id)) node = node->rb_right; else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_left; else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid)) node = node->rb_right; else return timewait_info; } return NULL; } static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info *timewait_info) { struct rb_node **link = &cm.remote_qp_table.rb_node; struct rb_node *parent = NULL; struct cm_timewait_info *cur_timewait_info; __be64 remote_ca_guid = timewait_info->remote_ca_guid; __be32 remote_qpn = timewait_info->remote_qpn; while (*link) { parent = *link; cur_timewait_info = rb_entry(parent, struct cm_timewait_info, remote_qp_node); if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_left; else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn)) link = &(*link)->rb_right; else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_left; else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid)) link = &(*link)->rb_right; else return cur_timewait_info; } timewait_info->inserted_remote_qp = 1; rb_link_node(&timewait_info->remote_qp_node, parent, link); rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table); return NULL; } static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private *cm_id_priv) { struct rb_node **link = &cm.remote_sidr_table.rb_node; struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; union ib_gid *port_gid = &cm_id_priv->av.dgid; __be32 remote_id = cm_id_priv->id.remote_id; while (*link) { parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, sidr_id_node); if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_left; else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id)) link = &(*link)->rb_right; else { int cmp; cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid, sizeof *port_gid); if (cmp < 0) link = &(*link)->rb_left; else if (cmp > 0) link = &(*link)->rb_right; else return cur_cm_id_priv; } } rb_link_node(&cm_id_priv->sidr_id_node, parent, link); rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); return NULL; } static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv, enum ib_cm_sidr_status status) { struct ib_cm_sidr_rep_param param; memset(¶m, 0, sizeof param); param.status = status; ib_send_cm_sidr_rep(&cm_id_priv->id, ¶m); } struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ib_cm_handler cm_handler, void *context) { struct cm_id_private *cm_id_priv; int ret; cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->id.state = IB_CM_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.remote_cm_qpn = 1; ret = cm_alloc_id(cm_id_priv); if (ret) goto error; spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->prim_list); INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; error: kfree(cm_id_priv); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(ib_create_cm_id); static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv) { struct cm_work *work; if (list_empty(&cm_id_priv->work_list)) return NULL; work = list_entry(cm_id_priv->work_list.next, struct cm_work, list); list_del(&work->list); return work; } static void cm_free_work(struct cm_work *work) { if (work->mad_recv_wc) ib_free_recv_mad(work->mad_recv_wc); kfree(work); } static inline int cm_convert_to_ms(int iba_time) { /* approximate conversion to ms from 4.096us x 2^iba_time */ return 1 << max(iba_time - 8, 0); } /* * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time * Because of how ack_timeout is stored, adding one doubles the timeout. * To avoid large timeouts, select the max(ack_delay, life_time + 1), and * increment it (round up) only if the other is within 50%. */ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time) { int ack_timeout = packet_life_time + 1; if (ack_timeout >= ca_ack_delay) ack_timeout += (ca_ack_delay >= (ack_timeout - 1)); else ack_timeout = ca_ack_delay + (ack_timeout >= (ca_ack_delay - 1)); return min(31, ack_timeout); } static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) { if (timewait_info->inserted_remote_id) { rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); timewait_info->inserted_remote_id = 0; } if (timewait_info->inserted_remote_qp) { rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); timewait_info->inserted_remote_qp = 0; } } static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) { struct cm_timewait_info *timewait_info; timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL); if (!timewait_info) return ERR_PTR(-ENOMEM); timewait_info->work.local_id = local_id; INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler); timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT; return timewait_info; } static void cm_enter_timewait(struct cm_id_private *cm_id_priv) { int wait_time; unsigned long flags; struct cm_device *cm_dev; cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client); if (!cm_dev) return; spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); spin_unlock_irqrestore(&cm.lock, flags); /* * The cm_id could be destroyed by the user before we exit timewait. * To protect against this, we search for the cm_id after exiting * timewait before notifying the user that we've exited timewait. */ cm_id_priv->id.state = IB_CM_TIMEWAIT; wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); /* Check if the device started its remove_one */ spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); spin_unlock_irqrestore(&cm.lock, flags); cm_id_priv->timewait_info = NULL; } static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) { unsigned long flags; cm_id_priv->id.state = IB_CM_IDLE; if (cm_id_priv->timewait_info) { spin_lock_irqsave(&cm.lock, flags); cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irqrestore(&cm.lock, flags); kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } } static void cm_destroy_id(struct ib_cm_id *cm_id, int err) { struct cm_id_private *cm_id_priv; struct cm_work *work; cm_id_priv = container_of(cm_id, struct cm_id_private, id); retest: spin_lock_irq(&cm_id_priv->lock); switch (cm_id->state) { case IB_CM_LISTEN: spin_unlock_irq(&cm_id_priv->lock); spin_lock_irq(&cm.lock); if (--cm_id_priv->listen_sharecount > 0) { /* The id is still shared. */ cm_deref_id(cm_id_priv); spin_unlock_irq(&cm.lock); return; } rb_erase(&cm_id_priv->service_node, &cm.listen_service_table); spin_unlock_irq(&cm.lock); break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_SIDR_REQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT); spin_lock_irq(&cm.lock); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); spin_unlock_irq(&cm.lock); break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof cm_id_priv->id.device->node_guid, NULL, 0); break; case IB_CM_REQ_RCVD: if (err == -ENOMEM) { /* Do not reject to allow future retries. */ cm_reset_to_idle(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); } else { spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); } break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* Fall through */ case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); break; case IB_CM_ESTABLISHED: spin_unlock_irq(&cm_id_priv->lock); if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) break; ib_send_cm_dreq(cm_id, NULL, 0); goto retest; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); break; case IB_CM_DREQ_RCVD: spin_unlock_irq(&cm_id_priv->lock); ib_send_cm_drep(cm_id, NULL, 0); break; default: spin_unlock_irq(&cm_id_priv->lock); break; } spin_lock_irq(&cm.lock); if (!list_empty(&cm_id_priv->altr_list) && (!cm_id_priv->altr_send_port_not_ready)) list_del(&cm_id_priv->altr_list); if (!list_empty(&cm_id_priv->prim_list) && (!cm_id_priv->prim_send_port_not_ready)) list_del(&cm_id_priv->prim_list); spin_unlock_irq(&cm.lock); cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); kfree(cm_id_priv->private_data); kfree(cm_id_priv); } void ib_destroy_cm_id(struct ib_cm_id *cm_id) { cm_destroy_id(cm_id, 0); } EXPORT_SYMBOL(ib_destroy_cm_id); /** * __ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. * @cm_id: Connection identifier associated with the listen request. * @service_id: Service identifier matched against incoming connection * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. * @service_mask: Mask applied to service ID used to listen across a * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. */ static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) { struct cm_id_private *cm_id_priv, *cur_cm_id_priv; int ret = 0; service_mask = service_mask ? service_mask : ~cpu_to_be64(0); service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); if (cm_id->state != IB_CM_IDLE) return -EINVAL; cm_id->state = IB_CM_LISTEN; ++cm_id_priv->listen_sharecount; if (service_id == IB_CM_ASSIGN_SERVICE_ID) { cm_id->service_id = cpu_to_be64(cm.listen_service_id++); cm_id->service_mask = ~cpu_to_be64(0); } else { cm_id->service_id = service_id; cm_id->service_mask = service_mask; } cur_cm_id_priv = cm_insert_listen(cm_id_priv); if (cur_cm_id_priv) { cm_id->state = IB_CM_IDLE; --cm_id_priv->listen_sharecount; ret = -EBUSY; } return ret; } int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm.lock, flags); ret = __ib_cm_listen(cm_id, service_id, service_mask); spin_unlock_irqrestore(&cm.lock, flags); return ret; } EXPORT_SYMBOL(ib_cm_listen); /** * Create a new listening ib_cm_id and listen on the given service ID. * * If there's an existing ID listening on that same device and service ID, * return it. * * @device: Device associated with the cm_id. All related communication will * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @service_id: Service identifier matched against incoming connection * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. * * Callers should call ib_destroy_cm_id when done with the listener ID. */ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, __be64 service_id) { struct cm_id_private *cm_id_priv; struct ib_cm_id *cm_id; unsigned long flags; int err = 0; /* Create an ID in advance, since the creation may sleep */ cm_id = ib_create_cm_id(device, cm_handler, NULL); if (IS_ERR(cm_id)) return cm_id; spin_lock_irqsave(&cm.lock, flags); if (service_id == IB_CM_ASSIGN_SERVICE_ID) goto new_id; /* Find an existing ID */ cm_id_priv = cm_find_listen(device, service_id); if (cm_id_priv) { if (cm_id->cm_handler != cm_handler || cm_id->context) { /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); ++cm_id_priv->listen_sharecount; spin_unlock_irqrestore(&cm.lock, flags); ib_destroy_cm_id(cm_id); cm_id = &cm_id_priv->id; return cm_id; } new_id: /* Use newly created ID */ err = __ib_cm_listen(cm_id, service_id, 0); spin_unlock_irqrestore(&cm.lock, flags); if (err) { ib_destroy_cm_id(cm_id); return ERR_PTR(err); } return cm_id; } EXPORT_SYMBOL(ib_cm_insert_listen); static __be64 cm_form_tid(struct cm_id_private *cm_id_priv, enum cm_msg_sequence msg_seq) { u64 hi_tid, low_tid; hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; low_tid = (u64) ((__force u32)cm_id_priv->id.local_id | (msg_seq << 30)); return cpu_to_be64(hi_tid | low_tid); } static void cm_format_mad_hdr(struct ib_mad_hdr *hdr, __be16 attr_id, __be64 tid) { hdr->base_version = IB_MGMT_BASE_VERSION; hdr->mgmt_class = IB_MGMT_CLASS_CM; hdr->class_version = IB_CM_CLASS_VERSION; hdr->method = IB_MGMT_METHOD_SEND; hdr->attr_id = attr_id; hdr->tid = tid; } static void cm_format_req(struct cm_req_msg *req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_req_param *param) { struct ib_sa_path_rec *pri_path = param->primary_path; struct ib_sa_path_rec *alt_path = param->alternate_path; cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ)); req_msg->local_comm_id = cm_id_priv->id.local_id; req_msg->service_id = param->service_id; req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); cm_req_set_init_depth(req_msg, param->initiator_depth); cm_req_set_remote_resp_timeout(req_msg, param->remote_cm_response_timeout); cm_req_set_qp_type(req_msg, param->qp_type); cm_req_set_flow_ctrl(req_msg, param->flow_control); cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); cm_req_set_local_resp_timeout(req_msg, param->local_cm_response_timeout); req_msg->pkey = param->primary_path->pkey; cm_req_set_path_mtu(req_msg, param->primary_path->mtu); cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); if (param->qp_type != IB_QPT_XRC_INI) { cm_req_set_resp_res(req_msg, param->responder_resources); cm_req_set_retry_count(req_msg, param->retry_count); cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); cm_req_set_srq(req_msg, param->srq); } if (pri_path->hop_limit <= 1) { req_msg->primary_local_lid = pri_path->slid; req_msg->primary_remote_lid = pri_path->dlid; } else { /* Work-around until there's a way to obtain remote LID info */ req_msg->primary_local_lid = IB_LID_PERMISSIVE; req_msg->primary_remote_lid = IB_LID_PERMISSIVE; } req_msg->primary_local_gid = pri_path->sgid; req_msg->primary_remote_gid = pri_path->dgid; cm_req_set_primary_flow_label(req_msg, pri_path->flow_label); cm_req_set_primary_packet_rate(req_msg, pri_path->rate); req_msg->primary_traffic_class = pri_path->traffic_class; req_msg->primary_hop_limit = pri_path->hop_limit; cm_req_set_primary_sl(req_msg, pri_path->sl); cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1)); cm_req_set_primary_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, pri_path->packet_life_time)); if (alt_path) { if (alt_path->hop_limit <= 1) { req_msg->alt_local_lid = alt_path->slid; req_msg->alt_remote_lid = alt_path->dlid; } else { req_msg->alt_local_lid = IB_LID_PERMISSIVE; req_msg->alt_remote_lid = IB_LID_PERMISSIVE; } req_msg->alt_local_gid = alt_path->sgid; req_msg->alt_remote_gid = alt_path->dgid; cm_req_set_alt_flow_label(req_msg, alt_path->flow_label); cm_req_set_alt_packet_rate(req_msg, alt_path->rate); req_msg->alt_traffic_class = alt_path->traffic_class; req_msg->alt_hop_limit = alt_path->hop_limit; cm_req_set_alt_sl(req_msg, alt_path->sl); cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1)); cm_req_set_alt_local_ack_timeout(req_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alt_path->packet_life_time)); } if (param->private_data && param->private_data_len) memcpy(req_msg->private_data, param->private_data, param->private_data_len); } static int cm_validate_req_param(struct ib_cm_req_param *param) { /* peer-to-peer not supported */ if (param->peer_to_peer) return -EINVAL; if (!param->primary_path) return -EINVAL; if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && param->qp_type != IB_QPT_XRC_INI) return -EINVAL; if (param->private_data && param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE) return -EINVAL; if (param->alternate_path && (param->alternate_path->pkey != param->primary_path->pkey || param->alternate_path->mtu != param->primary_path->mtu)) return -EINVAL; return 0; } int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; struct cm_req_msg *req_msg; unsigned long flags; int ret; ret = cm_validate_req_param(param); if (ret) return ret; /* Verify that we're not in timewait. */ cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = -EINVAL; goto out; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto out; } ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, cm_id_priv); if (ret) goto error1; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( param->remote_cm_response_timeout); cm_id_priv->max_cm_retries = param->max_cm_retries; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->retry_count = param->retry_count; cm_id_priv->path_mtu = param->primary_path->mtu; cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); if (ret) goto error1; req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); goto error2; } BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error2: cm_free_msg(cm_id_priv->msg); error1: kfree(cm_id_priv->timewait_info); out: return ret; } EXPORT_SYMBOL(ib_send_cm_req); static int cm_issue_rej(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, enum ib_cm_rej_reason reason, enum cm_msg_response msg_rejected, void *ari, u8 ari_length) { struct ib_mad_send_buf *msg = NULL; struct cm_rej_msg *rej_msg, *rcv_msg; int ret; ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); if (ret) return ret; /* We just need common CM header information. Cast to any message. */ rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad; rej_msg = (struct cm_rej_msg *) msg->mad; cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid); rej_msg->remote_comm_id = rcv_msg->local_comm_id; rej_msg->local_comm_id = rcv_msg->remote_comm_id; cm_rej_set_msg_rejected(rej_msg, msg_rejected); rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); } ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); return ret; } static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct ib_sa_path_rec *primary_path, struct ib_sa_path_rec *alt_path) { memset(primary_path, 0, sizeof *primary_path); primary_path->dgid = req_msg->primary_local_gid; primary_path->sgid = req_msg->primary_remote_gid; primary_path->dlid = req_msg->primary_local_lid; primary_path->slid = req_msg->primary_remote_lid; primary_path->flow_label = cm_req_get_primary_flow_label(req_msg); primary_path->hop_limit = req_msg->primary_hop_limit; primary_path->traffic_class = req_msg->primary_traffic_class; primary_path->reversible = 1; primary_path->pkey = req_msg->pkey; primary_path->sl = cm_req_get_primary_sl(req_msg); primary_path->mtu_selector = IB_SA_EQ; primary_path->mtu = cm_req_get_path_mtu(req_msg); primary_path->rate_selector = IB_SA_EQ; primary_path->rate = cm_req_get_primary_packet_rate(req_msg); primary_path->packet_life_time_selector = IB_SA_EQ; primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); primary_path->service_id = req_msg->service_id; if (req_msg->alt_local_lid) { memset(alt_path, 0, sizeof *alt_path); alt_path->dgid = req_msg->alt_local_gid; alt_path->sgid = req_msg->alt_remote_gid; alt_path->dlid = req_msg->alt_local_lid; alt_path->slid = req_msg->alt_remote_lid; alt_path->flow_label = cm_req_get_alt_flow_label(req_msg); alt_path->hop_limit = req_msg->alt_hop_limit; alt_path->traffic_class = req_msg->alt_traffic_class; alt_path->reversible = 1; alt_path->pkey = req_msg->pkey; alt_path->sl = cm_req_get_alt_sl(req_msg); alt_path->mtu_selector = IB_SA_EQ; alt_path->mtu = cm_req_get_path_mtu(req_msg); alt_path->rate_selector = IB_SA_EQ; alt_path->rate = cm_req_get_alt_packet_rate(req_msg); alt_path->packet_life_time_selector = IB_SA_EQ; alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); alt_path->service_id = req_msg->service_id; } } static u16 cm_get_bth_pkey(struct cm_work *work) { struct ib_device *ib_dev = work->port->cm_dev->ib_device; u8 port_num = work->port->port_num; u16 pkey_index = work->mad_recv_wc->wc->pkey_index; u16 pkey; int ret; ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey); if (ret) { dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n", port_num, pkey_index, ret); return 0; } return pkey; } static void cm_format_req_event(struct cm_work *work, struct cm_id_private *cm_id_priv, struct ib_cm_id *listen_id) { struct cm_req_msg *req_msg; struct ib_cm_req_event_param *param; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.req_rcvd; param->listen_id = listen_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = cm_id_priv->av.port->port_num; param->primary_path = &work->path[0]; if (req_msg->alt_local_lid) param->alternate_path = &work->path[1]; else param->alternate_path = NULL; param->remote_ca_guid = req_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(req_msg->local_qkey); param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg)); param->qp_type = cm_req_get_qp_type(req_msg); param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg)); param->responder_resources = cm_req_get_init_depth(req_msg); param->initiator_depth = cm_req_get_resp_res(req_msg); param->local_cm_response_timeout = cm_req_get_remote_resp_timeout(req_msg); param->flow_control = cm_req_get_flow_ctrl(req_msg); param->remote_cm_response_timeout = cm_req_get_local_resp_timeout(req_msg); param->retry_count = cm_req_get_retry_count(req_msg); param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); param->srq = cm_req_get_srq(req_msg); work->cm_event.private_data = &req_msg->private_data; } static void cm_process_work(struct cm_id_private *cm_id_priv, struct cm_work *work) { int ret; /* We will typically only have the current event to report. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) { spin_lock_irq(&cm_id_priv->lock); work = cm_dequeue_work(cm_id_priv); spin_unlock_irq(&cm_id_priv->lock); BUG_ON(!work); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event); cm_free_work(work); } cm_deref_id(cm_id_priv); if (ret) cm_destroy_id(&cm_id_priv->id, ret); } static void cm_format_mra(struct cm_mra_msg *mra_msg, struct cm_id_private *cm_id_priv, enum cm_msg_response msg_mraed, u8 service_timeout, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid); cm_mra_set_msg_mraed(mra_msg, msg_mraed); mra_msg->local_comm_id = cm_id_priv->id.local_id; mra_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_mra_set_service_timeout(mra_msg, service_timeout); if (private_data && private_data_len) memcpy(mra_msg->private_data, private_data, private_data_len); } static void cm_format_rej(struct cm_rej_msg *rej_msg, struct cm_id_private *cm_id_priv, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid); rej_msg->remote_comm_id = cm_id_priv->id.remote_id; switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: rej_msg->local_comm_id = 0; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_MRA_REQ_SENT: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ); break; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP); break; default: rej_msg->local_comm_id = cm_id_priv->id.local_id; cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER); break; } rej_msg->reason = cpu_to_be16(reason); if (ari && ari_length) { cm_rej_set_reject_info_len(rej_msg, ari_length); memcpy(rej_msg->ari, ari, ari_length); } if (private_data && private_data_len) memcpy(rej_msg->private_data, private_data, private_data_len); } static void cm_dup_req_handler(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct ib_mad_send_buf *msg = NULL; int ret; atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_REQ_COUNTER]); /* Quick state check to discard duplicate REQs. */ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) return; ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) return; spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_MRA_REQ_SENT: cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); break; case IB_CM_TIMEWAIT: cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0); break; default: goto unlock; } spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; return; unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); } static struct cm_id_private * cm_match_req(struct cm_work *work, struct cm_id_private *cm_id_priv) { struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv; struct cm_timewait_info *timewait_info; struct cm_req_msg *req_msg; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; /* Check for possible duplicate REQ. */ spin_lock_irq(&cm.lock); timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info); if (timewait_info) { cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, timewait_info->work.remote_id); spin_unlock_irq(&cm.lock); if (cur_cm_id_priv) { cm_dup_req_handler(work, cur_cm_id_priv); cm_deref_id(cur_cm_id_priv); } return NULL; } /* Check for stale connections. */ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info); if (timewait_info) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, NULL, 0); return NULL; } /* Find matching listen request. */ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device, req_msg->service_id); if (!listen_cm_id_priv) { cm_cleanup_timewait(cm_id_priv->timewait_info); spin_unlock_irq(&cm.lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, NULL, 0); goto out; } atomic_inc(&listen_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); cm_id_priv->id.state = IB_CM_REQ_RCVD; atomic_inc(&cm_id_priv->work_count); spin_unlock_irq(&cm.lock); out: return listen_cm_id_priv; } /* * Work-around for inter-subnet connections. If the LIDs are permissive, * we need to override the LID/SL data in the REQ with the LID information * in the work completion. */ static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc) { if (!cm_req_get_primary_subnet_local(req_msg)) { if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) { req_msg->primary_local_lid = cpu_to_be16(wc->slid); cm_req_set_primary_sl(req_msg, wc->sl); } if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE) req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits); } if (!cm_req_get_alt_subnet_local(req_msg)) { if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) { req_msg->alt_local_lid = cpu_to_be16(wc->slid); cm_req_set_alt_sl(req_msg, wc->sl); } if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE) req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits); } } static int cm_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *listen_cm_id_priv; struct cm_req_msg *req_msg; union ib_gid gid; struct ib_gid_attr gid_attr; int ret; req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad; cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); cm_id_priv->id.remote_id = req_msg->local_comm_id; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv-> id.local_id); if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); goto destroy; } cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg); listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { ret = -EINVAL; kfree(cm_id_priv->timewait_info); goto destroy; } cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler; cm_id_priv->id.context = listen_cm_id_priv->id.context; cm_id_priv->id.service_id = req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit; ret = ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, cm_id_priv->av.ah_attr.grh.sgid_index, &gid, &gid_attr); if (!ret) { if (gid_attr.ndev) { work->path[0].ifindex = gid_attr.ndev->if_index; work->path[0].net = dev_net(gid_attr.ndev); dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, cm_id_priv); } if (ret) { int err = ib_get_cached_gid(work->port->cm_dev->ib_device, work->port->port_num, 0, &work->path[0].sgid, &gid_attr); if (!err && gid_attr.ndev) { work->path[0].ifindex = gid_attr.ndev->if_index; work->path[0].net = dev_net(gid_attr.ndev); dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } if (req_msg->alt_local_lid) { ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, sizeof work->path[0].sgid, NULL, 0); goto rejected; } } cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( cm_req_get_local_resp_timeout(req_msg)); cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg); cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg); cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg); cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg); cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg); cm_id_priv->pkey = req_msg->pkey; cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg); cm_id_priv->retry_count = cm_req_get_retry_count(req_msg); cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg); cm_id_priv->qp_type = cm_req_get_qp_type(req_msg); cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(listen_cm_id_priv); return 0; rejected: atomic_dec(&cm_id_priv->refcount); cm_deref_id(listen_cm_id_priv); destroy: ib_destroy_cm_id(cm_id); return ret; } static void cm_format_rep(struct cm_rep_msg *rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_rep_param *param) { cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); rep_msg->local_comm_id = cm_id_priv->id.local_id; rep_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); rep_msg->resp_resources = param->responder_resources; cm_rep_set_target_ack_delay(rep_msg, cm_id_priv->av.port->cm_dev->ack_delay); cm_rep_set_failover(rep_msg, param->failover_accepted); cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { rep_msg->initiator_depth = param->initiator_depth; cm_rep_set_flow_ctrl(rep_msg, param->flow_control); cm_rep_set_srq(rep_msg, param->srq); cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); } else { cm_rep_set_srq(rep_msg, 1); cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); } if (param->private_data && param->private_data_len) memcpy(rep_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_rep(struct ib_cm_id *cm_id, struct ib_cm_rep_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_rep_msg *rep_msg; unsigned long flags; int ret; if (param->private_data && param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REQ_RCVD && cm_id->state != IB_CM_MRA_REQ_SENT) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_REP_SENT; cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rep); static void cm_format_rtu(struct cm_rtu_msg *rtu_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid); rtu_msg->local_comm_id = cm_id_priv->id.local_id; rtu_msg->remote_comm_id = cm_id_priv->id.remote_id; if (private_data && private_data_len) memcpy(rtu_msg->private_data, private_data, private_data_len); } int ib_send_cm_rtu(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; void *data; int ret; if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REP_RCVD && cm_id->state != IB_CM_MRA_REP_SENT) { ret = -EINVAL; goto error; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error; cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); kfree(data); return ret; } cm_id->state = IB_CM_ESTABLISHED; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return ret; } EXPORT_SYMBOL(ib_send_cm_rtu); static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) { struct cm_rep_msg *rep_msg; struct ib_cm_rep_event_param *param; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rep_rcvd; param->remote_ca_guid = rep_msg->local_ca_guid; param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); param->responder_resources = rep_msg->initiator_depth; param->initiator_depth = rep_msg->resp_resources; param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); param->failover_accepted = cm_rep_get_failover(rep_msg); param->flow_control = cm_rep_get_flow_ctrl(rep_msg); param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); param->srq = cm_rep_get_srq(rep_msg); work->cm_event.private_data = &rep_msg->private_data; } static void cm_dup_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; struct ib_mad_send_buf *msg = NULL; int ret; rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, rep_msg->local_comm_id); if (!cm_id_priv) return; atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_REP_COUNTER]); ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg); if (ret) goto deref; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state == IB_CM_ESTABLISHED) cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT) cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); else goto unlock; spin_unlock_irq(&cm_id_priv->lock); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); free: cm_free_msg(msg); deref: cm_deref_id(cm_id_priv); } static int cm_rep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rep_msg *rep_msg; int ret; rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0); if (!cm_id_priv) { cm_dup_rep_handler(work); return -EINVAL; } cm_format_rep_event(work, cm_id_priv->qp_type); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: break; default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); spin_lock(&cm.lock); /* Check for duplicate REP. */ if (cm_insert_remote_id(cm_id_priv->timewait_info)) { spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto error; } /* Check for a stale connection. */ if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) { rb_erase(&cm_id_priv->timewait_info->remote_id_node, &cm.remote_id_table); cm_id_priv->timewait_info->inserted_remote_id = 0; spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); cm_issue_rej(work->port, work->mad_recv_wc, IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; goto error; } spin_unlock(&cm.lock); cm_id_priv->id.state = IB_CM_REP_RCVD; cm_id_priv->id.remote_id = rep_msg->local_comm_id; cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); cm_id_priv->initiator_depth = rep_msg->resp_resources; cm_id_priv->responder_resources = rep_msg->initiator_depth; cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg); cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg); cm_id_priv->av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->av.timeout - 1); cm_id_priv->alt_av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); /* todo: handle peer_to_peer */ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; error: cm_deref_id(cm_id_priv); return ret; } static int cm_establish_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; int ret; /* See comment in cm_establish about lookup. */ cm_id_priv = cm_acquire_id(work->local_id, work->remote_id); if (!cm_id_priv) return -EINVAL; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { spin_unlock_irq(&cm_id_priv->lock); goto out; } ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_rtu_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rtu_msg *rtu_msg; int ret; rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id, rtu_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &rtu_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_REP_SENT && cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) { spin_unlock_irq(&cm_id_priv->lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_RTU_COUNTER]); goto out; } cm_id_priv->id.state = IB_CM_ESTABLISHED; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_dreq(struct cm_dreq_msg *dreq_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ)); dreq_msg->local_comm_id = cm_id_priv->id.local_id; dreq_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn); if (private_data && private_data_len) memcpy(dreq_msg->private_data, private_data, private_data_len); } int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED) { ret = -EINVAL; goto out; } if (cm_id->lap_state == IB_CM_LAP_SENT || cm_id->lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) { cm_enter_timewait(cm_id_priv); goto out; } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, private_data, private_data_len); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_DREQ_SENT; cm_id_priv->msg = msg; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_dreq); static void cm_format_drep(struct cm_drep_msg *drep_msg, struct cm_id_private *cm_id_priv, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid); drep_msg->local_comm_id = cm_id_priv->id.local_id; drep_msg->remote_comm_id = cm_id_priv->id.remote_id; if (private_data && private_data_len) memcpy(drep_msg->private_data, private_data, private_data_len); } int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; void *data; int ret; if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_DREQ_RCVD) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return -EINVAL; } cm_set_private_data(cm_id_priv, data, private_data_len); cm_enter_timewait(cm_id_priv); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_drep); static int cm_issue_drep(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_buf *msg = NULL; struct cm_dreq_msg *dreq_msg; struct cm_drep_msg *drep_msg; int ret; ret = cm_alloc_response_msg(port, mad_recv_wc, &msg); if (ret) return ret; dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad; drep_msg = (struct cm_drep_msg *) msg->mad; cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid); drep_msg->remote_comm_id = dreq_msg->local_comm_id; drep_msg->local_comm_id = dreq_msg->remote_comm_id; ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); return ret; } static int cm_dreq_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_dreq_msg *dreq_msg; struct ib_mad_send_buf *msg = NULL; int ret; dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id, dreq_msg->local_comm_id); if (!cm_id_priv) { atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); return -EINVAL; } work->cm_event.private_data = &dreq_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg)) goto unlock; switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); break; case IB_CM_MRA_REP_RCVD: break; case IB_CM_TIMEWAIT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, cm_id_priv->private_data, cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_DREQ_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); goto unlock; default: goto unlock; } cm_id_priv->id.state = IB_CM_DREQ_RCVD; cm_id_priv->tid = dreq_msg->hdr.tid; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_drep_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_drep_msg *drep_msg; int ret; drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id, drep_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &drep_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_DREQ_SENT && cm_id_priv->id.state != IB_CM_DREQ_RCVD) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_enter_timewait(cm_id_priv); ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) || (ari && ari_length > IB_CM_REJ_ARI_LENGTH)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id->state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: ret = cm_alloc_msg(cm_id_priv, &msg); if (!ret) cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len); cm_reset_to_idle(cm_id_priv); break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ret = cm_alloc_msg(cm_id_priv, &msg); if (!ret) cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len); cm_enter_timewait(cm_id_priv); break; default: ret = -EINVAL; goto out; } if (ret) goto out; ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rej); static void cm_format_rej_event(struct cm_work *work) { struct cm_rej_msg *rej_msg; struct ib_cm_rej_event_param *param; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.rej_rcvd; param->ari = rej_msg->ari; param->ari_length = cm_rej_get_reject_info_len(rej_msg); param->reason = __be16_to_cpu(rej_msg->reason); work->cm_event.private_data = &rej_msg->private_data; } static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; __be32 remote_id; remote_id = rej_msg->local_comm_id; if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) { spin_lock_irq(&cm.lock); timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari), remote_id); if (!timewait_info) { spin_unlock_irq(&cm.lock); return NULL; } cm_id_priv = idr_find(&cm.local_id_table, (__force int) (timewait_info->work.local_id ^ cm.random_id_operand)); if (cm_id_priv) { if (cm_id_priv->id.remote_id == remote_id) atomic_inc(&cm_id_priv->refcount); else cm_id_priv = NULL; } spin_unlock_irq(&cm.lock); } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ) cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0); else cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id); return cm_id_priv; } static int cm_rej_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_rej_msg *rej_msg; int ret; rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_rejected_id(rej_msg); if (!cm_id_priv) return -EINVAL; cm_format_rej_event(work); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN) cm_enter_timewait(cm_id_priv); else cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); /* fall through */ case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: cm_enter_timewait(cm_id_priv); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_enter_timewait(cm_id_priv); break; } /* fall through */ default: spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; goto out; } ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 service_timeout, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; enum ib_cm_state cm_state; enum ib_cm_lap_state lap_state; enum cm_msg_response msg_response; void *data; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE) return -EINVAL; data = cm_copy_private_data(private_data, private_data_len); if (IS_ERR(data)) return PTR_ERR(data); cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch(cm_id_priv->id.state) { case IB_CM_REQ_RCVD: cm_state = IB_CM_MRA_REQ_SENT; lap_state = cm_id->lap_state; msg_response = CM_MSG_RESPONSE_REQ; break; case IB_CM_REP_RCVD: cm_state = IB_CM_MRA_REP_SENT; lap_state = cm_id->lap_state; msg_response = CM_MSG_RESPONSE_REP; break; case IB_CM_ESTABLISHED: if (cm_id->lap_state == IB_CM_LAP_RCVD) { cm_state = cm_id->state; lap_state = IB_CM_MRA_LAP_SENT; msg_response = CM_MSG_RESPONSE_OTHER; break; } default: ret = -EINVAL; goto error1; } if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error1; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; } cm_id->state = cm_state; cm_id->lap_state = lap_state; cm_id_priv->service_timeout = service_timeout; cm_set_private_data(cm_id_priv, data, private_data_len); spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); return ret; error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); kfree(data); cm_free_msg(msg); return ret; } EXPORT_SYMBOL(ib_send_cm_mra); static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg) { switch (cm_mra_get_msg_mraed(mra_msg)) { case CM_MSG_RESPONSE_REQ: return cm_acquire_id(mra_msg->remote_comm_id, 0); case CM_MSG_RESPONSE_REP: case CM_MSG_RESPONSE_OTHER: return cm_acquire_id(mra_msg->remote_comm_id, mra_msg->local_comm_id); default: return NULL; } } static int cm_mra_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_mra_msg *mra_msg; int timeout, ret; mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_mraed_id(mra_msg); if (!cm_id_priv) return -EINVAL; work->cm_event.private_data = &mra_msg->private_data; work->cm_event.param.mra_rcvd.service_timeout = cm_mra_get_service_timeout(mra_msg); timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) + cm_convert_to_ms(cm_id_priv->av.timeout); spin_lock_irq(&cm_id_priv->lock); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; case IB_CM_ESTABLISHED: if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) atomic_long_inc(&work->port-> counter_group[CM_RECV_DUPLICATES]. counter[CM_MRA_COUNTER]); goto out; } cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD; break; case IB_CM_MRA_REQ_RCVD: case IB_CM_MRA_REP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_MRA_COUNTER]); /* fall through */ default: goto out; } cm_id_priv->msg->context[1] = (void *) (unsigned long) cm_id_priv->id.state; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: spin_unlock_irq(&cm_id_priv->lock); cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_lap(struct cm_lap_msg *lap_msg, struct cm_id_private *cm_id_priv, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP)); lap_msg->local_comm_id = cm_id_priv->id.local_id; lap_msg->remote_comm_id = cm_id_priv->id.remote_id; cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn); /* todo: need remote CM response timeout */ cm_lap_set_remote_resp_timeout(lap_msg, 0x1F); lap_msg->alt_local_lid = alternate_path->slid; lap_msg->alt_remote_lid = alternate_path->dlid; lap_msg->alt_local_gid = alternate_path->sgid; lap_msg->alt_remote_gid = alternate_path->dgid; cm_lap_set_flow_label(lap_msg, alternate_path->flow_label); cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class); lap_msg->alt_hop_limit = alternate_path->hop_limit; cm_lap_set_packet_rate(lap_msg, alternate_path->rate); cm_lap_set_sl(lap_msg, alternate_path->sl); cm_lap_set_subnet_local(lap_msg, 1); /* local only... */ cm_lap_set_local_ack_timeout(lap_msg, cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay, alternate_path->packet_life_time)); if (private_data && private_data_len) memcpy(lap_msg->private_data, private_data, private_data_len); } int ib_send_cm_lap(struct ib_cm_id *cm_id, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || (cm_id->lap_state != IB_CM_LAP_UNINIT && cm_id->lap_state != IB_CM_LAP_IDLE)) { ret = -EINVAL; goto out; } ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, cm_id_priv); if (ret) goto out; cm_id_priv->alt_av.timeout = cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv, alternate_path, private_data, private_data_len); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED; ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->lap_state = IB_CM_LAP_SENT; cm_id_priv->msg = msg; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_lap); static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv, struct ib_sa_path_rec *path, struct cm_lap_msg *lap_msg) { memset(path, 0, sizeof *path); path->dgid = lap_msg->alt_local_gid; path->sgid = lap_msg->alt_remote_gid; path->dlid = lap_msg->alt_local_lid; path->slid = lap_msg->alt_remote_lid; path->flow_label = cm_lap_get_flow_label(lap_msg); path->hop_limit = lap_msg->alt_hop_limit; path->traffic_class = cm_lap_get_traffic_class(lap_msg); path->reversible = 1; path->pkey = cm_id_priv->pkey; path->sl = cm_lap_get_sl(lap_msg); path->mtu_selector = IB_SA_EQ; path->mtu = cm_id_priv->path_mtu; path->rate_selector = IB_SA_EQ; path->rate = cm_lap_get_packet_rate(lap_msg); path->packet_life_time_selector = IB_SA_EQ; path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg); path->packet_life_time -= (path->packet_life_time > 0); } static int cm_lap_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; int ret; /* todo: verify LAP request and send reject APR if invalid. */ lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id, lap_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; param = &work->cm_event.param.lap_rcvd; param->alternate_path = &work->path[0]; cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg); work->cm_event.private_data = &lap_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; switch (cm_id_priv->id.lap_state) { case IB_CM_LAP_UNINIT: case IB_CM_LAP_IDLE: break; case IB_CM_MRA_LAP_SENT: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg)) goto unlock; cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, CM_MSG_RESPONSE_OTHER, cm_id_priv->service_timeout, cm_id_priv->private_data, cm_id_priv->private_data_len); spin_unlock_irq(&cm_id_priv->lock); if (ib_post_send_mad(msg, NULL)) cm_free_msg(msg); goto deref; case IB_CM_LAP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_LAP_COUNTER]); goto unlock; default: goto unlock; } cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, cm_id_priv); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; unlock: spin_unlock_irq(&cm_id_priv->lock); deref: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_apr(struct cm_apr_msg *apr_msg, struct cm_id_private *cm_id_priv, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len) { cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid); apr_msg->local_comm_id = cm_id_priv->id.local_id; apr_msg->remote_comm_id = cm_id_priv->id.remote_id; apr_msg->ap_status = (u8) status; if (info && info_length) { apr_msg->info_length = info_length; memcpy(apr_msg->info, info, info_length); } if (private_data && private_data_len) memcpy(apr_msg->private_data, private_data, private_data_len); } int ib_send_cm_apr(struct ib_cm_id *cm_id, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) || (info && info_length > IB_CM_APR_INFO_LENGTH)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_ESTABLISHED || (cm_id->lap_state != IB_CM_LAP_RCVD && cm_id->lap_state != IB_CM_MRA_LAP_SENT)) { ret = -EINVAL; goto out; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status, info, info_length, private_data, private_data_len); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->lap_state = IB_CM_LAP_IDLE; out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_apr); static int cm_apr_handler(struct cm_work *work) { struct cm_id_private *cm_id_priv; struct cm_apr_msg *apr_msg; int ret; apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id, apr_msg->local_comm_id); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status; work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info; work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length; work->cm_event.private_data = &apr_msg->private_data; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_ESTABLISHED || (cm_id_priv->id.lap_state != IB_CM_LAP_SENT && cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); cm_id_priv->msg = NULL; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static int cm_timewait_handler(struct cm_work *work) { struct cm_timewait_info *timewait_info; struct cm_id_private *cm_id_priv; int ret; timewait_info = (struct cm_timewait_info *)work; spin_lock_irq(&cm.lock); list_del(&timewait_info->list); spin_unlock_irq(&cm.lock); cm_id_priv = cm_acquire_id(timewait_info->work.local_id, timewait_info->work.remote_id); if (!cm_id_priv) return -EINVAL; spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_TIMEWAIT || cm_id_priv->remote_qpn != timewait_info->remote_qpn) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); spin_unlock_irq(&cm_id_priv->lock); if (ret) cm_process_work(cm_id_priv, work); else cm_deref_id(cm_id_priv); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_req_param *param) { cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID, cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR)); sidr_req_msg->request_id = cm_id_priv->id.local_id; sidr_req_msg->pkey = param->path->pkey; sidr_req_msg->service_id = param->service_id; if (param->private_data && param->private_data_len) memcpy(sidr_req_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if (!param->path || (param->private_data && param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); if (ret) goto out; cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto out; cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, param); msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_IDLE) ret = ib_post_send_mad(msg, NULL); else ret = -EINVAL; if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); goto out; } cm_id->state = IB_CM_SIDR_REQ_SENT; cm_id_priv->msg = msg; spin_unlock_irqrestore(&cm_id_priv->lock, flags); out: return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); static void cm_format_sidr_req_event(struct cm_work *work, struct ib_cm_id *listen_id) { struct cm_sidr_req_msg *sidr_req_msg; struct ib_cm_sidr_req_event_param *param; sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_req_rcvd; param->pkey = __be16_to_cpu(sidr_req_msg->pkey); param->listen_id = listen_id; param->service_id = sidr_req_msg->service_id; param->bth_pkey = cm_get_bth_pkey(work); param->port = work->port->port_num; work->cm_event.private_data = &sidr_req_msg->private_data; } static int cm_sidr_req_handler(struct cm_work *work) { struct ib_cm_id *cm_id; struct cm_id_private *cm_id_priv, *cur_cm_id_priv; struct cm_sidr_req_msg *sidr_req_msg; struct ib_wc *wc; cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id_priv = container_of(cm_id, struct cm_id_private, id); /* Record SGID/SLID and request ID for lookup. */ sidr_req_msg = (struct cm_sidr_req_msg *) work->mad_recv_wc->recv_buf.mad; wc = work->mad_recv_wc->wc; cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid); cm_id_priv->av.dgid.global.interface_id = 0; cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); cm_id_priv->id.remote_id = sidr_req_msg->request_id; cm_id_priv->tid = sidr_req_msg->hdr.tid; atomic_inc(&cm_id_priv->work_count); spin_lock_irq(&cm.lock); cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv); if (cur_cm_id_priv) { spin_unlock_irq(&cm.lock); atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_SIDR_REQ_COUNTER]); goto out; /* Duplicate message. */ } cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD; cur_cm_id_priv = cm_find_listen(cm_id->device, sidr_req_msg->service_id); if (!cur_cm_id_priv) { spin_unlock_irq(&cm.lock); cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED); goto out; /* No match. */ } atomic_inc(&cur_cm_id_priv->refcount); atomic_inc(&cm_id_priv->refcount); spin_unlock_irq(&cm.lock); cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; cm_id_priv->id.context = cur_cm_id_priv->id.context; cm_id_priv->id.service_id = sidr_req_msg->service_id; cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_format_sidr_req_event(work, &cur_cm_id_priv->id); cm_process_work(cm_id_priv, work); cm_deref_id(cur_cm_id_priv); return 0; out: ib_destroy_cm_id(&cm_id_priv->id); return -EINVAL; } static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg, struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID, cm_id_priv->tid); sidr_rep_msg->request_id = cm_id_priv->id.remote_id; sidr_rep_msg->status = param->status; cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num)); sidr_rep_msg->service_id = cm_id_priv->id.service_id; sidr_rep_msg->qkey = cpu_to_be32(param->qkey); if (param->info && param->info_length) memcpy(sidr_rep_msg->info, param->info, param->info_length); if (param->private_data && param->private_data_len) memcpy(sidr_rep_msg->private_data, param->private_data, param->private_data_len); } int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param) { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; unsigned long flags; int ret; if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) || (param->private_data && param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE)) return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_SIDR_REQ_RCVD) { ret = -EINVAL; goto error; } ret = cm_alloc_msg(cm_id_priv, &msg); if (ret) goto error; cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_free_msg(msg); return ret; } cm_id->state = IB_CM_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); spin_lock_irqsave(&cm.lock, flags); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); } spin_unlock_irqrestore(&cm.lock, flags); return 0; error: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_rep); static void cm_format_sidr_rep_event(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct ib_cm_sidr_rep_event_param *param; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; param = &work->cm_event.param.sidr_rep_rcvd; param->status = sidr_rep_msg->status; param->qkey = be32_to_cpu(sidr_rep_msg->qkey); param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg)); param->info = &sidr_rep_msg->info; param->info_len = sidr_rep_msg->info_length; work->cm_event.private_data = &sidr_rep_msg->private_data; } static int cm_sidr_rep_handler(struct cm_work *work) { struct cm_sidr_rep_msg *sidr_rep_msg; struct cm_id_private *cm_id_priv; sidr_rep_msg = (struct cm_sidr_rep_msg *) work->mad_recv_wc->recv_buf.mad; cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0); if (!cm_id_priv) return -EINVAL; /* Unmatched reply. */ spin_lock_irq(&cm_id_priv->lock); if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) { spin_unlock_irq(&cm_id_priv->lock); goto out; } cm_id_priv->id.state = IB_CM_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work); cm_process_work(cm_id_priv, work); return 0; out: cm_deref_id(cm_id_priv); return -EINVAL; } static void cm_process_send_error(struct ib_mad_send_buf *msg, enum ib_wc_status wc_status) { struct cm_id_private *cm_id_priv; struct ib_cm_event cm_event; enum ib_cm_state state; int ret; memset(&cm_event, 0, sizeof cm_event); cm_id_priv = msg->context[0]; /* Discard old sends or ones without a response. */ spin_lock_irq(&cm_id_priv->lock); state = (enum ib_cm_state) (unsigned long) msg->context[1]; if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; switch (state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REQ_ERROR; break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_reset_to_idle(cm_id_priv); cm_event.event = IB_CM_REP_ERROR; break; case IB_CM_DREQ_SENT: cm_enter_timewait(cm_id_priv); cm_event.event = IB_CM_DREQ_ERROR; break; case IB_CM_SIDR_REQ_SENT: cm_id_priv->id.state = IB_CM_IDLE; cm_event.event = IB_CM_SIDR_REQ_ERROR; break; default: goto discard; } spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); cm_free_msg(msg); if (ret) ib_destroy_cm_id(&cm_id_priv->id); return; discard: spin_unlock_irq(&cm_id_priv->lock); cm_free_msg(msg); } static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; struct cm_port *port; u16 attr_index; port = mad_agent->context; attr_index = be16_to_cpu(((struct ib_mad_hdr *) msg->mad)->attr_id) - CM_ATTR_ID_OFFSET; /* * If the send was in response to a received message (context[0] is not * set to a cm_id), and is not a REJ, then it is a send that was * manually retried. */ if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) msg->retries = 1; atomic_long_add(1 + msg->retries, &port->counter_group[CM_XMIT].counter[attr_index]); if (msg->retries) atomic_long_add(msg->retries, &port->counter_group[CM_XMIT_RETRIES]. counter[attr_index]); switch (mad_send_wc->status) { case IB_WC_SUCCESS: case IB_WC_WR_FLUSH_ERR: cm_free_msg(msg); break; default: if (msg->context[0] && msg->context[1]) cm_process_send_error(msg, mad_send_wc->status); else cm_free_msg(msg); break; } } static void cm_work_handler(struct work_struct *_work) { struct cm_work *work = container_of(_work, struct cm_work, work.work); int ret; switch (work->cm_event.event) { case IB_CM_REQ_RECEIVED: ret = cm_req_handler(work); break; case IB_CM_MRA_RECEIVED: ret = cm_mra_handler(work); break; case IB_CM_REJ_RECEIVED: ret = cm_rej_handler(work); break; case IB_CM_REP_RECEIVED: ret = cm_rep_handler(work); break; case IB_CM_RTU_RECEIVED: ret = cm_rtu_handler(work); break; case IB_CM_USER_ESTABLISHED: ret = cm_establish_handler(work); break; case IB_CM_DREQ_RECEIVED: ret = cm_dreq_handler(work); break; case IB_CM_DREP_RECEIVED: ret = cm_drep_handler(work); break; case IB_CM_SIDR_REQ_RECEIVED: ret = cm_sidr_req_handler(work); break; case IB_CM_SIDR_REP_RECEIVED: ret = cm_sidr_rep_handler(work); break; case IB_CM_LAP_RECEIVED: ret = cm_lap_handler(work); break; case IB_CM_APR_RECEIVED: ret = cm_apr_handler(work); break; case IB_CM_TIMEWAIT_EXIT: ret = cm_timewait_handler(work); break; default: ret = -EINVAL; break; } if (ret) cm_free_work(work); } static int cm_establish(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_work *work; unsigned long flags; int ret = 0; struct cm_device *cm_dev; cm_dev = ib_get_client_data(cm_id->device, &cm_client); if (!cm_dev) return -ENODEV; work = kmalloc(sizeof *work, GFP_ATOMIC); if (!work) return -ENOMEM; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id->state) { case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_id->state = IB_CM_ESTABLISHED; break; case IB_CM_ESTABLISHED: ret = -EISCONN; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (ret) { kfree(work); goto out; } /* * The CM worker thread may try to destroy the cm_id before it * can execute this work item. To prevent potential deadlock, * we need to find the cm_id once we're in the context of the * worker thread, rather than holding a reference on it. */ INIT_DELAYED_WORK(&work->work, cm_work_handler); work->local_id = cm_id->local_id; work->remote_id = cm_id->remote_id; work->mad_recv_wc = NULL; work->cm_event.event = IB_CM_USER_ESTABLISHED; /* Check if the device started its remove_one */ spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) { queue_delayed_work(cm.wq, &work->work, 0); } else { kfree(work); ret = -ENODEV; } spin_unlock_irqrestore(&cm.lock, flags); out: return ret; } static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; struct cm_av tmp_av; unsigned long flags; int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state == IB_CM_ESTABLISHED && (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; /* Swap address vector */ tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; cm_id_priv->alt_av = tmp_av; /* Swap port send ready state */ tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event) { int ret; switch (event) { case IB_EVENT_COMM_EST: ret = cm_establish(cm_id); break; case IB_EVENT_PATH_MIG: ret = cm_migrate(cm_id); break; default: ret = -EINVAL; } return ret; } EXPORT_SYMBOL(ib_cm_notify); static void cm_recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct cm_port *port = mad_agent->context; struct cm_work *work; enum ib_cm_event_type event; u16 attr_id; int paths = 0; int going_down = 0; switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) { case CM_REQ_ATTR_ID: paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)-> alt_local_lid != 0); event = IB_CM_REQ_RECEIVED; break; case CM_MRA_ATTR_ID: event = IB_CM_MRA_RECEIVED; break; case CM_REJ_ATTR_ID: event = IB_CM_REJ_RECEIVED; break; case CM_REP_ATTR_ID: event = IB_CM_REP_RECEIVED; break; case CM_RTU_ATTR_ID: event = IB_CM_RTU_RECEIVED; break; case CM_DREQ_ATTR_ID: event = IB_CM_DREQ_RECEIVED; break; case CM_DREP_ATTR_ID: event = IB_CM_DREP_RECEIVED; break; case CM_SIDR_REQ_ATTR_ID: event = IB_CM_SIDR_REQ_RECEIVED; break; case CM_SIDR_REP_ATTR_ID: event = IB_CM_SIDR_REP_RECEIVED; break; case CM_LAP_ATTR_ID: paths = 1; event = IB_CM_LAP_RECEIVED; break; case CM_APR_ATTR_ID: event = IB_CM_APR_RECEIVED; break; default: ib_free_recv_mad(mad_recv_wc); return; } attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id); atomic_long_inc(&port->counter_group[CM_RECV]. counter[attr_id - CM_ATTR_ID_OFFSET]); work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths, GFP_KERNEL); if (!work) { ib_free_recv_mad(mad_recv_wc); return; } INIT_DELAYED_WORK(&work->work, cm_work_handler); work->cm_event.event = event; work->mad_recv_wc = mad_recv_wc; work->port = port; /* Check if the device started its remove_one */ spin_lock_irq(&cm.lock); if (!port->cm_dev->going_down) queue_delayed_work(cm.wq, &work->work, 0); else going_down = 1; spin_unlock_irq(&cm.lock); if (going_down) { kfree(work); ib_free_recv_mad(mad_recv_wc); } } static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; if (cm_id_priv->responder_resources) qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); if (cm_id_priv->qp_type == IB_QPT_RC || cm_id_priv->qp_type == IB_QPT_XRC_TGT) { *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; } ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->id.state) { /* Allow transition to RTS before sending REP */ case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); switch (cm_id_priv->qp_type) { case IB_QPT_RC: case IB_QPT_XRC_INI: *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; qp_attr->retry_cnt = cm_id_priv->retry_count; qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; /* fall through */ case IB_QPT_XRC_TGT: *qp_attr_mask |= IB_QP_TIMEOUT; qp_attr->timeout = cm_id_priv->av.timeout; break; default: break; } if (cm_id_priv->alt_av.ah_attr.dlid) { *qp_attr_mask |= IB_QP_PATH_MIG_STATE; qp_attr->path_mig_state = IB_MIG_REARM; } } else { *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; qp_attr->path_mig_state = IB_MIG_REARM; } ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct cm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct cm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTR: ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(ib_cm_init_qp_attr); static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr, char *buf) { struct cm_counter_group *group; struct cm_counter_attribute *cm_attr; group = container_of(obj, struct cm_counter_group, obj); cm_attr = container_of(attr, struct cm_counter_attribute, attr); return sprintf(buf, "%ld\n", atomic_long_read(&group->counter[cm_attr->index])); } static const struct sysfs_ops cm_counter_ops = { .show = cm_show_counter }; static struct kobj_type cm_counter_obj_type = { .sysfs_ops = &cm_counter_ops, .default_attrs = cm_counter_default_attrs }; static void cm_release_port_obj(struct kobject *obj) { struct cm_port *cm_port; cm_port = container_of(obj, struct cm_port, port_obj); kfree(cm_port); } static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; static char *cm_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } struct class cm_class = { .owner = THIS_MODULE, .name = "infiniband_cm", .devnode = cm_devnode, }; EXPORT_SYMBOL(cm_class); static int cm_create_port_fs(struct cm_port *port) { int i, ret; ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type, &port->cm_dev->device->kobj, "%d", port->port_num); if (ret) { kfree(port); return ret; } for (i = 0; i < CM_COUNTER_GROUPS; i++) { ret = kobject_init_and_add(&port->counter_group[i].obj, &cm_counter_obj_type, &port->port_obj, "%s", counter_group_names[i]); if (ret) goto error; } return 0; error: while (i--) kobject_put(&port->counter_group[i].obj); kobject_put(&port->port_obj); return ret; } static void cm_remove_port_fs(struct cm_port *port) { int i; for (i = 0; i < CM_COUNTER_GROUPS; i++) kobject_put(&port->counter_group[i].obj); kobject_put(&port->port_obj); } static void cm_add_one(struct ib_device *ib_device) { struct cm_device *cm_dev; struct cm_port *port; struct ib_mad_reg_req reg_req = { .mgmt_class = IB_MGMT_CLASS_CM, .mgmt_class_version = IB_CM_CLASS_VERSION, }; struct ib_port_modify port_modify = { .set_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; int ret; int count = 0; u8 i; cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) * ib_device->phys_port_cnt, GFP_KERNEL); if (!cm_dev) return; cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; cm_dev->device = device_create(&cm_class, &ib_device->dev, MKDEV(0, 0), NULL, "%s", ib_device->name); if (IS_ERR(cm_dev->device)) { kfree(cm_dev); return; } set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = kzalloc(sizeof *port, GFP_KERNEL); if (!port) goto error1; cm_dev->port[i-1] = port; port->cm_dev = cm_dev; port->port_num = i; INIT_LIST_HEAD(&port->cm_priv_prim_list); INIT_LIST_HEAD(&port->cm_priv_altr_list); ret = cm_create_port_fs(port); if (ret) goto error1; port->mad_agent = ib_register_mad_agent(ib_device, i, IB_QPT_GSI, ®_req, 0, cm_send_handler, cm_recv_handler, port, 0); if (IS_ERR(port->mad_agent)) goto error2; ret = ib_modify_port(ib_device, i, 0, &port_modify); if (ret) goto error3; count++; } if (!count) goto free; ib_set_client_data(ib_device, &cm_client, cm_dev); write_lock_irqsave(&cm.device_lock, flags); list_add_tail(&cm_dev->list, &cm.device_list); write_unlock_irqrestore(&cm.device_lock, flags); return; error3: ib_unregister_mad_agent(port->mad_agent); error2: cm_remove_port_fs(port); error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); } free: device_unregister(cm_dev->device); kfree(cm_dev); } static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; struct cm_id_private *cm_id_priv; struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; int i; if (!cm_dev) return; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); write_unlock_irqrestore(&cm.device_lock, flags); spin_lock_irq(&cm.lock); cm_dev->going_down = 1; spin_unlock_irq(&cm.lock); for (i = 1; i <= ib_device->phys_port_cnt; i++) { if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); /* Mark all the cm_id's as not valid */ spin_lock_irq(&cm.lock); list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) cm_id_priv->altr_send_port_not_ready = 1; list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) cm_id_priv->prim_send_port_not_ready = 1; spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); spin_lock_irq(&cm.state_lock); cur_mad_agent = port->mad_agent; port->mad_agent = NULL; spin_unlock_irq(&cm.state_lock); ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); } device_unregister(cm_dev->device); kfree(cm_dev); } static int __init ib_cm_init(void) { int ret; memset(&cm, 0, sizeof cm); INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; cm.remote_qp_table = RB_ROOT; cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); if (ret) { ret = -ENOMEM; goto error1; } cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; goto error2; } ret = ib_register_client(&cm_client); if (ret) goto error3; return 0; error3: destroy_workqueue(cm.wq); error2: class_unregister(&cm_class); error1: idr_destroy(&cm.local_id_table); return ret; } static void __exit ib_cm_cleanup(void) { struct cm_timewait_info *timewait_info, *tmp; spin_lock_irq(&cm.lock); list_for_each_entry(timewait_info, &cm.timewait_list, list) cancel_delayed_work(&timewait_info->work.work); spin_unlock_irq(&cm.lock); ib_unregister_client(&cm_client); destroy_workqueue(cm.wq); list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { list_del(&timewait_info->list); kfree(timewait_info); } class_unregister(&cm_class); idr_destroy(&cm.local_id_table); } module_init_order(ib_cm_init, SI_ORDER_SECOND); module_exit_order(ib_cm_cleanup, SI_ORDER_FIRST); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_cma.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_cma.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_cma.c (revision 331772) @@ -1,4312 +1,4316 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "core_priv.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 static const char * const cma_events[] = { [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", [RDMA_CM_EVENT_ADDR_ERROR] = "address error", [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", [RDMA_CM_EVENT_REJECTED] = "rejected", [RDMA_CM_EVENT_ESTABLISHED] = "established", [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? cma_events[index] : "unrecognized event"; } EXPORT_SYMBOL(rdma_event_msg); static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device, void *client_data); static struct ib_client cma_client = { .name = "cma", .add = cma_add_one, .remove = cma_remove_one }; static struct ib_sa_client sa_client; static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; struct cma_pernet { struct idr tcp_ps; struct idr udp_ps; struct idr ipoib_ps; struct idr ib_ps; }; VNET_DEFINE(struct cma_pernet, cma_pernet); static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) { struct cma_pernet *retval; CURVNET_SET_QUIET(vnet); retval = &VNET(cma_pernet); CURVNET_RESTORE(); return (retval); } static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) { struct cma_pernet *pernet = cma_pernet_ptr(net); switch (ps) { case RDMA_PS_TCP: return &pernet->tcp_ps; case RDMA_PS_UDP: return &pernet->udp_ps; case RDMA_PS_IPOIB: return &pernet->ipoib_ps; case RDMA_PS_IB: return &pernet->ib_ps; default: return NULL; } } struct cma_device { struct list_head list; struct ib_device *device; struct completion comp; atomic_t refcount; struct list_head id_list; struct sysctl_ctx_list sysctl_ctx; enum ib_gid_type *default_gid_type; }; struct rdma_bind_list { enum rdma_port_space ps; struct hlist_head owners; unsigned short port; }; struct class_port_info_context { struct ib_class_port_info *class_port_info; struct ib_device *device; struct completion done; struct ib_sa_query *sa_query; u8 port_num; }; static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, struct rdma_bind_list *bind_list, int snum) { struct idr *idr = cma_pernet_idr(vnet, ps); return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); } static struct rdma_bind_list *cma_ps_find(struct vnet *net, enum rdma_port_space ps, int snum) { struct idr *idr = cma_pernet_idr(net, ps); return idr_find(idr, snum); } static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) { struct idr *idr = cma_pernet_idr(net, ps); idr_remove(idr, snum); } enum { CMA_OPTION_AFONLY, }; void cma_ref_dev(struct cma_device *cma_dev) { atomic_inc(&cma_dev->refcount); } struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, void *cookie) { struct cma_device *cma_dev; struct cma_device *found_cma_dev = NULL; mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) if (filter(cma_dev->device, cookie)) { found_cma_dev = cma_dev; break; } if (found_cma_dev) cma_ref_dev(found_cma_dev); mutex_unlock(&lock); return found_cma_dev; } int cma_get_default_gid_type(struct cma_device *cma_dev, unsigned int port) { if (port < rdma_start_port(cma_dev->device) || port > rdma_end_port(cma_dev->device)) return -EINVAL; return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; } int cma_set_default_gid_type(struct cma_device *cma_dev, unsigned int port, enum ib_gid_type default_gid_type) { unsigned long supported_gids; if (port < rdma_start_port(cma_dev->device) || port > rdma_end_port(cma_dev->device)) return -EINVAL; supported_gids = roce_gid_type_mask_support(cma_dev->device, port); if (!(supported_gids & 1 << default_gid_type)) return -EINVAL; cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = default_gid_type; return 0; } struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) { return cma_dev->device; } /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. * We do this by disabling removal notification while a callback is in process, * and reporting it after the callback completes. */ struct rdma_id_private { struct rdma_cm_id id; struct rdma_bind_list *bind_list; struct hlist_node node; struct list_head list; /* listen_any_list or cma_device.list */ struct list_head listen_list; /* per device listens */ struct cma_device *cma_dev; struct list_head mc_list; int internal_id; enum rdma_cm_state state; spinlock_t lock; struct mutex qp_mutex; struct completion comp; atomic_t refcount; struct mutex handler_mutex; int backlog; int timeout_ms; struct ib_sa_query *query; int query_id; union { struct ib_cm_id *ib; struct iw_cm_id *iw; } cm_id; u32 seq_num; u32 qkey; u32 qp_num; pid_t owner; u32 options; u8 srq; u8 tos; u8 reuseaddr; u8 afonly; enum ib_gid_type gid_type; }; struct cma_multicast { struct rdma_id_private *id_priv; union { struct ib_sa_multicast *ib; } multicast; struct list_head list; void *context; struct sockaddr_storage addr; struct kref mcref; bool igmp_joined; u8 join_state; }; struct cma_work { struct work_struct work; struct rdma_id_private *id; enum rdma_cm_state old_state; enum rdma_cm_state new_state; struct rdma_cm_event event; }; struct cma_ndev_work { struct work_struct work; struct rdma_id_private *id; struct rdma_cm_event event; }; struct iboe_mcast_work { struct work_struct work; struct rdma_id_private *id; struct cma_multicast *mc; }; union cma_ip_addr { struct in6_addr ip6; struct { __be32 pad[3]; __be32 addr; } ip4; }; struct cma_hdr { u8 cma_version; u8 ip_version; /* IP version: 7:4 */ __be16 port; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; #define CMA_VERSION 0x00 struct cma_req_info { struct ib_device *device; int port; union ib_gid local_gid; __be64 service_id; u16 pkey; bool has_gid:1; }; static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); ret = (id_priv->state == comp); spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static int cma_comp_exch(struct rdma_id_private *id_priv, enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); if ((ret = (id_priv->state == comp))) id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, enum rdma_cm_state exch) { unsigned long flags; enum rdma_cm_state old; spin_lock_irqsave(&id_priv->lock, flags); old = id_priv->state; id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return old; } static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) { return hdr->ip_version >> 4; } static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) { hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } static void _cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { cma_ref_dev(cma_dev); id_priv->cma_dev = cma_dev; id_priv->gid_type = 0; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { _cma_attach_to_dev(id_priv, cma_dev); id_priv->gid_type = cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(cma_dev->device)]; } void cma_deref_dev(struct cma_device *cma_dev) { if (atomic_dec_and_test(&cma_dev->refcount)) complete(&cma_dev->comp); } static inline void release_mc(struct kref *kref) { struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); kfree(mc->multicast.ib); kfree(mc); } static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); list_del(&id_priv->list); cma_deref_dev(id_priv->cma_dev); id_priv->cma_dev = NULL; mutex_unlock(&lock); } static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) { return (struct sockaddr *) &id_priv->id.route.addr.src_addr; } static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) { return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; } static inline unsigned short cma_family(struct rdma_id_private *id_priv) { return id_priv->id.route.addr.src_addr.ss_family; } static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) { struct ib_sa_mcmember_rec rec; int ret = 0; if (id_priv->qkey) { if (qkey && id_priv->qkey != qkey) return -EINVAL; return 0; } if (qkey) { id_priv->qkey = qkey; return 0; } switch (id_priv->id.ps) { case RDMA_PS_UDP: case RDMA_PS_IB: id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &rec.mgid, &rec); if (!ret) id_priv->qkey = be32_to_cpu(rec.qkey); break; default: break; } return ret; } static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) { dev_addr->dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); } static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { int ret; if (addr->sa_family != AF_IB) { ret = rdma_translate_ip(addr, dev_addr, NULL); } else { cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); ret = 0; } return ret; } static inline int cma_validate_port(struct ib_device *device, u8 port, enum ib_gid_type gid_type, union ib_gid *gid, int dev_type, struct vnet *net, int bound_if_index) { int ret = -ENODEV; struct net_device *ndev = NULL; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) return ret; if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(net, bound_if_index); if (ndev && ndev->if_flags & IFF_LOOPBACK) { pr_info("detected loopback device\n"); dev_put(ndev); if (!device->get_netdev) return -EOPNOTSUPP; ndev = device->get_netdev(device, port); if (!ndev) return -ENODEV; } } else { gid_type = IB_GID_TYPE_IB; } ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, ndev, NULL); if (ndev) dev_put(ndev); return ret; } static int cma_acquire_dev(struct rdma_id_private *id_priv, struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid, *gidp; int ret = -ENODEV; u8 port; if (dev_addr->dev_type != ARPHRD_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; mutex_lock(&lock); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &iboe_gid); memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); if (listen_id_priv) { cma_dev = listen_id_priv->cma_dev; port = listen_id_priv->id.port_num; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, rdma_protocol_ib(cma_dev->device, port) ? IB_GID_TYPE_IB : listen_id_priv->gid_type, gidp, dev_addr->dev_type, dev_addr->net, dev_addr->bound_dev_if); if (!ret) { id_priv->id.port_num = port; goto out; } } list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { if (listen_id_priv && listen_id_priv->cma_dev == cma_dev && listen_id_priv->id.port_num == port) continue; gidp = rdma_protocol_roce(cma_dev->device, port) ? &iboe_gid : &gid; ret = cma_validate_port(cma_dev->device, port, rdma_protocol_ib(cma_dev->device, port) ? IB_GID_TYPE_IB : cma_dev->default_gid_type[port - 1], gidp, dev_addr->dev_type, dev_addr->net, dev_addr->bound_dev_if); if (!ret) { id_priv->id.port_num = port; goto out; } } } out: if (!ret) cma_attach_to_dev(id_priv, cma_dev); mutex_unlock(&lock); return ret; } /* * Select the source IB device and address to reach the destination IB address. */ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) { struct cma_device *cma_dev, *cur_dev; struct sockaddr_ib *addr; union ib_gid gid, sgid, *dgid; u16 pkey, index; u8 p; int i; cma_dev = NULL; addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); dgid = (union ib_gid *) &addr->sib_addr; pkey = ntohs(addr->sib_pkey); list_for_each_entry(cur_dev, &dev_list, list) { for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!rdma_cap_af_ib(cur_dev->device, p)) continue; if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) continue; for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid, NULL); i++) { if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; goto found; } if (!cma_dev && (gid.global.subnet_prefix == dgid->global.subnet_prefix)) { cma_dev = cur_dev; sgid = gid; id_priv->id.port_num = p; } } } } if (!cma_dev) return -ENODEV; found: cma_attach_to_dev(id_priv, cma_dev); addr = (struct sockaddr_ib *) cma_src_addr(id_priv); memcpy(&addr->sib_addr, &sgid, sizeof sgid); cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); return 0; } static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) complete(&id_priv->comp); } struct rdma_cm_id *rdma_create_id(struct vnet *net, rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { struct rdma_id_private *id_priv; id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); if (!id_priv) return ERR_PTR(-ENOMEM); id_priv->owner = task_pid_nr(current); id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; spin_lock_init(&id_priv->lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread); return &id_priv->id; } EXPORT_SYMBOL(rdma_create_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); return ret; } static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; return ib_modify_qp(qp, &qp_attr, qp_attr_mask); } int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) return -EINVAL; qp_init_attr->port_num = id->port_num; qp = ib_create_qp(pd, qp_init_attr); if (IS_ERR(qp)) return PTR_ERR(qp); if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) goto err; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); return 0; err: ib_destroy_qp(qp); return ret; } EXPORT_SYMBOL(rdma_create_qp); void rdma_destroy_qp(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; mutex_unlock(&id_priv->qp_mutex); } EXPORT_SYMBOL(rdma_destroy_qp); static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } /* Need to update QP attributes from default values. */ qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); if (ret) goto out; qp_attr.qp_state = IB_QPS_RTR; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); if (ret) goto out; BUG_ON(id_priv->cma_dev->device != id_priv->id.device); if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_rts(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_RTS; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; if (conn_param) qp_attr.max_rd_atomic = conn_param->initiator_depth; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_err(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; int ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_ERR; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; u16 pkey; if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) pkey = 0xffff; else pkey = ib_addr_get_pkey(dev_addr); ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); if (ret) return ret; qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { ret = cma_set_qkey(id_priv, 0); if (ret) return ret; qp_attr->qkey = id_priv->qkey; *qp_attr_mask |= IB_QP_QKEY; } else { qp_attr->qp_access_flags = 0; *qp_attr_mask |= IB_QP_ACCESS_FLAGS; } return 0; } int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_id_private *id_priv; int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); if (rdma_cap_ib_cm(id->device, id->port_num)) { if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; } else if (rdma_cap_iw_cm(id->device, id->port_num)) { if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); } else ret = -ENOSYS; return ret; } EXPORT_SYMBOL(rdma_init_qp_attr); static inline int cma_zero_addr(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); case AF_INET6: return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); case AF_IB: return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); default: return 0; } } static inline int cma_loopback_addr(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); case AF_INET6: return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); case AF_IB: return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); default: return 0; } } static inline int cma_any_addr(struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: return ((struct sockaddr_in *) src)->sin_addr.s_addr != ((struct sockaddr_in *) dst)->sin_addr.s_addr; case AF_INET6: return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, &((struct sockaddr_in6 *) dst)->sin6_addr); default: return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, &((struct sockaddr_ib *) dst)->sib_addr); } } static __be16 cma_port(struct sockaddr *addr) { struct sockaddr_ib *sib; switch (addr->sa_family) { case AF_INET: return ((struct sockaddr_in *) addr)->sin_port; case AF_INET6: return ((struct sockaddr_in6 *) addr)->sin6_port; case AF_IB: sib = (struct sockaddr_ib *) addr; return htons((u16) (be64_to_cpu(sib->sib_sid) & be64_to_cpu(sib->sib_sid_mask))); default: return 0; } } static inline int cma_any_port(struct sockaddr *addr) { return !cma_port(addr); } static void cma_save_ib_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_cm_id *listen_id, struct ib_sa_path_rec *path) { struct sockaddr_ib *listen_ib, *ib; listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; if (src_addr) { ib = (struct sockaddr_ib *)src_addr; ib->sib_family = AF_IB; if (path) { ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->sgid, 16); ib->sib_sid = path->service_id; ib->sib_scope_id = 0; } else { ib->sib_pkey = listen_ib->sib_pkey; ib->sib_flowinfo = listen_ib->sib_flowinfo; ib->sib_addr = listen_ib->sib_addr; ib->sib_sid = listen_ib->sib_sid; ib->sib_scope_id = listen_ib->sib_scope_id; } ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); } if (dst_addr) { ib = (struct sockaddr_ib *)dst_addr; ib->sib_family = AF_IB; if (path) { ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->dgid, 16); } } } static void cma_save_ip4_info(struct sockaddr_in *src_addr, struct sockaddr_in *dst_addr, struct cma_hdr *hdr, __be16 local_port) { if (src_addr) { *src_addr = (struct sockaddr_in) { .sin_len = sizeof(struct sockaddr_in), .sin_family = AF_INET, .sin_addr.s_addr = hdr->dst_addr.ip4.addr, .sin_port = local_port, }; } if (dst_addr) { *dst_addr = (struct sockaddr_in) { .sin_len = sizeof(struct sockaddr_in), .sin_family = AF_INET, .sin_addr.s_addr = hdr->src_addr.ip4.addr, .sin_port = hdr->port, }; } } static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, struct sockaddr_in6 *dst_addr, struct cma_hdr *hdr, __be16 local_port) { if (src_addr) { *src_addr = (struct sockaddr_in6) { .sin6_len = sizeof(struct sockaddr_in6), .sin6_family = AF_INET6, .sin6_addr = hdr->dst_addr.ip6, .sin6_port = local_port, }; } if (dst_addr) { *dst_addr = (struct sockaddr_in6) { .sin6_len = sizeof(struct sockaddr_in6), .sin6_family = AF_INET6, .sin6_addr = hdr->src_addr.ip6, .sin6_port = hdr->port, }; } } static u16 cma_port_from_service_id(__be64 service_id) { return (u16)be64_to_cpu(service_id); } static int cma_save_ip_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct ib_cm_event *ib_event, __be64 service_id) { struct cma_hdr *hdr; __be16 port; hdr = ib_event->private_data; if (hdr->cma_version != CMA_VERSION) return -EINVAL; port = htons(cma_port_from_service_id(service_id)); switch (cma_get_ip_ver(hdr)) { case 4: cma_save_ip4_info((struct sockaddr_in *)src_addr, (struct sockaddr_in *)dst_addr, hdr, port); break; case 6: cma_save_ip6_info((struct sockaddr_in6 *)src_addr, (struct sockaddr_in6 *)dst_addr, hdr, port); break; default: return -EAFNOSUPPORT; } return 0; } static int cma_save_net_info(struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, sa_family_t sa_family, __be64 service_id) { if (sa_family == AF_IB) { if (ib_event->event == IB_CM_REQ_RECEIVED) cma_save_ib_info(src_addr, dst_addr, listen_id, ib_event->param.req_rcvd.primary_path); else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); return 0; } return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); } static int cma_save_req_info(const struct ib_cm_event *ib_event, struct cma_req_info *req) { const struct ib_cm_req_event_param *req_param = &ib_event->param.req_rcvd; const struct ib_cm_sidr_req_event_param *sidr_param = &ib_event->param.sidr_req_rcvd; switch (ib_event->event) { case IB_CM_REQ_RECEIVED: req->device = req_param->listen_id->device; req->port = req_param->port; memcpy(&req->local_gid, &req_param->primary_path->sgid, sizeof(req->local_gid)); req->has_gid = true; req->service_id = req_param->primary_path->service_id; req->pkey = be16_to_cpu(req_param->primary_path->pkey); if (req->pkey != req_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" "RDMA CMA: in the future this may cause the request to be dropped\n", req_param->bth_pkey, req->pkey); break; case IB_CM_SIDR_REQ_RECEIVED: req->device = sidr_param->listen_id->device; req->port = sidr_param->port; req->has_gid = false; req->service_id = sidr_param->service_id; req->pkey = sidr_param->pkey; if (req->pkey != sidr_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" "RDMA CMA: in the future this may cause the request to be dropped\n", sidr_param->bth_pkey, req->pkey); break; default: return -EINVAL; } return 0; } static bool validate_ipv4_net_dev(struct net_device *net_dev, const struct sockaddr_in *dst_addr, const struct sockaddr_in *src_addr) { #ifdef INET struct sockaddr_in dst_tmp = *dst_addr; __be32 daddr = dst_addr->sin_addr.s_addr, saddr = src_addr->sin_addr.s_addr; struct net_device *src_dev; struct rtentry *rte; bool ret; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || ipv4_is_loopback(saddr)) return false; src_dev = ip_dev_find(net_dev->if_vnet, saddr); if (src_dev != net_dev) { if (src_dev != NULL) dev_put(src_dev); return false; } dev_put(src_dev); /* * Make sure the socket address length field * is set, else rtalloc1() will fail. */ dst_tmp.sin_len = sizeof(dst_tmp); CURVNET_SET(net_dev->if_vnet); rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); CURVNET_RESTORE(); if (rte != NULL) { ret = (rte->rt_ifp == net_dev); RTFREE_LOCKED(rte); } else { ret = false; } return ret; #else return false; #endif } static bool validate_ipv6_net_dev(struct net_device *net_dev, const struct sockaddr_in6 *dst_addr, const struct sockaddr_in6 *src_addr) { #ifdef INET6 struct sockaddr_in6 dst_tmp = *dst_addr; struct in6_addr in6_addr = src_addr->sin6_addr; struct net_device *src_dev; struct rtentry *rte; bool ret; src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr); if (src_dev != net_dev) return false; /* * Make sure the socket address length field * is set, else rtalloc1() will fail. */ dst_tmp.sin6_len = sizeof(dst_tmp); CURVNET_SET(net_dev->if_vnet); rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); CURVNET_RESTORE(); if (rte != NULL) { ret = (rte->rt_ifp == net_dev); RTFREE_LOCKED(rte); } else { ret = false; } return ret; #else return false; #endif } static bool validate_net_dev(struct net_device *net_dev, const struct sockaddr *daddr, const struct sockaddr *saddr) { const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; switch (daddr->sa_family) { case AF_INET: return saddr->sa_family == AF_INET && validate_ipv4_net_dev(net_dev, daddr4, saddr4); case AF_INET6: return saddr->sa_family == AF_INET6 && validate_ipv6_net_dev(net_dev, daddr6, saddr6); default: return false; } } static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, const struct cma_req_info *req) { struct sockaddr_storage listen_addr_storage, src_addr_storage; struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, *src_addr = (struct sockaddr *)&src_addr_storage; struct net_device *net_dev; const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; int err; err = cma_save_ip_info(listen_addr, src_addr, ib_event, req->service_id); if (err) return ERR_PTR(err); net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, gid, listen_addr); if (!net_dev) return ERR_PTR(-ENODEV); if (!validate_net_dev(net_dev, listen_addr, src_addr)) { dev_put(net_dev); return ERR_PTR(-EHOSTUNREACH); } return net_dev; } static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) { return (be64_to_cpu(service_id) >> 16) & 0xffff; } static bool cma_match_private_data(struct rdma_id_private *id_priv, const struct cma_hdr *hdr) { struct sockaddr *addr = cma_src_addr(id_priv); __be32 ip4_addr; struct in6_addr ip6_addr; if (cma_any_addr(addr) && !id_priv->afonly) return true; switch (addr->sa_family) { case AF_INET: ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; if (cma_get_ip_ver(hdr) != 4) return false; if (!cma_any_addr(addr) && hdr->dst_addr.ip4.addr != ip4_addr) return false; break; case AF_INET6: ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; if (cma_get_ip_ver(hdr) != 6) return false; if (!cma_any_addr(addr) && memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) return false; break; case AF_IB: return true; default: return false; } return true; } static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) { enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); enum rdma_transport_type transport = rdma_node_get_transport(device->node_type); return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; } static bool cma_protocol_roce(const struct rdma_cm_id *id) { struct ib_device *device = id->device; const int port_num = id->port_num ?: rdma_start_port(device); return cma_protocol_roce_dev_port(device, port_num); } static bool cma_match_net_dev(const struct rdma_cm_id *id, const struct net_device *net_dev, u8 port_num) { const struct rdma_addr *addr = &id->route.addr; if (!net_dev) /* This request is an AF_IB request or a RoCE request */ return (!id->port_num || id->port_num == port_num) && (addr->src_addr.ss_family == AF_IB || cma_protocol_roce_dev_port(id->device, port_num)); return !addr->dev_addr.bound_dev_if || (net_eq(dev_net(net_dev), addr->dev_addr.net) && addr->dev_addr.bound_dev_if == net_dev->if_index); } static struct rdma_id_private *cma_find_listener( const struct rdma_bind_list *bind_list, const struct ib_cm_id *cm_id, const struct ib_cm_event *ib_event, const struct cma_req_info *req, const struct net_device *net_dev) { struct rdma_id_private *id_priv, *id_priv_dev; if (!bind_list) return ERR_PTR(-EINVAL); hlist_for_each_entry(id_priv, &bind_list->owners, node) { if (cma_match_private_data(id_priv, ib_event->private_data)) { if (id_priv->id.device == cm_id->device && cma_match_net_dev(&id_priv->id, net_dev, req->port)) return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, listen_list) { if (id_priv_dev->id.device == cm_id->device && cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) return id_priv_dev; } } } return ERR_PTR(-EINVAL); } static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event, struct net_device **net_dev) { struct cma_req_info req; struct rdma_bind_list *bind_list; struct rdma_id_private *id_priv; int err; err = cma_save_req_info(ib_event, &req); if (err) return ERR_PTR(err); *net_dev = cma_get_net_dev(ib_event, &req); if (IS_ERR(*net_dev)) { if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { /* Assuming the protocol is AF_IB */ *net_dev = NULL; } else if (cma_protocol_roce_dev_port(req.device, req.port)) { /* TODO find the net dev matching the request parameters * through the RoCE GID table */ *net_dev = NULL; } else { return ERR_CAST(*net_dev); } } bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); if (IS_ERR(id_priv) && *net_dev) { dev_put(*net_dev); *net_dev = NULL; } return id_priv; } static inline int cma_user_data_offset(struct rdma_id_private *id_priv) { return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); } static void cma_cancel_route(struct rdma_id_private *id_priv) { if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); } } static void cma_cancel_listens(struct rdma_id_private *id_priv) { struct rdma_id_private *dev_id_priv; /* * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ mutex_lock(&lock); list_del(&id_priv->list); while (!list_empty(&id_priv->listen_list)) { dev_id_priv = list_entry(id_priv->listen_list.next, struct rdma_id_private, listen_list); /* sync with device removal to avoid duplicate destruction */ list_del_init(&dev_id_priv->list); list_del(&dev_id_priv->listen_list); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); } static void cma_cancel_operation(struct rdma_id_private *id_priv, enum rdma_cm_state state) { switch (state) { case RDMA_CM_ADDR_QUERY: rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; case RDMA_CM_ROUTE_QUERY: cma_cancel_route(id_priv); break; case RDMA_CM_LISTEN: if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: break; } } static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; struct vnet *net = id_priv->id.route.addr.dev_addr.net; if (!bind_list) return; mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { cma_ps_remove(net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); if (rdma_cap_ib_mcast(id_priv->cma_dev->device, id_priv->id.port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else { if (mc->igmp_joined) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct net_device *ndev = NULL; if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (ndev) { dev_put(ndev); } } kref_put(&mc->mcref, release_mc); } } } void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; enum rdma_cm_state state; id_priv = container_of(id, struct rdma_id_private, id); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); /* * Wait for any active callback to finish. New callbacks will find * the id_priv state set to destroying and abort. */ mutex_lock(&id_priv->handler_mutex); mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) ib_destroy_cm_id(id_priv->cm_id.ib); } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); } cma_release_port(id_priv); cma_deref_id(id_priv); wait_for_completion(&id_priv->comp); if (id_priv->internal_id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; ret = cma_modify_qp_rtr(id_priv, NULL); if (ret) goto reject; ret = cma_modify_qp_rts(id_priv, NULL); if (ret) goto reject; ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; } static void cma_set_rep_event_data(struct rdma_cm_event *event, struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; event->param.conn.responder_resources = rep_data->responder_resources; event->param.conn.initiator_depth = rep_data->initiator_depth; event->param.conn.flow_control = rep_data->flow_control; event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; event->param.conn.srq = rep_data->srq; event->param.conn.qp_num = rep_data->remote_qpn; } static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; int ret = 0; mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && id_priv->state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && id_priv->state != RDMA_CM_DISCONNECT)) goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : RDMA_CM_EVENT_ESTABLISHED; } else { event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; } cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, ib_event->private_data); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; break; case IB_CM_DREQ_ERROR: event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_DISCONNECT)) goto out; event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; break; case IB_CM_MRA_RECEIVED: /* ignore event */ goto out; case IB_CM_REJ_RECEIVED: cma_modify_qp_err(id_priv); event.status = ib_event->param.rej_rcvd.reason; event.event = RDMA_CM_EVENT_REJECTED; event.param.conn.private_data = ib_event->private_data; event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; const __be64 service_id = ib_event->param.req_rcvd.primary_path->service_id; int ret; id = rdma_create_id(listen_id->route.addr.dev_addr.net, listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; id_priv = container_of(id, struct rdma_id_private, id); if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, service_id)) goto err; rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); if (ret) goto err; } else { if (!cma_protocol_roce(listen_id) && cma_any_addr(cma_src_addr(id_priv))) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); if (ret) goto err; } } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event, struct net_device *net_dev) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; struct vnet *net = listen_id->route.addr.dev_addr.net; int ret; id = rdma_create_id(net, listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; id_priv = container_of(id, struct rdma_id_private, id); if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, ib_event->param.sidr_req_rcvd.service_id)) goto err; if (net_dev) { ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); if (ret) goto err; } else { if (!cma_any_addr(cma_src_addr(id_priv))) { ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr); if (ret) goto err; } } id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static void cma_set_req_event_data(struct rdma_cm_event *event, struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = (char *)private_data + offset; event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; event->param.conn.responder_resources = req_data->responder_resources; event->param.conn.initiator_depth = req_data->initiator_depth; event->param.conn.flow_control = req_data->flow_control; event->param.conn.retry_count = req_data->retry_count; event->param.conn.rnr_retry_count = req_data->rnr_retry_count; event->param.conn.srq = req_data->srq; event->param.conn.qp_num = req_data->remote_qpn; } static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && (id->qp_type == IB_QPT_UD)) || (!id->qp_type)); } static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; int offset, ret; listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); if (IS_ERR(listen_id)) return PTR_ERR(listen_id); if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { ret = -EINVAL; goto net_dev_put; } mutex_lock(&listen_id->handler_mutex); if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; goto err1; } memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); event.param.ud.private_data = (char *)ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } if (!conn_id) { ret = -ENOMEM; goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); ret = cma_acquire_dev(conn_id, listen_id); if (ret) goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) goto err3; /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); if (net_dev) dev_put(net_dev); return 0; err3: cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); err1: mutex_unlock(&listen_id->handler_mutex); if (conn_id) rdma_destroy_id(&conn_id->id); net_dev_put: if (net_dev) dev_put(net_dev); return ret; } __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) { if (addr->sa_family == AF_IB) return ((struct sockaddr_ib *) addr)->sib_sid; return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); } EXPORT_SYMBOL(rdma_get_service_id); static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; struct rdma_cm_event event; int ret = 0; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) goto out; memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: memcpy(cma_src_addr(id_priv), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(id_priv), raddr, rdma_addr_size(raddr)); switch (iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: event.event = RDMA_CM_EVENT_REJECTED; break; case -ETIMEDOUT: event.event = RDMA_CM_EVENT_UNREACHABLE; break; default: event.event = RDMA_CM_EVENT_CONNECT_ERROR; break; } break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); } event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); if (listen_id->state != RDMA_CM_LISTEN) goto out; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } ret = cma_acquire_dev(conn_id, listen_id); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } conn_id->cm_id.iw = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); goto out; } mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); out: mutex_unlock(&listen_id->handler_mutex); return ret; } static int cma_ib_listen(struct rdma_id_private *id_priv) { struct sockaddr *addr; struct ib_cm_id *id; __be64 svc_id; addr = cma_src_addr(id_priv); svc_id = rdma_get_service_id(&id_priv->id, addr); id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; return 0; } static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; struct iw_cm_id *id; id = iw_create_cm_id(id_priv->id.device, iw_conn_req_handler, id_priv); if (IS_ERR(id)) return PTR_ERR(id); id->tos = id_priv->tos; id_priv->cm_id.iw = id; memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); ret = iw_cm_listen(id_priv->cm_id.iw, backlog); if (ret) { iw_destroy_cm_id(id_priv->cm_id.iw); id_priv->cm_id.iw = NULL; } return ret; } static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rdma_id_private *id_priv = id->context; id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; return id_priv->id.event_handler(id, event); } static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; struct vnet *net = id_priv->id.route.addr.dev_addr.net; int ret; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); _cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); } void rdma_set_service_type(struct rdma_cm_id *id, int tos) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); id_priv->tos = (u8) tos; } EXPORT_SYMBOL(rdma_set_service_type); static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, void *context) { struct cma_work *work = context; struct rdma_route *route; route = &work->id->id.route; if (!status) { route->num_paths = 1; *route->path_rec = *path_rec; } else { work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; } queue_work(cma_wq, &work->work); } static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct ib_sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; struct sockaddr_ib *sib; memset(&path_rec, 0, sizeof path_rec); rdma_addr_get_sgid(dev_addr, &path_rec.sgid); rdma_addr_get_dgid(dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; switch (cma_family(id_priv)) { case AF_INET: path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; break; case AF_INET6: sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; break; case AF_IB: sib = (struct sockaddr_ib *) cma_src_addr(id_priv); path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; break; } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, comp_mask, timeout_ms, GFP_KERNEL, cma_query_handler, work, &id_priv->query); return (id_priv->query_id < 0) ? id_priv->query_id : 0; } static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); } static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) { struct rdma_route *route = &id_priv->id.route; struct cma_work *work; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } ret = cma_query_ib_route(id_priv, timeout_ms, work); if (ret) goto err2; return 0; err2: kfree(route->path_rec); route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_RESOLVED)) return -EINVAL; id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, GFP_KERNEL); if (!id->route.path_rec) { ret = -ENOMEM; goto err; } id->route.num_paths = num_paths; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_set_ib_paths); static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) { struct cma_work *work; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; queue_work(cma_wq, &work->work); return 0; } static int iboe_tos_to_sl(struct net_device *ndev, int tos) { /* TODO: Implement this function */ return 0; } static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, unsigned long supported_gids, enum ib_gid_type default_gid) { if ((network_type == RDMA_NETWORK_IPV4 || network_type == RDMA_NETWORK_IPV6) && test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) return IB_GID_TYPE_ROCE_UDP_ENCAP; return default_gid; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; struct rdma_addr *addr = &route->addr; struct cma_work *work; int ret; struct net_device *ndev = NULL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } route->num_paths = 1; if (addr->dev_addr.bound_dev_if) { unsigned long supported_gids; ndev = dev_get_by_index(addr->dev_addr.net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; goto err2; } if (ndev->if_flags & IFF_LOOPBACK) { dev_put(ndev); if (!id_priv->id.device->get_netdev) { ret = -EOPNOTSUPP; goto err2; } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); if (!ndev) { ret = -ENODEV; goto err2; } } route->path_rec->net = ndev->if_vnet; route->path_rec->ifindex = ndev->if_index; supported_gids = roce_gid_type_mask_support(id_priv->id.device, id_priv->id.port_num); route->path_rec->gid_type = cma_route_gid_type(addr->dev_addr.network, supported_gids, id_priv->gid_type); } if (!ndev) { ret = -ENODEV; goto err2; } memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, &route->path_rec->dgid); /* Use the hint from IP Stack to select GID Type */ if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) /* TODO: get the hoplimit from the inet/inet6 device */ route->path_rec->hop_limit = addr->dev_addr.hoplimit; else route->path_rec->hop_limit = 1; route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; } work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; work->event.status = 0; queue_work(cma_wq, &work->work); return 0; err2: kfree(route->path_rec); route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); if (rdma_cap_ib_sa(id->device, id->port_num)) ret = cma_resolve_ib_route(id_priv, timeout_ms); else if (rdma_protocol_roce(id->device, id->port_num)) ret = cma_resolve_iboe_route(id_priv); else if (rdma_protocol_iwarp(id->device, id->port_num)) ret = cma_resolve_iw_route(id_priv, timeout_ms); else ret = -ENOSYS; if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_route); static void cma_set_loopback(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); break; case AF_INET6: ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 0, 0, 0, htonl(1)); break; default: ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 0, 0, 0, htonl(1)); break; } } static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev, *cur_dev; struct ib_port_attr port_attr; union ib_gid gid; u16 pkey; int ret; u8 p; cma_dev = NULL; mutex_lock(&lock); list_for_each_entry(cur_dev, &dev_list, list) { if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cur_dev->device, 1)) continue; if (!cma_dev) cma_dev = cur_dev; for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { if (!ib_query_port(cur_dev->device, p, &port_attr) && port_attr.state == IB_PORT_ACTIVE) { cma_dev = cur_dev; goto port_found; } } } if (!cma_dev) { ret = -ENODEV; goto out; } p = 1; port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); if (ret) goto out; ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); if (ret) goto out; id_priv->id.route.addr.dev_addr.dev_type = (rdma_protocol_ib(cma_dev->device, p)) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); cma_set_loopback(cma_src_addr(id_priv)); out: mutex_unlock(&lock); return ret; } static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) goto out; memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); if (!status && !id_priv->cma_dev) status = cma_acquire_dev(id_priv, NULL); if (status) { if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ADDR_BOUND)) goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); } static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; union ib_gid gid; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; if (!id_priv->cma_dev) { ret = cma_bind_loopback(id_priv); if (ret) goto err; } rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; err: kfree(work); return ret; } static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) { struct cma_work *work; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; if (!id_priv->cma_dev) { ret = cma_resolve_ib_dev(id_priv); if (ret) goto err; } rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; err: kfree(work); return ret; } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; if (dst_addr->sa_family == AF_INET6) { struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr)) id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; } else if (dst_addr->sa_family == AF_IB) { ((struct sockaddr_ib *) src_addr)->sib_pkey = ((struct sockaddr_ib *) dst_addr)->sib_pkey; } } return rdma_bind_addr(id, src_addr); } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); if (ret) return ret; } if (cma_family(id_priv) != dst_addr->sa_family) return -EINVAL; if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { if (dst_addr->sa_family == AF_IB) { ret = cma_resolve_ib_addr(id_priv); } else { ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); } } if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_addr); int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if (reuse || id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_reuseaddr); int rdma_set_afonly(struct rdma_cm_id *id, int afonly) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { id_priv->options |= (1 << CMA_OPTION_AFONLY); id_priv->afonly = afonly; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_afonly); static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { struct sockaddr *addr; struct sockaddr_ib *sib; u64 sid, mask; __be16 port; addr = cma_src_addr(id_priv); port = htons(bind_list->port); switch (addr->sa_family) { case AF_INET: ((struct sockaddr_in *) addr)->sin_port = port; break; case AF_INET6: ((struct sockaddr_in6 *) addr)->sin6_port = port; break; case AF_IB: sib = (struct sockaddr_ib *) addr; sid = be64_to_cpu(sib->sib_sid); mask = be64_to_cpu(sib->sib_sid_mask); sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); sib->sib_sid_mask = cpu_to_be64(~0ULL); break; } id_priv->bind_list = bind_list; hlist_add_head(&id_priv->node, &bind_list->owners); } static int cma_alloc_port(enum rdma_port_space ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; int ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, snum); if (ret < 0) goto err; bind_list->ps = ps; bind_list->port = (unsigned short)ret; cma_bind_port(bind_list, id_priv); return 0; err: kfree(bind_list); return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; } static int cma_alloc_any_port(enum rdma_port_space ps, struct rdma_id_private *id_priv) { static unsigned int last_used_port; int low, high, remaining; unsigned int rover; struct vnet *net = id_priv->id.route.addr.dev_addr.net; u32 rand; inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; get_random_bytes(&rand, sizeof(rand)); rover = rand % remaining + low; retry: if (last_used_port != rover && !cma_ps_find(net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid * re-using same port immediately after it is closed. */ if (!ret) last_used_port = rover; if (ret != -EADDRNOTAVAIL) return ret; } if (--remaining) { rover++; if ((rover < low) || (rover > high)) rover = low; goto retry; } return -EADDRNOTAVAIL; } /* * Check that the requested port is available. This is called when trying to * bind to a specific port, or when trying to listen on a bound port. In * the latter case, the provided id_priv may already be on the bind_list, but * we still need to check that it's okay to start listening. */ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv, uint8_t reuseaddr) { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; addr = cma_src_addr(id_priv); hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && cur_id->reuseaddr) continue; cur_addr = cma_src_addr(cur_id); if (id_priv->afonly && cur_id->afonly && (addr->sa_family != cur_addr->sa_family)) continue; if (cma_any_addr(addr) || cma_any_addr(cur_addr)) return -EADDRNOTAVAIL; if (!cma_addr_cmp(addr, cur_addr)) return -EADDRINUSE; } return 0; } static int cma_use_port(enum rdma_port_space ps, struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; unsigned short snum; int ret; snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < IPPORT_RESERVED && priv_check(curthread, PRIV_NETINET_BINDANY) != 0) return -EACCES; bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); if (!ret) cma_bind_port(bind_list, id_priv); } return ret; } static int cma_bind_listen(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; int ret = 0; mutex_lock(&lock); if (bind_list->owners.first->next) ret = cma_check_port(bind_list, id_priv, 0); mutex_unlock(&lock); return ret; } static enum rdma_port_space cma_select_inet_ps( struct rdma_id_private *id_priv) { switch (id_priv->id.ps) { case RDMA_PS_TCP: case RDMA_PS_UDP: case RDMA_PS_IPOIB: case RDMA_PS_IB: return id_priv->id.ps; default: return 0; } } static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) { enum rdma_port_space ps = 0; struct sockaddr_ib *sib; u64 sid_ps, mask, sid; sib = (struct sockaddr_ib *) cma_src_addr(id_priv); mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; sid = be64_to_cpu(sib->sib_sid) & mask; if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { sid_ps = RDMA_IB_IP_PS_IB; ps = RDMA_PS_IB; } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && (sid == (RDMA_IB_IP_PS_TCP & mask))) { sid_ps = RDMA_IB_IP_PS_TCP; ps = RDMA_PS_TCP; } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && (sid == (RDMA_IB_IP_PS_UDP & mask))) { sid_ps = RDMA_IB_IP_PS_UDP; ps = RDMA_PS_UDP; } if (ps) { sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | be64_to_cpu(sib->sib_sid_mask)); } return ps; } static int cma_get_port(struct rdma_id_private *id_priv) { enum rdma_port_space ps; int ret; if (cma_family(id_priv) != AF_IB) ps = cma_select_inet_ps(id_priv); else ps = cma_select_ib_ps(id_priv); if (!ps) return -EPROTONOSUPPORT; mutex_lock(&lock); if (cma_any_port(cma_src_addr(id_priv))) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); mutex_unlock(&lock); return ret; } static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { #ifdef INET6 struct sockaddr_in6 sin6; if (addr->sa_family != AF_INET6) return 0; sin6 = *(struct sockaddr_in6 *)addr; if (!(IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr))) return 0; if (sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0) return -EINVAL; dev_addr->bound_dev_if = sin6.sin6_scope_id; #endif return 0; } int rdma_listen(struct rdma_cm_id *id, int backlog) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { id->route.addr.src_addr.ss_family = AF_INET; ret = rdma_bind_addr(id, cma_src_addr(id_priv)); if (ret) return ret; } if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) return -EINVAL; if (id_priv->reuseaddr) { ret = cma_bind_listen(id_priv); if (ret) goto err; } id_priv->backlog = backlog; if (id->device) { if (rdma_cap_ib_cm(id->device, 1)) { ret = cma_ib_listen(id_priv); if (ret) goto err; } else if (rdma_cap_iw_cm(id->device, 1)) { ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; } else { ret = -ENOSYS; goto err; } } else cma_listen_on_all(id_priv); return 0; err: id_priv->backlog = 0; cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } EXPORT_SYMBOL(rdma_listen); int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; int ret; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && addr->sa_family != AF_IB) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); if (ret) goto err1; memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { ret = cma_translate_addr(addr, &id->route.addr.dev_addr); if (ret) goto err1; ret = cma_acquire_dev(id_priv, NULL); if (ret) goto err1; } if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { if (addr->sa_family == AF_INET) id_priv->afonly = 1; #ifdef INET6 else if (addr->sa_family == AF_INET6) { CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); id_priv->afonly = V_ip6_v6only; CURVNET_RESTORE(); } #endif } ret = cma_get_port(id_priv); if (ret) goto err2; return 0; err2: if (id_priv->cma_dev) cma_release_dev(id_priv); err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) { struct cma_hdr *cma_hdr; cma_hdr = hdr; cma_hdr->cma_version = CMA_VERSION; if (cma_family(id_priv) == AF_INET) { struct sockaddr_in *src4, *dst4; src4 = (struct sockaddr_in *) cma_src_addr(id_priv); dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); cma_set_ip_ver(cma_hdr, 4); cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; cma_hdr->port = src4->sin_port; } else if (cma_family(id_priv) == AF_INET6) { struct sockaddr_in6 *src6, *dst6; src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); cma_set_ip_ver(cma_hdr, 6); cma_hdr->src_addr.ip6 = src6->sin6_addr; cma_hdr->dst_addr.ip6 = dst6->sin6_addr; cma_hdr->port = src6->sin6_port; } return 0; } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_CONNECT) goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_SIDR_REP_RECEIVED: event.param.ud.private_data = ib_event->private_data; event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; if (rep->status != IB_SIDR_SUCCESS) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = ib_event->param.sidr_rep_rcvd.status; break; } ret = cma_set_qkey(id_priv, rep->qkey); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = ret; break; } ret = ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, &event.param.ud.ah_attr); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = ret; break; } event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; event.status = 0; break; default: pr_err("RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_sidr_req_param req; struct ib_cm_id *id; void *private_data; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; if (req.private_data_len) { private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; } else { private_data = NULL; } if (conn_param->private_data && conn_param->private_data_len) memcpy((char *)private_data + offset, conn_param->private_data, conn_param->private_data_len); if (private_data) { ret = cma_format_hdr(private_data, id_priv); if (ret) goto out; req.private_data = private_data; } id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; req.path = id_priv->id.route.path_rec; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); id_priv->cm_id.ib = NULL; } out: kfree(private_data); return ret; } static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_req_param req; struct rdma_route *route; void *private_data; struct ib_cm_id *id; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; if (req.private_data_len) { private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; } else { private_data = NULL; } if (conn_param->private_data && conn_param->private_data_len) memcpy((char *)private_data + offset, conn_param->private_data, conn_param->private_data_len); id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; route = &id_priv->id.route; if (private_data) { ret = cma_format_hdr(private_data, id_priv); if (ret) goto out; req.private_data = private_data; } req.primary_path = &route->path_rec[0]; if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; req.flow_control = conn_param->flow_control; req.retry_count = min_t(u8, 7, conn_param->retry_count); req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { ib_destroy_cm_id(id); id_priv->cm_id.ib = NULL; } kfree(private_data); return ret; } static int cma_connect_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_id *cm_id; int ret; struct iw_cm_conn_param iw_param; cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); cm_id->tos = id_priv->tos; id_priv->cm_id.iw = cm_id; memcpy(&cm_id->local_addr, cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), rdma_addr_size(cma_dst_addr(id_priv))); ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; if (conn_param) { iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; } else { memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; } ret = iw_cm_connect(cm_id, &iw_param); out: if (ret) { iw_destroy_cm_id(cm_id); id_priv->cm_id.iw = NULL; } return ret; } int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_connect_iw(id_priv, conn_param); else ret = -ENOSYS; if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_connect); static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; int ret; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; ret = cma_modify_qp_rts(id_priv, conn_param); if (ret) goto out; memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; rep.starting_psn = id_priv->seq_num; rep.private_data = conn_param->private_data; rep.private_data_len = conn_param->private_data_len; rep.responder_resources = conn_param->responder_resources; rep.initiator_depth = conn_param->initiator_depth; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; } static int cma_accept_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_conn_param iw_param; int ret; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; if (id_priv->id.qp) { iw_param.qpn = id_priv->qp_num; } else iw_param.qpn = conn_param->qp_num; return iw_cm_accept(id_priv->cm_id.iw, &iw_param); } static int cma_send_sidr_rep(struct rdma_id_private *id_priv, enum ib_cm_sidr_status status, u32 qkey, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; int ret; memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { ret = cma_set_qkey(id_priv, qkey); if (ret) return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); id_priv->owner = task_pid_nr(current); if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp && conn_param) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->qkey, conn_param->private_data, conn_param->private_data_len); else ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 0, NULL, 0); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); } } else if (rdma_cap_iw_cm(id->device, id->port_num)) ret = cma_accept_iw(id_priv, conn_param); else ret = -ENOSYS; if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); rdma_reject(id, NULL, 0); return ret; } EXPORT_SYMBOL(rdma_accept); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (id->device->node_type) { case RDMA_NODE_IB_CA: ret = ib_cm_notify(id_priv->cm_id.ib, event); break; default: ret = 0; break; } return ret; } EXPORT_SYMBOL(rdma_notify); int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); else ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); } else ret = -ENOSYS; return ret; } EXPORT_SYMBOL(rdma_reject); int rdma_disconnect(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; if (rdma_cap_ib_cm(id->device, id->port_num)) { ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); } else if (rdma_cap_iw_cm(id->device, id->port_num)) { ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); } else ret = -EINVAL; out: return ret; } EXPORT_SYMBOL(rdma_disconnect); static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; int ret = 0; id_priv = mc->id_priv; mutex_lock(&id_priv->handler_mutex); if (id_priv->state != RDMA_CM_ADDR_BOUND && id_priv->state != RDMA_CM_ADDR_RESOLVED) goto out; if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, be16_to_cpu(multicast->rec.mlid)); mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; if (!status) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct net_device *ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); enum ib_gid_type gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; event.event = RDMA_CM_EVENT_MULTICAST_JOIN; ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, &multicast->rec, ndev, gid_type, &event.param.ud.ah_attr); event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); if (ndev) dev_put(ndev); } else event.event = RDMA_CM_EVENT_MULTICAST_ERROR; ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; } out: mutex_unlock(&id_priv->handler_mutex); return 0; } static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, union ib_gid *mgid) { unsigned char mc_map[MAX_ADDR_LEN]; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct sockaddr_in *sin = (struct sockaddr_in *) addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if ((addr->sa_family == AF_INET6) && ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_IB) { memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); } } static void cma_query_sa_classport_info_cb(int status, struct ib_class_port_info *rec, void *context) { struct class_port_info_context *cb_ctx = context; WARN_ON(!context); if (status || !rec) { pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", cb_ctx->device->name, cb_ctx->port_num, status); goto out; } memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); out: complete(&cb_ctx->done); } static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, struct ib_class_port_info *class_port_info) { struct class_port_info_context *cb_ctx; int ret; cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); if (!cb_ctx) return -ENOMEM; cb_ctx->device = device; cb_ctx->class_port_info = class_port_info; cb_ctx->port_num = port_num; init_completion(&cb_ctx->done); ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, CMA_QUERY_CLASSPORT_INFO_TIMEOUT, GFP_KERNEL, cma_query_sa_classport_info_cb, cb_ctx, &cb_ctx->sa_query); if (ret < 0) { pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", device->name, port_num, ret); goto out; } wait_for_completion(&cb_ctx->done); out: kfree(cb_ctx); return ret; } static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; struct ib_class_port_info class_port_info; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret; ib_addr_get_mgid(dev_addr, &rec.mgid); ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &rec.mgid, &rec); if (ret) return ret; ret = cma_set_qkey(id_priv, 0); if (ret) return ret; cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = mc->join_state; if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { ret = cma_query_sa_classport_info(id_priv->id.device, id_priv->id.port_num, &class_port_info); if (ret) return ret; if (!(ib_get_cpi_capmask2(&class_port_info) & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { pr_warn("RDMA CM: %s port %u Unable to multicast join\n" "RDMA CM: SM doesn't support Send Only Full Member option\n", id_priv->id.device->name, id_priv->id.port_num); return -EOPNOTSUPP; } } comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | IB_SA_MCMEMBER_REC_FLOW_LABEL | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; if (id_priv->id.ps == RDMA_PS_IPOIB) comp_mask |= IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_RATE_SELECTOR | IB_SA_MCMEMBER_REC_MTU_SELECTOR | IB_SA_MCMEMBER_REC_MTU | IB_SA_MCMEMBER_REC_HOP_LIMIT; mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); return PTR_ERR_OR_ZERO(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) { struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); struct cma_multicast *mc = mw->mc; struct ib_sa_multicast *m = mc->multicast.ib; mc->multicast.ib->context = mc; cma_ib_mc_handler(0, m); kref_put(&mc->mcref, release_mc); kfree(mw); } static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { mgid->raw[0] = 0xff; mgid->raw[1] = 0x0e; mgid->raw[2] = 0; mgid->raw[3] = 0; mgid->raw[4] = 0; mgid->raw[5] = 0; mgid->raw[6] = 0; mgid->raw[7] = 0; mgid->raw[8] = 0; mgid->raw[9] = 0; mgid->raw[10] = 0xff; mgid->raw[11] = 0xff; *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; } } static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct iboe_mcast_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; enum ib_gid_type gid_type; bool send_only; send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); if (!mc->multicast.ib) { err = -ENOMEM; goto out1; } cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!ndev) { err = -ENODEV; goto out2; } mc->multicast.ib->rec.rate = iboe_get_rate(ndev); mc->multicast.ib->rec.hop_limit = 1; mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; if (!send_only) { mc->igmp_joined = true; } } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) err = -ENOTSUPP; } dev_put(ndev); if (err || !mc->multicast.ib->rec.mtu) { if (!err) err = -EINVAL; goto out2; } rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &mc->multicast.ib->rec.port_gid); work->id = id_priv; work->mc = mc; INIT_WORK(&work->work, iboe_mcast_work_handler); kref_get(&mc->mcref); queue_work(cma_wq, &work->work); return 0; out2: kfree(mc->multicast.ib); out1: kfree(work); return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, u8 join_state, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); if (!mc) return -ENOMEM; memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; mc->igmp_joined = false; mc->join_state = join_state; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); if (rdma_protocol_roce(id->device, id->port_num)) { kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); } else if (rdma_cap_ib_mcast(id->device, id->port_num)) ret = cma_join_ib_multicast(id_priv, mc); else ret = -ENOSYS; if (ret) { spin_lock_irq(&id_priv->lock); list_del(&mc->list); spin_unlock_irq(&id_priv->lock); kfree(mc); } return ret; } EXPORT_SYMBOL(rdma_join_multicast); void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; struct cma_multicast *mc; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { list_del(&mc->list); spin_unlock_irq(&id_priv->lock); if (id->qp) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); BUG_ON(id_priv->cma_dev->device != id->device); if (rdma_cap_ib_mcast(id->device, id->port_num)) { ib_sa_free_multicast(mc->multicast.ib); kfree(mc); } else if (rdma_protocol_roce(id->device, id->port_num)) { if (mc->igmp_joined) { struct rdma_dev_addr *dev_addr = &id->route.addr.dev_addr; struct net_device *ndev = NULL; if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (ndev) { dev_put(ndev); } mc->igmp_joined = false; } kref_put(&mc->mcref, release_mc); } return; } } spin_unlock_irq(&id_priv->lock); } EXPORT_SYMBOL(rdma_leave_multicast); static int sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS) { struct cma_device *cma_dev = arg1; const int port = arg2; char buf[64]; int error; strlcpy(buf, ib_cache_gid_type_str( cma_get_default_gid_type(cma_dev, port)), sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) goto done; error = ib_cache_gid_parse_type_str(buf); if (error < 0) { error = EINVAL; goto done; } cma_set_default_gid_type(cma_dev, port, error); error = 0; done: return (error); } static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; struct rdma_id_private *id_priv; unsigned int i; unsigned long supported_gids = 0; cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); if (!cma_dev) return; sysctl_ctx_init(&cma_dev->sysctl_ctx); cma_dev->device = device; cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, sizeof(*cma_dev->default_gid_type), GFP_KERNEL); if (!cma_dev->default_gid_type) { kfree(cma_dev); return; } for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { supported_gids = roce_gid_type_mask_support(device, i); WARN_ON(!supported_gids); cma_dev->default_gid_type[i - rdma_start_port(device)] = find_first_bit(&supported_gids, BITS_PER_LONG); } init_completion(&cma_dev->comp); atomic_set(&cma_dev->refcount, 1); INIT_LIST_HEAD(&cma_dev->id_list); ib_set_client_data(device, &cma_client, cma_dev); mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); list_for_each_entry(id_priv, &listen_any_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { char buf[64]; snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i); (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx, SYSCTL_CHILDREN(device->ports_parent->parent->oidp), OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, cma_dev, i, &sysctl_cma_default_roce_mode, "A", "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2"); } } static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; enum rdma_cm_state state; int ret = 0; /* Record that we want to remove the device */ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); if (state == RDMA_CM_DESTROYING) return 0; cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; } static void cma_process_remove(struct cma_device *cma_dev) { struct rdma_id_private *id_priv; int ret; mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { id_priv = list_entry(cma_dev->id_list.next, struct rdma_id_private, list); list_del(&id_priv->listen_list); list_del_init(&id_priv->list); atomic_inc(&id_priv->refcount); mutex_unlock(&lock); ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); cma_deref_id(id_priv); if (ret) rdma_destroy_id(&id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); cma_deref_dev(cma_dev); wait_for_completion(&cma_dev->comp); } static void cma_remove_one(struct ib_device *device, void *client_data) { struct cma_device *cma_dev = client_data; if (!cma_dev) return; mutex_lock(&lock); list_del(&cma_dev->list); mutex_unlock(&lock); cma_process_remove(cma_dev); sysctl_ctx_free(&cma_dev->sysctl_ctx); kfree(cma_dev->default_gid_type); kfree(cma_dev); } static void cma_init_vnet(void *arg) { struct cma_pernet *pernet = &VNET(cma_pernet); idr_init(&pernet->tcp_ps); idr_init(&pernet->udp_ps); idr_init(&pernet->ipoib_ps); idr_init(&pernet->ib_ps); } VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL); static void cma_destroy_vnet(void *arg) { struct cma_pernet *pernet = &VNET(cma_pernet); idr_destroy(&pernet->tcp_ps); idr_destroy(&pernet->udp_ps); idr_destroy(&pernet->ipoib_ps); idr_destroy(&pernet->ib_ps); } VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL); static int __init cma_init(void) { int ret; cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); ret = ib_register_client(&cma_client); if (ret) goto err; cma_configfs_init(); return 0; err: rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; } static void __exit cma_cleanup(void) { cma_configfs_exit(); ib_unregister_client(&cma_client); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); } module_init(cma_init); module_exit(cma_cleanup); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_cq.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_cq.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_cq.c (revision 331772) @@ -1,154 +1,158 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #define IB_CQ_POLL_MAX 16 /* maximum number of completions per poll loop */ #define IB_CQ_POLL_BUDGET 65536 #define IB_CQ_POLL_FLAGS (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) static void ib_cq_poll_work(struct work_struct *work) { struct ib_wc ib_wc[IB_CQ_POLL_MAX]; struct ib_cq *cq = container_of(work, struct ib_cq, work); int total = 0; int i; int n; while (1) { n = ib_poll_cq(cq, IB_CQ_POLL_MAX, ib_wc); for (i = 0; i < n; i++) { struct ib_wc *wc = ib_wc + i; if (wc->wr_cqe != NULL) wc->wr_cqe->done(cq, wc); } if (n != IB_CQ_POLL_MAX) { if (ib_req_notify_cq(cq, IB_CQ_POLL_FLAGS) > 0) break; else return; } total += n; if (total >= IB_CQ_POLL_BUDGET) break; } /* give other work structs a chance */ queue_work(ib_comp_wq, &cq->work); } static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) { queue_work(ib_comp_wq, &cq->work); } struct ib_cq * ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx) { struct ib_cq_init_attr cq_attr = { .cqe = nr_cqe, .comp_vector = comp_vector, }; struct ib_cq *cq; /* * Check for invalid parameters early on to avoid * extra error handling code: */ switch (poll_ctx) { case IB_POLL_DIRECT: case IB_POLL_SOFTIRQ: case IB_POLL_WORKQUEUE: break; default: return (ERR_PTR(-EINVAL)); } cq = dev->create_cq(dev, &cq_attr, NULL, NULL); if (IS_ERR(cq)) return (cq); cq->device = dev; cq->uobject = NULL; cq->event_handler = NULL; cq->cq_context = private; cq->poll_ctx = poll_ctx; atomic_set(&cq->usecnt, 0); switch (poll_ctx) { case IB_POLL_DIRECT: cq->comp_handler = NULL; /* no hardware completions */ break; case IB_POLL_SOFTIRQ: case IB_POLL_WORKQUEUE: cq->comp_handler = ib_cq_completion_workqueue; INIT_WORK(&cq->work, ib_cq_poll_work); ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); break; default: break; } return (cq); } EXPORT_SYMBOL(ib_alloc_cq); void ib_free_cq(struct ib_cq *cq) { if (WARN_ON_ONCE(atomic_read(&cq->usecnt) != 0)) return; switch (cq->poll_ctx) { case IB_POLL_DIRECT: break; case IB_POLL_SOFTIRQ: case IB_POLL_WORKQUEUE: flush_work(&cq->work); break; default: break; } (void)cq->device->destroy_cq(cq); } EXPORT_SYMBOL(ib_free_cq); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_device.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_device.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_device.c (revision 331772) @@ -1,1048 +1,1052 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include "core_priv.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("core kernel InfiniBand API"); MODULE_LICENSE("Dual BSD/GPL"); struct ib_client_data { struct list_head list; struct ib_client *client; void * data; /* The device or client is going down. Do not call client or device * callbacks other than remove(). */ bool going_down; }; struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); /* The device_list and client_list contain devices and clients after their * registration has completed, and the devices and clients are removed * during unregistration. */ static LIST_HEAD(device_list); static LIST_HEAD(client_list); /* * device_mutex and lists_rwsem protect access to both device_list and * client_list. device_mutex protects writer access by device and client * registration / de-registration. lists_rwsem protects reader access to * these lists. Iterators of these lists must lock it for read, while updates * to the lists must be done with a write lock. A special case is when the * device_mutex is locked. In this case locking the lists for read access is * not necessary as the device_mutex implies it. * * lists_rwsem also protects access to the client data list. */ static DEFINE_MUTEX(device_mutex); static DECLARE_RWSEM(lists_rwsem); static int ib_device_check_mandatory(struct ib_device *device) { #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x } static const struct { size_t offset; char *name; } mandatory_table[] = { IB_MANDATORY_FUNC(query_device), IB_MANDATORY_FUNC(query_port), IB_MANDATORY_FUNC(query_pkey), IB_MANDATORY_FUNC(query_gid), IB_MANDATORY_FUNC(alloc_pd), IB_MANDATORY_FUNC(dealloc_pd), IB_MANDATORY_FUNC(create_ah), IB_MANDATORY_FUNC(destroy_ah), IB_MANDATORY_FUNC(create_qp), IB_MANDATORY_FUNC(modify_qp), IB_MANDATORY_FUNC(destroy_qp), IB_MANDATORY_FUNC(post_send), IB_MANDATORY_FUNC(post_recv), IB_MANDATORY_FUNC(create_cq), IB_MANDATORY_FUNC(destroy_cq), IB_MANDATORY_FUNC(poll_cq), IB_MANDATORY_FUNC(req_notify_cq), IB_MANDATORY_FUNC(get_dma_mr), IB_MANDATORY_FUNC(dereg_mr), IB_MANDATORY_FUNC(get_port_immutable) }; int i; for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) { if (!*(void **) ((char *) device + mandatory_table[i].offset)) { pr_warn("Device %s is missing mandatory function %s\n", device->name, mandatory_table[i].name); return -EINVAL; } } return 0; } static struct ib_device *__ib_device_get_by_name(const char *name) { struct ib_device *device; list_for_each_entry(device, &device_list, core_list) if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX)) return device; return NULL; } static int alloc_name(char *name) { unsigned long *inuse; char buf[IB_DEVICE_NAME_MAX]; struct ib_device *device; int i; inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL); if (!inuse) return -ENOMEM; list_for_each_entry(device, &device_list, core_list) { if (!sscanf(device->name, name, &i)) continue; if (i < 0 || i >= PAGE_SIZE * 8) continue; snprintf(buf, sizeof buf, name, i); if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX)) set_bit(i, inuse); } i = find_first_zero_bit(inuse, PAGE_SIZE * 8); free_page((unsigned long) inuse); snprintf(buf, sizeof buf, name, i); if (__ib_device_get_by_name(buf)) return -ENFILE; strlcpy(name, buf, IB_DEVICE_NAME_MAX); return 0; } static void ib_device_release(struct device *device) { struct ib_device *dev = container_of(device, struct ib_device, dev); ib_cache_release_one(dev); kfree(dev->port_immutable); kfree(dev); } static struct class ib_class = { .name = "infiniband", .dev_release = ib_device_release, }; /** * ib_alloc_device - allocate an IB device struct * @size:size of structure to allocate * * Low-level drivers should use ib_alloc_device() to allocate &struct * ib_device. @size is the size of the structure to be allocated, * including any private data used by the low-level driver. * ib_dealloc_device() must be used to free structures allocated with * ib_alloc_device(). */ struct ib_device *ib_alloc_device(size_t size) { struct ib_device *device; if (WARN_ON(size < sizeof(struct ib_device))) return NULL; device = kzalloc(size, GFP_KERNEL); if (!device) return NULL; device->dev.parent = &linux_root_device; device->dev.class = &ib_class; device_initialize(&device->dev); dev_set_drvdata(&device->dev, device); INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); spin_lock_init(&device->client_data_lock); INIT_LIST_HEAD(&device->client_data_list); INIT_LIST_HEAD(&device->port_list); return device; } EXPORT_SYMBOL(ib_alloc_device); /** * ib_dealloc_device - free an IB device struct * @device:structure to free * * Free a structure allocated with ib_alloc_device(). */ void ib_dealloc_device(struct ib_device *device) { WARN_ON(device->reg_state != IB_DEV_UNREGISTERED && device->reg_state != IB_DEV_UNINITIALIZED); kobject_put(&device->dev.kobj); } EXPORT_SYMBOL(ib_dealloc_device); static int add_client_context(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; unsigned long flags; context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) { pr_warn("Couldn't allocate client context for %s/%s\n", device->name, client->name); return -ENOMEM; } context->client = client; context->data = NULL; context->going_down = false; down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_add(&context->list, &device->client_data_list); spin_unlock_irqrestore(&device->client_data_lock, flags); up_write(&lists_rwsem); return 0; } static int verify_immutable(const struct ib_device *dev, u8 port) { return WARN_ON(!rdma_cap_ib_mad(dev, port) && rdma_max_mad_size(dev, port) != 0); } static int read_port_immutable(struct ib_device *device) { int ret; u8 start_port = rdma_start_port(device); u8 end_port = rdma_end_port(device); u8 port; /** * device->port_immutable is indexed directly by the port number to make * access to this data as efficient as possible. * * Therefore port_immutable is declared as a 1 based array with * potential empty slots at the beginning. */ device->port_immutable = kzalloc(sizeof(*device->port_immutable) * (end_port + 1), GFP_KERNEL); if (!device->port_immutable) return -ENOMEM; for (port = start_port; port <= end_port; ++port) { ret = device->get_port_immutable(device, port, &device->port_immutable[port]); if (ret) return ret; if (verify_immutable(device, port)) return -EINVAL; } return 0; } void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len) { if (dev->get_dev_fw_str) dev->get_dev_fw_str(dev, str, str_len); else str[0] = '\0'; } EXPORT_SYMBOL(ib_get_device_fw_str); /** * ib_register_device - Register an IB device with IB core * @device:Device to register * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a * callback for each device that is added. @device must be allocated * with ib_alloc_device(). */ int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)) { int ret; struct ib_client *client; struct ib_udata uhw = {.outlen = 0, .inlen = 0}; mutex_lock(&device_mutex); if (strchr(device->name, '%')) { ret = alloc_name(device->name); if (ret) goto out; } if (ib_device_check_mandatory(device)) { ret = -EINVAL; goto out; } ret = read_port_immutable(device); if (ret) { pr_warn("Couldn't create per port immutable data %s\n", device->name); goto out; } ret = ib_cache_setup_one(device); if (ret) { pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n"); goto out; } memset(&device->attrs, 0, sizeof(device->attrs)); ret = device->query_device(device, &device->attrs, &uhw); if (ret) { pr_warn("Couldn't query the device attributes\n"); ib_cache_cleanup_one(device); goto out; } ret = ib_device_register_sysfs(device, port_callback); if (ret) { pr_warn("Couldn't register device %s with driver model\n", device->name); ib_cache_cleanup_one(device); goto out; } device->reg_state = IB_DEV_REGISTERED; list_for_each_entry(client, &client_list, list) if (client->add && !add_client_context(device, client)) client->add(device); down_write(&lists_rwsem); list_add_tail(&device->core_list, &device_list); up_write(&lists_rwsem); out: mutex_unlock(&device_mutex); return ret; } EXPORT_SYMBOL(ib_register_device); /** * ib_unregister_device - Unregister an IB device * @device:Device to unregister * * Unregister an IB device. All clients will receive a remove callback. */ void ib_unregister_device(struct ib_device *device) { struct ib_client_data *context, *tmp; unsigned long flags; mutex_lock(&device_mutex); down_write(&lists_rwsem); list_del(&device->core_list); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) context->going_down = true; spin_unlock_irqrestore(&device->client_data_lock, flags); downgrade_write(&lists_rwsem); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) { if (context->client->remove) context->client->remove(device, context->data); } up_read(&lists_rwsem); mutex_unlock(&device_mutex); ib_device_unregister_sysfs(device); ib_cache_cleanup_one(device); down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) kfree(context); spin_unlock_irqrestore(&device->client_data_lock, flags); up_write(&lists_rwsem); device->reg_state = IB_DEV_UNREGISTERED; } EXPORT_SYMBOL(ib_unregister_device); /** * ib_register_client - Register an IB client * @client:Client to register * * Upper level users of the IB drivers can use ib_register_client() to * register callbacks for IB device addition and removal. When an IB * device is added, each registered client's add method will be called * (in the order the clients were registered), and when a device is * removed, each client's remove method will be called (in the reverse * order that clients were registered). In addition, when * ib_register_client() is called, the client will receive an add * callback for all devices already registered. */ int ib_register_client(struct ib_client *client) { struct ib_device *device; mutex_lock(&device_mutex); list_for_each_entry(device, &device_list, core_list) if (client->add && !add_client_context(device, client)) client->add(device); down_write(&lists_rwsem); list_add_tail(&client->list, &client_list); up_write(&lists_rwsem); mutex_unlock(&device_mutex); return 0; } EXPORT_SYMBOL(ib_register_client); /** * ib_unregister_client - Unregister an IB client * @client:Client to unregister * * Upper level users use ib_unregister_client() to remove their client * registration. When ib_unregister_client() is called, the client * will receive a remove callback for each IB device still registered. */ void ib_unregister_client(struct ib_client *client) { struct ib_client_data *context, *tmp; struct ib_device *device; unsigned long flags; mutex_lock(&device_mutex); down_write(&lists_rwsem); list_del(&client->list); up_write(&lists_rwsem); list_for_each_entry(device, &device_list, core_list) { struct ib_client_data *found_context = NULL; down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry_safe(context, tmp, &device->client_data_list, list) if (context->client == client) { context->going_down = true; found_context = context; break; } spin_unlock_irqrestore(&device->client_data_lock, flags); up_write(&lists_rwsem); if (client->remove) client->remove(device, found_context ? found_context->data : NULL); if (!found_context) { pr_warn("No client context found for %s/%s\n", device->name, client->name); continue; } down_write(&lists_rwsem); spin_lock_irqsave(&device->client_data_lock, flags); list_del(&found_context->list); kfree(found_context); spin_unlock_irqrestore(&device->client_data_lock, flags); up_write(&lists_rwsem); } mutex_unlock(&device_mutex); } EXPORT_SYMBOL(ib_unregister_client); /** * ib_get_client_data - Get IB client context * @device:Device to get context for * @client:Client to get context for * * ib_get_client_data() returns client context set with * ib_set_client_data(). */ void *ib_get_client_data(struct ib_device *device, struct ib_client *client) { struct ib_client_data *context; void *ret = NULL; unsigned long flags; spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { ret = context->data; break; } spin_unlock_irqrestore(&device->client_data_lock, flags); return ret; } EXPORT_SYMBOL(ib_get_client_data); /** * ib_set_client_data - Set IB client context * @device:Device to set context for * @client:Client to set context for * @data:Context to set * * ib_set_client_data() sets client context that can be retrieved with * ib_get_client_data(). */ void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data) { struct ib_client_data *context; unsigned long flags; spin_lock_irqsave(&device->client_data_lock, flags); list_for_each_entry(context, &device->client_data_list, list) if (context->client == client) { context->data = data; goto out; } pr_warn("No client context found for %s/%s\n", device->name, client->name); out: spin_unlock_irqrestore(&device->client_data_lock, flags); } EXPORT_SYMBOL(ib_set_client_data); /** * ib_register_event_handler - Register an IB event handler * @event_handler:Handler to register * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in * chapter 11 of the InfiniBand Architecture Specification). This * callback may occur in interrupt context. */ int ib_register_event_handler (struct ib_event_handler *event_handler) { unsigned long flags; spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); return 0; } EXPORT_SYMBOL(ib_register_event_handler); /** * ib_unregister_event_handler - Unregister an event handler * @event_handler:Handler to unregister * * Unregister an event handler registered with * ib_register_event_handler(). */ int ib_unregister_event_handler(struct ib_event_handler *event_handler) { unsigned long flags; spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); list_del(&event_handler->list); spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); return 0; } EXPORT_SYMBOL(ib_unregister_event_handler); /** * ib_dispatch_event - Dispatch an asynchronous event * @event:Event to dispatch * * Low-level drivers must call ib_dispatch_event() to dispatch the * event to all registered event handlers when an asynchronous event * occurs. */ void ib_dispatch_event(struct ib_event *event) { unsigned long flags; struct ib_event_handler *handler; spin_lock_irqsave(&event->device->event_handler_lock, flags); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); spin_unlock_irqrestore(&event->device->event_handler_lock, flags); } EXPORT_SYMBOL(ib_dispatch_event); /** * ib_query_port - Query IB port attributes * @device:Device to query * @port_num:Port number to query * @port_attr:Port attributes * * ib_query_port() returns the attributes of a port through the * @port_attr pointer. */ int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr) { union ib_gid gid; int err; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; memset(port_attr, 0, sizeof(*port_attr)); err = device->query_port(device, port_num, port_attr); if (err || port_attr->subnet_prefix) return err; if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) return 0; err = ib_query_gid(device, port_num, 0, &gid, NULL); if (err) return err; port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix); return 0; } EXPORT_SYMBOL(ib_query_port); /** * ib_query_gid - Get GID table entry * @device:Device to query * @port_num:Port number to query * @index:GID table index to query * @gid:Returned GID * @attr: Returned GID attributes related to this GID index (only in RoCE). * NULL means ignore. * * ib_query_gid() fetches the specified GID table entry. */ int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr) { if (rdma_cap_roce_gid_table(device, port_num)) return ib_get_cached_gid(device, port_num, index, gid, attr); if (attr) return -EINVAL; return device->query_gid(device, port_num, index, gid); } EXPORT_SYMBOL(ib_query_gid); /** * ib_enum_roce_netdev - enumerate all RoCE ports * @ib_dev : IB device we want to query * @filter: Should we call the callback? * @filter_cookie: Cookie passed to filter * @cb: Callback to call for each found RoCE ports * @cookie: Cookie passed back to the callback * * Enumerates all of the physical RoCE ports of ib_dev * which are related to netdevice and calls callback() on each * device for which filter() function returns non zero. */ void ib_enum_roce_netdev(struct ib_device *ib_dev, roce_netdev_filter filter, void *filter_cookie, roce_netdev_callback cb, void *cookie) { u8 port; for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev); port++) if (rdma_protocol_roce(ib_dev, port)) { struct net_device *idev = NULL; if (ib_dev->get_netdev) idev = ib_dev->get_netdev(ib_dev, port); if (idev && (idev->if_flags & IFF_DYING)) { dev_put(idev); idev = NULL; } if (filter(ib_dev, port, idev, filter_cookie)) cb(ib_dev, port, idev, cookie); if (idev) dev_put(idev); } } /** * ib_enum_all_roce_netdevs - enumerate all RoCE devices * @filter: Should we call the callback? * @filter_cookie: Cookie passed to filter * @cb: Callback to call for each found RoCE ports * @cookie: Cookie passed back to the callback * * Enumerates all RoCE devices' physical ports which are related * to netdevices and calls callback() on each device for which * filter() function returns non zero. */ void ib_enum_all_roce_netdevs(roce_netdev_filter filter, void *filter_cookie, roce_netdev_callback cb, void *cookie) { struct ib_device *dev; down_read(&lists_rwsem); list_for_each_entry(dev, &device_list, core_list) ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie); up_read(&lists_rwsem); } /** * ib_cache_gid_del_all_by_netdev - delete GIDs belonging a netdevice * * @ndev: Pointer to netdevice */ void ib_cache_gid_del_all_by_netdev(struct net_device *ndev) { struct ib_device *ib_dev; u8 port; down_read(&lists_rwsem); list_for_each_entry(ib_dev, &device_list, core_list) { for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev); port++) { if (rdma_protocol_roce(ib_dev, port) == 0) continue; (void) ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev); } } up_read(&lists_rwsem); } /** * ib_query_pkey - Get P_Key table entry * @device:Device to query * @port_num:Port number to query * @index:P_Key table index to query * @pkey:Returned P_Key * * ib_query_pkey() fetches the specified P_Key table entry. */ int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { return device->query_pkey(device, port_num, index, pkey); } EXPORT_SYMBOL(ib_query_pkey); /** * ib_modify_device - Change IB device attributes * @device:Device to modify * @device_modify_mask:Mask of attributes to change * @device_modify:New attribute values * * ib_modify_device() changes a device's attributes as specified by * the @device_modify_mask and @device_modify structure. */ int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify) { if (!device->modify_device) return -ENOSYS; return device->modify_device(device, device_modify_mask, device_modify); } EXPORT_SYMBOL(ib_modify_device); /** * ib_modify_port - Modifies the attributes for the specified port. * @device: The device to modify. * @port_num: The number of the port to modify. * @port_modify_mask: Mask used to specify which attributes of the port * to change. * @port_modify: New attribute values for the port. * * ib_modify_port() changes a port's attributes as specified by the * @port_modify_mask and @port_modify structure. */ int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { if (!device->modify_port) return -ENOSYS; if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device)) return -EINVAL; return device->modify_port(device, port_num, port_modify_mask, port_modify); } EXPORT_SYMBOL(ib_modify_port); /** * ib_find_gid - Returns the port number and GID table index where * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: Type of GID. * @ndev: The ndev related to the GID to search for. * @port_num: The port number of the device where the GID value was found. * @index: The index into the GID table where the GID was found. This * parameter may be NULL. */ int ib_find_gid(struct ib_device *device, union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, u16 *index) { union ib_gid tmp_gid; int ret, port, i; for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { if (rdma_cap_roce_gid_table(device, port)) { if (!ib_find_cached_gid_by_port(device, gid, gid_type, port, ndev, index)) { *port_num = port; return 0; } } if (gid_type != IB_GID_TYPE_IB) continue; for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { ret = ib_query_gid(device, port, i, &tmp_gid, NULL); if (ret) return ret; if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) *index = i; return 0; } } } return -ENOENT; } EXPORT_SYMBOL(ib_find_gid); /** * ib_find_pkey - Returns the PKey table index where a specified * PKey value occurs. * @device: The device to query. * @port_num: The port number of the device to search for the PKey. * @pkey: The PKey value to search for. * @index: The index into the PKey table where the PKey was found. */ int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index) { int ret, i; u16 tmp_pkey; int partial_ix = -1; for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { /* if there is full-member pkey take it.*/ if (tmp_pkey & 0x8000) { *index = i; return 0; } if (partial_ix < 0) partial_ix = i; } } /*no full-member, if exists take the limited*/ if (partial_ix >= 0) { *index = partial_ix; return 0; } return -ENOENT; } EXPORT_SYMBOL(ib_find_pkey); /** * ib_get_net_dev_by_params() - Return the appropriate net_dev * for a received CM request * @dev: An RDMA device on which the request has been received. * @port: Port number on the RDMA device. * @pkey: The Pkey the request came on. * @gid: A GID that the net_dev uses to communicate. * @addr: Contains the IP address that the request specified as its * destination. */ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr) { struct net_device *net_dev = NULL; struct ib_client_data *context; if (!rdma_protocol_ib(dev, port)) return NULL; down_read(&lists_rwsem); list_for_each_entry(context, &dev->client_data_list, list) { struct ib_client *client = context->client; if (context->going_down) continue; if (client->get_net_dev_by_params) { net_dev = client->get_net_dev_by_params(dev, port, pkey, gid, addr, context->data); if (net_dev) break; } } up_read(&lists_rwsem); return net_dev; } EXPORT_SYMBOL(ib_get_net_dev_by_params); static int __init ib_core_init(void) { int ret; ib_wq = alloc_workqueue("infiniband", 0, 0); if (!ib_wq) return -ENOMEM; ib_comp_wq = alloc_workqueue("ib-comp-wq", WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, mp_ncpus * 4 /* WQ_UNBOUND_MAX_ACTIVE */); if (!ib_comp_wq) { ret = -ENOMEM; goto err; } ret = class_register(&ib_class); if (ret) { pr_warn("Couldn't create InfiniBand device class\n"); goto err_comp; } ret = addr_init(); if (ret) { pr_warn("Could't init IB address resolution\n"); goto err_sysfs; } ret = ib_mad_init(); if (ret) { pr_warn("Couldn't init IB MAD\n"); goto err_addr; } ret = ib_sa_init(); if (ret) { pr_warn("Couldn't init SA\n"); goto err_mad; } ib_cache_setup(); return 0; err_mad: ib_mad_cleanup(); err_addr: addr_cleanup(); err_sysfs: class_unregister(&ib_class); err_comp: destroy_workqueue(ib_comp_wq); err: destroy_workqueue(ib_wq); return ret; } static void __exit ib_core_cleanup(void) { ib_cache_cleanup(); ib_sa_cleanup(); ib_mad_cleanup(); addr_cleanup(); class_unregister(&ib_class); destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. */ destroy_workqueue(ib_wq); } module_init(ib_core_init); module_exit(ib_core_cleanup); MODULE_VERSION(ibcore, 1); MODULE_DEPEND(ibcore, linuxkpi, 1, 1, 1); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_fmr_pool.c (revision 331772) @@ -1,521 +1,525 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include "core_priv.h" #define PFX "fmr_pool: " enum { IB_FMR_MAX_REMAPS = 32, IB_FMR_HASH_BITS = 8, IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS, IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1 }; /* * If an FMR is not in use, then the list member will point to either * its pool's free_list (if the FMR can be mapped again; that is, * remap_count < pool->max_remaps) or its pool's dirty_list (if the * FMR needs to be unmapped before being remapped). In either of * these cases it is a bug if the ref_count is not 0. In other words, * if ref_count is > 0, then the list member must not be linked into * either free_list or dirty_list. * * The cache_node member is used to link the FMR into a cache bucket * (if caching is enabled). This is independent of the reference * count of the FMR. When a valid FMR is released, its ref_count is * decremented, and if ref_count reaches 0, the FMR is placed in * either free_list or dirty_list as appropriate. However, it is not * removed from the cache and may be "revived" if a call to * ib_fmr_register_physical() occurs before the FMR is remapped. In * this case we just increment the ref_count and remove the FMR from * free_list/dirty_list. * * Before we remap an FMR from free_list, we remove it from the cache * (to prevent another user from obtaining a stale FMR). When an FMR * is released, we add it to the tail of the free list, so that our * cache eviction policy is "least recently used." * * All manipulation of ref_count, list and cache_node is protected by * pool_lock to maintain consistency. */ struct ib_fmr_pool { spinlock_t pool_lock; int pool_size; int max_pages; int max_remaps; int dirty_watermark; int dirty_len; struct list_head free_list; struct list_head dirty_list; struct hlist_head *cache_bucket; void (*flush_function)(struct ib_fmr_pool *pool, void * arg); void *flush_arg; struct task_struct *thread; atomic_t req_ser; atomic_t flush_ser; wait_queue_head_t force_wait; }; static inline u32 ib_fmr_hash(u64 first_page) { return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) & (IB_FMR_HASH_SIZE - 1); } /* Caller must hold pool_lock */ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, u64 *page_list, int page_list_len, u64 io_virtual_address) { struct hlist_head *bucket; struct ib_pool_fmr *fmr; if (!pool->cache_bucket) return NULL; bucket = pool->cache_bucket + ib_fmr_hash(*page_list); hlist_for_each_entry(fmr, bucket, cache_node) if (io_virtual_address == fmr->io_virtual_address && page_list_len == fmr->page_list_len && !memcmp(page_list, fmr->page_list, page_list_len * sizeof *page_list)) return fmr; return NULL; } static void ib_fmr_batch_release(struct ib_fmr_pool *pool) { int ret; struct ib_pool_fmr *fmr; LIST_HEAD(unmap_list); LIST_HEAD(fmr_list); spin_lock_irq(&pool->pool_lock); list_for_each_entry(fmr, &pool->dirty_list, list) { hlist_del_init(&fmr->cache_node); fmr->remap_count = 0; list_add_tail(&fmr->fmr->list, &fmr_list); #ifdef DEBUG if (fmr->ref_count !=0) { pr_warn(PFX "Unmapping FMR %p with ref count %d\n", fmr, (int)fmr->ref_count); } #endif } list_splice_init(&pool->dirty_list, &unmap_list); pool->dirty_len = 0; spin_unlock_irq(&pool->pool_lock); if (list_empty(&unmap_list)) { return; } ret = ib_unmap_fmr(&fmr_list); if (ret) pr_warn(PFX "ib_unmap_fmr returned %d\n", ret); spin_lock_irq(&pool->pool_lock); list_splice(&unmap_list, &pool->free_list); spin_unlock_irq(&pool->pool_lock); } static int ib_fmr_cleanup_thread(void *pool_ptr) { struct ib_fmr_pool *pool = pool_ptr; do { if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) { ib_fmr_batch_release(pool); atomic_inc(&pool->flush_ser); wake_up_interruptible(&pool->force_wait); if (pool->flush_function) pool->flush_function(pool, pool->flush_arg); } set_current_state(TASK_INTERRUPTIBLE); if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 && !kthread_should_stop()) schedule(); __set_current_state(TASK_RUNNING); } while (!kthread_should_stop()); return 0; } /** * ib_create_fmr_pool - Create an FMR pool * @pd:Protection domain for FMRs * @params:FMR pool parameters * * Create a pool of FMRs. Return value is pointer to new pool or * error code if creation failed. */ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, struct ib_fmr_pool_param *params) { struct ib_device *device; struct ib_fmr_pool *pool; int i; int ret; int max_remaps; if (!params) return ERR_PTR(-EINVAL); device = pd->device; if (!device->alloc_fmr || !device->dealloc_fmr || !device->map_phys_fmr || !device->unmap_fmr) { pr_info(PFX "Device %s does not support FMRs\n", device->name); return ERR_PTR(-ENOSYS); } if (!device->attrs.max_map_per_fmr) max_remaps = IB_FMR_MAX_REMAPS; else max_remaps = device->attrs.max_map_per_fmr; pool = kmalloc(sizeof *pool, GFP_KERNEL); if (!pool) return ERR_PTR(-ENOMEM); pool->cache_bucket = NULL; pool->flush_function = params->flush_function; pool->flush_arg = params->flush_arg; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->dirty_list); if (params->cache) { pool->cache_bucket = kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket, GFP_KERNEL); if (!pool->cache_bucket) { pr_warn(PFX "Failed to allocate cache in pool\n"); ret = -ENOMEM; goto out_free_pool; } for (i = 0; i < IB_FMR_HASH_SIZE; ++i) INIT_HLIST_HEAD(pool->cache_bucket + i); } pool->pool_size = 0; pool->max_pages = params->max_pages_per_fmr; pool->max_remaps = max_remaps; pool->dirty_watermark = params->dirty_watermark; pool->dirty_len = 0; spin_lock_init(&pool->pool_lock); atomic_set(&pool->req_ser, 0); atomic_set(&pool->flush_ser, 0); init_waitqueue_head(&pool->force_wait); pool->thread = kthread_run(ib_fmr_cleanup_thread, pool, "ib_fmr(%s)", device->name); if (IS_ERR(pool->thread)) { pr_warn(PFX "couldn't start cleanup thread\n"); ret = PTR_ERR(pool->thread); goto out_free_pool; } { struct ib_pool_fmr *fmr; struct ib_fmr_attr fmr_attr = { .max_pages = params->max_pages_per_fmr, .max_maps = pool->max_remaps, .page_shift = params->page_shift }; int bytes_per_fmr = sizeof *fmr; if (pool->cache_bucket) bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64); for (i = 0; i < params->pool_size; ++i) { fmr = kmalloc(bytes_per_fmr, GFP_KERNEL); if (!fmr) goto out_fail; fmr->pool = pool; fmr->remap_count = 0; fmr->ref_count = 0; INIT_HLIST_NODE(&fmr->cache_node); fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr); if (IS_ERR(fmr->fmr)) { pr_warn(PFX "fmr_create failed for FMR %d\n", i); kfree(fmr); goto out_fail; } list_add_tail(&fmr->list, &pool->free_list); ++pool->pool_size; } } return pool; out_free_pool: kfree(pool->cache_bucket); kfree(pool); return ERR_PTR(ret); out_fail: ib_destroy_fmr_pool(pool); return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL(ib_create_fmr_pool); /** * ib_destroy_fmr_pool - Free FMR pool * @pool:FMR pool to free * * Destroy an FMR pool and free all associated resources. */ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool) { struct ib_pool_fmr *fmr; struct ib_pool_fmr *tmp; LIST_HEAD(fmr_list); int i; kthread_stop(pool->thread); ib_fmr_batch_release(pool); i = 0; list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) { if (fmr->remap_count) { INIT_LIST_HEAD(&fmr_list); list_add_tail(&fmr->fmr->list, &fmr_list); ib_unmap_fmr(&fmr_list); } ib_dealloc_fmr(fmr->fmr); list_del(&fmr->list); kfree(fmr); ++i; } if (i < pool->pool_size) pr_warn(PFX "pool still has %d regions registered\n", pool->pool_size - i); kfree(pool->cache_bucket); kfree(pool); } EXPORT_SYMBOL(ib_destroy_fmr_pool); /** * ib_flush_fmr_pool - Invalidate all unmapped FMRs * @pool:FMR pool to flush * * Ensure that all unmapped FMRs are fully invalidated. */ int ib_flush_fmr_pool(struct ib_fmr_pool *pool) { int serial; struct ib_pool_fmr *fmr, *next; /* * The free_list holds FMRs that may have been used * but have not been remapped enough times to be dirty. * Put them on the dirty list now so that the cleanup * thread will reap them too. */ spin_lock_irq(&pool->pool_lock); list_for_each_entry_safe(fmr, next, &pool->free_list, list) { if (fmr->remap_count > 0) list_move(&fmr->list, &pool->dirty_list); } spin_unlock_irq(&pool->pool_lock); serial = atomic_inc_return(&pool->req_ser); wake_up_process(pool->thread); if (wait_event_interruptible(pool->force_wait, atomic_read(&pool->flush_ser) - serial >= 0)) return -EINTR; return 0; } EXPORT_SYMBOL(ib_flush_fmr_pool); /** * ib_fmr_pool_map_phys - * @pool:FMR pool to allocate FMR from * @page_list:List of pages to map * @list_len:Number of pages in @page_list * @io_virtual_address:I/O virtual address for new FMR * * Map an FMR from an FMR pool. */ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, u64 *page_list, int list_len, u64 io_virtual_address) { struct ib_fmr_pool *pool = pool_handle; struct ib_pool_fmr *fmr; unsigned long flags; int result; if (list_len < 1 || list_len > pool->max_pages) return ERR_PTR(-EINVAL); spin_lock_irqsave(&pool->pool_lock, flags); fmr = ib_fmr_cache_lookup(pool, page_list, list_len, io_virtual_address); if (fmr) { /* found in cache */ ++fmr->ref_count; if (fmr->ref_count == 1) { list_del(&fmr->list); } spin_unlock_irqrestore(&pool->pool_lock, flags); return fmr; } if (list_empty(&pool->free_list)) { spin_unlock_irqrestore(&pool->pool_lock, flags); return ERR_PTR(-EAGAIN); } fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list); list_del(&fmr->list); hlist_del_init(&fmr->cache_node); spin_unlock_irqrestore(&pool->pool_lock, flags); result = ib_map_phys_fmr(fmr->fmr, page_list, list_len, io_virtual_address); if (result) { spin_lock_irqsave(&pool->pool_lock, flags); list_add(&fmr->list, &pool->free_list); spin_unlock_irqrestore(&pool->pool_lock, flags); pr_warn(PFX "fmr_map returns %d\n", result); return ERR_PTR(result); } ++fmr->remap_count; fmr->ref_count = 1; if (pool->cache_bucket) { fmr->io_virtual_address = io_virtual_address; fmr->page_list_len = list_len; memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list)); spin_lock_irqsave(&pool->pool_lock, flags); hlist_add_head(&fmr->cache_node, pool->cache_bucket + ib_fmr_hash(fmr->page_list[0])); spin_unlock_irqrestore(&pool->pool_lock, flags); } return fmr; } EXPORT_SYMBOL(ib_fmr_pool_map_phys); /** * ib_fmr_pool_unmap - Unmap FMR * @fmr:FMR to unmap * * Unmap an FMR. The FMR mapping may remain valid until the FMR is * reused (or until ib_flush_fmr_pool() is called). */ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr) { struct ib_fmr_pool *pool; unsigned long flags; pool = fmr->pool; spin_lock_irqsave(&pool->pool_lock, flags); --fmr->ref_count; if (!fmr->ref_count) { if (fmr->remap_count < pool->max_remaps) { list_add_tail(&fmr->list, &pool->free_list); } else { list_add_tail(&fmr->list, &pool->dirty_list); if (++pool->dirty_len >= pool->dirty_watermark) { atomic_inc(&pool->req_ser); wake_up_process(pool->thread); } } } #ifdef DEBUG if (fmr->ref_count < 0) pr_warn(PFX "FMR %p has ref count %d < 0\n", fmr, fmr->ref_count); #endif spin_unlock_irqrestore(&pool->pool_lock, flags); return 0; } EXPORT_SYMBOL(ib_fmr_pool_unmap); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_iwcm.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_iwcm.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_iwcm.c (revision 331772) @@ -1,1051 +1,1055 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * + * $FreeBSD$ */ + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "iwcm.h" MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); static struct workqueue_struct *iwcm_wq; struct iwcm_work { struct work_struct work; struct iwcm_id_private *cm_id; struct list_head list; struct iw_cm_event event; struct list_head free_list; }; static unsigned int default_backlog = 256; /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: * LISTENING IDS: Get enough elements preallocated to handle the * listen backlog. * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE * * Allocating them in connect and listen avoids having to deal * with allocation failures on the event upcall from the provider (which * is called in the interrupt context). * * One exception is when creating the cm_id for incoming connection requests. * There are two cases: * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If * the backlog is exceeded, then no more connection request events will * be processed. cm_event_handler() returns -ENOMEM in this case. Its up * to the provider to reject the connection request. * 2) in the connection request workqueue handler, cm_conn_req_handler(). * If work elements cannot be allocated for the new connect request cm_id, * then IWCM will call the provider reject method. This is ok since * cm_conn_req_handler() runs in the workqueue thread context. */ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) { struct iwcm_work *work; if (list_empty(&cm_id_priv->work_free_list)) return NULL; work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, free_list); list_del_init(&work->free_list); return work; } static void put_work(struct iwcm_work *work) { list_add(&work->free_list, &work->cm_id->work_free_list); } static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) kfree(list_entry(e, struct iwcm_work, free_list)); } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) { struct iwcm_work *work; BUG_ON(!list_empty(&cm_id_priv->work_free_list)); while (count--) { work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); if (!work) { dealloc_work_entries(cm_id_priv); return -ENOMEM; } work->cm_id = cm_id_priv; INIT_LIST_HEAD(&work->list); put_work(work); } return 0; } /* * Save private data from incoming connection requests to * iw_cm_event, so the low level driver doesn't have to. Adjust * the event ptr to point to the local copy. */ static int copy_private_data(struct iw_cm_event *event) { void *p; p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); if (!p) return -ENOMEM; event->private_data = p; return 0; } static void free_cm_id(struct iwcm_id_private *cm_id_priv) { dealloc_work_entries(cm_id_priv); kfree(cm_id_priv); } /* * Release a reference on cm_id. If the last reference is being * released, free the cm_id and return 1. */ static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { BUG_ON(atomic_read(&cm_id_priv->refcount)==0); if (atomic_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); return 1; } return 0; } static void add_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); atomic_inc(&cm_id_priv->refcount); } static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); (void)iwcm_deref_id(cm_id_priv); } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id *iw_create_cm_id(struct ib_device *device, iw_cm_handler cm_handler, void *context) { struct iwcm_id_private *cm_id_priv; cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->state = IW_CM_STATE_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.event_handler = cm_event_handler; cm_id_priv->id.add_ref = add_ref; cm_id_priv->id.rem_ref = rem_ref; spin_lock_init(&cm_id_priv->lock); atomic_set(&cm_id_priv->refcount, 1); init_waitqueue_head(&cm_id_priv->connect_wait); init_completion(&cm_id_priv->destroy_comp); INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->work_free_list); return &cm_id_priv->id; } EXPORT_SYMBOL(iw_create_cm_id); static int iwcm_modify_qp_err(struct ib_qp *qp) { struct ib_qp_attr qp_attr; if (!qp) return -EINVAL; qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * This is really the RDMAC CLOSING state. It is most similar to the * IB SQD QP state. */ static int iwcm_modify_qp_sqd(struct ib_qp *qp) { struct ib_qp_attr qp_attr; BUG_ON(qp == NULL); qp_attr.qp_state = IB_QPS_SQD; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * CM_ID <-- CLOSING * * Block if a passive or active connection is currently being processed. Then * process the event as follows: * - If we are ESTABLISHED, move to CLOSING and modify the QP state * based on the abrupt flag * - If the connection is already in the CLOSING or IDLE state, the peer is * disconnecting concurrently with us and we've already seen the * DISCONNECT event -- ignore the request and return 0 * - Disconnect on a listening endpoint returns -EINVAL */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* Wait if we're currently in a connect or accept downcall */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_CLOSING; /* QP could be for user-mode client */ if (cm_id_priv->qp) qp = cm_id_priv->qp; else ret = -EINVAL; break; case IW_CM_STATE_LISTEN: ret = -EINVAL; break; case IW_CM_STATE_CLOSING: /* remote peer closed first */ case IW_CM_STATE_IDLE: /* accept or connect returned !0 */ break; case IW_CM_STATE_CONN_RECV: /* * App called disconnect before/without calling accept after * connect_request event delivered. */ break; case IW_CM_STATE_CONN_SENT: /* Can only get here if wait above fails */ default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) { if (abrupt) ret = iwcm_modify_qp_err(qp); else ret = iwcm_modify_qp_sqd(qp); /* * If both sides are disconnecting the QP could * already be in ERR or SQD states */ ret = 0; } return ret; } EXPORT_SYMBOL(iw_cm_disconnect); /* * CM_ID <-- DESTROYING * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. */ static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* * Wait if we're currently in a connect or accept downcall. A * listening endpoint should never block here. */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); /* * Since we're deleting the cm_id, drop any events that * might arrive before the last dereference. */ set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ cm_id->device->iwcm->destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ (void)iwcm_modify_qp_err(cm_id_priv->qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_DESTROYING; break; case IW_CM_STATE_CONN_RECV: /* * App called destroy before/without calling accept after * receiving connection request event notification or * returned non zero from the event callback function. * In either case, must tell the provider to reject. */ cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id->device->iwcm->reject(cm_id, NULL, 0); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_DESTROYING: default: BUG(); break; } if (cm_id_priv->qp) { cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); (void)iwcm_deref_id(cm_id_priv); } /* * This function is only called by the application thread and cannot * be called by the event thread. The function will wait for all * references to be released on the cm_id and then kfree the cm_id * object. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); destroy_cm_id(cm_id); } EXPORT_SYMBOL(iw_destroy_cm_id); /** * iw_cm_map - Use portmapper to map the ports * @cm_id: connection manager pointer * @active: Indicates the active side when true * returns nonzero for error only if iwpm_create_mapinfo() fails * * Tries to add a mapping for a port using the Portmapper. If * successful in mapping the IP/Port it will check the remote * mapped IP address for a wildcard IP address and replace the * zero IP address with the remote_addr. */ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { cm_id->m_local_addr = cm_id->local_addr; cm_id->m_remote_addr = cm_id->remote_addr; return 0; } /* * CM_ID <-- LISTEN * * Start listening for connect requests. Generates one CONNECT_REQUEST * event for each inbound connect request. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); if (!backlog) backlog = default_backlog; ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, false); if (!ret) ret = cm_id->device->iwcm->create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(iw_cm_listen); /* * CM_ID <-- IDLE * * Rejects an inbound connection request. No events are generated. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->iwcm->reject(cm_id, private_data, private_data_len); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_reject); /* * CM_ID <-- ESTABLISHED * * Accepts an inbound connection request and generates an ESTABLISHED * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block * until the ESTABLISHED event is received from the provider. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->iwcm->accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { cm_id->device->iwcm->rem_ref(qp); cm_id_priv->qp = NULL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } return ret; } EXPORT_SYMBOL(iw_cm_accept); /* * Active Side: CM_ID <-- CONN_SENT * * If successful, results in the generation of a CONNECT_REPLY * event. iw_cm_disconnect and iw_cm_destroy will block until the * CONNECT_REPLY event is received from the provider. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; struct ib_qp *qp; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, 4); if (ret) return ret; set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_IDLE) { ret = -EINVAL; goto err; } /* Get the ib_qp given the QPN */ qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); if (!qp) { ret = -EINVAL; goto err; } cm_id->device->iwcm->add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, true); if (!ret) ret = cm_id->device->iwcm->connect(cm_id, iw_param); if (!ret) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { cm_id->device->iwcm->rem_ref(qp); cm_id_priv->qp = NULL; } cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_connect); /* * Passive Side: new CM_ID <-- CONN_RECV * * Handles an inbound connect request. The function creates a new * iw_cm_id to represent the new connection and inherits the client * callback function and other attributes from the listening parent. * * The work item contains a pointer to the listen_cm_id and the event. The * listen_cm_id contains the client cm_handler, context and * device. These are copied when the device is cloned. The event * contains the new four tuple. * * An error on the child should not affect the parent, so this * function does not return a value. */ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; struct iw_cm_id *cm_id; struct iwcm_id_private *cm_id_priv; int ret; /* * The provider should never generate a connection request * event with a bad status. */ BUG_ON(iw_event->status); cm_id = iw_create_cm_id(listen_id_priv->id.device, listen_id_priv->id.cm_handler, listen_id_priv->id.context); /* If the cm_id could not be created, ignore the request */ if (IS_ERR(cm_id)) goto out; cm_id->provider_data = iw_event->provider_data; cm_id->m_local_addr = iw_event->local_addr; cm_id->m_remote_addr = iw_event->remote_addr; cm_id->local_addr = listen_id_priv->id.local_addr; cm_id->remote_addr = iw_event->remote_addr; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; /* * We could be destroying the listening id. If so, ignore this * upcall. */ spin_lock_irqsave(&listen_id_priv->lock, flags); if (listen_id_priv->state != IW_CM_STATE_LISTEN) { spin_unlock_irqrestore(&listen_id_priv->lock, flags); iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } spin_unlock_irqrestore(&listen_id_priv->lock, flags); ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } /* Call the client CM handler */ ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); } out: if (iw_event->private_data_len) kfree(iw_event->private_data); } /* * Passive Side: CM_ID <-- ESTABLISHED * * The provider generated an ESTABLISHED event which means that * the MPA negotion has completed successfully and we are now in MPA * FPDU mode. * * This event can only be received in the CONN_RECV state. If the * remote peer closed, the ESTABLISHED event would be received followed * by the CLOSE event. If the app closes, it will block until we wake * it up after processing this event. */ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * We clear the CONNECT_WAIT bit here to allow the callback * function to call iw_cm_disconnect. Calling iw_destroy_cm_id * from a callback handler is not allowed. */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_ESTABLISHED; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * Active Side: CM_ID <-- ESTABLISHED * * The app has called connect and is waiting for the established event to * post it's requests to the server. This event will wake up anyone * blocked in iw_cm_disconnect or iw_destroy_id. */ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * Clear the connect wait bit so a callback function calling * iw_cm_disconnect will not wait and deadlock this thread */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == 0) { cm_id_priv->id.m_local_addr = iw_event->local_addr; cm_id_priv->id.m_remote_addr = iw_event->remote_addr; iw_event->local_addr = cm_id_priv->id.local_addr; iw_event->remote_addr = cm_id_priv->id.remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) kfree(iw_event->private_data); /* Wake up waiters on connect complete */ wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * CM_ID <-- CLOSING * * If in the ESTABLISHED state, move to CLOSING. */ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) cm_id_priv->state = IW_CM_STATE_CLOSING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * CM_ID <-- IDLE * * If in the ESTBLISHED or CLOSING states, the QP will have have been * moved by the provider to the ERR state. Disassociate the CM_ID from * the QP, move to IDLE, and remove the 'connected' reference. * * If in some other state, the cm_id was destroyed asynchronously. * This is the last reference that will result in waking up * the app thread blocked in iw_destroy_cm_id. */ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->qp) { cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); cm_id_priv->qp = NULL; } switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_DESTROYING: break; default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int process_event(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret = 0; switch (iw_event->event) { case IW_CM_EVENT_CONNECT_REQUEST: cm_conn_req_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CONNECT_REPLY: ret = cm_conn_rep_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_ESTABLISHED: ret = cm_conn_est_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_DISCONNECT: cm_disconnect_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CLOSE: ret = cm_close_handler(cm_id_priv, iw_event); break; default: BUG(); } return ret; } /* * Process events on the work_list for the cm_id. If the callback * function requests that the cm_id be deleted, a flag is set in the * cm_id flags to indicate that when the last reference is * removed, the cm_id is to be destroyed. This is necessary to * distinguish between an object that will be destroyed by the app * thread asleep on the destroy_comp list vs. an object destroyed * here synchronously when the last reference is removed. */ static void cm_work_handler(struct work_struct *_work) { struct iwcm_work *work = container_of(_work, struct iwcm_work, work); struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; unsigned long flags; int empty; int ret = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); empty = list_empty(&cm_id_priv->work_list); while (!empty) { work = list_entry(cm_id_priv->work_list.next, struct iwcm_work, list); list_del_init(&work->list); empty = list_empty(&cm_id_priv->work_list); levent = work->event; put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); if (ret) destroy_cm_id(&cm_id_priv->id); } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) return; if (empty) return; spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * This function is called on interrupt context. Schedule events on * the iwcm_wq thread to allow callback functions to downcall into * the CM and/or block. Events are queued to a per-CM_ID * work_list. If this is the first event on the work_list, the work * element is also queued on the iwcm_wq thread. * * Each event holds a reference on the cm_id. Until the last posted * event has been delivered and processed, the cm_id cannot be * deleted. * * Returns: * 0 - the event was handled. * -ENOMEM - the event was not handled due to lack of resources. */ static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct iwcm_work *work; struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); work = get_work(cm_id_priv); if (!work) { ret = -ENOMEM; goto out; } INIT_WORK(&work->work, cm_work_handler); work->cm_id = cm_id_priv; work->event = *iw_event; if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || work->event.event == IW_CM_EVENT_CONNECT_REPLY) && work->event.private_data_len) { ret = copy_private_data(&work->event); if (ret) { put_work(work); goto out; } } atomic_inc(&cm_id_priv->refcount); if (list_empty(&cm_id_priv->work_list)) { list_add_tail(&work->list, &cm_id_priv->work_list); queue_work(iwcm_wq, &work->work); } else list_add_tail(&work->list, &cm_id_priv->work_list); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = 0; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: ret = iwcm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = iwcm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(iw_cm_init_qp_attr); static int __init iw_cm_init(void) { iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) return -ENOMEM; return 0; } static void __exit iw_cm_cleanup(void) { destroy_workqueue(iwcm_wq); } module_init(iw_cm_init); module_exit(iw_cm_cleanup); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_iwpm_msg.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_iwpm_msg.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_iwpm_msg.c (revision 331772) @@ -1,43 +1,47 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Intel Corporation. All rights reserved. * Copyright (c) 2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include "iwpm_util.h" static int iwpm_user_pid = IWPM_PID_UNDEFINED; int iwpm_valid_pid(void) { return iwpm_user_pid > 0; } EXPORT_SYMBOL(iwpm_valid_pid); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_iwpm_util.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_iwpm_util.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_iwpm_util.c (revision 331772) @@ -1,97 +1,101 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Chelsio, Inc. All rights reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include "iwpm_util.h" #define IWPM_MAPINFO_HASH_SIZE 512 #define IWPM_MAPINFO_HASH_MASK (IWPM_MAPINFO_HASH_SIZE - 1) #define IWPM_REMINFO_HASH_SIZE 64 #define IWPM_REMINFO_HASH_MASK (IWPM_REMINFO_HASH_SIZE - 1) #define IWPM_MSG_SIZE 512 int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr) { if (a_sockaddr->ss_family != b_sockaddr->ss_family) return 1; if (a_sockaddr->ss_family == AF_INET) { struct sockaddr_in *a4_sockaddr = (struct sockaddr_in *)a_sockaddr; struct sockaddr_in *b4_sockaddr = (struct sockaddr_in *)b_sockaddr; if (!memcmp(&a4_sockaddr->sin_addr, &b4_sockaddr->sin_addr, sizeof(struct in_addr)) && a4_sockaddr->sin_port == b4_sockaddr->sin_port) return 0; } else if (a_sockaddr->ss_family == AF_INET6) { struct sockaddr_in6 *a6_sockaddr = (struct sockaddr_in6 *)a_sockaddr; struct sockaddr_in6 *b6_sockaddr = (struct sockaddr_in6 *)b_sockaddr; if (!memcmp(&a6_sockaddr->sin6_addr, &b6_sockaddr->sin6_addr, sizeof(struct in6_addr)) && a6_sockaddr->sin6_port == b6_sockaddr->sin6_port) return 0; } else { pr_err("%s: Invalid sockaddr family\n", __func__); } return 1; } void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg) { struct sockaddr_in6 *sockaddr_v6; struct sockaddr_in *sockaddr_v4; switch (sockaddr->ss_family) { case AF_INET: sockaddr_v4 = (struct sockaddr_in *)sockaddr; pr_debug("%s IPV4 %pI4: %u(0x%04X)\n", msg, &sockaddr_v4->sin_addr, ntohs(sockaddr_v4->sin_port), ntohs(sockaddr_v4->sin_port)); break; case AF_INET6: sockaddr_v6 = (struct sockaddr_in6 *)sockaddr; pr_debug("%s IPV6 %pI6: %u(0x%04X)\n", msg, &sockaddr_v6->sin6_addr, ntohs(sockaddr_v6->sin6_port), ntohs(sockaddr_v6->sin6_port)); break; default: break; } } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_mad.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_mad.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_mad.c (revision 331772) @@ -1,3339 +1,3342 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * + * $FreeBSD$ */ #define LINUXKPI_PARAM_PREFIX ibcore_ #define KBUILD_MODNAME "ibcore" #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include "mad_priv.h" #include "mad_rmpp.h" #include "smi.h" #include "opa_smi.h" #include "agent.h" #include "core_priv.h" static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; module_param_named(send_queue_size, mad_sendq_size, int, 0444); MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests"); module_param_named(recv_queue_size, mad_recvq_size, int, 0444); MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests"); static struct list_head ib_mad_port_list; static u32 ib_mad_client_id = 0; /* Port list lock */ static DEFINE_SPINLOCK(ib_mad_port_list_lock); /* Forward declarations */ static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req); static void remove_mad_reg_req(struct ib_mad_agent_private *priv); static struct ib_mad_agent_private *find_mad_agent( struct ib_mad_port_private *port_priv, const struct ib_mad_hdr *mad); static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad); static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv); static void timeout_sends(struct work_struct *work); static void local_completions(struct work_struct *work); static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class); static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv); static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, struct ib_wc *wc); static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc); /* * Returns a ib_mad_port_private structure or NULL for a device/port * Assumes ib_mad_port_list_lock is being held */ static inline struct ib_mad_port_private * __ib_get_mad_port(struct ib_device *device, int port_num) { struct ib_mad_port_private *entry; list_for_each_entry(entry, &ib_mad_port_list, port_list) { if (entry->device == device && entry->port_num == port_num) return entry; } return NULL; } /* * Wrapper function to return a ib_mad_port_private structure or NULL * for a device/port */ static inline struct ib_mad_port_private * ib_get_mad_port(struct ib_device *device, int port_num) { struct ib_mad_port_private *entry; unsigned long flags; spin_lock_irqsave(&ib_mad_port_list_lock, flags); entry = __ib_get_mad_port(device, port_num); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); return entry; } static inline u8 convert_mgmt_class(u8 mgmt_class) { /* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */ return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ? 0 : mgmt_class; } static int get_spl_qp_index(enum ib_qp_type qp_type) { switch (qp_type) { case IB_QPT_SMI: return 0; case IB_QPT_GSI: return 1; default: return -1; } } static int vendor_class_index(u8 mgmt_class) { return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START; } static int is_vendor_class(u8 mgmt_class) { if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) || (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END)) return 0; return 1; } static int is_vendor_oui(char *oui) { if (oui[0] || oui[1] || oui[2]) return 1; return 0; } static int is_vendor_method_in_use( struct ib_mad_mgmt_vendor_class *vendor_class, struct ib_mad_reg_req *mad_reg_req) { struct ib_mad_mgmt_method_table *method; int i; for (i = 0; i < MAX_MGMT_OUI; i++) { if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) { method = vendor_class->method_table[i]; if (method) { if (method_in_use(&method, mad_reg_req)) return 1; else break; } } } return 0; } int ib_response_mad(const struct ib_mad_hdr *hdr) { return ((hdr->method & IB_MGMT_METHOD_RESP) || (hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) || ((hdr->mgmt_class == IB_MGMT_CLASS_BM) && (hdr->attr_mod & IB_BM_ATTR_MOD_RESP))); } EXPORT_SYMBOL(ib_response_mad); /* * ib_register_mad_agent - Register to send/receive MADs */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, void *context, u32 registration_flags) { struct ib_mad_port_private *port_priv; struct ib_mad_agent *ret = ERR_PTR(-EINVAL); struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_reg_req *reg_req = NULL; struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; struct ib_mad_mgmt_method_table *method; int ret2, qpn; unsigned long flags; u8 mgmt_class, vclass; /* Validate parameters */ qpn = get_spl_qp_index(qp_type); if (qpn == -1) { dev_notice(&device->dev, "ib_register_mad_agent: invalid QP Type %d\n", qp_type); goto error1; } if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { dev_notice(&device->dev, "ib_register_mad_agent: invalid RMPP Version %u\n", rmpp_version); goto error1; } /* Validate MAD registration request if supplied */ if (mad_reg_req) { if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { dev_notice(&device->dev, "ib_register_mad_agent: invalid Class Version %u\n", mad_reg_req->mgmt_class_version); goto error1; } if (!recv_handler) { dev_notice(&device->dev, "ib_register_mad_agent: no recv_handler\n"); goto error1; } if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { /* * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only * one in this range currently allowed */ if (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { dev_notice(&device->dev, "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", mad_reg_req->mgmt_class); goto error1; } } else if (mad_reg_req->mgmt_class == 0) { /* * Class 0 is reserved in IBA and is used for * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE */ dev_notice(&device->dev, "ib_register_mad_agent: Invalid Mgmt Class 0\n"); goto error1; } else if (is_vendor_class(mad_reg_req->mgmt_class)) { /* * If class is in "new" vendor range, * ensure supplied OUI is not zero */ if (!is_vendor_oui(mad_reg_req->oui)) { dev_notice(&device->dev, "ib_register_mad_agent: No OUI specified for class 0x%x\n", mad_reg_req->mgmt_class); goto error1; } } /* Make sure class supplied is consistent with RMPP */ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { if (rmpp_version) { dev_notice(&device->dev, "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", mad_reg_req->mgmt_class); goto error1; } } /* Make sure class supplied is consistent with QP type */ if (qp_type == IB_QPT_SMI) { if ((mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) && (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { dev_notice(&device->dev, "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", mad_reg_req->mgmt_class); goto error1; } } else { if ((mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { dev_notice(&device->dev, "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", mad_reg_req->mgmt_class); goto error1; } } } else { /* No registration request supplied */ if (!send_handler) goto error1; if (registration_flags & IB_MAD_USER_RMPP) goto error1; } /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n"); ret = ERR_PTR(-ENODEV); goto error1; } /* Verify the QP requested is supported. For example, Ethernet devices * will not have QP0 */ if (!port_priv->qp_info[qpn].qp) { dev_notice(&device->dev, "ib_register_mad_agent: QP %d not supported\n", qpn); ret = ERR_PTR(-EPROTONOSUPPORT); goto error1; } /* Allocate structures */ mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL); if (!mad_agent_priv) { ret = ERR_PTR(-ENOMEM); goto error1; } if (mad_reg_req) { reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL); if (!reg_req) { ret = ERR_PTR(-ENOMEM); goto error3; } } /* Now, fill in the various structures */ mad_agent_priv->qp_info = &port_priv->qp_info[qpn]; mad_agent_priv->reg_req = reg_req; mad_agent_priv->agent.rmpp_version = rmpp_version; mad_agent_priv->agent.device = device; mad_agent_priv->agent.recv_handler = recv_handler; mad_agent_priv->agent.send_handler = send_handler; mad_agent_priv->agent.context = context; mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_agent_priv->agent.port_num = port_num; mad_agent_priv->agent.flags = registration_flags; spin_lock_init(&mad_agent_priv->lock); INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); INIT_LIST_HEAD(&mad_agent_priv->done_list); INIT_LIST_HEAD(&mad_agent_priv->rmpp_list); INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends); INIT_LIST_HEAD(&mad_agent_priv->local_list); INIT_WORK(&mad_agent_priv->local_work, local_completions); atomic_set(&mad_agent_priv->refcount, 1); init_completion(&mad_agent_priv->comp); spin_lock_irqsave(&port_priv->reg_lock, flags); mad_agent_priv->agent.hi_tid = ++ib_mad_client_id; /* * Make sure MAD registration (if supplied) * is non overlapping with any existing ones */ if (mad_reg_req) { mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class); if (!is_vendor_class(mgmt_class)) { class = port_priv->version[mad_reg_req-> mgmt_class_version].class; if (class) { method = class->method_table[mgmt_class]; if (method) { if (method_in_use(&method, mad_reg_req)) goto error4; } } ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv, mgmt_class); } else { /* "New" vendor class range */ vendor = port_priv->version[mad_reg_req-> mgmt_class_version].vendor; if (vendor) { vclass = vendor_class_index(mgmt_class); vendor_class = vendor->vendor_class[vclass]; if (vendor_class) { if (is_vendor_method_in_use( vendor_class, mad_reg_req)) goto error4; } } ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv); } if (ret2) { ret = ERR_PTR(ret2); goto error4; } } /* Add mad agent into port's agent list */ list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list); spin_unlock_irqrestore(&port_priv->reg_lock, flags); return &mad_agent_priv->agent; error4: spin_unlock_irqrestore(&port_priv->reg_lock, flags); kfree(reg_req); error3: kfree(mad_agent_priv); error1: return ret; } EXPORT_SYMBOL(ib_register_mad_agent); static inline int is_snooping_sends(int mad_snoop_flags) { return (mad_snoop_flags & (/*IB_MAD_SNOOP_POSTED_SENDS | IB_MAD_SNOOP_RMPP_SENDS |*/ IB_MAD_SNOOP_SEND_COMPLETIONS /*| IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/)); } static inline int is_snooping_recvs(int mad_snoop_flags) { return (mad_snoop_flags & (IB_MAD_SNOOP_RECVS /*| IB_MAD_SNOOP_RMPP_RECVS*/)); } static int register_snoop_agent(struct ib_mad_qp_info *qp_info, struct ib_mad_snoop_private *mad_snoop_priv) { struct ib_mad_snoop_private **new_snoop_table; unsigned long flags; int i; spin_lock_irqsave(&qp_info->snoop_lock, flags); /* Check for empty slot in array. */ for (i = 0; i < qp_info->snoop_table_size; i++) if (!qp_info->snoop_table[i]) break; if (i == qp_info->snoop_table_size) { /* Grow table. */ new_snoop_table = krealloc(qp_info->snoop_table, sizeof mad_snoop_priv * (qp_info->snoop_table_size + 1), GFP_ATOMIC); if (!new_snoop_table) { i = -ENOMEM; goto out; } qp_info->snoop_table = new_snoop_table; qp_info->snoop_table_size++; } qp_info->snoop_table[i] = mad_snoop_priv; atomic_inc(&qp_info->snoop_count); out: spin_unlock_irqrestore(&qp_info->snoop_lock, flags); return i; } struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, u8 port_num, enum ib_qp_type qp_type, int mad_snoop_flags, ib_mad_snoop_handler snoop_handler, ib_mad_recv_handler recv_handler, void *context) { struct ib_mad_port_private *port_priv; struct ib_mad_agent *ret; struct ib_mad_snoop_private *mad_snoop_priv; int qpn; /* Validate parameters */ if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) || (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) { ret = ERR_PTR(-EINVAL); goto error1; } qpn = get_spl_qp_index(qp_type); if (qpn == -1) { ret = ERR_PTR(-EINVAL); goto error1; } port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { ret = ERR_PTR(-ENODEV); goto error1; } /* Allocate structures */ mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL); if (!mad_snoop_priv) { ret = ERR_PTR(-ENOMEM); goto error1; } /* Now, fill in the various structures */ mad_snoop_priv->qp_info = &port_priv->qp_info[qpn]; mad_snoop_priv->agent.device = device; mad_snoop_priv->agent.recv_handler = recv_handler; mad_snoop_priv->agent.snoop_handler = snoop_handler; mad_snoop_priv->agent.context = context; mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_snoop_priv->agent.port_num = port_num; mad_snoop_priv->mad_snoop_flags = mad_snoop_flags; init_completion(&mad_snoop_priv->comp); mad_snoop_priv->snoop_index = register_snoop_agent( &port_priv->qp_info[qpn], mad_snoop_priv); if (mad_snoop_priv->snoop_index < 0) { ret = ERR_PTR(mad_snoop_priv->snoop_index); goto error2; } atomic_set(&mad_snoop_priv->refcount, 1); return &mad_snoop_priv->agent; error2: kfree(mad_snoop_priv); error1: return ret; } EXPORT_SYMBOL(ib_register_mad_snoop); static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { if (atomic_dec_and_test(&mad_agent_priv->refcount)) complete(&mad_agent_priv->comp); } static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv) { if (atomic_dec_and_test(&mad_snoop_priv->refcount)) complete(&mad_snoop_priv->comp); } static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_port_private *port_priv; unsigned long flags; /* Note that we could still be handling received MADs */ /* * Canceling all sends results in dropping received response * MADs, preventing us from queuing additional work */ cancel_mads(mad_agent_priv); port_priv = mad_agent_priv->qp_info->port_priv; cancel_delayed_work(&mad_agent_priv->timed_work); spin_lock_irqsave(&port_priv->reg_lock, flags); remove_mad_reg_req(mad_agent_priv); list_del(&mad_agent_priv->agent_list); spin_unlock_irqrestore(&port_priv->reg_lock, flags); flush_workqueue(port_priv->wq); ib_cancel_rmpp_recvs(mad_agent_priv); deref_mad_agent(mad_agent_priv); wait_for_completion(&mad_agent_priv->comp); kfree(mad_agent_priv->reg_req); kfree(mad_agent_priv); } static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv) { struct ib_mad_qp_info *qp_info; unsigned long flags; qp_info = mad_snoop_priv->qp_info; spin_lock_irqsave(&qp_info->snoop_lock, flags); qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL; atomic_dec(&qp_info->snoop_count); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); deref_snoop_agent(mad_snoop_priv); wait_for_completion(&mad_snoop_priv->comp); kfree(mad_snoop_priv); } /* * ib_unregister_mad_agent - Unregisters a client from using MAD services */ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_snoop_private *mad_snoop_priv; /* If the TID is zero, the agent can only snoop. */ if (mad_agent->hi_tid) { mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); unregister_mad_agent(mad_agent_priv); } else { mad_snoop_priv = container_of(mad_agent, struct ib_mad_snoop_private, agent); unregister_mad_snoop(mad_snoop_priv); } return 0; } EXPORT_SYMBOL(ib_unregister_mad_agent); static void dequeue_mad(struct ib_mad_list_head *mad_list) { struct ib_mad_queue *mad_queue; unsigned long flags; BUG_ON(!mad_list->mad_queue); mad_queue = mad_list->mad_queue; spin_lock_irqsave(&mad_queue->lock, flags); list_del(&mad_list->list); mad_queue->count--; spin_unlock_irqrestore(&mad_queue->lock, flags); } static void snoop_send(struct ib_mad_qp_info *qp_info, struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc, int mad_snoop_flags) { struct ib_mad_snoop_private *mad_snoop_priv; unsigned long flags; int i; spin_lock_irqsave(&qp_info->snoop_lock, flags); for (i = 0; i < qp_info->snoop_table_size; i++) { mad_snoop_priv = qp_info->snoop_table[i]; if (!mad_snoop_priv || !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) continue; atomic_inc(&mad_snoop_priv->refcount); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent, send_buf, mad_send_wc); deref_snoop_agent(mad_snoop_priv); spin_lock_irqsave(&qp_info->snoop_lock, flags); } spin_unlock_irqrestore(&qp_info->snoop_lock, flags); } static void snoop_recv(struct ib_mad_qp_info *qp_info, struct ib_mad_recv_wc *mad_recv_wc, int mad_snoop_flags) { struct ib_mad_snoop_private *mad_snoop_priv; unsigned long flags; int i; spin_lock_irqsave(&qp_info->snoop_lock, flags); for (i = 0; i < qp_info->snoop_table_size; i++) { mad_snoop_priv = qp_info->snoop_table[i]; if (!mad_snoop_priv || !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags)) continue; atomic_inc(&mad_snoop_priv->refcount); spin_unlock_irqrestore(&qp_info->snoop_lock, flags); mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL, mad_recv_wc); deref_snoop_agent(mad_snoop_priv); spin_lock_irqsave(&qp_info->snoop_lock, flags); } spin_unlock_irqrestore(&qp_info->snoop_lock, flags); } static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid, u16 pkey_index, u8 port_num, struct ib_wc *wc) { memset(wc, 0, sizeof *wc); wc->wr_cqe = cqe; wc->status = IB_WC_SUCCESS; wc->opcode = IB_WC_RECV; wc->pkey_index = pkey_index; wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh); wc->src_qp = IB_QP0; wc->qp = qp; wc->slid = slid; wc->sl = 0; wc->dlid_path_bits = 0; wc->port_num = port_num; } static size_t mad_priv_size(const struct ib_mad_private *mp) { return sizeof(struct ib_mad_private) + mp->mad_size; } static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags) { size_t size = sizeof(struct ib_mad_private) + mad_size; struct ib_mad_private *ret = kzalloc(size, flags); if (ret) ret->mad_size = mad_size; return ret; } static size_t port_mad_size(const struct ib_mad_port_private *port_priv) { return rdma_max_mad_size(port_priv->device, port_priv->port_num); } static size_t mad_priv_dma_size(const struct ib_mad_private *mp) { return sizeof(struct ib_grh) + mp->mad_size; } /* * Return 0 if SMP is to be sent * Return 1 if SMP was consumed locally (whether or not solicited) * Return < 0 if error */ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_wr_private *mad_send_wr) { int ret = 0; struct ib_smp *smp = mad_send_wr->send_buf.mad; struct opa_smp *opa_smp = (struct opa_smp *)smp; unsigned long flags; struct ib_mad_local_private *local; struct ib_mad_private *mad_priv; struct ib_mad_port_private *port_priv; struct ib_mad_agent_private *recv_mad_agent = NULL; struct ib_device *device = mad_agent_priv->agent.device; u8 port_num; struct ib_wc mad_wc; struct ib_ud_wr *send_wr = &mad_send_wr->send_wr; size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv); u16 out_mad_pkey_index = 0; u16 drslid; bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); if (rdma_cap_ib_switch(device) && smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) port_num = send_wr->port_num; else port_num = mad_agent_priv->agent.port_num; /* * Directed route handling starts if the initial LID routed part of * a request or the ending LID routed part of a response is empty. * If we are at the start of the LID routed part, don't update the * hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec. */ if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) { u32 opa_drslid; if ((opa_get_smp_direction(opa_smp) ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) == OPA_LID_PERMISSIVE && opa_smi_handle_dr_smp_send(opa_smp, rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid directed route\n"); goto out; } opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid); if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) && opa_drslid & 0xffff0000) { ret = -EINVAL; dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n", opa_drslid); goto out; } drslid = (u16)(opa_drslid & 0x0000ffff); /* Check to post send on QP or process locally */ if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD && opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD) goto out; } else { if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) == IB_LID_PERMISSIVE && smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) == IB_SMI_DISCARD) { ret = -EINVAL; dev_err(&device->dev, "Invalid directed route\n"); goto out; } drslid = be16_to_cpu(smp->dr_slid); /* Check to post send on QP or process locally */ if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD && smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD) goto out; } local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { ret = -ENOMEM; dev_err(&device->dev, "No memory for ib_mad_local_private\n"); goto out; } local->mad_priv = NULL; local->recv_mad_agent = NULL; mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; dev_err(&device->dev, "No memory for local response MAD\n"); kfree(local); goto out; } build_smp_wc(mad_agent_priv->agent.qp, send_wr->wr.wr_cqe, drslid, send_wr->pkey_index, send_wr->port_num, &mad_wc); if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) { mad_wc.byte_len = mad_send_wr->send_buf.hdr_len + mad_send_wr->send_buf.data_len + sizeof(struct ib_grh); } /* No GRH for DR SMP */ ret = device->process_mad(device, 0, port_num, &mad_wc, NULL, (const struct ib_mad_hdr *)smp, mad_size, (struct ib_mad_hdr *)mad_priv->mad, &mad_size, &out_mad_pkey_index); switch (ret) { case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY: if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) && mad_agent_priv->agent.recv_handler) { local->mad_priv = mad_priv; local->recv_mad_agent = mad_agent_priv; /* * Reference MAD agent until receive * side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); } else kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED: kfree(mad_priv); break; case IB_MAD_RESULT_SUCCESS: /* Treat like an incoming receive MAD */ port_priv = ib_get_mad_port(mad_agent_priv->agent.device, mad_agent_priv->agent.port_num); if (port_priv) { memcpy(mad_priv->mad, smp, mad_priv->mad_size); recv_mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)mad_priv->mad); } if (!port_priv || !recv_mad_agent) { /* * No receiving agent so drop packet and * generate send completion. */ kfree(mad_priv); break; } local->mad_priv = mad_priv; local->recv_mad_agent = recv_mad_agent; break; default: kfree(mad_priv); kfree(local); ret = -EINVAL; goto out; } local->mad_send_wr = mad_send_wr; if (opa) { local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index; local->return_wc_byte_len = mad_size; } /* Reference MAD agent until send side of local completion handled */ atomic_inc(&mad_agent_priv->refcount); /* Queue local completion to local list */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&local->completion_list, &mad_agent_priv->local_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); queue_work(mad_agent_priv->qp_info->port_priv->wq, &mad_agent_priv->local_work); ret = 1; out: return ret; } static int get_pad_size(int hdr_len, int data_len, size_t mad_size) { int seg_size, pad; seg_size = mad_size - hdr_len; if (data_len && seg_size) { pad = seg_size - data_len % seg_size; return pad == seg_size ? 0 : pad; } else return seg_size; } static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_segment *s, *t; list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) { list_del(&s->list); kfree(s); } } static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, size_t mad_size, gfp_t gfp_mask) { struct ib_mad_send_buf *send_buf = &send_wr->send_buf; struct ib_rmpp_mad *rmpp_mad = send_buf->mad; struct ib_rmpp_segment *seg = NULL; int left, seg_size, pad; send_buf->seg_size = mad_size - send_buf->hdr_len; send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR; seg_size = send_buf->seg_size; pad = send_wr->pad; /* Allocate data segments. */ for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); if (!seg) { dev_err(&send_buf->mad_agent->device->dev, "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n", sizeof (*seg) + seg_size, gfp_mask); free_send_rmpp_list(send_wr); return -ENOMEM; } seg->num = ++send_buf->seg_count; list_add_tail(&seg->list, &send_wr->rmpp_list); } /* Zero any padding */ if (pad) memset(seg->data + seg_size - pad, 0, pad); rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv-> agent.rmpp_version; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); send_wr->cur_seg = container_of(send_wr->rmpp_list.next, struct ib_rmpp_segment, list); send_wr->last_ack_seg = send_wr->cur_seg; return 0; } int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent) { return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); } EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask, u8 base_version) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; int pad, message_size, ret, size; void *buf; size_t mad_size; bool opa; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num); if (opa && base_version == OPA_MGMT_BASE_VERSION) mad_size = sizeof(struct opa_mad); else mad_size = sizeof(struct ib_mad); pad = get_pad_size(hdr_len, data_len, mad_size); message_size = hdr_len + data_len + pad; if (ib_mad_kernel_rmpp_agent(mad_agent)) { if (!rmpp_active && message_size > mad_size) return ERR_PTR(-EINVAL); } else if (rmpp_active || message_size > mad_size) return ERR_PTR(-EINVAL); size = rmpp_active ? hdr_len : mad_size; buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); if (!buf) return ERR_PTR(-ENOMEM); mad_send_wr = (struct ib_mad_send_wr_private *)((char *)buf + size); INIT_LIST_HEAD(&mad_send_wr->rmpp_list); mad_send_wr->send_buf.mad = buf; mad_send_wr->send_buf.hdr_len = hdr_len; mad_send_wr->send_buf.data_len = data_len; mad_send_wr->pad = pad; mad_send_wr->mad_agent_priv = mad_agent_priv; mad_send_wr->sg_list[0].length = hdr_len; mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey; /* OPA MADs don't have to be the full 2048 bytes */ if (opa && base_version == OPA_MGMT_BASE_VERSION && data_len < mad_size - hdr_len) mad_send_wr->sg_list[1].length = data_len; else mad_send_wr->sg_list[1].length = mad_size - hdr_len; mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey; mad_send_wr->mad_list.cqe.done = ib_mad_send_done; mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list; mad_send_wr->send_wr.wr.num_sge = 2; mad_send_wr->send_wr.wr.opcode = IB_WR_SEND; mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED; mad_send_wr->send_wr.remote_qpn = remote_qpn; mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY; mad_send_wr->send_wr.pkey_index = pkey_index; if (rmpp_active) { ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask); if (ret) { kfree(buf); return ERR_PTR(ret); } } mad_send_wr->send_buf.mad_agent = mad_agent; atomic_inc(&mad_agent_priv->refcount); return &mad_send_wr->send_buf; } EXPORT_SYMBOL(ib_create_send_mad); int ib_get_mad_data_offset(u8 mgmt_class) { if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM) return IB_MGMT_SA_HDR; else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || (mgmt_class == IB_MGMT_CLASS_BIS)) return IB_MGMT_DEVICE_HDR; else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) return IB_MGMT_VENDOR_HDR; else return IB_MGMT_MAD_HDR; } EXPORT_SYMBOL(ib_get_mad_data_offset); int ib_is_mad_class_rmpp(u8 mgmt_class) { if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) || (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) || (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) || (mgmt_class == IB_MGMT_CLASS_BIS) || ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))) return 1; return 0; } EXPORT_SYMBOL(ib_is_mad_class_rmpp); void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num) { struct ib_mad_send_wr_private *mad_send_wr; struct list_head *list; mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); list = &mad_send_wr->cur_seg->list; if (mad_send_wr->cur_seg->num < seg_num) { list_for_each_entry(mad_send_wr->cur_seg, list, list) if (mad_send_wr->cur_seg->num == seg_num) break; } else if (mad_send_wr->cur_seg->num > seg_num) { list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list) if (mad_send_wr->cur_seg->num == seg_num) break; } return mad_send_wr->cur_seg->data; } EXPORT_SYMBOL(ib_get_rmpp_segment); static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr) { if (mad_send_wr->send_buf.seg_count) return ib_get_rmpp_segment(&mad_send_wr->send_buf, mad_send_wr->seg_num); else return (char *)mad_send_wr->send_buf.mad + mad_send_wr->send_buf.hdr_len; } void ib_free_send_mad(struct ib_mad_send_buf *send_buf) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; mad_agent_priv = container_of(send_buf->mad_agent, struct ib_mad_agent_private, agent); mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); free_send_rmpp_list(mad_send_wr); kfree(send_buf->mad); deref_mad_agent(mad_agent_priv); } EXPORT_SYMBOL(ib_free_send_mad); int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_qp_info *qp_info; struct list_head *list; struct ib_send_wr *bad_send_wr; struct ib_mad_agent *mad_agent; struct ib_sge *sge; unsigned long flags; int ret; /* Set WR ID to find mad_send_wr upon completion */ qp_info = mad_send_wr->mad_agent_priv->qp_info; mad_send_wr->mad_list.mad_queue = &qp_info->send_queue; mad_send_wr->mad_list.cqe.done = ib_mad_send_done; mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe; mad_agent = mad_send_wr->send_buf.mad_agent; sge = mad_send_wr->sg_list; sge[0].addr = ib_dma_map_single(mad_agent->device, mad_send_wr->send_buf.mad, sge[0].length, DMA_TO_DEVICE); if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) return -ENOMEM; mad_send_wr->header_mapping = sge[0].addr; sge[1].addr = ib_dma_map_single(mad_agent->device, ib_get_payload(mad_send_wr), sge[1].length, DMA_TO_DEVICE); if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { ib_dma_unmap_single(mad_agent->device, mad_send_wr->header_mapping, sge[0].length, DMA_TO_DEVICE); return -ENOMEM; } mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); if (qp_info->send_queue.count < qp_info->send_queue.max_active) { ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); list = &qp_info->send_queue.list; } else { ret = 0; list = &qp_info->overflow_list; } if (!ret) { qp_info->send_queue.count++; list_add_tail(&mad_send_wr->mad_list.list, list); } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); if (ret) { ib_dma_unmap_single(mad_agent->device, mad_send_wr->header_mapping, sge[0].length, DMA_TO_DEVICE); ib_dma_unmap_single(mad_agent->device, mad_send_wr->payload_mapping, sge[1].length, DMA_TO_DEVICE); } return ret; } /* * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client */ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, struct ib_mad_send_buf **bad_send_buf) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf *next_send_buf; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int ret = -EINVAL; /* Walk list of send WRs and post each on send list */ for (; send_buf; send_buf = next_send_buf) { mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private, send_buf); mad_agent_priv = mad_send_wr->mad_agent_priv; if (!send_buf->mad_agent->send_handler || (send_buf->timeout_ms && !send_buf->mad_agent->recv_handler)) { ret = -EINVAL; goto error; } if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) { if (mad_agent_priv->agent.rmpp_version) { ret = -EINVAL; goto error; } } /* * Save pointer to next work request to post in case the * current one completes, and the user modifies the work * request associated with the completion */ next_send_buf = send_buf->next; mad_send_wr->send_wr.ah = send_buf->ah; if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { ret = handle_outgoing_dr_smp(mad_agent_priv, mad_send_wr); if (ret < 0) /* error */ goto error; else if (ret == 1) /* locally consumed */ continue; } mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid; /* Timeout will be updated after send completes */ mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms); mad_send_wr->max_retries = send_buf->retries; mad_send_wr->retries_left = send_buf->retries; send_buf->retries = 0; /* Reference for work request to QP + response */ mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0); mad_send_wr->status = IB_WC_SUCCESS; /* Reference MAD agent until send completes */ atomic_inc(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_add_tail(&mad_send_wr->agent_list, &mad_agent_priv->send_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_send_rmpp_mad(mad_send_wr); if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) ret = ib_send_mad(mad_send_wr); } else ret = ib_send_mad(mad_send_wr); if (ret < 0) { /* Fail send request */ spin_lock_irqsave(&mad_agent_priv->lock, flags); list_del(&mad_send_wr->agent_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); goto error; } } return 0; error: if (bad_send_buf) *bad_send_buf = send_buf; return ret; } EXPORT_SYMBOL(ib_post_send_mad); /* * ib_free_recv_mad - Returns data buffers used to receive * a MAD to the access layer */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *priv; struct list_head free_list; INIT_LIST_HEAD(&free_list); list_splice_init(&mad_recv_wc->rmpp_list, &free_list); list_for_each_entry_safe(mad_recv_buf, temp_recv_buf, &free_list, list) { mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc, recv_buf); mad_priv_hdr = container_of(mad_recv_wc, struct ib_mad_private_header, recv_wc); priv = container_of(mad_priv_hdr, struct ib_mad_private, header); kfree(priv); } } EXPORT_SYMBOL(ib_free_recv_mad); struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, void *context) { return ERR_PTR(-EINVAL); /* XXX: for now */ } EXPORT_SYMBOL(ib_redirect_mad_qp); int ib_process_mad_wc(struct ib_mad_agent *mad_agent, struct ib_wc *wc) { dev_err(&mad_agent->device->dev, "ib_process_mad_wc() not implemented yet\n"); return 0; } EXPORT_SYMBOL(ib_process_mad_wc); static int method_in_use(struct ib_mad_mgmt_method_table **method, struct ib_mad_reg_req *mad_reg_req) { int i; for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { if ((*method)->agent[i]) { pr_err("Method %d already in use\n", i); return -EINVAL; } } return 0; } static int allocate_method_table(struct ib_mad_mgmt_method_table **method) { /* Allocate management method table */ *method = kzalloc(sizeof **method, GFP_ATOMIC); if (!*method) { pr_err("No memory for ib_mad_mgmt_method_table\n"); return -ENOMEM; } return 0; } /* * Check to see if there are any methods still in use */ static int check_method_table(struct ib_mad_mgmt_method_table *method) { int i; for (i = 0; i < IB_MGMT_MAX_METHODS; i++) if (method->agent[i]) return 1; return 0; } /* * Check to see if there are any method tables for this class still in use */ static int check_class_table(struct ib_mad_mgmt_class_table *class) { int i; for (i = 0; i < MAX_MGMT_CLASS; i++) if (class->method_table[i]) return 1; return 0; } static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class) { int i; for (i = 0; i < MAX_MGMT_OUI; i++) if (vendor_class->method_table[i]) return 1; return 0; } static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class, const char *oui) { int i; for (i = 0; i < MAX_MGMT_OUI; i++) /* Is there matching OUI for this vendor class ? */ if (!memcmp(vendor_class->oui[i], oui, 3)) return i; return -1; } static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor) { int i; for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++) if (vendor->vendor_class[i]) return 1; return 0; } static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method, struct ib_mad_agent_private *agent) { int i; /* Remove any methods for this mad agent */ for (i = 0; i < IB_MGMT_MAX_METHODS; i++) { if (method->agent[i] == agent) { method->agent[i] = NULL; } } } static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv, u8 mgmt_class) { struct ib_mad_port_private *port_priv; struct ib_mad_mgmt_class_table **class; struct ib_mad_mgmt_method_table **method; int i, ret; port_priv = agent_priv->qp_info->port_priv; class = &port_priv->version[mad_reg_req->mgmt_class_version].class; if (!*class) { /* Allocate management class table for "new" class version */ *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { dev_err(&agent_priv->agent.device->dev, "No memory for ib_mad_mgmt_class_table\n"); ret = -ENOMEM; goto error1; } /* Allocate method table for this management class */ method = &(*class)->method_table[mgmt_class]; if ((ret = allocate_method_table(method))) goto error2; } else { method = &(*class)->method_table[mgmt_class]; if (!*method) { /* Allocate method table for this management class */ if ((ret = allocate_method_table(method))) goto error1; } } /* Now, make sure methods are not already in use */ if (method_in_use(method, mad_reg_req)) goto error3; /* Finally, add in methods being registered */ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; return 0; error3: /* Remove any methods for this mad agent */ remove_methods_mad_agent(*method, agent_priv); /* Now, check to see if there are any methods in use */ if (!check_method_table(*method)) { /* If not, release management method table */ kfree(*method); *method = NULL; } ret = -EINVAL; goto error1; error2: kfree(*class); *class = NULL; error1: return ret; } static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, struct ib_mad_agent_private *agent_priv) { struct ib_mad_port_private *port_priv; struct ib_mad_mgmt_vendor_class_table **vendor_table; struct ib_mad_mgmt_vendor_class_table *vendor = NULL; struct ib_mad_mgmt_vendor_class *vendor_class = NULL; struct ib_mad_mgmt_method_table **method; int i, ret = -ENOMEM; u8 vclass; /* "New" vendor (with OUI) class */ vclass = vendor_class_index(mad_reg_req->mgmt_class); port_priv = agent_priv->qp_info->port_priv; vendor_table = &port_priv->version[ mad_reg_req->mgmt_class_version].vendor; if (!*vendor_table) { /* Allocate mgmt vendor class table for "new" class version */ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); if (!vendor) { dev_err(&agent_priv->agent.device->dev, "No memory for ib_mad_mgmt_vendor_class_table\n"); goto error1; } *vendor_table = vendor; } if (!(*vendor_table)->vendor_class[vclass]) { /* Allocate table for this management vendor class */ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); if (!vendor_class) { dev_err(&agent_priv->agent.device->dev, "No memory for ib_mad_mgmt_vendor_class\n"); goto error2; } (*vendor_table)->vendor_class[vclass] = vendor_class; } for (i = 0; i < MAX_MGMT_OUI; i++) { /* Is there matching OUI for this vendor class ? */ if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3)) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; BUG_ON(!*method); goto check_in_use; } } for (i = 0; i < MAX_MGMT_OUI; i++) { /* OUI slot available ? */ if (!is_vendor_oui((*vendor_table)->vendor_class[ vclass]->oui[i])) { method = &(*vendor_table)->vendor_class[ vclass]->method_table[i]; BUG_ON(*method); /* Allocate method table for this OUI */ if ((ret = allocate_method_table(method))) goto error3; memcpy((*vendor_table)->vendor_class[vclass]->oui[i], mad_reg_req->oui, 3); goto check_in_use; } } dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); goto error3; check_in_use: /* Now, make sure methods are not already in use */ if (method_in_use(method, mad_reg_req)) goto error4; /* Finally, add in methods being registered */ for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) (*method)->agent[i] = agent_priv; return 0; error4: /* Remove any methods for this mad agent */ remove_methods_mad_agent(*method, agent_priv); /* Now, check to see if there are any methods in use */ if (!check_method_table(*method)) { /* If not, release management method table */ kfree(*method); *method = NULL; } ret = -EINVAL; error3: if (vendor_class) { (*vendor_table)->vendor_class[vclass] = NULL; kfree(vendor_class); } error2: if (vendor) { *vendor_table = NULL; kfree(vendor); } error1: return ret; } static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) { struct ib_mad_port_private *port_priv; struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; int index; u8 mgmt_class; /* * Was MAD registration request supplied * with original registration ? */ if (!agent_priv->reg_req) { goto out; } port_priv = agent_priv->qp_info->port_priv; mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class); class = port_priv->version[ agent_priv->reg_req->mgmt_class_version].class; if (!class) goto vendor_check; method = class->method_table[mgmt_class]; if (method) { /* Remove any methods for this mad agent */ remove_methods_mad_agent(method, agent_priv); /* Now, check to see if there are any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ kfree(method); class->method_table[mgmt_class] = NULL; /* Any management classes left ? */ if (!check_class_table(class)) { /* If not, release management class table */ kfree(class); port_priv->version[ agent_priv->reg_req-> mgmt_class_version].class = NULL; } } } vendor_check: if (!is_vendor_class(mgmt_class)) goto out; /* normalize mgmt_class to vendor range 2 */ mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class); vendor = port_priv->version[ agent_priv->reg_req->mgmt_class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[mgmt_class]; if (vendor_class) { index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui); if (index < 0) goto out; method = vendor_class->method_table[index]; if (method) { /* Remove any methods for this mad agent */ remove_methods_mad_agent(method, agent_priv); /* * Now, check to see if there are * any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ kfree(method); vendor_class->method_table[index] = NULL; memset(vendor_class->oui[index], 0, 3); /* Any OUIs left ? */ if (!check_vendor_class(vendor_class)) { /* If not, release vendor class table */ kfree(vendor_class); vendor->vendor_class[mgmt_class] = NULL; /* Any other vendor classes left ? */ if (!check_vendor_table(vendor)) { kfree(vendor); port_priv->version[ agent_priv->reg_req-> mgmt_class_version]. vendor = NULL; } } } } } out: return; } static struct ib_mad_agent_private * find_mad_agent(struct ib_mad_port_private *port_priv, const struct ib_mad_hdr *mad_hdr) { struct ib_mad_agent_private *mad_agent = NULL; unsigned long flags; spin_lock_irqsave(&port_priv->reg_lock, flags); if (ib_response_mad(mad_hdr)) { u32 hi_tid; struct ib_mad_agent_private *entry; /* * Routing is based on high 32 bits of transaction ID * of MAD. */ hi_tid = be64_to_cpu(mad_hdr->tid) >> 32; list_for_each_entry(entry, &port_priv->agent_list, agent_list) { if (entry->agent.hi_tid == hi_tid) { mad_agent = entry; break; } } } else { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_method_table *method; struct ib_mad_mgmt_vendor_class_table *vendor; struct ib_mad_mgmt_vendor_class *vendor_class; const struct ib_vendor_mad *vendor_mad; int index; /* * Routing is based on version, class, and method * For "newer" vendor MADs, also based on OUI */ if (mad_hdr->class_version >= MAX_MGMT_VERSION) goto out; if (!is_vendor_class(mad_hdr->mgmt_class)) { class = port_priv->version[ mad_hdr->class_version].class; if (!class) goto out; if (convert_mgmt_class(mad_hdr->mgmt_class) >= IB_MGMT_MAX_METHODS) goto out; method = class->method_table[convert_mgmt_class( mad_hdr->mgmt_class)]; if (method) mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } else { vendor = port_priv->version[ mad_hdr->class_version].vendor; if (!vendor) goto out; vendor_class = vendor->vendor_class[vendor_class_index( mad_hdr->mgmt_class)]; if (!vendor_class) goto out; /* Find matching OUI */ vendor_mad = (const struct ib_vendor_mad *)mad_hdr; index = find_vendor_oui(vendor_class, vendor_mad->oui); if (index == -1) goto out; method = vendor_class->method_table[index]; if (method) { mad_agent = method->agent[mad_hdr->method & ~IB_MGMT_METHOD_RESP]; } } } if (mad_agent) { if (mad_agent->agent.recv_handler) atomic_inc(&mad_agent->refcount); else { dev_notice(&port_priv->device->dev, "No receive handler for client %p on port %d\n", &mad_agent->agent, port_priv->port_num); mad_agent = NULL; } } out: spin_unlock_irqrestore(&port_priv->reg_lock, flags); return mad_agent; } static int validate_mad(const struct ib_mad_hdr *mad_hdr, const struct ib_mad_qp_info *qp_info, bool opa) { int valid = 0; u32 qp_num = qp_info->qp->qp_num; /* Make sure MAD base version is understood */ if (mad_hdr->base_version != IB_MGMT_BASE_VERSION && (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) { pr_err("MAD received with unsupported base version %d %s\n", mad_hdr->base_version, opa ? "(opa)" : ""); goto out; } /* Filter SMI packets sent to other than QP0 */ if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { if (qp_num == 0) valid = 1; } else { /* CM attributes other than ClassPortInfo only use Send method */ if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) && (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) && (mad_hdr->method != IB_MGMT_METHOD_SEND)) goto out; /* Filter GSI packets sent to QP0 */ if (qp_num != 0) valid = 1; } out: return valid; } static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_hdr *mad_hdr) { const struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (const struct ib_rmpp_mad *)mad_hdr; return !mad_agent_priv->agent.rmpp_version || !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) || (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); } static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr, const struct ib_mad_recv_wc *rwc) { return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class == rwc->recv_buf.mad->mad_hdr.mgmt_class; } static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_send_wr_private *wr, const struct ib_mad_recv_wc *rwc ) { struct ib_ah_attr attr; u8 send_resp, rcv_resp; union ib_gid sgid; struct ib_device *device = mad_agent_priv->agent.device; u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad); rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ return 0; if (ib_query_ah(wr->send_buf.ah, &attr)) /* Assume not equal, to avoid false positives. */ return 0; if (!!(attr.ah_flags & IB_AH_GRH) != !!(rwc->wc->wc_flags & IB_WC_GRH)) /* one has GID, other does not. Assume different */ return 0; if (!send_resp && rcv_resp) { /* is request/response. */ if (!(attr.ah_flags & IB_AH_GRH)) { if (ib_get_cached_lmc(device, port_num, &lmc)) return 0; return (!lmc || !((attr.src_path_bits ^ rwc->wc->dlid_path_bits) & ((1 << lmc) - 1))); } else { if (ib_get_cached_gid(device, port_num, attr.grh.sgid_index, &sgid, NULL)) return 0; return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw, 16); } } if (!(attr.ah_flags & IB_AH_GRH)) return attr.dlid == rwc->wc->slid; else return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw, 16); } static inline int is_direct(u8 class) { return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE); } struct ib_mad_send_wr_private* ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_recv_wc *wc) { struct ib_mad_send_wr_private *wr; const struct ib_mad_hdr *mad_hdr; mad_hdr = &wc->recv_buf.mad->mad_hdr; list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) { if ((wr->tid == mad_hdr->tid) && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } /* * It's possible to receive the response before we've * been notified that the send has completed */ list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) { if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) && wr->tid == mad_hdr->tid && wr->timeout && rcv_has_same_class(wr, wc) && /* * Don't check GID for direct routed MADs. * These might have permissive LIDs. */ (is_direct(mad_hdr->mgmt_class) || rcv_has_same_gid(mad_agent_priv, wr, wc))) /* Verify request has not been canceled */ return (wr->status == IB_WC_SUCCESS) ? wr : NULL; } return NULL; } void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr) { mad_send_wr->timeout = 0; if (mad_send_wr->refcount == 1) list_move_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->done_list); } static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { deref_mad_agent(mad_agent_priv); return; } } /* Complete corresponding request */ if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) { spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { /* user rmpp is in effect * and this is an active RMPP MAD */ mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, NULL, mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); } else { /* not user rmpp, revert to normal behavior and * drop the mad */ ib_free_recv_mad(mad_recv_wc); deref_mad_agent(mad_agent_priv); return; } } else { ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Defined behavior is to complete response before request */ mad_agent_priv->agent.recv_handler( &mad_agent_priv->agent, &mad_send_wr->send_buf, mad_recv_wc); atomic_dec(&mad_agent_priv->refcount); mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); } } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL, mad_recv_wc); deref_mad_agent(mad_agent_priv); } } static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv, const struct ib_mad_qp_info *qp_info, const struct ib_wc *wc, int port_num, struct ib_mad_private *recv, struct ib_mad_private *response) { enum smi_forward_action retsmi; struct ib_smp *smp = (struct ib_smp *)recv->mad; if (smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) return IB_SMI_DISCARD; retsmi = smi_check_forward_dr_smp(smp); if (retsmi == IB_SMI_LOCAL) return IB_SMI_HANDLE; if (retsmi == IB_SMI_SEND) { /* don't forward */ if (smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; agent_send_response((const struct ib_mad_hdr *)response->mad, &response->grh, wc, port_priv->device, smi_get_fwd_port(smp), qp_info->qp->qp_num, response->mad_size, false); return IB_SMI_DISCARD; } return IB_SMI_HANDLE; } static bool generate_unmatched_resp(const struct ib_mad_private *recv, struct ib_mad_private *response, size_t *resp_len, bool opa) { const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad; struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad; if (recv_hdr->method == IB_MGMT_METHOD_GET || recv_hdr->method == IB_MGMT_METHOD_SET) { memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; resp_hdr->method = IB_MGMT_METHOD_GET_RESP; resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) resp_hdr->status |= IB_SMP_DIRECTION; if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) { if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) *resp_len = opa_get_smp_header_size( (const struct opa_smp *)recv->mad); else *resp_len = sizeof(struct ib_mad_hdr); } return true; } else { return false; } } static enum smi_action handle_opa_smi(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info, struct ib_wc *wc, int port_num, struct ib_mad_private *recv, struct ib_mad_private *response) { enum smi_forward_action retsmi; struct opa_smp *smp = (struct opa_smp *)recv->mad; if (opa_smi_handle_dr_smp_recv(smp, rdma_cap_ib_switch(port_priv->device), port_num, port_priv->device->phys_port_cnt) == IB_SMI_DISCARD) return IB_SMI_DISCARD; retsmi = opa_smi_check_forward_dr_smp(smp); if (retsmi == IB_SMI_LOCAL) return IB_SMI_HANDLE; if (retsmi == IB_SMI_SEND) { /* don't forward */ if (opa_smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(port_priv->device), port_num) == IB_SMI_DISCARD) return IB_SMI_DISCARD; if (opa_smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD) return IB_SMI_DISCARD; } else if (rdma_cap_ib_switch(port_priv->device)) { /* forward case for switches */ memcpy(response, recv, mad_priv_size(response)); response->header.recv_wc.wc = &response->header.wc; response->header.recv_wc.recv_buf.opa_mad = (struct opa_mad *)response->mad; response->header.recv_wc.recv_buf.grh = &response->grh; agent_send_response((const struct ib_mad_hdr *)response->mad, &response->grh, wc, port_priv->device, opa_smi_get_fwd_port(smp), qp_info->qp->qp_num, recv->header.wc.byte_len, true); return IB_SMI_DISCARD; } return IB_SMI_HANDLE; } static enum smi_action handle_smi(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info, struct ib_wc *wc, int port_num, struct ib_mad_private *recv, struct ib_mad_private *response, bool opa) { struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad; if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION && mad_hdr->class_version == OPA_SMI_CLASS_VERSION) return handle_opa_smi(port_priv, qp_info, wc, port_num, recv, response); return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response); } static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct ib_mad_port_private *port_priv = cq->cq_context; struct ib_mad_list_head *mad_list = container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_qp_info *qp_info; struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *recv, *response = NULL; struct ib_mad_agent_private *mad_agent; int port_num; int ret = IB_MAD_RESULT_SUCCESS; size_t mad_size; u16 resp_mad_pkey_index = 0; bool opa; if (list_empty_careful(&port_priv->port_list)) return; if (wc->status != IB_WC_SUCCESS) { /* * Receive errors indicate that the QP has entered the error * state - error handling/shutdown code will cleanup */ return; } qp_info = mad_list->mad_queue->qp_info; dequeue_mad(mad_list); opa = rdma_cap_opa_mad(qp_info->port_priv->device, qp_info->port_priv->port_num); mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); ib_dma_unmap_single(port_priv->device, recv->header.mapping, mad_priv_dma_size(recv), DMA_FROM_DEVICE); /* Setup MAD receive work completion from "normal" work completion */ recv->header.wc = *wc; recv->header.recv_wc.wc = &recv->header.wc; if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) { recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh); recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); } else { recv->header.recv_wc.mad_len = sizeof(struct ib_mad); recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); } recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad; recv->header.recv_wc.recv_buf.grh = &recv->grh; if (atomic_read(&qp_info->snoop_count)) snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS); /* Validate MAD */ if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa)) goto out; mad_size = recv->mad_size; response = alloc_mad_private(mad_size, GFP_KERNEL); if (!response) { dev_err(&port_priv->device->dev, "%s: no memory for response buffer\n", __func__); goto out; } if (rdma_cap_ib_switch(port_priv->device)) port_num = wc->port_num; else port_num = port_priv->port_num; if (((struct ib_mad_hdr *)recv->mad)->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { if (handle_smi(port_priv, qp_info, wc, port_num, recv, response, opa) == IB_SMI_DISCARD) goto out; } /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, (const struct ib_mad_hdr *)recv->mad, recv->mad_size, (struct ib_mad_hdr *)response->mad, &mad_size, &resp_mad_pkey_index); if (opa) wc->pkey_index = resp_mad_pkey_index; if (ret & IB_MAD_RESULT_SUCCESS) { if (ret & IB_MAD_RESULT_CONSUMED) goto out; if (ret & IB_MAD_RESULT_REPLY) { agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, qp_info->qp->qp_num, mad_size, opa); goto out; } } } mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad); if (mad_agent) { ib_mad_complete_recv(mad_agent, &recv->header.recv_wc); /* * recv is freed up in error cases in ib_mad_complete_recv * or via recv_handler in ib_mad_complete_recv() */ recv = NULL; } else if ((ret & IB_MAD_RESULT_SUCCESS) && generate_unmatched_resp(recv, response, &mad_size, opa)) { agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc, port_priv->device, port_num, qp_info->qp->qp_num, mad_size, opa); } out: /* Post another receive request for this QP */ if (response) { ib_mad_post_receive_mads(qp_info, response); kfree(recv); } else ib_mad_post_receive_mads(qp_info, recv); } static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) { struct ib_mad_send_wr_private *mad_send_wr; unsigned long delay; if (list_empty(&mad_agent_priv->wait_list)) { cancel_delayed_work(&mad_agent_priv->timed_work); } else { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, agent_list); if (time_after(mad_agent_priv->timeout, mad_send_wr->timeout)) { mad_agent_priv->timeout = mad_send_wr->timeout; delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, &mad_agent_priv->timed_work, delay); } } } static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *temp_mad_send_wr; struct list_head *list_item; unsigned long delay; mad_agent_priv = mad_send_wr->mad_agent_priv; list_del(&mad_send_wr->agent_list); delay = mad_send_wr->timeout; mad_send_wr->timeout += jiffies; if (delay) { list_for_each_prev(list_item, &mad_agent_priv->wait_list) { temp_mad_send_wr = list_entry(list_item, struct ib_mad_send_wr_private, agent_list); if (time_after(mad_send_wr->timeout, temp_mad_send_wr->timeout)) break; } } else list_item = &mad_agent_priv->wait_list; list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, &mad_agent_priv->timed_work, delay); } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, int timeout_ms) { mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); wait_for_response(mad_send_wr); } /* * Process a send work completion */ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_agent_private *mad_agent_priv; unsigned long flags; int ret; mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); if (ret == IB_RMPP_RESULT_CONSUMED) goto done; } else ret = IB_RMPP_RESULT_UNHANDLED; if (mad_send_wc->status != IB_WC_SUCCESS && mad_send_wr->status == IB_WC_SUCCESS) { mad_send_wr->status = mad_send_wc->status; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } if (--mad_send_wr->refcount > 0) { if (mad_send_wr->refcount == 1 && mad_send_wr->timeout && mad_send_wr->status == IB_WC_SUCCESS) { wait_for_response(mad_send_wr); } goto done; } /* Remove send from MAD agent and notify client of completion */ list_del(&mad_send_wr->agent_list); adjust_timeout(mad_agent_priv); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (mad_send_wr->status != IB_WC_SUCCESS ) mad_send_wc->status = mad_send_wr->status; if (ret == IB_RMPP_RESULT_INTERNAL) ib_rmpp_send_handler(mad_send_wc); else mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, mad_send_wc); /* Release reference on agent taken when sending */ deref_mad_agent(mad_agent_priv); return; done: spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct ib_mad_port_private *port_priv = cq->cq_context; struct ib_mad_list_head *mad_list = container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr; struct ib_mad_qp_info *qp_info; struct ib_mad_queue *send_queue; struct ib_send_wr *bad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags; int ret; if (list_empty_careful(&port_priv->port_list)) return; if (wc->status != IB_WC_SUCCESS) { if (!ib_mad_send_error(port_priv, wc)) return; } mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); send_queue = mad_list->mad_queue; qp_info = send_queue->qp_info; retry: ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->header_mapping, mad_send_wr->sg_list[0].length, DMA_TO_DEVICE); ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device, mad_send_wr->payload_mapping, mad_send_wr->sg_list[1].length, DMA_TO_DEVICE); queued_send_wr = NULL; spin_lock_irqsave(&send_queue->lock, flags); list_del(&mad_list->list); /* Move queued send to the send queue */ if (send_queue->count-- > send_queue->max_active) { mad_list = container_of(qp_info->overflow_list.next, struct ib_mad_list_head, list); queued_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); list_move_tail(&mad_list->list, &send_queue->list); } spin_unlock_irqrestore(&send_queue->lock, flags); mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_send_wc.status = wc->status; mad_send_wc.vendor_err = wc->vendor_err; if (atomic_read(&qp_info->snoop_count)) snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); if (queued_send_wr) { ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr, &bad_send_wr); if (ret) { dev_err(&port_priv->device->dev, "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; wc->status = IB_WC_LOC_QP_OP_ERR; goto retry; } } } static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_list_head *mad_list; unsigned long flags; spin_lock_irqsave(&qp_info->send_queue.lock, flags); list_for_each_entry(mad_list, &qp_info->send_queue.list, list) { mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); mad_send_wr->retry = 1; } spin_unlock_irqrestore(&qp_info->send_queue.lock, flags); } static bool ib_mad_send_error(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { struct ib_mad_list_head *mad_list = container_of(wc->wr_cqe, struct ib_mad_list_head, cqe); struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info; struct ib_mad_send_wr_private *mad_send_wr; int ret; /* * Send errors will transition the QP to SQE - move * QP to RTS and repost flushed work requests */ mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private, mad_list); if (wc->status == IB_WC_WR_FLUSH_ERR) { if (mad_send_wr->retry) { /* Repost send */ struct ib_send_wr *bad_send_wr; mad_send_wr->retry = 0; ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr, &bad_send_wr); if (!ret) return false; } } else { struct ib_qp_attr *attr; /* Transition QP to RTS and fail offending send */ attr = kmalloc(sizeof *attr, GFP_KERNEL); if (attr) { attr->qp_state = IB_QPS_RTS; attr->cur_qp_state = IB_QPS_SQE; ret = ib_modify_qp(qp_info->qp, attr, IB_QP_STATE | IB_QP_CUR_STATE); kfree(attr); if (ret) dev_err(&port_priv->device->dev, "%s - ib_modify_qp to RTS: %d\n", __func__, ret); else mark_sends_for_retry(qp_info); } } return true; } static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv) { unsigned long flags; struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr; struct ib_mad_send_wc mad_send_wc; struct list_head cancel_list; INIT_LIST_HEAD(&cancel_list); spin_lock_irqsave(&mad_agent_priv->lock, flags); list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (mad_send_wr->status == IB_WC_SUCCESS) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } } /* Empty wait list to prevent receives from finding a request */ list_splice_init(&mad_agent_priv->wait_list, &cancel_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); /* Report all cancelled requests */ mad_send_wc.status = IB_WC_WR_FLUSH_ERR; mad_send_wc.vendor_err = 0; list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr, &cancel_list, agent_list) { mad_send_wc.send_buf = &mad_send_wr->send_buf; list_del(&mad_send_wr->agent_list); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); atomic_dec(&mad_agent_priv->refcount); } } static struct ib_mad_send_wr_private* find_send_wr(struct ib_mad_agent_private *mad_agent_priv, struct ib_mad_send_buf *send_buf) { struct ib_mad_send_wr_private *mad_send_wr; list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, agent_list) { if (&mad_send_wr->send_buf == send_buf) return mad_send_wr; } list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, agent_list) { if (is_rmpp_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && &mad_send_wr->send_buf == send_buf) return mad_send_wr; } return NULL; } int ib_modify_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int active; mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return -EINVAL; } active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1); if (!timeout_ms) { mad_send_wr->status = IB_WC_WR_FLUSH_ERR; mad_send_wr->refcount -= (mad_send_wr->timeout > 0); } mad_send_wr->send_buf.timeout_ms = timeout_ms; if (active) mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); else ib_reset_mad_timeout(mad_send_wr, timeout_ms); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); return 0; } EXPORT_SYMBOL(ib_modify_mad); void ib_cancel_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf) { ib_modify_mad(mad_agent, send_buf, 0); } EXPORT_SYMBOL(ib_cancel_mad); static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_local_private *local; struct ib_mad_agent_private *recv_mad_agent; unsigned long flags; int free_mad; struct ib_wc wc; struct ib_mad_send_wc mad_send_wc; bool opa; mad_agent_priv = container_of(work, struct ib_mad_agent_private, local_work); opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device, mad_agent_priv->qp_info->port_priv->port_num); spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->local_list)) { local = list_entry(mad_agent_priv->local_list.next, struct ib_mad_local_private, completion_list); list_del(&local->completion_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); free_mad = 0; if (local->mad_priv) { u8 base_version; recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { dev_err(&mad_agent_priv->agent.device->dev, "No receive MAD agent for local completion\n"); free_mad = 1; goto local_send_completion; } /* * Defined behavior is to complete response * before request */ build_smp_wc(recv_mad_agent->agent.qp, local->mad_send_wr->send_wr.wr.wr_cqe, be16_to_cpu(IB_LID_PERMISSIVE), local->mad_send_wr->send_wr.pkey_index, recv_mad_agent->agent.port_num, &wc); local->mad_priv->header.recv_wc.wc = &wc; base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version; if (opa && base_version == OPA_MGMT_BASE_VERSION) { local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len; local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad); } else { local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad); local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad); } INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list); list_add(&local->mad_priv->header.recv_wc.recv_buf.list, &local->mad_priv->header.recv_wc.rmpp_list); local->mad_priv->header.recv_wc.recv_buf.grh = NULL; local->mad_priv->header.recv_wc.recv_buf.mad = (struct ib_mad *)local->mad_priv->mad; if (atomic_read(&recv_mad_agent->qp_info->snoop_count)) snoop_recv(recv_mad_agent->qp_info, &local->mad_priv->header.recv_wc, IB_MAD_SNOOP_RECVS); recv_mad_agent->agent.recv_handler( &recv_mad_agent->agent, &local->mad_send_wr->send_buf, &local->mad_priv->header.recv_wc); spin_lock_irqsave(&recv_mad_agent->lock, flags); atomic_dec(&recv_mad_agent->refcount); spin_unlock_irqrestore(&recv_mad_agent->lock, flags); } local_send_completion: /* Complete send */ mad_send_wc.status = IB_WC_SUCCESS; mad_send_wc.vendor_err = 0; mad_send_wc.send_buf = &local->mad_send_wr->send_buf; if (atomic_read(&mad_agent_priv->qp_info->snoop_count)) snoop_send(mad_agent_priv->qp_info, &local->mad_send_wr->send_buf, &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS); mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); spin_lock_irqsave(&mad_agent_priv->lock, flags); atomic_dec(&mad_agent_priv->refcount); if (free_mad) kfree(local->mad_priv); kfree(local); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) { int ret; if (!mad_send_wr->retries_left) return -ETIMEDOUT; mad_send_wr->retries_left--; mad_send_wr->send_buf.retries++; mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { ret = ib_retry_rmpp(mad_send_wr); switch (ret) { case IB_RMPP_RESULT_UNHANDLED: ret = ib_send_mad(mad_send_wr); break; case IB_RMPP_RESULT_CONSUMED: ret = 0; break; default: ret = -ECOMM; break; } } else ret = ib_send_mad(mad_send_wr); if (!ret) { mad_send_wr->refcount++; list_add_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->send_list); } return ret; } static void timeout_sends(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc mad_send_wc; unsigned long flags, delay; mad_agent_priv = container_of(work, struct ib_mad_agent_private, timed_work.work); mad_send_wc.vendor_err = 0; spin_lock_irqsave(&mad_agent_priv->lock, flags); while (!list_empty(&mad_agent_priv->wait_list)) { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, agent_list); if (time_after(mad_send_wr->timeout, jiffies)) { delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; queue_delayed_work(mad_agent_priv->qp_info-> port_priv->wq, &mad_agent_priv->timed_work, delay); break; } list_del(&mad_send_wr->agent_list); if (mad_send_wr->status == IB_WC_SUCCESS && !retry_send(mad_send_wr)) continue; spin_unlock_irqrestore(&mad_agent_priv->lock, flags); if (mad_send_wr->status == IB_WC_SUCCESS) mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR; else mad_send_wc.status = mad_send_wr->status; mad_send_wc.send_buf = &mad_send_wr->send_buf; mad_agent_priv->agent.send_handler(&mad_agent_priv->agent, &mad_send_wc); atomic_dec(&mad_agent_priv->refcount); spin_lock_irqsave(&mad_agent_priv->lock, flags); } spin_unlock_irqrestore(&mad_agent_priv->lock, flags); } /* * Allocate receive MADs and post receive WRs for them */ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, struct ib_mad_private *mad) { unsigned long flags; int post, ret; struct ib_mad_private *mad_priv; struct ib_sge sg_list; struct ib_recv_wr recv_wr, *bad_recv_wr; struct ib_mad_queue *recv_queue = &qp_info->recv_queue; /* Initialize common scatter list fields */ sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey; /* Initialize common receive WR fields */ recv_wr.next = NULL; recv_wr.sg_list = &sg_list; recv_wr.num_sge = 1; do { /* Allocate and map receive buffer */ if (mad) { mad_priv = mad; mad = NULL; } else { mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv), GFP_ATOMIC); if (!mad_priv) { dev_err(&qp_info->port_priv->device->dev, "No memory for receive buffer\n"); ret = -ENOMEM; break; } } sg_list.length = mad_priv_dma_size(mad_priv); sg_list.addr = ib_dma_map_single(qp_info->port_priv->device, &mad_priv->grh, mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, sg_list.addr))) { ret = -ENOMEM; break; } mad_priv->header.mapping = sg_list.addr; mad_priv->header.mad_list.mad_queue = recv_queue; mad_priv->header.mad_list.cqe.done = ib_mad_recv_done; recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe; /* Post receive WR */ spin_lock_irqsave(&recv_queue->lock, flags); post = (++recv_queue->count < recv_queue->max_active); list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list); spin_unlock_irqrestore(&recv_queue->lock, flags); ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr); if (ret) { spin_lock_irqsave(&recv_queue->lock, flags); list_del(&mad_priv->header.mad_list.list); recv_queue->count--; spin_unlock_irqrestore(&recv_queue->lock, flags); ib_dma_unmap_single(qp_info->port_priv->device, mad_priv->header.mapping, mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE); kfree(mad_priv); dev_err(&qp_info->port_priv->device->dev, "ib_post_recv failed: %d\n", ret); break; } } while (post); return ret; } /* * Return all the posted receive MADs */ static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info) { struct ib_mad_private_header *mad_priv_hdr; struct ib_mad_private *recv; struct ib_mad_list_head *mad_list; if (!qp_info->qp) return; while (!list_empty(&qp_info->recv_queue.list)) { mad_list = list_entry(qp_info->recv_queue.list.next, struct ib_mad_list_head, list); mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header, mad_list); recv = container_of(mad_priv_hdr, struct ib_mad_private, header); /* Remove from posted receive MAD list */ list_del(&mad_list->list); ib_dma_unmap_single(qp_info->port_priv->device, recv->header.mapping, mad_priv_dma_size(recv), DMA_FROM_DEVICE); kfree(recv); } qp_info->recv_queue.count = 0; } /* * Start the port */ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) { int ret, i; struct ib_qp_attr *attr; struct ib_qp *qp; u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) { dev_err(&port_priv->device->dev, "Couldn't kmalloc ib_qp_attr\n"); return -ENOMEM; } ret = ib_find_pkey(port_priv->device, port_priv->port_num, IB_DEFAULT_PKEY_FULL, &pkey_index); if (ret) pkey_index = 0; for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; if (!qp) continue; /* * PKey index for QP1 is irrelevant but * one is needed for the Reset to Init transition */ attr->qp_state = IB_QPS_INIT; attr->pkey_index = pkey_index; attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); if (ret) { dev_err(&port_priv->device->dev, "Couldn't change QP%d state to INIT: %d\n", i, ret); goto out; } attr->qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, attr, IB_QP_STATE); if (ret) { dev_err(&port_priv->device->dev, "Couldn't change QP%d state to RTR: %d\n", i, ret); goto out; } attr->qp_state = IB_QPS_RTS; attr->sq_psn = IB_MAD_SEND_Q_PSN; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); if (ret) { dev_err(&port_priv->device->dev, "Couldn't change QP%d state to RTS: %d\n", i, ret); goto out; } } ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); if (ret) { dev_err(&port_priv->device->dev, "Failed to request completion notification: %d\n", ret); goto out; } for (i = 0; i < IB_MAD_QPS_CORE; i++) { if (!port_priv->qp_info[i].qp) continue; ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { dev_err(&port_priv->device->dev, "Couldn't post receive WRs\n"); goto out; } } out: kfree(attr); return ret; } static void qp_event_handler(struct ib_event *event, void *qp_context) { struct ib_mad_qp_info *qp_info = qp_context; /* It's worse than that! He's dead, Jim! */ dev_err(&qp_info->port_priv->device->dev, "Fatal error (%d) on MAD QP (%d)\n", event->event, qp_info->qp->qp_num); } static void init_mad_queue(struct ib_mad_qp_info *qp_info, struct ib_mad_queue *mad_queue) { mad_queue->qp_info = qp_info; mad_queue->count = 0; spin_lock_init(&mad_queue->lock); INIT_LIST_HEAD(&mad_queue->list); } static void init_mad_qp(struct ib_mad_port_private *port_priv, struct ib_mad_qp_info *qp_info) { qp_info->port_priv = port_priv; init_mad_queue(qp_info, &qp_info->send_queue); init_mad_queue(qp_info, &qp_info->recv_queue); INIT_LIST_HEAD(&qp_info->overflow_list); spin_lock_init(&qp_info->snoop_lock); qp_info->snoop_table = NULL; qp_info->snoop_table_size = 0; atomic_set(&qp_info->snoop_count, 0); } static int create_mad_qp(struct ib_mad_qp_info *qp_info, enum ib_qp_type qp_type) { struct ib_qp_init_attr qp_init_attr; int ret; memset(&qp_init_attr, 0, sizeof qp_init_attr); qp_init_attr.send_cq = qp_info->port_priv->cq; qp_init_attr.recv_cq = qp_info->port_priv->cq; qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; qp_init_attr.cap.max_send_wr = mad_sendq_size; qp_init_attr.cap.max_recv_wr = mad_recvq_size; qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG; qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG; qp_init_attr.qp_type = qp_type; qp_init_attr.port_num = qp_info->port_priv->port_num; qp_init_attr.qp_context = qp_info; qp_init_attr.event_handler = qp_event_handler; qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); if (IS_ERR(qp_info->qp)) { dev_err(&qp_info->port_priv->device->dev, "Couldn't create ib_mad QP%d\n", get_spl_qp_index(qp_type)); ret = PTR_ERR(qp_info->qp); goto error; } /* Use minimum queue sizes unless the CQ is resized */ qp_info->send_queue.max_active = mad_sendq_size; qp_info->recv_queue.max_active = mad_recvq_size; return 0; error: return ret; } static void destroy_mad_qp(struct ib_mad_qp_info *qp_info) { if (!qp_info->qp) return; ib_destroy_qp(qp_info->qp); kfree(qp_info->snoop_table); } /* * Open the port * Create the QP, PD, MR, and CQ if needed */ static int ib_mad_port_open(struct ib_device *device, int port_num) { int ret, cq_size; struct ib_mad_port_private *port_priv; unsigned long flags; char name[sizeof "ib_mad123"]; int has_smi; if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE)) return -EFAULT; if (WARN_ON(rdma_cap_opa_mad(device, port_num) && rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE)) return -EFAULT; /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { dev_err(&device->dev, "No memory for ib_mad_port_private\n"); return -ENOMEM; } port_priv->device = device; port_priv->port_num = port_num; spin_lock_init(&port_priv->reg_lock); INIT_LIST_HEAD(&port_priv->agent_list); init_mad_qp(port_priv, &port_priv->qp_info[0]); init_mad_qp(port_priv, &port_priv->qp_info[1]); cq_size = mad_sendq_size + mad_recvq_size; has_smi = rdma_cap_ib_smi(device, port_num); if (has_smi) cq_size *= 2; port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, IB_POLL_WORKQUEUE); if (IS_ERR(port_priv->cq)) { dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); goto error3; } port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); goto error4; } if (has_smi) { ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI); if (ret) goto error6; } ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI); if (ret) goto error7; snprintf(name, sizeof name, "ib_mad%d", port_num); port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!port_priv->wq) { ret = -ENOMEM; goto error8; } spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_add_tail(&port_priv->port_list, &ib_mad_port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); ret = ib_mad_port_start(port_priv); if (ret) { dev_err(&device->dev, "Couldn't start port\n"); goto error9; } return 0; error9: spin_lock_irqsave(&ib_mad_port_list_lock, flags); list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); error8: destroy_mad_qp(&port_priv->qp_info[1]); error7: destroy_mad_qp(&port_priv->qp_info[0]); error6: ib_dealloc_pd(port_priv->pd); error4: ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); error3: kfree(port_priv); return ret; } /* * Close the port * If there are no classes using the port, free the port * resources (CQ, MR, PD, QP) and remove the port's info structure */ static int ib_mad_port_close(struct ib_device *device, int port_num) { struct ib_mad_port_private *port_priv; unsigned long flags; spin_lock_irqsave(&ib_mad_port_list_lock, flags); port_priv = __ib_get_mad_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); dev_err(&device->dev, "Port %d not found\n", port_num); return -ENODEV; } list_del_init(&port_priv->port_list); spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); destroy_workqueue(port_priv->wq); destroy_mad_qp(&port_priv->qp_info[1]); destroy_mad_qp(&port_priv->qp_info[0]); ib_dealloc_pd(port_priv->pd); ib_free_cq(port_priv->cq); cleanup_recv_queue(&port_priv->qp_info[1]); cleanup_recv_queue(&port_priv->qp_info[0]); /* XXX: Handle deallocation of MAD registration tables */ kfree(port_priv); return 0; } static void ib_mad_init_device(struct ib_device *device) { int start, i; start = rdma_start_port(device); for (i = start; i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_mad_port_open(device, i)) { dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } if (ib_agent_port_open(device, i)) { dev_err(&device->dev, "Couldn't open port %d for agents\n", i); goto error_agent; } } return; error_agent: if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); error: while (--i >= start) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } } static void ib_mad_remove_device(struct ib_device *device, void *client_data) { int i; for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { if (!rdma_cap_ib_mad(device, i)) continue; if (ib_agent_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) dev_err(&device->dev, "Couldn't close port %d\n", i); } } static struct ib_client mad_client = { .name = "mad", .add = ib_mad_init_device, .remove = ib_mad_remove_device }; int ib_mad_init(void) { mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE); mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE); INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { pr_err("Couldn't register ib_mad client\n"); return -EINVAL; } return 0; } void ib_mad_cleanup(void) { ib_unregister_client(&mad_client); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_mad_rmpp.c (revision 331772) @@ -1,968 +1,972 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Intel Inc. All rights reserved. * Copyright (c) 2005-2006 Voltaire, Inc. All rights reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include "mad_priv.h" #include "mad_rmpp.h" enum rmpp_state { RMPP_STATE_ACTIVE, RMPP_STATE_TIMEOUT, RMPP_STATE_COMPLETE, RMPP_STATE_CANCELING }; struct mad_rmpp_recv { struct ib_mad_agent_private *agent; struct list_head list; struct delayed_work timeout_work; struct delayed_work cleanup_work; struct completion comp; enum rmpp_state state; spinlock_t lock; atomic_t refcount; struct ib_ah *ah; struct ib_mad_recv_wc *rmpp_wc; struct ib_mad_recv_buf *cur_seg_buf; int last_ack; int seg_num; int newwin; int repwin; __be64 tid; u32 src_qp; u16 slid; u8 mgmt_class; u8 class_version; u8 method; u8 base_version; }; static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { if (atomic_dec_and_test(&rmpp_recv->refcount)) complete(&rmpp_recv->comp); } static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv) { deref_rmpp_recv(rmpp_recv); wait_for_completion(&rmpp_recv->comp); ib_destroy_ah(rmpp_recv->ah); kfree(rmpp_recv); } void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent) { struct mad_rmpp_recv *rmpp_recv, *temp_rmpp_recv; unsigned long flags; spin_lock_irqsave(&agent->lock, flags); list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { if (rmpp_recv->state != RMPP_STATE_COMPLETE) ib_free_recv_mad(rmpp_recv->rmpp_wc); rmpp_recv->state = RMPP_STATE_CANCELING; } spin_unlock_irqrestore(&agent->lock, flags); list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { cancel_delayed_work(&rmpp_recv->timeout_work); cancel_delayed_work(&rmpp_recv->cleanup_work); } flush_workqueue(agent->qp_info->port_priv->wq); list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv, &agent->rmpp_list, list) { list_del(&rmpp_recv->list); destroy_rmpp_recv(rmpp_recv); } } static void format_ack(struct ib_mad_send_buf *msg, struct ib_rmpp_mad *data, struct mad_rmpp_recv *rmpp_recv) { struct ib_rmpp_mad *ack = msg->mad; unsigned long flags; memcpy(ack, &data->mad_hdr, msg->hdr_len); ack->mad_hdr.method ^= IB_MGMT_METHOD_RESP; ack->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ACK; ib_set_rmpp_flags(&ack->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); spin_lock_irqsave(&rmpp_recv->lock, flags); rmpp_recv->last_ack = rmpp_recv->seg_num; ack->rmpp_hdr.seg_num = cpu_to_be32(rmpp_recv->seg_num); ack->rmpp_hdr.paylen_newwin = cpu_to_be32(rmpp_recv->newwin); spin_unlock_irqrestore(&rmpp_recv->lock, flags); } static void ack_recv(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; int ret, hdr_len; hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, 0, GFP_KERNEL, IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) return; format_ack(msg, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv); msg->ah = rmpp_recv->ah; ret = ib_post_send_mad(msg, NULL); if (ret) ib_free_send_mad(msg); } static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; struct ib_ah *ah; int hdr_len; ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc, recv_wc->recv_buf.grh, agent->port_num); if (IS_ERR(ah)) return (void *) ah; hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class); msg = ib_create_send_mad(agent, recv_wc->wc->src_qp, recv_wc->wc->pkey_index, 1, hdr_len, 0, GFP_KERNEL, IB_MGMT_BASE_VERSION); if (IS_ERR(msg)) ib_destroy_ah(ah); else { msg->ah = ah; msg->context[0] = ah; } return msg; } static void ack_ds_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *recv_wc) { struct ib_mad_send_buf *msg; struct ib_rmpp_mad *rmpp_mad; int ret; msg = alloc_response_msg(&agent->agent, recv_wc); if (IS_ERR(msg)) return; rmpp_mad = msg->mad; memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.seg_num = 0; rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1); ret = ib_post_send_mad(msg, NULL); if (ret) { ib_destroy_ah(msg->ah); ib_free_send_mad(msg); } } void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah) ib_destroy_ah(mad_send_wc->send_buf->ah); ib_free_send_mad(mad_send_wc->send_buf); } static void nack_recv(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *recv_wc, u8 rmpp_status) { struct ib_mad_send_buf *msg; struct ib_rmpp_mad *rmpp_mad; int ret; msg = alloc_response_msg(&agent->agent, recv_wc); if (IS_ERR(msg)) return; rmpp_mad = msg->mad; memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len); rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP; rmpp_mad->rmpp_hdr.rmpp_version = IB_MGMT_RMPP_VERSION; rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_ABORT; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.rmpp_status = rmpp_status; rmpp_mad->rmpp_hdr.seg_num = 0; rmpp_mad->rmpp_hdr.paylen_newwin = 0; ret = ib_post_send_mad(msg, NULL); if (ret) { ib_destroy_ah(msg->ah); ib_free_send_mad(msg); } } static void recv_timeout_handler(struct work_struct *work) { struct mad_rmpp_recv *rmpp_recv = container_of(work, struct mad_rmpp_recv, timeout_work.work); struct ib_mad_recv_wc *rmpp_wc; unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); if (rmpp_recv->state != RMPP_STATE_ACTIVE) { spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); return; } rmpp_recv->state = RMPP_STATE_TIMEOUT; list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); rmpp_wc = rmpp_recv->rmpp_wc; nack_recv(rmpp_recv->agent, rmpp_wc, IB_MGMT_RMPP_STATUS_T2L); destroy_rmpp_recv(rmpp_recv); ib_free_recv_mad(rmpp_wc); } static void recv_cleanup_handler(struct work_struct *work) { struct mad_rmpp_recv *rmpp_recv = container_of(work, struct mad_rmpp_recv, cleanup_work.work); unsigned long flags; spin_lock_irqsave(&rmpp_recv->agent->lock, flags); if (rmpp_recv->state == RMPP_STATE_CANCELING) { spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); return; } list_del(&rmpp_recv->list); spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags); destroy_rmpp_recv(rmpp_recv); } static struct mad_rmpp_recv * create_rmpp_recv(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct mad_rmpp_recv *rmpp_recv; struct ib_mad_hdr *mad_hdr; rmpp_recv = kmalloc(sizeof *rmpp_recv, GFP_KERNEL); if (!rmpp_recv) return NULL; rmpp_recv->ah = ib_create_ah_from_wc(agent->agent.qp->pd, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, agent->agent.port_num); if (IS_ERR(rmpp_recv->ah)) goto error; rmpp_recv->agent = agent; init_completion(&rmpp_recv->comp); INIT_DELAYED_WORK(&rmpp_recv->timeout_work, recv_timeout_handler); INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler); spin_lock_init(&rmpp_recv->lock); rmpp_recv->state = RMPP_STATE_ACTIVE; atomic_set(&rmpp_recv->refcount, 1); rmpp_recv->rmpp_wc = mad_recv_wc; rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf; rmpp_recv->newwin = 1; rmpp_recv->seg_num = 1; rmpp_recv->last_ack = 0; rmpp_recv->repwin = 1; mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; rmpp_recv->tid = mad_hdr->tid; rmpp_recv->src_qp = mad_recv_wc->wc->src_qp; rmpp_recv->slid = mad_recv_wc->wc->slid; rmpp_recv->mgmt_class = mad_hdr->mgmt_class; rmpp_recv->class_version = mad_hdr->class_version; rmpp_recv->method = mad_hdr->method; rmpp_recv->base_version = mad_hdr->base_version; return rmpp_recv; error: kfree(rmpp_recv); return NULL; } static struct mad_rmpp_recv * find_rmpp_recv(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct mad_rmpp_recv *rmpp_recv; struct ib_mad_hdr *mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr; list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { if (rmpp_recv->tid == mad_hdr->tid && rmpp_recv->src_qp == mad_recv_wc->wc->src_qp && rmpp_recv->slid == mad_recv_wc->wc->slid && rmpp_recv->mgmt_class == mad_hdr->mgmt_class && rmpp_recv->class_version == mad_hdr->class_version && rmpp_recv->method == mad_hdr->method) return rmpp_recv; } return NULL; } static struct mad_rmpp_recv * acquire_rmpp_recv(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct mad_rmpp_recv *rmpp_recv; unsigned long flags; spin_lock_irqsave(&agent->lock, flags); rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); if (rmpp_recv) atomic_inc(&rmpp_recv->refcount); spin_unlock_irqrestore(&agent->lock, flags); return rmpp_recv; } static struct mad_rmpp_recv * insert_rmpp_recv(struct ib_mad_agent_private *agent, struct mad_rmpp_recv *rmpp_recv) { struct mad_rmpp_recv *cur_rmpp_recv; cur_rmpp_recv = find_rmpp_recv(agent, rmpp_recv->rmpp_wc); if (!cur_rmpp_recv) list_add_tail(&rmpp_recv->list, &agent->rmpp_list); return cur_rmpp_recv; } static inline int get_last_flag(struct ib_mad_recv_buf *seg) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *) seg->mad; return ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_LAST; } static inline int get_seg_num(struct ib_mad_recv_buf *seg) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *) seg->mad; return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); } static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list, struct ib_mad_recv_buf *seg) { if (seg->list.next == rmpp_list) return NULL; return container_of(seg->list.next, struct ib_mad_recv_buf, list); } static inline int window_size(struct ib_mad_agent_private *agent) { return max(agent->qp_info->recv_queue.max_active >> 3, 1); } static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list, int seg_num) { struct ib_mad_recv_buf *seg_buf; int cur_seg_num; list_for_each_entry_reverse(seg_buf, rmpp_list, list) { cur_seg_num = get_seg_num(seg_buf); if (seg_num > cur_seg_num) return seg_buf; if (seg_num == cur_seg_num) break; } return NULL; } static void update_seg_num(struct mad_rmpp_recv *rmpp_recv, struct ib_mad_recv_buf *new_buf) { struct list_head *rmpp_list = &rmpp_recv->rmpp_wc->rmpp_list; while (new_buf && (get_seg_num(new_buf) == rmpp_recv->seg_num + 1)) { rmpp_recv->cur_seg_buf = new_buf; rmpp_recv->seg_num++; new_buf = get_next_seg(rmpp_list, new_buf); } } static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) { struct ib_rmpp_mad *rmpp_mad; int hdr_size, data_size, pad; bool opa = rdma_cap_opa_mad(rmpp_recv->agent->qp_info->port_priv->device, rmpp_recv->agent->qp_info->port_priv->port_num); rmpp_mad = (struct ib_rmpp_mad *)rmpp_recv->cur_seg_buf->mad; hdr_size = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); if (opa && rmpp_recv->base_version == OPA_MGMT_BASE_VERSION) { data_size = sizeof(struct opa_rmpp_mad) - hdr_size; pad = OPA_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); if (pad > OPA_MGMT_RMPP_DATA || pad < 0) pad = 0; } else { data_size = sizeof(struct ib_rmpp_mad) - hdr_size; pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); if (pad > IB_MGMT_RMPP_DATA || pad < 0) pad = 0; } return hdr_size + rmpp_recv->seg_num * data_size - pad; } static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv) { struct ib_mad_recv_wc *rmpp_wc; ack_recv(rmpp_recv, rmpp_recv->rmpp_wc); if (rmpp_recv->seg_num > 1) cancel_delayed_work(&rmpp_recv->timeout_work); rmpp_wc = rmpp_recv->rmpp_wc; rmpp_wc->mad_len = get_mad_len(rmpp_recv); /* 10 seconds until we can find the packet lifetime */ queue_delayed_work(rmpp_recv->agent->qp_info->port_priv->wq, &rmpp_recv->cleanup_work, msecs_to_jiffies(10000)); return rmpp_wc; } static struct ib_mad_recv_wc * continue_rmpp(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct mad_rmpp_recv *rmpp_recv; struct ib_mad_recv_buf *prev_buf; struct ib_mad_recv_wc *done_wc; int seg_num; unsigned long flags; rmpp_recv = acquire_rmpp_recv(agent, mad_recv_wc); if (!rmpp_recv) goto drop1; seg_num = get_seg_num(&mad_recv_wc->recv_buf); spin_lock_irqsave(&rmpp_recv->lock, flags); if ((rmpp_recv->state == RMPP_STATE_TIMEOUT) || (seg_num > rmpp_recv->newwin)) goto drop3; if ((seg_num <= rmpp_recv->last_ack) || (rmpp_recv->state == RMPP_STATE_COMPLETE)) { spin_unlock_irqrestore(&rmpp_recv->lock, flags); ack_recv(rmpp_recv, mad_recv_wc); goto drop2; } prev_buf = find_seg_location(&rmpp_recv->rmpp_wc->rmpp_list, seg_num); if (!prev_buf) goto drop3; done_wc = NULL; list_add(&mad_recv_wc->recv_buf.list, &prev_buf->list); if (rmpp_recv->cur_seg_buf == prev_buf) { update_seg_num(rmpp_recv, &mad_recv_wc->recv_buf); if (get_last_flag(rmpp_recv->cur_seg_buf)) { rmpp_recv->state = RMPP_STATE_COMPLETE; spin_unlock_irqrestore(&rmpp_recv->lock, flags); done_wc = complete_rmpp(rmpp_recv); goto out; } else if (rmpp_recv->seg_num == rmpp_recv->newwin) { rmpp_recv->newwin += window_size(agent); spin_unlock_irqrestore(&rmpp_recv->lock, flags); ack_recv(rmpp_recv, mad_recv_wc); goto out; } } spin_unlock_irqrestore(&rmpp_recv->lock, flags); out: deref_rmpp_recv(rmpp_recv); return done_wc; drop3: spin_unlock_irqrestore(&rmpp_recv->lock, flags); drop2: deref_rmpp_recv(rmpp_recv); drop1: ib_free_recv_mad(mad_recv_wc); return NULL; } static struct ib_mad_recv_wc * start_rmpp(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct mad_rmpp_recv *rmpp_recv; unsigned long flags; rmpp_recv = create_rmpp_recv(agent, mad_recv_wc); if (!rmpp_recv) { ib_free_recv_mad(mad_recv_wc); return NULL; } spin_lock_irqsave(&agent->lock, flags); if (insert_rmpp_recv(agent, rmpp_recv)) { spin_unlock_irqrestore(&agent->lock, flags); /* duplicate first MAD */ destroy_rmpp_recv(rmpp_recv); return continue_rmpp(agent, mad_recv_wc); } atomic_inc(&rmpp_recv->refcount); if (get_last_flag(&mad_recv_wc->recv_buf)) { rmpp_recv->state = RMPP_STATE_COMPLETE; spin_unlock_irqrestore(&agent->lock, flags); complete_rmpp(rmpp_recv); } else { spin_unlock_irqrestore(&agent->lock, flags); /* 40 seconds until we can find the packet lifetimes */ queue_delayed_work(agent->qp_info->port_priv->wq, &rmpp_recv->timeout_work, msecs_to_jiffies(40000)); rmpp_recv->newwin += window_size(agent); ack_recv(rmpp_recv, mad_recv_wc); mad_recv_wc = NULL; } deref_rmpp_recv(rmpp_recv); return mad_recv_wc; } static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int timeout; u32 paylen = 0; rmpp_mad = mad_send_wr->send_buf.mad; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(++mad_send_wr->seg_num); if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; paylen = (mad_send_wr->send_buf.seg_count * mad_send_wr->send_buf.seg_rmpp_size) - mad_send_wr->pad; } if (mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; paylen = mad_send_wr->send_buf.seg_rmpp_size - mad_send_wr->pad; } rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); /* 2 seconds for an ACK until we can find the packet lifetime */ timeout = mad_send_wr->send_buf.timeout_ms; if (!timeout || timeout > 2000) mad_send_wr->timeout = msecs_to_jiffies(2000); return ib_send_mad(mad_send_wr); } static void abort_send(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_mad_send_wc wc; unsigned long flags; spin_lock_irqsave(&agent->lock, flags); mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); if (!mad_send_wr) goto out; /* Unmatched send */ if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&agent->lock, flags); wc.status = IB_WC_REM_ABORT_ERR; wc.vendor_err = rmpp_status; wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; out: spin_unlock_irqrestore(&agent->lock, flags); } static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr, int seg_num) { struct list_head *list; wr->last_ack = seg_num; list = &wr->last_ack_seg->list; list_for_each_entry(wr->last_ack_seg, list, list) if (wr->last_ack_seg->num == seg_num) break; } static void process_ds_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc, int newwin) { struct mad_rmpp_recv *rmpp_recv; rmpp_recv = find_rmpp_recv(agent, mad_recv_wc); if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE) rmpp_recv->repwin = newwin; } static void process_rmpp_ack(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_mad_send_wr_private *mad_send_wr; struct ib_rmpp_mad *rmpp_mad; unsigned long flags; int seg_num, newwin, ret; rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; if (rmpp_mad->rmpp_hdr.rmpp_status) { abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); return; } seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); if (newwin < seg_num) { abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); return; } spin_lock_irqsave(&agent->lock, flags); mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); if (!mad_send_wr) { if (!seg_num) process_ds_ack(agent, mad_recv_wc, newwin); goto out; /* Unmatched or DS RMPP ACK */ } if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) && (mad_send_wr->timeout)) { spin_unlock_irqrestore(&agent->lock, flags); ack_ds_ack(agent, mad_recv_wc); return; /* Repeated ACK for DS RMPP transaction */ } if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) || (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS)) goto out; /* Send is already done */ if (seg_num > mad_send_wr->send_buf.seg_count || seg_num > mad_send_wr->newwin) { spin_unlock_irqrestore(&agent->lock, flags); abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); return; } if (newwin < mad_send_wr->newwin || seg_num < mad_send_wr->last_ack) goto out; /* Old ACK */ if (seg_num > mad_send_wr->last_ack) { adjust_last_ack(mad_send_wr, seg_num); mad_send_wr->retries_left = mad_send_wr->max_retries; } mad_send_wr->newwin = newwin; if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { /* If no response is expected, the ACK completes the send */ if (!mad_send_wr->send_buf.timeout_ms) { struct ib_mad_send_wc wc; ib_mark_mad_done(mad_send_wr); spin_unlock_irqrestore(&agent->lock, flags); wc.status = IB_WC_SUCCESS; wc.vendor_err = 0; wc.send_buf = &mad_send_wr->send_buf; ib_mad_complete_send_wr(mad_send_wr, &wc); return; } if (mad_send_wr->refcount == 1) ib_reset_mad_timeout(mad_send_wr, mad_send_wr->send_buf.timeout_ms); spin_unlock_irqrestore(&agent->lock, flags); ack_ds_ack(agent, mad_recv_wc); return; } else if (mad_send_wr->refcount == 1 && mad_send_wr->seg_num < mad_send_wr->newwin && mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) { /* Send failure will just result in a timeout/retry */ ret = send_next_seg(mad_send_wr); if (ret) goto out; mad_send_wr->refcount++; list_move_tail(&mad_send_wr->agent_list, &mad_send_wr->mad_agent_priv->send_list); } out: spin_unlock_irqrestore(&agent->lock, flags); } static struct ib_mad_recv_wc * process_rmpp_data(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_rmpp_hdr *rmpp_hdr; u8 rmpp_status; rmpp_hdr = &((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr; if (rmpp_hdr->rmpp_status) { rmpp_status = IB_MGMT_RMPP_STATUS_BAD_STATUS; goto bad; } if (rmpp_hdr->seg_num == cpu_to_be32(1)) { if (!(ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST)) { rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; goto bad; } return start_rmpp(agent, mad_recv_wc); } else { if (ib_get_rmpp_flags(rmpp_hdr) & IB_MGMT_RMPP_FLAG_FIRST) { rmpp_status = IB_MGMT_RMPP_STATUS_BAD_SEG; goto bad; } return continue_rmpp(agent, mad_recv_wc); } bad: nack_recv(agent, mad_recv_wc, rmpp_status); ib_free_recv_mad(mad_recv_wc); return NULL; } static void process_rmpp_stop(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) { abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); } else abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); } static void process_rmpp_abort(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN || rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) { abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); } else abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); } struct ib_mad_recv_wc * ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_rmpp_mad *rmpp_mad; rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; if (!(rmpp_mad->rmpp_hdr.rmpp_rtime_flags & IB_MGMT_RMPP_FLAG_ACTIVE)) return mad_recv_wc; if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) { abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); goto out; } switch (rmpp_mad->rmpp_hdr.rmpp_type) { case IB_MGMT_RMPP_TYPE_DATA: return process_rmpp_data(agent, mad_recv_wc); case IB_MGMT_RMPP_TYPE_ACK: process_rmpp_ack(agent, mad_recv_wc); break; case IB_MGMT_RMPP_TYPE_STOP: process_rmpp_stop(agent, mad_recv_wc); break; case IB_MGMT_RMPP_TYPE_ABORT: process_rmpp_abort(agent, mad_recv_wc); break; default: abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); break; } out: ib_free_recv_mad(mad_recv_wc); return NULL; } static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv; struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad; struct mad_rmpp_recv *rmpp_recv; struct ib_ah_attr ah_attr; unsigned long flags; int newwin = 1; if (!(mad_hdr->method & IB_MGMT_METHOD_RESP)) goto out; spin_lock_irqsave(&agent->lock, flags); list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) { if (rmpp_recv->tid != mad_hdr->tid || rmpp_recv->mgmt_class != mad_hdr->mgmt_class || rmpp_recv->class_version != mad_hdr->class_version || (rmpp_recv->method & IB_MGMT_METHOD_RESP)) continue; if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr)) continue; if (rmpp_recv->slid == ah_attr.dlid) { newwin = rmpp_recv->repwin; break; } } spin_unlock_irqrestore(&agent->lock, flags); out: return newwin; } int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int ret; rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) { mad_send_wr->seg_num = 1; return IB_RMPP_RESULT_INTERNAL; } mad_send_wr->newwin = init_newwin(mad_send_wr); /* We need to wait for the final ACK even if there isn't a response */ mad_send_wr->refcount += (mad_send_wr->timeout == 0); ret = send_next_seg(mad_send_wr); if (!ret) return IB_RMPP_RESULT_CONSUMED; return ret; } int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc) { struct ib_rmpp_mad *rmpp_mad; int ret; rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */ if (mad_send_wc->status != IB_WC_SUCCESS || mad_send_wr->status != IB_WC_SUCCESS) return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */ if (!mad_send_wr->timeout) return IB_RMPP_RESULT_PROCESSED; /* Response received */ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) { mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); return IB_RMPP_RESULT_PROCESSED; /* Send done */ } if (mad_send_wr->seg_num == mad_send_wr->newwin || mad_send_wr->seg_num == mad_send_wr->send_buf.seg_count) return IB_RMPP_RESULT_PROCESSED; /* Wait for ACK */ ret = send_next_seg(mad_send_wr); if (ret) { mad_send_wc->status = IB_WC_GENERAL_ERR; return IB_RMPP_RESULT_PROCESSED; } return IB_RMPP_RESULT_CONSUMED; } int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int ret; rmpp_mad = mad_send_wr->send_buf.mad; if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */ if (mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) return IB_RMPP_RESULT_PROCESSED; mad_send_wr->seg_num = mad_send_wr->last_ack; mad_send_wr->cur_seg = mad_send_wr->last_ack_seg; ret = send_next_seg(mad_send_wr); if (ret) return IB_RMPP_RESULT_PROCESSED; return IB_RMPP_RESULT_CONSUMED; } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_multicast.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_multicast.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_multicast.c (revision 331772) @@ -1,900 +1,904 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include #include #include #include "sa.h" static void mcast_add_one(struct ib_device *device); static void mcast_remove_one(struct ib_device *device, void *client_data); static struct ib_client mcast_client = { .name = "ib_multicast", .add = mcast_add_one, .remove = mcast_remove_one }; static struct ib_sa_client sa_client; static struct workqueue_struct *mcast_wq; static union ib_gid mgid0; struct mcast_device; struct mcast_port { struct mcast_device *dev; spinlock_t lock; struct rb_root table; atomic_t refcount; struct completion comp; u8 port_num; }; struct mcast_device { struct ib_device *device; struct ib_event_handler event_handler; int start_port; int end_port; struct mcast_port port[0]; }; enum mcast_state { MCAST_JOINING, MCAST_MEMBER, MCAST_ERROR, }; enum mcast_group_state { MCAST_IDLE, MCAST_BUSY, MCAST_GROUP_ERROR, MCAST_PKEY_EVENT }; enum { MCAST_INVALID_PKEY_INDEX = 0xFFFF }; struct mcast_member; struct mcast_group { struct ib_sa_mcmember_rec rec; struct rb_node node; struct mcast_port *port; spinlock_t lock; struct work_struct work; struct list_head pending_list; struct list_head active_list; struct mcast_member *last_join; int members[NUM_JOIN_MEMBERSHIP_TYPES]; atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; u16 pkey_index; u8 leave_state; int retries; }; struct mcast_member { struct ib_sa_multicast multicast; struct ib_sa_client *client; struct mcast_group *group; struct list_head list; enum mcast_state state; atomic_t refcount; struct completion comp; }; static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context); static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, void *context); static struct mcast_group *mcast_find(struct mcast_port *port, union ib_gid *mgid) { struct rb_node *node = port->table.rb_node; struct mcast_group *group; int ret; while (node) { group = rb_entry(node, struct mcast_group, node); ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); if (!ret) return group; if (ret < 0) node = node->rb_left; else node = node->rb_right; } return NULL; } static struct mcast_group *mcast_insert(struct mcast_port *port, struct mcast_group *group, int allow_duplicates) { struct rb_node **link = &port->table.rb_node; struct rb_node *parent = NULL; struct mcast_group *cur_group; int ret; while (*link) { parent = *link; cur_group = rb_entry(parent, struct mcast_group, node); ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, sizeof group->rec.mgid); if (ret < 0) link = &(*link)->rb_left; else if (ret > 0) link = &(*link)->rb_right; else if (allow_duplicates) link = &(*link)->rb_left; else return cur_group; } rb_link_node(&group->node, parent, link); rb_insert_color(&group->node, &port->table); return NULL; } static void deref_port(struct mcast_port *port) { if (atomic_dec_and_test(&port->refcount)) complete(&port->comp); } static void release_group(struct mcast_group *group) { struct mcast_port *port = group->port; unsigned long flags; spin_lock_irqsave(&port->lock, flags); if (atomic_dec_and_test(&group->refcount)) { rb_erase(&group->node, &port->table); spin_unlock_irqrestore(&port->lock, flags); kfree(group); deref_port(port); } else spin_unlock_irqrestore(&port->lock, flags); } static void deref_member(struct mcast_member *member) { if (atomic_dec_and_test(&member->refcount)) complete(&member->comp); } static void queue_join(struct mcast_member *member) { struct mcast_group *group = member->group; unsigned long flags; spin_lock_irqsave(&group->lock, flags); list_add_tail(&member->list, &group->pending_list); if (group->state == MCAST_IDLE) { group->state = MCAST_BUSY; atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } spin_unlock_irqrestore(&group->lock, flags); } /* * A multicast group has four types of members: full member, non member, * sendonly non member and sendonly full member. * We need to keep track of the number of members of each * type based on their join state. Adjust the number of members the belong to * the specified join states. */ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) { int i; for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1) if (join_state & 0x1) group->members[i] += inc; } /* * If a multicast group has zero members left for a particular join state, but * the group is still a member with the SA, we need to leave that join state. * Determine which join states we still belong to, but that do not have any * active members. */ static u8 get_leave_state(struct mcast_group *group) { u8 leave_state = 0; int i; for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++) if (!group->members[i]) leave_state |= (0x1 << i); return leave_state & group->rec.join_state; } static int check_selector(ib_sa_comp_mask comp_mask, ib_sa_comp_mask selector_mask, ib_sa_comp_mask value_mask, u8 selector, u8 src_value, u8 dst_value) { int err; if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) return 0; switch (selector) { case IB_SA_GT: err = (src_value <= dst_value); break; case IB_SA_LT: err = (src_value >= dst_value); break; case IB_SA_EQ: err = (src_value != dst_value); break; default: err = 0; break; } return err; } static int cmp_rec(struct ib_sa_mcmember_rec *src, struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask) { /* MGID must already match */ if (comp_mask & IB_SA_MCMEMBER_REC_PORT_GID && memcmp(&src->port_gid, &dst->port_gid, sizeof src->port_gid)) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) return -EINVAL; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector, src->mtu, dst->mtu)) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && src->traffic_class != dst->traffic_class) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) return -EINVAL; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, IB_SA_MCMEMBER_REC_RATE, dst->rate_selector, src->rate, dst->rate)) return -EINVAL; if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, dst->packet_life_time_selector, src->packet_life_time, dst->packet_life_time)) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_SL && src->sl != dst->sl) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && src->flow_label != dst->flow_label) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && src->hop_limit != dst->hop_limit) return -EINVAL; if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && src->scope != dst->scope) return -EINVAL; /* join_state checked separately, proxy_join ignored */ return 0; } static int send_join(struct mcast_group *group, struct mcast_member *member) { struct mcast_port *port = group->port; int ret; group->last_join = member; ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, port->port_num, IB_MGMT_METHOD_SET, &member->multicast.rec, member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) { struct mcast_port *port = group->port; struct ib_sa_mcmember_rec rec; int ret; rec = group->rec; rec.join_state = leave_state; group->leave_state = leave_state; ret = ib_sa_mcmember_rec_query(&sa_client, port->dev->device, port->port_num, IB_SA_METHOD_DELETE, &rec, IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, u8 join_state) { member->state = MCAST_MEMBER; adjust_membership(group, join_state, 1); group->rec.join_state |= join_state; member->multicast.rec = group->rec; member->multicast.rec.join_state = join_state; list_move(&member->list, &group->active_list); } static int fail_join(struct mcast_group *group, struct mcast_member *member, int status) { spin_lock_irq(&group->lock); list_del_init(&member->list); spin_unlock_irq(&group->lock); return member->multicast.callback(status, &member->multicast); } static void process_group_error(struct mcast_group *group) { struct mcast_member *member; int ret = 0; u16 pkey_index; if (group->state == MCAST_PKEY_EVENT) ret = ib_find_pkey(group->port->dev->device, group->port->port_num, be16_to_cpu(group->rec.pkey), &pkey_index); spin_lock_irq(&group->lock); if (group->state == MCAST_PKEY_EVENT && !ret && group->pkey_index == pkey_index) goto out; while (!list_empty(&group->active_list)) { member = list_entry(group->active_list.next, struct mcast_member, list); atomic_inc(&member->refcount); list_del_init(&member->list); adjust_membership(group, member->multicast.rec.join_state, -1); member->state = MCAST_ERROR; spin_unlock_irq(&group->lock); ret = member->multicast.callback(-ENETRESET, &member->multicast); deref_member(member); if (ret) ib_sa_free_multicast(&member->multicast); spin_lock_irq(&group->lock); } group->rec.join_state = 0; out: group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); } static void mcast_work_handler(struct work_struct *work) { struct mcast_group *group; struct mcast_member *member; struct ib_sa_multicast *multicast; int status, ret; u8 join_state; group = container_of(work, typeof(*group), work); retest: spin_lock_irq(&group->lock); while (!list_empty(&group->pending_list) || (group->state != MCAST_BUSY)) { if (group->state != MCAST_BUSY) { spin_unlock_irq(&group->lock); process_group_error(group); goto retest; } member = list_entry(group->pending_list.next, struct mcast_member, list); multicast = &member->multicast; join_state = multicast->rec.join_state; atomic_inc(&member->refcount); if (join_state == (group->rec.join_state & join_state)) { status = cmp_rec(&group->rec, &multicast->rec, multicast->comp_mask); if (!status) join_group(group, member, join_state); else list_del_init(&member->list); spin_unlock_irq(&group->lock); ret = multicast->callback(status, multicast); } else { spin_unlock_irq(&group->lock); status = send_join(group, member); if (!status) { deref_member(member); return; } ret = fail_join(group, member, status); } deref_member(member); if (ret) ib_sa_free_multicast(&member->multicast); spin_lock_irq(&group->lock); } join_state = get_leave_state(group); if (join_state) { group->rec.join_state &= ~join_state; spin_unlock_irq(&group->lock); if (send_leave(group, join_state)) goto retest; } else { group->state = MCAST_IDLE; spin_unlock_irq(&group->lock); release_group(group); } } /* * Fail a join request if it is still active - at the head of the pending queue. */ static void process_join_error(struct mcast_group *group, int status) { struct mcast_member *member; int ret; spin_lock_irq(&group->lock); member = list_entry(group->pending_list.next, struct mcast_member, list); if (group->last_join == member) { atomic_inc(&member->refcount); list_del_init(&member->list); spin_unlock_irq(&group->lock); ret = member->multicast.callback(status, &member->multicast); deref_member(member); if (ret) ib_sa_free_multicast(&member->multicast); } else spin_unlock_irq(&group->lock); } static void join_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; u16 pkey_index = MCAST_INVALID_PKEY_INDEX; if (status) process_join_error(group, status); else { int mgids_changed, is_mgid0; ib_find_pkey(group->port->dev->device, group->port->port_num, be16_to_cpu(rec->pkey), &pkey_index); spin_lock_irq(&group->port->lock); if (group->state == MCAST_BUSY && group->pkey_index == MCAST_INVALID_PKEY_INDEX) group->pkey_index = pkey_index; mgids_changed = memcmp(&rec->mgid, &group->rec.mgid, sizeof(group->rec.mgid)); group->rec = *rec; if (mgids_changed) { rb_erase(&group->node, &group->port->table); is_mgid0 = !memcmp(&mgid0, &group->rec.mgid, sizeof(mgid0)); mcast_insert(group->port, group, is_mgid0); } spin_unlock_irq(&group->port->lock); } mcast_work_handler(&group->work); } static void leave_handler(int status, struct ib_sa_mcmember_rec *rec, void *context) { struct mcast_group *group = context; if (status && group->retries > 0 && !send_leave(group, group->leave_state)) group->retries--; else mcast_work_handler(&group->work); } static struct mcast_group *acquire_group(struct mcast_port *port, union ib_gid *mgid, gfp_t gfp_mask) { struct mcast_group *group, *cur_group; unsigned long flags; int is_mgid0; is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); if (!is_mgid0) { spin_lock_irqsave(&port->lock, flags); group = mcast_find(port, mgid); if (group) goto found; spin_unlock_irqrestore(&port->lock, flags); } group = kzalloc(sizeof *group, gfp_mask); if (!group) return NULL; group->retries = 3; group->port = port; group->rec.mgid = *mgid; group->pkey_index = MCAST_INVALID_PKEY_INDEX; INIT_LIST_HEAD(&group->pending_list); INIT_LIST_HEAD(&group->active_list); INIT_WORK(&group->work, mcast_work_handler); spin_lock_init(&group->lock); spin_lock_irqsave(&port->lock, flags); cur_group = mcast_insert(port, group, is_mgid0); if (cur_group) { kfree(group); group = cur_group; } else atomic_inc(&port->refcount); found: atomic_inc(&group->refcount); spin_unlock_irqrestore(&port->lock, flags); return group; } /* * We serialize all join requests to a single group to make our lives much * easier. Otherwise, two users could try to join the same group * simultaneously, with different configurations, one could leave while the * join is in progress, etc., which makes locking around error recovery * difficult. */ struct ib_sa_multicast * ib_sa_join_multicast(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, gfp_t gfp_mask, int (*callback)(int status, struct ib_sa_multicast *multicast), void *context) { struct mcast_device *dev; struct mcast_member *member; struct ib_sa_multicast *multicast; int ret; dev = ib_get_client_data(device, &mcast_client); if (!dev) return ERR_PTR(-ENODEV); member = kmalloc(sizeof *member, gfp_mask); if (!member) return ERR_PTR(-ENOMEM); ib_sa_client_get(client); member->client = client; member->multicast.rec = *rec; member->multicast.comp_mask = comp_mask; member->multicast.callback = callback; member->multicast.context = context; init_completion(&member->comp); atomic_set(&member->refcount, 1); member->state = MCAST_JOINING; member->group = acquire_group(&dev->port[port_num - dev->start_port], &rec->mgid, gfp_mask); if (!member->group) { ret = -ENOMEM; goto err; } /* * The user will get the multicast structure in their callback. They * could then free the multicast structure before we can return from * this routine. So we save the pointer to return before queuing * any callback. */ multicast = &member->multicast; queue_join(member); return multicast; err: ib_sa_client_put(client); kfree(member); return ERR_PTR(ret); } EXPORT_SYMBOL(ib_sa_join_multicast); void ib_sa_free_multicast(struct ib_sa_multicast *multicast) { struct mcast_member *member; struct mcast_group *group; member = container_of(multicast, struct mcast_member, multicast); group = member->group; spin_lock_irq(&group->lock); if (member->state == MCAST_MEMBER) adjust_membership(group, multicast->rec.join_state, -1); list_del_init(&member->list); if (group->state == MCAST_IDLE) { group->state = MCAST_BUSY; spin_unlock_irq(&group->lock); /* Continue to hold reference on group until callback */ queue_work(mcast_wq, &group->work); } else { spin_unlock_irq(&group->lock); release_group(group); } deref_member(member); wait_for_completion(&member->comp); ib_sa_client_put(member->client); kfree(member); } EXPORT_SYMBOL(ib_sa_free_multicast); int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, union ib_gid *mgid, struct ib_sa_mcmember_rec *rec) { struct mcast_device *dev; struct mcast_port *port; struct mcast_group *group; unsigned long flags; int ret = 0; dev = ib_get_client_data(device, &mcast_client); if (!dev) return -ENODEV; port = &dev->port[port_num - dev->start_port]; spin_lock_irqsave(&port->lock, flags); group = mcast_find(port, mgid); if (group) *rec = group->rec; else ret = -EADDRNOTAVAIL; spin_unlock_irqrestore(&port->lock, flags); return ret; } EXPORT_SYMBOL(ib_sa_get_mcmember_rec); int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct ib_ah_attr *ah_attr) { int ret; u16 gid_index; u8 p; if (rdma_protocol_roce(device, port_num)) { ret = ib_find_cached_gid_by_port(device, &rec->port_gid, gid_type, port_num, ndev, &gid_index); } else if (rdma_protocol_ib(device, port_num)) { ret = ib_find_cached_gid(device, &rec->port_gid, IB_GID_TYPE_IB, NULL, &p, &gid_index); } else { ret = -EINVAL; } if (ret) return ret; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->mlid); ah_attr->sl = rec->sl; ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->mgid; ah_attr->grh.sgid_index = (u8) gid_index; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; return 0; } EXPORT_SYMBOL(ib_init_ah_from_mcmember); static void mcast_groups_event(struct mcast_port *port, enum mcast_group_state state) { struct mcast_group *group; struct rb_node *node; unsigned long flags; spin_lock_irqsave(&port->lock, flags); for (node = rb_first(&port->table); node; node = rb_next(node)) { group = rb_entry(node, struct mcast_group, node); spin_lock(&group->lock); if (group->state == MCAST_IDLE) { atomic_inc(&group->refcount); queue_work(mcast_wq, &group->work); } if (group->state != MCAST_GROUP_ERROR) group->state = state; spin_unlock(&group->lock); } spin_unlock_irqrestore(&port->lock, flags); } static void mcast_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct mcast_device *dev; int index; dev = container_of(handler, struct mcast_device, event_handler); if (!rdma_cap_ib_mcast(dev->device, event->element.port_num)) return; index = event->element.port_num - dev->start_port; switch (event->event) { case IB_EVENT_PORT_ERR: case IB_EVENT_LID_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR); break; case IB_EVENT_PKEY_CHANGE: mcast_groups_event(&dev->port[index], MCAST_PKEY_EVENT); break; default: break; } } static void mcast_add_one(struct ib_device *device) { struct mcast_device *dev; struct mcast_port *port; int i; int count = 0; dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port, GFP_KERNEL); if (!dev) return; dev->start_port = rdma_start_port(device); dev->end_port = rdma_end_port(device); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (!rdma_cap_ib_mcast(device, dev->start_port + i)) continue; port = &dev->port[i]; port->dev = dev; port->port_num = dev->start_port + i; spin_lock_init(&port->lock); port->table = RB_ROOT; init_completion(&port->comp); atomic_set(&port->refcount, 1); ++count; } if (!count) { kfree(dev); return; } dev->device = device; ib_set_client_data(device, &mcast_client, dev); INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler); ib_register_event_handler(&dev->event_handler); } static void mcast_remove_one(struct ib_device *device, void *client_data) { struct mcast_device *dev = client_data; struct mcast_port *port; int i; if (!dev) return; ib_unregister_event_handler(&dev->event_handler); flush_workqueue(mcast_wq); for (i = 0; i <= dev->end_port - dev->start_port; i++) { if (rdma_cap_ib_mcast(device, dev->start_port + i)) { port = &dev->port[i]; deref_port(port); wait_for_completion(&port->comp); } } kfree(dev); } int mcast_init(void) { int ret; mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); if (!mcast_wq) return -ENOMEM; ib_sa_register_client(&sa_client); ret = ib_register_client(&mcast_client); if (ret) goto err; return 0; err: ib_sa_unregister_client(&sa_client); destroy_workqueue(mcast_wq); return ret; } void mcast_cleanup(void) { ib_unregister_client(&mcast_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(mcast_wq); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_packer.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_packer.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_packer.c (revision 331772) @@ -1,200 +1,204 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include static u64 value_read(int offset, int size, void *structure) { switch (size) { case 1: return *(u8 *) ((char *)structure + offset); case 2: return be16_to_cpup((__be16 *) ((char *)structure + offset)); case 4: return be32_to_cpup((__be32 *) ((char *)structure + offset)); case 8: return be64_to_cpup((__be64 *) ((char *)structure + offset)); default: pr_warn("Field size %d bits not handled\n", size * 8); return 0; } } /** * ib_pack - Pack a structure into a buffer * @desc:Array of structure field descriptions * @desc_len:Number of entries in @desc * @structure:Structure to pack from * @buf:Buffer to pack into * * ib_pack() packs a list of structure fields into a buffer, * controlled by the array of fields in @desc. */ void ib_pack(const struct ib_field *desc, int desc_len, void *structure, void *buf) { int i; for (i = 0; i < desc_len; ++i) { if (desc[i].size_bits <= 32) { int shift; u32 val; __be32 mask; __be32 *addr; shift = 32 - desc[i].offset_bits - desc[i].size_bits; if (desc[i].struct_size_bytes) val = value_read(desc[i].struct_offset_bytes, desc[i].struct_size_bytes, structure) << shift; else val = 0; mask = cpu_to_be32(((1ull << desc[i].size_bits) - 1) << shift); addr = (__be32 *) buf + desc[i].offset_words; *addr = (*addr & ~mask) | (cpu_to_be32(val) & mask); } else if (desc[i].size_bits <= 64) { int shift; u64 val; __be64 mask; __be64 *addr; shift = 64 - desc[i].offset_bits - desc[i].size_bits; if (desc[i].struct_size_bytes) val = value_read(desc[i].struct_offset_bytes, desc[i].struct_size_bytes, structure) << shift; else val = 0; mask = cpu_to_be64((~0ull >> (64 - desc[i].size_bits)) << shift); addr = (__be64 *) ((__be32 *) buf + desc[i].offset_words); *addr = (*addr & ~mask) | (cpu_to_be64(val) & mask); } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { pr_warn("Structure field %s of size %d bits is not byte-aligned\n", desc[i].field_name, desc[i].size_bits); } if (desc[i].struct_size_bytes) memcpy((char *)buf + desc[i].offset_words * 4 + desc[i].offset_bits / 8, (char *)structure + desc[i].struct_offset_bytes, desc[i].size_bits / 8); else memset((char *)buf + desc[i].offset_words * 4 + desc[i].offset_bits / 8, 0, desc[i].size_bits / 8); } } } EXPORT_SYMBOL(ib_pack); static void value_write(int offset, int size, u64 val, void *structure) { switch (size * 8) { case 8: *( u8 *) ((char *)structure + offset) = val; break; case 16: *(__be16 *) ((char *)structure + offset) = cpu_to_be16(val); break; case 32: *(__be32 *) ((char *)structure + offset) = cpu_to_be32(val); break; case 64: *(__be64 *) ((char *)structure + offset) = cpu_to_be64(val); break; default: pr_warn("Field size %d bits not handled\n", size * 8); } } /** * ib_unpack - Unpack a buffer into a structure * @desc:Array of structure field descriptions * @desc_len:Number of entries in @desc * @buf:Buffer to unpack from * @structure:Structure to unpack into * * ib_pack() unpacks a list of structure fields from a buffer, * controlled by the array of fields in @desc. */ void ib_unpack(const struct ib_field *desc, int desc_len, void *buf, void *structure) { int i; for (i = 0; i < desc_len; ++i) { if (!desc[i].struct_size_bytes) continue; if (desc[i].size_bits <= 32) { int shift; u32 val; u32 mask; __be32 *addr; shift = 32 - desc[i].offset_bits - desc[i].size_bits; mask = ((1ull << desc[i].size_bits) - 1) << shift; addr = (__be32 *) buf + desc[i].offset_words; val = (be32_to_cpup(addr) & mask) >> shift; value_write(desc[i].struct_offset_bytes, desc[i].struct_size_bytes, val, structure); } else if (desc[i].size_bits <= 64) { int shift; u64 val; u64 mask; __be64 *addr; shift = 64 - desc[i].offset_bits - desc[i].size_bits; mask = (~0ull >> (64 - desc[i].size_bits)) << shift; addr = (__be64 *) buf + desc[i].offset_words; val = (be64_to_cpup(addr) & mask) >> shift; value_write(desc[i].struct_offset_bytes, desc[i].struct_size_bytes, val, structure); } else { if (desc[i].offset_bits % 8 || desc[i].size_bits % 8) { pr_warn("Structure field %s of size %d bits is not byte-aligned\n", desc[i].field_name, desc[i].size_bits); } memcpy((char *)structure + desc[i].struct_offset_bytes, (char *)buf + desc[i].offset_words * 4 + desc[i].offset_bits / 8, desc[i].size_bits / 8); } } } EXPORT_SYMBOL(ib_unpack); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c (revision 331772) @@ -1,447 +1,451 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include "core_priv.h" #include #include #include #include #include #include static struct workqueue_struct *roce_gid_mgmt_wq; enum gid_op_type { GID_DEL = 0, GID_ADD }; struct roce_netdev_event_work { struct work_struct work; struct net_device *ndev; }; struct roce_rescan_work { struct work_struct work; struct ib_device *ib_dev; }; static const struct { bool (*is_supported)(const struct ib_device *device, u8 port_num); enum ib_gid_type gid_type; } PORT_CAP_TO_GID_TYPE[] = { {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE}, {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP}, }; #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE) unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port) { int i; unsigned int ret_flags = 0; if (!rdma_protocol_roce(ib_dev, port)) return 1UL << IB_GID_TYPE_IB; for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port)) ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type; return ret_flags; } EXPORT_SYMBOL(roce_gid_type_mask_support); static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, u8 port, union ib_gid *gid, struct net_device *ndev) { int i; unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port); struct ib_gid_attr gid_attr; memset(&gid_attr, 0, sizeof(gid_attr)); gid_attr.ndev = ndev; for (i = 0; i != IB_GID_TYPE_SIZE; i++) { if ((1UL << i) & gid_type_mask) { gid_attr.gid_type = i; switch (gid_op) { case GID_ADD: ib_cache_gid_add(ib_dev, port, gid, &gid_attr); break; case GID_DEL: ib_cache_gid_del(ib_dev, port, gid, &gid_attr); break; } } } } static int roce_gid_match_netdev(struct ib_device *ib_dev, u8 port, struct net_device *idev, void *cookie) { struct net_device *ndev = (struct net_device *)cookie; if (idev == NULL) return (0); return (ndev == idev); } static int roce_gid_match_all(struct ib_device *ib_dev, u8 port, struct net_device *idev, void *cookie) { if (idev == NULL) return (0); return (1); } static int roce_gid_enum_netdev_default(struct ib_device *ib_dev, u8 port, struct net_device *idev) { unsigned long gid_type_mask; gid_type_mask = roce_gid_type_mask_support(ib_dev, port); ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask, IB_CACHE_GID_DEFAULT_MODE_SET); return (hweight_long(gid_type_mask)); } #define ETH_IPOIB_DRV_NAME "ib" static inline int is_eth_ipoib_intf(struct net_device *dev) { if (strcmp(dev->if_dname, ETH_IPOIB_DRV_NAME)) return 0; return 1; } static void roce_gid_update_addr_callback(struct ib_device *device, u8 port, struct net_device *ndev, void *cookie) { struct ipx_entry { STAILQ_ENTRY(ipx_entry) entry; union ipx_addr { struct sockaddr sa[0]; struct sockaddr_in v4; struct sockaddr_in6 v6; } ipx_addr; }; struct ipx_entry *entry; struct net_device *idev; #if defined(INET) || defined(INET6) struct ifaddr *ifa; #endif union ib_gid gid; int default_gids; u16 index_num; int i; STAILQ_HEAD(, ipx_entry) ipx_head; STAILQ_INIT(&ipx_head); /* make sure default GIDs are in */ default_gids = roce_gid_enum_netdev_default(device, port, ndev); CURVNET_SET(ndev->if_vnet); IFNET_RLOCK(); TAILQ_FOREACH(idev, &V_ifnet, if_link) { if (idev != ndev) { if (idev->if_type != IFT_L2VLAN) continue; if (ndev != rdma_vlan_dev_real_dev(idev)) continue; } /* clone address information for IPv4 and IPv6 */ IF_ADDR_RLOCK(idev); #if defined(INET) TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET) continue; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) { pr_warn("roce_gid_update_addr_callback: " "couldn't allocate entry for IPv4 update\n"); continue; } entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr); STAILQ_INSERT_TAIL(&ipx_head, entry, entry); } #endif #if defined(INET6) TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET6) continue; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) { pr_warn("roce_gid_update_addr_callback: " "couldn't allocate entry for IPv6 update\n"); continue; } entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr); /* trash IPv6 scope ID */ sa6_recoverscope(&entry->ipx_addr.v6); entry->ipx_addr.v6.sin6_scope_id = 0; STAILQ_INSERT_TAIL(&ipx_head, entry, entry); } #endif IF_ADDR_RUNLOCK(idev); } IFNET_RUNLOCK(); CURVNET_RESTORE(); /* add missing GIDs, if any */ STAILQ_FOREACH(entry, &ipx_head, entry) { unsigned long gid_type_mask = roce_gid_type_mask_support(device, port); if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0) continue; for (i = 0; i != IB_GID_TYPE_SIZE; i++) { if (!((1UL << i) & gid_type_mask)) continue; /* check if entry found */ if (ib_find_cached_gid_by_port(device, &gid, i, port, ndev, &index_num) == 0) break; } if (i != IB_GID_TYPE_SIZE) continue; /* add new GID */ update_gid(GID_ADD, device, port, &gid, ndev); } /* remove stale GIDs, if any */ for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, NULL) == 0; i++) { union ipx_addr ipx; /* don't delete empty entries */ if (memcmp(&gid, &zgid, sizeof(zgid)) == 0) continue; /* zero default */ memset(&ipx, 0, sizeof(ipx)); rdma_gid2ip(&ipx.sa[0], &gid); STAILQ_FOREACH(entry, &ipx_head, entry) { if (memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0) break; } /* check if entry found */ if (entry != NULL) continue; /* remove GID */ update_gid(GID_DEL, device, port, &gid, ndev); } while ((entry = STAILQ_FIRST(&ipx_head))) { STAILQ_REMOVE_HEAD(&ipx_head, entry); kfree(entry); } } static void roce_gid_queue_scan_event_handler(struct work_struct *_work) { struct roce_netdev_event_work *work = container_of(_work, struct roce_netdev_event_work, work); ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev, roce_gid_update_addr_callback, NULL); dev_put(work->ndev); kfree(work); } static void roce_gid_queue_scan_event(struct net_device *ndev) { struct roce_netdev_event_work *work; retry: if (is_eth_ipoib_intf(ndev)) return; if (ndev->if_type != IFT_ETHER) { if (ndev->if_type == IFT_L2VLAN) { ndev = rdma_vlan_dev_real_dev(ndev); if (ndev != NULL) goto retry; } return; } work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) { pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); return; } INIT_WORK(&work->work, roce_gid_queue_scan_event_handler); dev_hold(ndev); work->ndev = ndev; queue_work(roce_gid_mgmt_wq, &work->work); } static void roce_gid_delete_all_event_handler(struct work_struct *_work) { struct roce_netdev_event_work *work = container_of(_work, struct roce_netdev_event_work, work); ib_cache_gid_del_all_by_netdev(work->ndev); dev_put(work->ndev); kfree(work); } static void roce_gid_delete_all_event(struct net_device *ndev) { struct roce_netdev_event_work *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) { pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); return; } INIT_WORK(&work->work, roce_gid_delete_all_event_handler); dev_hold(ndev); work->ndev = ndev; queue_work(roce_gid_mgmt_wq, &work->work); } static int inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *ndev = ptr; switch (event) { case NETDEV_UNREGISTER: roce_gid_delete_all_event(ndev); break; case NETDEV_REGISTER: case NETDEV_CHANGEADDR: case NETDEV_CHANGEIFADDR: roce_gid_queue_scan_event(ndev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block nb_inetaddr = { .notifier_call = inetaddr_event }; static void roce_rescan_device_handler(struct work_struct *_work) { struct roce_rescan_work *work = container_of(_work, struct roce_rescan_work, work); ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL, roce_gid_update_addr_callback, NULL); kfree(work); } /* Caller must flush system workqueue before removing the ib_device */ int roce_rescan_device(struct ib_device *ib_dev) { struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) return -ENOMEM; work->ib_dev = ib_dev; INIT_WORK(&work->work, roce_rescan_device_handler); queue_work(roce_gid_mgmt_wq, &work->work); return 0; } int __init roce_gid_mgmt_init(void) { roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0); if (!roce_gid_mgmt_wq) { pr_warn("roce_gid_mgmt: can't allocate work queue\n"); return -ENOMEM; } register_inetaddr_notifier(&nb_inetaddr); /* * We rely on the netdevice notifier to enumerate all existing * devices in the system. Register to this notifier last to * make sure we will not miss any IP add/del callbacks. */ register_netdevice_notifier(&nb_inetaddr); return 0; } void __exit roce_gid_mgmt_cleanup(void) { unregister_inetaddr_notifier(&nb_inetaddr); unregister_netdevice_notifier(&nb_inetaddr); /* * Ensure all gid deletion tasks complete before we go down, * to avoid any reference to free'd memory. By the time * ib-core is removed, all physical devices have been removed, * so no issue with remaining hardware contexts. */ synchronize_rcu(); drain_workqueue(roce_gid_mgmt_wq); destroy_workqueue(roce_gid_mgmt_wq); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_sa_query.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_sa_query.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_sa_query.c (revision 331772) @@ -1,1580 +1,1584 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "sa.h" #include "core_priv.h" #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; u16 pkey_index; u8 src_path_mask; }; struct ib_sa_classport_cache { bool valid; struct ib_class_port_info data; }; struct ib_sa_port { struct ib_mad_agent *agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; struct ib_sa_classport_cache classport_info; spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u8 port_num; }; struct ib_sa_device { int start_port, end_port; struct ib_event_handler event_handler; struct ib_sa_port port[0]; }; struct ib_sa_query { void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; int id; u32 flags; struct list_head list; /* Local svc request list */ u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 #define IB_SA_CANCEL 0x00000002 struct ib_sa_service_query { void (*callback)(int, struct ib_sa_service_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_path_query { void (*callback)(int, struct ib_sa_path_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_guidinfo_query { void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_classport_info_query { void (*callback)(int, struct ib_class_port_info *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; struct ib_sa_query sa_query; }; static void ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client sa_client = { .name = "sa", .add = ib_sa_add_one, .remove = ib_sa_remove_one }; static DEFINE_SPINLOCK(idr_lock); static DEFINE_IDR(query_idr); static DEFINE_SPINLOCK(tid_lock); static u32 tid; #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(sgid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { PATH_REC_FIELD(slid), .offset_words = 10, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(raw_traffic), .offset_words = 11, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 11, .offset_bits = 1, .size_bits = 3 }, { PATH_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { PATH_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { PATH_REC_FIELD(traffic_class), .offset_words = 12, .offset_bits = 0, .size_bits = 8 }, { PATH_REC_FIELD(reversible), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { PATH_REC_FIELD(numb_path), .offset_words = 12, .offset_bits = 9, .size_bits = 7 }, { PATH_REC_FIELD(pkey), .offset_words = 12, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(qos_class), .offset_words = 13, .offset_bits = 0, .size_bits = 12 }, { PATH_REC_FIELD(sl), .offset_words = 13, .offset_bits = 12, .size_bits = 4 }, { PATH_REC_FIELD(mtu_selector), .offset_words = 13, .offset_bits = 16, .size_bits = 2 }, { PATH_REC_FIELD(mtu), .offset_words = 13, .offset_bits = 18, .size_bits = 6 }, { PATH_REC_FIELD(rate_selector), .offset_words = 13, .offset_bits = 24, .size_bits = 2 }, { PATH_REC_FIELD(rate), .offset_words = 13, .offset_bits = 26, .size_bits = 6 }, { PATH_REC_FIELD(packet_life_time_selector), .offset_words = 14, .offset_bits = 0, .size_bits = 2 }, { PATH_REC_FIELD(packet_life_time), .offset_words = 14, .offset_bits = 2, .size_bits = 6 }, { PATH_REC_FIELD(preference), .offset_words = 14, .offset_bits = 8, .size_bits = 8 }, { RESERVED, .offset_words = 14, .offset_bits = 16, .size_bits = 48 }, }; #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \ .field_name = "sa_mcmember_rec:" #field static const struct ib_field mcmember_rec_table[] = { { MCMEMBER_REC_FIELD(mgid), .offset_words = 0, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(port_gid), .offset_words = 4, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(qkey), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { MCMEMBER_REC_FIELD(mlid), .offset_words = 9, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(mtu_selector), .offset_words = 9, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(mtu), .offset_words = 9, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(traffic_class), .offset_words = 9, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(pkey), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(rate_selector), .offset_words = 10, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(rate), .offset_words = 10, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(packet_life_time_selector), .offset_words = 10, .offset_bits = 24, .size_bits = 2 }, { MCMEMBER_REC_FIELD(packet_life_time), .offset_words = 10, .offset_bits = 26, .size_bits = 6 }, { MCMEMBER_REC_FIELD(sl), .offset_words = 11, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { MCMEMBER_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(scope), .offset_words = 12, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(join_state), .offset_words = 12, .offset_bits = 4, .size_bits = 4 }, { MCMEMBER_REC_FIELD(proxy_join), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { RESERVED, .offset_words = 12, .offset_bits = 9, .size_bits = 23 }, }; #define SERVICE_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \ .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \ .field_name = "sa_service_rec:" #field static const struct ib_field service_rec_table[] = { { SERVICE_REC_FIELD(id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { SERVICE_REC_FIELD(gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { SERVICE_REC_FIELD(pkey), .offset_words = 6, .offset_bits = 0, .size_bits = 16 }, { SERVICE_REC_FIELD(lease), .offset_words = 7, .offset_bits = 0, .size_bits = 32 }, { SERVICE_REC_FIELD(key), .offset_words = 8, .offset_bits = 0, .size_bits = 128 }, { SERVICE_REC_FIELD(name), .offset_words = 12, .offset_bits = 0, .size_bits = 64*8 }, { SERVICE_REC_FIELD(data8), .offset_words = 28, .offset_bits = 0, .size_bits = 16*8 }, { SERVICE_REC_FIELD(data16), .offset_words = 32, .offset_bits = 0, .size_bits = 8*16 }, { SERVICE_REC_FIELD(data32), .offset_words = 36, .offset_bits = 0, .size_bits = 4*32 }, { SERVICE_REC_FIELD(data64), .offset_words = 40, .offset_bits = 0, .size_bits = 2*64 }, }; #define CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \ .field_name = "ib_class_port_info:" #field static const struct ib_field classport_info_rec_table[] = { { CLASSPORTINFO_REC_FIELD(base_version), .offset_words = 0, .offset_bits = 0, .size_bits = 8 }, { CLASSPORTINFO_REC_FIELD(class_version), .offset_words = 0, .offset_bits = 8, .size_bits = 8 }, { CLASSPORTINFO_REC_FIELD(capability_mask), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), .offset_words = 6, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_lid), .offset_words = 7, .offset_bits = 0, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(redirect_pkey), .offset_words = 7, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(redirect_qp), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_qkey), .offset_words = 9, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_gid), .offset_words = 10, .offset_bits = 0, .size_bits = 128 }, { CLASSPORTINFO_REC_FIELD(trap_tcslfl), .offset_words = 14, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_lid), .offset_words = 15, .offset_bits = 0, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(trap_pkey), .offset_words = 15, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(trap_hlqp), .offset_words = 16, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_qkey), .offset_words = 17, .offset_bits = 0, .size_bits = 32 }, }; #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ .field_name = "sa_guidinfo_rec:" #field static const struct ib_field guidinfo_rec_table[] = { { GUIDINFO_REC_FIELD(lid), .offset_words = 0, .offset_bits = 0, .size_bits = 16 }, { GUIDINFO_REC_FIELD(block_num), .offset_words = 0, .offset_bits = 16, .size_bits = 8 }, { GUIDINFO_REC_FIELD(res1), .offset_words = 0, .offset_bits = 24, .size_bits = 8 }, { GUIDINFO_REC_FIELD(res2), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { GUIDINFO_REC_FIELD(guid_info_list), .offset_words = 2, .offset_bits = 0, .size_bits = 512 }, }; static inline void ib_sa_disable_local_svc(struct ib_sa_query *query) { query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE; } static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); ib_destroy_ah(sm_ah->ah); kfree(sm_ah); } static void update_sm_ah(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, update_task); struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct ib_ah_attr ah_attr; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); return; } new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL); if (!new_ah) { return; } kref_init(&new_ah->ref); new_ah->src_path_mask = (1 << port_attr.lmc) - 1; new_ah->pkey_index = 0; if (ib_find_pkey(port->agent->device, port->port_num, IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) pr_err("Couldn't find index for default PKey\n"); memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = port_attr.sm_lid; ah_attr.sl = port_attr.sm_sl; ah_attr.port_num = port->port_num; if (port_attr.grh_required) { ah_attr.ah_flags = IB_AH_GRH; ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix); ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID); } new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr); if (IS_ERR(new_ah->ah)) { pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); return; } spin_lock_irq(&port->ah_lock); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); } static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) { if (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER) { unsigned long flags; struct ib_sa_device *sa_dev = container_of(handler, typeof(*sa_dev), event_handler); struct ib_sa_port *port = &sa_dev->port[event->element.port_num - sa_dev->start_port]; if (!rdma_cap_ib_sa(handler->device, port->port_num)) return; spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); if (event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_LID_CHANGE) { spin_lock_irqsave(&port->classport_lock, flags); port->classport_info.valid = false; spin_unlock_irqrestore(&port->classport_lock, flags); } queue_work(ib_wq, &sa_dev->port[event->element.port_num - sa_dev->start_port].update_task); } } void ib_sa_register_client(struct ib_sa_client *client) { atomic_set(&client->users, 1); init_completion(&client->comp); } EXPORT_SYMBOL(ib_sa_register_client); void ib_sa_unregister_client(struct ib_sa_client *client) { ib_sa_client_put(client); wait_for_completion(&client->comp); } EXPORT_SYMBOL(ib_sa_unregister_client); /** * ib_sa_cancel_query - try to cancel an SA query * @id:ID of query to cancel * @query:query pointer to cancel * * Try to cancel an SA query. If the id and query don't match up or * the query has already completed, nothing is done. Otherwise the * query is canceled and will complete with a status of -EINTR. */ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; struct ib_mad_agent *agent; struct ib_mad_send_buf *mad_buf; spin_lock_irqsave(&idr_lock, flags); if (idr_find(&query_idr, id) != query) { spin_unlock_irqrestore(&idr_lock, flags); return; } agent = query->port->agent; mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); } EXPORT_SYMBOL(ib_sa_cancel_query); static u8 get_src_path_mask(struct ib_device *device, u8 port_num) { struct ib_sa_device *sa_dev; struct ib_sa_port *port; unsigned long flags; u8 src_path_mask; sa_dev = ib_get_client_data(device, &sa_client); if (!sa_dev) return 0x7f; port = &sa_dev->port[port_num - sa_dev->start_port]; spin_lock_irqsave(&port->ah_lock, flags); src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f; spin_unlock_irqrestore(&port->ah_lock, flags); return src_path_mask; } int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr) { int ret; u16 gid_index; int use_roce; struct net_device *ndev = NULL; memset(ah_attr, 0, sizeof *ah_attr); ah_attr->dlid = be16_to_cpu(rec->dlid); ah_attr->sl = rec->sl; ah_attr->src_path_bits = be16_to_cpu(rec->slid) & get_src_path_mask(device, port_num); ah_attr->port_num = port_num; ah_attr->static_rate = rec->rate; use_roce = rdma_cap_eth_ah(device, port_num); if (use_roce) { struct net_device *idev; struct net_device *resolved_dev; struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex, .net = rec->net ? rec->net : &init_net}; union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; if (!device->get_netdev) return -EOPNOTSUPP; rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid); rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid); /* validate the route */ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr, &dgid_addr._sockaddr, &dev_addr); if (ret) return ret; if ((dev_addr.network == RDMA_NETWORK_IPV4 || dev_addr.network == RDMA_NETWORK_IPV6) && rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) return -EINVAL; idev = device->get_netdev(device, port_num); if (!idev) return -ENODEV; resolved_dev = dev_get_by_index(dev_addr.net, dev_addr.bound_dev_if); if (resolved_dev->if_flags & IFF_LOOPBACK) { dev_put(resolved_dev); resolved_dev = idev; dev_hold(resolved_dev); } ndev = ib_get_ndev_from_path(rec); rcu_read_lock(); if ((ndev && ndev != resolved_dev) || (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev, resolved_dev))) ret = -EHOSTUNREACH; rcu_read_unlock(); dev_put(idev); dev_put(resolved_dev); if (ret) { if (ndev) dev_put(ndev); return ret; } } if (rec->hop_limit > 0 || use_roce) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = rec->dgid; ret = ib_find_cached_gid_by_port(device, &rec->sgid, rec->gid_type, port_num, ndev, &gid_index); if (ret) { if (ndev) dev_put(ndev); return ret; } ah_attr->grh.sgid_index = gid_index; ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label); ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; if (ndev) dev_put(ndev); } if (use_roce) memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) { unsigned long flags; spin_lock_irqsave(&query->port->ah_lock, flags); if (!query->port->sm_ah) { spin_unlock_irqrestore(&query->port->ah_lock, flags); return -EAGAIN; } kref_get(&query->port->sm_ah->ref); query->sm_ah = query->port->sm_ah; spin_unlock_irqrestore(&query->port->ah_lock, flags); query->mad_buf = ib_create_send_mad(query->port->agent, 1, query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, gfp_mask, IB_MGMT_BASE_VERSION); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; } query->mad_buf->ah = query->sm_ah->ah; return 0; } static void free_mad(struct ib_sa_query *query) { ib_free_send_mad(query->mad_buf); kref_put(&query->sm_ah->ref, free_sm_ah); } static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) { unsigned long flags; memset(mad, 0, sizeof *mad); mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; spin_lock_irqsave(&tid_lock, flags); mad->mad_hdr.tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); spin_unlock_irqrestore(&tid_lock, flags); } static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { bool preload = gfpflags_allow_blocking(gfp_mask); unsigned long flags; int ret, id; if (preload) idr_preload(gfp_mask); spin_lock_irqsave(&idr_lock, flags); id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); spin_unlock_irqrestore(&idr_lock, flags); if (preload) idr_preload_end(); if (id < 0) return id; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; query->id = id; if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) { ib_sa_disable_local_svc(query); } ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { spin_lock_irqsave(&idr_lock, flags); idr_remove(&query_idr, id); spin_unlock_irqrestore(&idr_lock, flags); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in * another context. */ return ret ? ret : id; } void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) { ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); } EXPORT_SYMBOL(ib_sa_unpack_path); void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute) { ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); } EXPORT_SYMBOL(ib_sa_pack_path); static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); if (mad) { struct ib_sa_path_rec rec; ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.net = NULL; rec.ifindex = 0; rec.gid_type = IB_GID_TYPE_IB; eth_zero_addr(rec.dmac); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_path_query, sa_query)); } /** * ib_sa_path_rec_get - Start a Path get query * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Path Record to send in query * @comp_mask:component mask to send in query * @timeout_ms:time to wait for response * @gfp_mask:GFP mask to use for internal allocations * @callback:function called when query completes, times out or is * canceled * @context:opaque user context passed to callback * @sa_query:query context, used to cancel query * * Send a Path Record Get query to the SA to look up a path. The * callback function will be called when the query completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_path_rec_get() is negative, it is an * error code. Otherwise it is a query ID that can be used to cancel * the query. */ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); *sa_query = &query->sa_query; query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE; query->sa_query.mad_buf->context[1] = rec; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_service_query *query = container_of(sa_query, struct ib_sa_service_query, sa_query); if (mad) { struct ib_sa_service_rec rec; ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_service_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_service_query, sa_query)); } /** * ib_sa_service_rec_query - Start Service Record operation * @client:SA client * @device:device to send request on * @port_num: port number to send request on * @method:SA method - should be get, set, or delete * @rec:Service Record to send in request * @comp_mask:component mask to send in request * @timeout_ms:time to wait for response * @gfp_mask:GFP mask to use for internal allocations * @callback:function called when request completes, times out or is * canceled * @context:opaque user context passed to callback * @sa_query:request context, used to cancel request * * Send a Service Record set/get/delete to the SA to register, * unregister or query a service record. * The callback function will be called when the request completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_service_rec_query() is negative, it is an * error code. Otherwise it is a request ID that can be used to cancel * the query. */ int ib_sa_service_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_service_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) return -EINVAL; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL; query->sa_query.release = ib_sa_service_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_service_rec_query); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = container_of(sa_query, struct ib_sa_mcmember_query, sa_query); if (mad) { struct ib_sa_mcmember_rec rec; ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } int ib_sa_mcmember_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); if (mad) { struct ib_sa_guidinfo_rec rec; ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); } int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_guidinfo_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) { return -EINVAL; } port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; query->sa_query.release = ib_sa_guidinfo_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); /* Support get SA ClassPortInfo */ static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); if (mad) { struct ib_class_port_info rec; ib_unpack(classport_info_rec_table, ARRAY_SIZE(classport_info_rec_table), mad->data, &rec); spin_lock_irqsave(&sa_query->port->classport_lock, flags); if (!status && !sa_query->port->classport_info.valid) { memcpy(&sa_query->port->classport_info.data, &rec, sizeof(sa_query->port->classport_info.data)); sa_query->port->classport_info.valid = true; } spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); query->callback(status, &rec, query->context); } else { query->callback(status, NULL, query->context); } } static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_classport_info_query, sa_query)); } int ib_sa_classport_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_class_port_info *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_classport_info_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; struct ib_class_port_info cached_class_port_info; int ret; unsigned long flags; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; /* Use cached ClassPortInfo attribute if valid instead of sending mad */ spin_lock_irqsave(&port->classport_lock, flags); if (port->classport_info.valid && callback) { memcpy(&cached_class_port_info, &port->classport_info.data, sizeof(cached_class_port_info)); spin_unlock_irqrestore(&port->classport_lock, flags); callback(0, &cached_class_port_info, context); return 0; } spin_unlock_irqrestore(&port->classport_lock, flags); query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(mad, agent); query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL; query->sa_query.release = ib_sa_portclass_info_rec_release; /* support GET only */ mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); mad->sa_hdr.comp_mask = 0; *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_classport_info_rec_query); static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: query->callback(query, -ETIMEDOUT, NULL); break; case IB_WC_WR_FLUSH_ERR: query->callback(query, -EINTR, NULL); break; default: query->callback(query, -EIO, NULL); break; } spin_lock_irqsave(&idr_lock, flags); idr_remove(&query_idr, query->id); spin_unlock_irqrestore(&idr_lock, flags); free_mad(query); ib_sa_client_put(query->client); query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; if (!send_buf) return; query = send_buf->context[0]; if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? -EINVAL : 0, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else query->callback(query, -EIO, NULL); } ib_free_recv_mad(mad_recv_wc); } static void ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; int count = 0; s = rdma_start_port(device); e = rdma_end_port(device); sa_dev = kzalloc(sizeof *sa_dev + (e - s + 1) * sizeof (struct ib_sa_port), GFP_KERNEL); if (!sa_dev) return; sa_dev->start_port = s; sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); if (!rdma_cap_ib_sa(device, i + 1)) continue; sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; spin_lock_init(&sa_dev->port[i].classport_lock); sa_dev->port[i].classport_info.valid = false; sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, recv_handler, sa_dev, 0); if (IS_ERR(sa_dev->port[i].agent)) goto err; INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); count++; } if (!count) goto free; ib_set_client_data(device, &sa_client, sa_dev); /* * We register our event handler after everything is set up, * and then update our cached info after the event handler is * registered to avoid any problems if a port changes state * during our initialization. */ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); if (ib_register_event_handler(&sa_dev->event_handler)) goto err; for (i = 0; i <= e - s; ++i) { if (rdma_cap_ib_sa(device, i + 1)) update_sm_ah(&sa_dev->port[i].update_task); } return; err: while (--i >= 0) { if (rdma_cap_ib_sa(device, i + 1)) ib_unregister_mad_agent(sa_dev->port[i].agent); } free: kfree(sa_dev); return; } static void ib_sa_remove_one(struct ib_device *device, void *client_data) { struct ib_sa_device *sa_dev = client_data; int i; if (!sa_dev) return; ib_unregister_event_handler(&sa_dev->event_handler); flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { if (rdma_cap_ib_sa(device, i + 1)) { ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } } kfree(sa_dev); } int ib_sa_init(void) { int ret; get_random_bytes(&tid, sizeof tid); ret = ib_register_client(&sa_client); if (ret) { pr_err("Couldn't register ib_sa client\n"); goto err1; } ret = mcast_init(); if (ret) { pr_err("Couldn't initialize multicast handling\n"); goto err2; } return 0; err2: ib_unregister_client(&sa_client); err1: return ret; } void ib_sa_cleanup(void) { mcast_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_smi.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_smi.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_smi.c (revision 331772) @@ -1,338 +1,341 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004, 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004, 2005 Infinicon Corporation. All rights reserved. * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004, 2005 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * + * $FreeBSD$ */ #include #include "smi.h" #include "opa_smi.h" static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, const u8 *return_path, u8 direction, bool dr_dlid_is_permissive, bool dr_slid_is_permissive) { /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; if (!direction) { /* C14-9:1 */ if (hop_cnt && *hop_ptr == 0) { (*hop_ptr)++; return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:2 */ if (*hop_ptr && *hop_ptr < hop_cnt) { if (!is_switch) return IB_SMI_DISCARD; /* return_path set when received */ (*hop_ptr)++; return (initial_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ if (*hop_ptr == hop_cnt) { /* return_path set when received */ (*hop_ptr)++; return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- Fail unreasonable hop pointer */ return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ if (hop_cnt && *hop_ptr == hop_cnt + 1) { (*hop_ptr)--; return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { if (!is_switch) return IB_SMI_DISCARD; (*hop_ptr)--; return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- at the end of the DR segment of path */ if (*hop_ptr == 1) { (*hop_ptr)--; /* C14-13:3 -- SMPs destined for SM shouldn't be here */ return (is_switch || dr_slid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> should have gone to SM */ if (*hop_ptr == 0) return IB_SMI_HANDLE; /* C14-13:5 -- Check for unreasonable hop pointer */ return IB_SMI_DISCARD; } } /* * Fixup a directed route SMP for sending * Return IB_SMI_DISCARD if the SMP should be discarded */ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, bool is_switch, int port_num) { return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, ib_get_smp_direction(smp), smp->dr_dlid == IB_LID_PERMISSIVE, smp->dr_slid == IB_LID_PERMISSIVE); } enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, bool is_switch, int port_num) { return __smi_handle_dr_smp_send(is_switch, port_num, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, opa_get_smp_direction(smp), smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE, smp->route.dr.dr_slid == OPA_LID_PERMISSIVE); } static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num, int phys_port_cnt, u8 *hop_ptr, u8 hop_cnt, const u8 *initial_path, u8 *return_path, u8 direction, bool dr_dlid_is_permissive, bool dr_slid_is_permissive) { /* See section 14.2.2.2, Vol 1 IB spec */ /* C14-6 -- valid hop_cnt values are from 0 to 63 */ if (hop_cnt >= IB_SMP_MAX_PATH_HOPS) return IB_SMI_DISCARD; if (!direction) { /* C14-9:1 -- sender should have incremented hop_ptr */ if (hop_cnt && *hop_ptr == 0) return IB_SMI_DISCARD; /* C14-9:2 -- intermediate hop */ if (*hop_ptr && *hop_ptr < hop_cnt) { if (!is_switch) return IB_SMI_DISCARD; return_path[*hop_ptr] = port_num; /* hop_ptr updated when sending */ return (initial_path[*hop_ptr+1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:3 -- We're at the end of the DR segment of path */ if (*hop_ptr == hop_cnt) { if (hop_cnt) return_path[*hop_ptr] = port_num; /* hop_ptr updated when sending */ return (is_switch || dr_dlid_is_permissive ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ /* C14-9:5 -- fail unreasonable hop pointer */ return (*hop_ptr == hop_cnt + 1 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } else { /* C14-13:1 */ if (hop_cnt && *hop_ptr == hop_cnt + 1) { (*hop_ptr)--; return (return_path[*hop_ptr] == port_num ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:2 */ if (2 <= *hop_ptr && *hop_ptr <= hop_cnt) { if (!is_switch) return IB_SMI_DISCARD; /* hop_ptr updated when sending */ return (return_path[*hop_ptr-1] <= phys_port_cnt ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:3 -- We're at the end of the DR segment of path */ if (*hop_ptr == 1) { if (dr_slid_is_permissive) { /* giving SMP to SM - update hop_ptr */ (*hop_ptr)--; return IB_SMI_HANDLE; } /* hop_ptr updated when sending */ return (is_switch ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* C14-13:4 -- hop_ptr = 0 -> give to SM */ /* C14-13:5 -- Check for unreasonable hop pointer */ return (*hop_ptr == 0 ? IB_SMI_HANDLE : IB_SMI_DISCARD); } } /* * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->initial_path, smp->return_path, ib_get_smp_direction(smp), smp->dr_dlid == IB_LID_PERMISSIVE, smp->dr_slid == IB_LID_PERMISSIVE); } /* * Adjust information for a received SMP * Return IB_SMI_DISCARD if the SMP should be dropped */ enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt) { return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt, &smp->hop_ptr, smp->hop_cnt, smp->route.dr.initial_path, smp->route.dr.return_path, opa_get_smp_direction(smp), smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE, smp->route.dr.dr_slid == OPA_LID_PERMISSIVE); } static enum smi_forward_action __smi_check_forward_dr_smp(u8 hop_ptr, u8 hop_cnt, u8 direction, bool dr_dlid_is_permissive, bool dr_slid_is_permissive) { if (!direction) { /* C14-9:2 -- intermediate hop */ if (hop_ptr && hop_ptr < hop_cnt) return IB_SMI_FORWARD; /* C14-9:3 -- at the end of the DR segment of path */ if (hop_ptr == hop_cnt) return (dr_dlid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); /* C14-9:4 -- hop_ptr = hop_cnt + 1 -> give to SMA/SM */ if (hop_ptr == hop_cnt + 1) return IB_SMI_SEND; } else { /* C14-13:2 -- intermediate hop */ if (2 <= hop_ptr && hop_ptr <= hop_cnt) return IB_SMI_FORWARD; /* C14-13:3 -- at the end of the DR segment of path */ if (hop_ptr == 1) return (!dr_slid_is_permissive ? IB_SMI_SEND : IB_SMI_LOCAL); } return IB_SMI_LOCAL; } enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp) { return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, ib_get_smp_direction(smp), smp->dr_dlid == IB_LID_PERMISSIVE, smp->dr_slid == IB_LID_PERMISSIVE); } enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp) { return __smi_check_forward_dr_smp(smp->hop_ptr, smp->hop_cnt, opa_get_smp_direction(smp), smp->route.dr.dr_dlid == OPA_LID_PERMISSIVE, smp->route.dr.dr_slid == OPA_LID_PERMISSIVE); } /* * Return the forwarding port number from initial_path for outgoing SMP and * from return_path for returning SMP */ int smi_get_fwd_port(struct ib_smp *smp) { return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] : smp->return_path[smp->hop_ptr-1]); } /* * Return the forwarding port number from initial_path for outgoing SMP and * from return_path for returning SMP */ int opa_smi_get_fwd_port(struct opa_smp *smp) { return !opa_get_smp_direction(smp) ? smp->route.dr.initial_path[smp->hop_ptr+1] : smp->route.dr.return_path[smp->hop_ptr-1]; } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_sysfs.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_sysfs.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_sysfs.c (revision 331772) @@ -1,1327 +1,1331 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include "core_priv.h" #include #include #include #include #include #include #include struct ib_port; struct gid_attr_group { struct ib_port *port; struct kobject kobj; struct attribute_group ndev; struct attribute_group type; }; struct ib_port { struct kobject kobj; struct ib_device *ibdev; struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; struct attribute_group pkey_group; struct attribute_group *pma_table; struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; u8 port_num; }; struct port_attribute { struct attribute attr; ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf); ssize_t (*store)(struct ib_port *, struct port_attribute *, const char *buf, size_t count); }; #define PORT_ATTR(_name, _mode, _show, _store) \ struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store) #define PORT_ATTR_RO(_name) \ struct port_attribute port_attr_##_name = __ATTR_RO(_name) struct port_table_attribute { struct port_attribute attr; char name[8]; int index; __be16 attr_id; }; struct hw_stats_attribute { struct attribute attr; ssize_t (*show)(struct kobject *kobj, struct attribute *attr, char *buf); ssize_t (*store)(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count); int index; u8 port_num; }; static ssize_t port_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct port_attribute *port_attr = container_of(attr, struct port_attribute, attr); struct ib_port *p = container_of(kobj, struct ib_port, kobj); if (!port_attr->show) return -EIO; return port_attr->show(p, port_attr, buf); } static const struct sysfs_ops port_sysfs_ops = { .show = port_attr_show }; static ssize_t gid_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct port_attribute *port_attr = container_of(attr, struct port_attribute, attr); struct ib_port *p = container_of(kobj, struct gid_attr_group, kobj)->port; if (!port_attr->show) return -EIO; return port_attr->show(p, port_attr, buf); } static const struct sysfs_ops gid_attr_sysfs_ops = { .show = gid_attr_show }; static ssize_t state_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; static const char *state_name[] = { [IB_PORT_NOP] = "NOP", [IB_PORT_DOWN] = "DOWN", [IB_PORT_INIT] = "INIT", [IB_PORT_ARMED] = "ARMED", [IB_PORT_ACTIVE] = "ACTIVE", [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER" }; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "%d: %s\n", attr.state, attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ? state_name[attr.state] : "UNKNOWN"); } static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "0x%x\n", attr.lid); } static ssize_t lid_mask_count_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "%d\n", attr.lmc); } static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "0x%x\n", attr.sm_lid); } static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "%d\n", attr.sm_sl); } static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; return sprintf(buf, "0x%08x\n", attr.port_cap_flags); } static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; char *speed = ""; int rate; /* in deci-Gb/sec */ ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; switch (attr.active_speed) { case IB_SPEED_DDR: speed = " DDR"; rate = 50; break; case IB_SPEED_QDR: speed = " QDR"; rate = 100; break; case IB_SPEED_FDR10: speed = " FDR10"; rate = 100; break; case IB_SPEED_FDR: speed = " FDR"; rate = 140; break; case IB_SPEED_EDR: speed = " EDR"; rate = 250; break; case IB_SPEED_SDR: default: /* default to SDR for invalid rates */ rate = 25; break; } rate *= ib_width_enum_to_int(attr.active_width); if (rate < 0) return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10, rate % 10 ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused, char *buf) { struct ib_port_attr attr; ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; switch (attr.phys_state) { case 1: return sprintf(buf, "1: Sleep\n"); case 2: return sprintf(buf, "2: Polling\n"); case 3: return sprintf(buf, "3: Disabled\n"); case 4: return sprintf(buf, "4: PortConfigurationTraining\n"); case 5: return sprintf(buf, "5: LinkUp\n"); case 6: return sprintf(buf, "6: LinkErrorRecovery\n"); case 7: return sprintf(buf, "7: Phy Test\n"); default: return sprintf(buf, "%d: \n", attr.phys_state); } } static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused, char *buf) { switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) { case IB_LINK_LAYER_INFINIBAND: return sprintf(buf, "%s\n", "InfiniBand"); case IB_LINK_LAYER_ETHERNET: return sprintf(buf, "%s\n", "Ethernet"); default: return sprintf(buf, "%s\n", "Unknown"); } } static PORT_ATTR_RO(state); static PORT_ATTR_RO(lid); static PORT_ATTR_RO(lid_mask_count); static PORT_ATTR_RO(sm_lid); static PORT_ATTR_RO(sm_sl); static PORT_ATTR_RO(cap_mask); static PORT_ATTR_RO(rate); static PORT_ATTR_RO(phys_state); static PORT_ATTR_RO(link_layer); static struct attribute *port_default_attrs[] = { &port_attr_state.attr, &port_attr_lid.attr, &port_attr_lid_mask_count.attr, &port_attr_sm_lid.attr, &port_attr_sm_sl.attr, &port_attr_cap_mask.attr, &port_attr_rate.attr, &port_attr_phys_state.attr, &port_attr_link_layer.attr, NULL }; static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf) { if (!gid_attr->ndev) return -EINVAL; return sprintf(buf, "%s\n", if_name(gid_attr->ndev)); } static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf) { return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type)); } static ssize_t _show_port_gid_attr(struct ib_port *p, struct port_attribute *attr, char *buf, size_t (*print)(struct ib_gid_attr *gid_attr, char *buf)) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); union ib_gid gid; struct ib_gid_attr gid_attr = {}; ssize_t ret; ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, &gid_attr); if (ret) goto err; ret = print(&gid_attr, buf); err: if (gid_attr.ndev) dev_put(gid_attr.ndev); return ret; } static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); union ib_gid gid; ssize_t ret; ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL); if (ret) return ret; return sprintf(buf, GID_PRINT_FMT"\n", GID_PRINT_ARGS(gid.raw)); } static ssize_t show_port_gid_attr_ndev(struct ib_port *p, struct port_attribute *attr, char *buf) { return _show_port_gid_attr(p, attr, buf, print_ndev); } static ssize_t show_port_gid_attr_gid_type(struct ib_port *p, struct port_attribute *attr, char *buf) { return _show_port_gid_attr(p, attr, buf, print_gid_type); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); u16 pkey; ssize_t ret; ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey); if (ret) return ret; return sprintf(buf, "0x%04x\n", pkey); } #define PORT_PMA_ATTR(_name, _counter, _width, _offset) \ struct port_table_attribute port_pma_attr_##_name = { \ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ .index = (_offset) | ((_width) << 16) | ((_counter) << 24), \ .attr_id = IB_PMA_PORT_COUNTERS , \ } #define PORT_PMA_ATTR_EXT(_name, _width, _offset) \ struct port_table_attribute port_pma_attr_ext_##_name = { \ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \ .index = (_offset) | ((_width) << 16), \ .attr_id = IB_PMA_PORT_COUNTERS_EXT , \ } /* * Get a Perfmgmt MAD block of data. * Returns error code or the number of bytes retrieved. */ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr, void *data, int offset, size_t size) { struct ib_mad *in_mad; struct ib_mad *out_mad; size_t mad_size = sizeof(*out_mad); u16 out_mad_pkey_index = 0; ssize_t ret; if (!dev->process_mad) return -ENOSYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) { ret = -ENOMEM; goto out; } in_mad->mad_hdr.base_version = 1; in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT; in_mad->mad_hdr.class_version = 1; in_mad->mad_hdr.method = IB_MGMT_METHOD_GET; in_mad->mad_hdr.attr_id = attr; if (attr != IB_PMA_CLASS_PORT_INFO) in_mad->data[41] = port_num; /* PortSelect field */ if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL, (const struct ib_mad_hdr *)in_mad, mad_size, (struct ib_mad_hdr *)out_mad, &mad_size, &out_mad_pkey_index) & (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) != (IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) { ret = -EINVAL; goto out; } memcpy(data, out_mad->data + offset, size); ret = size; out: kfree(in_mad); kfree(out_mad); return ret; } static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr, char *buf) { struct port_table_attribute *tab_attr = container_of(attr, struct port_table_attribute, attr); int offset = tab_attr->index & 0xffff; int width = (tab_attr->index >> 16) & 0xff; ssize_t ret; u8 data[8]; ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, 40 + offset / 8, sizeof(data)); if (ret < 0) return sprintf(buf, "N/A (no PMA)\n"); switch (width) { case 4: ret = sprintf(buf, "%u\n", (*data >> (4 - (offset % 8))) & 0xf); break; case 8: ret = sprintf(buf, "%u\n", *data); break; case 16: ret = sprintf(buf, "%u\n", be16_to_cpup((__be16 *)data)); break; case 32: ret = sprintf(buf, "%u\n", be32_to_cpup((__be32 *)data)); break; case 64: ret = sprintf(buf, "%llu\n", (unsigned long long)be64_to_cpup((__be64 *)data)); break; default: ret = 0; } return ret; } static PORT_PMA_ATTR(symbol_error , 0, 16, 32); static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48); static PORT_PMA_ATTR(link_downed , 2, 8, 56); static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64); static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80); static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96); static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112); static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128); static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136); static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152); static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156); static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176); static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192); static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224); static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256); static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288); static PORT_PMA_ATTR(port_xmit_wait , 0, 32, 320); /* * Counters added by extended set */ static PORT_PMA_ATTR_EXT(port_xmit_data , 64, 64); static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128); static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192); static PORT_PMA_ATTR_EXT(port_rcv_packets , 64, 256); static PORT_PMA_ATTR_EXT(unicast_xmit_packets , 64, 320); static PORT_PMA_ATTR_EXT(unicast_rcv_packets , 64, 384); static PORT_PMA_ATTR_EXT(multicast_xmit_packets , 64, 448); static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512); static struct attribute *pma_attrs[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_port_xmit_data.attr.attr, &port_pma_attr_port_rcv_data.attr.attr, &port_pma_attr_port_xmit_packets.attr.attr, &port_pma_attr_port_rcv_packets.attr.attr, &port_pma_attr_port_xmit_wait.attr.attr, NULL }; static struct attribute *pma_attrs_ext[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_ext_port_xmit_data.attr.attr, &port_pma_attr_ext_port_rcv_data.attr.attr, &port_pma_attr_ext_port_xmit_packets.attr.attr, &port_pma_attr_port_xmit_wait.attr.attr, &port_pma_attr_ext_port_rcv_packets.attr.attr, &port_pma_attr_ext_unicast_rcv_packets.attr.attr, &port_pma_attr_ext_unicast_xmit_packets.attr.attr, &port_pma_attr_ext_multicast_rcv_packets.attr.attr, &port_pma_attr_ext_multicast_xmit_packets.attr.attr, NULL }; static struct attribute *pma_attrs_noietf[] = { &port_pma_attr_symbol_error.attr.attr, &port_pma_attr_link_error_recovery.attr.attr, &port_pma_attr_link_downed.attr.attr, &port_pma_attr_port_rcv_errors.attr.attr, &port_pma_attr_port_rcv_remote_physical_errors.attr.attr, &port_pma_attr_port_rcv_switch_relay_errors.attr.attr, &port_pma_attr_port_xmit_discards.attr.attr, &port_pma_attr_port_xmit_constraint_errors.attr.attr, &port_pma_attr_port_rcv_constraint_errors.attr.attr, &port_pma_attr_local_link_integrity_errors.attr.attr, &port_pma_attr_excessive_buffer_overrun_errors.attr.attr, &port_pma_attr_VL15_dropped.attr.attr, &port_pma_attr_ext_port_xmit_data.attr.attr, &port_pma_attr_ext_port_rcv_data.attr.attr, &port_pma_attr_ext_port_xmit_packets.attr.attr, &port_pma_attr_ext_port_rcv_packets.attr.attr, &port_pma_attr_port_xmit_wait.attr.attr, NULL }; static struct attribute_group pma_group = { .name = "counters", .attrs = pma_attrs }; static struct attribute_group pma_group_ext = { .name = "counters", .attrs = pma_attrs_ext }; static struct attribute_group pma_group_noietf = { .name = "counters", .attrs = pma_attrs_noietf }; static void ib_port_release(struct kobject *kobj) { struct ib_port *p = container_of(kobj, struct ib_port, kobj); struct attribute *a; int i; if (p->gid_group.attrs) { for (i = 0; (a = p->gid_group.attrs[i]); ++i) kfree(a); kfree(p->gid_group.attrs); } if (p->pkey_group.attrs) { for (i = 0; (a = p->pkey_group.attrs[i]); ++i) kfree(a); kfree(p->pkey_group.attrs); } kfree(p); } static void ib_port_gid_attr_release(struct kobject *kobj) { struct gid_attr_group *g = container_of(kobj, struct gid_attr_group, kobj); struct attribute *a; int i; if (g->ndev.attrs) { for (i = 0; (a = g->ndev.attrs[i]); ++i) kfree(a); kfree(g->ndev.attrs); } if (g->type.attrs) { for (i = 0; (a = g->type.attrs[i]); ++i) kfree(a); kfree(g->type.attrs); } kfree(g); } static struct kobj_type port_type = { .release = ib_port_release, .sysfs_ops = &port_sysfs_ops, .default_attrs = port_default_attrs }; static struct kobj_type gid_attr_type = { .sysfs_ops = &gid_attr_sysfs_ops, .release = ib_port_gid_attr_release }; static struct attribute ** alloc_group_attrs(ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf), int len) { struct attribute **tab_attr; struct port_table_attribute *element; int i; tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL); if (!tab_attr) return NULL; for (i = 0; i < len; i++) { element = kzalloc(sizeof(struct port_table_attribute), GFP_KERNEL); if (!element) goto err; if (snprintf(element->name, sizeof(element->name), "%d", i) >= sizeof(element->name)) { kfree(element); goto err; } element->attr.attr.name = element->name; element->attr.attr.mode = S_IRUGO; element->attr.show = show; element->index = i; sysfs_attr_init(&element->attr.attr); tab_attr[i] = &element->attr.attr; } return tab_attr; err: while (--i >= 0) kfree(tab_attr[i]); kfree(tab_attr); return NULL; } /* * Figure out which counter table to use depending on * the device capabilities. */ static struct attribute_group *get_counter_table(struct ib_device *dev, int port_num) { struct ib_class_port_info cpi; if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO, &cpi, 40, sizeof(cpi)) >= 0) { if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH) /* We have extended counters */ return &pma_group_ext; if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF) /* But not the IETF ones */ return &pma_group_noietf; } /* Fall back to normal counters */ return &pma_group; } static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats, u8 port_num, int index) { int ret; if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan)) return 0; ret = dev->get_hw_stats(dev, stats, port_num, index); if (ret < 0) return ret; if (ret == stats->num_counters) stats->timestamp = jiffies; return 0; } static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf) { return sprintf(buf, "%llu\n", (unsigned long long)stats->value[index]); } static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr, char *buf) { struct ib_device *dev; struct ib_port *port; struct hw_stats_attribute *hsa; struct rdma_hw_stats *stats; int ret; hsa = container_of(attr, struct hw_stats_attribute, attr); if (!hsa->port_num) { dev = container_of((struct device *)kobj, struct ib_device, dev); stats = dev->hw_stats; } else { port = container_of(kobj, struct ib_port, kobj); dev = port->ibdev; stats = port->hw_stats; } ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index); if (ret) return ret; return print_hw_stat(stats, hsa->index, buf); } static ssize_t show_stats_lifespan(struct kobject *kobj, struct attribute *attr, char *buf) { struct hw_stats_attribute *hsa; int msecs; hsa = container_of(attr, struct hw_stats_attribute, attr); if (!hsa->port_num) { struct ib_device *dev = container_of((struct device *)kobj, struct ib_device, dev); msecs = jiffies_to_msecs(dev->hw_stats->lifespan); } else { struct ib_port *p = container_of(kobj, struct ib_port, kobj); msecs = jiffies_to_msecs(p->hw_stats->lifespan); } return sprintf(buf, "%d\n", msecs); } static ssize_t set_stats_lifespan(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct hw_stats_attribute *hsa; int msecs; int jiffies; int ret; ret = kstrtoint(buf, 10, &msecs); if (ret) return ret; if (msecs < 0 || msecs > 10000) return -EINVAL; jiffies = msecs_to_jiffies(msecs); hsa = container_of(attr, struct hw_stats_attribute, attr); if (!hsa->port_num) { struct ib_device *dev = container_of((struct device *)kobj, struct ib_device, dev); dev->hw_stats->lifespan = jiffies; } else { struct ib_port *p = container_of(kobj, struct ib_port, kobj); p->hw_stats->lifespan = jiffies; } return count; } static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group) { struct attribute **attr; sysfs_remove_group(kobj, attr_group); for (attr = attr_group->attrs; *attr; attr++) kfree(*attr); kfree(attr_group); } static struct attribute *alloc_hsa(int index, u8 port_num, const char *name) { struct hw_stats_attribute *hsa; hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); if (!hsa) return NULL; hsa->attr.name = __DECONST(char *, name); hsa->attr.mode = S_IRUGO; hsa->show = show_hw_stats; hsa->store = NULL; hsa->index = index; hsa->port_num = port_num; return &hsa->attr; } static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) { struct hw_stats_attribute *hsa; hsa = kmalloc(sizeof(*hsa), GFP_KERNEL); if (!hsa) return NULL; hsa->attr.name = name; hsa->attr.mode = S_IWUSR | S_IRUGO; hsa->show = show_stats_lifespan; hsa->store = set_stats_lifespan; hsa->index = 0; hsa->port_num = port_num; return &hsa->attr; } static void setup_hw_stats(struct ib_device *device, struct ib_port *port, u8 port_num) { struct attribute_group *hsag; struct rdma_hw_stats *stats; int i, ret; stats = device->alloc_hw_stats(device, port_num); if (!stats) return; if (!stats->names || stats->num_counters <= 0) goto err_free_stats; /* * Two extra attribue elements here, one for the lifespan entry and * one to NULL terminate the list for the sysfs core code */ hsag = kzalloc(sizeof(*hsag) + sizeof(void *) * (stats->num_counters + 2), GFP_KERNEL); if (!hsag) goto err_free_stats; ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); if (ret != stats->num_counters) goto err_free_hsag; stats->timestamp = jiffies; hsag->name = "hw_counters"; hsag->attrs = (void *)((char *)hsag + sizeof(*hsag)); for (i = 0; i < stats->num_counters; i++) { hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); if (!hsag->attrs[i]) goto err; sysfs_attr_init(hsag->attrs[i]); } /* treat an error here as non-fatal */ hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); if (hsag->attrs[i]) sysfs_attr_init(hsag->attrs[i]); if (port) { struct kobject *kobj = &port->kobj; ret = sysfs_create_group(kobj, hsag); if (ret) goto err; port->hw_stats_ag = hsag; port->hw_stats = stats; } else { struct kobject *kobj = &device->dev.kobj; ret = sysfs_create_group(kobj, hsag); if (ret) goto err; device->hw_stats_ag = hsag; device->hw_stats = stats; } return; err: for (; i >= 0; i--) kfree(hsag->attrs[i]); err_free_hsag: kfree(hsag); err_free_stats: kfree(stats); return; } static int add_port(struct ib_device *device, int port_num, int (*port_callback)(struct ib_device *, u8, struct kobject *)) { struct ib_port *p; struct ib_port_attr attr; int i; int ret; ret = ib_query_port(device, port_num, &attr); if (ret) return ret; p = kzalloc(sizeof *p, GFP_KERNEL); if (!p) return -ENOMEM; p->ibdev = device; p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, device->ports_parent, "%d", port_num); if (ret) { kfree(p); return ret; } p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL); if (!p->gid_attr_group) { ret = -ENOMEM; goto err_put; } p->gid_attr_group->port = p; ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type, &p->kobj, "gid_attrs"); if (ret) { kfree(p->gid_attr_group); goto err_put; } p->pma_table = get_counter_table(device, port_num); ret = sysfs_create_group(&p->kobj, p->pma_table); if (ret) goto err_put_gid_attrs; p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); if (!p->gid_group.attrs) { ret = -ENOMEM; goto err_remove_pma; } ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) goto err_free_gid; p->gid_attr_group->ndev.name = "ndevs"; p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev, attr.gid_tbl_len); if (!p->gid_attr_group->ndev.attrs) { ret = -ENOMEM; goto err_remove_gid; } ret = sysfs_create_group(&p->gid_attr_group->kobj, &p->gid_attr_group->ndev); if (ret) goto err_free_gid_ndev; p->gid_attr_group->type.name = "types"; p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type, attr.gid_tbl_len); if (!p->gid_attr_group->type.attrs) { ret = -ENOMEM; goto err_remove_gid_ndev; } ret = sysfs_create_group(&p->gid_attr_group->kobj, &p->gid_attr_group->type); if (ret) goto err_free_gid_type; p->pkey_group.name = "pkeys"; p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, attr.pkey_tbl_len); if (!p->pkey_group.attrs) { ret = -ENOMEM; goto err_remove_gid_type; } ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) goto err_free_pkey; if (port_callback) { ret = port_callback(device, port_num, &p->kobj); if (ret) goto err_remove_pkey; } /* * If port == 0, it means we have only one port and the parent * device, not this port device, should be the holder of the * hw_counters */ if (device->alloc_hw_stats && port_num) setup_hw_stats(device, p, port_num); list_add_tail(&p->kobj.entry, &device->port_list); return 0; err_remove_pkey: sysfs_remove_group(&p->kobj, &p->pkey_group); err_free_pkey: for (i = 0; i < attr.pkey_tbl_len; ++i) kfree(p->pkey_group.attrs[i]); kfree(p->pkey_group.attrs); p->pkey_group.attrs = NULL; err_remove_gid_type: sysfs_remove_group(&p->gid_attr_group->kobj, &p->gid_attr_group->type); err_free_gid_type: for (i = 0; i < attr.gid_tbl_len; ++i) kfree(p->gid_attr_group->type.attrs[i]); kfree(p->gid_attr_group->type.attrs); p->gid_attr_group->type.attrs = NULL; err_remove_gid_ndev: sysfs_remove_group(&p->gid_attr_group->kobj, &p->gid_attr_group->ndev); err_free_gid_ndev: for (i = 0; i < attr.gid_tbl_len; ++i) kfree(p->gid_attr_group->ndev.attrs[i]); kfree(p->gid_attr_group->ndev.attrs); p->gid_attr_group->ndev.attrs = NULL; err_remove_gid: sysfs_remove_group(&p->kobj, &p->gid_group); err_free_gid: for (i = 0; i < attr.gid_tbl_len; ++i) kfree(p->gid_group.attrs[i]); kfree(p->gid_group.attrs); p->gid_group.attrs = NULL; err_remove_pma: sysfs_remove_group(&p->kobj, p->pma_table); err_put_gid_attrs: kobject_put(&p->gid_attr_group->kobj); err_put: kobject_put(&p->kobj); return ret; } static ssize_t show_node_type(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); switch (dev->node_type) { case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); default: return sprintf(buf, "%d: \n", dev->node_type); } } static ssize_t show_sys_image_guid(struct device *device, struct device_attribute *dev_attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]), be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]), be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); } static ssize_t show_node_guid(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); return sprintf(buf, "%04x:%04x:%04x:%04x\n", be16_to_cpu(((__be16 *) &dev->node_guid)[0]), be16_to_cpu(((__be16 *) &dev->node_guid)[1]), be16_to_cpu(((__be16 *) &dev->node_guid)[2]), be16_to_cpu(((__be16 *) &dev->node_guid)[3])); } static ssize_t show_node_desc(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); return sprintf(buf, "%.64s\n", dev->node_desc); } static ssize_t set_node_desc(struct device *device, struct device_attribute *attr, const char *buf, size_t count) { struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device_modify desc = {}; int ret; if (!dev->modify_device) return -EIO; memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); if (ret) return ret; return count; } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, char *buf) { struct ib_device *dev = container_of(device, struct ib_device, dev); ib_get_device_fw_str(dev, buf, PAGE_SIZE); strlcat(buf, "\n", PAGE_SIZE); return strlen(buf); } static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static struct device_attribute *ib_class_attributes[] = { &dev_attr_node_type, &dev_attr_sys_image_guid, &dev_attr_node_guid, &dev_attr_node_desc, &dev_attr_fw_ver, }; static void free_port_list_attributes(struct ib_device *device) { struct kobject *p, *t; list_for_each_entry_safe(p, t, &device->port_list, entry) { struct ib_port *port = container_of(p, struct ib_port, kobj); list_del(&p->entry); if (port->hw_stats) { kfree(port->hw_stats); free_hsag(&port->kobj, port->hw_stats_ag); } sysfs_remove_group(p, port->pma_table); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); sysfs_remove_group(&port->gid_attr_group->kobj, &port->gid_attr_group->ndev); sysfs_remove_group(&port->gid_attr_group->kobj, &port->gid_attr_group->type); kobject_put(&port->gid_attr_group->kobj); kobject_put(p); } kobject_put(device->ports_parent); } int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)) { struct device *class_dev = &device->dev; int ret; int i; device->dev.parent = device->dma_device; ret = dev_set_name(class_dev, "%s", device->name); if (ret) return ret; ret = device_add(class_dev); if (ret) goto err; for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { ret = device_create_file(class_dev, ib_class_attributes[i]); if (ret) goto err_unregister; } device->ports_parent = kobject_create_and_add("ports", &class_dev->kobj); if (!device->ports_parent) { ret = -ENOMEM; goto err_put; } if (rdma_cap_ib_switch(device)) { ret = add_port(device, 0, port_callback); if (ret) goto err_put; } else { for (i = 1; i <= device->phys_port_cnt; ++i) { ret = add_port(device, i, port_callback); if (ret) goto err_put; } } if (device->alloc_hw_stats) setup_hw_stats(device, NULL, 0); return 0; err_put: free_port_list_attributes(device); err_unregister: device_unregister(class_dev); err: return ret; } void ib_device_unregister_sysfs(struct ib_device *device) { int i; /* Hold kobject until ib_dealloc_device() */ kobject_get(&device->dev.kobj); free_port_list_attributes(device); if (device->hw_stats) { kfree(device->hw_stats); free_hsag(&device->dev.kobj, device->hw_stats_ag); } for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) device_remove_file(&device->dev, ib_class_attributes[i]); device_unregister(&device->dev); } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_ucm.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_ucm.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_ucm.c (revision 331772) @@ -1,1370 +1,1374 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Libor Michalek"); MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access"); MODULE_LICENSE("Dual BSD/GPL"); struct ib_ucm_device { int devnum; struct cdev cdev; struct device dev; struct ib_device *ib_dev; }; struct ib_ucm_file { struct mutex file_mutex; struct file *filp; struct ib_ucm_device *device; struct list_head ctxs; struct list_head events; wait_queue_head_t poll_wait; }; struct ib_ucm_context { int id; struct completion comp; atomic_t ref; int events_reported; struct ib_ucm_file *file; struct ib_cm_id *cm_id; __u64 uid; struct list_head events; /* list of pending events. */ struct list_head file_list; /* member in file ctx list */ }; struct ib_ucm_event { struct ib_ucm_context *ctx; struct list_head file_list; /* member in file event list */ struct list_head ctx_list; /* member in ctx event list */ struct ib_cm_id *cm_id; struct ib_ucm_event_resp resp; void *data; void *info; int data_len; int info_len; }; enum { IB_UCM_MAJOR = 231, IB_UCM_BASE_MINOR = 224, IB_UCM_MAX_DEVICES = 32 }; #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); static void ib_ucm_remove_one(struct ib_device *device, void *client_data); static struct ib_client ucm_client = { .name = "ucm", .add = ib_ucm_add_one, .remove = ib_ucm_remove_one }; static DEFINE_MUTEX(ctx_id_mutex); static DEFINE_IDR(ctx_id_table); static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES); static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id) { struct ib_ucm_context *ctx; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else atomic_inc(&ctx->ref); mutex_unlock(&ctx_id_mutex); return ctx; } static void ib_ucm_ctx_put(struct ib_ucm_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) complete(&ctx->comp); } static inline int ib_ucm_new_cm_id(int event) { return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED; } static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) { struct ib_ucm_event *uevent; mutex_lock(&ctx->file->file_mutex); list_del(&ctx->file_list); while (!list_empty(&ctx->events)) { uevent = list_entry(ctx->events.next, struct ib_ucm_event, ctx_list); list_del(&uevent->file_list); list_del(&uevent->ctx_list); mutex_unlock(&ctx->file->file_mutex); /* clear incoming connections. */ if (ib_ucm_new_cm_id(uevent->resp.event)) ib_destroy_cm_id(uevent->cm_id); kfree(uevent); mutex_lock(&ctx->file->file_mutex); } mutex_unlock(&ctx->file->file_mutex); } static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) return NULL; atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); ctx->file = file; INIT_LIST_HEAD(&ctx->events); mutex_lock(&ctx_id_mutex); ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); mutex_unlock(&ctx_id_mutex); if (ctx->id < 0) goto error; list_add_tail(&ctx->file_list, &file->ctxs); return ctx; error: kfree(ctx); return NULL; } static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq, struct ib_cm_req_event_param *kreq) { ureq->remote_ca_guid = kreq->remote_ca_guid; ureq->remote_qkey = kreq->remote_qkey; ureq->remote_qpn = kreq->remote_qpn; ureq->qp_type = kreq->qp_type; ureq->starting_psn = kreq->starting_psn; ureq->responder_resources = kreq->responder_resources; ureq->initiator_depth = kreq->initiator_depth; ureq->local_cm_response_timeout = kreq->local_cm_response_timeout; ureq->flow_control = kreq->flow_control; ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout; ureq->retry_count = kreq->retry_count; ureq->rnr_retry_count = kreq->rnr_retry_count; ureq->srq = kreq->srq; ureq->port = kreq->port; ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path); if (kreq->alternate_path) ib_copy_path_rec_to_user(&ureq->alternate_path, kreq->alternate_path); } static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep, struct ib_cm_rep_event_param *krep) { urep->remote_ca_guid = krep->remote_ca_guid; urep->remote_qkey = krep->remote_qkey; urep->remote_qpn = krep->remote_qpn; urep->starting_psn = krep->starting_psn; urep->responder_resources = krep->responder_resources; urep->initiator_depth = krep->initiator_depth; urep->target_ack_delay = krep->target_ack_delay; urep->failover_accepted = krep->failover_accepted; urep->flow_control = krep->flow_control; urep->rnr_retry_count = krep->rnr_retry_count; urep->srq = krep->srq; } static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep, struct ib_cm_sidr_rep_event_param *krep) { urep->status = krep->status; urep->qkey = krep->qkey; urep->qpn = krep->qpn; }; static int ib_ucm_event_process(struct ib_cm_event *evt, struct ib_ucm_event *uvt) { void *info = NULL; switch (evt->event) { case IB_CM_REQ_RECEIVED: ib_ucm_event_req_get(&uvt->resp.u.req_resp, &evt->param.req_rcvd); uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_PRIMARY; uvt->resp.present |= (evt->param.req_rcvd.alternate_path ? IB_UCM_PRES_ALTERNATE : 0); break; case IB_CM_REP_RECEIVED: ib_ucm_event_rep_get(&uvt->resp.u.rep_resp, &evt->param.rep_rcvd); uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE; break; case IB_CM_RTU_RECEIVED: uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREQ_RECEIVED: uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_DREP_RECEIVED: uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE; uvt->resp.u.send_status = evt->param.send_status; break; case IB_CM_MRA_RECEIVED: uvt->resp.u.mra_resp.timeout = evt->param.mra_rcvd.service_timeout; uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE; break; case IB_CM_REJ_RECEIVED: uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason; uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.rej_rcvd.ari_length; info = evt->param.rej_rcvd.ari; break; case IB_CM_LAP_RECEIVED: ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path, evt->param.lap_rcvd.alternate_path); uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE; uvt->resp.present = IB_UCM_PRES_ALTERNATE; break; case IB_CM_APR_RECEIVED: uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status; uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.apr_rcvd.info_len; info = evt->param.apr_rcvd.apr_info; break; case IB_CM_SIDR_REQ_RECEIVED: uvt->resp.u.sidr_req_resp.pkey = evt->param.sidr_req_rcvd.pkey; uvt->resp.u.sidr_req_resp.port = evt->param.sidr_req_rcvd.port; uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE; break; case IB_CM_SIDR_REP_RECEIVED: ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp, &evt->param.sidr_rep_rcvd); uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; uvt->info_len = evt->param.sidr_rep_rcvd.info_len; info = evt->param.sidr_rep_rcvd.info; break; default: uvt->resp.u.send_status = evt->param.send_status; break; } if (uvt->data_len) { uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL); if (!uvt->data) goto err1; uvt->resp.present |= IB_UCM_PRES_DATA; } if (uvt->info_len) { uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL); if (!uvt->info) goto err2; uvt->resp.present |= IB_UCM_PRES_INFO; } return 0; err2: kfree(uvt->data); err1: return -ENOMEM; } static int ib_ucm_event_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct ib_ucm_event *uevent; struct ib_ucm_context *ctx; int result = 0; ctx = cm_id->context; uevent = kzalloc(sizeof *uevent, GFP_KERNEL); if (!uevent) goto err1; uevent->ctx = ctx; uevent->cm_id = cm_id; uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; uevent->resp.event = event->event; result = ib_ucm_event_process(event, uevent); if (result) goto err2; mutex_lock(&ctx->file->file_mutex); list_add_tail(&uevent->file_list, &ctx->file->events); list_add_tail(&uevent->ctx_list, &ctx->events); wake_up_interruptible(&ctx->file->poll_wait); mutex_unlock(&ctx->file->file_mutex); return 0; err2: kfree(uevent); err1: /* Destroy new cm_id's */ return ib_ucm_new_cm_id(event->event); } static ssize_t ib_ucm_event(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_event_get cmd; struct ib_ucm_event *uevent; int result = 0; if (out_len < sizeof(struct ib_ucm_event_resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); while (list_empty(&file->events)) { mutex_unlock(&file->file_mutex); if (file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->events))) return -ERESTARTSYS; mutex_lock(&file->file_mutex); } uevent = list_entry(file->events.next, struct ib_ucm_event, file_list); if (ib_ucm_new_cm_id(uevent->resp.event)) { ctx = ib_ucm_ctx_alloc(file); if (!ctx) { result = -ENOMEM; goto done; } ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; } if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof(uevent->resp))) { result = -EFAULT; goto done; } if (uevent->data) { if (cmd.data_len < uevent->data_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.data, uevent->data, uevent->data_len)) { result = -EFAULT; goto done; } } if (uevent->info) { if (cmd.info_len < uevent->info_len) { result = -ENOMEM; goto done; } if (copy_to_user((void __user *)(unsigned long)cmd.info, uevent->info, uevent->info_len)) { result = -EFAULT; goto done; } } list_del(&uevent->file_list); list_del(&uevent->ctx_list); uevent->ctx->events_reported++; kfree(uevent->data); kfree(uevent->info); kfree(uevent); done: mutex_unlock(&file->file_mutex); return result; } static ssize_t ib_ucm_create_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_create_id cmd; struct ib_ucm_create_id_resp resp; struct ib_ucm_context *ctx; int result; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->file_mutex); ctx = ib_ucm_ctx_alloc(file); mutex_unlock(&file->file_mutex); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; ctx->cm_id = ib_create_cm_id(file->device->ib_dev, ib_ucm_event_handler, ctx); if (IS_ERR(ctx->cm_id)) { result = PTR_ERR(ctx->cm_id); goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { result = -EFAULT; goto err2; } return 0; err2: ib_destroy_cm_id(ctx->cm_id); err1: mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); kfree(ctx); return result; } static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_destroy_id cmd; struct ib_ucm_destroy_id_resp resp; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&ctx_id_mutex); ctx = idr_find(&ctx_id_table, cmd.id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); else idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); ib_ucm_ctx_put(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the cm_id. */ ib_destroy_cm_id(ctx->cm_id); /* Cleanup events not yet reported to the user. */ ib_ucm_cleanup_events(ctx); resp.events_reported = ctx->events_reported; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; kfree(ctx); return result; } static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_attr_id_resp resp; struct ib_ucm_attr_id cmd; struct ib_ucm_context *ctx; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.service_id = ctx->cm_id->service_id; resp.service_mask = ctx->cm_id->service_mask; resp.local_id = ctx->cm_id->local_id; resp.remote_id = ctx->cm_id->remote_id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_uverbs_qp_attr resp; struct ib_ucm_init_qp_attr cmd; struct ib_ucm_context *ctx; struct ib_qp_attr qp_attr; int result = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); if (result) goto out; ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) result = -EFAULT; out: ib_ucm_ctx_put(ctx); return result; } static int ucm_validate_listen(__be64 service_id, __be64 service_mask) { service_id &= service_mask; if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) || ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID)) return -EINVAL; return 0; } static ssize_t ib_ucm_listen(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_listen cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ucm_validate_listen(cmd.service_id, cmd.service_mask); if (result) goto out; result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask); out: ib_ucm_ctx_put(ctx); return result; } static ssize_t ib_ucm_notify(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_notify cmd; struct ib_ucm_context *ctx; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ib_ucm_ctx_get(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ib_ucm_ctx_put(ctx); return result; } static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len) { void *data; *dest = NULL; if (!len) return 0; data = memdup_user((void __user *)(unsigned long)src, len); if (IS_ERR(data)) return PTR_ERR(data); *dest = data; return 0; } static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src) { struct ib_user_path_rec upath; struct ib_sa_path_rec *sa_path; *path = NULL; if (!src) return 0; sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL); if (!sa_path) return -ENOMEM; if (copy_from_user(&upath, (void __user *)(unsigned long)src, sizeof(upath))) { kfree(sa_path); return -EFAULT; } ib_copy_path_rec_from_user(sa_path, &upath); *path = sa_path; return 0; } static ssize_t ib_ucm_send_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_req cmd; int result; param.private_data = NULL; param.primary_path = NULL; param.alternate_path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.primary_path, cmd.primary_path); if (result) goto done; result = ib_ucm_path_get(¶m.alternate_path, cmd.alternate_path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.qp_num = cmd.qpn; param.qp_type = cmd.qp_type; param.starting_psn = cmd.psn; param.peer_to_peer = cmd.peer_to_peer; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.remote_cm_response_timeout = cmd.remote_cm_response_timeout; param.flow_control = cmd.flow_control; param.local_cm_response_timeout = cmd.local_cm_response_timeout; param.retry_count = cmd.retry_count; param.rnr_retry_count = cmd.rnr_retry_count; param.max_cm_retries = cmd.max_cm_retries; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.primary_path); kfree(param.alternate_path); return result; } static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_rep_param param; struct ib_ucm_context *ctx; struct ib_ucm_rep cmd; int result; param.private_data = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) return result; param.qp_num = cmd.qpn; param.starting_psn = cmd.psn; param.private_data_len = cmd.len; param.responder_resources = cmd.responder_resources; param.initiator_depth = cmd.initiator_depth; param.failover_accepted = cmd.failover_accepted; param.flow_control = cmd.flow_control; param.rnr_retry_count = cmd.rnr_retry_count; param.srq = cmd.srq; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { ctx->uid = cmd.uid; result = ib_send_cm_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(param.private_data); return result; } static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len)) { struct ib_ucm_private_data cmd; struct ib_ucm_context *ctx; const void *private_data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, private_data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(private_data); return result; } static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu); } static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq); } static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep); } static ssize_t ib_ucm_send_info(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int (*func)(struct ib_cm_id *cm_id, int status, const void *info, u8 info_len, const void *data, u8 data_len)) { struct ib_ucm_context *ctx; struct ib_ucm_info cmd; const void *data = NULL; const void *info = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = func(ctx->cm_id, cmd.status, info, cmd.info_len, data, cmd.data_len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(info); return result; } static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej); } static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr); } static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_ucm_mra cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) return result; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); kfree(data); return result; } static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_ucm_context *ctx; struct ib_sa_path_rec *path = NULL; struct ib_ucm_lap cmd; const void *data = NULL; int result; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(&data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(&path, cmd.path); if (result) goto done; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(data); kfree(path); return result; } static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_req_param param; struct ib_ucm_context *ctx; struct ib_ucm_sidr_req cmd; int result; param.private_data = NULL; param.path = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.len); if (result) goto done; result = ib_ucm_path_get(¶m.path, cmd.path); if (result) goto done; param.private_data_len = cmd.len; param.service_id = cmd.sid; param.timeout_ms = cmd.timeout; param.max_cm_retries = cmd.max_cm_retries; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_req(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.path); return result; } static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) { struct ib_cm_sidr_rep_param param; struct ib_ucm_sidr_rep cmd; struct ib_ucm_context *ctx; int result; param.info = NULL; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; result = ib_ucm_alloc_data(¶m.private_data, cmd.data, cmd.data_len); if (result) goto done; result = ib_ucm_alloc_data(¶m.info, cmd.info, cmd.info_len); if (result) goto done; param.qp_num = cmd.qpn; param.qkey = cmd.qkey; param.status = cmd.status; param.info_length = cmd.info_len; param.private_data_len = cmd.data_len; ctx = ib_ucm_ctx_get(file, cmd.id); if (!IS_ERR(ctx)) { result = ib_send_cm_sidr_rep(ctx->cm_id, ¶m); ib_ucm_ctx_put(ctx); } else result = PTR_ERR(ctx); done: kfree(param.private_data); kfree(param.info); return result; } static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file, const char __user *inbuf, int in_len, int out_len) = { [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id, [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id, [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id, [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen, [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify, [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req, [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep, [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu, [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq, [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep, [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej, [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra, [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap, [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr, [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req, [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep, [IB_USER_CM_CMD_EVENT] = ib_ucm_event, [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr, }; static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_cmd_hdr hdr; ssize_t result; if (WARN_ON_ONCE(!ib_safe_file_access(filp))) return -EACCES; if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) return -EINVAL; result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!result) result = len; return result; } static unsigned int ib_ucm_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_ucm_file *file = filp->private_data; unsigned int mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->events)) mask = POLLIN | POLLRDNORM; return mask; } /* * ib_ucm_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. */ static int ib_ucm_open(struct inode *inode, struct file *filp) { struct ib_ucm_file *file; file = kmalloc(sizeof(*file), GFP_KERNEL); if (!file) return -ENOMEM; INIT_LIST_HEAD(&file->events); INIT_LIST_HEAD(&file->ctxs); init_waitqueue_head(&file->poll_wait); mutex_init(&file->file_mutex); filp->private_data = file; file->filp = filp; file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev); return nonseekable_open(inode, filp); } static int ib_ucm_close(struct inode *inode, struct file *filp) { struct ib_ucm_file *file = filp->private_data; struct ib_ucm_context *ctx; mutex_lock(&file->file_mutex); while (!list_empty(&file->ctxs)) { ctx = list_entry(file->ctxs.next, struct ib_ucm_context, file_list); mutex_unlock(&file->file_mutex); mutex_lock(&ctx_id_mutex); idr_remove(&ctx_id_table, ctx->id); mutex_unlock(&ctx_id_mutex); ib_destroy_cm_id(ctx->cm_id); ib_ucm_cleanup_events(ctx); kfree(ctx); mutex_lock(&file->file_mutex); } mutex_unlock(&file->file_mutex); kfree(file); return 0; } static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES); static void ib_ucm_release_dev(struct device *dev) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); cdev_del(&ucm_dev->cdev); if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(ucm_dev->devnum, dev_map); else clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map); kfree(ucm_dev); } static const struct file_operations ucm_fops = { .owner = THIS_MODULE, .open = ib_ucm_open, .release = ib_ucm_close, .write = ib_ucm_write, .poll = ib_ucm_poll, .llseek = no_llseek, }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_ucm_device *ucm_dev; ucm_dev = container_of(dev, struct ib_ucm_device, dev); return sprintf(buf, "%s\n", ucm_dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static dev_t overflow_maj; static int find_overflow_devnum(void) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { pr_err("ucm: couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES); if (ret >= IB_UCM_MAX_DEVICES) return -1; return ret; } static void ib_ucm_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_ucm_device *ucm_dev; if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1)) return; ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL); if (!ucm_dev) return; ucm_dev->ib_dev = device; devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES); if (devnum >= IB_UCM_MAX_DEVICES) { devnum = find_overflow_devnum(); if (devnum < 0) goto err; ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { ucm_dev->devnum = devnum; base = devnum + IB_UCM_BASE_DEV; set_bit(devnum, dev_map); } cdev_init(&ucm_dev->cdev, &ucm_fops); ucm_dev->cdev.owner = THIS_MODULE; kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum); if (cdev_add(&ucm_dev->cdev, base, 1)) goto err; ucm_dev->dev.class = &cm_class; ucm_dev->dev.parent = device->dma_device; ucm_dev->dev.devt = ucm_dev->cdev.dev; ucm_dev->dev.release = ib_ucm_release_dev; dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum); if (device_register(&ucm_dev->dev)) goto err_cdev; if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev)) goto err_dev; ib_set_client_data(device, &ucm_client, ucm_dev); return; err_dev: device_unregister(&ucm_dev->dev); err_cdev: cdev_del(&ucm_dev->cdev); if (ucm_dev->devnum < IB_UCM_MAX_DEVICES) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); err: kfree(ucm_dev); return; } static void ib_ucm_remove_one(struct ib_device *device, void *client_data) { struct ib_ucm_device *ucm_dev = client_data; if (!ucm_dev) return; device_unregister(&ucm_dev->dev); } static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_CM_ABI_VERSION)); static int __init ib_ucm_init(void) { int ret; ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES, "infiniband_cm"); if (ret) { pr_err("ucm: couldn't register device number\n"); goto error1; } ret = class_create_file(&cm_class, &class_attr_abi_version.attr); if (ret) { pr_err("ucm: couldn't create abi_version attribute\n"); goto error2; } ret = ib_register_client(&ucm_client); if (ret) { pr_err("ucm: couldn't register client\n"); goto error3; } return 0; error3: class_remove_file(&cm_class, &class_attr_abi_version.attr); error2: unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); error1: return ret; } static void __exit ib_ucm_cleanup(void) { ib_unregister_client(&ucm_client); class_remove_file(&cm_class, &class_attr_abi_version.attr); unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES); idr_destroy(&ctx_id_table); } module_init_order(ib_ucm_init, SI_ORDER_THIRD); module_exit(ib_ucm_cleanup); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_ucma.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_ucma.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_ucma.c (revision 331772) @@ -1,1754 +1,1758 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); MODULE_LICENSE("Dual BSD/GPL"); static unsigned int max_backlog = 1024; struct ucma_file { struct mutex mut; struct file *filp; struct list_head ctx_list; struct list_head event_list; wait_queue_head_t poll_wait; struct workqueue_struct *close_wq; }; struct ucma_context { int id; struct completion comp; atomic_t ref; int events_reported; int backlog; struct ucma_file *file; struct rdma_cm_id *cm_id; u64 uid; struct list_head list; struct list_head mc_list; /* mark that device is in process of destroying the internal HW * resources, protected by the global mut */ int closing; /* sync between removal event and id destroy, protected by file mut */ int destroying; struct work_struct close_work; }; struct ucma_multicast { struct ucma_context *ctx; int id; int events_reported; u64 uid; u8 join_state; struct list_head list; struct sockaddr_storage addr; }; struct ucma_event { struct ucma_context *ctx; struct ucma_multicast *mc; struct list_head list; struct rdma_cm_id *cm_id; struct rdma_ucm_event_resp resp; struct work_struct close_work; }; static DEFINE_MUTEX(mut); static DEFINE_IDR(ctx_idr); static DEFINE_IDR(multicast_idr); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) { struct ucma_context *ctx; ctx = idr_find(&ctx_idr, id); if (!ctx) ctx = ERR_PTR(-ENOENT); else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); return ctx; } static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) { struct ucma_context *ctx; mutex_lock(&mut); ctx = _ucma_find_context(id, file); if (!IS_ERR(ctx)) { if (ctx->closing) ctx = ERR_PTR(-EIO); else atomic_inc(&ctx->ref); } mutex_unlock(&mut); return ctx; } static void ucma_put_ctx(struct ucma_context *ctx) { if (atomic_dec_and_test(&ctx->ref)) complete(&ctx->comp); } static void ucma_close_event_id(struct work_struct *work) { struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); rdma_destroy_id(uevent_close->cm_id); kfree(uevent_close); } static void ucma_close_id(struct work_struct *work) { struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing * by its creator. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); } static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); atomic_set(&ctx->ref, 1); init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; mutex_lock(&mut); ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (ctx->id < 0) goto error; list_add_tail(&ctx->list, &file->ctx_list); return ctx; error: kfree(ctx); return NULL; } static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc; mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; mutex_lock(&mut); mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); mutex_unlock(&mut); if (mc->id < 0) goto error; mc->ctx = ctx; list_add_tail(&mc->list, &ctx->mc_list); return mc; error: kfree(mc); return NULL; } static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, struct rdma_conn_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; dst->responder_resources =src->responder_resources; dst->initiator_depth = src->initiator_depth; dst->flow_control = src->flow_control; dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; } static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst, struct rdma_ud_param *src) { if (src->private_data_len) memcpy(dst->private_data, src->private_data, src->private_data_len); dst->private_data_len = src->private_data_len; ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); dst->qp_num = src->qp_num; dst->qkey = src->qkey; } static void ucma_set_event_context(struct ucma_context *ctx, struct rdma_cm_event *event, struct ucma_event *uevent) { uevent->ctx = ctx; switch (event->event) { case RDMA_CM_EVENT_MULTICAST_JOIN: case RDMA_CM_EVENT_MULTICAST_ERROR: uevent->mc = __DECONST(struct ucma_multicast *, event->param.ud.private_data); uevent->resp.uid = uevent->mc->uid; uevent->resp.id = uevent->mc->id; break; default: uevent->resp.uid = ctx->uid; uevent->resp.id = ctx->id; break; } } /* Called with file->mut locked for the relevant context. */ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) { struct ucma_context *ctx = cm_id->context; struct ucma_event *con_req_eve; int event_found = 0; if (ctx->destroying) return; /* only if context is pointing to cm_id that it owns it and can be * queued to be closed, otherwise that cm_id is an inflight one that * is part of that context event list pending to be detached and * reattached to its new context as part of ucma_get_event, * handled separately below. */ if (ctx->cm_id == cm_id) { mutex_lock(&mut); ctx->closing = 1; mutex_unlock(&mut); queue_work(ctx->file->close_wq, &ctx->close_work); return; } list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { if (con_req_eve->cm_id == cm_id && con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { list_del(&con_req_eve->list); INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); queue_work(ctx->file->close_wq, &con_req_eve->close_work); event_found = 1; break; } } if (!event_found) pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); } static int ucma_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { struct ucma_event *uevent; struct ucma_context *ctx = cm_id->context; int ret = 0; uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); if (!uevent) return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; mutex_lock(&ctx->file->mut); uevent->cm_id = cm_id; ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; if (cm_id->qp_type == IB_QPT_UD) ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; kfree(uevent); goto out; } ctx->backlog--; } else if (!ctx->uid || ctx->cm_id != cm_id) { /* * We ignore events for new connections until userspace has set * their context. This can only happen if an error occurs on a * new connection before the user accepts it. This is okay, * since the accept will just fail later. However, we do need * to release the underlying HW resources in case of a device * removal event. */ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) ucma_removal_event_handler(cm_id); kfree(uevent); goto out; } list_add_tail(&uevent->list, &ctx->file->event_list); wake_up_interruptible(&ctx->file->poll_wait); if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) ucma_removal_event_handler(cm_id); out: mutex_unlock(&ctx->file->mut); return ret; } static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct ucma_context *ctx; struct rdma_ucm_get_event cmd; struct ucma_event *uevent; int ret = 0; if (out_len < sizeof uevent->resp) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&file->mut); while (list_empty(&file->event_list)) { mutex_unlock(&file->mut); if (file->filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->event_list))) return -ERESTARTSYS; mutex_lock(&file->mut); } uevent = list_entry(file->event_list.next, struct ucma_event, list); if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { ctx = ucma_alloc_ctx(file); if (!ctx) { ret = -ENOMEM; goto done; } uevent->ctx->backlog++; ctx->cm_id = uevent->cm_id; ctx->cm_id->context = ctx; uevent->resp.id = ctx->id; } if (copy_to_user((void __user *)(unsigned long)cmd.response, &uevent->resp, sizeof uevent->resp)) { ret = -EFAULT; goto done; } list_del(&uevent->list); uevent->ctx->events_reported++; if (uevent->mc) uevent->mc->events_reported++; kfree(uevent); done: mutex_unlock(&file->mut); return ret; } static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) { switch (cmd->ps) { case RDMA_PS_TCP: *qp_type = IB_QPT_RC; return 0; case RDMA_PS_UDP: case RDMA_PS_IPOIB: *qp_type = IB_QPT_UD; return 0; case RDMA_PS_IB: *qp_type = cmd->qp_type; return 0; default: return -EINVAL; } } static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_create_id cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; enum ib_qp_type qp_type; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ret = ucma_get_qp_type(&cmd, &qp_type); if (ret) return ret; mutex_lock(&file->mut); ctx = ucma_alloc_ctx(file); mutex_unlock(&file->mut); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; ctx->cm_id = rdma_create_id(TD_TO_VNET(curthread), ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; } resp.id = ctx->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { ret = -EFAULT; goto err2; } return 0; err2: rdma_destroy_id(ctx->cm_id); err1: mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); kfree(ctx); return ret; } static void ucma_cleanup_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc, *tmp; mutex_lock(&mut); list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { list_del(&mc->list); idr_remove(&multicast_idr, mc->id); kfree(mc); } mutex_unlock(&mut); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { if (uevent->mc != mc) continue; list_del(&uevent->list); kfree(uevent); } } /* * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At * this point, no new events will be reported from the hardware. However, we * still need to cleanup the UCMA context for this ID. Specifically, there * might be events that have not yet been consumed by the user space software. * These might include pending connect requests which we have not completed * processing. We cannot call rdma_destroy_id while holding the lock of the * context (file->mut), as it might cause a deadlock. We therefore extract all * relevant events from the context pending events list while holding the * mutex. After that we release them as needed. */ static int ucma_free_ctx(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); ucma_cleanup_multicast(ctx); /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { if (uevent->ctx == ctx) list_move_tail(&uevent->list, &list); } list_del(&ctx->list); mutex_unlock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &list, list) { list_del(&uevent->list); if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) rdma_destroy_id(uevent->cm_id); kfree(uevent); } events_reported = ctx->events_reported; kfree(ctx); return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_destroy_id cmd; struct rdma_ucm_destroy_id_resp resp; struct ucma_context *ctx; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&mut); ctx = _ucma_find_context(cmd.id, file); if (!IS_ERR(ctx)) idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&ctx->file->mut); ctx->destroying = 1; mutex_unlock(&ctx->file->mut); flush_workqueue(ctx->file->close_wq); /* At this point it's guaranteed that there is no inflight * closing task */ mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); rdma_destroy_id(ctx->cm_id); } else { mutex_unlock(&mut); } resp.events_reported = ucma_free_ctx(ctx); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind_ip cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_bind cmd; struct sockaddr *addr; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; addr = (struct sockaddr *) &cmd.addr; if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_bind_addr(ctx->cm_id, addr); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_ip cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_addr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_addr cmd; struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; src = (struct sockaddr *) &cmd.src_addr; dst = (struct sockaddr *) &cmd.dst_addr; if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_resolve_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_route cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); break; case 2: ib_copy_path_rec_to_user(&resp->ib_route[1], &route->path_rec[1]); /* fall through */ case 1: ib_copy_path_rec_to_user(&resp->ib_route[0], &route->path_rec[0]); break; default: break; } } static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, (union ib_gid *)&resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); break; case 2: ib_copy_path_rec_to_user(&resp->ib_route[1], &route->path_rec[1]); /* fall through */ case 1: ib_copy_path_rec_to_user(&resp->ib_route[0], &route->path_rec[0]); break; default: break; } } static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { struct rdma_dev_addr *dev_addr; dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); } static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_query cmd; struct rdma_ucm_query_route_resp resp; struct ucma_context *ctx; struct sockaddr *addr; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; memcpy(&resp.dst_addr, addr, addr->sa_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)); if (!ctx->cm_id->device) goto out; resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid; resp.port_num = ctx->cm_id->port_num; if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_ib_route(&resp, &ctx->cm_id->route); else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iboe_route(&resp, &ctx->cm_id->route); else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num)) ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; ucma_put_ctx(ctx); return ret; } static void ucma_query_device_addr(struct rdma_cm_id *cm_id, struct rdma_ucm_query_addr_resp *resp) { if (!cm_id->device) return; resp->node_guid = (__force __u64) cm_id->device->node_guid; resp->port_num = cm_id->port_num; resp->pkey = (__force __u16) cpu_to_be16( ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); } static ssize_t ucma_query_addr(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_addr_resp resp; struct sockaddr *addr; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; resp.src_size = rdma_addr_size(addr); memcpy(&resp.src_addr, addr, resp.src_size); addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; resp.dst_size = rdma_addr_size(addr); memcpy(&resp.dst_addr, addr, resp.dst_size); ucma_query_device_addr(ctx->cm_id, &resp); if (copy_to_user(response, &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_query_path(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_path_resp *resp; int i, ret = 0; if (out_len < sizeof(*resp)) return -ENOSPC; resp = kzalloc(out_len, GFP_KERNEL); if (!resp) return -ENOMEM; resp->num_paths = ctx->cm_id->route.num_paths; for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL; ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], &resp->path_data[i].path_rec); } if (copy_to_user(response, resp, sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) ret = -EFAULT; kfree(resp); return ret; } static ssize_t ucma_query_gid(struct ucma_context *ctx, void __user *response, int out_len) { struct rdma_ucm_query_addr_resp resp; struct sockaddr_ib *addr; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; memset(&resp, 0, sizeof resp); ucma_query_device_addr(ctx->cm_id, &resp); addr = (struct sockaddr_ib *) &resp.src_addr; resp.src_size = sizeof(*addr); if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); } else { addr->sib_family = AF_IB; addr->sib_pkey = (__force __be16) resp.pkey; rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, (union ib_gid *) &addr->sib_addr); addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) &ctx->cm_id->route.addr.src_addr); } addr = (struct sockaddr_ib *) &resp.dst_addr; resp.dst_size = sizeof(*addr); if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); } else { addr->sib_family = AF_IB; addr->sib_pkey = (__force __be16) resp.pkey; rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, (union ib_gid *) &addr->sib_addr); addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr); } if (copy_to_user(response, &resp, sizeof(resp))) ret = -EFAULT; return ret; } static ssize_t ucma_query(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_query cmd; struct ucma_context *ctx; void __user *response; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; response = (void __user *)(unsigned long) cmd.response; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); switch (cmd.option) { case RDMA_USER_CM_QUERY_ADDR: ret = ucma_query_addr(ctx, response, out_len); break; case RDMA_USER_CM_QUERY_PATH: ret = ucma_query_path(ctx, response, out_len); break; case RDMA_USER_CM_QUERY_GID: ret = ucma_query_gid(ctx, response, out_len); break; default: ret = -ENOSYS; break; } ucma_put_ctx(ctx); return ret; } static void ucma_copy_conn_param(struct rdma_cm_id *id, struct rdma_conn_param *dst, struct rdma_ucm_conn_param *src) { dst->private_data = src->private_data; dst->private_data_len = src->private_data_len; dst->responder_resources =src->responder_resources; dst->initiator_depth = src->initiator_depth; dst->flow_control = src->flow_control; dst->retry_count = src->retry_count; dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; } static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_connect cmd; struct rdma_conn_param conn_param; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; if (!cmd.conn_param.valid) return -EINVAL; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); ret = rdma_connect(ctx->cm_id, &conn_param); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_listen cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? cmd.backlog : max_backlog; ret = rdma_listen(ctx->cm_id, ctx->backlog); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_accept cmd; struct rdma_conn_param conn_param; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); } else ret = rdma_accept(ctx->cm_id, NULL); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_reject cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_disconnect cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_disconnect(ctx->cm_id); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_init_qp_attr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_init_qp_attr cmd; struct ib_uverbs_qp_attr resp; struct ucma_context *ctx; struct ib_qp_attr qp_attr; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); if (ret) goto out; ib_copy_qp_attr_to_user(&resp, &qp_attr); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; out: ucma_put_ctx(ctx); return ret; } static int ucma_set_option_id(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { int ret = 0; switch (optname) { case RDMA_OPTION_ID_TOS: if (optlen != sizeof(u8)) { ret = -EINVAL; break; } rdma_set_service_type(ctx->cm_id, *((u8 *) optval)); break; case RDMA_OPTION_ID_REUSEADDR: if (optlen != sizeof(int)) { ret = -EINVAL; break; } ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); break; case RDMA_OPTION_ID_AFONLY: if (optlen != sizeof(int)) { ret = -EINVAL; break; } ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); break; default: ret = -ENOSYS; } return ret; } static int ucma_set_ib_path(struct ucma_context *ctx, struct ib_path_rec_data *path_data, size_t optlen) { struct ib_sa_path_rec sa_path; struct rdma_cm_event event; int ret; if (optlen % sizeof(*path_data)) return -EINVAL; for (; optlen; optlen -= sizeof(*path_data), path_data++) { if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY | IB_PATH_BIDIRECTIONAL)) break; } if (!optlen) return -EINVAL; memset(&sa_path, 0, sizeof(sa_path)); ib_sa_unpack_path(path_data->path_rec, &sa_path); ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); if (ret) return ret; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; return ucma_event_handler(ctx->cm_id, &event); } static int ucma_set_option_ib(struct ucma_context *ctx, int optname, void *optval, size_t optlen) { int ret; switch (optname) { case RDMA_OPTION_IB_PATH: ret = ucma_set_ib_path(ctx, optval, optlen); break; default: ret = -ENOSYS; } return ret; } static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { int ret; switch (level) { case RDMA_OPTION_ID: ret = ucma_set_option_id(ctx, optname, optval, optlen); break; case RDMA_OPTION_IB: ret = ucma_set_option_ib(ctx, optname, optval, optlen); break; default: ret = -ENOSYS; } return ret; } static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_set_option cmd; struct ucma_context *ctx; void *optval; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); optval = memdup_user((void __user *) (unsigned long) cmd.optval, cmd.optlen); if (IS_ERR(optval)) { ret = PTR_ERR(optval); goto out; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, cmd.optlen); kfree(optval); out: ucma_put_ctx(ctx); return ret; } static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_notify cmd; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_process_join(struct ucma_file *file, struct rdma_ucm_join_mcast *cmd, int out_len) { struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; struct ucma_multicast *mc; struct sockaddr *addr; int ret; u8 join_state; if (out_len < sizeof(resp)) return -ENOSPC; addr = (struct sockaddr *) &cmd->addr; if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) return -EINVAL; if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER) join_state = BIT(FULLMEMBER_JOIN); else if (cmd->join_flags == RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER) join_state = BIT(SENDONLY_FULLMEMBER_JOIN); else return -EINVAL; ctx = ucma_get_ctx(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); mutex_lock(&file->mut); mc = ucma_alloc_multicast(ctx); if (!mc) { ret = -ENOMEM; goto err1; } mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); if (ret) goto err2; resp.id = mc->id; if (copy_to_user((void __user *)(unsigned long) cmd->response, &resp, sizeof(resp))) { ret = -EFAULT; goto err3; } mutex_unlock(&file->mut); ucma_put_ctx(ctx); return 0; err3: rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); ucma_cleanup_mc_events(mc); err2: mutex_lock(&mut); idr_remove(&multicast_idr, mc->id); mutex_unlock(&mut); list_del(&mc->list); kfree(mc); err1: mutex_unlock(&file->mut); ucma_put_ctx(ctx); return ret; } static ssize_t ucma_join_ip_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_join_ip_mcast cmd; struct rdma_ucm_join_mcast join_cmd; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; join_cmd.response = cmd.response; join_cmd.uid = cmd.uid; join_cmd.id = cmd.id; join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER; memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); return ucma_process_join(file, &join_cmd, out_len); } static ssize_t ucma_join_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_join_mcast cmd; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; return ucma_process_join(file, &cmd, out_len); } static ssize_t ucma_leave_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_destroy_id cmd; struct rdma_ucm_destroy_id_resp resp; struct ucma_multicast *mc; int ret = 0; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; mutex_lock(&mut); mc = idr_find(&multicast_idr, cmd.id); if (!mc) mc = ERR_PTR(-ENOENT); else if (mc->ctx->file != file) mc = ERR_PTR(-EINVAL); else if (!atomic_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); else idr_remove(&multicast_idr, mc->id); mutex_unlock(&mut); if (IS_ERR(mc)) { ret = PTR_ERR(mc); goto out; } rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); list_del(&mc->list); mutex_unlock(&mc->ctx->file->mut); ucma_put_ctx(mc->ctx); resp.events_reported = mc->events_reported; kfree(mc); if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; out: return ret; } static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) { /* Acquire mutex's based on pointer comparison to prevent deadlock. */ if (file1 < file2) { mutex_lock(&file1->mut); mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); } else { mutex_lock(&file2->mut); mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); } } static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) { if (file1 < file2) { mutex_unlock(&file2->mut); mutex_unlock(&file1->mut); } else { mutex_unlock(&file1->mut); mutex_unlock(&file2->mut); } } static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) { struct ucma_event *uevent, *tmp; list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) if (uevent->ctx == ctx) list_move_tail(&uevent->list, &file->event_list); } static ssize_t ucma_migrate_id(struct ucma_file *new_file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_context *ctx; struct fd f; struct ucma_file *cur_file; int ret = 0; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; /* Get current fd to protect against it being closed */ f = fdget(cmd.fd); if (!f.file) return -ENOENT; /* Validate current fd and prevent destruction of id. */ ctx = ucma_get_ctx(f.file->private_data, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; } cur_file = ctx->file; if (cur_file == new_file) { resp.events_reported = ctx->events_reported; goto response; } /* * Migrate events between fd's, maintaining order, and avoiding new * events being added before existing events. */ ucma_lock_files(cur_file, new_file); mutex_lock(&mut); list_move_tail(&ctx->list, &new_file->ctx_list); ucma_move_events(ctx, new_file); ctx->file = new_file; resp.events_reported = ctx->events_reported; mutex_unlock(&mut); ucma_unlock_files(cur_file, new_file); response: if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; ucma_put_ctx(ctx); file_put: fdput(f); return ret; } static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, [RDMA_USER_CM_CMD_REJECT] = ucma_reject, [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, [RDMA_USER_CM_CMD_GET_OPTION] = NULL, [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, [RDMA_USER_CM_CMD_QUERY] = ucma_query, [RDMA_USER_CM_CMD_BIND] = ucma_bind, [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast }; static ssize_t ucma_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct ucma_file *file = filp->private_data; struct rdma_ucm_cmd_hdr hdr; ssize_t ret; if (WARN_ON_ONCE(!ib_safe_file_access(filp))) return -EACCES; if (len < sizeof(hdr)) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT; if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; if (hdr.in + sizeof(hdr) > len) return -EINVAL; if (!ucma_cmd_table[hdr.cmd]) return -ENOSYS; ret = ucma_cmd_table[hdr.cmd](file, buf + sizeof(hdr), hdr.in, hdr.out); if (!ret) ret = len; return ret; } static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait) { struct ucma_file *file = filp->private_data; unsigned int mask = 0; poll_wait(filp, &file->poll_wait, wait); if (!list_empty(&file->event_list)) mask = POLLIN | POLLRDNORM; return mask; } /* * ucma_open() does not need the BKL: * * - no global state is referred to; * - there is no ioctl method to race against; * - no further module initialization is required for open to work * after the device is registered. */ static int ucma_open(struct inode *inode, struct file *filp) { struct ucma_file *file; file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) return -ENOMEM; file->close_wq = alloc_ordered_workqueue("ucma_close_id", WQ_MEM_RECLAIM); if (!file->close_wq) { kfree(file); return -ENOMEM; } INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); mutex_init(&file->mut); filp->private_data = file; file->filp = filp; return nonseekable_open(inode, filp); } static int ucma_close(struct inode *inode, struct file *filp) { struct ucma_file *file = filp->private_data; struct ucma_context *ctx, *tmp; mutex_lock(&file->mut); list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { ctx->destroying = 1; mutex_unlock(&file->mut); mutex_lock(&mut); idr_remove(&ctx_idr, ctx->id); mutex_unlock(&mut); flush_workqueue(file->close_wq); /* At that step once ctx was marked as destroying and workqueue * was flushed we are safe from any inflights handlers that * might put other closing task. */ mutex_lock(&mut); if (!ctx->closing) { mutex_unlock(&mut); /* rdma_destroy_id ensures that no event handlers are * inflight for that id before releasing it. */ rdma_destroy_id(ctx->cm_id); } else { mutex_unlock(&mut); } ucma_free_ctx(ctx); mutex_lock(&file->mut); } mutex_unlock(&file->mut); destroy_workqueue(file->close_wq); kfree(file); return 0; } static long ucma_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case FIONBIO: case FIOASYNC: return (0); default: return (-ENOTTY); } } static const struct file_operations ucma_fops = { .owner = THIS_MODULE, .open = ucma_open, .release = ucma_close, .write = ucma_write, .unlocked_ioctl = ucma_ioctl, .poll = ucma_poll, .llseek = no_llseek, }; static struct miscdevice ucma_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "rdma_cm", .nodename = "infiniband/rdma_cm", .mode = 0666, .fops = &ucma_fops, }; static ssize_t show_abi_version(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION); } static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static int __init ucma_init(void) { int ret; ret = misc_register(&ucma_misc); if (ret) return ret; ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version); if (ret) { pr_err("rdma_ucm: couldn't create abi_version attr\n"); goto err1; } return 0; err1: misc_deregister(&ucma_misc); return ret; } static void __exit ucma_cleanup(void) { device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); idr_destroy(&multicast_idr); } module_init(ucma_init); module_exit(ucma_cleanup); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_ud_header.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_ud_header.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_ud_header.c (revision 331772) @@ -1,551 +1,555 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #define STRUCT_FIELD(header, field) \ .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \ .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \ .field_name = #header ":" #field static const struct ib_field lrh_table[] = { { STRUCT_FIELD(lrh, virtual_lane), .offset_words = 0, .offset_bits = 0, .size_bits = 4 }, { STRUCT_FIELD(lrh, link_version), .offset_words = 0, .offset_bits = 4, .size_bits = 4 }, { STRUCT_FIELD(lrh, service_level), .offset_words = 0, .offset_bits = 8, .size_bits = 4 }, { RESERVED, .offset_words = 0, .offset_bits = 12, .size_bits = 2 }, { STRUCT_FIELD(lrh, link_next_header), .offset_words = 0, .offset_bits = 14, .size_bits = 2 }, { STRUCT_FIELD(lrh, destination_lid), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { RESERVED, .offset_words = 1, .offset_bits = 0, .size_bits = 5 }, { STRUCT_FIELD(lrh, packet_length), .offset_words = 1, .offset_bits = 5, .size_bits = 11 }, { STRUCT_FIELD(lrh, source_lid), .offset_words = 1, .offset_bits = 16, .size_bits = 16 } }; static const struct ib_field eth_table[] = { { STRUCT_FIELD(eth, dmac_h), .offset_words = 0, .offset_bits = 0, .size_bits = 32 }, { STRUCT_FIELD(eth, dmac_l), .offset_words = 1, .offset_bits = 0, .size_bits = 16 }, { STRUCT_FIELD(eth, smac_h), .offset_words = 1, .offset_bits = 16, .size_bits = 16 }, { STRUCT_FIELD(eth, smac_l), .offset_words = 2, .offset_bits = 0, .size_bits = 32 }, { STRUCT_FIELD(eth, type), .offset_words = 3, .offset_bits = 0, .size_bits = 16 } }; static const struct ib_field vlan_table[] = { { STRUCT_FIELD(vlan, tag), .offset_words = 0, .offset_bits = 0, .size_bits = 16 }, { STRUCT_FIELD(vlan, type), .offset_words = 0, .offset_bits = 16, .size_bits = 16 } }; static const struct ib_field ip4_table[] = { { STRUCT_FIELD(ip4, ver), .offset_words = 0, .offset_bits = 0, .size_bits = 4 }, { STRUCT_FIELD(ip4, hdr_len), .offset_words = 0, .offset_bits = 4, .size_bits = 4 }, { STRUCT_FIELD(ip4, tos), .offset_words = 0, .offset_bits = 8, .size_bits = 8 }, { STRUCT_FIELD(ip4, tot_len), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { STRUCT_FIELD(ip4, id), .offset_words = 1, .offset_bits = 0, .size_bits = 16 }, { STRUCT_FIELD(ip4, frag_off), .offset_words = 1, .offset_bits = 16, .size_bits = 16 }, { STRUCT_FIELD(ip4, ttl), .offset_words = 2, .offset_bits = 0, .size_bits = 8 }, { STRUCT_FIELD(ip4, protocol), .offset_words = 2, .offset_bits = 8, .size_bits = 8 }, { STRUCT_FIELD(ip4, check), .offset_words = 2, .offset_bits = 16, .size_bits = 16 }, { STRUCT_FIELD(ip4, saddr), .offset_words = 3, .offset_bits = 0, .size_bits = 32 }, { STRUCT_FIELD(ip4, daddr), .offset_words = 4, .offset_bits = 0, .size_bits = 32 } }; static const struct ib_field udp_table[] = { { STRUCT_FIELD(udp, sport), .offset_words = 0, .offset_bits = 0, .size_bits = 16 }, { STRUCT_FIELD(udp, dport), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { STRUCT_FIELD(udp, length), .offset_words = 1, .offset_bits = 0, .size_bits = 16 }, { STRUCT_FIELD(udp, csum), .offset_words = 1, .offset_bits = 16, .size_bits = 16 } }; static const struct ib_field grh_table[] = { { STRUCT_FIELD(grh, ip_version), .offset_words = 0, .offset_bits = 0, .size_bits = 4 }, { STRUCT_FIELD(grh, traffic_class), .offset_words = 0, .offset_bits = 4, .size_bits = 8 }, { STRUCT_FIELD(grh, flow_label), .offset_words = 0, .offset_bits = 12, .size_bits = 20 }, { STRUCT_FIELD(grh, payload_length), .offset_words = 1, .offset_bits = 0, .size_bits = 16 }, { STRUCT_FIELD(grh, next_header), .offset_words = 1, .offset_bits = 16, .size_bits = 8 }, { STRUCT_FIELD(grh, hop_limit), .offset_words = 1, .offset_bits = 24, .size_bits = 8 }, { STRUCT_FIELD(grh, source_gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { STRUCT_FIELD(grh, destination_gid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 } }; static const struct ib_field bth_table[] = { { STRUCT_FIELD(bth, opcode), .offset_words = 0, .offset_bits = 0, .size_bits = 8 }, { STRUCT_FIELD(bth, solicited_event), .offset_words = 0, .offset_bits = 8, .size_bits = 1 }, { STRUCT_FIELD(bth, mig_req), .offset_words = 0, .offset_bits = 9, .size_bits = 1 }, { STRUCT_FIELD(bth, pad_count), .offset_words = 0, .offset_bits = 10, .size_bits = 2 }, { STRUCT_FIELD(bth, transport_header_version), .offset_words = 0, .offset_bits = 12, .size_bits = 4 }, { STRUCT_FIELD(bth, pkey), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { RESERVED, .offset_words = 1, .offset_bits = 0, .size_bits = 8 }, { STRUCT_FIELD(bth, destination_qpn), .offset_words = 1, .offset_bits = 8, .size_bits = 24 }, { STRUCT_FIELD(bth, ack_req), .offset_words = 2, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 2, .offset_bits = 1, .size_bits = 7 }, { STRUCT_FIELD(bth, psn), .offset_words = 2, .offset_bits = 8, .size_bits = 24 } }; static const struct ib_field deth_table[] = { { STRUCT_FIELD(deth, qkey), .offset_words = 0, .offset_bits = 0, .size_bits = 32 }, { RESERVED, .offset_words = 1, .offset_bits = 0, .size_bits = 8 }, { STRUCT_FIELD(deth, source_qpn), .offset_words = 1, .offset_bits = 8, .size_bits = 24 } }; __sum16 ib_ud_ip4_csum(struct ib_ud_header *header) { #if defined(INET) || defined(INET6) struct ip iph; iph.ip_hl = 5; iph.ip_v = 4; iph.ip_tos = header->ip4.tos; iph.ip_len = header->ip4.tot_len; iph.ip_id = header->ip4.id; iph.ip_off = header->ip4.frag_off; iph.ip_ttl = header->ip4.ttl; iph.ip_p = header->ip4.protocol; iph.ip_sum = 0; iph.ip_src.s_addr = header->ip4.saddr; iph.ip_dst.s_addr = header->ip4.daddr; return in_cksum_hdr(&iph); #else return 0; #endif } EXPORT_SYMBOL(ib_ud_ip4_csum); /** * ib_ud_header_init - Initialize UD header structure * @payload_bytes:Length of packet payload * @lrh_present: specify if LRH is present * @eth_present: specify if Eth header is present * @vlan_present: packet is tagged vlan * @grh_present: GRH flag (if non-zero, GRH will be included) * @ip_version: if non-zero, IP header, V4 or V6, will be included * @udp_present :if non-zero, UDP header will be included * @immediate_present: specify if immediate data is present * @header:Structure to initialize */ int ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, int vlan_present, int grh_present, int ip_version, int udp_present, int immediate_present, struct ib_ud_header *header) { size_t udp_bytes = udp_present ? IB_UDP_BYTES : 0; grh_present = grh_present && !ip_version; memset(header, 0, sizeof *header); /* * UDP header without IP header doesn't make sense */ if (udp_present && ip_version != 4 && ip_version != 6) return -EINVAL; if (lrh_present) { u16 packet_length; header->lrh.link_version = 0; header->lrh.link_next_header = grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL; packet_length = (IB_LRH_BYTES + IB_BTH_BYTES + IB_DETH_BYTES + (grh_present ? IB_GRH_BYTES : 0) + payload_bytes + 4 + /* ICRC */ 3) / 4; /* round up */ header->lrh.packet_length = cpu_to_be16(packet_length); } if (vlan_present) header->eth.type = cpu_to_be16(ETH_P_8021Q); if (ip_version == 6 || grh_present) { header->grh.ip_version = 6; header->grh.payload_length = cpu_to_be16((udp_bytes + IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + 4 + /* ICRC */ 3) & ~3); /* round up */ header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b; } if (ip_version == 4) { header->ip4.ver = 4; /* version 4 */ header->ip4.hdr_len = 5; /* 5 words */ header->ip4.tot_len = cpu_to_be16(IB_IP4_BYTES + udp_bytes + IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + 4); /* ICRC */ header->ip4.protocol = IPPROTO_UDP; } if (udp_present && ip_version) header->udp.length = cpu_to_be16(IB_UDP_BYTES + IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes + 4); /* ICRC */ if (immediate_present) header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; else header->bth.opcode = IB_OPCODE_UD_SEND_ONLY; header->bth.pad_count = (4 - payload_bytes) & 3; header->bth.transport_header_version = 0; header->lrh_present = lrh_present; header->eth_present = eth_present; header->vlan_present = vlan_present; header->grh_present = grh_present || (ip_version == 6); header->ipv4_present = ip_version == 4; header->udp_present = udp_present; header->immediate_present = immediate_present; return 0; } EXPORT_SYMBOL(ib_ud_header_init); /** * ib_ud_header_pack - Pack UD header struct into wire format * @header:UD header struct * @buf:Buffer to pack into * * ib_ud_header_pack() packs the UD header structure @header into wire * format in the buffer @buf. */ int ib_ud_header_pack(struct ib_ud_header *header, void *buf) { int len = 0; if (header->lrh_present) { ib_pack(lrh_table, ARRAY_SIZE(lrh_table), &header->lrh, (char *)buf + len); len += IB_LRH_BYTES; } if (header->eth_present) { ib_pack(eth_table, ARRAY_SIZE(eth_table), &header->eth, (char *)buf + len); len += IB_ETH_BYTES; } if (header->vlan_present) { ib_pack(vlan_table, ARRAY_SIZE(vlan_table), &header->vlan, (char *)buf + len); len += IB_VLAN_BYTES; } if (header->grh_present) { ib_pack(grh_table, ARRAY_SIZE(grh_table), &header->grh, (char *)buf + len); len += IB_GRH_BYTES; } if (header->ipv4_present) { ib_pack(ip4_table, ARRAY_SIZE(ip4_table), &header->ip4, (char *)buf + len); len += IB_IP4_BYTES; } if (header->udp_present) { ib_pack(udp_table, ARRAY_SIZE(udp_table), &header->udp, (char *)buf + len); len += IB_UDP_BYTES; } ib_pack(bth_table, ARRAY_SIZE(bth_table), &header->bth, (char *)buf + len); len += IB_BTH_BYTES; ib_pack(deth_table, ARRAY_SIZE(deth_table), &header->deth, (char *)buf + len); len += IB_DETH_BYTES; if (header->immediate_present) { memcpy((char *)buf + len, &header->immediate_data, sizeof header->immediate_data); len += sizeof header->immediate_data; } return len; } EXPORT_SYMBOL(ib_ud_header_pack); /** * ib_ud_header_unpack - Unpack UD header struct from wire format * @header:UD header struct * @buf:Buffer to pack into * * ib_ud_header_pack() unpacks the UD header structure @header from wire * format in the buffer @buf. */ int ib_ud_header_unpack(void *buf, struct ib_ud_header *header) { ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, &header->lrh); buf = (char *)buf + IB_LRH_BYTES; if (header->lrh.link_version != 0) { pr_warn("Invalid LRH.link_version %d\n", header->lrh.link_version); return -EINVAL; } switch (header->lrh.link_next_header) { case IB_LNH_IBA_LOCAL: header->grh_present = 0; break; case IB_LNH_IBA_GLOBAL: header->grh_present = 1; ib_unpack(grh_table, ARRAY_SIZE(grh_table), buf, &header->grh); buf = (char *)buf + IB_GRH_BYTES; if (header->grh.ip_version != 6) { pr_warn("Invalid GRH.ip_version %d\n", header->grh.ip_version); return -EINVAL; } if (header->grh.next_header != 0x1b) { pr_warn("Invalid GRH.next_header 0x%02x\n", header->grh.next_header); return -EINVAL; } break; default: pr_warn("Invalid LRH.link_next_header %d\n", header->lrh.link_next_header); return -EINVAL; } ib_unpack(bth_table, ARRAY_SIZE(bth_table), buf, &header->bth); buf = (char *)buf + IB_BTH_BYTES; switch (header->bth.opcode) { case IB_OPCODE_UD_SEND_ONLY: header->immediate_present = 0; break; case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE: header->immediate_present = 1; break; default: pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode); return -EINVAL; } if (header->bth.transport_header_version != 0) { pr_warn("Invalid BTH.transport_header_version %d\n", header->bth.transport_header_version); return -EINVAL; } ib_unpack(deth_table, ARRAY_SIZE(deth_table), buf, &header->deth); buf = (char *)buf + IB_DETH_BYTES; if (header->immediate_present) memcpy(&header->immediate_data, buf, sizeof header->immediate_data); return 0; } EXPORT_SYMBOL(ib_ud_header_unpack); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_umem.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_umem.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_umem.c (revision 331772) @@ -1,352 +1,356 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include "uverbs.h" #include static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { struct scatterlist *sg; struct page *page; int i; if (umem->nmap > 0) ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->nmap, DMA_BIDIRECTIONAL); for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { page = sg_page(sg); put_page(page); } sg_free_table(&umem->sg_head); return; } /** * ib_umem_get - Pin and DMA map userspace memory. * * If access flags indicate ODP memory, avoid pinning. Instead, stores * the mm for future page fault handling in conjunction with MMU notifiers. * * @context: userspace context to pin memory for * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned * @dmasync: flush in-flight DMA when the memory region is written */ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync) { struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; unsigned long locked; unsigned long cur_base; unsigned long npages; int ret; int i; struct dma_attrs dma_attrs = { 0 }; struct scatterlist *sg, *sg_list_start; int need_release = 0; unsigned int gup_flags = FOLL_WRITE; if (dmasync) dma_attrs.flags |= DMA_ATTR_WRITE_BARRIER; if (!size) return ERR_PTR(-EINVAL); /* * If the combination of the addr and size requested for this memory * region causes an integer overflow, return error. */ if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size)) return ERR_PTR(-EINVAL); if (priv_check(curthread, PRIV_VM_MLOCK) != 0) return ERR_PTR(-EPERM); umem = kzalloc(sizeof *umem, GFP_KERNEL); if (!umem) return ERR_PTR(-ENOMEM); umem->context = context; umem->length = size; umem->address = addr; umem->page_size = PAGE_SIZE; umem->pid = get_pid(task_pid(current)); /* * We ask for writable memory if any of the following * access flags are set. "Local write" and "remote write" * obviously require write access. "Remote atomic" can do * things like fetch and add, which will modify memory, and * "MW bind" can change permissions by binding a window. */ umem->writable = !!(access & (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND)); if (access & IB_ACCESS_ON_DEMAND) { ret = ib_umem_odp_get(context, umem); if (ret) { kfree(umem); return ERR_PTR(ret); } return umem; } umem->odp_data = NULL; page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { kfree(umem); return ERR_PTR(-ENOMEM); } vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL); npages = ib_umem_num_pages(umem); down_write(¤t->mm->mmap_sem); locked = npages + current->mm->pinned_vm; cur_base = addr & PAGE_MASK; if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; goto out; } ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); if (ret) goto out; if (!umem->writable) gup_flags |= FOLL_FORCE; need_release = 1; sg_list_start = umem->sg_head.sgl; while (npages) { ret = get_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), gup_flags, page_list, vma_list); if (ret < 0) goto out; umem->npages += ret; cur_base += ret * PAGE_SIZE; npages -= ret; for_each_sg(sg_list_start, sg, ret, i) { sg_set_page(sg, page_list[i], PAGE_SIZE, 0); } /* preparing for next loop */ sg_list_start = sg; } umem->nmap = ib_dma_map_sg_attrs(context->device, umem->sg_head.sgl, umem->npages, DMA_BIDIRECTIONAL, &dma_attrs); if (umem->nmap <= 0) { ret = -ENOMEM; goto out; } ret = 0; out: if (ret < 0) { if (need_release) __ib_umem_release(context->device, umem, 0); put_pid(umem->pid); kfree(umem); } else current->mm->pinned_vm = locked; up_write(¤t->mm->mmap_sem); if (vma_list) free_page((unsigned long) vma_list); free_page((unsigned long) page_list); return ret < 0 ? ERR_PTR(ret) : umem; } EXPORT_SYMBOL(ib_umem_get); static void ib_umem_account(struct work_struct *work) { struct ib_umem *umem = container_of(work, struct ib_umem, work); down_write(&umem->mm->mmap_sem); umem->mm->pinned_vm -= umem->diff; up_write(&umem->mm->mmap_sem); mmput(umem->mm); kfree(umem); } /** * ib_umem_release - release memory pinned with ib_umem_get * @umem: umem struct to release */ void ib_umem_release(struct ib_umem *umem) { struct ib_ucontext *context = umem->context; struct mm_struct *mm; struct task_struct *task; unsigned long diff; if (umem->odp_data) { ib_umem_odp_release(umem); return; } __ib_umem_release(umem->context->device, umem, 1); task = get_pid_task(umem->pid, PIDTYPE_PID); put_pid(umem->pid); if (!task) goto out; mm = get_task_mm(task); put_task_struct(task); if (!mm) goto out; diff = ib_umem_num_pages(umem); /* * We may be called with the mm's mmap_sem already held. This * can happen when a userspace munmap() is the call that drops * the last reference to our file and calls our release * method. If there are memory regions to destroy, we'll end * up here and not be able to take the mmap_sem. In that case * we defer the vm_locked accounting to the system workqueue. */ if (context->closing) { if (!down_write_trylock(&mm->mmap_sem)) { INIT_WORK(&umem->work, ib_umem_account); umem->mm = mm; umem->diff = diff; queue_work(ib_wq, &umem->work); return; } } else down_write(&mm->mmap_sem); mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); out: kfree(umem); } EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { int shift; int i; int n; struct scatterlist *sg; if (umem->odp_data) return ib_umem_num_pages(umem); shift = ilog2(umem->page_size); n = 0; for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) n += sg_dma_len(sg) >> shift; return n; } EXPORT_SYMBOL(ib_umem_page_count); /* * Copy from the given ib_umem's pages to the given buffer. * * umem - the umem to copy from * offset - offset to start copying from * dst - destination buffer * length - buffer length * * Returns 0 on success, or an error code. */ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { size_t end = offset + length; int ret; if (offset > umem->length || length > umem->length - offset) { pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n", offset, umem->length, end); return -EINVAL; } #ifdef __linux__ ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length, offset + ib_umem_offset(umem)); #else ret = 0; #endif if (ret < 0) return ret; else if (ret != length) return -EINVAL; else return 0; } EXPORT_SYMBOL(ib_umem_copy_from); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_umem_odp.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_umem_odp.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_umem_odp.c (revision 331772) @@ -1,667 +1,671 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include static void ib_umem_notifier_start_account(struct ib_umem *item) { mutex_lock(&item->odp_data->umem_mutex); /* Only update private counters for this umem if it has them. * Otherwise skip it. All page faults will be delayed for this umem. */ if (item->odp_data->mn_counters_active) { int notifiers_count = item->odp_data->notifiers_count++; if (notifiers_count == 0) /* Initialize the completion object for waiting on * notifiers. Since notifier_count is zero, no one * should be waiting right now. */ reinit_completion(&item->odp_data->notifier_completion); } mutex_unlock(&item->odp_data->umem_mutex); } static void ib_umem_notifier_end_account(struct ib_umem *item) { mutex_lock(&item->odp_data->umem_mutex); /* Only update private counters for this umem if it has them. * Otherwise skip it. All page faults will be delayed for this umem. */ if (item->odp_data->mn_counters_active) { /* * This sequence increase will notify the QP page fault that * the page that is going to be mapped in the spte could have * been freed. */ ++item->odp_data->notifiers_seq; if (--item->odp_data->notifiers_count == 0) complete_all(&item->odp_data->notifier_completion); } mutex_unlock(&item->odp_data->umem_mutex); } /* Account for a new mmu notifier in an ib_ucontext. */ static void ib_ucontext_notifier_start_account(struct ib_ucontext *context) { atomic_inc(&context->notifier_count); } /* Account for a terminating mmu notifier in an ib_ucontext. * * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since * the function takes the semaphore itself. */ static void ib_ucontext_notifier_end_account(struct ib_ucontext *context) { int zero_notifiers = atomic_dec_and_test(&context->notifier_count); if (zero_notifiers && !list_empty(&context->no_private_counters)) { /* No currently running mmu notifiers. Now is the chance to * add private accounting to all previously added umems. */ struct ib_umem_odp *odp_data, *next; /* Prevent concurrent mmu notifiers from working on the * no_private_counters list. */ down_write(&context->umem_rwsem); /* Read the notifier_count again, with the umem_rwsem * semaphore taken for write. */ if (!atomic_read(&context->notifier_count)) { list_for_each_entry_safe(odp_data, next, &context->no_private_counters, no_private_counters) { mutex_lock(&odp_data->umem_mutex); odp_data->mn_counters_active = true; list_del(&odp_data->no_private_counters); complete_all(&odp_data->notifier_completion); mutex_unlock(&odp_data->umem_mutex); } } up_write(&context->umem_rwsem); } } static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start, u64 end, void *cookie) { /* * Increase the number of notifiers running, to * prevent any further fault handling on this MR. */ ib_umem_notifier_start_account(item); item->odp_data->dying = 1; /* Make sure that the fact the umem is dying is out before we release * all pending page faults. */ smp_wmb(); complete_all(&item->odp_data->notifier_completion); item->context->invalidate_range(item, ib_umem_start(item), ib_umem_end(item)); return 0; } static void ib_umem_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); if (!context->invalidate_range) return; ib_ucontext_notifier_start_account(context); down_read(&context->umem_rwsem); rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, ULLONG_MAX, ib_umem_notifier_release_trampoline, NULL); up_read(&context->umem_rwsem); } static int invalidate_page_trampoline(struct ib_umem *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_start_account(item); item->context->invalidate_range(item, start, start + PAGE_SIZE); ib_umem_notifier_end_account(item); return 0; } static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address) { struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); if (!context->invalidate_range) return; ib_ucontext_notifier_start_account(context); down_read(&context->umem_rwsem); rbt_ib_umem_for_each_in_range(&context->umem_tree, address, address + PAGE_SIZE, invalidate_page_trampoline, NULL); up_read(&context->umem_rwsem); ib_ucontext_notifier_end_account(context); } static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_start_account(item); item->context->invalidate_range(item, start, end); return 0; } static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) { struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); if (!context->invalidate_range) return; ib_ucontext_notifier_start_account(context); down_read(&context->umem_rwsem); rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end, invalidate_range_start_trampoline, NULL); up_read(&context->umem_rwsem); } static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, u64 end, void *cookie) { ib_umem_notifier_end_account(item); return 0; } static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) { struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); if (!context->invalidate_range) return; down_read(&context->umem_rwsem); rbt_ib_umem_for_each_in_range(&context->umem_tree, start, end, invalidate_range_end_trampoline, NULL); up_read(&context->umem_rwsem); ib_ucontext_notifier_end_account(context); } static const struct mmu_notifier_ops ib_umem_notifiers = { .release = ib_umem_notifier_release, .invalidate_page = ib_umem_notifier_invalidate_page, .invalidate_range_start = ib_umem_notifier_invalidate_range_start, .invalidate_range_end = ib_umem_notifier_invalidate_range_end, }; int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) { int ret_val; pid_t our_pid; struct mm_struct *mm = get_task_mm(current); if (!mm) return -EINVAL; /* Prevent creating ODP MRs in child processes */ rcu_read_lock(); our_pid = get_pid(task_pid_group_leader(current)); rcu_read_unlock(); put_pid(our_pid); if (context->tgid != our_pid) { ret_val = -EINVAL; goto out_mm; } umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL); if (!umem->odp_data) { ret_val = -ENOMEM; goto out_mm; } umem->odp_data->umem = umem; mutex_init(&umem->odp_data->umem_mutex); init_completion(&umem->odp_data->notifier_completion); umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) * sizeof(*umem->odp_data->page_list)); if (!umem->odp_data->page_list) { ret_val = -ENOMEM; goto out_odp_data; } umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) * sizeof(*umem->odp_data->dma_list)); if (!umem->odp_data->dma_list) { ret_val = -ENOMEM; goto out_page_list; } /* * When using MMU notifiers, we will get a * notification before the "current" task (and MM) is * destroyed. We use the umem_rwsem semaphore to synchronize. */ down_write(&context->umem_rwsem); context->odp_mrs_count++; if (likely(ib_umem_start(umem) != ib_umem_end(umem))) rbt_ib_umem_insert(&umem->odp_data->interval_tree, &context->umem_tree); if (likely(!atomic_read(&context->notifier_count)) || context->odp_mrs_count == 1) umem->odp_data->mn_counters_active = true; else list_add(&umem->odp_data->no_private_counters, &context->no_private_counters); downgrade_write(&context->umem_rwsem); if (context->odp_mrs_count == 1) { /* * Note that at this point, no MMU notifier is running * for this context! */ atomic_set(&context->notifier_count, 0); INIT_HLIST_NODE(&context->mn.hlist); context->mn.ops = &ib_umem_notifiers; /* * Lock-dep detects a false positive for mmap_sem vs. * umem_rwsem, due to not grasping downgrade_write correctly. */ ret_val = mmu_notifier_register(&context->mn, mm); if (ret_val) { pr_err("Failed to register mmu_notifier %d\n", ret_val); ret_val = -EBUSY; goto out_mutex; } } up_read(&context->umem_rwsem); /* * Note that doing an mmput can cause a notifier for the relevant mm. * If the notifier is called while we hold the umem_rwsem, this will * cause a deadlock. Therefore, we release the reference only after we * released the semaphore. */ mmput(mm); return 0; out_mutex: up_read(&context->umem_rwsem); vfree(umem->odp_data->dma_list); out_page_list: vfree(umem->odp_data->page_list); out_odp_data: kfree(umem->odp_data); out_mm: mmput(mm); return ret_val; } void ib_umem_odp_release(struct ib_umem *umem) { struct ib_ucontext *context = umem->context; /* * Ensure that no more pages are mapped in the umem. * * It is the driver's responsibility to ensure, before calling us, * that the hardware will not attempt to access the MR any more. */ ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem), ib_umem_end(umem)); down_write(&context->umem_rwsem); if (likely(ib_umem_start(umem) != ib_umem_end(umem))) rbt_ib_umem_remove(&umem->odp_data->interval_tree, &context->umem_tree); context->odp_mrs_count--; if (!umem->odp_data->mn_counters_active) { list_del(&umem->odp_data->no_private_counters); complete_all(&umem->odp_data->notifier_completion); } /* * Downgrade the lock to a read lock. This ensures that the notifiers * (who lock the mutex for reading) will be able to finish, and we * will be able to enventually obtain the mmu notifiers SRCU. Note * that since we are doing it atomically, no other user could register * and unregister while we do the check. */ downgrade_write(&context->umem_rwsem); if (!context->odp_mrs_count) { struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = NULL; owning_process = get_pid_task(context->tgid, PIDTYPE_PID); if (owning_process == NULL) /* * The process is already dead, notifier were removed * already. */ goto out; owning_mm = get_task_mm(owning_process); if (owning_mm == NULL) /* * The process' mm is already dead, notifier were * removed already. */ goto out_put_task; mmu_notifier_unregister(&context->mn, owning_mm); mmput(owning_mm); out_put_task: put_task_struct(owning_process); } out: up_read(&context->umem_rwsem); vfree(umem->odp_data->dma_list); vfree(umem->odp_data->page_list); kfree(umem->odp_data); kfree(umem); } /* * Map for DMA and insert a single page into the on-demand paging page tables. * * @umem: the umem to insert the page to. * @page_index: index in the umem to add the page to. * @page: the page struct to map and add. * @access_mask: access permissions needed for this page. * @current_seq: sequence number for synchronization with invalidations. * the sequence number is taken from * umem->odp_data->notifiers_seq. * * The function returns -EFAULT if the DMA mapping operation fails. It returns * -EAGAIN if a concurrent invalidation prevents us from updating the page. * * The page is released via put_page even if the operation failed. For * on-demand pinning, the page is released whenever it isn't stored in the * umem. */ static int ib_umem_odp_map_dma_single_page( struct ib_umem *umem, int page_index, u64 base_virt_addr, struct page *page, u64 access_mask, unsigned long current_seq) { struct ib_device *dev = umem->context->device; dma_addr_t dma_addr; int stored_page = 0; int remove_existing_mapping = 0; int ret = 0; /* * Note: we avoid writing if seq is different from the initial seq, to * handle case of a racing notifier. This check also allows us to bail * early if we have a notifier running in parallel with us. */ if (ib_umem_mmu_notifier_retry(umem, current_seq)) { ret = -EAGAIN; goto out; } if (!(umem->odp_data->dma_list[page_index])) { dma_addr = ib_dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (ib_dma_mapping_error(dev, dma_addr)) { ret = -EFAULT; goto out; } umem->odp_data->dma_list[page_index] = dma_addr | access_mask; umem->odp_data->page_list[page_index] = page; stored_page = 1; } else if (umem->odp_data->page_list[page_index] == page) { umem->odp_data->dma_list[page_index] |= access_mask; } else { pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", umem->odp_data->page_list[page_index], page); /* Better remove the mapping now, to prevent any further * damage. */ remove_existing_mapping = 1; } out: /* On Demand Paging - avoid pinning the page */ if (umem->context->invalidate_range || !stored_page) put_page(page); if (remove_existing_mapping && umem->context->invalidate_range) { invalidate_page_trampoline( umem, base_virt_addr + (page_index * PAGE_SIZE), base_virt_addr + ((page_index+1)*PAGE_SIZE), NULL); ret = -EAGAIN; } return ret; } /** * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR. * * Pins the range of pages passed in the argument, and maps them to * DMA addresses. The DMA addresses of the mapped pages is updated in * umem->odp_data->dma_list. * * Returns the number of pages mapped in success, negative error code * for failure. * An -EAGAIN error code is returned when a concurrent mmu notifier prevents * the function from completing its task. * * @umem: the umem to map and pin * @user_virt: the address from which we need to map. * @bcnt: the minimal number of bytes to pin and map. The mapping might be * bigger due to alignment, and may also be smaller in case of an error * pinning or mapping a page. The actual pages mapped is returned in * the return value. * @access_mask: bit mask of the requested access permissions for the given * range. * @current_seq: the MMU notifiers sequance value for synchronization with * invalidations. the sequance number is read from * umem->odp_data->notifiers_seq before calling this function */ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, u64 access_mask, unsigned long current_seq) { struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = NULL; struct page **local_page_list = NULL; u64 off; int j, k, ret = 0, start_idx, npages = 0; u64 base_virt_addr; unsigned int flags = 0; if (access_mask == 0) return -EINVAL; if (user_virt < ib_umem_start(umem) || user_virt + bcnt > ib_umem_end(umem)) return -EFAULT; local_page_list = (struct page **)__get_free_page(GFP_KERNEL); if (!local_page_list) return -ENOMEM; off = user_virt & (~PAGE_MASK); user_virt = user_virt & PAGE_MASK; base_virt_addr = user_virt; bcnt += off; /* Charge for the first page offset as well. */ owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID); if (owning_process == NULL) { ret = -EINVAL; goto out_no_task; } owning_mm = get_task_mm(owning_process); if (owning_mm == NULL) { ret = -EINVAL; goto out_put_task; } if (access_mask & ODP_WRITE_ALLOWED_BIT) flags |= FOLL_WRITE; start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; k = start_idx; while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); /* * Note: this might result in redundent page getting. We can * avoid this by checking dma_list to be 0 before calling * get_user_pages. However, this make the code much more * complex (and doesn't gain us much performance in most use * cases). */ npages = get_user_pages_remote(owning_process, owning_mm, user_virt, gup_num_pages, flags, local_page_list, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) break; bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); user_virt += npages << PAGE_SHIFT; mutex_lock(&umem->odp_data->umem_mutex); for (j = 0; j < npages; ++j) { ret = ib_umem_odp_map_dma_single_page( umem, k, base_virt_addr, local_page_list[j], access_mask, current_seq); if (ret < 0) break; k++; } mutex_unlock(&umem->odp_data->umem_mutex); if (ret < 0) { /* Release left over pages when handling errors. */ for (++j; j < npages; ++j) put_page(local_page_list[j]); break; } } if (ret >= 0) { if (npages < 0 && k == start_idx) ret = npages; else ret = k - start_idx; } mmput(owning_mm); out_put_task: put_task_struct(owning_process); out_no_task: free_page((unsigned long)local_page_list); return ret; } EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, u64 bound) { int idx; u64 addr; struct ib_device *dev = umem->context->device; virt = max_t(u64, virt, ib_umem_start(umem)); bound = min_t(u64, bound, ib_umem_end(umem)); /* Note that during the run of this function, the * notifiers_count of the MR is > 0, preventing any racing * faults from completion. We might be racing with other * invalidations, so we must make sure we free each page only * once. */ mutex_lock(&umem->odp_data->umem_mutex); for (addr = virt; addr < bound; addr += (u64)umem->page_size) { idx = (addr - ib_umem_start(umem)) / PAGE_SIZE; if (umem->odp_data->page_list[idx]) { struct page *page = umem->odp_data->page_list[idx]; dma_addr_t dma = umem->odp_data->dma_list[idx]; dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; WARN_ON(!dma_addr); ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma & ODP_WRITE_ALLOWED_BIT) { struct page *head_page = compound_head(page); /* * set_page_dirty prefers being called with * the page lock. However, MMU notifiers are * called sometimes with and sometimes without * the lock. We rely on the umem_mutex instead * to prevent other mmu notifiers from * continuing and allowing the page mapping to * be removed. */ set_page_dirty(head_page); } /* on demand pinning support */ if (!umem->context->invalidate_range) put_page(page); umem->odp_data->page_list[idx] = NULL; umem->odp_data->dma_list[idx] = 0; } } mutex_unlock(&umem->odp_data->umem_mutex); } EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_umem_rbtree.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_umem_rbtree.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_umem_rbtree.c (revision 331772) @@ -1,93 +1,97 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include /* * The ib_umem list keeps track of memory regions for which the HW * device request to receive notification when the related memory * mapping is changed. * * ib_umem_lock protects the list. */ static inline u64 node_start(struct umem_odp_node *n) { struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); return ib_umem_start(umem_odp->umem); } /* Note that the representation of the intervals in the interval tree * considers the ending point as contained in the interval, while the * function ib_umem_end returns the first address which is not contained * in the umem. */ static inline u64 node_last(struct umem_odp_node *n) { struct ib_umem_odp *umem_odp = container_of(n, struct ib_umem_odp, interval_tree); return ib_umem_end(umem_odp->umem) - 1; } INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, node_start, node_last, , rbt_ib_umem) /* @last is not a part of the interval. See comment for function * node_last. */ int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 last, umem_call_back cb, void *cookie) { int ret_val = 0; struct umem_odp_node *node; struct ib_umem_odp *umem; if (unlikely(start == last)) return ret_val; for (node = rbt_ib_umem_iter_first(root, start, last - 1); node; node = rbt_ib_umem_iter_next(node, start, last - 1)) { umem = container_of(node, struct ib_umem_odp, interval_tree); ret_val = cb(umem->umem, start, last, cookie) || ret_val; } return ret_val; } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_user_mad.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_user_mad.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_user_mad.c (revision 331772) @@ -1,1401 +1,1405 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Cisco. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #define pr_fmt(fmt) "user_mad: " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace MAD packet access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UMAD_MAX_PORTS = 64, IB_UMAD_MAX_AGENTS = 32, IB_UMAD_MAJOR = 231, IB_UMAD_MINOR_BASE = 0 }; /* * Our lifetime rules for these structs are the following: * device special file is opened, we take a reference on the * ib_umad_port's struct ib_umad_device. We drop these * references in the corresponding close(). * * In addition to references coming from open character devices, there * is one more reference to each ib_umad_device representing the * module's reference taken when allocating the ib_umad_device in * ib_umad_add_one(). * * When destroying an ib_umad_device, we drop the module's reference. */ struct ib_umad_port { struct cdev cdev; struct device *dev; struct cdev sm_cdev; struct device *sm_dev; struct semaphore sm_sem; struct mutex file_mutex; struct list_head file_list; struct ib_device *ib_dev; struct ib_umad_device *umad_dev; int dev_num; u8 port_num; }; struct ib_umad_device { struct kobject kobj; struct ib_umad_port port[0]; }; struct ib_umad_file { struct mutex mutex; struct ib_umad_port *port; struct list_head recv_list; struct list_head send_list; struct list_head port_list; spinlock_t send_lock; wait_queue_head_t recv_wait; struct ib_mad_agent *agent[IB_UMAD_MAX_AGENTS]; int agents_dead; u8 use_pkey_index; u8 already_used; }; struct ib_umad_packet { struct ib_mad_send_buf *msg; struct ib_mad_recv_wc *recv_wc; struct list_head list; int length; struct ib_user_mad mad; }; static struct class *umad_class; static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE); static DEFINE_SPINLOCK(port_lock); static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_release_dev(struct kobject *kobj) { struct ib_umad_device *dev = container_of(kobj, struct ib_umad_device, kobj); kfree(dev); } static struct kobj_type ib_umad_dev_ktype = { .release = ib_umad_release_dev, }; static int hdr_size(struct ib_umad_file *file) { return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : sizeof (struct ib_user_mad_hdr_old); } /* caller must hold file->mutex */ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id) { return file->agents_dead ? NULL : file->agent[id]; } static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent, struct ib_umad_packet *packet) { int ret = 1; mutex_lock(&file->mutex); for (packet->mad.hdr.id = 0; packet->mad.hdr.id < IB_UMAD_MAX_AGENTS; packet->mad.hdr.id++) if (agent == __get_agent(file, packet->mad.hdr.id)) { list_add_tail(&packet->list, &file->recv_list); wake_up_interruptible(&file->recv_wait); ret = 0; break; } mutex_unlock(&file->mutex); return ret; } static void dequeue_send(struct ib_umad_file *file, struct ib_umad_packet *packet) { spin_lock_irq(&file->send_lock); list_del(&packet->list); spin_unlock_irq(&file->send_lock); } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *send_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet = send_wc->send_buf->context[0]; dequeue_send(file, packet); ib_destroy_ah(packet->msg->ah); ib_free_send_mad(packet->msg); if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) { packet->length = IB_MGMT_MAD_HDR; packet->mad.hdr.status = ETIMEDOUT; if (!queue_packet(file, agent, packet)) return; } kfree(packet); } static void recv_handler(struct ib_mad_agent *agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_umad_file *file = agent->context; struct ib_umad_packet *packet; if (mad_recv_wc->wc->status != IB_WC_SUCCESS) goto err1; packet = kzalloc(sizeof *packet, GFP_KERNEL); if (!packet) goto err1; packet->length = mad_recv_wc->mad_len; packet->recv_wc = mad_recv_wc; packet->mad.hdr.status = 0; packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len; packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp); packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid); packet->mad.hdr.sl = mad_recv_wc->wc->sl; packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits; packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index; packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH); if (packet->mad.hdr.grh_present) { struct ib_ah_attr ah_attr; ib_init_ah_from_wc(agent->device, agent->port_num, mad_recv_wc->wc, mad_recv_wc->recv_buf.grh, &ah_attr); packet->mad.hdr.gid_index = ah_attr.grh.sgid_index; packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit; packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class; memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16); packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label); } if (queue_packet(file, agent, packet)) goto err2; return; err2: kfree(packet); err1: ib_free_recv_mad(mad_recv_wc); } static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf, struct ib_umad_packet *packet, size_t count) { struct ib_mad_recv_buf *recv_buf; int left, seg_payload, offset, max_seg_payload; size_t seg_size; recv_buf = &packet->recv_wc->recv_buf; seg_size = packet->recv_wc->mad_seg_size; /* We need enough room to copy the first (or only) MAD segment. */ if ((packet->length <= seg_size && count < hdr_size(file) + packet->length) || (packet->length > seg_size && count < hdr_size(file) + seg_size)) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); seg_payload = min_t(int, packet->length, seg_size); if (copy_to_user(buf, recv_buf->mad, seg_payload)) return -EFAULT; if (seg_payload < packet->length) { /* * Multipacket RMPP MAD message. Copy remainder of message. * Note that last segment may have a shorter payload. */ if (count < hdr_size(file) + packet->length) { /* * The buffer is too small, return the first RMPP segment, * which includes the RMPP message length. */ return -ENOSPC; } offset = ib_get_mad_data_offset(recv_buf->mad->mad_hdr.mgmt_class); max_seg_payload = seg_size - offset; for (left = packet->length - seg_payload, buf += seg_payload; left; left -= seg_payload, buf += seg_payload) { recv_buf = container_of(recv_buf->list.next, struct ib_mad_recv_buf, list); seg_payload = min(left, max_seg_payload); if (copy_to_user(buf, (char *)recv_buf->mad + offset, seg_payload)) return -EFAULT; } } return hdr_size(file) + packet->length; } static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf, struct ib_umad_packet *packet, size_t count) { ssize_t size = hdr_size(file) + packet->length; if (count < size) return -EINVAL; if (copy_to_user(buf, &packet->mad, hdr_size(file))) return -EFAULT; buf += hdr_size(file); if (copy_to_user(buf, packet->mad.data, packet->length)) return -EFAULT; return size; } static ssize_t ib_umad_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; ssize_t ret; if (count < hdr_size(file)) return -EINVAL; mutex_lock(&file->mutex); while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->recv_wait, !list_empty(&file->recv_list))) return -ERESTARTSYS; mutex_lock(&file->mutex); } packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); mutex_unlock(&file->mutex); if (packet->recv_wc) ret = copy_recv_mad(file, buf, packet, count); else ret = copy_send_mad(file, buf, packet, count); if (ret < 0) { /* Requeue packet */ mutex_lock(&file->mutex); list_add(&packet->list, &file->recv_list); mutex_unlock(&file->mutex); } else { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); kfree(packet); } return ret; } static int copy_rmpp_mad(struct ib_mad_send_buf *msg, const char __user *buf) { int left, seg; /* Copy class specific header */ if ((msg->hdr_len > IB_MGMT_RMPP_HDR) && copy_from_user((char *)msg->mad + IB_MGMT_RMPP_HDR, buf + IB_MGMT_RMPP_HDR, msg->hdr_len - IB_MGMT_RMPP_HDR)) return -EFAULT; /* All headers are in place. Copy data segments. */ for (seg = 1, left = msg->data_len, buf += msg->hdr_len; left > 0; seg++, left -= msg->seg_size, buf += msg->seg_size) { if (copy_from_user(ib_get_rmpp_segment(msg, seg), buf, min(left, msg->seg_size))) return -EFAULT; } return 0; } static int same_destination(struct ib_user_mad_hdr *hdr1, struct ib_user_mad_hdr *hdr2) { if (!hdr1->grh_present && !hdr2->grh_present) return (hdr1->lid == hdr2->lid); if (hdr1->grh_present && hdr2->grh_present) return !memcmp(hdr1->gid, hdr2->gid, 16); return 0; } static int is_duplicate(struct ib_umad_file *file, struct ib_umad_packet *packet) { struct ib_umad_packet *sent_packet; struct ib_mad_hdr *sent_hdr, *hdr; hdr = (struct ib_mad_hdr *) packet->mad.data; list_for_each_entry(sent_packet, &file->send_list, list) { sent_hdr = (struct ib_mad_hdr *) sent_packet->mad.data; if ((hdr->tid != sent_hdr->tid) || (hdr->mgmt_class != sent_hdr->mgmt_class)) continue; /* * No need to be overly clever here. If two new operations have * the same TID, reject the second as a duplicate. This is more * restrictive than required by the spec. */ if (!ib_response_mad(hdr)) { if (!ib_response_mad(sent_hdr)) return 1; continue; } else if (!ib_response_mad(sent_hdr)) continue; if (same_destination(&packet->mad.hdr, &sent_packet->mad.hdr)) return 1; } return 0; } static ssize_t ib_umad_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_umad_file *file = filp->private_data; struct ib_umad_packet *packet; struct ib_mad_agent *agent; struct ib_ah_attr ah_attr; struct ib_ah *ah; struct ib_rmpp_mad *rmpp_mad; __be64 *tid; int ret, data_len, hdr_len, copy_offset, rmpp_active; u8 base_version; if (count < hdr_size(file) + IB_MGMT_RMPP_HDR) return -EINVAL; packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL); if (!packet) return -ENOMEM; if (copy_from_user(&packet->mad, buf, hdr_size(file))) { ret = -EFAULT; goto err; } if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { ret = -EINVAL; goto err; } buf += hdr_size(file); if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) { ret = -EFAULT; goto err; } mutex_lock(&file->mutex); agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { ret = -EINVAL; goto err_up; } memset(&ah_attr, 0, sizeof ah_attr); ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid); ah_attr.sl = packet->mad.hdr.sl; ah_attr.src_path_bits = packet->mad.hdr.path_bits; ah_attr.port_num = file->port->port_num; if (packet->mad.hdr.grh_present) { ah_attr.ah_flags = IB_AH_GRH; memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16); ah_attr.grh.sgid_index = packet->mad.hdr.gid_index; ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label); ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit; ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class; } ah = ib_create_ah(agent->qp->pd, &ah_attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_up; } rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE; } else { copy_offset = IB_MGMT_MAD_HDR; rmpp_active = 0; } base_version = ((struct ib_mad_hdr *)&packet->mad.data)->base_version; data_len = count - hdr_size(file) - hdr_len; packet->msg = ib_create_send_mad(agent, be32_to_cpu(packet->mad.hdr.qpn), packet->mad.hdr.pkey_index, rmpp_active, hdr_len, data_len, GFP_KERNEL, base_version); if (IS_ERR(packet->msg)) { ret = PTR_ERR(packet->msg); goto err_ah; } packet->msg->ah = ah; packet->msg->timeout_ms = packet->mad.hdr.timeout_ms; packet->msg->retries = packet->mad.hdr.retries; packet->msg->context[0] = packet; /* Copy MAD header. Any RMPP header is already in place. */ memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR); if (!rmpp_active) { if (copy_from_user((char *)packet->msg->mad + copy_offset, buf + copy_offset, hdr_len + data_len - copy_offset)) { ret = -EFAULT; goto err_msg; } } else { ret = copy_rmpp_mad(packet->msg, buf); if (ret) goto err_msg; } /* * Set the high-order part of the transaction ID to make MADs from * different agents unique, and allow routing responses back to the * original requestor. */ if (!ib_response_mad(packet->msg->mad)) { tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid; *tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | (be64_to_cpup(tid) & 0xffffffff)); rmpp_mad->mad_hdr.tid = *tid; } if (!ib_mad_kernel_rmpp_agent(agent) && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); } else { spin_lock_irq(&file->send_lock); ret = is_duplicate(file, packet); if (!ret) list_add_tail(&packet->list, &file->send_list); spin_unlock_irq(&file->send_lock); if (ret) { ret = -EINVAL; goto err_msg; } } ret = ib_post_send_mad(packet->msg, NULL); if (ret) goto err_send; mutex_unlock(&file->mutex); return count; err_send: dequeue_send(file, packet); err_msg: ib_free_send_mad(packet->msg); err_ah: ib_destroy_ah(ah); err_up: mutex_unlock(&file->mutex); err: kfree(packet); return ret; } static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait) { struct ib_umad_file *file = filp->private_data; /* we will always be able to post a MAD send */ unsigned int mask = POLLOUT | POLLWRNORM; poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= POLLIN | POLLRDNORM; return mask; } static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, int compat_method_mask) { struct ib_user_mad_reg_req ureq; struct ib_mad_reg_req req; struct ib_mad_agent *agent = NULL; int agent_id; int ret; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (!file->port->ib_dev) { dev_notice(file->port->dev, "ib_umad_reg_agent: invalid device\n"); ret = -EPIPE; goto out; } if (copy_from_user(&ureq, arg, sizeof ureq)) { ret = -EFAULT; goto out; } if (ureq.qpn != 0 && ureq.qpn != 1) { dev_notice(file->port->dev, "ib_umad_reg_agent: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; goto out; } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) if (!__get_agent(file, agent_id)) goto found; dev_notice(file->port->dev, "ib_umad_reg_agent: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; goto out; found: if (ureq.mgmt_class) { memset(&req, 0, sizeof(req)); req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; memcpy(req.oui, ureq.oui, sizeof req.oui); if (compat_method_mask) { u32 *umm = (u32 *) ureq.method_mask; int i; for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i) req.method_mask[i] = umm[i * 2] | ((u64) umm[i * 2 + 1] << 32); } else memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, ureq.rmpp_version, send_handler, recv_handler, file, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *) ((char *)arg + offsetof(struct ib_user_mad_reg_req, id)))) { ret = -EFAULT; goto out; } if (!file->already_used) { file->already_used = 1; if (!file->use_pkey_index) { dev_warn(file->port->dev, "process %s did not enable P_Key index support.\n", current->comm); dev_warn(file->port->dev, " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); } } file->agent[agent_id] = agent; ret = 0; out: mutex_unlock(&file->mutex); if (ret && agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) { struct ib_user_mad_reg_req2 ureq; struct ib_mad_reg_req req; struct ib_mad_agent *agent = NULL; int agent_id; int ret; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (!file->port->ib_dev) { dev_notice(file->port->dev, "ib_umad_reg_agent2: invalid device\n"); ret = -EPIPE; goto out; } if (copy_from_user(&ureq, arg, sizeof(ureq))) { ret = -EFAULT; goto out; } if (ureq.qpn != 0 && ureq.qpn != 1) { dev_notice(file->port->dev, "ib_umad_reg_agent2: invalid QPN %d specified\n", ureq.qpn); ret = -EINVAL; goto out; } if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { const u32 flags = IB_USER_MAD_REG_FLAGS_CAP; dev_notice(file->port->dev, "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); ret = -EINVAL; if (put_user(flags, (u32 __user *) ((char *)arg + offsetof(struct ib_user_mad_reg_req2, flags)))) ret = -EFAULT; goto out; } for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) if (!__get_agent(file, agent_id)) goto found; dev_notice(file->port->dev, "ib_umad_reg_agent2: Max Agents (%u) reached\n", IB_UMAD_MAX_AGENTS); ret = -ENOMEM; goto out; found: if (ureq.mgmt_class) { memset(&req, 0, sizeof(req)); req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; if (ureq.oui & 0xff000000) { dev_notice(file->port->dev, "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", ureq.oui); ret = -EINVAL; goto out; } req.oui[2] = ureq.oui & 0x0000ff; req.oui[1] = (ureq.oui & 0x00ff00) >> 8; req.oui[0] = (ureq.oui & 0xff0000) >> 16; memcpy(req.method_mask, ureq.method_mask, sizeof(req.method_mask)); } agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, ureq.rmpp_version, send_handler, recv_handler, file, ureq.flags); if (IS_ERR(agent)) { ret = PTR_ERR(agent); agent = NULL; goto out; } if (put_user(agent_id, (u32 __user *)((char *)arg + offsetof(struct ib_user_mad_reg_req2, id)))) { ret = -EFAULT; goto out; } if (!file->already_used) { file->already_used = 1; file->use_pkey_index = 1; } file->agent[agent_id] = agent; ret = 0; out: mutex_unlock(&file->mutex); if (ret && agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) { struct ib_mad_agent *agent = NULL; u32 id; int ret = 0; if (get_user(id, arg)) return -EFAULT; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { ret = -EINVAL; goto out; } agent = file->agent[id]; file->agent[id] = NULL; out: mutex_unlock(&file->mutex); if (agent) ib_unregister_mad_agent(agent); mutex_unlock(&file->port->file_mutex); return ret; } static long ib_umad_enable_pkey(struct ib_umad_file *file) { int ret = 0; mutex_lock(&file->mutex); if (file->already_used) ret = -EINVAL; else file->use_pkey_index = 1; mutex_unlock(&file->mutex); return ret; } static long ib_umad_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); case IB_USER_MAD_UNREGISTER_AGENT: return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); case IB_USER_MAD_REGISTER_AGENT2: return ib_umad_reg_agent2(filp->private_data, (void __user *) arg); default: return -ENOIOCTLCMD; } } #ifdef CONFIG_COMPAT static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { case IB_USER_MAD_REGISTER_AGENT: return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); case IB_USER_MAD_UNREGISTER_AGENT: return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); case IB_USER_MAD_REGISTER_AGENT2: return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg)); default: return -ENOIOCTLCMD; } } #endif /* * ib_umad_open() does not need the BKL: * * - the ib_umad_port structures are properly reference counted, and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - the ioctl method does not affect any global state outside of the * file structure being operated on; */ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; int ret = -ENXIO; port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, cdev); mutex_lock(&port->file_mutex); if (!port->ib_dev) goto out; ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); if (!file) goto out; mutex_init(&file->mutex); spin_lock_init(&file->send_lock); INIT_LIST_HEAD(&file->recv_list); INIT_LIST_HEAD(&file->send_list); init_waitqueue_head(&file->recv_wait); file->port = port; filp->private_data = file; list_add_tail(&file->port_list, &port->file_list); ret = nonseekable_open(inode, filp); if (ret) { list_del(&file->port_list); kfree(file); goto out; } kobject_get(&port->umad_dev->kobj); out: mutex_unlock(&port->file_mutex); return ret; } static int ib_umad_close(struct inode *inode, struct file *filp) { struct ib_umad_file *file = filp->private_data; struct ib_umad_device *dev = file->port->umad_dev; struct ib_umad_packet *packet, *tmp; int already_dead; int i; mutex_lock(&file->port->file_mutex); mutex_lock(&file->mutex); already_dead = file->agents_dead; file->agents_dead = 1; list_for_each_entry_safe(packet, tmp, &file->recv_list, list) { if (packet->recv_wc) ib_free_recv_mad(packet->recv_wc); kfree(packet); } list_del(&file->port_list); mutex_unlock(&file->mutex); if (!already_dead) for (i = 0; i < IB_UMAD_MAX_AGENTS; ++i) if (file->agent[i]) ib_unregister_mad_agent(file->agent[i]); mutex_unlock(&file->port->file_mutex); kfree(file); kobject_put(&dev->kobj); return 0; } static const struct file_operations umad_fops = { .owner = THIS_MODULE, .read = ib_umad_read, .write = ib_umad_write, .poll = ib_umad_poll, .unlocked_ioctl = ib_umad_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ib_umad_compat_ioctl, #endif .open = ib_umad_open, .release = ib_umad_close, .llseek = no_llseek, }; static int ib_umad_sm_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_port_modify props = { .set_port_cap_mask = IB_PORT_SM }; int ret; port = container_of(inode->i_cdev->si_drv1, struct ib_umad_port, sm_cdev); if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { ret = -EAGAIN; goto fail; } } else { if (down_interruptible(&port->sm_sem)) { ret = -ERESTARTSYS; goto fail; } } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); if (ret) goto err_up_sem; filp->private_data = port; ret = nonseekable_open(inode, filp); if (ret) goto err_clr_sm_cap; kobject_get(&port->umad_dev->kobj); return 0; err_clr_sm_cap: swap(props.set_port_cap_mask, props.clr_port_cap_mask); ib_modify_port(port->ib_dev, port->port_num, 0, &props); err_up_sem: up(&port->sm_sem); fail: return ret; } static int ib_umad_sm_close(struct inode *inode, struct file *filp) { struct ib_umad_port *port = filp->private_data; struct ib_port_modify props = { .clr_port_cap_mask = IB_PORT_SM }; int ret = 0; mutex_lock(&port->file_mutex); if (port->ib_dev) ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); mutex_unlock(&port->file_mutex); up(&port->sm_sem); kobject_put(&port->umad_dev->kobj); return ret; } static const struct file_operations umad_sm_fops = { .owner = THIS_MODULE, .open = ib_umad_sm_open, .release = ib_umad_sm_close, .llseek = no_llseek, }; static struct ib_client umad_client = { .name = "umad", .add = ib_umad_add_one, .remove = ib_umad_remove_one }; static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%s\n", port->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_port(struct device *dev, struct device_attribute *attr, char *buf) { struct ib_umad_port *port = dev_get_drvdata(dev); if (!port) return -ENODEV; return sprintf(buf, "%d\n", port->port_num); } static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_MAD_ABI_VERSION)); static dev_t overflow_maj; static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); static int find_overflow_devnum(struct ib_device *device) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { dev_err(&device->dev, "couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS); if (ret >= IB_UMAD_MAX_PORTS) return -1; return ret; } static int ib_umad_init_port(struct ib_device *device, int port_num, struct ib_umad_device *umad_dev, struct ib_umad_port *port) { int devnum; dev_t base; spin_lock(&port_lock); devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); if (devnum >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); devnum = find_overflow_devnum(device); if (devnum < 0) return -1; spin_lock(&port_lock); port->dev_num = devnum + IB_UMAD_MAX_PORTS; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { port->dev_num = devnum; base = devnum + base_dev; set_bit(devnum, dev_map); } spin_unlock(&port_lock); port->ib_dev = device; port->port_num = port_num; sema_init(&port->sm_sem, 1); mutex_init(&port->file_mutex); INIT_LIST_HEAD(&port->file_list); cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; port->cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); if (cdev_add(&port->cdev, base, 1)) goto err_cdev; port->dev = device_create(umad_class, device->dma_device, port->cdev.dev, port, "umad%d", port->dev_num); if (IS_ERR(port->dev)) goto err_cdev; if (device_create_file(port->dev, &dev_attr_ibdev)) goto err_dev; if (device_create_file(port->dev, &dev_attr_port)) goto err_dev; base += IB_UMAD_MAX_PORTS; cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; port->sm_cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; port->sm_dev = device_create(umad_class, device->dma_device, port->sm_cdev.dev, port, "issm%d", port->dev_num); if (IS_ERR(port->sm_dev)) goto err_sm_cdev; if (device_create_file(port->sm_dev, &dev_attr_ibdev)) goto err_sm_dev; if (device_create_file(port->sm_dev, &dev_attr_port)) goto err_sm_dev; return 0; err_sm_dev: device_destroy(umad_class, port->sm_cdev.dev); err_sm_cdev: cdev_del(&port->sm_cdev); err_dev: device_destroy(umad_class, port->cdev.dev); err_cdev: cdev_del(&port->cdev); if (port->dev_num < IB_UMAD_MAX_PORTS) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); return -1; } static void ib_umad_kill_port(struct ib_umad_port *port) { struct ib_umad_file *file; int id; dev_set_drvdata(port->dev, NULL); dev_set_drvdata(port->sm_dev, NULL); device_destroy(umad_class, port->cdev.dev); device_destroy(umad_class, port->sm_cdev.dev); cdev_del(&port->cdev); cdev_del(&port->sm_cdev); mutex_lock(&port->file_mutex); port->ib_dev = NULL; list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); file->agents_dead = 1; mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) if (file->agent[id]) ib_unregister_mad_agent(file->agent[id]); } mutex_unlock(&port->file_mutex); if (port->dev_num < IB_UMAD_MAX_PORTS) clear_bit(port->dev_num, dev_map); else clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map); } static void ib_umad_add_one(struct ib_device *device) { struct ib_umad_device *umad_dev; int s, e, i; int count = 0; s = rdma_start_port(device); e = rdma_end_port(device); umad_dev = kzalloc(sizeof *umad_dev + (e - s + 1) * sizeof (struct ib_umad_port), GFP_KERNEL); if (!umad_dev) return; kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); for (i = s; i <= e; ++i) { if (!rdma_cap_ib_mad(device, i)) continue; umad_dev->port[i - s].umad_dev = umad_dev; if (ib_umad_init_port(device, i, umad_dev, &umad_dev->port[i - s])) goto err; count++; } if (!count) goto free; ib_set_client_data(device, &umad_client, umad_dev); return; err: while (--i >= s) { if (!rdma_cap_ib_mad(device, i)) continue; ib_umad_kill_port(&umad_dev->port[i - s]); } free: kobject_put(&umad_dev->kobj); } static void ib_umad_remove_one(struct ib_device *device, void *client_data) { struct ib_umad_device *umad_dev = client_data; int i; if (!umad_dev) return; for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) { if (rdma_cap_ib_mad(device, i + rdma_start_port(device))) ib_umad_kill_port(&umad_dev->port[i]); } kobject_put(&umad_dev->kobj); } static char *umad_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_umad_init(void) { int ret; ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { pr_err("couldn't register device number\n"); goto out; } umad_class = class_create(THIS_MODULE, "infiniband_mad"); if (IS_ERR(umad_class)) { ret = PTR_ERR(umad_class); pr_err("couldn't create class infiniband_mad\n"); goto out_chrdev; } umad_class->devnode = umad_devnode; ret = class_create_file(umad_class, &class_attr_abi_version.attr); if (ret) { pr_err("couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&umad_client); if (ret) { pr_err("couldn't register ib_umad client\n"); goto out_class; } return 0; out_class: class_destroy(umad_class); out_chrdev: unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); out: return ret; } static void __exit ib_umad_cleanup(void) { ib_unregister_client(&umad_client); class_destroy(umad_class); unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2); } module_init_order(ib_umad_init, SI_ORDER_THIRD); module_exit(ib_umad_cleanup); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_cmd.c (revision 331772) @@ -1,4251 +1,4255 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include "uverbs.h" #include "core_priv.h" #include struct uverbs_lock_class { char name[16]; }; static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" }; static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" }; /* * The ib_uobject locking scheme is as follows: * * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it * needs to be held during all idr write operations. When an object is * looked up, a reference must be taken on the object's kref before * dropping this lock. For read operations, the rcu_read_lock() * and rcu_write_lock() but similarly the kref reference is grabbed * before the rcu_read_unlock(). * * - Each object also has an rwsem. This rwsem must be held for * reading while an operation that uses the object is performed. * For example, while registering an MR, the associated PD's * uobject.mutex must be held for reading. The rwsem must be held * for writing while initializing or destroying an object. * * - In addition, each object has a "live" flag. If this flag is not * set, then lookups of the object will fail even if it is found in * the idr. This handles a reader that blocks and does not acquire * the rwsem until after the object is destroyed. The destroy * operation will set the live flag to 0 and then drop the rwsem; * this will allow the reader to acquire the rwsem, see that the * live flag is 0, and then drop the rwsem and its reference to * object. The underlying storage will not be freed until the last * reference to the object is dropped. */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, struct ib_ucontext *context, struct uverbs_lock_class *c) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); uobj->live = 0; } static void release_uobj(struct kref *kref) { kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu); } static void put_uobj(struct ib_uobject *uobj) { kref_put(&uobj->ref, release_uobj); } static void put_uobj_read(struct ib_uobject *uobj) { up_read(&uobj->mutex); put_uobj(uobj); } static void put_uobj_write(struct ib_uobject *uobj) { up_write(&uobj->mutex); put_uobj(uobj); } static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) { int ret; idr_preload(GFP_KERNEL); spin_lock(&ib_uverbs_idr_lock); ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); if (ret >= 0) uobj->id = ret; spin_unlock(&ib_uverbs_idr_lock); idr_preload_end(); return ret < 0 ? ret : 0; } void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) { spin_lock(&ib_uverbs_idr_lock); idr_remove(idr, uobj->id); spin_unlock(&ib_uverbs_idr_lock); } static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id, struct ib_ucontext *context) { struct ib_uobject *uobj; rcu_read_lock(); uobj = idr_find(idr, id); if (uobj) { if (uobj->context == context) kref_get(&uobj->ref); else uobj = NULL; } rcu_read_unlock(); return uobj; } static struct ib_uobject *idr_read_uobj(struct idr *idr, int id, struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; uobj = __idr_get_uobj(idr, id, context); if (!uobj) return NULL; if (nested) down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING); else down_read(&uobj->mutex); if (!uobj->live) { put_uobj_read(uobj); return NULL; } return uobj; } static struct ib_uobject *idr_write_uobj(struct idr *idr, int id, struct ib_ucontext *context) { struct ib_uobject *uobj; uobj = __idr_get_uobj(idr, id, context); if (!uobj) return NULL; down_write(&uobj->mutex); if (!uobj->live) { put_uobj_write(uobj); return NULL; } return uobj; } static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context, int nested) { struct ib_uobject *uobj; uobj = idr_read_uobj(idr, id, context, nested); return uobj ? uobj->object : NULL; } static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0); } static void put_pd_read(struct ib_pd *pd) { put_uobj_read(pd->uobject); } static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested) { return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested); } static void put_cq_read(struct ib_cq *cq) { put_uobj_read(cq->uobject); } static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0); } static void put_ah_read(struct ib_ah *ah) { put_uobj_read(ah->uobject); } static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0); } static void put_wq_read(struct ib_wq *wq) { put_uobj_read(wq->uobject); } static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0); } static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table) { put_uobj_read(ind_table->uobject); } static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) { struct ib_uobject *uobj; uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); return uobj ? uobj->object : NULL; } static void put_qp_read(struct ib_qp *qp) { put_uobj_read(qp->uobject); } static void put_qp_write(struct ib_qp *qp) { put_uobj_write(qp->uobject); } static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); } static void put_srq_read(struct ib_srq *srq) { put_uobj_read(srq->uobject); } static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, struct ib_uobject **uobj) { *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); return *uobj ? (*uobj)->object : NULL; } static void put_xrcd_read(struct ib_uobject *uobj) { put_uobj_read(uobj); } ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_get_context cmd; struct ib_uverbs_get_context_resp resp; struct ib_udata udata; struct ib_ucontext *ucontext; struct file *filp; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&file->mutex); if (file->ucontext) { ret = -EINVAL; goto err; } INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); ucontext = ib_dev->alloc_ucontext(ib_dev, &udata); if (IS_ERR(ucontext)) { ret = PTR_ERR(ucontext); goto err; } ucontext->device = ib_dev; INIT_LIST_HEAD(&ucontext->pd_list); INIT_LIST_HEAD(&ucontext->mr_list); INIT_LIST_HEAD(&ucontext->mw_list); INIT_LIST_HEAD(&ucontext->cq_list); INIT_LIST_HEAD(&ucontext->qp_list); INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->wq_list); INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list); INIT_LIST_HEAD(&ucontext->xrcd_list); INIT_LIST_HEAD(&ucontext->rule_list); rcu_read_lock(); ucontext->tgid = get_pid(task_pid_group_leader(current)); rcu_read_unlock(); ucontext->closing = 0; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING ucontext->umem_tree = RB_ROOT; init_rwsem(&ucontext->umem_rwsem); ucontext->odp_mrs_count = 0; INIT_LIST_HEAD(&ucontext->no_private_counters); if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) ucontext->invalidate_range = NULL; #endif resp.num_comp_vectors = file->device->num_comp_vectors; ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; resp.async_fd = ret; filp = ib_uverbs_alloc_event_file(file, ib_dev, 1); if (IS_ERR(filp)) { ret = PTR_ERR(filp); goto err_fd; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_file; } file->ucontext = ucontext; fd_install(resp.async_fd, filp); mutex_unlock(&file->mutex); return in_len; err_file: ib_uverbs_free_async_event_file(file); fput(filp); err_fd: put_unused_fd(resp.async_fd); err_free: put_pid(ucontext->tgid); ib_dev->dealloc_ucontext(ucontext); err: mutex_unlock(&file->mutex); return ret; } static void copy_query_dev_fields(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_uverbs_query_device_resp *resp, struct ib_device_attr *attr) { resp->fw_ver = attr->fw_ver; resp->node_guid = ib_dev->node_guid; resp->sys_image_guid = attr->sys_image_guid; resp->max_mr_size = attr->max_mr_size; resp->page_size_cap = attr->page_size_cap; resp->vendor_id = attr->vendor_id; resp->vendor_part_id = attr->vendor_part_id; resp->hw_ver = attr->hw_ver; resp->max_qp = attr->max_qp; resp->max_qp_wr = attr->max_qp_wr; resp->device_cap_flags = (u32)(attr->device_cap_flags); resp->max_sge = attr->max_sge; resp->max_sge_rd = attr->max_sge_rd; resp->max_cq = attr->max_cq; resp->max_cqe = attr->max_cqe; resp->max_mr = attr->max_mr; resp->max_pd = attr->max_pd; resp->max_qp_rd_atom = attr->max_qp_rd_atom; resp->max_ee_rd_atom = attr->max_ee_rd_atom; resp->max_res_rd_atom = attr->max_res_rd_atom; resp->max_qp_init_rd_atom = attr->max_qp_init_rd_atom; resp->max_ee_init_rd_atom = attr->max_ee_init_rd_atom; resp->atomic_cap = attr->atomic_cap; resp->max_ee = attr->max_ee; resp->max_rdd = attr->max_rdd; resp->max_mw = attr->max_mw; resp->max_raw_ipv6_qp = attr->max_raw_ipv6_qp; resp->max_raw_ethy_qp = attr->max_raw_ethy_qp; resp->max_mcast_grp = attr->max_mcast_grp; resp->max_mcast_qp_attach = attr->max_mcast_qp_attach; resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach; resp->max_ah = attr->max_ah; resp->max_fmr = attr->max_fmr; resp->max_map_per_fmr = attr->max_map_per_fmr; resp->max_srq = attr->max_srq; resp->max_srq_wr = attr->max_srq_wr; resp->max_srq_sge = attr->max_srq_sge; resp->max_pkeys = attr->max_pkeys; resp->local_ca_ack_delay = attr->local_ca_ack_delay; resp->phys_port_cnt = ib_dev->phys_port_cnt; } ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; memset(&resp, 0, sizeof resp); copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_port cmd; struct ib_uverbs_query_port_resp resp; struct ib_port_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; ret = ib_query_port(ib_dev, cmd.port_num, &attr); if (ret) return ret; memset(&resp, 0, sizeof resp); resp.state = attr.state; resp.max_mtu = attr.max_mtu; resp.active_mtu = attr.active_mtu; resp.gid_tbl_len = attr.gid_tbl_len; resp.port_cap_flags = attr.port_cap_flags; resp.max_msg_sz = attr.max_msg_sz; resp.bad_pkey_cntr = attr.bad_pkey_cntr; resp.qkey_viol_cntr = attr.qkey_viol_cntr; resp.pkey_tbl_len = attr.pkey_tbl_len; resp.lid = attr.lid; resp.sm_lid = attr.sm_lid; resp.lmc = attr.lmc; resp.max_vl_num = attr.max_vl_num; resp.sm_sl = attr.sm_sl; resp.subnet_timeout = attr.subnet_timeout; resp.init_type_reply = attr.init_type_reply; resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; resp.link_layer = rdma_port_get_link_layer(ib_dev, cmd.port_num); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_alloc_pd cmd; struct ib_uverbs_alloc_pd_resp resp; struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, 0, file->ucontext, &pd_lock_class); down_write(&uobj->mutex); pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata); if (IS_ERR(pd)) { ret = PTR_ERR(pd); goto err; } pd->device = ib_dev; pd->uobject = uobj; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); uobj->object = pd; ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj); if (ret) goto err_idr; memset(&resp, 0, sizeof resp); resp.pd_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->pd_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_pd_idr, uobj); err_idr: ib_dealloc_pd(pd); err: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; struct ib_uobject *uobj; struct ib_pd *pd; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext); if (!uobj) return -EINVAL; pd = uobj->object; if (atomic_read(&pd->usecnt)) { ret = -EBUSY; goto err_put; } ret = pd->device->dealloc_pd(uobj->object); WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); if (ret) goto err_put; uobj->live = 0; put_uobj_write(uobj); idr_remove_uobj(&ib_uverbs_pd_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; err_put: put_uobj_write(uobj); return ret; } struct xrcd_table_entry { struct rb_node node; struct ib_xrcd *xrcd; struct inode *inode; }; static int xrcd_table_insert(struct ib_uverbs_device *dev, struct inode *inode, struct ib_xrcd *xrcd) { struct xrcd_table_entry *entry, *scan; struct rb_node **p = &dev->xrcd_tree.rb_node; struct rb_node *parent = NULL; entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) return -ENOMEM; entry->xrcd = xrcd; entry->inode = inode; while (*p) { parent = *p; scan = rb_entry(parent, struct xrcd_table_entry, node); if (inode < scan->inode) { p = &(*p)->rb_left; } else if (inode > scan->inode) { p = &(*p)->rb_right; } else { kfree(entry); return -EEXIST; } } rb_link_node(&entry->node, parent, p); rb_insert_color(&entry->node, &dev->xrcd_tree); igrab(inode); return 0; } static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, struct inode *inode) { struct xrcd_table_entry *entry; struct rb_node *p = dev->xrcd_tree.rb_node; while (p) { entry = rb_entry(p, struct xrcd_table_entry, node); if (inode < entry->inode) p = p->rb_left; else if (inode > entry->inode) p = p->rb_right; else return entry; } return NULL; } static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) { struct xrcd_table_entry *entry; entry = xrcd_table_search(dev, inode); if (!entry) return NULL; return entry->xrcd; } static void xrcd_table_delete(struct ib_uverbs_device *dev, struct inode *inode) { struct xrcd_table_entry *entry; entry = xrcd_table_search(dev, inode); if (entry) { iput(inode); rb_erase(&entry->node, &dev->xrcd_tree); kfree(entry); } } ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_xrcd cmd; struct ib_uverbs_open_xrcd_resp resp; struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; struct fd f = {NULL}; struct inode *inode = NULL; int ret = 0; int new_xrcd = 0; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); mutex_lock(&file->device->xrcd_tree_mutex); if (cmd.fd != -1) { /* search for file descriptor */ f = fdget(cmd.fd); if (!f.file) { ret = -EBADF; goto err_tree_mutex_unlock; } inode = f.file->f_dentry->d_inode; xrcd = find_xrcd(file->device, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ ret = -EAGAIN; goto err_tree_mutex_unlock; } if (xrcd && cmd.oflags & O_EXCL) { ret = -EINVAL; goto err_tree_mutex_unlock; } } obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) { ret = -ENOMEM; goto err_tree_mutex_unlock; } init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); down_write(&obj->uobject.mutex); if (!xrcd) { xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata); if (IS_ERR(xrcd)) { ret = PTR_ERR(xrcd); goto err; } xrcd->inode = inode; xrcd->device = ib_dev; atomic_set(&xrcd->usecnt, 0); mutex_init(&xrcd->tgt_qp_mutex); INIT_LIST_HEAD(&xrcd->tgt_qp_list); new_xrcd = 1; } atomic_set(&obj->refcnt, 0); obj->uobject.object = xrcd; ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); if (ret) goto err_idr; memset(&resp, 0, sizeof resp); resp.xrcd_handle = obj->uobject.id; if (inode) { if (new_xrcd) { /* create new inode/xrcd table entry */ ret = xrcd_table_insert(file->device, inode, xrcd); if (ret) goto err_insert_xrcd; } atomic_inc(&xrcd->usecnt); } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } if (f.file) fdput(f); mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); mutex_unlock(&file->mutex); obj->uobject.live = 1; up_write(&obj->uobject.mutex); mutex_unlock(&file->device->xrcd_tree_mutex); return in_len; err_copy: if (inode) { if (new_xrcd) xrcd_table_delete(file->device, inode); atomic_dec(&xrcd->usecnt); } err_insert_xrcd: idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); err_idr: ib_dealloc_xrcd(xrcd); err: put_uobj_write(&obj->uobject); err_tree_mutex_unlock: if (f.file) fdput(f); mutex_unlock(&file->device->xrcd_tree_mutex); return ret; } ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_close_xrcd cmd; struct ib_uobject *uobj; struct ib_xrcd *xrcd = NULL; struct inode *inode = NULL; struct ib_uxrcd_object *obj; int live; int ret = 0; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; mutex_lock(&file->device->xrcd_tree_mutex); uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); if (!uobj) { ret = -EINVAL; goto out; } xrcd = uobj->object; inode = xrcd->inode; obj = container_of(uobj, struct ib_uxrcd_object, uobject); if (atomic_read(&obj->refcnt)) { put_uobj_write(uobj); ret = -EBUSY; goto out; } if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { ret = ib_dealloc_xrcd(uobj->object); if (!ret) uobj->live = 0; } live = uobj->live; if (inode && ret) atomic_inc(&xrcd->usecnt); put_uobj_write(uobj); if (ret) goto out; if (inode && !live) xrcd_table_delete(file->device, inode); idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); ret = in_len; out: mutex_unlock(&file->device->xrcd_tree_mutex); return ret; } void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd) { struct inode *inode; inode = xrcd->inode; if (inode && !atomic_dec_and_test(&xrcd->usecnt)) return; ib_dealloc_xrcd(xrcd); if (inode) xrcd_table_delete(dev, inode); } ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; struct ib_udata udata; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mr *mr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; ret = ib_check_mr_access(cmd.access_flags); if (ret) return ret; uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, 0, file->ucontext, &mr_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; } if (cmd.access_flags & IB_ACCESS_ON_DEMAND) { if (!(pd->device->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) { pr_debug("ODP support not available\n"); ret = -EINVAL; goto err_put; } } mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, &udata); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto err_put; } mr->device = pd->device; mr->pd = pd; mr->uobject = uobj; atomic_inc(&pd->usecnt); uobj->object = mr; ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); if (ret) goto err_unreg; memset(&resp, 0, sizeof resp); resp.lkey = mr->lkey; resp.rkey = mr->rkey; resp.mr_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->mr_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_mr_idr, uobj); err_unreg: ib_dereg_mr(mr); err_put: put_pd_read(pd); err_free: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_rereg_mr cmd; struct ib_uverbs_rereg_mr_resp resp; struct ib_udata udata; struct ib_pd *pd = NULL; struct ib_mr *mr; struct ib_pd *old_pd; int ret; struct ib_uobject *uobj; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; INIT_UDATA(&udata, buf + sizeof(cmd), (unsigned long) cmd.response + sizeof(resp), in_len - sizeof(cmd), out_len - sizeof(resp)); if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) return -EINVAL; if ((cmd.flags & IB_MR_REREG_TRANS) && (!cmd.start || !cmd.hca_va || 0 >= cmd.length || (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) return -EINVAL; uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); if (!uobj) return -EINVAL; mr = uobj->object; if (cmd.flags & IB_MR_REREG_ACCESS) { ret = ib_check_mr_access(cmd.access_flags); if (ret) goto put_uobjs; } if (cmd.flags & IB_MR_REREG_PD) { pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto put_uobjs; } } old_pd = mr->pd; ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length, cmd.hca_va, cmd.access_flags, pd, &udata); if (!ret) { if (cmd.flags & IB_MR_REREG_PD) { atomic_inc(&pd->usecnt); mr->pd = pd; atomic_dec(&old_pd->usecnt); } } else { goto put_uobj_pd; } memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) ret = -EFAULT; else ret = in_len; put_uobj_pd: if (cmd.flags & IB_MR_REREG_PD) put_pd_read(pd); put_uobjs: put_uobj_write(mr->uobject); return ret; } ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dereg_mr cmd; struct ib_mr *mr; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext); if (!uobj) return -EINVAL; mr = uobj->object; ret = ib_dereg_mr(mr); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; } ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_alloc_mw cmd; struct ib_uverbs_alloc_mw_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; struct ib_udata udata; int ret; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, 0, file->ucontext, &mw_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_free; } INIT_UDATA(&udata, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof(resp)); mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata); if (IS_ERR(mw)) { ret = PTR_ERR(mw); goto err_put; } mw->device = pd->device; mw->pd = pd; mw->uobject = uobj; atomic_inc(&pd->usecnt); uobj->object = mw; ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); if (ret) goto err_unalloc; memset(&resp, 0, sizeof(resp)); resp.rkey = mw->rkey; resp.mw_handle = uobj->id; if (copy_to_user((void __user *)(unsigned long)cmd.response, &resp, sizeof(resp))) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->mw_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_mw_idr, uobj); err_unalloc: uverbs_dealloc_mw(mw); err_put: put_pd_read(pd); err_free: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_mw cmd; struct ib_mw *mw; struct ib_uobject *uobj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); if (!uobj) return -EINVAL; mw = uobj->object; ret = uverbs_dealloc_mw(mw); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_mw_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; } ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; struct file *filp; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) return ret; resp.fd = ret; filp = ib_uverbs_alloc_event_file(file, ib_dev, 0); if (IS_ERR(filp)) { put_unused_fd(resp.fd); return PTR_ERR(filp); } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { put_unused_fd(resp.fd); fput(filp); return -EFAULT; } fd_install(resp.fd, filp); return in_len; } static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw, struct ib_uverbs_ex_create_cq *cmd, size_t cmd_sz, int (*cb)(struct ib_uverbs_file *file, struct ib_ucq_object *obj, struct ib_uverbs_ex_create_cq_resp *resp, struct ib_udata *udata, void *context), void *context) { struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; struct ib_uverbs_ex_create_cq_resp resp; struct ib_cq_init_attr attr = {}; if (cmd->comp_vector >= file->device->num_comp_vectors) return ERR_PTR(-EINVAL); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return ERR_PTR(-ENOMEM); init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); if (cmd->comp_channel >= 0) { ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel); if (!ev_file) { ret = -EINVAL; goto err; } } obj->uverbs_file = file; obj->comp_events_reported = 0; obj->async_events_reported = 0; INIT_LIST_HEAD(&obj->comp_list); INIT_LIST_HEAD(&obj->async_list); attr.cqe = cmd->cqe; attr.comp_vector = cmd->comp_vector; if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags)) attr.flags = cmd->flags; cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw); if (IS_ERR(cq)) { ret = PTR_ERR(cq); goto err_file; } cq->device = ib_dev; cq->uobject = &obj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); obj->uobject.object = cq; ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject); if (ret) goto err_free; memset(&resp, 0, sizeof resp); resp.base.cq_handle = obj->uobject.id; resp.base.cqe = cq->cqe; resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); ret = cb(file, obj, &resp, ucore, context); if (ret) goto err_cb; mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->cq_list); mutex_unlock(&file->mutex); obj->uobject.live = 1; up_write(&obj->uobject.mutex); return obj; err_cb: idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject); err_free: ib_destroy_cq(cq); err_file: if (ev_file) ib_uverbs_release_ucq(file, ev_file, obj); err: put_uobj_write(&obj->uobject); return ERR_PTR(ret); } static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file, struct ib_ucq_object *obj, struct ib_uverbs_ex_create_cq_resp *resp, struct ib_udata *ucore, void *context) { if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) return -EFAULT; return 0; } ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_cq cmd; struct ib_uverbs_ex_create_cq cmd_ex; struct ib_uverbs_create_cq_resp resp; struct ib_udata ucore; struct ib_udata uhw; struct ib_ucq_object *obj; if (out_len < sizeof(resp)) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp)); INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + sizeof(resp), in_len - sizeof(cmd), out_len - sizeof(resp)); memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; cmd_ex.cqe = cmd.cqe; cmd_ex.comp_vector = cmd.comp_vector; cmd_ex.comp_channel = cmd.comp_channel; obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex, offsetof(typeof(cmd_ex), comp_channel) + sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb, NULL); if (IS_ERR(obj)) return PTR_ERR(obj); return in_len; } static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file, struct ib_ucq_object *obj, struct ib_uverbs_ex_create_cq_resp *resp, struct ib_udata *ucore, void *context) { if (ib_copy_to_udata(ucore, resp, resp->response_length)) return -EFAULT; return 0; } int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_create_cq_resp resp; struct ib_uverbs_ex_create_cq cmd; struct ib_ucq_object *obj; int err; if (ucore->inlen < sizeof(cmd)) return -EINVAL; err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); if (err) return err; if (cmd.comp_mask) return -EINVAL; if (cmd.reserved) return -EINVAL; if (ucore->outlen < (offsetof(typeof(resp), response_length) + sizeof(resp.response_length))) return -ENOSPC; obj = create_cq(file, ib_dev, ucore, uhw, &cmd, min(ucore->inlen, sizeof(cmd)), ib_uverbs_ex_create_cq_cb, NULL); if (IS_ERR(obj)) return PTR_ERR(obj); return 0; } ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_resize_cq cmd; struct ib_uverbs_resize_cq_resp resp; struct ib_udata udata; struct ib_cq *cq; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; ret = cq->device->resize_cq(cq, cmd.cqe, &udata); if (ret) goto out; resp.cqe = cq->cqe; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp.cqe)) ret = -EFAULT; out: put_cq_read(cq); return ret ? ret : in_len; } static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) { struct ib_uverbs_wc tmp; tmp.wr_id = wc->wr_id; tmp.status = wc->status; tmp.opcode = wc->opcode; tmp.vendor_err = wc->vendor_err; tmp.byte_len = wc->byte_len; tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data; tmp.qp_num = wc->qp->qp_num; tmp.src_qp = wc->src_qp; tmp.wc_flags = wc->wc_flags; tmp.pkey_index = wc->pkey_index; tmp.slid = wc->slid; tmp.sl = wc->sl; tmp.dlid_path_bits = wc->dlid_path_bits; tmp.port_num = wc->port_num; tmp.reserved = 0; if (copy_to_user(dest, &tmp, sizeof tmp)) return -EFAULT; return 0; } ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_poll_cq cmd; struct ib_uverbs_poll_cq_resp resp; u8 __user *header_ptr; u8 __user *data_ptr; struct ib_cq *cq; struct ib_wc wc; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; /* we copy a struct ib_uverbs_poll_cq_resp to user space */ header_ptr = (void __user *)(unsigned long) cmd.response; data_ptr = header_ptr + sizeof resp; memset(&resp, 0, sizeof resp); while (resp.count < cmd.ne) { ret = ib_poll_cq(cq, 1, &wc); if (ret < 0) goto out_put; if (!ret) break; ret = copy_wc_to_user(data_ptr, &wc); if (ret) goto out_put; data_ptr += sizeof(struct ib_uverbs_wc); ++resp.count; } if (copy_to_user(header_ptr, &resp, sizeof resp)) { ret = -EFAULT; goto out_put; } ret = in_len; out_put: put_cq_read(cq); return ret; } ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_req_notify_cq cmd; struct ib_cq *cq; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) return -EINVAL; ib_req_notify_cq(cq, cmd.solicited_only ? IB_CQ_SOLICITED : IB_CQ_NEXT_COMP); put_cq_read(cq); return in_len; } ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_cq cmd; struct ib_uverbs_destroy_cq_resp resp; struct ib_uobject *uobj; struct ib_cq *cq; struct ib_ucq_object *obj; struct ib_uverbs_event_file *ev_file; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext); if (!uobj) return -EINVAL; cq = uobj->object; ev_file = cq->cq_context; obj = container_of(cq->uobject, struct ib_ucq_object, uobject); ret = ib_destroy_cq(cq); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_cq_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_ucq(file, ev_file, obj); memset(&resp, 0, sizeof resp); resp.comp_events_reported = obj->comp_events_reported; resp.async_events_reported = obj->async_events_reported; put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } static int create_qp(struct ib_uverbs_file *file, struct ib_udata *ucore, struct ib_udata *uhw, struct ib_uverbs_ex_create_qp *cmd, size_t cmd_sz, int (*cb)(struct ib_uverbs_file *file, struct ib_uverbs_ex_create_qp_resp *resp, struct ib_udata *udata), void *context) { struct ib_uqp_object *obj; struct ib_device *device; struct ib_pd *pd = NULL; struct ib_xrcd *xrcd = NULL; struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_cq *scq = NULL, *rcq = NULL; struct ib_srq *srq = NULL; struct ib_qp *qp; char *buf; struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; struct ib_rwq_ind_table *ind_tbl = NULL; bool has_sq = true; if (cmd->qp_type == IB_QPT_RAW_PACKET && priv_check(curthread, PRIV_NET_RAW) != 0) return -EPERM; obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) + sizeof(cmd->rwq_ind_tbl_handle) && (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) { ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle, file->ucontext); if (!ind_tbl) { ret = -EINVAL; goto err_put; } attr.rwq_ind_tbl = ind_tbl; } if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) + sizeof(cmd->reserved1)) && cmd->reserved1) { ret = -EOPNOTSUPP; goto err_put; } if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) { ret = -EINVAL; goto err_put; } if (ind_tbl && !cmd->max_send_wr) has_sq = false; if (cmd->qp_type == IB_QPT_XRC_TGT) { xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext, &xrcd_uobj); if (!xrcd) { ret = -EINVAL; goto err_put; } device = xrcd->device; } else { if (cmd->qp_type == IB_QPT_XRC_INI) { cmd->max_recv_wr = 0; cmd->max_recv_sge = 0; } else { if (cmd->is_srq) { srq = idr_read_srq(cmd->srq_handle, file->ucontext); if (!srq || srq->srq_type != IB_SRQT_BASIC) { ret = -EINVAL; goto err_put; } } if (!ind_tbl) { if (cmd->recv_cq_handle != cmd->send_cq_handle) { rcq = idr_read_cq(cmd->recv_cq_handle, file->ucontext, 0); if (!rcq) { ret = -EINVAL; goto err_put; } } } } if (has_sq) scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq); if (!ind_tbl) rcq = rcq ?: scq; pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd || (!scq && has_sq)) { ret = -EINVAL; goto err_put; } device = pd->device; } attr.event_handler = ib_uverbs_qp_event_handler; attr.qp_context = file; attr.send_cq = scq; attr.recv_cq = rcq; attr.srq = srq; attr.xrcd = xrcd; attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; attr.qp_type = cmd->qp_type; attr.create_flags = 0; attr.cap.max_send_wr = cmd->max_send_wr; attr.cap.max_recv_wr = cmd->max_recv_wr; attr.cap.max_send_sge = cmd->max_send_sge; attr.cap.max_recv_sge = cmd->max_recv_sge; attr.cap.max_inline_data = cmd->max_inline_data; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); if (cmd_sz >= offsetof(typeof(*cmd), create_flags) + sizeof(cmd->create_flags)) attr.create_flags = cmd->create_flags; if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_CROSS_CHANNEL | IB_QP_CREATE_MANAGED_SEND | IB_QP_CREATE_MANAGED_RECV | IB_QP_CREATE_SCATTER_FCS)) { ret = -EINVAL; goto err_put; } buf = (char *)cmd + sizeof(*cmd); if (cmd_sz > sizeof(*cmd)) if (!(buf[0] == 0 && !memcmp(buf, buf + 1, cmd_sz - sizeof(*cmd) - 1))) { ret = -EINVAL; goto err_put; } if (cmd->qp_type == IB_QPT_XRC_TGT) qp = ib_create_qp(pd, &attr); else qp = device->create_qp(pd, &attr, uhw); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } if (cmd->qp_type != IB_QPT_XRC_TGT) { qp->real_qp = qp; qp->device = device; qp->pd = pd; qp->send_cq = attr.send_cq; qp->recv_cq = attr.recv_cq; qp->srq = attr.srq; qp->rwq_ind_tbl = ind_tbl; qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) atomic_inc(&attr.recv_cq->usecnt); if (attr.srq) atomic_inc(&attr.srq->usecnt); if (ind_tbl) atomic_inc(&ind_tbl->usecnt); } qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); resp.base.qpn = qp->qp_num; resp.base.qp_handle = obj->uevent.uobject.id; resp.base.max_recv_sge = attr.cap.max_recv_sge; resp.base.max_send_sge = attr.cap.max_send_sge; resp.base.max_recv_wr = attr.cap.max_recv_wr; resp.base.max_send_wr = attr.cap.max_send_wr; resp.base.max_inline_data = attr.cap.max_inline_data; resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); ret = cb(file, &resp, ucore); if (ret) goto err_cb; if (xrcd) { obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); } if (pd) put_pd_read(pd); if (scq) put_cq_read(scq); if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); if (ind_tbl) put_rwq_indirection_table_read(ind_tbl); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); mutex_unlock(&file->mutex); obj->uevent.uobject.live = 1; up_write(&obj->uevent.uobject.mutex); return 0; err_cb: idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); err_put: if (xrcd) put_xrcd_read(xrcd_uobj); if (pd) put_pd_read(pd); if (scq) put_cq_read(scq); if (rcq && rcq != scq) put_cq_read(rcq); if (srq) put_srq_read(srq); if (ind_tbl) put_rwq_indirection_table_read(ind_tbl); put_uobj_write(&obj->uevent.uobject); return ret; } static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file, struct ib_uverbs_ex_create_qp_resp *resp, struct ib_udata *ucore) { if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base))) return -EFAULT; return 0; } ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_qp cmd; struct ib_uverbs_ex_create_qp cmd_ex; struct ib_udata ucore; struct ib_udata uhw; ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp); int err; if (out_len < resp_size) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof(cmd))) return -EFAULT; INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), resp_size); INIT_UDATA(&uhw, buf + sizeof(cmd), (unsigned long)cmd.response + resp_size, in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr), out_len - resp_size); memset(&cmd_ex, 0, sizeof(cmd_ex)); cmd_ex.user_handle = cmd.user_handle; cmd_ex.pd_handle = cmd.pd_handle; cmd_ex.send_cq_handle = cmd.send_cq_handle; cmd_ex.recv_cq_handle = cmd.recv_cq_handle; cmd_ex.srq_handle = cmd.srq_handle; cmd_ex.max_send_wr = cmd.max_send_wr; cmd_ex.max_recv_wr = cmd.max_recv_wr; cmd_ex.max_send_sge = cmd.max_send_sge; cmd_ex.max_recv_sge = cmd.max_recv_sge; cmd_ex.max_inline_data = cmd.max_inline_data; cmd_ex.sq_sig_all = cmd.sq_sig_all; cmd_ex.qp_type = cmd.qp_type; cmd_ex.is_srq = cmd.is_srq; err = create_qp(file, &ucore, &uhw, &cmd_ex, offsetof(typeof(cmd_ex), is_srq) + sizeof(cmd.is_srq), ib_uverbs_create_qp_cb, NULL); if (err) return err; return in_len; } static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file, struct ib_uverbs_ex_create_qp_resp *resp, struct ib_udata *ucore) { if (ib_copy_to_udata(ucore, resp, resp->response_length)) return -EFAULT; return 0; } int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_create_qp_resp resp; struct ib_uverbs_ex_create_qp cmd = {0}; int err; if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) + sizeof(cmd.comp_mask))) return -EINVAL; err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); if (err) return err; if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK) return -EINVAL; if (cmd.reserved) return -EINVAL; if (ucore->outlen < (offsetof(typeof(resp), response_length) + sizeof(resp.response_length))) return -ENOSPC; err = create_qp(file, ucore, uhw, &cmd, min(ucore->inlen, sizeof(cmd)), ib_uverbs_ex_create_qp_cb, NULL); if (err) return err; return 0; } ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_open_qp cmd; struct ib_uverbs_create_qp_resp resp; struct ib_udata udata; struct ib_uqp_object *obj; struct ib_xrcd *xrcd; struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_qp *qp; struct ib_qp_open_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); if (!xrcd) { ret = -EINVAL; goto err_put; } attr.event_handler = ib_uverbs_qp_event_handler; attr.qp_context = file; attr.qp_num = cmd.qpn; attr.qp_type = cmd.qp_type; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); INIT_LIST_HEAD(&obj->mcast_list); qp = ib_open_qp(xrcd, &attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_put; } qp->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = qp; ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); resp.qpn = qp->qp_num; resp.qp_handle = obj->uevent.uobject.id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_remove; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); mutex_unlock(&file->mutex); obj->uevent.uobject.live = 1; up_write(&obj->uevent.uobject.mutex); return in_len; err_remove: idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); err_destroy: ib_destroy_qp(qp); err_put: put_xrcd_read(xrcd_uobj); put_uobj_write(&obj->uevent.uobject); return ret; } ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_qp cmd; struct ib_uverbs_query_qp_resp resp; struct ib_qp *qp; struct ib_qp_attr *attr; struct ib_qp_init_attr *init_attr; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; attr = kmalloc(sizeof *attr, GFP_KERNEL); init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL); if (!attr || !init_attr) { ret = -ENOMEM; goto out; } qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr); put_qp_read(qp); if (ret) goto out; memset(&resp, 0, sizeof resp); resp.qp_state = attr->qp_state; resp.cur_qp_state = attr->cur_qp_state; resp.path_mtu = attr->path_mtu; resp.path_mig_state = attr->path_mig_state; resp.qkey = attr->qkey; resp.rq_psn = attr->rq_psn; resp.sq_psn = attr->sq_psn; resp.dest_qp_num = attr->dest_qp_num; resp.qp_access_flags = attr->qp_access_flags; resp.pkey_index = attr->pkey_index; resp.alt_pkey_index = attr->alt_pkey_index; resp.sq_draining = attr->sq_draining; resp.max_rd_atomic = attr->max_rd_atomic; resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; resp.min_rnr_timer = attr->min_rnr_timer; resp.port_num = attr->port_num; resp.timeout = attr->timeout; resp.retry_cnt = attr->retry_cnt; resp.rnr_retry = attr->rnr_retry; resp.alt_port_num = attr->alt_port_num; resp.alt_timeout = attr->alt_timeout; memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16); resp.dest.flow_label = attr->ah_attr.grh.flow_label; resp.dest.sgid_index = attr->ah_attr.grh.sgid_index; resp.dest.hop_limit = attr->ah_attr.grh.hop_limit; resp.dest.traffic_class = attr->ah_attr.grh.traffic_class; resp.dest.dlid = attr->ah_attr.dlid; resp.dest.sl = attr->ah_attr.sl; resp.dest.src_path_bits = attr->ah_attr.src_path_bits; resp.dest.static_rate = attr->ah_attr.static_rate; resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH); resp.dest.port_num = attr->ah_attr.port_num; memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16); resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label; resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index; resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit; resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class; resp.alt_dest.dlid = attr->alt_ah_attr.dlid; resp.alt_dest.sl = attr->alt_ah_attr.sl; resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits; resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate; resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH); resp.alt_dest.port_num = attr->alt_ah_attr.port_num; resp.max_send_wr = init_attr->cap.max_send_wr; resp.max_recv_wr = init_attr->cap.max_recv_wr; resp.max_send_sge = init_attr->cap.max_send_sge; resp.max_recv_sge = init_attr->cap.max_recv_sge; resp.max_inline_data = init_attr->cap.max_inline_data; resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: kfree(attr); kfree(init_attr); return ret ? ret : in_len; } /* Remove ignored fields set in the attribute mask */ static int modify_qp_mask(enum ib_qp_type qp_type, int mask) { switch (qp_type) { case IB_QPT_XRC_INI: return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); case IB_QPT_XRC_TGT: return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY); default: return mask; } } ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_modify_qp cmd; struct ib_udata udata; struct ib_qp *qp; struct ib_qp_attr *attr; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } attr->qp_state = cmd.qp_state; attr->cur_qp_state = cmd.cur_qp_state; attr->path_mtu = cmd.path_mtu; attr->path_mig_state = cmd.path_mig_state; attr->qkey = cmd.qkey; attr->rq_psn = cmd.rq_psn; attr->sq_psn = cmd.sq_psn; attr->dest_qp_num = cmd.dest_qp_num; attr->qp_access_flags = cmd.qp_access_flags; attr->pkey_index = cmd.pkey_index; attr->alt_pkey_index = cmd.alt_pkey_index; attr->en_sqd_async_notify = cmd.en_sqd_async_notify; attr->max_rd_atomic = cmd.max_rd_atomic; attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; attr->min_rnr_timer = cmd.min_rnr_timer; attr->port_num = cmd.port_num; attr->timeout = cmd.timeout; attr->retry_cnt = cmd.retry_cnt; attr->rnr_retry = cmd.rnr_retry; attr->alt_port_num = cmd.alt_port_num; attr->alt_timeout = cmd.alt_timeout; memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); attr->ah_attr.grh.flow_label = cmd.dest.flow_label; attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; attr->ah_attr.dlid = cmd.dest.dlid; attr->ah_attr.sl = cmd.dest.sl; attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; attr->ah_attr.static_rate = cmd.dest.static_rate; attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0; attr->ah_attr.port_num = cmd.dest.port_num; memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; attr->alt_ah_attr.sl = cmd.alt_dest.sl; attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; if (qp->real_qp == qp) { ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); if (ret) goto release_qp; ret = qp->device->modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); } else { ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); } if (ret) goto release_qp; ret = in_len; release_qp: put_qp_read(qp); out: kfree(attr); return ret; } ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_qp cmd; struct ib_uverbs_destroy_qp_resp resp; struct ib_uobject *uobj; struct ib_qp *qp; struct ib_uqp_object *obj; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; memset(&resp, 0, sizeof resp); uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext); if (!uobj) return -EINVAL; qp = uobj->object; obj = container_of(uobj, struct ib_uqp_object, uevent.uobject); if (!list_empty(&obj->mcast_list)) { put_uobj_write(uobj); return -EBUSY; } ret = ib_destroy_qp(qp); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; if (obj->uxrcd) atomic_dec(&obj->uxrcd->refcnt); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } static void *alloc_wr(size_t wr_size, __u32 num_sge) { return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) + num_sge * sizeof (struct ib_sge), GFP_KERNEL); }; ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_send cmd; struct ib_uverbs_post_send_resp resp; struct ib_uverbs_send_wr *user_wr; struct ib_send_wr *wr = NULL, *last, *next, *bad_wr; struct ib_qp *qp; int i, sg_ind; int is_ud; ssize_t ret = -EINVAL; size_t next_size; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count + cmd.sge_count * sizeof (struct ib_uverbs_sge)) return -EINVAL; if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr)) return -EINVAL; user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL); if (!user_wr) return -ENOMEM; qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) goto out; is_ud = qp->qp_type == IB_QPT_UD; sg_ind = 0; last = NULL; for (i = 0; i < cmd.wr_count; ++i) { if (copy_from_user(user_wr, buf + sizeof cmd + i * cmd.wqe_size, cmd.wqe_size)) { ret = -EFAULT; goto out_put; } if (user_wr->num_sge + sg_ind > cmd.sge_count) { ret = -EINVAL; goto out_put; } if (is_ud) { struct ib_ud_wr *ud; if (user_wr->opcode != IB_WR_SEND && user_wr->opcode != IB_WR_SEND_WITH_IMM) { ret = -EINVAL; goto out_put; } next_size = sizeof(*ud); ud = alloc_wr(next_size, user_wr->num_sge); if (!ud) { ret = -ENOMEM; goto out_put; } ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext); if (!ud->ah) { kfree(ud); ret = -EINVAL; goto out_put; } ud->remote_qpn = user_wr->wr.ud.remote_qpn; ud->remote_qkey = user_wr->wr.ud.remote_qkey; next = &ud->wr; } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || user_wr->opcode == IB_WR_RDMA_WRITE || user_wr->opcode == IB_WR_RDMA_READ) { struct ib_rdma_wr *rdma; next_size = sizeof(*rdma); rdma = alloc_wr(next_size, user_wr->num_sge); if (!rdma) { ret = -ENOMEM; goto out_put; } rdma->remote_addr = user_wr->wr.rdma.remote_addr; rdma->rkey = user_wr->wr.rdma.rkey; next = &rdma->wr; } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { struct ib_atomic_wr *atomic; next_size = sizeof(*atomic); atomic = alloc_wr(next_size, user_wr->num_sge); if (!atomic) { ret = -ENOMEM; goto out_put; } atomic->remote_addr = user_wr->wr.atomic.remote_addr; atomic->compare_add = user_wr->wr.atomic.compare_add; atomic->swap = user_wr->wr.atomic.swap; atomic->rkey = user_wr->wr.atomic.rkey; next = &atomic->wr; } else if (user_wr->opcode == IB_WR_SEND || user_wr->opcode == IB_WR_SEND_WITH_IMM || user_wr->opcode == IB_WR_SEND_WITH_INV) { next_size = sizeof(*next); next = alloc_wr(next_size, user_wr->num_sge); if (!next) { ret = -ENOMEM; goto out_put; } } else { ret = -EINVAL; goto out_put; } if (user_wr->opcode == IB_WR_SEND_WITH_IMM || user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { next->ex.imm_data = (__be32 __force) user_wr->ex.imm_data; } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) { next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey; } if (!last) wr = next; else last->next = next; last = next; next->next = NULL; next->wr_id = user_wr->wr_id; next->num_sge = user_wr->num_sge; next->opcode = user_wr->opcode; next->send_flags = user_wr->send_flags; if (next->num_sge) { next->sg_list = (void *)((char *)next + ALIGN(next_size, sizeof(struct ib_sge))); if (copy_from_user(next->sg_list, (const char *)buf + sizeof cmd + cmd.wr_count * cmd.wqe_size + sg_ind * sizeof (struct ib_sge), next->num_sge * sizeof (struct ib_sge))) { ret = -EFAULT; goto out_put; } sg_ind += next->num_sge; } else next->sg_list = NULL; } resp.bad_wr = 0; ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out_put: put_qp_read(qp); while (wr) { if (is_ud && ud_wr(wr)->ah) put_ah_read(ud_wr(wr)->ah); next = wr->next; kfree(wr); wr = next; } out: kfree(user_wr); return ret ? ret : in_len; } static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf, int in_len, u32 wr_count, u32 sge_count, u32 wqe_size) { struct ib_uverbs_recv_wr *user_wr; struct ib_recv_wr *wr = NULL, *last, *next; int sg_ind; int i; int ret; if (in_len < wqe_size * wr_count + sge_count * sizeof (struct ib_uverbs_sge)) return ERR_PTR(-EINVAL); if (wqe_size < sizeof (struct ib_uverbs_recv_wr)) return ERR_PTR(-EINVAL); user_wr = kmalloc(wqe_size, GFP_KERNEL); if (!user_wr) return ERR_PTR(-ENOMEM); sg_ind = 0; last = NULL; for (i = 0; i < wr_count; ++i) { if (copy_from_user(user_wr, buf + i * wqe_size, wqe_size)) { ret = -EFAULT; goto err; } if (user_wr->num_sge + sg_ind > sge_count) { ret = -EINVAL; goto err; } next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) + user_wr->num_sge * sizeof (struct ib_sge), GFP_KERNEL); if (!next) { ret = -ENOMEM; goto err; } if (!last) wr = next; else last->next = next; last = next; next->next = NULL; next->wr_id = user_wr->wr_id; next->num_sge = user_wr->num_sge; if (next->num_sge) { next->sg_list = (void *)((char *)next + ALIGN(sizeof *next, sizeof (struct ib_sge))); if (copy_from_user(next->sg_list, (const char *)buf + wr_count * wqe_size + sg_ind * sizeof (struct ib_sge), next->num_sge * sizeof (struct ib_sge))) { ret = -EFAULT; goto err; } sg_ind += next->num_sge; } else next->sg_list = NULL; } kfree(user_wr); return wr; err: kfree(user_wr); while (wr) { next = wr->next; kfree(wr); wr = next; } return ERR_PTR(ret); } ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_recv cmd; struct ib_uverbs_post_recv_resp resp; struct ib_recv_wr *wr, *next, *bad_wr; struct ib_qp *qp; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, in_len - sizeof cmd, cmd.wr_count, cmd.sge_count, cmd.wqe_size); if (IS_ERR(wr)) return PTR_ERR(wr); qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) goto out; resp.bad_wr = 0; ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); put_qp_read(qp); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: while (wr) { next = wr->next; kfree(wr); wr = next; } return ret ? ret : in_len; } ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_post_srq_recv cmd; struct ib_uverbs_post_srq_recv_resp resp; struct ib_recv_wr *wr, *next, *bad_wr; struct ib_srq *srq; ssize_t ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd, in_len - sizeof cmd, cmd.wr_count, cmd.sge_count, cmd.wqe_size); if (IS_ERR(wr)) return PTR_ERR(wr); srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) goto out; resp.bad_wr = 0; ret = srq->device->post_srq_recv(srq, wr, &bad_wr); put_srq_read(srq); if (ret) for (next = wr; next; next = next->next) { ++resp.bad_wr; if (next == bad_wr) break; } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; out: while (wr) { next = wr->next; kfree(wr); wr = next; } return ret ? ret : in_len; } ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_ah cmd; struct ib_uverbs_create_ah_resp resp; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_ah *ah; struct ib_ah_attr attr; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err; } attr.dlid = cmd.attr.dlid; attr.sl = cmd.attr.sl; attr.src_path_bits = cmd.attr.src_path_bits; attr.static_rate = cmd.attr.static_rate; attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0; attr.port_num = cmd.attr.port_num; attr.grh.flow_label = cmd.attr.grh.flow_label; attr.grh.sgid_index = cmd.attr.grh.sgid_index; attr.grh.hop_limit = cmd.attr.grh.hop_limit; attr.grh.traffic_class = cmd.attr.grh.traffic_class; memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); ah = ib_create_ah(pd, &attr); if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } ah->uobject = uobj; uobj->object = ah; ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj); if (ret) goto err_destroy; resp.ah_handle = uobj->id; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->ah_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return in_len; err_copy: idr_remove_uobj(&ib_uverbs_ah_idr, uobj); err_destroy: ib_destroy_ah(ah); err_put: put_pd_read(pd); err: put_uobj_write(uobj); return ret; } ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_ah cmd; struct ib_ah *ah; struct ib_uobject *uobj; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext); if (!uobj) return -EINVAL; ah = uobj->object; ret = ib_destroy_ah(ah); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return in_len; } ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_attach_mcast cmd; struct ib_qp *qp; struct ib_uqp_object *obj; struct ib_uverbs_mcast_entry *mcast; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { ret = 0; goto out_put; } mcast = kmalloc(sizeof *mcast, GFP_KERNEL); if (!mcast) { ret = -ENOMEM; goto out_put; } mcast->lid = cmd.mlid; memcpy(mcast->gid.raw, cmd.gid, sizeof mcast->gid.raw); ret = ib_attach_mcast(qp, &mcast->gid, cmd.mlid); if (!ret) list_add_tail(&mcast->list, &obj->mcast_list); else kfree(mcast); out_put: put_qp_write(qp); return ret ? ret : in_len; } ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_detach_mcast cmd; struct ib_uqp_object *obj; struct ib_qp *qp; struct ib_uverbs_mcast_entry *mcast; int ret = -EINVAL; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); if (ret) goto out_put; obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject); list_for_each_entry(mcast, &obj->mcast_list, list) if (cmd.mlid == mcast->lid && !memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) { list_del(&mcast->list); kfree(mcast); break; } out_put: put_qp_write(qp); return ret ? ret : in_len; } static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) { /* Returns user space filter size, includes padding */ return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; } static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { /* * User space filter structures must be 64 bit aligned, otherwise this * may pass, but we won't handle additional new attributes. */ if (kern_filter_size > ib_real_filter_sz) { if (memchr_inv((char *)kern_spec_filter + ib_real_filter_sz, 0, kern_filter_size - ib_real_filter_sz)) return -EINVAL; return ib_real_filter_sz; } return kern_filter_size; } static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { ssize_t actual_filter_sz; ssize_t kern_filter_sz; ssize_t ib_filter_sz; void *kern_spec_mask; void *kern_spec_val; if (kern_spec->reserved) return -EINVAL; ib_spec->type = kern_spec->type; kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); /* User flow spec size must be aligned to 4 bytes */ if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) return -EINVAL; kern_spec_val = (char *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); kern_spec_mask = (char *)kern_spec_val + kern_filter_sz; switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); if (actual_filter_sz <= 0) return -EINVAL; ib_spec->size = sizeof(struct ib_flow_spec_eth); memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV4: ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); if (actual_filter_sz <= 0) return -EINVAL; ib_spec->size = sizeof(struct ib_flow_spec_ipv4); memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV6: ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); if (actual_filter_sz <= 0) return -EINVAL; ib_spec->size = sizeof(struct ib_flow_spec_ipv6); memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) || (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20)) return -EINVAL; break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, kern_filter_sz, ib_filter_sz); if (actual_filter_sz <= 0) return -EINVAL; ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; default: return -EINVAL; } return 0; } int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_create_wq cmd = {}; struct ib_uverbs_ex_create_wq_resp resp = {}; struct ib_uwq_object *obj; int err = 0; struct ib_cq *cq; struct ib_pd *pd; struct ib_wq *wq; struct ib_wq_init_attr wq_init_attr = {}; size_t required_cmd_sz; size_t required_resp_len; required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge); required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn); if (ucore->inlen < required_cmd_sz) return -EINVAL; if (ucore->outlen < required_resp_len) return -ENOSPC; if (ucore->inlen > sizeof(cmd) && !ib_is_udata_cleared(ucore, sizeof(cmd), ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); if (err) return err; if (cmd.comp_mask) return -EOPNOTSUPP; obj = kmalloc(sizeof(*obj), GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &wq_lock_class); down_write(&obj->uevent.uobject.mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); if (!pd) { err = -EINVAL; goto err_uobj; } cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); if (!cq) { err = -EINVAL; goto err_put_pd; } wq_init_attr.cq = cq; wq_init_attr.max_sge = cmd.max_sge; wq_init_attr.max_wr = cmd.max_wr; wq_init_attr.wq_context = file; wq_init_attr.wq_type = cmd.wq_type; wq_init_attr.event_handler = ib_uverbs_wq_event_handler; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); wq = pd->device->create_wq(pd, &wq_init_attr, uhw); if (IS_ERR(wq)) { err = PTR_ERR(wq); goto err_put_cq; } wq->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = wq; wq->wq_type = wq_init_attr.wq_type; wq->cq = cq; wq->pd = pd; wq->device = pd->device; wq->wq_context = wq_init_attr.wq_context; atomic_set(&wq->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&cq->usecnt); wq->uobject = &obj->uevent.uobject; obj->uevent.uobject.object = wq; err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); if (err) goto destroy_wq; memset(&resp, 0, sizeof(resp)); resp.wq_handle = obj->uevent.uobject.id; resp.max_sge = wq_init_attr.max_sge; resp.max_wr = wq_init_attr.max_wr; resp.wqn = wq->wq_num; resp.response_length = required_resp_len; err = ib_copy_to_udata(ucore, &resp, resp.response_length); if (err) goto err_copy; put_pd_read(pd); put_cq_read(cq); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list); mutex_unlock(&file->mutex); obj->uevent.uobject.live = 1; up_write(&obj->uevent.uobject.mutex); return 0; err_copy: idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject); destroy_wq: ib_destroy_wq(wq); err_put_cq: put_cq_read(cq); err_put_pd: put_pd_read(pd); err_uobj: put_uobj_write(&obj->uevent.uobject); return err; } int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_wq cmd = {}; struct ib_uverbs_ex_destroy_wq_resp resp = {}; struct ib_wq *wq; struct ib_uobject *uobj; struct ib_uwq_object *obj; size_t required_cmd_sz; size_t required_resp_len; int ret; required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle); required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (ucore->inlen < required_cmd_sz) return -EINVAL; if (ucore->outlen < required_resp_len) return -ENOSPC; if (ucore->inlen > sizeof(cmd) && !ib_is_udata_cleared(ucore, sizeof(cmd), ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); if (ret) return ret; if (cmd.comp_mask) return -EOPNOTSUPP; resp.response_length = required_resp_len; uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle, file->ucontext); if (!uobj) return -EINVAL; wq = uobj->object; obj = container_of(uobj, struct ib_uwq_object, uevent.uobject); ret = ib_destroy_wq(wq); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_wq_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_uevent(file, &obj->uevent); resp.events_reported = obj->uevent.events_reported; put_uobj(uobj); ret = ib_copy_to_udata(ucore, &resp, resp.response_length); if (ret) return ret; return 0; } int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_modify_wq cmd = {}; struct ib_wq *wq; struct ib_wq_attr wq_attr = {}; size_t required_cmd_sz; int ret; required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state); if (ucore->inlen < required_cmd_sz) return -EINVAL; if (ucore->inlen > sizeof(cmd) && !ib_is_udata_cleared(ucore, sizeof(cmd), ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); if (ret) return ret; if (!cmd.attr_mask) return -EINVAL; if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE)) return -EINVAL; wq = idr_read_wq(cmd.wq_handle, file->ucontext); if (!wq) return -EINVAL; wq_attr.curr_wq_state = cmd.curr_wq_state; wq_attr.wq_state = cmd.wq_state; ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw); put_wq_read(wq); return ret; } int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_create_rwq_ind_table cmd = {}; struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; struct ib_uobject *uobj; int err = 0; struct ib_rwq_ind_table_init_attr init_attr = {}; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_wq **wqs = NULL; u32 *wqs_handles = NULL; struct ib_wq *wq = NULL; int i, j, num_read_wqs; u32 num_wq_handles; u32 expected_in_size; size_t required_cmd_sz_header; size_t required_resp_len; required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size); required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num); if (ucore->inlen < required_cmd_sz_header) return -EINVAL; if (ucore->outlen < required_resp_len) return -ENOSPC; err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header); if (err) return err; ucore->inbuf = (const char *)ucore->inbuf + required_cmd_sz_header; ucore->inlen -= required_cmd_sz_header; if (cmd.comp_mask) return -EOPNOTSUPP; if (cmd.log_ind_tbl_size > IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE) return -EINVAL; num_wq_handles = 1 << cmd.log_ind_tbl_size; expected_in_size = num_wq_handles * sizeof(__u32); if (num_wq_handles == 1) /* input size for wq handles is u64 aligned */ expected_in_size += sizeof(__u32); if (ucore->inlen < expected_in_size) return -EINVAL; if (ucore->inlen > expected_in_size && !ib_is_udata_cleared(ucore, expected_in_size, ucore->inlen - expected_in_size)) return -EOPNOTSUPP; wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles), GFP_KERNEL); if (!wqs_handles) return -ENOMEM; err = ib_copy_from_udata(wqs_handles, ucore, num_wq_handles * sizeof(__u32)); if (err) goto err_free; wqs = kcalloc(num_wq_handles, sizeof(*wqs), GFP_KERNEL); if (!wqs) { err = -ENOMEM; goto err_free; } for (num_read_wqs = 0; num_read_wqs < num_wq_handles; num_read_wqs++) { wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext); if (!wq) { err = -EINVAL; goto put_wqs; } wqs[num_read_wqs] = wq; } uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); if (!uobj) { err = -ENOMEM; goto put_wqs; } init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class); down_write(&uobj->mutex); init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; init_attr.ind_tbl = wqs; rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw); if (IS_ERR(rwq_ind_tbl)) { err = PTR_ERR(rwq_ind_tbl); goto err_uobj; } rwq_ind_tbl->ind_tbl = wqs; rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; rwq_ind_tbl->uobject = uobj; uobj->object = rwq_ind_tbl; rwq_ind_tbl->device = ib_dev; atomic_set(&rwq_ind_tbl->usecnt, 0); for (i = 0; i < num_wq_handles; i++) atomic_inc(&wqs[i]->usecnt); err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); if (err) goto destroy_ind_tbl; resp.ind_tbl_handle = uobj->id; resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num; resp.response_length = required_resp_len; err = ib_copy_to_udata(ucore, &resp, resp.response_length); if (err) goto err_copy; kfree(wqs_handles); for (j = 0; j < num_read_wqs; j++) put_wq_read(wqs[j]); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); return 0; err_copy: idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); destroy_ind_tbl: ib_destroy_rwq_ind_table(rwq_ind_tbl); err_uobj: put_uobj_write(uobj); put_wqs: for (j = 0; j < num_read_wqs; j++) put_wq_read(wqs[j]); err_free: kfree(wqs_handles); kfree(wqs); return err; } int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {}; struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_uobject *uobj; int ret; struct ib_wq **ind_tbl; size_t required_cmd_sz; required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle); if (ucore->inlen < required_cmd_sz) return -EINVAL; if (ucore->inlen > sizeof(cmd) && !ib_is_udata_cleared(ucore, sizeof(cmd), ucore->inlen - sizeof(cmd))) return -EOPNOTSUPP; ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); if (ret) return ret; if (cmd.comp_mask) return -EOPNOTSUPP; uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle, file->ucontext); if (!uobj) return -EINVAL; rwq_ind_tbl = uobj->object; ind_tbl = rwq_ind_tbl->ind_tbl; ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); kfree(ind_tbl); return ret; } int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow_resp resp; struct ib_uobject *uobj; struct ib_flow *flow_id; struct ib_uverbs_flow_attr *kern_flow_attr; struct ib_flow_attr *flow_attr; struct ib_qp *qp; int err = 0; void *kern_spec; void *ib_spec; int i; if (ucore->inlen < sizeof(cmd)) return -EINVAL; if (ucore->outlen < sizeof(resp)) return -ENOSPC; err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); if (err) return err; ucore->inbuf = (const char *)ucore->inbuf + sizeof(cmd); ucore->inlen -= sizeof(cmd); if (cmd.comp_mask) return -EINVAL; if (priv_check(curthread, PRIV_NET_RAW) != 0) return -EPERM; if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED) return -EINVAL; if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) || (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT))) return -EINVAL; if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) return -EINVAL; if (cmd.flow_attr.size > ucore->inlen || cmd.flow_attr.size > (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) return -EINVAL; if (cmd.flow_attr.reserved[0] || cmd.flow_attr.reserved[1]) return -EINVAL; if (cmd.flow_attr.num_of_specs) { kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, GFP_KERNEL); if (!kern_flow_attr) return -ENOMEM; memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); err = ib_copy_from_udata(kern_flow_attr + 1, ucore, cmd.flow_attr.size); if (err) goto err_free_attr; } else { kern_flow_attr = &cmd.flow_attr; } uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); if (!uobj) { err = -ENOMEM; goto err_free_attr; } init_uobj(uobj, 0, file->ucontext, &rule_lock_class); down_write(&uobj->mutex); qp = idr_read_qp(cmd.qp_handle, file->ucontext); if (!qp) { err = -EINVAL; goto err_uobj; } flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * sizeof(union ib_flow_spec), GFP_KERNEL); if (!flow_attr) { err = -ENOMEM; goto err_put; } flow_attr->type = kern_flow_attr->type; flow_attr->priority = kern_flow_attr->priority; flow_attr->num_of_specs = kern_flow_attr->num_of_specs; flow_attr->port = kern_flow_attr->port; flow_attr->flags = kern_flow_attr->flags; flow_attr->size = sizeof(*flow_attr); kern_spec = kern_flow_attr + 1; ib_spec = flow_attr + 1; for (i = 0; i < flow_attr->num_of_specs && cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && cmd.flow_attr.size >= ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { err = kern_spec_to_ib_spec(kern_spec, ib_spec); if (err) goto err_free; flow_attr->size += ((union ib_flow_spec *) ib_spec)->size; cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; kern_spec = (char *)kern_spec + ((struct ib_uverbs_flow_spec *) kern_spec)->size; ib_spec = (char *)ib_spec + ((union ib_flow_spec *)ib_spec)->size; } if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", i, cmd.flow_attr.size); err = -EINVAL; goto err_free; } flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); goto err_free; } flow_id->qp = qp; flow_id->uobject = uobj; uobj->object = flow_id; err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); if (err) goto destroy_flow; memset(&resp, 0, sizeof(resp)); resp.flow_handle = uobj->id; err = ib_copy_to_udata(ucore, &resp, sizeof(resp)); if (err) goto err_copy; put_qp_read(qp); mutex_lock(&file->mutex); list_add_tail(&uobj->list, &file->ucontext->rule_list); mutex_unlock(&file->mutex); uobj->live = 1; up_write(&uobj->mutex); kfree(flow_attr); if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); return 0; err_copy: idr_remove_uobj(&ib_uverbs_rule_idr, uobj); destroy_flow: ib_destroy_flow(flow_id); err_free: kfree(flow_attr); err_put: put_qp_read(qp); err_uobj: put_uobj_write(uobj); err_free_attr: if (cmd.flow_attr.num_of_specs) kfree(kern_flow_attr); return err; } int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_destroy_flow cmd; struct ib_flow *flow_id; struct ib_uobject *uobj; int ret; if (ucore->inlen < sizeof(cmd)) return -EINVAL; ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); if (ret) return ret; if (cmd.comp_mask) return -EINVAL; uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, file->ucontext); if (!uobj) return -EINVAL; flow_id = uobj->object; ret = ib_destroy_flow(flow_id); if (!ret) uobj->live = 0; put_uobj_write(uobj); idr_remove_uobj(&ib_uverbs_rule_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); put_uobj(uobj); return ret; } static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) { struct ib_uverbs_create_srq_resp resp; struct ib_usrq_object *obj; struct ib_pd *pd; struct ib_srq *srq; struct ib_uobject *uninitialized_var(xrcd_uobj); struct ib_srq_init_attr attr; int ret; obj = kmalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); down_write(&obj->uevent.uobject.mutex); if (cmd->srq_type == IB_SRQT_XRC) { attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); if (!attr.ext.xrc.xrcd) { ret = -EINVAL; goto err; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); if (!attr.ext.xrc.cq) { ret = -EINVAL; goto err_put_xrcd; } } pd = idr_read_pd(cmd->pd_handle, file->ucontext); if (!pd) { ret = -EINVAL; goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; attr.srq_context = file; attr.srq_type = cmd->srq_type; attr.attr.max_wr = cmd->max_wr; attr.attr.max_sge = cmd->max_sge; attr.attr.srq_limit = cmd->srq_limit; obj->uevent.events_reported = 0; INIT_LIST_HEAD(&obj->uevent.event_list); srq = pd->device->create_srq(pd, &attr, udata); if (IS_ERR(srq)) { ret = PTR_ERR(srq); goto err_put; } srq->device = pd->device; srq->pd = pd; srq->srq_type = cmd->srq_type; srq->uobject = &obj->uevent.uobject; srq->event_handler = attr.event_handler; srq->srq_context = attr.srq_context; if (cmd->srq_type == IB_SRQT_XRC) { srq->ext.xrc.cq = attr.ext.xrc.cq; srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; atomic_inc(&attr.ext.xrc.cq->usecnt); atomic_inc(&attr.ext.xrc.xrcd->usecnt); } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); obj->uevent.uobject.object = srq; ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); if (ret) goto err_destroy; memset(&resp, 0, sizeof resp); resp.srq_handle = obj->uevent.uobject.id; resp.max_wr = attr.attr.max_wr; resp.max_sge = attr.attr.max_sge; if (cmd->srq_type == IB_SRQT_XRC) resp.srqn = srq->ext.xrc.srq_num; if (copy_to_user((void __user *) (unsigned long) cmd->response, &resp, sizeof resp)) { ret = -EFAULT; goto err_copy; } if (cmd->srq_type == IB_SRQT_XRC) { put_uobj_read(xrcd_uobj); put_cq_read(attr.ext.xrc.cq); } put_pd_read(pd); mutex_lock(&file->mutex); list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); mutex_unlock(&file->mutex); obj->uevent.uobject.live = 1; up_write(&obj->uevent.uobject.mutex); return 0; err_copy: idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); err_destroy: ib_destroy_srq(srq); err_put: put_pd_read(pd); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) put_cq_read(attr.ext.xrc.cq); err_put_xrcd: if (cmd->srq_type == IB_SRQT_XRC) { atomic_dec(&obj->uxrcd->refcnt); put_uobj_read(xrcd_uobj); } err: put_uobj_write(&obj->uevent.uobject); return ret; } ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_srq cmd; struct ib_uverbs_create_xsrq xcmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; xcmd.response = cmd.response; xcmd.user_handle = cmd.user_handle; xcmd.srq_type = IB_SRQT_BASIC; xcmd.pd_handle = cmd.pd_handle; xcmd.max_wr = cmd.max_wr; xcmd.max_sge = cmd.max_sge; xcmd.srq_limit = cmd.srq_limit; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof resp); ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata); if (ret) return ret; return in_len; } ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_create_xsrq cmd; struct ib_uverbs_create_srq_resp resp; struct ib_udata udata; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr), out_len - sizeof resp); ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata); if (ret) return ret; return in_len; } ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_modify_srq cmd; struct ib_udata udata; struct ib_srq *srq; struct ib_srq_attr attr; int ret; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, out_len); srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; attr.max_wr = cmd.max_wr; attr.srq_limit = cmd.srq_limit; ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata); put_srq_read(srq); return ret ? ret : in_len; } ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_query_srq cmd; struct ib_uverbs_query_srq_resp resp; struct ib_srq_attr attr; struct ib_srq *srq; int ret; if (out_len < sizeof resp) return -ENOSPC; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; srq = idr_read_srq(cmd.srq_handle, file->ucontext); if (!srq) return -EINVAL; ret = ib_query_srq(srq, &attr); put_srq_read(srq); if (ret) return ret; memset(&resp, 0, sizeof resp); resp.max_wr = attr.max_wr; resp.max_sge = attr.max_sge; resp.srq_limit = attr.srq_limit; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) return -EFAULT; return in_len; } ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) { struct ib_uverbs_destroy_srq cmd; struct ib_uverbs_destroy_srq_resp resp; struct ib_uobject *uobj; struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; struct ib_usrq_object *us; enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext); if (!uobj) return -EINVAL; srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); srq_type = srq->srq_type; ret = ib_destroy_srq(srq); if (!ret) uobj->live = 0; put_uobj_write(uobj); if (ret) return ret; if (srq_type == IB_SRQT_XRC) { us = container_of(obj, struct ib_usrq_object, uevent); atomic_dec(&us->uxrcd->refcnt); } idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); list_del(&uobj->list); mutex_unlock(&file->mutex); ib_uverbs_release_uevent(file, obj); memset(&resp, 0, sizeof resp); resp.events_reported = obj->events_reported; put_uobj(uobj); if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) ret = -EFAULT; return ret ? ret : in_len; } int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) { struct ib_uverbs_ex_query_device_resp resp = { {0} }; struct ib_uverbs_ex_query_device cmd; struct ib_device_attr attr = {0}; int err; if (ucore->inlen < sizeof(cmd)) return -EINVAL; err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); if (err) return err; if (cmd.comp_mask) return -EINVAL; if (cmd.reserved) return -EINVAL; resp.response_length = offsetof(typeof(resp), odp_caps); if (ucore->outlen < resp.response_length) return -ENOSPC; err = ib_dev->query_device(ib_dev, &attr, uhw); if (err) return err; copy_query_dev_fields(file, ib_dev, &resp.base, &attr); if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps)) goto end; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING resp.odp_caps.general_caps = attr.odp_caps.general_caps; resp.odp_caps.per_transport_caps.rc_odp_caps = attr.odp_caps.per_transport_caps.rc_odp_caps; resp.odp_caps.per_transport_caps.uc_odp_caps = attr.odp_caps.per_transport_caps.uc_odp_caps; resp.odp_caps.per_transport_caps.ud_odp_caps = attr.odp_caps.per_transport_caps.ud_odp_caps; #endif resp.response_length += sizeof(resp.odp_caps); if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask)) goto end; resp.timestamp_mask = attr.timestamp_mask; resp.response_length += sizeof(resp.timestamp_mask); if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock)) goto end; resp.hca_core_clock = attr.hca_core_clock; resp.response_length += sizeof(resp.hca_core_clock); if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex)) goto end; resp.device_cap_flags_ex = attr.device_cap_flags; resp.response_length += sizeof(resp.device_cap_flags_ex); if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) goto end; resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; resp.rss_caps.max_rwq_indirection_tables = attr.rss_caps.max_rwq_indirection_tables; resp.rss_caps.max_rwq_indirection_table_size = attr.rss_caps.max_rwq_indirection_table_size; resp.response_length += sizeof(resp.rss_caps); if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) goto end; resp.max_wq_type_rq = attr.max_wq_type_rq; resp.response_length += sizeof(resp.max_wq_type_rq); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; } Index: stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_main.c (revision 331772) @@ -1,1430 +1,1434 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = 32 }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static struct class *uverbs_class; DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); DEFINE_IDR(ib_uverbs_wq_idr); DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, struct ib_device *ib_dev, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device, [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq, [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp, [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq, [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, }; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); int uverbs_dealloc_mw(struct ib_mw *mw) { struct ib_pd *pd = mw->pd; int ret; ret = mw->device->dealloc_mw(mw); if (!ret) atomic_dec(&pd->usecnt); return ret; } static void ib_uverbs_release_dev(struct kobject *kobj) { struct ib_uverbs_device *dev = container_of(kobj, struct ib_uverbs_device, kobj); cleanup_srcu_struct(&dev->disassociate_srcu); kfree(dev); } static struct kobj_type ib_uverbs_dev_ktype = { .release = ib_uverbs_release_dev, }; static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = container_of(ref, struct ib_uverbs_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->lock); kref_put(&ev_file->ref, ib_uverbs_release_event_file); } spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj) { struct ib_uverbs_event *evt, *tmp; spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } static void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; context->closing = 1; list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); kfree(uobj); } /* Remove MWs before QPs, in order to support type 2A MWs. */ list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { struct ib_mw *mw = uobj->object; idr_remove_uobj(&ib_uverbs_mw_idr, uobj); uverbs_dealloc_mw(mw); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { struct ib_flow *flow_id = uobj->object; idr_remove_uobj(&ib_uverbs_rule_idr, uobj); ib_destroy_flow(flow_id); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); if (qp == qp->real_qp) ib_uverbs_detach_umcast(qp, uqp); ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) { struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object; struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj); ib_destroy_rwq_ind_table(rwq_ind_tbl); kfree(ind_tbl); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) { struct ib_wq *wq = uobj->object; struct ib_uwq_object *uwq = container_of(uobj, struct ib_uwq_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_wq_idr, uobj); ib_destroy_wq(wq); ib_uverbs_release_uevent(file, &uwq->uevent); kfree(uwq); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); idr_remove_uobj(&ib_uverbs_srq_idr, uobj); ib_destroy_srq(srq); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); idr_remove_uobj(&ib_uverbs_cq_idr, uobj); ib_destroy_cq(cq); ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); ib_dereg_mr(mr); kfree(uobj); } mutex_lock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { struct ib_xrcd *xrcd = uobj->object; struct ib_uxrcd_object *uxrcd = container_of(uobj, struct ib_uxrcd_object, uobject); idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); ib_uverbs_dealloc_xrcd(file->device, xrcd); kfree(uxrcd); } mutex_unlock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); kfree(uobj); } put_pid(context->tgid); return context->device->dealloc_ucontext(context); } static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev) { complete(&dev->comp); } static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); struct ib_device *ib_dev; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (ib_dev && !ib_dev->disassociate_ucontext) module_put(ib_dev->owner); srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); kfree(file); } static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; int eventsz; int ret = 0; spin_lock_irq(&file->lock); while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, (!list_empty(&file->event_list) || /* The barriers built into wait_event_interruptible() * and wake_up() guarentee this will see the null set * without using RCU */ !file->uverbs_file->device->ib_dev))) return -ERESTARTSYS; /* If device was disassociated and no event exists set an error */ if (list_empty(&file->event_list) && !file->uverbs_file->device->ib_dev) return -EIO; spin_lock_irq(&file->lock); } event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); else eventsz = sizeof (struct ib_uverbs_comp_event_desc); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { list_del(file->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&file->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static unsigned int ib_uverbs_event_poll(struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; struct ib_uverbs_event_file *file = filp->private_data; poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->async_queue); } static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; int closed_already = 0; mutex_lock(&file->uverbs_file->device->lists_mutex); spin_lock_irq(&file->lock); closed_already = file->is_closed; file->is_closed = 1; list_for_each_entry_safe(entry, tmp, &file->event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } spin_unlock_irq(&file->lock); if (!closed_already) { list_del(&file->list); if (file->is_async) ib_unregister_event_handler(&file->uverbs_file-> event_handler); } mutex_unlock(&file->uverbs_file->device->lists_mutex); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync, .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!file) return; spin_lock_irqsave(&file->lock, flags); if (file->is_closed) { spin_unlock_irqrestore(&file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->lock, flags); return; } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&file->lock, flags); wake_up_interruptible(&file->poll_wait); kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&file->async_file->lock, flags); wake_up_interruptible(&file->async_file->poll_wait); kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject || !event->element.qp->uobject->live) return; uobj = container_of(event->element.qp->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj = container_of(event->element.wq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.srq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); ib_uverbs_async_handler(file, event->element.port_num, event->event, NULL, NULL); } void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file) { kref_put(&file->async_file->ref, ib_uverbs_release_event_file); file->async_file = NULL; } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev, int is_async) { struct ib_uverbs_event_file *ev_file; struct file *filp; int ret; ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); kref_init(&ev_file->ref); spin_lock_init(&ev_file->lock); INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; kref_get(&ev_file->uverbs_file->ref); ev_file->async_queue = NULL; ev_file->is_closed = 0; /* * fops_get() can't fail here, because we're coming from a * system call on a uverbs file, which will already have a * module reference. */ filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); if (IS_ERR(filp)) goto err_put_refs; filp->private_data = ev_file; mutex_lock(&uverbs_file->device->lists_mutex); list_add_tail(&ev_file->list, &uverbs_file->device->uverbs_events_file_list); mutex_unlock(&uverbs_file->device->lists_mutex); if (is_async) { WARN_ON(uverbs_file->async_file); uverbs_file->async_file = ev_file; kref_get(&uverbs_file->async_file->ref); INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler, ib_dev, ib_uverbs_event_handler); ret = ib_register_event_handler(&uverbs_file->event_handler); if (ret) goto err_put_file; /* At that point async file stuff was fully set */ ev_file->is_async = 1; } return filp; err_put_file: fput(filp); kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file); uverbs_file->async_file = NULL; return ERR_PTR(ret); err_put_refs: kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file); kref_put(&ev_file->ref, ib_uverbs_release_event_file); return filp; } /* * Look up a completion event file by FD. If lookup is successful, * takes a ref to the event file struct that it returns; if * unsuccessful, returns NULL. */ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; struct fd f = fdget(fd); if (!f.file) return NULL; if (f.file->f_op != &uverbs_event_fops) goto out; ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; } kref_get(&ev_file->ref); out: fdput(f); return ev_file; } static int verify_command_mask(struct ib_device *ib_dev, __u32 command) { u64 mask; if (command <= IB_USER_VERBS_CMD_OPEN_QP) mask = ib_dev->uverbs_cmd_mask; else mask = ib_dev->uverbs_ex_cmd_mask; if (mask & ((u64)1 << command)) return 0; return -1; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; struct ib_uverbs_cmd_hdr hdr; __u32 command; __u32 flags; int srcu_key; ssize_t ret; if (WARN_ON_ONCE(!ib_safe_file_access(filp))) return -EACCES; if (count < sizeof hdr) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto out; } if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | IB_USER_VERBS_CMD_COMMAND_MASK)) { ret = -EINVAL; goto out; } command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; if (verify_command_mask(ib_dev, command)) { ret = -EOPNOTSUPP; goto out; } if (!file->ucontext && command != IB_USER_VERBS_CMD_GET_CONTEXT) { ret = -EINVAL; goto out; } flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; if (!flags) { if (command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[command]) { ret = -EINVAL; goto out; } if (hdr.in_words * 4 != count) { ret = -EINVAL; goto out; } ret = uverbs_cmd_table[command](file, ib_dev, buf + sizeof(hdr), hdr.in_words * 4, hdr.out_words * 4); } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { struct ib_uverbs_ex_cmd_hdr ex_hdr; struct ib_udata ucore; struct ib_udata uhw; size_t written_count = count; if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || !uverbs_ex_cmd_table[command]) { ret = -ENOSYS; goto out; } if (!file->ucontext) { ret = -EINVAL; goto out; } if (count < (sizeof(hdr) + sizeof(ex_hdr))) { ret = -EINVAL; goto out; } if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) { ret = -EFAULT; goto out; } count -= sizeof(hdr) + sizeof(ex_hdr); buf += sizeof(hdr) + sizeof(ex_hdr); if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) { ret = -EINVAL; goto out; } if (ex_hdr.cmd_hdr_reserved) { ret = -EINVAL; goto out; } if (ex_hdr.response) { if (!hdr.out_words && !ex_hdr.provider_out_words) { ret = -EINVAL; goto out; } if (!access_ok(VERIFY_WRITE, (void __user *) (unsigned long) ex_hdr.response, (hdr.out_words + ex_hdr.provider_out_words) * 8)) { ret = -EFAULT; goto out; } } else { if (hdr.out_words || ex_hdr.provider_out_words) { ret = -EINVAL; goto out; } } INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, hdr.in_words * 8, hdr.out_words * 8); INIT_UDATA_BUF_OR_NULL(&uhw, buf + ucore.inlen, (unsigned long) ex_hdr.response + ucore.outlen, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw); if (!ret) ret = written_count; } else { ret = -ENOSYS; } out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *ib_dev; int ret = 0; int srcu_key; srcu_key = srcu_read_lock(&file->device->disassociate_srcu); ib_dev = srcu_dereference(file->device->ib_dev, &file->device->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto out; } if (!file->ucontext) ret = -ENODEV; else ret = ib_dev->mmap(file->ucontext, vma); out: srcu_read_unlock(&file->device->disassociate_srcu, srcu_key); return ret; } /* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the open method will either immediately run -ENXIO, or all * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; struct ib_device *ib_dev; int ret; int module_dependent; int srcu_key; dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev); if (!atomic_inc_not_zero(&dev->refcount)) return -ENXIO; srcu_key = srcu_read_lock(&dev->disassociate_srcu); mutex_lock(&dev->lists_mutex); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (!ib_dev) { ret = -EIO; goto err; } /* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device. */ module_dependent = !(ib_dev->disassociate_ucontext); if (module_dependent) { if (!try_module_get(ib_dev->owner)) { ret = -ENODEV; goto err; } } file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) { ret = -ENOMEM; if (module_dependent) goto err_module; goto err; } file->device = dev; file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); mutex_init(&file->cleanup_mutex); filp->private_data = file; kobject_get(&dev->kobj); list_add_tail(&file->list, &dev->uverbs_file_list); mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return nonseekable_open(inode, filp); err_module: module_put(ib_dev->owner); err: mutex_unlock(&dev->lists_mutex); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); if (atomic_dec_and_test(&dev->refcount)) ib_uverbs_comp_dev(dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_device *dev = file->device; mutex_lock(&file->cleanup_mutex); if (file->ucontext) { ib_uverbs_cleanup_ucontext(file, file->ucontext); file->ucontext = NULL; } mutex_unlock(&file->cleanup_mutex); mutex_lock(&file->device->lists_mutex); if (!file->is_closed) { list_del(&file->list); file->is_closed = 1; } mutex_unlock(&file->device->lists_mutex); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); kobject_put(&dev->kobj); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, }; static struct ib_client uverbs_client = { .name = "uverbs", .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, char *buf) { int ret = -ENODEV; int srcu_key; struct ib_uverbs_device *dev = dev_get_drvdata(device); struct ib_device *ib_dev; if (!dev) return -ENODEV; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sprintf(buf, "%s\n", ib_dev->name); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); int ret = -ENODEV; int srcu_key; struct ib_device *ib_dev; if (!dev) return -ENODEV; srcu_key = srcu_read_lock(&dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); if (ib_dev) ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver); srcu_read_unlock(&dev->disassociate_srcu, srcu_key); return ret; } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static CLASS_ATTR_STRING(abi_version, S_IRUGO, __stringify(IB_USER_VERBS_ABI_VERSION)); static dev_t overflow_maj; static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); /* * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by * requesting a new major number and doubling the number of max devices we * support. It's stupid, but simple. */ static int find_overflow_devnum(void) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); if (ret >= IB_UVERBS_MAX_DEVICES) return -1; return ret; } static ssize_t show_dev_device(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev || !dev->ib_dev->dma_device) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->device); } static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL); static ssize_t show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev || !dev->ib_dev->dma_device) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); } static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); struct attribute *device_attrs[] = { &dev_attr_device.attr, &dev_attr_vendor.attr, NULL }; static struct attribute_group device_group = { .name = "device", .attrs = device_attrs }; static void ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_uverbs_device *uverbs_dev; int ret; if (!device->alloc_ucontext) return; uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); if (!uverbs_dev) return; ret = init_srcu_struct(&uverbs_dev->disassociate_srcu); if (ret) { kfree(uverbs_dev); return; } atomic_set(&uverbs_dev->refcount, 1); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype); mutex_init(&uverbs_dev->lists_mutex); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); devnum = find_overflow_devnum(); if (devnum < 0) goto err; spin_lock(&map_lock); uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { uverbs_dev->devnum = devnum; base = devnum + IB_UVERBS_BASE_DEV; set_bit(devnum, dev_map); } spin_unlock(&map_lock); rcu_assign_pointer(uverbs_dev->ib_dev, device); uverbs_dev->num_comp_vectors = device->num_comp_vectors; cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, uverbs_dev->cdev.dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) goto err_class; ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: device_destroy(uverbs_class, uverbs_dev->cdev.dev); err_cdev: cdev_del(&uverbs_dev->cdev); if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); err: if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); wait_for_completion(&uverbs_dev->comp); kobject_put(&uverbs_dev->kobj); return; } static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, struct ib_device *ib_dev) { struct ib_uverbs_file *file; struct ib_uverbs_event_file *event_file; struct ib_event event; /* Pending running commands to terminate */ synchronize_srcu(&uverbs_dev->disassociate_srcu); event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; event.device = ib_dev; mutex_lock(&uverbs_dev->lists_mutex); while (!list_empty(&uverbs_dev->uverbs_file_list)) { struct ib_ucontext *ucontext; file = list_first_entry(&uverbs_dev->uverbs_file_list, struct ib_uverbs_file, list); file->is_closed = 1; list_del(&file->list); kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); ib_uverbs_event_handler(&file->event_handler, &event); mutex_lock(&file->cleanup_mutex); ucontext = file->ucontext; file->ucontext = NULL; mutex_unlock(&file->cleanup_mutex); /* At this point ib_uverbs_close cannot be running * ib_uverbs_cleanup_ucontext */ if (ucontext) { /* We must release the mutex before going ahead and * calling disassociate_ucontext. disassociate_ucontext * might end up indirectly calling uverbs_close, * for example due to freeing the resources * (e.g mmput). */ ib_dev->disassociate_ucontext(ucontext); ib_uverbs_cleanup_ucontext(file, ucontext); } mutex_lock(&uverbs_dev->lists_mutex); kref_put(&file->ref, ib_uverbs_release_file); } while (!list_empty(&uverbs_dev->uverbs_events_file_list)) { event_file = list_first_entry(&uverbs_dev-> uverbs_events_file_list, struct ib_uverbs_event_file, list); spin_lock_irq(&event_file->lock); event_file->is_closed = 1; spin_unlock_irq(&event_file->lock); list_del(&event_file->list); if (event_file->is_async) { ib_unregister_event_handler(&event_file->uverbs_file-> event_handler); event_file->uverbs_file->event_handler.device = NULL; } wake_up_interruptible(&event_file->poll_wait); kill_fasync(&event_file->async_queue, SIGIO, POLL_IN); } mutex_unlock(&uverbs_dev->lists_mutex); } static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) { struct ib_uverbs_device *uverbs_dev = client_data; int wait_clients = 1; if (!uverbs_dev) return; sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); dev_set_drvdata(uverbs_dev->dev, NULL); device_destroy(uverbs_class, uverbs_dev->cdev.dev); cdev_del(&uverbs_dev->cdev); if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(uverbs_dev->devnum, dev_map); else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); if (device->disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not * valid any more. * uverbs_device is still available until all clients close * their files, then the uverbs device ref count will be zero * and its resources will be freed. * Note: At this point no more files can be opened since the * cdev was deleted, however active clients can still issue * commands and close their open files. */ rcu_assign_pointer(uverbs_dev->ib_dev, NULL); ib_uverbs_free_hw_resources(uverbs_dev, device); wait_clients = 0; } if (atomic_dec_and_test(&uverbs_dev->refcount)) ib_uverbs_comp_dev(uverbs_dev); if (wait_clients) wait_for_completion(&uverbs_dev->comp); kobject_put(&uverbs_dev->kobj); } static char *uverbs_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_uverbs_init(void) { int ret; ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { pr_err("user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } uverbs_class->devnode = uverbs_devnode; ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); if (ret) { pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { pr_err("user_verbs: couldn't register client\n"); goto out_class; } return 0; out_class: class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); idr_destroy(&ib_uverbs_ah_idr); idr_destroy(&ib_uverbs_cq_idr); idr_destroy(&ib_uverbs_qp_idr); idr_destroy(&ib_uverbs_srq_idr); } module_init_order(ib_uverbs_init, SI_ORDER_THIRD); module_exit(ib_uverbs_cleanup); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_uverbs_marshall.c (revision 331772) @@ -1,148 +1,152 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, struct ib_ah_attr *src) { memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid); dst->grh.flow_label = src->grh.flow_label; dst->grh.sgid_index = src->grh.sgid_index; dst->grh.hop_limit = src->grh.hop_limit; dst->grh.traffic_class = src->grh.traffic_class; memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); dst->dlid = src->dlid; dst->sl = src->sl; dst->src_path_bits = src->src_path_bits; dst->static_rate = src->static_rate; dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0; dst->port_num = src->port_num; dst->reserved = 0; } EXPORT_SYMBOL(ib_copy_ah_attr_to_user); void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src) { dst->qp_state = src->qp_state; dst->cur_qp_state = src->cur_qp_state; dst->path_mtu = src->path_mtu; dst->path_mig_state = src->path_mig_state; dst->qkey = src->qkey; dst->rq_psn = src->rq_psn; dst->sq_psn = src->sq_psn; dst->dest_qp_num = src->dest_qp_num; dst->qp_access_flags = src->qp_access_flags; dst->max_send_wr = src->cap.max_send_wr; dst->max_recv_wr = src->cap.max_recv_wr; dst->max_send_sge = src->cap.max_send_sge; dst->max_recv_sge = src->cap.max_recv_sge; dst->max_inline_data = src->cap.max_inline_data; ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr); ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr); dst->pkey_index = src->pkey_index; dst->alt_pkey_index = src->alt_pkey_index; dst->en_sqd_async_notify = src->en_sqd_async_notify; dst->sq_draining = src->sq_draining; dst->max_rd_atomic = src->max_rd_atomic; dst->max_dest_rd_atomic = src->max_dest_rd_atomic; dst->min_rnr_timer = src->min_rnr_timer; dst->port_num = src->port_num; dst->timeout = src->timeout; dst->retry_cnt = src->retry_cnt; dst->rnr_retry = src->rnr_retry; dst->alt_port_num = src->alt_port_num; dst->alt_timeout = src->alt_timeout; memset(dst->reserved, 0, sizeof(dst->reserved)); } EXPORT_SYMBOL(ib_copy_qp_attr_to_user); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, struct ib_sa_path_rec *src) { memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); dst->dlid = src->dlid; dst->slid = src->slid; dst->raw_traffic = src->raw_traffic; dst->flow_label = src->flow_label; dst->hop_limit = src->hop_limit; dst->traffic_class = src->traffic_class; dst->reversible = src->reversible; dst->numb_path = src->numb_path; dst->pkey = src->pkey; dst->sl = src->sl; dst->mtu_selector = src->mtu_selector; dst->mtu = src->mtu; dst->rate_selector = src->rate_selector; dst->rate = src->rate; dst->packet_life_time = src->packet_life_time; dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; } EXPORT_SYMBOL(ib_copy_path_rec_to_user); void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, struct ib_user_path_rec *src) { memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid); memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid); dst->dlid = src->dlid; dst->slid = src->slid; dst->raw_traffic = src->raw_traffic; dst->flow_label = src->flow_label; dst->hop_limit = src->hop_limit; dst->traffic_class = src->traffic_class; dst->reversible = src->reversible; dst->numb_path = src->numb_path; dst->pkey = src->pkey; dst->sl = src->sl; dst->mtu_selector = src->mtu_selector; dst->mtu = src->mtu; dst->rate_selector = src->rate_selector; dst->rate = src->rate; dst->packet_life_time = src->packet_life_time; dst->preference = src->preference; dst->packet_life_time_selector = src->packet_life_time_selector; memset(dst->dmac, 0, sizeof(dst->dmac)); dst->net = NULL; dst->ifindex = 0; dst->gid_type = IB_GID_TYPE_IB; } EXPORT_SYMBOL(ib_copy_path_rec_from_user); Index: stable/11/sys/ofed/drivers/infiniband/core/ib_verbs.c =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/ib_verbs.c (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/ib_verbs.c (revision 331772) @@ -1,2068 +1,2072 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include "core_priv.h" static const char * const ib_events[] = { [IB_EVENT_CQ_ERR] = "CQ error", [IB_EVENT_QP_FATAL] = "QP fatal error", [IB_EVENT_QP_REQ_ERR] = "QP request error", [IB_EVENT_QP_ACCESS_ERR] = "QP access error", [IB_EVENT_COMM_EST] = "communication established", [IB_EVENT_SQ_DRAINED] = "send queue drained", [IB_EVENT_PATH_MIG] = "path migration successful", [IB_EVENT_PATH_MIG_ERR] = "path migration error", [IB_EVENT_DEVICE_FATAL] = "device fatal error", [IB_EVENT_PORT_ACTIVE] = "port active", [IB_EVENT_PORT_ERR] = "port error", [IB_EVENT_LID_CHANGE] = "LID change", [IB_EVENT_PKEY_CHANGE] = "P_key change", [IB_EVENT_SM_CHANGE] = "SM change", [IB_EVENT_SRQ_ERR] = "SRQ error", [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", [IB_EVENT_CLIENT_REREGISTER] = "client reregister", [IB_EVENT_GID_CHANGE] = "GID changed", }; const char *__attribute_const__ ib_event_msg(enum ib_event_type event) { size_t index = event; return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? ib_events[index] : "unrecognized event"; } EXPORT_SYMBOL(ib_event_msg); static const char * const wc_statuses[] = { [IB_WC_SUCCESS] = "success", [IB_WC_LOC_LEN_ERR] = "local length error", [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", [IB_WC_LOC_PROT_ERR] = "local protection error", [IB_WC_WR_FLUSH_ERR] = "WR flushed", [IB_WC_MW_BIND_ERR] = "memory management operation error", [IB_WC_BAD_RESP_ERR] = "bad response error", [IB_WC_LOC_ACCESS_ERR] = "local access error", [IB_WC_REM_INV_REQ_ERR] = "invalid request error", [IB_WC_REM_ACCESS_ERR] = "remote access error", [IB_WC_REM_OP_ERR] = "remote operation error", [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", [IB_WC_REM_ABORT_ERR] = "operation aborted", [IB_WC_INV_EECN_ERR] = "invalid EE context number", [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", [IB_WC_FATAL_ERR] = "fatal error", [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", [IB_WC_GENERAL_ERR] = "general error", }; const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) { size_t index = status; return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? wc_statuses[index] : "unrecognized status"; } EXPORT_SYMBOL(ib_wc_status_msg); __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 1; case IB_RATE_5_GBPS: return 2; case IB_RATE_10_GBPS: return 4; case IB_RATE_20_GBPS: return 8; case IB_RATE_30_GBPS: return 12; case IB_RATE_40_GBPS: return 16; case IB_RATE_60_GBPS: return 24; case IB_RATE_80_GBPS: return 32; case IB_RATE_120_GBPS: return 48; default: return -1; } } EXPORT_SYMBOL(ib_rate_to_mult); __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) { switch (mult) { case 1: return IB_RATE_2_5_GBPS; case 2: return IB_RATE_5_GBPS; case 4: return IB_RATE_10_GBPS; case 8: return IB_RATE_20_GBPS; case 12: return IB_RATE_30_GBPS; case 16: return IB_RATE_40_GBPS; case 24: return IB_RATE_60_GBPS; case 32: return IB_RATE_80_GBPS; case 48: return IB_RATE_120_GBPS; default: return IB_RATE_PORT_CURRENT; } } EXPORT_SYMBOL(mult_to_ib_rate); __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 2500; case IB_RATE_5_GBPS: return 5000; case IB_RATE_10_GBPS: return 10000; case IB_RATE_20_GBPS: return 20000; case IB_RATE_30_GBPS: return 30000; case IB_RATE_40_GBPS: return 40000; case IB_RATE_60_GBPS: return 60000; case IB_RATE_80_GBPS: return 80000; case IB_RATE_120_GBPS: return 120000; case IB_RATE_14_GBPS: return 14062; case IB_RATE_56_GBPS: return 56250; case IB_RATE_112_GBPS: return 112500; case IB_RATE_168_GBPS: return 168750; case IB_RATE_25_GBPS: return 25781; case IB_RATE_100_GBPS: return 103125; case IB_RATE_200_GBPS: return 206250; case IB_RATE_300_GBPS: return 309375; default: return -1; } } EXPORT_SYMBOL(ib_rate_to_mbps); __attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { switch (node_type) { case RDMA_NODE_IB_CA: case RDMA_NODE_IB_SWITCH: case RDMA_NODE_IB_ROUTER: return RDMA_TRANSPORT_IB; case RDMA_NODE_RNIC: return RDMA_TRANSPORT_IWARP; case RDMA_NODE_USNIC: return RDMA_TRANSPORT_USNIC; case RDMA_NODE_USNIC_UDP: return RDMA_TRANSPORT_USNIC_UDP; default: BUG(); return 0; } } EXPORT_SYMBOL(rdma_node_get_transport); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) { if (device->get_link_layer) return device->get_link_layer(device, port_num); switch (rdma_node_get_transport(device->node_type)) { case RDMA_TRANSPORT_IB: return IB_LINK_LAYER_INFINIBAND; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_USNIC: case RDMA_TRANSPORT_USNIC_UDP: return IB_LINK_LAYER_ETHERNET; default: return IB_LINK_LAYER_UNSPECIFIED; } } EXPORT_SYMBOL(rdma_port_get_link_layer); /* Protection domains */ /** * ib_alloc_pd - Allocates an unused protection domain. * @device: The device on which to allocate the protection domain. * * A protection domain object provides an association between QPs, shared * receive queues, address handles, memory regions, and memory windows. * * Every PD has a local_dma_lkey which can be used as the lkey value for local * memory operations. */ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller) { struct ib_pd *pd; int mr_access_flags = 0; pd = device->alloc_pd(device, NULL, NULL); if (IS_ERR(pd)) return pd; pd->device = device; pd->uobject = NULL; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); pd->flags = flags; if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; else mr_access_flags |= IB_ACCESS_LOCAL_WRITE; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { pr_warn("%s: enabling unsafe global rkey\n", caller); mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; } if (mr_access_flags) { struct ib_mr *mr; mr = pd->device->get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); return ERR_CAST(mr); } mr->device = pd->device; mr->pd = pd; mr->uobject = NULL; mr->need_inval = false; pd->__internal_mr = mr; if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) pd->local_dma_lkey = pd->__internal_mr->lkey; if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) pd->unsafe_global_rkey = pd->__internal_mr->rkey; } return pd; } EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd - Deallocates a protection domain. * @pd: The protection domain to deallocate. * * It is an error to call this function while any resources in the pd still * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. */ void ib_dealloc_pd(struct ib_pd *pd) { int ret; if (pd->__internal_mr) { ret = pd->device->dereg_mr(pd->__internal_mr); WARN_ON(ret); pd->__internal_mr = NULL; } /* uverbs manipulates usecnt with proper locking, while the kabi requires the caller to guarantee we can't race here. */ WARN_ON(atomic_read(&pd->usecnt)); /* Making delalloc_pd a void return is a WIP, no driver should return an error here. */ ret = pd->device->dealloc_pd(pd); WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd"); } EXPORT_SYMBOL(ib_dealloc_pd); /* Address handles */ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct ib_ah *ah; ah = pd->device->create_ah(pd, ah_attr); if (!IS_ERR(ah)) { ah->device = pd->device; ah->pd = pd; ah->uobject = NULL; atomic_inc(&pd->usecnt); } return ah; } EXPORT_SYMBOL(ib_create_ah); static int ib_get_header_version(const union rdma_network_hdr *hdr) { const struct ip *ip4h = (const struct ip *)&hdr->roce4grh; struct ip ip4h_checked; const struct ip6_hdr *ip6h = (const struct ip6_hdr *)&hdr->ibgrh; /* If it's IPv6, the version must be 6, otherwise, the first * 20 bytes (before the IPv4 header) are garbled. */ if ((ip6h->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) return (ip4h->ip_v == 4) ? 4 : 0; /* version may be 6 or 4 because the first 20 bytes could be garbled */ /* RoCE v2 requires no options, thus header length * must be 5 words */ if (ip4h->ip_hl != 5) return 6; /* Verify checksum. * We can't write on scattered buffers so we need to copy to * temp buffer. */ memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked)); ip4h_checked.ip_sum = 0; #if defined(INET) || defined(INET6) ip4h_checked.ip_sum = in_cksum_hdr(&ip4h_checked); #endif /* if IPv4 header checksum is OK, believe it */ if (ip4h->ip_sum == ip4h_checked.ip_sum) return 4; return 6; } static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, u8 port_num, const struct ib_grh *grh) { int grh_version; if (rdma_protocol_ib(device, port_num)) return RDMA_NETWORK_IB; grh_version = ib_get_header_version((const union rdma_network_hdr *)grh); if (grh_version == 4) return RDMA_NETWORK_IPV4; if (grh->next_hdr == IPPROTO_UDP) return RDMA_NETWORK_IPV6; return RDMA_NETWORK_ROCE_V1; } struct find_gid_index_context { u16 vlan_id; enum ib_gid_type gid_type; }; static bool find_gid_index(const union ib_gid *gid, const struct ib_gid_attr *gid_attr, void *context) { struct find_gid_index_context *ctx = (struct find_gid_index_context *)context; if (ctx->gid_type != gid_attr->gid_type) return false; if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) || (is_vlan_dev(gid_attr->ndev) && vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id)) return false; return true; } static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, u16 vlan_id, const union ib_gid *sgid, enum ib_gid_type gid_type, u16 *gid_index) { struct find_gid_index_context context = {.vlan_id = vlan_id, .gid_type = gid_type}; return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, &context, gid_index); } static int get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, enum rdma_network_type net_type, union ib_gid *sgid, union ib_gid *dgid) { struct sockaddr_in src_in; struct sockaddr_in dst_in; __be32 src_saddr, dst_saddr; if (!sgid || !dgid) return -EINVAL; if (net_type == RDMA_NETWORK_IPV4) { memcpy(&src_in.sin_addr.s_addr, &hdr->roce4grh.ip_src, 4); memcpy(&dst_in.sin_addr.s_addr, &hdr->roce4grh.ip_dst, 4); src_saddr = src_in.sin_addr.s_addr; dst_saddr = dst_in.sin_addr.s_addr; ipv6_addr_set_v4mapped(src_saddr, (struct in6_addr *)sgid); ipv6_addr_set_v4mapped(dst_saddr, (struct in6_addr *)dgid); return 0; } else if (net_type == RDMA_NETWORK_IPV6 || net_type == RDMA_NETWORK_IB) { *dgid = hdr->ibgrh.dgid; *sgid = hdr->ibgrh.sgid; return 0; } else { return -EINVAL; } } int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct ib_ah_attr *ah_attr) { u32 flow_class; u16 gid_index; int ret; enum rdma_network_type net_type = RDMA_NETWORK_IB; enum ib_gid_type gid_type = IB_GID_TYPE_IB; int hoplimit = 0xff; union ib_gid dgid; union ib_gid sgid; memset(ah_attr, 0, sizeof *ah_attr); if (rdma_cap_eth_ah(device, port_num)) { if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE) net_type = wc->network_hdr_type; else net_type = ib_get_net_type_by_grh(device, port_num, grh); gid_type = ib_network_to_gid_type(net_type); } ret = get_gids_from_rdma_hdr((const union rdma_network_hdr *)grh, net_type, &sgid, &dgid); if (ret) return ret; if (rdma_protocol_roce(device, port_num)) { int if_index = 0; u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ? wc->vlan_id : 0xffff; struct net_device *idev; struct net_device *resolved_dev; if (!(wc->wc_flags & IB_WC_GRH)) return -EPROTOTYPE; if (!device->get_netdev) return -EOPNOTSUPP; idev = device->get_netdev(device, port_num); if (!idev) return -ENODEV; ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid, ah_attr->dmac, wc->wc_flags & IB_WC_WITH_VLAN ? NULL : &vlan_id, &if_index, &hoplimit); if (ret) { dev_put(idev); return ret; } resolved_dev = dev_get_by_index(&init_net, if_index); if (resolved_dev->if_flags & IFF_LOOPBACK) { dev_put(resolved_dev); resolved_dev = idev; dev_hold(resolved_dev); } rcu_read_lock(); if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev, resolved_dev)) ret = -EHOSTUNREACH; rcu_read_unlock(); dev_put(idev); dev_put(resolved_dev); if (ret) return ret; ret = get_sgid_index_from_eth(device, port_num, vlan_id, &dgid, gid_type, &gid_index); if (ret) return ret; } ah_attr->dlid = wc->slid; ah_attr->sl = wc->sl; ah_attr->src_path_bits = wc->dlid_path_bits; ah_attr->port_num = port_num; if (wc->wc_flags & IB_WC_GRH) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.dgid = sgid; if (!rdma_cap_eth_ah(device, port_num)) { if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { ret = ib_find_cached_gid_by_port(device, &dgid, IB_GID_TYPE_IB, port_num, NULL, &gid_index); if (ret) return ret; } else { gid_index = 0; } } ah_attr->grh.sgid_index = (u8) gid_index; flow_class = be32_to_cpu(grh->version_tclass_flow); ah_attr->grh.flow_label = flow_class & 0xFFFFF; ah_attr->grh.hop_limit = hoplimit; ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; } return 0; } EXPORT_SYMBOL(ib_init_ah_from_wc); struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num) { struct ib_ah_attr ah_attr; int ret; ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); if (ret) return ERR_PTR(ret); return ib_create_ah(pd, &ah_attr); } EXPORT_SYMBOL(ib_create_ah_from_wc); int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { return ah->device->modify_ah ? ah->device->modify_ah(ah, ah_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_modify_ah); int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) { return ah->device->query_ah ? ah->device->query_ah(ah, ah_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_ah); int ib_destroy_ah(struct ib_ah *ah) { struct ib_pd *pd; int ret; pd = ah->pd; ret = ah->device->destroy_ah(ah); if (!ret) atomic_dec(&pd->usecnt); return ret; } EXPORT_SYMBOL(ib_destroy_ah); /* Shared receive queues */ struct ib_srq *ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr) { struct ib_srq *srq; if (!pd->device->create_srq) return ERR_PTR(-ENOSYS); srq = pd->device->create_srq(pd, srq_init_attr, NULL); if (!IS_ERR(srq)) { srq->device = pd->device; srq->pd = pd; srq->uobject = NULL; srq->event_handler = srq_init_attr->event_handler; srq->srq_context = srq_init_attr->srq_context; srq->srq_type = srq_init_attr->srq_type; if (srq->srq_type == IB_SRQT_XRC) { srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq; atomic_inc(&srq->ext.xrc.xrcd->usecnt); atomic_inc(&srq->ext.xrc.cq->usecnt); } atomic_inc(&pd->usecnt); atomic_set(&srq->usecnt, 0); } return srq; } EXPORT_SYMBOL(ib_create_srq); int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask) { return srq->device->modify_srq ? srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) : -ENOSYS; } EXPORT_SYMBOL(ib_modify_srq); int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) { return srq->device->query_srq ? srq->device->query_srq(srq, srq_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq(struct ib_srq *srq) { struct ib_pd *pd; enum ib_srq_type srq_type; struct ib_xrcd *uninitialized_var(xrcd); struct ib_cq *uninitialized_var(cq); int ret; if (atomic_read(&srq->usecnt)) return -EBUSY; pd = srq->pd; srq_type = srq->srq_type; if (srq_type == IB_SRQT_XRC) { xrcd = srq->ext.xrc.xrcd; cq = srq->ext.xrc.cq; } ret = srq->device->destroy_srq(srq); if (!ret) { atomic_dec(&pd->usecnt); if (srq_type == IB_SRQT_XRC) { atomic_dec(&xrcd->usecnt); atomic_dec(&cq->usecnt); } } return ret; } EXPORT_SYMBOL(ib_destroy_srq); /* Queue pairs */ static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; unsigned long flags; spin_lock_irqsave(&qp->device->event_handler_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) if (event->element.qp->event_handler) event->element.qp->event_handler(event, event->element.qp->qp_context); spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) { mutex_lock(&xrcd->tgt_qp_mutex); list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); mutex_unlock(&xrcd->tgt_qp_mutex); } static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, void (*event_handler)(struct ib_event *, void *), void *qp_context) { struct ib_qp *qp; unsigned long flags; qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); qp->real_qp = real_qp; atomic_inc(&real_qp->usecnt); qp->device = real_qp->device; qp->event_handler = event_handler; qp->qp_context = qp_context; qp->qp_num = real_qp->qp_num; qp->qp_type = real_qp->qp_type; spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); list_add(&qp->open_list, &real_qp->open_list); spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); return qp; } struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, struct ib_qp_open_attr *qp_open_attr) { struct ib_qp *qp, *real_qp; if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) return ERR_PTR(-EINVAL); qp = ERR_PTR(-EINVAL); mutex_lock(&xrcd->tgt_qp_mutex); list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { if (real_qp->qp_num == qp_open_attr->qp_num) { qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, qp_open_attr->qp_context); break; } } mutex_unlock(&xrcd->tgt_qp_mutex); return qp; } EXPORT_SYMBOL(ib_open_qp); static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr) { struct ib_qp *real_qp = qp; qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; qp->pd = NULL; qp->send_cq = qp->recv_cq = NULL; qp->srq = NULL; qp->xrcd = qp_init_attr->xrcd; atomic_inc(&qp_init_attr->xrcd->usecnt); INIT_LIST_HEAD(&qp->open_list); qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, qp_init_attr->qp_context); if (!IS_ERR(qp)) __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); else real_qp->device->destroy_qp(real_qp); return qp; } struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device; struct ib_qp *qp; if (qp_init_attr->rwq_ind_tbl && (qp_init_attr->recv_cq || qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || qp_init_attr->cap.max_recv_sge)) return ERR_PTR(-EINVAL); qp = device->create_qp(pd, qp_init_attr, NULL); if (IS_ERR(qp)) return qp; qp->device = device; qp->real_qp = qp; qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; atomic_set(&qp->usecnt, 0); spin_lock_init(&qp->mr_lock); if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) return ib_create_xrc_qp(qp, qp_init_attr); qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { qp->recv_cq = NULL; qp->srq = NULL; } else { qp->recv_cq = qp_init_attr->recv_cq; if (qp_init_attr->recv_cq) atomic_inc(&qp_init_attr->recv_cq->usecnt); qp->srq = qp_init_attr->srq; if (qp->srq) atomic_inc(&qp_init_attr->srq->usecnt); } qp->pd = pd; qp->send_cq = qp_init_attr->send_cq; qp->xrcd = NULL; atomic_inc(&pd->usecnt); if (qp_init_attr->send_cq) atomic_inc(&qp_init_attr->send_cq->usecnt); if (qp_init_attr->rwq_ind_tbl) atomic_inc(&qp->rwq_ind_tbl->usecnt); /* * Note: all hw drivers guarantee that max_send_sge is lower than * the device RDMA WRITE SGE limit but not all hw drivers ensure that * max_send_sge <= max_sge_rd. */ qp->max_write_sge = qp_init_attr->cap.max_send_sge; qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, device->attrs.max_sge_rd); return qp; } EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_MAX]; enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), [IB_QPT_RAW_PACKET] = IB_QP_PORT, [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } }, }, [IB_QPS_INIT] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } }, [IB_QPS_RTR] = { .valid = 1, .req_param = { [IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN), [IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), [IB_QPT_XRC_INI] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN), [IB_QPT_XRC_TGT] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), }, }, }, [IB_QPS_RTR] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .req_param = { [IB_QPT_UD] = IB_QP_SQ_PSN, [IB_QPT_UC] = IB_QP_SQ_PSN, [IB_QPT_RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC), [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | IB_QP_SQ_PSN), [IB_QPT_SMI] = IB_QP_SQ_PSN, [IB_QPT_GSI] = IB_QP_SQ_PSN, }, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } } }, [IB_QPS_RTS] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } }, [IB_QPS_SQD] = { .valid = 1, .opt_param = { [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */ [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY } }, }, [IB_QPS_SQD] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } }, [IB_QPS_SQD] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_INI] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PATH_MIG_STATE), [IB_QPT_XRC_TGT] = (IB_QP_PORT | IB_QP_AV | IB_QP_TIMEOUT | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE), [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), } } }, [IB_QPS_SQE] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_RTS] = { .valid = 1, .opt_param = { [IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_UC] = (IB_QP_CUR_STATE | IB_QP_ACCESS_FLAGS), [IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), } } }, [IB_QPS_ERR] = { [IB_QPS_RESET] = { .valid = 1 }, [IB_QPS_ERR] = { .valid = 1 } } }; int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask, enum rdma_link_layer ll) { enum ib_qp_attr_mask req_param, opt_param; if (cur_state < 0 || cur_state > IB_QPS_ERR || next_state < 0 || next_state > IB_QPS_ERR) return 0; if (mask & IB_QP_CUR_STATE && cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) return 0; if (!qp_state_table[cur_state][next_state].valid) return 0; req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; if ((mask & req_param) != req_param) return 0; if (mask & ~(req_param | opt_param | IB_QP_STATE)) return 0; return 1; } EXPORT_SYMBOL(ib_modify_qp_is_ok); int ib_resolve_eth_dmac(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { int ret = 0; if (*qp_attr_mask & IB_QP_AV) { if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) return -EINVAL; if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) return 0; if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); } else { union ib_gid sgid; struct ib_gid_attr sgid_attr; int ifindex; int hop_limit; ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, qp_attr->ah_attr.grh.sgid_index, &sgid, &sgid_attr); if (ret || !sgid_attr.ndev) { if (!ret) ret = -ENXIO; goto out; } ifindex = sgid_attr.ndev->if_index; ret = rdma_addr_find_l2_eth_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, qp_attr->ah_attr.dmac, NULL, &ifindex, &hop_limit); dev_put(sgid_attr.ndev); qp_attr->ah_attr.grh.hop_limit = hop_limit; } } out: return ret; } EXPORT_SYMBOL(ib_resolve_eth_dmac); int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { int ret; ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); if (ret) return ret; return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); int ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { return qp->device->query_qp ? qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : -ENOSYS; } EXPORT_SYMBOL(ib_query_qp); int ib_close_qp(struct ib_qp *qp) { struct ib_qp *real_qp; unsigned long flags; real_qp = qp->real_qp; if (real_qp == qp) return -EINVAL; spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); list_del(&qp->open_list); spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); atomic_dec(&real_qp->usecnt); kfree(qp); return 0; } EXPORT_SYMBOL(ib_close_qp); static int __ib_destroy_shared_qp(struct ib_qp *qp) { struct ib_xrcd *xrcd; struct ib_qp *real_qp; int ret; real_qp = qp->real_qp; xrcd = real_qp->xrcd; mutex_lock(&xrcd->tgt_qp_mutex); ib_close_qp(qp); if (atomic_read(&real_qp->usecnt) == 0) list_del(&real_qp->xrcd_list); else real_qp = NULL; mutex_unlock(&xrcd->tgt_qp_mutex); if (real_qp) { ret = ib_destroy_qp(real_qp); if (!ret) atomic_dec(&xrcd->usecnt); else __ib_insert_xrcd_qp(xrcd, real_qp); } return 0; } int ib_destroy_qp(struct ib_qp *qp) { struct ib_pd *pd; struct ib_cq *scq, *rcq; struct ib_srq *srq; struct ib_rwq_ind_table *ind_tbl; int ret; if (atomic_read(&qp->usecnt)) return -EBUSY; if (qp->real_qp != qp) return __ib_destroy_shared_qp(qp); pd = qp->pd; scq = qp->send_cq; rcq = qp->recv_cq; srq = qp->srq; ind_tbl = qp->rwq_ind_tbl; ret = qp->device->destroy_qp(qp); if (!ret) { if (pd) atomic_dec(&pd->usecnt); if (scq) atomic_dec(&scq->usecnt); if (rcq) atomic_dec(&rcq->usecnt); if (srq) atomic_dec(&srq->usecnt); if (ind_tbl) atomic_dec(&ind_tbl->usecnt); } return ret; } EXPORT_SYMBOL(ib_destroy_qp); /* Completion queues */ struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, const struct ib_cq_init_attr *cq_attr) { struct ib_cq *cq; cq = device->create_cq(device, cq_attr, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; cq->uobject = NULL; cq->comp_handler = comp_handler; cq->event_handler = event_handler; cq->cq_context = cq_context; atomic_set(&cq->usecnt, 0); } return cq; } EXPORT_SYMBOL(ib_create_cq); int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { return cq->device->modify_cq ? cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; } EXPORT_SYMBOL(ib_modify_cq); int ib_destroy_cq(struct ib_cq *cq) { if (atomic_read(&cq->usecnt)) return -EBUSY; return cq->device->destroy_cq(cq); } EXPORT_SYMBOL(ib_destroy_cq); int ib_resize_cq(struct ib_cq *cq, int cqe) { return cq->device->resize_cq ? cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS; } EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ int ib_dereg_mr(struct ib_mr *mr) { struct ib_pd *pd = mr->pd; int ret; ret = mr->device->dereg_mr(mr); if (!ret) atomic_dec(&pd->usecnt); return ret; } EXPORT_SYMBOL(ib_dereg_mr); /** * ib_alloc_mr() - Allocates a memory region * @pd: protection domain associated with the region * @mr_type: memory region type * @max_num_sg: maximum sg entries available for registration. * * Notes: * Memory registeration page/sg lists must not exceed max_num_sg. * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed * max_num_sg * used_page_size. * */ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { struct ib_mr *mr; if (!pd->device->alloc_mr) return ERR_PTR(-ENOSYS); mr = pd->device->alloc_mr(pd, mr_type, max_num_sg); if (!IS_ERR(mr)) { mr->device = pd->device; mr->pd = pd; mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; } return mr; } EXPORT_SYMBOL(ib_alloc_mr); /* "Fast" memory regions */ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) { struct ib_fmr *fmr; if (!pd->device->alloc_fmr) return ERR_PTR(-ENOSYS); fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); if (!IS_ERR(fmr)) { fmr->device = pd->device; fmr->pd = pd; atomic_inc(&pd->usecnt); } return fmr; } EXPORT_SYMBOL(ib_alloc_fmr); int ib_unmap_fmr(struct list_head *fmr_list) { struct ib_fmr *fmr; if (list_empty(fmr_list)) return 0; fmr = list_entry(fmr_list->next, struct ib_fmr, list); return fmr->device->unmap_fmr(fmr_list); } EXPORT_SYMBOL(ib_unmap_fmr); int ib_dealloc_fmr(struct ib_fmr *fmr) { struct ib_pd *pd; int ret; pd = fmr->pd; ret = fmr->device->dealloc_fmr(fmr); if (!ret) atomic_dec(&pd->usecnt); return ret; } EXPORT_SYMBOL(ib_dealloc_fmr); /* Multicast groups */ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; if (!qp->device->attach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) return -EINVAL; ret = qp->device->attach_mcast(qp, gid, lid); if (!ret) atomic_inc(&qp->usecnt); return ret; } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { int ret; if (!qp->device->detach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) return -EINVAL; ret = qp->device->detach_mcast(qp, gid, lid); if (!ret) atomic_dec(&qp->usecnt); return ret; } EXPORT_SYMBOL(ib_detach_mcast); struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) { struct ib_xrcd *xrcd; if (!device->alloc_xrcd) return ERR_PTR(-ENOSYS); xrcd = device->alloc_xrcd(device, NULL, NULL); if (!IS_ERR(xrcd)) { xrcd->device = device; xrcd->inode = NULL; atomic_set(&xrcd->usecnt, 0); mutex_init(&xrcd->tgt_qp_mutex); INIT_LIST_HEAD(&xrcd->tgt_qp_list); } return xrcd; } EXPORT_SYMBOL(ib_alloc_xrcd); int ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct ib_qp *qp; int ret; if (atomic_read(&xrcd->usecnt)) return -EBUSY; while (!list_empty(&xrcd->tgt_qp_list)) { qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); ret = ib_destroy_qp(qp); if (ret) return ret; } return xrcd->device->dealloc_xrcd(xrcd); } EXPORT_SYMBOL(ib_dealloc_xrcd); /** * ib_create_wq - Creates a WQ associated with the specified protection * domain. * @pd: The protection domain associated with the WQ. * @wq_init_attr: A list of initial attributes required to create the * WQ. If WQ creation succeeds, then the attributes are updated to * the actual capabilities of the created WQ. * * wq_init_attr->max_wr and wq_init_attr->max_sge determine * the requested size of the WQ, and set to the actual values allocated * on return. * If ib_create_wq() succeeds, then max_wr and max_sge will always be * at least as large as the requested values. */ struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *wq_attr) { struct ib_wq *wq; if (!pd->device->create_wq) return ERR_PTR(-ENOSYS); wq = pd->device->create_wq(pd, wq_attr, NULL); if (!IS_ERR(wq)) { wq->event_handler = wq_attr->event_handler; wq->wq_context = wq_attr->wq_context; wq->wq_type = wq_attr->wq_type; wq->cq = wq_attr->cq; wq->device = pd->device; wq->pd = pd; wq->uobject = NULL; atomic_inc(&pd->usecnt); atomic_inc(&wq_attr->cq->usecnt); atomic_set(&wq->usecnt, 0); } return wq; } EXPORT_SYMBOL(ib_create_wq); /** * ib_destroy_wq - Destroys the specified WQ. * @wq: The WQ to destroy. */ int ib_destroy_wq(struct ib_wq *wq) { int err; struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; if (atomic_read(&wq->usecnt)) return -EBUSY; err = wq->device->destroy_wq(wq); if (!err) { atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); } return err; } EXPORT_SYMBOL(ib_destroy_wq); /** * ib_modify_wq - Modifies the specified WQ. * @wq: The WQ to modify. * @wq_attr: On input, specifies the WQ attributes to modify. * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ * are being modified. * On output, the current values of selected WQ attributes are returned. */ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask) { int err; if (!wq->device->modify_wq) return -ENOSYS; err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); return err; } EXPORT_SYMBOL(ib_modify_wq); /* * ib_create_rwq_ind_table - Creates a RQ Indirection Table. * @device: The device on which to create the rwq indirection table. * @ib_rwq_ind_table_init_attr: A list of initial attributes required to * create the Indirection Table. * * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less * than the created ib_rwq_ind_table object and the caller is responsible * for its memory allocation/free. */ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr) { struct ib_rwq_ind_table *rwq_ind_table; int i; u32 table_size; if (!device->create_rwq_ind_table) return ERR_PTR(-ENOSYS); table_size = (1 << init_attr->log_ind_tbl_size); rwq_ind_table = device->create_rwq_ind_table(device, init_attr, NULL); if (IS_ERR(rwq_ind_table)) return rwq_ind_table; rwq_ind_table->ind_tbl = init_attr->ind_tbl; rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; rwq_ind_table->device = device; rwq_ind_table->uobject = NULL; atomic_set(&rwq_ind_table->usecnt, 0); for (i = 0; i < table_size; i++) atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); return rwq_ind_table; } EXPORT_SYMBOL(ib_create_rwq_ind_table); /* * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. * @wq_ind_table: The Indirection Table to destroy. */ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) { int err, i; u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; if (atomic_read(&rwq_ind_table->usecnt)) return -EBUSY; err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); if (!err) { for (i = 0; i < table_size; i++) atomic_dec(&ind_tbl[i]->usecnt); } return err; } EXPORT_SYMBOL(ib_destroy_rwq_ind_table); struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { struct ib_flow *flow_id; if (!qp->device->create_flow) return ERR_PTR(-ENOSYS); flow_id = qp->device->create_flow(qp, flow_attr, domain); if (!IS_ERR(flow_id)) atomic_inc(&qp->usecnt); return flow_id; } EXPORT_SYMBOL(ib_create_flow); int ib_destroy_flow(struct ib_flow *flow_id) { int err; struct ib_qp *qp = flow_id->qp; err = qp->device->destroy_flow(flow_id); if (!err) atomic_dec(&qp->usecnt); return err; } EXPORT_SYMBOL(ib_destroy_flow); int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { return mr->device->check_mr_status ? mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; } EXPORT_SYMBOL(ib_check_mr_status); int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state) { if (!device->set_vf_link_state) return -ENOSYS; return device->set_vf_link_state(device, vf, port, state); } EXPORT_SYMBOL(ib_set_vf_link_state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info) { if (!device->get_vf_config) return -ENOSYS; return device->get_vf_config(device, vf, port, info); } EXPORT_SYMBOL(ib_get_vf_config); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats) { if (!device->get_vf_stats) return -ENOSYS; return device->get_vf_stats(device, vf, port, stats); } EXPORT_SYMBOL(ib_get_vf_stats); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type) { if (!device->set_vf_guid) return -ENOSYS; return device->set_vf_guid(device, vf, port, guid, type); } EXPORT_SYMBOL(ib_set_vf_guid); /** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. * @mr: memory region * @sg: dma mapped scatterlist * @sg_nents: number of entries in sg * @sg_offset: offset in bytes into sg * @page_size: page vector desired page size * * Constraints: * - The first sg element is allowed to have an offset. * - Each sg element must either be aligned to page_size or virtually * contiguous to the previous element. In case an sg element has a * non-contiguous offset, the mapping prefix will not include it. * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. * * After this completes successfully, the memory region * is ready for registration. */ int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { if (unlikely(!mr->device->map_mr_sg)) return -ENOSYS; mr->page_size = page_size; return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset); } EXPORT_SYMBOL(ib_map_mr_sg); /** * ib_sg_to_pages() - Convert the largest prefix of a sg list * to a page vector * @mr: memory region * @sgl: dma mapped scatterlist * @sg_nents: number of entries in sg * @sg_offset_p: IN: start offset in bytes into sg * OUT: offset in bytes for element n of the sg of the first * byte that has not been processed where n is the return * value of this function. * @set_page: driver page assignment function pointer * * Core service helper for drivers to convert the largest * prefix of given sg list to a page vector. The sg list * prefix converted is the prefix that meet the requirements * of ib_map_mr_sg. * * Returns the number of sg elements that were assigned to * a page vector. */ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64)) { struct scatterlist *sg; u64 last_end_dma_addr = 0; unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; unsigned int last_page_off = 0; u64 page_mask = ~((u64)mr->page_size - 1); int i, ret; if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0]))) return -EINVAL; mr->iova = sg_dma_address(&sgl[0]) + sg_offset; mr->length = 0; for_each_sg(sgl, sg, sg_nents, i) { u64 dma_addr = sg_dma_address(sg) + sg_offset; u64 prev_addr = dma_addr; unsigned int dma_len = sg_dma_len(sg) - sg_offset; u64 end_dma_addr = dma_addr + dma_len; u64 page_addr = dma_addr & page_mask; /* * For the second and later elements, check whether either the * end of element i-1 or the start of element i is not aligned * on a page boundary. */ if (i && (last_page_off != 0 || page_addr != dma_addr)) { /* Stop mapping if there is a gap. */ if (last_end_dma_addr != dma_addr) break; /* * Coalesce this element with the last. If it is small * enough just update mr->length. Otherwise start * mapping from the next page. */ goto next_page; } do { ret = set_page(mr, page_addr); if (unlikely(ret < 0)) { sg_offset = prev_addr - sg_dma_address(sg); mr->length += prev_addr - dma_addr; if (sg_offset_p) *sg_offset_p = sg_offset; return i || sg_offset ? i : ret; } prev_addr = page_addr; next_page: page_addr += mr->page_size; } while (page_addr < end_dma_addr); mr->length += dma_len; last_end_dma_addr = end_dma_addr; last_page_off = end_dma_addr & ~page_mask; sg_offset = 0; } if (sg_offset_p) *sg_offset_p = 0; return i; } EXPORT_SYMBOL(ib_sg_to_pages); struct ib_drain_cqe { struct ib_cqe cqe; struct completion done; }; static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) { struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, cqe); complete(&cqe->done); } /* * Post a WR and block until its completion is reaped for the SQ. */ static void __ib_drain_sq(struct ib_qp *qp) { struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe sdrain; struct ib_send_wr swr = {}, *bad_swr; int ret; if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) { WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT, "IB_POLL_DIRECT poll_ctx not supported for drain\n"); return; } swr.wr_cqe = &sdrain.cqe; sdrain.cqe.done = ib_drain_qp_done; init_completion(&sdrain.done); ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; } ret = ib_post_send(qp, &swr, &bad_swr); if (ret) { WARN_ONCE(ret, "failed to drain send queue: %d\n", ret); return; } wait_for_completion(&sdrain.done); } /* * Post a WR and block until its completion is reaped for the RQ. */ static void __ib_drain_rq(struct ib_qp *qp) { struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; struct ib_drain_cqe rdrain; struct ib_recv_wr rwr = {}, *bad_rwr; int ret; if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) { WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT, "IB_POLL_DIRECT poll_ctx not supported for drain\n"); return; } rwr.wr_cqe = &rdrain.cqe; rdrain.cqe.done = ib_drain_qp_done; init_completion(&rdrain.done); ret = ib_modify_qp(qp, &attr, IB_QP_STATE); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; } ret = ib_post_recv(qp, &rwr, &bad_rwr); if (ret) { WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret); return; } wait_for_completion(&rdrain.done); } /** * ib_drain_sq() - Block until all SQ CQEs have been consumed by the * application. * @qp: queue pair to drain * * If the device has a provider-specific drain function, then * call that. Otherwise call the generic drain function * __ib_drain_sq(). * * The caller must: * * ensure there is room in the CQ and SQ for the drain work request and * completion. * * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be * IB_POLL_DIRECT. * * ensure that there are no other contexts that are posting WRs concurrently. * Otherwise the drain is not guaranteed. */ void ib_drain_sq(struct ib_qp *qp) { if (qp->device->drain_sq) qp->device->drain_sq(qp); else __ib_drain_sq(qp); } EXPORT_SYMBOL(ib_drain_sq); /** * ib_drain_rq() - Block until all RQ CQEs have been consumed by the * application. * @qp: queue pair to drain * * If the device has a provider-specific drain function, then * call that. Otherwise call the generic drain function * __ib_drain_rq(). * * The caller must: * * ensure there is room in the CQ and RQ for the drain work request and * completion. * * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be * IB_POLL_DIRECT. * * ensure that there are no other contexts that are posting WRs concurrently. * Otherwise the drain is not guaranteed. */ void ib_drain_rq(struct ib_qp *qp) { if (qp->device->drain_rq) qp->device->drain_rq(qp); else __ib_drain_rq(qp); } EXPORT_SYMBOL(ib_drain_rq); /** * ib_drain_qp() - Block until all CQEs have been consumed by the * application on both the RQ and SQ. * @qp: queue pair to drain * * The caller must: * * ensure there is room in the CQ(s), SQ, and RQ for drain work requests * and completions. * * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be * IB_POLL_DIRECT. * * ensure that there are no other contexts that are posting WRs concurrently. * Otherwise the drain is not guaranteed. */ void ib_drain_qp(struct ib_qp *qp) { ib_drain_sq(qp); if (!qp->srq) ib_drain_rq(qp); } EXPORT_SYMBOL(ib_drain_qp); Index: stable/11/sys/ofed/drivers/infiniband/core/iwcm.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/iwcm.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/iwcm.h (revision 331772) @@ -1,62 +1,65 @@ /* * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ + #ifndef IWCM_H #define IWCM_H enum iw_cm_state { IW_CM_STATE_IDLE, /* unbound, inactive */ IW_CM_STATE_LISTEN, /* listen waiting for connect */ IW_CM_STATE_CONN_RECV, /* inbound waiting for user accept */ IW_CM_STATE_CONN_SENT, /* outbound waiting for peer accept */ IW_CM_STATE_ESTABLISHED, /* established */ IW_CM_STATE_CLOSING, /* disconnect */ IW_CM_STATE_DESTROYING /* object being deleted */ }; struct iwcm_id_private { struct iw_cm_id id; enum iw_cm_state state; unsigned long flags; struct ib_qp *qp; struct completion destroy_comp; wait_queue_head_t connect_wait; struct list_head work_list; spinlock_t lock; atomic_t refcount; struct list_head work_free_list; }; #define IWCM_F_DROP_EVENTS 1 #define IWCM_F_CONNECT_WAIT 2 #endif /* IWCM_H */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/iwcm.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/iwpm_util.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/iwpm_util.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/iwpm_util.h (revision 331772) @@ -1,75 +1,80 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Intel Corporation. All rights reserved. * Copyright (c) 2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ + #ifndef _IWPM_UTIL_H #define _IWPM_UTIL_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IWPM_PID_UNDEFINED -1 #define IWPM_PID_UNAVAILABLE -2 #define IWPM_REG_UNDEF 0x01 #define IWPM_REG_VALID 0x02 #define IWPM_REG_INCOMPL 0x04 /** * iwpm_compare_sockaddr - Compare two sockaddr storage structs * * Returns 0 if they are holding the same ip/tcp address info, * otherwise returns 1 */ int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, struct sockaddr_storage *b_sockaddr); /** * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port * @sockaddr: Socket address to print * @msg: Message to print */ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); #endif Index: stable/11/sys/ofed/drivers/infiniband/core/mad_priv.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/mad_priv.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/mad_priv.h (revision 331772) @@ -1,225 +1,227 @@ /* * Copyright (c) 2004, 2005, Voltaire, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 HNR Consulting. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef __IB_MAD_PRIV_H__ #define __IB_MAD_PRIV_H__ #include #include #include #include #include #include #define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ /* QP and CQ parameters */ #define IB_MAD_QP_SEND_SIZE 128 #define IB_MAD_QP_RECV_SIZE 512 #define IB_MAD_QP_MIN_SIZE 64 #define IB_MAD_QP_MAX_SIZE 8192 #define IB_MAD_SEND_REQ_MAX_SG 2 #define IB_MAD_RECV_REQ_MAX_SG 1 #define IB_MAD_SEND_Q_PSN 0 /* Registration table sizes */ #define MAX_MGMT_CLASS 80 #define MAX_MGMT_VERSION 0x83 #define MAX_MGMT_OUI 8 #define MAX_MGMT_VENDOR_RANGE2 (IB_MGMT_CLASS_VENDOR_RANGE2_END - \ IB_MGMT_CLASS_VENDOR_RANGE2_START + 1) struct ib_mad_list_head { struct list_head list; struct ib_cqe cqe; struct ib_mad_queue *mad_queue; }; struct ib_mad_private_header { struct ib_mad_list_head mad_list; struct ib_mad_recv_wc recv_wc; struct ib_wc wc; u64 mapping; } __attribute__ ((packed)); struct ib_mad_private { struct ib_mad_private_header header; size_t mad_size; struct ib_grh grh; u8 mad[0]; } __attribute__ ((packed)); struct ib_rmpp_segment { struct list_head list; u32 num; u8 data[0]; }; struct ib_mad_agent_private { struct list_head agent_list; struct ib_mad_agent agent; struct ib_mad_reg_req *reg_req; struct ib_mad_qp_info *qp_info; spinlock_t lock; struct list_head send_list; struct list_head wait_list; struct list_head done_list; struct delayed_work timed_work; unsigned long timeout; struct list_head local_list; struct work_struct local_work; struct list_head rmpp_list; atomic_t refcount; struct completion comp; }; struct ib_mad_snoop_private { struct ib_mad_agent agent; struct ib_mad_qp_info *qp_info; int snoop_index; int mad_snoop_flags; atomic_t refcount; struct completion comp; }; struct ib_mad_send_wr_private { struct ib_mad_list_head mad_list; struct list_head agent_list; struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_buf send_buf; u64 header_mapping; u64 payload_mapping; struct ib_ud_wr send_wr; struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG]; __be64 tid; unsigned long timeout; int max_retries; int retries_left; int retry; int refcount; enum ib_wc_status status; /* RMPP control */ struct list_head rmpp_list; struct ib_rmpp_segment *last_ack_seg; struct ib_rmpp_segment *cur_seg; int last_ack; int seg_num; int newwin; int pad; }; struct ib_mad_local_private { struct list_head completion_list; struct ib_mad_private *mad_priv; struct ib_mad_agent_private *recv_mad_agent; struct ib_mad_send_wr_private *mad_send_wr; size_t return_wc_byte_len; }; struct ib_mad_mgmt_method_table { struct ib_mad_agent_private *agent[IB_MGMT_MAX_METHODS]; }; struct ib_mad_mgmt_class_table { struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS]; }; struct ib_mad_mgmt_vendor_class { u8 oui[MAX_MGMT_OUI][3]; struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_OUI]; }; struct ib_mad_mgmt_vendor_class_table { struct ib_mad_mgmt_vendor_class *vendor_class[MAX_MGMT_VENDOR_RANGE2]; }; struct ib_mad_mgmt_version_table { struct ib_mad_mgmt_class_table *class; struct ib_mad_mgmt_vendor_class_table *vendor; }; struct ib_mad_queue { spinlock_t lock; struct list_head list; int count; int max_active; struct ib_mad_qp_info *qp_info; }; struct ib_mad_qp_info { struct ib_mad_port_private *port_priv; struct ib_qp *qp; struct ib_mad_queue send_queue; struct ib_mad_queue recv_queue; struct list_head overflow_list; spinlock_t snoop_lock; struct ib_mad_snoop_private **snoop_table; int snoop_table_size; atomic_t snoop_count; }; struct ib_mad_port_private { struct list_head port_list; struct ib_device *device; int port_num; struct ib_cq *cq; struct ib_pd *pd; spinlock_t reg_lock; struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION]; struct list_head agent_list; struct workqueue_struct *wq; struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE]; }; int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_send_wr_private * ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv, const struct ib_mad_recv_wc *mad_recv_wc); void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, int timeout_ms); #endif /* __IB_MAD_PRIV_H__ */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/mad_priv.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/mad_rmpp.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/mad_rmpp.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/mad_rmpp.h (revision 331772) @@ -1,58 +1,60 @@ /* * Copyright (c) 2005 Intel Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef __MAD_RMPP_H__ #define __MAD_RMPP_H__ enum { IB_RMPP_RESULT_PROCESSED, IB_RMPP_RESULT_CONSUMED, IB_RMPP_RESULT_INTERNAL, IB_RMPP_RESULT_UNHANDLED }; int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr); struct ib_mad_recv_wc * ib_process_rmpp_recv_wc(struct ib_mad_agent_private *agent, struct ib_mad_recv_wc *mad_recv_wc); int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_send_wc *mad_send_wc); void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc); void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent); int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr); #endif /* __MAD_RMPP_H__ */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/mad_rmpp.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/opa_smi.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/opa_smi.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/opa_smi.h (revision 331772) @@ -1,78 +1,81 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * + * $FreeBSD$ */ #ifndef __OPA_SMI_H_ #define __OPA_SMI_H_ #include #include #include "smi.h" enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int opa_smi_get_fwd_port(struct opa_smp *smp); extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp); extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp, bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM * via process_mad */ static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp, struct ib_device *device) { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ return (device->process_mad && !opa_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD; } /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM * via process_mad */ static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *smp, struct ib_device *device) { /* C14-13:3 -- We're at the end of the DR segment of path */ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ return (device->process_mad && opa_get_smp_direction(smp) && !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD; } #endif /* __OPA_SMI_H_ */ Index: stable/11/sys/ofed/drivers/infiniband/core/sa.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/sa.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/sa.h (revision 331772) @@ -1,66 +1,68 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef SA_H #define SA_H #include static inline void ib_sa_client_get(struct ib_sa_client *client) { atomic_inc(&client->users); } static inline void ib_sa_client_put(struct ib_sa_client *client) { if (atomic_dec_and_test(&client->users)) complete(&client->comp); } int ib_sa_mcmember_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), void *context, struct ib_sa_query **sa_query); int mcast_init(void); void mcast_cleanup(void); #endif /* SA_H */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/sa.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/smi.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/smi.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/smi.h (revision 331772) @@ -1,90 +1,91 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2007 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * + * $FreeBSD$ */ #ifndef __SMI_H_ #define __SMI_H_ #include enum smi_action { IB_SMI_DISCARD, IB_SMI_HANDLE }; enum smi_forward_action { IB_SMI_LOCAL, /* SMP should be completed up the stack */ IB_SMI_SEND, /* received DR SMP should be forwarded to the send queue */ IB_SMI_FORWARD /* SMP should be forwarded (for switches only) */ }; enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch, int port_num, int phys_port_cnt); int smi_get_fwd_port(struct ib_smp *smp); extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp); extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp, bool is_switch, int port_num); /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM * via process_mad */ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp, struct ib_device *device) { /* C14-9:3 -- We're at the end of the DR segment of path */ /* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */ return ((device->process_mad && !ib_get_smp_direction(smp) && (smp->hop_ptr == smp->hop_cnt + 1)) ? IB_SMI_HANDLE : IB_SMI_DISCARD); } /* * Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM * via process_mad */ static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp, struct ib_device *device) { /* C14-13:3 -- We're at the end of the DR segment of path */ /* C14-13:4 -- Hop Pointer == 0 -> give to SM */ return ((device->process_mad && ib_get_smp_direction(smp) && !smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD); } #endif /* __SMI_H_ */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/smi.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/drivers/infiniband/core/uverbs.h =================================================================== --- stable/11/sys/ofed/drivers/infiniband/core/uverbs.h (revision 331771) +++ stable/11/sys/ofed/drivers/infiniband/core/uverbs.h (revision 331772) @@ -1,296 +1,298 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef UVERBS_H #define UVERBS_H #include #include #include #include #include #include #include #include #include #include #include #define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->inbuf = (const void __user *) (ibuf); \ (udata)->outbuf = (void __user *) (obuf); \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) #define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \ (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) /* * Our lifetime rules for these structs are the following: * * struct ib_uverbs_device: One reference is held by the module and * released in ib_uverbs_remove_one(). Another reference is taken by * ib_uverbs_open() each time the character special file is opened, * and released in ib_uverbs_release_file() when the file is released. * * struct ib_uverbs_file: One reference is held by the VFS and * released when the file is closed. Another reference is taken when * an asynchronous event queue file is created and released when the * event file is closed. * * struct ib_uverbs_event_file: One reference is held by the VFS and * released when the file is closed. For asynchronous event files, * another reference is held by the corresponding main context file * and released when that file is closed. For completion event files, * a reference is taken when a CQ is created that uses the file, and * released when the CQ is destroyed. */ struct ib_uverbs_device { atomic_t refcount; int num_comp_vectors; struct completion comp; struct device *dev; struct ib_device __rcu *ib_dev; int devnum; struct cdev cdev; struct rb_root xrcd_tree; struct mutex xrcd_tree_mutex; struct kobject kobj; struct srcu_struct disassociate_srcu; struct mutex lists_mutex; /* protect lists */ struct list_head uverbs_file_list; struct list_head uverbs_events_file_list; }; struct ib_uverbs_event_file { struct kref ref; int is_async; struct ib_uverbs_file *uverbs_file; spinlock_t lock; int is_closed; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; struct list_head event_list; struct list_head list; }; struct ib_uverbs_file { struct kref ref; struct mutex mutex; struct mutex cleanup_mutex; /* protect cleanup */ struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; struct ib_uverbs_event_file *async_file; struct list_head list; int is_closed; }; struct ib_uverbs_event { union { struct ib_uverbs_async_event_desc async; struct ib_uverbs_comp_event_desc comp; } desc; struct list_head list; struct list_head obj_list; u32 *counter; }; struct ib_uverbs_mcast_entry { struct list_head list; union ib_gid gid; u16 lid; }; struct ib_uevent_object { struct ib_uobject uobject; struct list_head event_list; u32 events_reported; }; struct ib_uxrcd_object { struct ib_uobject uobject; atomic_t refcnt; }; struct ib_usrq_object { struct ib_uevent_object uevent; struct ib_uxrcd_object *uxrcd; }; struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; struct ib_uxrcd_object *uxrcd; }; struct ib_uwq_object { struct ib_uevent_object uevent; }; struct ib_ucq_object { struct ib_uobject uobject; struct ib_uverbs_file *uverbs_file; struct list_head comp_list; struct list_head async_list; u32 comp_events_reported; u32 async_events_reported; }; extern spinlock_t ib_uverbs_idr_lock; extern struct idr ib_uverbs_pd_idr; extern struct idr ib_uverbs_mr_idr; extern struct idr ib_uverbs_mw_idr; extern struct idr ib_uverbs_ah_idr; extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; extern struct idr ib_uverbs_rule_idr; extern struct idr ib_uverbs_wq_idr; extern struct idr ib_uverbs_rwq_ind_tbl_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_device *ib_dev, int is_async); void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file); struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj); void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj); void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); int uverbs_dealloc_mw(struct ib_mw *mw); struct ib_uverbs_flow_spec { union { union { struct ib_uverbs_flow_spec_hdr hdr; struct { __u32 type; __u16 size; __u16 reserved; }; }; struct ib_uverbs_flow_spec_eth eth; struct ib_uverbs_flow_spec_ipv4 ipv4; struct ib_uverbs_flow_spec_tcp_udp tcp_udp; struct ib_uverbs_flow_spec_ipv6 ipv6; }; }; #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ struct ib_device *ib_dev, \ const char __user *buf, int in_len, \ int out_len) IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(rereg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); IB_UVERBS_DECLARE_CMD(alloc_mw); IB_UVERBS_DECLARE_CMD(dealloc_mw); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(resize_cq); IB_UVERBS_DECLARE_CMD(poll_cq); IB_UVERBS_DECLARE_CMD(req_notify_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); IB_UVERBS_DECLARE_CMD(open_qp); IB_UVERBS_DECLARE_CMD(query_qp); IB_UVERBS_DECLARE_CMD(modify_qp); IB_UVERBS_DECLARE_CMD(destroy_qp); IB_UVERBS_DECLARE_CMD(post_send); IB_UVERBS_DECLARE_CMD(post_recv); IB_UVERBS_DECLARE_CMD(post_srq_recv); IB_UVERBS_DECLARE_CMD(create_ah); IB_UVERBS_DECLARE_CMD(destroy_ah); IB_UVERBS_DECLARE_CMD(attach_mcast); IB_UVERBS_DECLARE_CMD(detach_mcast); IB_UVERBS_DECLARE_CMD(create_srq); IB_UVERBS_DECLARE_CMD(modify_srq); IB_UVERBS_DECLARE_CMD(query_srq); IB_UVERBS_DECLARE_CMD(destroy_srq); IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); #define IB_UVERBS_DECLARE_EX_CMD(name) \ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ struct ib_device *ib_dev, \ struct ib_udata *ucore, \ struct ib_udata *uhw) IB_UVERBS_DECLARE_EX_CMD(create_flow); IB_UVERBS_DECLARE_EX_CMD(destroy_flow); IB_UVERBS_DECLARE_EX_CMD(query_device); IB_UVERBS_DECLARE_EX_CMD(create_cq); IB_UVERBS_DECLARE_EX_CMD(create_qp); IB_UVERBS_DECLARE_EX_CMD(create_wq); IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); #endif /* UVERBS_H */ Property changes on: stable/11/sys/ofed/drivers/infiniband/core/uverbs.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib.h (revision 331772) @@ -1,120 +1,124 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2010 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(_RDMA_IB_H) #define _RDMA_IB_H #include #include #include /* * Define a native infiniband address as in Linux upstream * 8d36eb01da5d371feffa280e501377b5c450f5a5 */ #define AF_IB 41 struct ib_addr { union { __u8 uib_addr8[16]; __be16 uib_addr16[8]; __be32 uib_addr32[4]; __be64 uib_addr64[2]; } ib_u; #define sib_addr8 ib_u.uib_addr8 #define sib_addr16 ib_u.uib_addr16 #define sib_addr32 ib_u.uib_addr32 #define sib_addr64 ib_u.uib_addr64 #define sib_raw ib_u.uib_addr8 #define sib_subnet_prefix ib_u.uib_addr64[0] #define sib_interface_id ib_u.uib_addr64[1] }; static inline int ib_addr_any(const struct ib_addr *a) { return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0); } static inline int ib_addr_loopback(const struct ib_addr *a) { return ((a->sib_addr32[0] | a->sib_addr32[1] | a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0); } static inline void ib_addr_set(struct ib_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { addr->sib_addr32[0] = w1; addr->sib_addr32[1] = w2; addr->sib_addr32[2] = w3; addr->sib_addr32[3] = w4; } static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2) { return memcmp(a1, a2, sizeof(struct ib_addr)); } struct sockaddr_ib { unsigned short int sib_family; /* AF_IB */ __be16 sib_pkey; __be32 sib_flowinfo; struct ib_addr sib_addr; __be64 sib_sid; __be64 sib_sid_mask; __u64 sib_scope_id; }; /* * The IB interfaces that use write() as bi-directional ioctl() are * fundamentally unsafe, since there are lots of ways to trigger "write()" * calls from various contexts with elevated privileges. That includes the * traditional suid executable error message writes, but also various kernel * interfaces that can write to file descriptors. * * This function provides protection for the legacy API by restricting the * calling context. */ static inline bool ib_safe_file_access(struct file *filp) { struct thread *td = curthread; /* * Check if called from userspace through a devfs related * system call belonging to the given file: */ return (filp->_file != NULL && filp->_file == td->td_fpop && filp->_file->f_cred == td->td_ucred); } #endif /* _RDMA_IB_H */ Index: stable/11/sys/ofed/include/rdma/ib_addr.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_addr.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_addr.h (revision 331772) @@ -1,343 +1,345 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_ADDR_H) #define IB_ADDR_H #include #include #include #include #include #include #include #include #include #include #include #include #include struct rdma_addr_client { atomic_t refcount; struct completion comp; }; /** * rdma_addr_register_client - Register an address client. */ void rdma_addr_register_client(struct rdma_addr_client *client); /** * rdma_addr_unregister_client - Deregister an address client. * @client: Client object to deregister. */ void rdma_addr_unregister_client(struct rdma_addr_client *client); /** * struct rdma_dev_addr - Contains resolved RDMA hardware addresses * @src_dev_addr: Source MAC address. * @dst_dev_addr: Destination MAC address. * @broadcast: Broadcast address of the device. * @dev_type: The interface hardware type of the device. * @bound_dev_if: An optional device interface index. * @transport: The transport type used. * @net: Network namespace containing the bound_dev_if net_dev. */ struct vnet; struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; struct vnet *net; enum rdma_network_type network; int hoplimit; }; /** * rdma_translate_ip - Translate a local IP address to an RDMA hardware * address. * * The dev_addr->net field must be initialized. */ int rdma_translate_ip(const struct sockaddr *addr, struct rdma_dev_addr *dev_addr, u16 *vlan_id); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. * @client: Address client associated with request. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. * @dst_addr: The destination address to resolve. * @addr: A reference to a data location that will receive the resolved * addresses. The data location must remain valid until the callback has * been invoked. The net field of the addr struct must be valid. * @timeout_ms: Amount of time to wait for the address resolution to complete. * @callback: Call invoked once address resolution has completed, timed out, * or been canceled. A status of 0 indicates success. * @context: User-specified context associated with the call. */ int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), void *context); int rdma_resolve_ip_route(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr); void rdma_addr_cancel(struct rdma_dev_addr *addr); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); int rdma_addr_size(struct sockaddr *addr); int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, u8 *smac, u16 *vlan_id, int *if_index, int *hoplimit); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; } static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey) { dev_addr->broadcast[8] = pkey >> 8; dev_addr->broadcast[9] = (unsigned char) pkey; } static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev) { uint16_t tag; if (VLAN_TAG(__DECONST(struct ifnet *, dev), &tag) != 0) return 0xffff; return tag; } static inline int rdma_ip2gid(const struct sockaddr *addr, union ib_gid *gid) { switch (addr->sa_family) { case AF_INET: ipv6_addr_set_v4mapped(((const struct sockaddr_in *) addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; case AF_INET6: memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16); /* make sure scope ID gets zeroed inside GID */ if (IN6_IS_SCOPE_LINKLOCAL((struct in6_addr *)gid->raw) || IN6_IS_ADDR_MC_INTFACELOCAL((struct in6_addr *)gid->raw)) { gid->raw[2] = 0; gid->raw[3] = 0; } break; default: return -EINVAL; } return 0; } /* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid) { if (ipv6_addr_v4mapped((const struct in6_addr *)gid)) { struct sockaddr_in *out_in = (struct sockaddr_in *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin_len = sizeof(*out_in); out_in->sin_family = AF_INET; memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4); } else { struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out; memset(out_in, 0, sizeof(*out_in)); out_in->sin6_len = sizeof(*out_in); out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); } } static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { struct net_device *dev; struct ifaddr *ifa; dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (dev) { TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET) continue; ipv6_addr_set_v4mapped(((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; } dev_put(dev); } } static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { if (dev_addr->transport == RDMA_TRANSPORT_IB && dev_addr->dev_type != ARPHRD_INFINIBAND) iboe_addr_get_sgid(dev_addr, gid); else memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } static inline enum ib_mtu iboe_get_mtu(int mtu) { /* * reduce IB headers from effective IBoE MTU. 28 stands for * atomic header which is the biggest possible header after BTH */ mtu = mtu - IB_GRH_BYTES - IB_BTH_BYTES - 28; if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096)) return IB_MTU_4096; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048)) return IB_MTU_2048; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024)) return IB_MTU_1024; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512)) return IB_MTU_512; else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256)) return IB_MTU_256; else return 0; } static inline int iboe_get_rate(struct net_device *dev) { uint64_t baudrate = dev->if_baudrate; #ifdef if_baudrate_pf int exp; for (exp = dev->if_baudrate_pf; exp > 0; exp--) baudrate *= 10; #endif if (baudrate >= IF_Gbps(40)) return IB_RATE_40_GBPS; else if (baudrate >= IF_Gbps(30)) return IB_RATE_30_GBPS; else if (baudrate >= IF_Gbps(20)) return IB_RATE_20_GBPS; else if (baudrate >= IF_Gbps(10)) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; } static inline int rdma_link_local_addr(struct in6_addr *addr) { if (addr->s6_addr32[0] == htonl(0xfe800000) && addr->s6_addr32[1] == 0) return 1; return 0; } static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac) { memcpy(mac, &addr->s6_addr[8], 3); memcpy(mac + 3, &addr->s6_addr[13], 3); mac[0] ^= 2; } static inline int rdma_is_multicast_addr(struct in6_addr *addr) { return addr->s6_addr[0] == 0xff; } static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; mac[0] = 0x33; mac[1] = 0x33; for (i = 2; i < 6; ++i) mac[i] = addr->s6_addr[i + 10]; } static inline u16 rdma_get_vlan_id(union ib_gid *dgid) { u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; return vid < 0x1000 ? vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) { return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev)); } #endif /* IB_ADDR_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_addr.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_cache.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_cache.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_cache.h (revision 331772) @@ -1,168 +1,170 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef _IB_CACHE_H #define _IB_CACHE_H #include /** * ib_get_cached_gid - Returns a cached GID table entry * @device: The device to query. * @port_num: The port number of the device to query. * @index: The index into the cached GID table to query. * @gid: The GID value found at the specified index. * @attr: The GID attribute found at the specified index (only in RoCE). * NULL means ignore (output parameter). * * ib_get_cached_gid() fetches the specified GID table entry stored in * the local software cache. */ int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr); /** * ib_find_cached_gid - Returns the port number and GID table index where * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @ndev: In RoCE, the net device of the device. NULL means ignore. * @port_num: The port number of the device where the GID value was found. * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. * * ib_find_cached_gid() searches for the specified GID value in * the local software cache. */ int ib_find_cached_gid(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, u16 *index); /** * ib_find_cached_gid_by_port - Returns the GID table index where a specified * GID value occurs * @device: The device to query. * @gid: The GID value to search for. * @gid_type: The GID type to search for. * @port_num: The port number of the device where the GID value sould be * searched. * @ndev: In RoCE, the net device of the device. Null means ignore. * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. * * ib_find_cached_gid() searches for the specified GID value in * the local software cache. */ int ib_find_cached_gid_by_port(struct ib_device *device, const union ib_gid *gid, enum ib_gid_type gid_type, u8 port_num, struct net_device *ndev, u16 *index); int ib_find_gid_by_filter(struct ib_device *device, const union ib_gid *gid, u8 port_num, bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *, void *), void *context, u16 *index); /** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. * @port_num: The port number of the device to query. * @index: The index into the cached PKey table to query. * @pkey: The PKey value found at the specified index. * * ib_get_cached_pkey() fetches the specified PKey table entry stored in * the local software cache. */ int ib_get_cached_pkey(struct ib_device *device_handle, u8 port_num, int index, u16 *pkey); /** * ib_find_cached_pkey - Returns the PKey table index where a specified * PKey value occurs. * @device: The device to query. * @port_num: The port number of the device to search for the PKey. * @pkey: The PKey value to search for. * @index: The index into the cached PKey table where the PKey was found. * * ib_find_cached_pkey() searches the specified PKey table in * the local software cache. */ int ib_find_cached_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); /** * ib_find_exact_cached_pkey - Returns the PKey table index where a specified * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit) * @device: The device to query. * @port_num: The port number of the device to search for the PKey. * @pkey: The PKey value to search for. * @index: The index into the cached PKey table where the PKey was found. * * ib_find_exact_cached_pkey() searches the specified PKey table in * the local software cache. */ int ib_find_exact_cached_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); /** * ib_get_cached_lmc - Returns a cached lmc table entry * @device: The device to query. * @port_num: The port number of the device to query. * @lmc: The lmc value for the specified port for that device. * * ib_get_cached_lmc() fetches the specified lmc table entry stored in * the local software cache. */ int ib_get_cached_lmc(struct ib_device *device, u8 port_num, u8 *lmc); #endif /* _IB_CACHE_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_cache.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_cm.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_cm.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_cm.h (revision 331772) @@ -1,606 +1,609 @@ /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ + #if !defined(IB_CM_H) #define IB_CM_H #include #include /* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ extern struct class cm_class; enum ib_cm_state { IB_CM_IDLE, IB_CM_LISTEN, IB_CM_REQ_SENT, IB_CM_REQ_RCVD, IB_CM_MRA_REQ_SENT, IB_CM_MRA_REQ_RCVD, IB_CM_REP_SENT, IB_CM_REP_RCVD, IB_CM_MRA_REP_SENT, IB_CM_MRA_REP_RCVD, IB_CM_ESTABLISHED, IB_CM_DREQ_SENT, IB_CM_DREQ_RCVD, IB_CM_TIMEWAIT, IB_CM_SIDR_REQ_SENT, IB_CM_SIDR_REQ_RCVD }; enum ib_cm_lap_state { IB_CM_LAP_UNINIT, IB_CM_LAP_IDLE, IB_CM_LAP_SENT, IB_CM_LAP_RCVD, IB_CM_MRA_LAP_SENT, IB_CM_MRA_LAP_RCVD, }; enum ib_cm_event_type { IB_CM_REQ_ERROR, IB_CM_REQ_RECEIVED, IB_CM_REP_ERROR, IB_CM_REP_RECEIVED, IB_CM_RTU_RECEIVED, IB_CM_USER_ESTABLISHED, IB_CM_DREQ_ERROR, IB_CM_DREQ_RECEIVED, IB_CM_DREP_RECEIVED, IB_CM_TIMEWAIT_EXIT, IB_CM_MRA_RECEIVED, IB_CM_REJ_RECEIVED, IB_CM_LAP_ERROR, IB_CM_LAP_RECEIVED, IB_CM_APR_RECEIVED, IB_CM_SIDR_REQ_ERROR, IB_CM_SIDR_REQ_RECEIVED, IB_CM_SIDR_REP_RECEIVED }; enum ib_cm_data_size { IB_CM_REQ_PRIVATE_DATA_SIZE = 92, IB_CM_MRA_PRIVATE_DATA_SIZE = 222, IB_CM_REJ_PRIVATE_DATA_SIZE = 148, IB_CM_REP_PRIVATE_DATA_SIZE = 196, IB_CM_RTU_PRIVATE_DATA_SIZE = 224, IB_CM_DREQ_PRIVATE_DATA_SIZE = 220, IB_CM_DREP_PRIVATE_DATA_SIZE = 224, IB_CM_REJ_ARI_LENGTH = 72, IB_CM_LAP_PRIVATE_DATA_SIZE = 168, IB_CM_APR_PRIVATE_DATA_SIZE = 148, IB_CM_APR_INFO_LENGTH = 72, IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, IB_CM_SIDR_REP_INFO_LENGTH = 72, }; struct ib_cm_id; struct ib_cm_req_event_param { struct ib_cm_id *listen_id; /* P_Key that was used by the GMP's BTH header */ u16 bth_pkey; u8 port; struct ib_sa_path_rec *primary_path; struct ib_sa_path_rec *alternate_path; __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; enum ib_qp_type qp_type; u32 starting_psn; u8 responder_resources; u8 initiator_depth; unsigned int local_cm_response_timeout:5; unsigned int flow_control:1; unsigned int remote_cm_response_timeout:5; unsigned int retry_count:3; unsigned int rnr_retry_count:3; unsigned int srq:1; }; struct ib_cm_rep_event_param { __be64 remote_ca_guid; u32 remote_qkey; u32 remote_qpn; u32 starting_psn; u8 responder_resources; u8 initiator_depth; unsigned int target_ack_delay:5; unsigned int failover_accepted:2; unsigned int flow_control:1; unsigned int rnr_retry_count:3; unsigned int srq:1; }; enum ib_cm_rej_reason { IB_CM_REJ_NO_QP = 1, IB_CM_REJ_NO_EEC = 2, IB_CM_REJ_NO_RESOURCES = 3, IB_CM_REJ_TIMEOUT = 4, IB_CM_REJ_UNSUPPORTED = 5, IB_CM_REJ_INVALID_COMM_ID = 6, IB_CM_REJ_INVALID_COMM_INSTANCE = 7, IB_CM_REJ_INVALID_SERVICE_ID = 8, IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9, IB_CM_REJ_STALE_CONN = 10, IB_CM_REJ_RDC_NOT_EXIST = 11, IB_CM_REJ_INVALID_GID = 12, IB_CM_REJ_INVALID_LID = 13, IB_CM_REJ_INVALID_SL = 14, IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15, IB_CM_REJ_INVALID_HOP_LIMIT = 16, IB_CM_REJ_INVALID_PACKET_RATE = 17, IB_CM_REJ_INVALID_ALT_GID = 18, IB_CM_REJ_INVALID_ALT_LID = 19, IB_CM_REJ_INVALID_ALT_SL = 20, IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21, IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22, IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23, IB_CM_REJ_PORT_CM_REDIRECT = 24, IB_CM_REJ_PORT_REDIRECT = 25, IB_CM_REJ_INVALID_MTU = 26, IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27, IB_CM_REJ_CONSUMER_DEFINED = 28, IB_CM_REJ_INVALID_RNR_RETRY = 29, IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, IB_CM_REJ_INVALID_CLASS_VERSION = 31, IB_CM_REJ_INVALID_FLOW_LABEL = 32, IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 }; struct ib_cm_rej_event_param { enum ib_cm_rej_reason reason; void *ari; u8 ari_length; }; struct ib_cm_mra_event_param { u8 service_timeout; }; struct ib_cm_lap_event_param { struct ib_sa_path_rec *alternate_path; }; enum ib_cm_apr_status { IB_CM_APR_SUCCESS, IB_CM_APR_INVALID_COMM_ID, IB_CM_APR_UNSUPPORTED, IB_CM_APR_REJECT, IB_CM_APR_REDIRECT, IB_CM_APR_IS_CURRENT, IB_CM_APR_INVALID_QPN_EECN, IB_CM_APR_INVALID_LID, IB_CM_APR_INVALID_GID, IB_CM_APR_INVALID_FLOW_LABEL, IB_CM_APR_INVALID_TCLASS, IB_CM_APR_INVALID_HOP_LIMIT, IB_CM_APR_INVALID_PACKET_RATE, IB_CM_APR_INVALID_SL }; struct ib_cm_apr_event_param { enum ib_cm_apr_status ap_status; void *apr_info; u8 info_len; }; struct ib_cm_sidr_req_event_param { struct ib_cm_id *listen_id; __be64 service_id; /* P_Key that was used by the GMP's BTH header */ u16 bth_pkey; u8 port; u16 pkey; }; enum ib_cm_sidr_status { IB_SIDR_SUCCESS, IB_SIDR_UNSUPPORTED, IB_SIDR_REJECT, IB_SIDR_NO_QP, IB_SIDR_REDIRECT, IB_SIDR_UNSUPPORTED_VERSION }; struct ib_cm_sidr_rep_event_param { enum ib_cm_sidr_status status; u32 qkey; u32 qpn; void *info; u8 info_len; }; struct ib_cm_event { enum ib_cm_event_type event; union { struct ib_cm_req_event_param req_rcvd; struct ib_cm_rep_event_param rep_rcvd; /* No data for RTU received events. */ struct ib_cm_rej_event_param rej_rcvd; struct ib_cm_mra_event_param mra_rcvd; struct ib_cm_lap_event_param lap_rcvd; struct ib_cm_apr_event_param apr_rcvd; /* No data for DREQ/DREP received events. */ struct ib_cm_sidr_req_event_param sidr_req_rcvd; struct ib_cm_sidr_rep_event_param sidr_rep_rcvd; enum ib_wc_status send_status; } param; void *private_data; }; #define CM_REQ_ATTR_ID cpu_to_be16(0x0010) #define CM_MRA_ATTR_ID cpu_to_be16(0x0011) #define CM_REJ_ATTR_ID cpu_to_be16(0x0012) #define CM_REP_ATTR_ID cpu_to_be16(0x0013) #define CM_RTU_ATTR_ID cpu_to_be16(0x0014) #define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) #define CM_DREP_ATTR_ID cpu_to_be16(0x0016) #define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) #define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) #define CM_LAP_ATTR_ID cpu_to_be16(0x0019) #define CM_APR_ATTR_ID cpu_to_be16(0x001A) /** * ib_cm_handler - User-defined callback to process communication events. * @cm_id: Communication identifier associated with the reported event. * @event: Information about the communication event. * * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events * generated as a result of listen requests result in the allocation of a * new @cm_id. The new @cm_id is returned to the user through this callback. * Clients are responsible for destroying the new @cm_id. For peer-to-peer * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds * to a user's existing communication identifier. * * Users may not call ib_destroy_cm_id while in the context of this callback; * however, returning a non-zero value instructs the communication manager to * destroy the @cm_id after the callback completes. */ typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, struct ib_cm_event *event); struct ib_cm_id { ib_cm_handler cm_handler; void *context; struct ib_device *device; __be64 service_id; __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ enum ib_cm_lap_state lap_state; /* internal CM/debug use */ __be32 local_id; __be32 remote_id; u32 remote_cm_qpn; /* 1 unless redirected */ }; /** * ib_create_cm_id - Allocate a communication identifier. * @device: Device associated with the cm_id. All related communication will * be associated with the specified device. * @cm_handler: Callback invoked to notify the user of CM events. * @context: User specified context associated with the communication * identifier. * * Communication identifiers are used to track connection states, service * ID resolution requests, and listen requests. */ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, ib_cm_handler cm_handler, void *context); /** * ib_destroy_cm_id - Destroy a connection identifier. * @cm_id: Connection identifier to destroy. * * This call blocks until the connection identifier is destroyed. */ void ib_destroy_cm_id(struct ib_cm_id *cm_id); #define IB_SERVICE_ID_AGN_MASK cpu_to_be64(0xFF00000000000000ULL) #define IB_CM_ASSIGN_SERVICE_ID cpu_to_be64(0x0200000000000000ULL) #define IB_CMA_SERVICE_ID cpu_to_be64(0x0000000001000000ULL) #define IB_CMA_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFF000000ULL) #define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL) #define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL) /** * ib_cm_listen - Initiates listening on the specified service ID for * connection and service ID resolution requests. * @cm_id: Connection identifier associated with the listen request. * @service_id: Service identifier matched against incoming connection * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. * @service_mask: Mask applied to service ID used to listen across a * range of service IDs. If set to 0, the service ID is matched * exactly. This parameter is ignored if %service_id is set to * IB_CM_ASSIGN_SERVICE_ID. */ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask); struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, __be64 service_id); struct ib_cm_req_param { struct ib_sa_path_rec *primary_path; struct ib_sa_path_rec *alternate_path; __be64 service_id; u32 qp_num; enum ib_qp_type qp_type; u32 starting_psn; const void *private_data; u8 private_data_len; u8 peer_to_peer; u8 responder_resources; u8 initiator_depth; u8 remote_cm_response_timeout; u8 flow_control; u8 local_cm_response_timeout; u8 retry_count; u8 rnr_retry_count; u8 max_cm_retries; u8 srq; }; /** * ib_send_cm_req - Sends a connection request to the remote node. * @cm_id: Connection identifier that will be associated with the * connection request. * @param: Connection request information needed to establish the * connection. */ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param); struct ib_cm_rep_param { u32 qp_num; u32 starting_psn; const void *private_data; u8 private_data_len; u8 responder_resources; u8 initiator_depth; u8 failover_accepted; u8 flow_control; u8 rnr_retry_count; u8 srq; }; /** * ib_send_cm_rep - Sends a connection reply in response to a connection * request. * @cm_id: Connection identifier that will be associated with the * connection request. * @param: Connection reply information needed to establish the * connection. */ int ib_send_cm_rep(struct ib_cm_id *cm_id, struct ib_cm_rep_param *param); /** * ib_send_cm_rtu - Sends a connection ready to use message in response * to a connection reply message. * @cm_id: Connection identifier associated with the connection request. * @private_data: Optional user-defined private data sent with the * ready to use message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_rtu(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * ib_send_cm_dreq - Sends a disconnection request for an existing * connection. * @cm_id: Connection identifier associated with the connection being * released. * @private_data: Optional user-defined private data sent with the * disconnection request message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * ib_send_cm_drep - Sends a disconnection reply to a disconnection request. * @cm_id: Connection identifier associated with the connection being * released. * @private_data: Optional user-defined private data sent with the * disconnection reply message. * @private_data_len: Size of the private data buffer, in bytes. * * If the cm_id is in the correct state, the CM will transition the connection * to the timewait state, even if an error occurs sending the DREP message. */ int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * ib_cm_notify - Notifies the CM of an event reported to the consumer. * @cm_id: Connection identifier to transition to established. * @event: Type of event. * * This routine should be invoked by users to notify the CM of relevant * communication events. Events that should be reported to the CM and * when to report them are: * * IB_EVENT_COMM_EST - Used when a message is received on a connected * QP before an RTU has been received. * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over * to the alternate path. */ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event); /** * ib_send_cm_rej - Sends a connection rejection message to the * remote node. * @cm_id: Connection identifier associated with the connection being * rejected. * @reason: Reason for the connection request rejection. * @ari: Optional additional rejection information. * @ari_length: Size of the additional rejection information, in bytes. * @private_data: Optional user-defined private data sent with the * rejection message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason, void *ari, u8 ari_length, const void *private_data, u8 private_data_len); #define IB_CM_MRA_FLAG_DELAY 0x80 /* Send MRA only after a duplicate msg */ /** * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection * message. * @cm_id: Connection identifier associated with the connection message. * @service_timeout: The lower 5-bits specify the maximum time required for * the sender to reply to the connection message. The upper 3-bits * specify additional control flags. * @private_data: Optional user-defined private data sent with the * message receipt acknowledgement. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_mra(struct ib_cm_id *cm_id, u8 service_timeout, const void *private_data, u8 private_data_len); /** * ib_send_cm_lap - Sends a load alternate path request. * @cm_id: Connection identifier associated with the load alternate path * message. * @alternate_path: A path record that identifies the alternate path to * load. * @private_data: Optional user-defined private data sent with the * load alternate path message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_lap(struct ib_cm_id *cm_id, struct ib_sa_path_rec *alternate_path, const void *private_data, u8 private_data_len); /** * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning * to a specified QP state. * @cm_id: Communication identifier associated with the QP attributes to * initialize. * @qp_attr: On input, specifies the desired QP state. On output, the * mandatory and desired optional attributes will be set in order to * modify the QP to the specified state. * @qp_attr_mask: The QP attribute mask that may be used to transition the * QP to the specified state. * * Users must set the @qp_attr->qp_state to the desired QP state. This call * will set all required attributes for the given transition, along with * known optional attributes. Users may override the attributes returned from * this call before calling ib_modify_qp. */ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); /** * ib_send_cm_apr - Sends an alternate path response message in response to * a load alternate path request. * @cm_id: Connection identifier associated with the alternate path response. * @status: Reply status sent with the alternate path response. * @info: Optional additional information sent with the alternate path * response. * @info_length: Size of the additional information, in bytes. * @private_data: Optional user-defined private data sent with the * alternate path response message. * @private_data_len: Size of the private data buffer, in bytes. */ int ib_send_cm_apr(struct ib_cm_id *cm_id, enum ib_cm_apr_status status, void *info, u8 info_length, const void *private_data, u8 private_data_len); struct ib_cm_sidr_req_param { struct ib_sa_path_rec *path; __be64 service_id; int timeout_ms; const void *private_data; u8 private_data_len; u8 max_cm_retries; }; /** * ib_send_cm_sidr_req - Sends a service ID resolution request to the * remote node. * @cm_id: Communication identifier that will be associated with the * service ID resolution request. * @param: Service ID resolution request information. */ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, struct ib_cm_sidr_req_param *param); struct ib_cm_sidr_rep_param { u32 qp_num; u32 qkey; enum ib_cm_sidr_status status; const void *info; u8 info_length; const void *private_data; u8 private_data_len; }; /** * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the * remote node. * @cm_id: Communication identifier associated with the received service ID * resolution request. * @param: Service ID resolution reply information. */ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_param *param); #endif /* IB_CM_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_cm.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_fmr_pool.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_fmr_pool.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_fmr_pool.h (revision 331772) @@ -1,93 +1,95 @@ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_FMR_POOL_H) #define IB_FMR_POOL_H #include struct ib_fmr_pool; /** * struct ib_fmr_pool_param - Parameters for creating FMR pool * @max_pages_per_fmr:Maximum number of pages per map request. * @page_shift: Log2 of sizeof "pages" mapped by this fmr * @access:Access flags for FMRs in pool. * @pool_size:Number of FMRs to allocate for pool. * @dirty_watermark:Flush is triggered when @dirty_watermark dirty * FMRs are present. * @flush_function:Callback called when unmapped FMRs are flushed and * more FMRs are possibly available for mapping * @flush_arg:Context passed to user's flush function. * @cache:If set, FMRs may be reused after unmapping for identical map * requests. */ struct ib_fmr_pool_param { int max_pages_per_fmr; int page_shift; enum ib_access_flags access; int pool_size; int dirty_watermark; void (*flush_function)(struct ib_fmr_pool *pool, void *arg); void *flush_arg; unsigned cache:1; }; struct ib_pool_fmr { struct ib_fmr *fmr; struct ib_fmr_pool *pool; struct list_head list; struct hlist_node cache_node; int ref_count; int remap_count; u64 io_virtual_address; int page_list_len; u64 page_list[0]; }; struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, struct ib_fmr_pool_param *params); void ib_destroy_fmr_pool(struct ib_fmr_pool *pool); int ib_flush_fmr_pool(struct ib_fmr_pool *pool); struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, u64 *page_list, int list_len, u64 io_virtual_address); int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); #endif /* IB_FMR_POOL_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_fmr_pool.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_hdrs.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_hdrs.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_hdrs.h (revision 331772) @@ -1,178 +1,181 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * $FreeBSD$ */ #ifndef IB_HDRS_H #define IB_HDRS_H #include #include #include #define IB_SEQ_NAK (3 << 29) /* AETH NAK opcode values */ #define IB_RNR_NAK 0x20 #define IB_NAK_PSN_ERROR 0x60 #define IB_NAK_INVALID_REQUEST 0x61 #define IB_NAK_REMOTE_ACCESS_ERROR 0x62 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 #define IB_NAK_INVALID_RD_REQUEST 0x64 #define IB_BTH_REQ_ACK BIT(31) #define IB_BTH_SOLICITED BIT(23) #define IB_BTH_MIG_REQ BIT(22) #define IB_GRH_VERSION 6 #define IB_GRH_VERSION_MASK 0xF #define IB_GRH_VERSION_SHIFT 28 #define IB_GRH_TCLASS_MASK 0xFF #define IB_GRH_TCLASS_SHIFT 20 #define IB_GRH_FLOW_MASK 0xFFFFF #define IB_GRH_FLOW_SHIFT 0 #define IB_GRH_NEXT_HDR 0x1B struct ib_reth { __be64 vaddr; /* potentially unaligned */ __be32 rkey; __be32 length; } __packed; struct ib_atomic_eth { __be64 vaddr; /* potentially unaligned */ __be32 rkey; __be64 swap_data; /* potentially unaligned */ __be64 compare_data; /* potentially unaligned */ } __packed; union ib_ehdrs { struct { __be32 deth[2]; __be32 imm_data; } ud; struct { struct ib_reth reth; __be32 imm_data; } rc; struct { __be32 aeth; __be64 atomic_ack_eth; /* potentially unaligned */ } __packed at; __be32 imm_data; __be32 aeth; __be32 ieth; struct ib_atomic_eth atomic_eth; } __packed; struct ib_other_headers { __be32 bth[3]; union ib_ehdrs u; } __packed; struct ib_header { __be16 lrh[4]; union { struct { struct ib_grh grh; struct ib_other_headers oth; } l; struct ib_other_headers oth; } u; } __packed; /* accessors for unaligned __be64 items */ static inline u64 ib_u64_get(__be64 *p) { return get_unaligned_be64(p); } static inline void ib_u64_put(u64 val, __be64 *p) { put_unaligned_be64(val, p); } static inline u64 get_ib_reth_vaddr(struct ib_reth *reth) { return ib_u64_get(&reth->vaddr); } static inline void put_ib_reth_vaddr(u64 val, struct ib_reth *reth) { ib_u64_put(val, &reth->vaddr); } static inline u64 get_ib_ateth_vaddr(struct ib_atomic_eth *ateth) { return ib_u64_get(&ateth->vaddr); } static inline void put_ib_ateth_vaddr(u64 val, struct ib_atomic_eth *ateth) { ib_u64_put(val, &ateth->vaddr); } static inline u64 get_ib_ateth_swap(struct ib_atomic_eth *ateth) { return ib_u64_get(&ateth->swap_data); } static inline void put_ib_ateth_swap(u64 val, struct ib_atomic_eth *ateth) { ib_u64_put(val, &ateth->swap_data); } static inline u64 get_ib_ateth_compare(struct ib_atomic_eth *ateth) { return ib_u64_get(&ateth->compare_data); } static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth) { ib_u64_put(val, &ateth->compare_data); } #endif /* IB_HDRS_H */ Index: stable/11/sys/ofed/include/rdma/ib_mad.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_mad.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_mad.h (revision 331772) @@ -1,849 +1,851 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004-2006 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_MAD_H) #define IB_MAD_H #include #include #include /* Management base versions */ #define IB_MGMT_BASE_VERSION 1 #define OPA_MGMT_BASE_VERSION 0x80 #define OPA_SMP_CLASS_VERSION 0x80 /* Management classes */ #define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 #define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 #define IB_MGMT_CLASS_SUBN_ADM 0x03 #define IB_MGMT_CLASS_PERF_MGMT 0x04 #define IB_MGMT_CLASS_BM 0x05 #define IB_MGMT_CLASS_DEVICE_MGMT 0x06 #define IB_MGMT_CLASS_CM 0x07 #define IB_MGMT_CLASS_SNMP 0x08 #define IB_MGMT_CLASS_DEVICE_ADM 0x10 #define IB_MGMT_CLASS_BOOT_MGMT 0x11 #define IB_MGMT_CLASS_BIS 0x12 #define IB_MGMT_CLASS_CONG_MGMT 0x21 #define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 #define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F #define IB_OPENIB_OUI (0x001405) /* Management methods */ #define IB_MGMT_METHOD_GET 0x01 #define IB_MGMT_METHOD_SET 0x02 #define IB_MGMT_METHOD_GET_RESP 0x81 #define IB_MGMT_METHOD_SEND 0x03 #define IB_MGMT_METHOD_TRAP 0x05 #define IB_MGMT_METHOD_REPORT 0x06 #define IB_MGMT_METHOD_REPORT_RESP 0x86 #define IB_MGMT_METHOD_TRAP_REPRESS 0x07 #define IB_MGMT_METHOD_RESP 0x80 #define IB_BM_ATTR_MOD_RESP cpu_to_be32(1) #define IB_MGMT_MAX_METHODS 128 /* MAD Status field bit masks */ #define IB_MGMT_MAD_STATUS_SUCCESS 0x0000 #define IB_MGMT_MAD_STATUS_BUSY 0x0001 #define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002 #define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004 #define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008 #define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c #define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c /* RMPP information */ #define IB_MGMT_RMPP_VERSION 1 #define IB_MGMT_RMPP_TYPE_DATA 1 #define IB_MGMT_RMPP_TYPE_ACK 2 #define IB_MGMT_RMPP_TYPE_STOP 3 #define IB_MGMT_RMPP_TYPE_ABORT 4 #define IB_MGMT_RMPP_FLAG_ACTIVE 1 #define IB_MGMT_RMPP_FLAG_FIRST (1<<1) #define IB_MGMT_RMPP_FLAG_LAST (1<<2) #define IB_MGMT_RMPP_NO_RESPTIME 0x1F #define IB_MGMT_RMPP_STATUS_SUCCESS 0 #define IB_MGMT_RMPP_STATUS_RESX 1 #define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 #define IB_MGMT_RMPP_STATUS_T2L 118 #define IB_MGMT_RMPP_STATUS_BAD_LEN 119 #define IB_MGMT_RMPP_STATUS_BAD_SEG 120 #define IB_MGMT_RMPP_STATUS_BADT 121 #define IB_MGMT_RMPP_STATUS_W2S 122 #define IB_MGMT_RMPP_STATUS_S2B 123 #define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 #define IB_MGMT_RMPP_STATUS_UNV 125 #define IB_MGMT_RMPP_STATUS_TMR 126 #define IB_MGMT_RMPP_STATUS_UNSPEC 127 #define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 #define IB_QP0 0 #define IB_QP1 cpu_to_be32(1) #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 #define IB_DEFAULT_PKEY_PARTIAL 0x7FFF #define IB_DEFAULT_PKEY_FULL 0xFFFF /* * Generic trap/notice types */ #define IB_NOTICE_TYPE_FATAL 0x80 #define IB_NOTICE_TYPE_URGENT 0x81 #define IB_NOTICE_TYPE_SECURITY 0x82 #define IB_NOTICE_TYPE_SM 0x83 #define IB_NOTICE_TYPE_INFO 0x84 /* * Generic trap/notice producers */ #define IB_NOTICE_PROD_CA cpu_to_be16(1) #define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) #define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) #define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) enum { IB_MGMT_MAD_HDR = 24, IB_MGMT_MAD_DATA = 232, IB_MGMT_RMPP_HDR = 36, IB_MGMT_RMPP_DATA = 220, IB_MGMT_VENDOR_HDR = 40, IB_MGMT_VENDOR_DATA = 216, IB_MGMT_SA_HDR = 56, IB_MGMT_SA_DATA = 200, IB_MGMT_DEVICE_HDR = 64, IB_MGMT_DEVICE_DATA = 192, IB_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + IB_MGMT_MAD_DATA, OPA_MGMT_MAD_DATA = 2024, OPA_MGMT_RMPP_DATA = 2012, OPA_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + OPA_MGMT_MAD_DATA, }; struct ib_mad_hdr { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; __be16 class_specific; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; }; struct ib_rmpp_hdr { u8 rmpp_version; u8 rmpp_type; u8 rmpp_rtime_flags; u8 rmpp_status; __be32 seg_num; __be32 paylen_newwin; }; typedef u64 __bitwise ib_sa_comp_mask; #define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n))) /* * ib_sa_hdr and ib_sa_mad structures must be packed because they have * 64-bit fields that are only 32-bit aligned. 64-bit architectures will * lay them out wrong otherwise. (And unfortunately they are sent on * the wire so we can't change the layout) */ struct ib_sa_hdr { __be64 sm_key; __be16 attr_offset; __be16 reserved; ib_sa_comp_mask comp_mask; } __attribute__ ((packed)); struct ib_mad { struct ib_mad_hdr mad_hdr; u8 data[IB_MGMT_MAD_DATA]; }; struct opa_mad { struct ib_mad_hdr mad_hdr; u8 data[OPA_MGMT_MAD_DATA]; }; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 data[IB_MGMT_RMPP_DATA]; }; struct opa_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 data[OPA_MGMT_RMPP_DATA]; }; struct ib_sa_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; u8 data[IB_MGMT_SA_DATA]; } __attribute__ ((packed)); struct ib_vendor_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; u8 reserved; u8 oui[3]; u8 data[IB_MGMT_VENDOR_DATA]; }; #define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001) #define IB_CLASS_PORT_INFO_RESP_TIME_MASK 0x1F #define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5 struct ib_class_port_info { u8 base_version; u8 class_version; __be16 capability_mask; /* 27 bits for cap_mask2, 5 bits for resp_time */ __be32 cap_mask2_resp_time; u8 redirect_gid[16]; __be32 redirect_tcslfl; __be16 redirect_lid; __be16 redirect_pkey; __be32 redirect_qp; __be32 redirect_qkey; u8 trap_gid[16]; __be32 trap_tcslfl; __be16 trap_lid; __be16 trap_pkey; __be32 trap_hlqp; __be32 trap_qkey; }; /** * ib_get_cpi_resp_time - Returns the resp_time value from * cap_mask2_resp_time in ib_class_port_info. * @cpi: A struct ib_class_port_info mad. */ static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi) { return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) & IB_CLASS_PORT_INFO_RESP_TIME_MASK); } /** * ib_set_cpi_resptime - Sets the response time in an * ib_class_port_info mad. * @cpi: A struct ib_class_port_info. * @rtime: The response time to set. */ static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi, u8 rtime) { cpi->cap_mask2_resp_time = (cpi->cap_mask2_resp_time & cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) | cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK); } /** * ib_get_cpi_capmask2 - Returns the capmask2 value from * cap_mask2_resp_time in ib_class_port_info. * @cpi: A struct ib_class_port_info mad. */ static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi) { return (be32_to_cpu(cpi->cap_mask2_resp_time) >> IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); } /** * ib_set_cpi_capmask2 - Sets the capmask2 in an * ib_class_port_info mad. * @cpi: A struct ib_class_port_info. * @capmask2: The capmask2 to set. */ static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi, u32 capmask2) { cpi->cap_mask2_resp_time = (cpi->cap_mask2_resp_time & cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) | cpu_to_be32(capmask2 << IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE); } struct ib_mad_notice_attr { u8 generic_type; u8 prod_type_msb; __be16 prod_type_lsb; __be16 trap_num; __be16 issuer_lid; __be16 toggle_count; union { struct { u8 details[54]; } raw_data; struct { __be16 reserved; __be16 lid; /* where violation happened */ u8 port_num; /* where violation happened */ } __packed ntc_129_131; struct { __be16 reserved; __be16 lid; /* LID where change occurred */ u8 reserved2; u8 local_changes; /* low bit - local changes */ __be32 new_cap_mask; /* new capability mask */ u8 reserved3; u8 change_flags; /* low 3 bits only */ } __packed ntc_144; struct { __be16 reserved; __be16 lid; /* lid where sys guid changed */ __be16 reserved2; __be64 new_sys_guid; } __packed ntc_145; struct { __be16 reserved; __be16 lid; __be16 dr_slid; u8 method; u8 reserved2; __be16 attr_id; __be32 attr_mod; __be64 mkey; u8 reserved3; u8 dr_trunc_hop; u8 dr_rtn_path[30]; } __packed ntc_256; struct { __be16 reserved; __be16 lid1; __be16 lid2; __be32 key; __be32 sl_qp1; /* SL: high 4 bits */ __be32 qp2; /* high 8 bits reserved */ union ib_gid gid1; union ib_gid gid2; } __packed ntc_257_258; } details; }; /** * ib_mad_send_buf - MAD data buffer and work request for sends. * @next: A pointer used to chain together MADs for posting. * @mad: References an allocated MAD data buffer for MADs that do not have * RMPP active. For MADs using RMPP, references the common and management * class specific headers. * @mad_agent: MAD agent that allocated the buffer. * @ah: The address handle to use when sending the MAD. * @context: User-controlled context fields. * @hdr_len: Indicates the size of the data header of the MAD. This length * includes the common MAD, RMPP, and class specific headers. * @data_len: Indicates the total size of user-transferred data. * @seg_count: The number of RMPP segments allocated for this send. * @seg_size: Size of the data in each RMPP segment. This does not include * class specific headers. * @seg_rmpp_size: Size of each RMPP segment including the class specific * headers. * @timeout_ms: Time to wait for a response. * @retries: Number of times to retry a request for a response. For MADs * using RMPP, this applies per window. On completion, returns the number * of retries needed to complete the transfer. * * Users are responsible for initializing the MAD buffer itself, with the * exception of any RMPP header. Additional segment buffer space allocated * beyond data_len is padding. */ struct ib_mad_send_buf { struct ib_mad_send_buf *next; void *mad; struct ib_mad_agent *mad_agent; struct ib_ah *ah; void *context[2]; int hdr_len; int data_len; int seg_count; int seg_size; int seg_rmpp_size; int timeout_ms; int retries; }; /** * ib_response_mad - Returns if the specified MAD has been generated in * response to a sent request or trap. */ int ib_response_mad(const struct ib_mad_hdr *hdr); /** * ib_get_rmpp_resptime - Returns the RMPP response time. * @rmpp_hdr: An RMPP header. */ static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) { return rmpp_hdr->rmpp_rtime_flags >> 3; } /** * ib_get_rmpp_flags - Returns the RMPP flags. * @rmpp_hdr: An RMPP header. */ static inline u8 ib_get_rmpp_flags(const struct ib_rmpp_hdr *rmpp_hdr) { return rmpp_hdr->rmpp_rtime_flags & 0x7; } /** * ib_set_rmpp_resptime - Sets the response time in an RMPP header. * @rmpp_hdr: An RMPP header. * @rtime: The response time to set. */ static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) { rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); } /** * ib_set_rmpp_flags - Sets the flags in an RMPP header. * @rmpp_hdr: An RMPP header. * @flags: The flags to set. */ static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) { rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) | (flags & 0x7); } struct ib_mad_agent; struct ib_mad_send_wc; struct ib_mad_recv_wc; /** * ib_mad_send_handler - callback handler for a sent MAD. * @mad_agent: MAD agent that sent the MAD. * @mad_send_wc: Send work completion information on the sent MAD. */ typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc); /** * ib_mad_snoop_handler - Callback handler for snooping sent MADs. * @mad_agent: MAD agent that snooped the MAD. * @send_buf: send MAD data buffer. * @mad_send_wc: Work completion information on the sent MAD. Valid * only for snooping that occurs on a send completion. * * Clients snooping MADs should not modify data referenced by the @send_buf * or @mad_send_wc. */ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_send_wc *mad_send_wc); /** * ib_mad_recv_handler - callback handler for a received MAD. * @mad_agent: MAD agent requesting the received MAD. * @send_buf: Send buffer if found, else NULL * @mad_recv_wc: Received work completion information on the received MAD. * * MADs received in response to a send request operation will be handed to * the user before the send operation completes. All data buffers given * to registered agents through this routine are owned by the receiving * client, except for snooping agents. Clients snooping MADs should not * modify the data referenced by @mad_recv_wc. */ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc); /** * ib_mad_agent - Used to track MAD registration with the access layer. * @device: Reference to device registration is on. * @qp: Reference to QP used for sending and receiving MADs. * @mr: Memory region for system memory usable for DMA. * @recv_handler: Callback handler for a received MAD. * @send_handler: Callback handler for a sent MAD. * @snoop_handler: Callback handler for snooped sent MADs. * @context: User-specified context associated with this registration. * @hi_tid: Access layer assigned transaction ID for this client. * Unsolicited MADs sent by this client will have the upper 32-bits * of their TID set to this value. * @flags: registration flags * @port_num: Port number on which QP is registered * @rmpp_version: If set, indicates the RMPP version used by this agent. */ enum { IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP, }; struct ib_mad_agent { struct ib_device *device; struct ib_qp *qp; ib_mad_recv_handler recv_handler; ib_mad_send_handler send_handler; ib_mad_snoop_handler snoop_handler; void *context; u32 hi_tid; u32 flags; u8 port_num; u8 rmpp_version; }; /** * ib_mad_send_wc - MAD send completion information. * @send_buf: Send MAD data buffer associated with the send MAD request. * @status: Completion status. * @vendor_err: Optional vendor error information returned with a failed * request. */ struct ib_mad_send_wc { struct ib_mad_send_buf *send_buf; enum ib_wc_status status; u32 vendor_err; }; /** * ib_mad_recv_buf - received MAD buffer information. * @list: Reference to next data buffer for a received RMPP MAD. * @grh: References a data buffer containing the global route header. * The data refereced by this buffer is only valid if the GRH is * valid. * @mad: References the start of the received MAD. */ struct ib_mad_recv_buf { struct list_head list; struct ib_grh *grh; union { struct ib_mad *mad; struct opa_mad *opa_mad; }; }; /** * ib_mad_recv_wc - received MAD information. * @wc: Completion information for the received data. * @recv_buf: Specifies the location of the received data buffer(s). * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. * @mad_len: The length of the received MAD, without duplicated headers. * @mad_seg_size: The size of individual MAD segments * * For received response, the wr_id contains a pointer to the ib_mad_send_buf * for the corresponding send request. */ struct ib_mad_recv_wc { struct ib_wc *wc; struct ib_mad_recv_buf recv_buf; struct list_head rmpp_list; int mad_len; size_t mad_seg_size; }; /** * ib_mad_reg_req - MAD registration request * @mgmt_class: Indicates which management class of MADs should be receive * by the caller. This field is only required if the user wishes to * receive unsolicited MADs, otherwise it should be 0. * @mgmt_class_version: Indicates which version of MADs for the given * management class to receive. * @oui: Indicates IEEE OUI when mgmt_class is a vendor class * in the range from 0x30 to 0x4f. Otherwise not used. * @method_mask: The caller will receive unsolicited MADs for any method * where @method_mask = 1. * */ struct ib_mad_reg_req { u8 mgmt_class; u8 mgmt_class_version; u8 oui[3]; DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); }; /** * ib_register_mad_agent - Register to send/receive MADs. * @device: The device to register with. * @port_num: The port on the specified device to use. * @qp_type: Specifies which QP to access. Must be either * IB_QPT_SMI or IB_QPT_GSI. * @mad_reg_req: Specifies which unsolicited MADs should be received * by the caller. This parameter may be NULL if the caller only * wishes to receive solicited responses. * @rmpp_version: If set, indicates that the client will send * and receive MADs that contain the RMPP header for the given version. * If set to 0, indicates that RMPP is not used by this client. * @send_handler: The completion callback routine invoked after a send * request has completed. * @recv_handler: The completion callback routine invoked for a received * MAD. * @context: User specified context associated with the registration. * @registration_flags: Registration flags to set for this agent */ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 port_num, enum ib_qp_type qp_type, struct ib_mad_reg_req *mad_reg_req, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, void *context, u32 registration_flags); enum ib_mad_snoop_flags { /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ IB_MAD_SNOOP_RECVS = (1<<4) /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ }; /** * ib_register_mad_snoop - Register to snoop sent and received MADs. * @device: The device to register with. * @port_num: The port on the specified device to use. * @qp_type: Specifies which QP traffic to snoop. Must be either * IB_QPT_SMI or IB_QPT_GSI. * @mad_snoop_flags: Specifies information where snooping occurs. * @send_handler: The callback routine invoked for a snooped send. * @recv_handler: The callback routine invoked for a snooped receive. * @context: User specified context associated with the registration. */ struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, u8 port_num, enum ib_qp_type qp_type, int mad_snoop_flags, ib_mad_snoop_handler snoop_handler, ib_mad_recv_handler recv_handler, void *context); /** * ib_unregister_mad_agent - Unregisters a client from using MAD services. * @mad_agent: Corresponding MAD registration request to deregister. * * After invoking this routine, MAD services are no longer usable by the * client on the associated QP. */ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); /** * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated * with the registered client. * @send_buf: Specifies the information needed to send the MAD(s). * @bad_send_buf: Specifies the MAD on which an error was encountered. This * parameter is optional if only a single MAD is posted. * * Sent MADs are not guaranteed to complete in the order that they were posted. * * If the MAD requires RMPP, the data buffer should contain a single copy * of the common MAD, RMPP, and class specific headers, followed by the class * defined data. If the class defined data would not divide evenly into * RMPP segments, then space must be allocated at the end of the referenced * buffer for any required padding. To indicate the amount of class defined * data being transferred, the paylen_newwin field in the RMPP header should * be set to the size of the class specific header plus the amount of class * defined data being transferred. The paylen_newwin field should be * specified in network-byte order. */ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, struct ib_mad_send_buf **bad_send_buf); /** * ib_free_recv_mad - Returns data buffers used to receive a MAD. * @mad_recv_wc: Work completion information for a received MAD. * * Clients receiving MADs through their ib_mad_recv_handler must call this * routine to return the work completion buffers to the access layer. */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); /** * ib_cancel_mad - Cancels an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. * @send_buf: Indicates the MAD to cancel. * * MADs will be returned to the user through the corresponding * ib_mad_send_handler. */ void ib_cancel_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf); /** * ib_modify_mad - Modifies an outstanding send MAD operation. * @mad_agent: Specifies the registration associated with sent MAD. * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ int ib_modify_mad(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, u32 timeout_ms); /** * ib_redirect_mad_qp - Registers a QP for MAD services. * @qp: Reference to a QP that requires MAD services. * @rmpp_version: If set, indicates that the client will send * and receive MADs that contain the RMPP header for the given version. * If set to 0, indicates that RMPP is not used by this client. * @send_handler: The completion callback routine invoked after a send * request has completed. * @recv_handler: The completion callback routine invoked for a received * MAD. * @context: User specified context associated with the registration. * * Use of this call allows clients to use MAD services, such as RMPP, * on user-owned QPs. After calling this routine, users may send * MADs on the specified QP by calling ib_mad_post_send. */ struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, void *context); /** * ib_process_mad_wc - Processes a work completion associated with a * MAD sent or received on a redirected QP. * @mad_agent: Specifies the registered MAD service using the redirected QP. * @wc: References a work completion associated with a sent or received * MAD segment. * * This routine is used to complete or continue processing on a MAD request. * If the work completion is associated with a send operation, calling * this routine is required to continue an RMPP transfer or to wait for a * corresponding response, if it is a request. If the work completion is * associated with a receive operation, calling this routine is required to * process an inbound or outbound RMPP transfer, or to match a response MAD * with its corresponding request. */ int ib_process_mad_wc(struct ib_mad_agent *mad_agent, struct ib_wc *wc); /** * ib_create_send_mad - Allocate and initialize a data buffer and work request * for sending a MAD. * @mad_agent: Specifies the registered MAD service to associate with the MAD. * @remote_qpn: Specifies the QPN of the receiving node. * @pkey_index: Specifies which PKey the MAD will be sent using. This field * is valid only if the remote_qpn is QP 1. * @rmpp_active: Indicates if the send will enable RMPP. * @hdr_len: Indicates the size of the data header of the MAD. This length * should include the common MAD header, RMPP header, plus any class * specific header. * @data_len: Indicates the size of any user-transferred data. The call will * automatically adjust the allocated buffer size to account for any * additional padding that may be necessary. * @gfp_mask: GFP mask used for the memory allocation. * @base_version: Base Version of this MAD * * This routine allocates a MAD for sending. The returned MAD send buffer * will reference a data buffer usable for sending a MAD, along * with an initialized work request structure. Users may modify the returned * MAD data buffer before posting the send. * * The returned MAD header, class specific headers, and any padding will be * cleared. Users are responsible for initializing the common MAD header, * any class specific header, and MAD data area. * If @rmpp_active is set, the RMPP header will be initialized for sending. */ struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, int hdr_len, int data_len, gfp_t gfp_mask, u8 base_version); /** * ib_is_mad_class_rmpp - returns whether given management class * supports RMPP. * @mgmt_class: management class * * This routine returns whether the management class supports RMPP. */ int ib_is_mad_class_rmpp(u8 mgmt_class); /** * ib_get_mad_data_offset - returns the data offset for a given * management class. * @mgmt_class: management class * * This routine returns the data offset in the MAD for the management * class requested. */ int ib_get_mad_data_offset(u8 mgmt_class); /** * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment. * @send_buf: Previously allocated send data buffer. * @seg_num: number of segment to return * * This routine returns a pointer to the data buffer of an RMPP MAD. * Users must provide synchronization to @send_buf around this call. */ void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num); /** * ib_free_send_mad - Returns data buffers used to send a MAD. * @send_buf: Previously allocated send data buffer. */ void ib_free_send_mad(struct ib_mad_send_buf *send_buf); /** * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP. * @agent: the agent in question * @return: true if agent is performing rmpp, false otherwise. */ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent); #endif /* IB_MAD_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_mad.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_marshall.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_marshall.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_marshall.h (revision 331772) @@ -1,53 +1,55 @@ /* * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_USER_MARSHALL_H) #define IB_USER_MARSHALL_H #include #include #include #include void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, struct ib_qp_attr *src); void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, struct ib_ah_attr *src); void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, struct ib_sa_path_rec *src); void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst, struct ib_user_path_rec *src); #endif /* IB_USER_MARSHALL_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_marshall.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_pack.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_pack.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_pack.h (revision 331772) @@ -1,305 +1,307 @@ /* * Copyright (c) 2004 Topspin Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_PACK_H #define IB_PACK_H #include enum { IB_LRH_BYTES = 8, IB_ETH_BYTES = 14, IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, IB_IP4_BYTES = 20, IB_UDP_BYTES = 8, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 }; struct ib_field { size_t struct_offset_bytes; size_t struct_size_bytes; int offset_words; int offset_bits; int size_bits; char *field_name; }; #define RESERVED \ .field_name = "reserved" /* * This macro cleans up the definitions of constants for BTH opcodes. * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives * the correct value. * * In short, user code should use the constants defined using the * macro rather than worrying about adding together other constants. */ #define IB_OPCODE(transport, op) \ IB_OPCODE_ ## transport ## _ ## op = \ IB_OPCODE_ ## transport + IB_OPCODE_ ## op enum { /* transport types -- just used to define real constants */ IB_OPCODE_RC = 0x00, IB_OPCODE_UC = 0x20, IB_OPCODE_RD = 0x40, IB_OPCODE_UD = 0x60, /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */ IB_OPCODE_CNP = 0x80, /* operations -- just used to define real constants */ IB_OPCODE_SEND_FIRST = 0x00, IB_OPCODE_SEND_MIDDLE = 0x01, IB_OPCODE_SEND_LAST = 0x02, IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, IB_OPCODE_SEND_ONLY = 0x04, IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, IB_OPCODE_RDMA_WRITE_FIRST = 0x06, IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, IB_OPCODE_RDMA_WRITE_LAST = 0x08, IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, IB_OPCODE_RDMA_READ_REQUEST = 0x0c, IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, IB_OPCODE_ACKNOWLEDGE = 0x11, IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, IB_OPCODE_COMPARE_SWAP = 0x13, IB_OPCODE_FETCH_ADD = 0x14, /* opcode 0x15 is reserved */ IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16, IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17, /* real constants follow -- see comment about above IB_OPCODE() macro for more details */ /* RC */ IB_OPCODE(RC, SEND_FIRST), IB_OPCODE(RC, SEND_MIDDLE), IB_OPCODE(RC, SEND_LAST), IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), IB_OPCODE(RC, SEND_ONLY), IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), IB_OPCODE(RC, RDMA_WRITE_FIRST), IB_OPCODE(RC, RDMA_WRITE_MIDDLE), IB_OPCODE(RC, RDMA_WRITE_LAST), IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), IB_OPCODE(RC, RDMA_WRITE_ONLY), IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), IB_OPCODE(RC, RDMA_READ_REQUEST), IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST), IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), IB_OPCODE(RC, ACKNOWLEDGE), IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RC, COMPARE_SWAP), IB_OPCODE(RC, FETCH_ADD), IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE), IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE), /* UC */ IB_OPCODE(UC, SEND_FIRST), IB_OPCODE(UC, SEND_MIDDLE), IB_OPCODE(UC, SEND_LAST), IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), IB_OPCODE(UC, SEND_ONLY), IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), IB_OPCODE(UC, RDMA_WRITE_FIRST), IB_OPCODE(UC, RDMA_WRITE_MIDDLE), IB_OPCODE(UC, RDMA_WRITE_LAST), IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), IB_OPCODE(UC, RDMA_WRITE_ONLY), IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), /* RD */ IB_OPCODE(RD, SEND_FIRST), IB_OPCODE(RD, SEND_MIDDLE), IB_OPCODE(RD, SEND_LAST), IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), IB_OPCODE(RD, SEND_ONLY), IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), IB_OPCODE(RD, RDMA_WRITE_FIRST), IB_OPCODE(RD, RDMA_WRITE_MIDDLE), IB_OPCODE(RD, RDMA_WRITE_LAST), IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), IB_OPCODE(RD, RDMA_WRITE_ONLY), IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), IB_OPCODE(RD, RDMA_READ_REQUEST), IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), IB_OPCODE(RD, ACKNOWLEDGE), IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), IB_OPCODE(RD, COMPARE_SWAP), IB_OPCODE(RD, FETCH_ADD), /* UD */ IB_OPCODE(UD, SEND_ONLY), IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) }; enum { IB_LNH_RAW = 0, IB_LNH_IP = 1, IB_LNH_IBA_LOCAL = 2, IB_LNH_IBA_GLOBAL = 3 }; struct ib_unpacked_lrh { u8 virtual_lane; u8 link_version; u8 service_level; u8 link_next_header; __be16 destination_lid; __be16 packet_length; __be16 source_lid; }; struct ib_unpacked_grh { u8 ip_version; u8 traffic_class; __be32 flow_label; __be16 payload_length; u8 next_header; u8 hop_limit; union ib_gid source_gid; union ib_gid destination_gid; }; struct ib_unpacked_bth { u8 opcode; u8 solicited_event; u8 mig_req; u8 pad_count; u8 transport_header_version; __be16 pkey; __be32 destination_qpn; u8 ack_req; __be32 psn; }; struct ib_unpacked_deth { __be32 qkey; __be32 source_qpn; }; struct ib_unpacked_eth { u8 dmac_h[4]; u8 dmac_l[2]; u8 smac_h[2]; u8 smac_l[4]; __be16 type; }; struct ib_unpacked_ip4 { u8 ver; u8 hdr_len; u8 tos; __be16 tot_len; __be16 id; __be16 frag_off; u8 ttl; u8 protocol; __sum16 check; __be32 saddr; __be32 daddr; }; struct ib_unpacked_udp { __be16 sport; __be16 dport; __be16 length; __be16 csum; }; struct ib_unpacked_vlan { __be16 tag; __be16 type; }; struct ib_ud_header { int lrh_present; struct ib_unpacked_lrh lrh; int eth_present; struct ib_unpacked_eth eth; int vlan_present; struct ib_unpacked_vlan vlan; int grh_present; struct ib_unpacked_grh grh; int ipv4_present; struct ib_unpacked_ip4 ip4; int udp_present; struct ib_unpacked_udp udp; struct ib_unpacked_bth bth; struct ib_unpacked_deth deth; int immediate_present; __be32 immediate_data; }; void ib_pack(const struct ib_field *desc, int desc_len, void *structure, void *buf); void ib_unpack(const struct ib_field *desc, int desc_len, void *buf, void *structure); __sum16 ib_ud_ip4_csum(struct ib_ud_header *header); int ib_ud_header_init(int payload_bytes, int lrh_present, int eth_present, int vlan_present, int grh_present, int ip_version, int udp_present, int immediate_present, struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, void *buf); int ib_ud_header_unpack(void *buf, struct ib_ud_header *header); #endif /* IB_PACK_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_pack.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_pma.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_pma.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_pma.h (revision 331772) @@ -1,157 +1,159 @@ /* * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. * All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_PMA_H) #define IB_PMA_H #include /* * PMA class portinfo capability mask bits */ #define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8) #define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9) #define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10) #define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12) #define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) #define IB_PMA_PORT_SAMPLES_CONTROL cpu_to_be16(0x0010) #define IB_PMA_PORT_SAMPLES_RESULT cpu_to_be16(0x0011) #define IB_PMA_PORT_COUNTERS cpu_to_be16(0x0012) #define IB_PMA_PORT_COUNTERS_EXT cpu_to_be16(0x001D) #define IB_PMA_PORT_SAMPLES_RESULT_EXT cpu_to_be16(0x001E) struct ib_pma_mad { struct ib_mad_hdr mad_hdr; u8 reserved[40]; u8 data[192]; } __packed; struct ib_pma_portsamplescontrol { u8 opcode; u8 port_select; u8 tick; u8 counter_width; /* resv: 7:3, counter width: 2:0 */ __be32 counter_mask0_9; /* 2, 10 3-bit fields */ __be16 counter_mask10_14; /* 1, 5 3-bit fields */ u8 sample_mechanisms; u8 sample_status; /* only lower 2 bits */ __be64 option_mask; __be64 vendor_mask; __be32 sample_start; __be32 sample_interval; __be16 tag; __be16 counter_select[15]; __be32 reserved1; __be64 samples_only_option_mask; __be32 reserved2[28]; }; struct ib_pma_portsamplesresult { __be16 tag; __be16 sample_status; /* only lower 2 bits */ __be32 counter[15]; }; struct ib_pma_portsamplesresult_ext { __be16 tag; __be16 sample_status; /* only lower 2 bits */ __be32 extended_width; /* only upper 2 bits */ __be64 counter[15]; }; struct ib_pma_portcounters { u8 reserved; u8 port_select; __be16 counter_select; __be16 symbol_error_counter; u8 link_error_recovery_counter; u8 link_downed_counter; __be16 port_rcv_errors; __be16 port_rcv_remphys_errors; __be16 port_rcv_switch_relay_errors; __be16 port_xmit_discards; u8 port_xmit_constraint_errors; u8 port_rcv_constraint_errors; u8 reserved1; u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */ __be16 reserved2; __be16 vl15_dropped; __be32 port_xmit_data; __be32 port_rcv_data; __be32 port_xmit_packets; __be32 port_rcv_packets; __be32 port_xmit_wait; } __packed; #define IB_PMA_SEL_SYMBOL_ERROR cpu_to_be16(0x0001) #define IB_PMA_SEL_LINK_ERROR_RECOVERY cpu_to_be16(0x0002) #define IB_PMA_SEL_LINK_DOWNED cpu_to_be16(0x0004) #define IB_PMA_SEL_PORT_RCV_ERRORS cpu_to_be16(0x0008) #define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS cpu_to_be16(0x0010) #define IB_PMA_SEL_PORT_XMIT_DISCARDS cpu_to_be16(0x0040) #define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS cpu_to_be16(0x0200) #define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS cpu_to_be16(0x0400) #define IB_PMA_SEL_PORT_VL15_DROPPED cpu_to_be16(0x0800) #define IB_PMA_SEL_PORT_XMIT_DATA cpu_to_be16(0x1000) #define IB_PMA_SEL_PORT_RCV_DATA cpu_to_be16(0x2000) #define IB_PMA_SEL_PORT_XMIT_PACKETS cpu_to_be16(0x4000) #define IB_PMA_SEL_PORT_RCV_PACKETS cpu_to_be16(0x8000) struct ib_pma_portcounters_ext { u8 reserved; u8 port_select; __be16 counter_select; __be32 reserved1; __be64 port_xmit_data; __be64 port_rcv_data; __be64 port_xmit_packets; __be64 port_rcv_packets; __be64 port_unicast_xmit_packets; __be64 port_unicast_rcv_packets; __be64 port_multicast_xmit_packets; __be64 port_multicast_rcv_packets; } __packed; #define IB_PMA_SELX_PORT_XMIT_DATA cpu_to_be16(0x0001) #define IB_PMA_SELX_PORT_RCV_DATA cpu_to_be16(0x0002) #define IB_PMA_SELX_PORT_XMIT_PACKETS cpu_to_be16(0x0004) #define IB_PMA_SELX_PORT_RCV_PACKETS cpu_to_be16(0x0008) #define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS cpu_to_be16(0x0010) #define IB_PMA_SELX_PORT_UNI_RCV_PACKETS cpu_to_be16(0x0020) #define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS cpu_to_be16(0x0040) #define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS cpu_to_be16(0x0080) #endif /* IB_PMA_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_pma.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_sa.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_sa.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_sa.h (revision 331772) @@ -1,465 +1,467 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_SA_H #define IB_SA_H #include #include #include #include #include enum { IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ IB_SA_METHOD_GET_TABLE = 0x12, IB_SA_METHOD_GET_TABLE_RESP = 0x92, IB_SA_METHOD_DELETE = 0x15, IB_SA_METHOD_DELETE_RESP = 0x95, IB_SA_METHOD_GET_MULTI = 0x14, IB_SA_METHOD_GET_MULTI_RESP = 0x94, IB_SA_METHOD_GET_TRACE_TBL = 0x13 }; enum { IB_SA_ATTR_CLASS_PORTINFO = 0x01, IB_SA_ATTR_NOTICE = 0x02, IB_SA_ATTR_INFORM_INFO = 0x03, IB_SA_ATTR_NODE_REC = 0x11, IB_SA_ATTR_PORT_INFO_REC = 0x12, IB_SA_ATTR_SL2VL_REC = 0x13, IB_SA_ATTR_SWITCH_REC = 0x14, IB_SA_ATTR_LINEAR_FDB_REC = 0x15, IB_SA_ATTR_RANDOM_FDB_REC = 0x16, IB_SA_ATTR_MCAST_FDB_REC = 0x17, IB_SA_ATTR_SM_INFO_REC = 0x18, IB_SA_ATTR_LINK_REC = 0x20, IB_SA_ATTR_GUID_INFO_REC = 0x30, IB_SA_ATTR_SERVICE_REC = 0x31, IB_SA_ATTR_PARTITION_REC = 0x33, IB_SA_ATTR_PATH_REC = 0x35, IB_SA_ATTR_VL_ARB_REC = 0x36, IB_SA_ATTR_MC_MEMBER_REC = 0x38, IB_SA_ATTR_TRACE_REC = 0x39, IB_SA_ATTR_MULTI_PATH_REC = 0x3a, IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b, IB_SA_ATTR_INFORM_INFO_REC = 0xf3 }; enum ib_sa_selector { IB_SA_GT = 0, IB_SA_LT = 1, IB_SA_EQ = 2, /* * The meaning of "best" depends on the attribute: for * example, for MTU best will return the largest available * MTU, while for packet life time, best will return the * smallest available life time. */ IB_SA_BEST = 3 }; /* * There are 4 types of join states: * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. * The order corresponds to JoinState bits in MCMemberRecord. */ enum ib_sa_mc_join_states { FULLMEMBER_JOIN, NONMEMBER_JOIN, SENDONLY_NONMEBER_JOIN, SENDONLY_FULLMEMBER_JOIN, NUM_JOIN_MEMBERSHIP_TYPES, }; #define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) /* * Structures for SA records are named "struct ib_sa_xxx_rec." No * attempt is made to pack structures to match the physical layout of * SA records in SA MADs; all packing and unpacking is handled by the * SA query code. * * For a record with structure ib_sa_xxx_rec, the naming convention * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we * never use different abbreviations or otherwise change the spelling * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). * * Reserved rows are indicated with comments to help maintainability. */ #define IB_SA_PATH_REC_SERVICE_ID (IB_SA_COMP_MASK( 0) |\ IB_SA_COMP_MASK( 1)) #define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) #define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) #define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) #define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) #define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) /* reserved: 7 */ #define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) #define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) #define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) #define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) #define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) #define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) #define IB_SA_PATH_REC_QOS_CLASS IB_SA_COMP_MASK(14) #define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) #define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) #define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) #define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) #define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) #define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) #define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) #define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) struct ib_sa_path_rec { __be64 service_id; union ib_gid dgid; union ib_gid sgid; __be16 dlid; __be16 slid; int raw_traffic; /* reserved */ __be32 flow_label; u8 hop_limit; u8 traffic_class; int reversible; u8 numb_path; __be16 pkey; __be16 qos_class; u8 sl; u8 mtu_selector; u8 mtu; u8 rate_selector; u8 rate; u8 packet_life_time_selector; u8 packet_life_time; u8 preference; u8 dmac[ETH_ALEN]; /* ignored in IB */ int ifindex; /* ignored in IB */ struct vnet *net; enum ib_gid_type gid_type; }; static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec) { return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL; } #define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) #define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) #define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) #define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) #define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) #define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) #define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) #define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) #define IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) #define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9) #define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) #define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11) #define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) #define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) #define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) #define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) #define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) #define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) struct ib_sa_mcmember_rec { union ib_gid mgid; union ib_gid port_gid; __be32 qkey; __be16 mlid; u8 mtu_selector; u8 mtu; u8 traffic_class; __be16 pkey; u8 rate_selector; u8 rate; u8 packet_life_time_selector; u8 packet_life_time; u8 sl; __be32 flow_label; u8 hop_limit; u8 scope; u8 join_state; int proxy_join; }; /* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ #define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0) #define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1) #define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2) /* reserved: 3 */ #define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4) #define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5) #define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6) #define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7) #define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8) #define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9) #define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10) #define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11) #define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12) #define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13) #define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14) #define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15) #define IB_SA_SERVICE_REC_SERVICE_DATA8_9 IB_SA_COMP_MASK(16) #define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17) #define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18) #define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19) #define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20) #define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21) #define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22) #define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23) #define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24) #define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25) #define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26) #define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27) #define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28) #define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29) #define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30) #define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31) #define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32) #define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33) #define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34) #define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35) #define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36) #define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF struct ib_sa_service_rec { u64 id; union ib_gid gid; __be16 pkey; /* reserved */ u32 lease; u8 key[16]; u8 name[64]; u8 data8[16]; u16 data16[8]; u32 data32[4]; u64 data64[2]; }; #define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0) #define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1) #define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2) #define IB_SA_GUIDINFO_REC_RES2 IB_SA_COMP_MASK(3) #define IB_SA_GUIDINFO_REC_GID0 IB_SA_COMP_MASK(4) #define IB_SA_GUIDINFO_REC_GID1 IB_SA_COMP_MASK(5) #define IB_SA_GUIDINFO_REC_GID2 IB_SA_COMP_MASK(6) #define IB_SA_GUIDINFO_REC_GID3 IB_SA_COMP_MASK(7) #define IB_SA_GUIDINFO_REC_GID4 IB_SA_COMP_MASK(8) #define IB_SA_GUIDINFO_REC_GID5 IB_SA_COMP_MASK(9) #define IB_SA_GUIDINFO_REC_GID6 IB_SA_COMP_MASK(10) #define IB_SA_GUIDINFO_REC_GID7 IB_SA_COMP_MASK(11) struct ib_sa_guidinfo_rec { __be16 lid; u8 block_num; /* reserved */ u8 res1; __be32 res2; u8 guid_info_list[64]; }; struct ib_sa_client { atomic_t users; struct completion comp; }; /** * ib_sa_register_client - Register an SA client. */ void ib_sa_register_client(struct ib_sa_client *client); /** * ib_sa_unregister_client - Deregister an SA client. * @client: Client object to deregister. */ void ib_sa_unregister_client(struct ib_sa_client *client); struct ib_sa_query; void ib_sa_cancel_query(int id, struct ib_sa_query *query); int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), void *context, struct ib_sa_query **query); int ib_sa_service_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), void *context, struct ib_sa_query **sa_query); struct ib_sa_multicast { struct ib_sa_mcmember_rec rec; ib_sa_comp_mask comp_mask; int (*callback)(int status, struct ib_sa_multicast *multicast); void *context; }; /** * ib_sa_join_multicast - Initiates a join request to the specified multicast * group. * @client: SA client * @device: Device associated with the multicast group. * @port_num: Port on the specified device to associate with the multicast * group. * @rec: SA multicast member record specifying group attributes. * @comp_mask: Component mask indicating which group attributes of %rec are * valid. * @gfp_mask: GFP mask for memory allocations. * @callback: User callback invoked once the join operation completes. * @context: User specified context stored with the ib_sa_multicast structure. * * This call initiates a multicast join request with the SA for the specified * multicast group. If the join operation is started successfully, it returns * an ib_sa_multicast structure that is used to track the multicast operation. * Users must free this structure by calling ib_free_multicast, even if the * join operation later fails. (The callback status is non-zero.) * * If the join operation fails; status will be non-zero, with the following * failures possible: * -ETIMEDOUT: The request timed out. * -EIO: An error occurred sending the query. * -EINVAL: The MCMemberRecord values differed from the existing group's. * -ENETRESET: Indicates that an fatal error has occurred on the multicast * group, and the user must rejoin the group to continue using it. */ struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, gfp_t gfp_mask, int (*callback)(int status, struct ib_sa_multicast *multicast), void *context); /** * ib_free_multicast - Frees the multicast tracking structure, and releases * any reference on the multicast group. * @multicast: Multicast tracking structure allocated by ib_join_multicast. * * This call blocks until the multicast identifier is destroyed. It may * not be called from within the multicast callback; however, returning a non- * zero value from the callback will result in destroying the multicast * tracking structure. */ void ib_sa_free_multicast(struct ib_sa_multicast *multicast); /** * ib_get_mcmember_rec - Looks up a multicast member record by its MGID and * returns it if found. * @device: Device associated with the multicast group. * @port_num: Port on the specified device to associate with the multicast * group. * @mgid: MGID of multicast group. * @rec: Location to copy SA multicast member record. */ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num, union ib_gid *mgid, struct ib_sa_mcmember_rec *rec); /** * ib_init_ah_from_mcmember - Initialize address handle attributes based on * an SA multicast member record. */ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct ib_sa_mcmember_rec *rec, struct net_device *ndev, enum ib_gid_type gid_type, struct ib_ah_attr *ah_attr); /** * ib_init_ah_from_path - Initialize address handle attributes based on an SA * path record. */ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr); /** * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec * to IB MAD wire format. */ void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute); /** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. */ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); /* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), void *context, struct ib_sa_query **sa_query); /* Support get SA ClassPortInfo */ int ib_sa_classport_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u8 port_num, int timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_class_port_info *resp, void *context), void *context, struct ib_sa_query **sa_query); #endif /* IB_SA_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_sa.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_smi.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_smi.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_smi.h (revision 331772) @@ -1,175 +1,177 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_SMI_H) #define IB_SMI_H #include #define IB_SMP_DATA_SIZE 64 #define IB_SMP_MAX_PATH_HOPS 64 struct ib_smp { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; u8 hop_ptr; u8 hop_cnt; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; __be64 mkey; __be16 dr_slid; __be16 dr_dlid; u8 reserved[28]; u8 data[IB_SMP_DATA_SIZE]; u8 initial_path[IB_SMP_MAX_PATH_HOPS]; u8 return_path[IB_SMP_MAX_PATH_HOPS]; } __attribute__ ((packed)); #define IB_SMP_DIRECTION cpu_to_be16(0x8000) /* Subnet management attributes */ #define IB_SMP_ATTR_NOTICE cpu_to_be16(0x0002) #define IB_SMP_ATTR_NODE_DESC cpu_to_be16(0x0010) #define IB_SMP_ATTR_NODE_INFO cpu_to_be16(0x0011) #define IB_SMP_ATTR_SWITCH_INFO cpu_to_be16(0x0012) #define IB_SMP_ATTR_GUID_INFO cpu_to_be16(0x0014) #define IB_SMP_ATTR_PORT_INFO cpu_to_be16(0x0015) #define IB_SMP_ATTR_PKEY_TABLE cpu_to_be16(0x0016) #define IB_SMP_ATTR_SL_TO_VL_TABLE cpu_to_be16(0x0017) #define IB_SMP_ATTR_VL_ARB_TABLE cpu_to_be16(0x0018) #define IB_SMP_ATTR_LINEAR_FORWARD_TABLE cpu_to_be16(0x0019) #define IB_SMP_ATTR_RANDOM_FORWARD_TABLE cpu_to_be16(0x001A) #define IB_SMP_ATTR_MCAST_FORWARD_TABLE cpu_to_be16(0x001B) #define IB_SMP_ATTR_SM_INFO cpu_to_be16(0x0020) #define IB_SMP_ATTR_VENDOR_DIAG cpu_to_be16(0x0030) #define IB_SMP_ATTR_LED_INFO cpu_to_be16(0x0031) #define IB_SMP_ATTR_VENDOR_MASK cpu_to_be16(0xFF00) struct ib_port_info { __be64 mkey; __be64 gid_prefix; __be16 lid; __be16 sm_lid; __be32 cap_mask; __be16 diag_code; __be16 mkey_lease_period; u8 local_port_num; u8 link_width_enabled; u8 link_width_supported; u8 link_width_active; u8 linkspeed_portstate; /* 4 bits, 4 bits */ u8 portphysstate_linkdown; /* 4 bits, 4 bits */ u8 mkeyprot_resv_lmc; /* 2 bits, 3, 3 */ u8 linkspeedactive_enabled; /* 4 bits, 4 bits */ u8 neighbormtu_mastersmsl; /* 4 bits, 4 bits */ u8 vlcap_inittype; /* 4 bits, 4 bits */ u8 vl_high_limit; u8 vl_arb_high_cap; u8 vl_arb_low_cap; u8 inittypereply_mtucap; /* 4 bits, 4 bits */ u8 vlstallcnt_hoqlife; /* 3 bits, 5 bits */ u8 operationalvl_pei_peo_fpi_fpo; /* 4 bits, 1, 1, 1, 1 */ __be16 mkey_violations; __be16 pkey_violations; __be16 qkey_violations; u8 guid_cap; u8 clientrereg_resv_subnetto; /* 1 bit, 2 bits, 5 */ u8 resv_resptimevalue; /* 3 bits, 5 bits */ u8 localphyerrors_overrunerrors; /* 4 bits, 4 bits */ __be16 max_credit_hint; u8 resv; u8 link_roundtrip_latency[3]; }; struct ib_node_info { u8 base_version; u8 class_version; u8 node_type; u8 num_ports; __be64 sys_guid; __be64 node_guid; __be64 port_guid; __be16 partition_cap; __be16 device_id; __be32 revision; u8 local_port_num; u8 vendor_id[3]; } __packed; struct ib_vl_weight_elem { u8 vl; /* IB: VL is low 4 bits, upper 4 bits reserved */ /* OPA: VL is low 5 bits, upper 3 bits reserved */ u8 weight; }; static inline u8 ib_get_smp_direction(const struct ib_smp *smp) { return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); } /* * SM Trap/Notice numbers */ #define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) #define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) #define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) #define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) #define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) #define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) #define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) #define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) /* * Other local changes flags (trap 144). */ #define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ #define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ #define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 /* * M_Key volation flags in dr_trunc_hop (trap 256). */ #define IB_NOTICE_TRAP_DR_NOTICE 0x80 #define IB_NOTICE_TRAP_DR_TRUNC 0x40 #endif /* IB_SMI_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_smi.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_umem.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_umem.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_umem.h (revision 331772) @@ -1,108 +1,110 @@ /* * Copyright (c) 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_UMEM_H #define IB_UMEM_H #include #include #include struct ib_ucontext; struct ib_umem_odp; struct ib_umem { struct ib_ucontext *context; size_t length; unsigned long address; int page_size; int writable; struct work_struct work; pid_t pid; struct mm_struct *mm; unsigned long diff; struct ib_umem_odp *odp_data; struct sg_table sg_head; int nmap; int npages; }; /* Returns the offset of the umem start relative to the first page. */ static inline int ib_umem_offset(struct ib_umem *umem) { return umem->address & ((unsigned long)umem->page_size - 1); } /* Returns the first page of an ODP umem. */ static inline unsigned long ib_umem_start(struct ib_umem *umem) { return umem->address - ib_umem_offset(umem); } /* Returns the address of the page after the last one of an ODP umem. */ static inline unsigned long ib_umem_end(struct ib_umem *umem) { return PAGE_ALIGN(umem->address + umem->length); } static inline size_t ib_umem_num_pages(struct ib_umem *umem) { return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT; } #ifdef CONFIG_INFINIBAND_USER_MEM struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); #else /* CONFIG_INFINIBAND_USER_MEM */ #include static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, int access, int dmasync) { return ERR_PTR(-EINVAL); } static inline void ib_umem_release(struct ib_umem *umem) { } static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { return -EINVAL; } #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_umem.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/ib_umem_odp.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_umem_odp.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_umem_odp.h (revision 331772) @@ -1,161 +1,165 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_UMEM_ODP_H #define IB_UMEM_ODP_H #include #include #include struct umem_odp_node { u64 __subtree_last; struct rb_node rb; }; struct ib_umem_odp { /* * An array of the pages included in the on-demand paging umem. * Indices of pages that are currently not mapped into the device will * contain NULL. */ struct page **page_list; /* * An array of the same size as page_list, with DMA addresses mapped * for pages the pages in page_list. The lower two bits designate * access permissions. See ODP_READ_ALLOWED_BIT and * ODP_WRITE_ALLOWED_BIT. */ dma_addr_t *dma_list; /* * The umem_mutex protects the page_list and dma_list fields of an ODP * umem, allowing only a single thread to map/unmap pages. The mutex * also protects access to the mmu notifier counters. */ struct mutex umem_mutex; void *private; /* for the HW driver to use. */ /* When false, use the notifier counter in the ucontext struct. */ bool mn_counters_active; int notifiers_seq; int notifiers_count; /* A linked list of umems that don't have private mmu notifier * counters yet. */ struct list_head no_private_counters; struct ib_umem *umem; /* Tree tracking */ struct umem_odp_node interval_tree; struct completion notifier_completion; int dying; }; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem); void ib_umem_odp_release(struct ib_umem *umem); /* * The lower 2 bits of the DMA address signal the R/W permissions for * the entry. To upgrade the permissions, provide the appropriate * bitmask to the map_dma_pages function. * * Be aware that upgrading a mapped address might result in change of * the DMA address for the page. */ #define ODP_READ_ALLOWED_BIT (1<<0ULL) #define ODP_WRITE_ALLOWED_BIT (1<<1ULL) #define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, u64 access_mask, unsigned long current_seq); void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* * Call the callback on each ib_umem in the range. Returns the logical or of * the return values of the functions called. */ int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, umem_call_back cb, void *cookie); struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root, u64 start, u64 last); struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node, u64 start, u64 last); static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, unsigned long mmu_seq) { /* * This code is strongly based on the KVM code from * mmu_notifier_retry. Should be called with * the relevant locks taken (item->odp_data->umem_mutex * and the ucontext umem_mutex semaphore locked for read). */ /* Do not allow page faults while the new ib_umem hasn't seen a state * with zero notifiers yet, and doesn't have its own valid set of * private counters. */ if (!item->odp_data->mn_counters_active) return 1; if (unlikely(item->odp_data->notifiers_count)) return 1; if (item->odp_data->notifiers_seq != mmu_seq) return 1; return 0; } #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem) { return -EINVAL; } static inline void ib_umem_odp_release(struct ib_umem *umem) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #endif /* IB_UMEM_ODP_H */ Index: stable/11/sys/ofed/include/rdma/ib_verbs.h =================================================================== --- stable/11/sys/ofed/include/rdma/ib_verbs.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/ib_verbs.h (revision 331772) @@ -1,3369 +1,3371 @@ /* * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2004 Infinicon Corporation. All rights reserved. * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(IB_VERBS_H) #define IB_VERBS_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct ifla_vf_info; struct ifla_vf_stats; extern struct workqueue_struct *ib_wq; extern struct workqueue_struct *ib_comp_wq; union ib_gid { u8 raw[16]; struct { __be64 subnet_prefix; __be64 interface_id; } global; }; extern union ib_gid zgid; enum ib_gid_type { /* If link layer is Ethernet, this is RoCE V1 */ IB_GID_TYPE_IB = 0, IB_GID_TYPE_ROCE = 0, IB_GID_TYPE_ROCE_UDP_ENCAP = 1, IB_GID_TYPE_SIZE }; #define ROCE_V2_UDP_DPORT 4791 struct ib_gid_attr { enum ib_gid_type gid_type; struct net_device *ndev; }; enum rdma_node_type { /* IB values map to NodeInfo:NodeType. */ RDMA_NODE_IB_CA = 1, RDMA_NODE_IB_SWITCH, RDMA_NODE_IB_ROUTER, RDMA_NODE_RNIC, RDMA_NODE_USNIC, RDMA_NODE_USNIC_UDP, }; enum { /* set the local administered indication */ IB_SA_WELL_KNOWN_GUID = BIT_ULL(57) | 2, }; enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, RDMA_TRANSPORT_USNIC, RDMA_TRANSPORT_USNIC_UDP }; enum rdma_protocol_type { RDMA_PROTOCOL_IB, RDMA_PROTOCOL_IBOE, RDMA_PROTOCOL_IWARP, RDMA_PROTOCOL_USNIC_UDP }; __attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type); enum rdma_network_type { RDMA_NETWORK_IB, RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB, RDMA_NETWORK_IPV4, RDMA_NETWORK_IPV6 }; static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type) { if (network_type == RDMA_NETWORK_IPV4 || network_type == RDMA_NETWORK_IPV6) return IB_GID_TYPE_ROCE_UDP_ENCAP; /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */ return IB_GID_TYPE_IB; } static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, union ib_gid *gid) { if (gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; if (ipv6_addr_v4mapped((struct in6_addr *)gid)) return RDMA_NETWORK_IPV4; else return RDMA_NETWORK_IPV6; } enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, IB_LINK_LAYER_INFINIBAND, IB_LINK_LAYER_ETHERNET, }; enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = (1 << 0), IB_DEVICE_BAD_PKEY_CNTR = (1 << 1), IB_DEVICE_BAD_QKEY_CNTR = (1 << 2), IB_DEVICE_RAW_MULTI = (1 << 3), IB_DEVICE_AUTO_PATH_MIG = (1 << 4), IB_DEVICE_CHANGE_PHY_PORT = (1 << 5), IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6), IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7), IB_DEVICE_SHUTDOWN_PORT = (1 << 8), IB_DEVICE_INIT_TYPE = (1 << 9), IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10), IB_DEVICE_SYS_IMAGE_GUID = (1 << 11), IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12), IB_DEVICE_SRQ_RESIZE = (1 << 13), IB_DEVICE_N_NOTIFY_CQ = (1 << 14), /* * This device supports a per-device lkey or stag that can be * used without performing a memory registration for the local * memory. Note that ULPs should never check this flag, but * instead of use the local_dma_lkey flag in the ib_pd structure, * which will always contain a usable lkey. */ IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15), IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16), IB_DEVICE_MEM_WINDOW = (1 << 17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB * messages and can verify the validity of checksum for * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ IB_DEVICE_UD_IP_CSUM = (1 << 18), IB_DEVICE_UD_TSO = (1 << 19), IB_DEVICE_XRC = (1 << 20), /* * This device supports the IB "base memory management extension", * which includes support for fast registrations (IB_WR_REG_MR, * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should * also be set by any iWarp device which must support FRs to comply * to the iWarp verbs spec. iWarp devices also support the * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the * stag. */ IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21), IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22), IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23), IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24), IB_DEVICE_RC_IP_CSUM = (1 << 25), IB_DEVICE_RAW_IP_CSUM = (1 << 26), /* * Devices should set IB_DEVICE_CROSS_CHANNEL if they * support execution of WQEs that involve synchronization * of I/O operations with single completion queue managed * by hardware. */ IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), IB_DEVICE_RAW_SCATTER_FCS = (1ULL << 34), }; enum ib_signature_prot_cap { IB_PROT_T10DIF_TYPE_1 = 1, IB_PROT_T10DIF_TYPE_2 = 1 << 1, IB_PROT_T10DIF_TYPE_3 = 1 << 2, }; enum ib_signature_guard_cap { IB_GUARD_T10DIF_CRC = 1, IB_GUARD_T10DIF_CSUM = 1 << 1, }; enum ib_atomic_cap { IB_ATOMIC_NONE, IB_ATOMIC_HCA, IB_ATOMIC_GLOB }; enum ib_odp_general_cap_bits { IB_ODP_SUPPORT = 1 << 0, }; enum ib_odp_transport_cap_bits { IB_ODP_SUPPORT_SEND = 1 << 0, IB_ODP_SUPPORT_RECV = 1 << 1, IB_ODP_SUPPORT_WRITE = 1 << 2, IB_ODP_SUPPORT_READ = 1 << 3, IB_ODP_SUPPORT_ATOMIC = 1 << 4, }; struct ib_odp_caps { uint64_t general_caps; struct { uint32_t rc_odp_caps; uint32_t uc_odp_caps; uint32_t ud_odp_caps; } per_transport_caps; }; struct ib_rss_caps { /* Corresponding bit will be set if qp type from * 'enum ib_qp_type' is supported, e.g. * supported_qpts |= 1 << IB_QPT_UD */ u32 supported_qpts; u32 max_rwq_indirection_tables; u32 max_rwq_indirection_table_size; }; enum ib_cq_creation_flags { IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, }; struct ib_cq_init_attr { unsigned int cqe; int comp_vector; u32 flags; }; struct ib_device_attr { u64 fw_ver; __be64 sys_image_guid; u64 max_mr_size; u64 page_size_cap; u32 vendor_id; u32 vendor_part_id; u32 hw_ver; int max_qp; int max_qp_wr; u64 device_cap_flags; int max_sge; int max_sge_rd; int max_cq; int max_cqe; int max_mr; int max_pd; int max_qp_rd_atom; int max_ee_rd_atom; int max_res_rd_atom; int max_qp_init_rd_atom; int max_ee_init_rd_atom; enum ib_atomic_cap atomic_cap; enum ib_atomic_cap masked_atomic_cap; int max_ee; int max_rdd; int max_mw; int max_raw_ipv6_qp; int max_raw_ethy_qp; int max_mcast_grp; int max_mcast_qp_attach; int max_total_mcast_qp_attach; int max_ah; int max_fmr; int max_map_per_fmr; int max_srq; int max_srq_wr; int max_srq_sge; unsigned int max_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; int sig_prot_cap; int sig_guard_cap; struct ib_odp_caps odp_caps; uint64_t timestamp_mask; uint64_t hca_core_clock; /* in KHZ */ struct ib_rss_caps rss_caps; u32 max_wq_type_rq; }; enum ib_mtu { IB_MTU_256 = 1, IB_MTU_512 = 2, IB_MTU_1024 = 3, IB_MTU_2048 = 4, IB_MTU_4096 = 5 }; static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; case IB_MTU_512: return 512; case IB_MTU_1024: return 1024; case IB_MTU_2048: return 2048; case IB_MTU_4096: return 4096; default: return -1; } } enum ib_port_state { IB_PORT_NOP = 0, IB_PORT_DOWN = 1, IB_PORT_INIT = 2, IB_PORT_ARMED = 3, IB_PORT_ACTIVE = 4, IB_PORT_ACTIVE_DEFER = 5, IB_PORT_DUMMY = -1, /* force enum signed */ }; enum ib_port_cap_flags { IB_PORT_SM = 1 << 1, IB_PORT_NOTICE_SUP = 1 << 2, IB_PORT_TRAP_SUP = 1 << 3, IB_PORT_OPT_IPD_SUP = 1 << 4, IB_PORT_AUTO_MIGR_SUP = 1 << 5, IB_PORT_SL_MAP_SUP = 1 << 6, IB_PORT_MKEY_NVRAM = 1 << 7, IB_PORT_PKEY_NVRAM = 1 << 8, IB_PORT_LED_INFO_SUP = 1 << 9, IB_PORT_SM_DISABLED = 1 << 10, IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, IB_PORT_CM_SUP = 1 << 16, IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, IB_PORT_REINIT_SUP = 1 << 18, IB_PORT_DEVICE_MGMT_SUP = 1 << 19, IB_PORT_VENDOR_CLASS_SUP = 1 << 20, IB_PORT_DR_NOTICE_SUP = 1 << 21, IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, IB_PORT_CLIENT_REG_SUP = 1 << 25, IB_PORT_IP_BASED_GIDS = 1 << 26, }; enum ib_port_width { IB_WIDTH_1X = 1, IB_WIDTH_4X = 2, IB_WIDTH_8X = 4, IB_WIDTH_12X = 8 }; static inline int ib_width_enum_to_int(enum ib_port_width width) { switch (width) { case IB_WIDTH_1X: return 1; case IB_WIDTH_4X: return 4; case IB_WIDTH_8X: return 8; case IB_WIDTH_12X: return 12; default: return -1; } } enum ib_port_speed { IB_SPEED_SDR = 1, IB_SPEED_DDR = 2, IB_SPEED_QDR = 4, IB_SPEED_FDR10 = 8, IB_SPEED_FDR = 16, IB_SPEED_EDR = 32 }; /** * struct rdma_hw_stats * @timestamp - Used by the core code to track when the last update was * @lifespan - Used by the core code to determine how old the counters * should be before being updated again. Stored in jiffies, defaults * to 10 milliseconds, drivers can override the default be specifying * their own value during their allocation routine. * @name - Array of pointers to static names used for the counters in * directory. * @num_counters - How many hardware counters there are. If name is * shorter than this number, a kernel oops will result. Driver authors * are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters) * in their code to prevent this. * @value - Array of u64 counters that are accessed by the sysfs code and * filled in by the drivers get_stats routine */ struct rdma_hw_stats { unsigned long timestamp; unsigned long lifespan; const char * const *names; int num_counters; u64 value[]; }; #define RDMA_HW_STATS_DEFAULT_LIFESPAN 10 /** * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct * for drivers. * @names - Array of static const char * * @num_counters - How many elements in array * @lifespan - How many milliseconds between updates */ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct( const char * const *names, int num_counters, unsigned long lifespan) { struct rdma_hw_stats *stats; stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64), GFP_KERNEL); if (!stats) return NULL; stats->names = names; stats->num_counters = num_counters; stats->lifespan = msecs_to_jiffies(lifespan); return stats; } /* Define bits for the various functionality this port needs to be supported by * the core. */ /* Management 0x00000FFF */ #define RDMA_CORE_CAP_IB_MAD 0x00000001 #define RDMA_CORE_CAP_IB_SMI 0x00000002 #define RDMA_CORE_CAP_IB_CM 0x00000004 #define RDMA_CORE_CAP_IW_CM 0x00000008 #define RDMA_CORE_CAP_IB_SA 0x00000010 #define RDMA_CORE_CAP_OPA_MAD 0x00000020 /* Address format 0x000FF000 */ #define RDMA_CORE_CAP_AF_IB 0x00001000 #define RDMA_CORE_CAP_ETH_AH 0x00002000 /* Protocol 0xFFF00000 */ #define RDMA_CORE_CAP_PROT_IB 0x00100000 #define RDMA_CORE_CAP_PROT_ROCE 0x00200000 #define RDMA_CORE_CAP_PROT_IWARP 0x00400000 #define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000 #define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \ | RDMA_CORE_CAP_IB_MAD \ | RDMA_CORE_CAP_IB_SMI \ | RDMA_CORE_CAP_IB_CM \ | RDMA_CORE_CAP_IB_SA \ | RDMA_CORE_CAP_AF_IB) #define RDMA_CORE_PORT_IBA_ROCE (RDMA_CORE_CAP_PROT_ROCE \ | RDMA_CORE_CAP_IB_MAD \ | RDMA_CORE_CAP_IB_CM \ | RDMA_CORE_CAP_AF_IB \ | RDMA_CORE_CAP_ETH_AH) #define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \ (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \ | RDMA_CORE_CAP_IB_MAD \ | RDMA_CORE_CAP_IB_CM \ | RDMA_CORE_CAP_AF_IB \ | RDMA_CORE_CAP_ETH_AH) #define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \ | RDMA_CORE_CAP_IW_CM) #define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \ | RDMA_CORE_CAP_OPA_MAD) struct ib_port_attr { u64 subnet_prefix; enum ib_port_state state; enum ib_mtu max_mtu; enum ib_mtu active_mtu; int gid_tbl_len; u32 port_cap_flags; u32 max_msg_sz; u32 bad_pkey_cntr; u32 qkey_viol_cntr; u16 pkey_tbl_len; u16 lid; u16 sm_lid; u8 lmc; u8 max_vl_num; u8 sm_sl; u8 subnet_timeout; u8 init_type_reply; u8 active_width; u8 active_speed; u8 phys_state; bool grh_required; }; enum ib_device_modify_flags { IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 << 0, IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; #define IB_DEVICE_NODE_DESC_MAX 64 struct ib_device_modify { u64 sys_image_guid; char node_desc[IB_DEVICE_NODE_DESC_MAX]; }; enum ib_port_modify_flags { IB_PORT_SHUTDOWN = 1, IB_PORT_INIT_TYPE = (1<<2), IB_PORT_RESET_QKEY_CNTR = (1<<3) }; struct ib_port_modify { u32 set_port_cap_mask; u32 clr_port_cap_mask; u8 init_type; }; enum ib_event_type { IB_EVENT_CQ_ERR, IB_EVENT_QP_FATAL, IB_EVENT_QP_REQ_ERR, IB_EVENT_QP_ACCESS_ERR, IB_EVENT_COMM_EST, IB_EVENT_SQ_DRAINED, IB_EVENT_PATH_MIG, IB_EVENT_PATH_MIG_ERR, IB_EVENT_DEVICE_FATAL, IB_EVENT_PORT_ACTIVE, IB_EVENT_PORT_ERR, IB_EVENT_LID_CHANGE, IB_EVENT_PKEY_CHANGE, IB_EVENT_SM_CHANGE, IB_EVENT_SRQ_ERR, IB_EVENT_SRQ_LIMIT_REACHED, IB_EVENT_QP_LAST_WQE_REACHED, IB_EVENT_CLIENT_REREGISTER, IB_EVENT_GID_CHANGE, IB_EVENT_WQ_FATAL, }; const char *__attribute_const__ ib_event_msg(enum ib_event_type event); struct ib_event { struct ib_device *device; union { struct ib_cq *cq; struct ib_qp *qp; struct ib_srq *srq; struct ib_wq *wq; u8 port_num; } element; enum ib_event_type event; }; struct ib_event_handler { struct ib_device *device; void (*handler)(struct ib_event_handler *, struct ib_event *); struct list_head list; }; #define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ do { \ (_ptr)->device = _device; \ (_ptr)->handler = _handler; \ INIT_LIST_HEAD(&(_ptr)->list); \ } while (0) struct ib_global_route { union ib_gid dgid; u32 flow_label; u8 sgid_index; u8 hop_limit; u8 traffic_class; }; struct ib_grh { __be32 version_tclass_flow; __be16 paylen; u8 next_hdr; u8 hop_limit; union ib_gid sgid; union ib_gid dgid; }; union rdma_network_hdr { struct ib_grh ibgrh; struct { /* The IB spec states that if it's IPv4, the header * is located in the last 20 bytes of the header. */ u8 reserved[20]; struct ip roce4grh; }; }; enum { IB_MULTICAST_QPN = 0xffffff }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) #define IB_MULTICAST_LID_BASE cpu_to_be16(0xC000) enum ib_ah_flags { IB_AH_GRH = 1 }; enum ib_rate { IB_RATE_PORT_CURRENT = 0, IB_RATE_2_5_GBPS = 2, IB_RATE_5_GBPS = 5, IB_RATE_10_GBPS = 3, IB_RATE_20_GBPS = 6, IB_RATE_30_GBPS = 4, IB_RATE_40_GBPS = 7, IB_RATE_60_GBPS = 8, IB_RATE_80_GBPS = 9, IB_RATE_120_GBPS = 10, IB_RATE_14_GBPS = 11, IB_RATE_56_GBPS = 12, IB_RATE_112_GBPS = 13, IB_RATE_168_GBPS = 14, IB_RATE_25_GBPS = 15, IB_RATE_100_GBPS = 16, IB_RATE_200_GBPS = 17, IB_RATE_300_GBPS = 18 }; /** * ib_rate_to_mult - Convert the IB rate enum to a multiple of the * base rate of 2.5 Gbit/sec. For example, IB_RATE_5_GBPS will be * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate); /** * ib_rate_to_mbps - Convert the IB rate enum to Mbps. * For example, IB_RATE_2_5_GBPS will be converted to 2500. * @rate: rate to convert. */ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); /** * enum ib_mr_type - memory region type * @IB_MR_TYPE_MEM_REG: memory region that is used for * normal registration * @IB_MR_TYPE_SIGNATURE: memory region that is used for * signature operations (data-integrity * capable regions) * @IB_MR_TYPE_SG_GAPS: memory region that is capable to * register any arbitrary sg lists (without * the normal mr constraints - see * ib_map_mr_sg) */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, IB_MR_TYPE_SIGNATURE, IB_MR_TYPE_SG_GAPS, }; /** * Signature types * IB_SIG_TYPE_NONE: Unprotected. * IB_SIG_TYPE_T10_DIF: Type T10-DIF */ enum ib_signature_type { IB_SIG_TYPE_NONE, IB_SIG_TYPE_T10_DIF, }; /** * Signature T10-DIF block-guard types * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. * IB_T10DIF_CSUM: Corresponds to IP checksum rules. */ enum ib_t10_dif_bg_type { IB_T10DIF_CRC, IB_T10DIF_CSUM }; /** * struct ib_t10_dif_domain - Parameters specific for T10-DIF * domain. * @bg_type: T10-DIF block guard type (CRC|CSUM) * @pi_interval: protection information interval. * @bg: seed of guard computation. * @app_tag: application tag of guard block * @ref_tag: initial guard block reference tag. * @ref_remap: Indicate wethear the reftag increments each block * @app_escape: Indicate to skip block check if apptag=0xffff * @ref_escape: Indicate to skip block check if reftag=0xffffffff * @apptag_check_mask: check bitmask of application tag. */ struct ib_t10_dif_domain { enum ib_t10_dif_bg_type bg_type; u16 pi_interval; u16 bg; u16 app_tag; u32 ref_tag; bool ref_remap; bool app_escape; bool ref_escape; u16 apptag_check_mask; }; /** * struct ib_sig_domain - Parameters for signature domain * @sig_type: specific signauture type * @sig: union of all signature domain attributes that may * be used to set domain layout. */ struct ib_sig_domain { enum ib_signature_type sig_type; union { struct ib_t10_dif_domain dif; } sig; }; /** * struct ib_sig_attrs - Parameters for signature handover operation * @check_mask: bitmask for signature byte check (8 bytes) * @mem: memory domain layout desciptor. * @wire: wire domain layout desciptor. */ struct ib_sig_attrs { u8 check_mask; struct ib_sig_domain mem; struct ib_sig_domain wire; }; enum ib_sig_err_type { IB_SIG_BAD_GUARD, IB_SIG_BAD_REFTAG, IB_SIG_BAD_APPTAG, }; /** * struct ib_sig_err - signature error descriptor */ struct ib_sig_err { enum ib_sig_err_type err_type; u32 expected; u32 actual; u64 sig_err_offset; u32 key; }; enum ib_mr_status_check { IB_MR_CHECK_SIG_STATUS = 1, }; /** * struct ib_mr_status - Memory region status container * * @fail_status: Bitmask of MR checks status. For each * failed check a corresponding status bit is set. * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS * failure. */ struct ib_mr_status { u32 fail_status; struct ib_sig_err sig_err; }; /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. * @mult: multiple to convert. */ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct ib_ah_attr { struct ib_global_route grh; u16 dlid; u8 sl; u8 src_path_bits; u8 static_rate; u8 ah_flags; u8 port_num; u8 dmac[ETH_ALEN]; }; enum ib_wc_status { IB_WC_SUCCESS, IB_WC_LOC_LEN_ERR, IB_WC_LOC_QP_OP_ERR, IB_WC_LOC_EEC_OP_ERR, IB_WC_LOC_PROT_ERR, IB_WC_WR_FLUSH_ERR, IB_WC_MW_BIND_ERR, IB_WC_BAD_RESP_ERR, IB_WC_LOC_ACCESS_ERR, IB_WC_REM_INV_REQ_ERR, IB_WC_REM_ACCESS_ERR, IB_WC_REM_OP_ERR, IB_WC_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR, IB_WC_LOC_RDD_VIOL_ERR, IB_WC_REM_INV_RD_REQ_ERR, IB_WC_REM_ABORT_ERR, IB_WC_INV_EECN_ERR, IB_WC_INV_EEC_STATE_ERR, IB_WC_FATAL_ERR, IB_WC_RESP_TIMEOUT_ERR, IB_WC_GENERAL_ERR }; const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status); enum ib_wc_opcode { IB_WC_SEND, IB_WC_RDMA_WRITE, IB_WC_RDMA_READ, IB_WC_COMP_SWAP, IB_WC_FETCH_ADD, IB_WC_LSO, IB_WC_LOCAL_INV, IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). */ IB_WC_RECV = 1 << 7, IB_WC_RECV_RDMA_WITH_IMM, IB_WC_DUMMY = -1, /* force enum signed */ }; enum ib_wc_flags { IB_WC_GRH = 1, IB_WC_WITH_IMM = (1<<1), IB_WC_WITH_INVALIDATE = (1<<2), IB_WC_IP_CSUM_OK = (1<<3), IB_WC_WITH_SMAC = (1<<4), IB_WC_WITH_VLAN = (1<<5), IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6), }; struct ib_wc { union { u64 wr_id; struct ib_cqe *wr_cqe; }; enum ib_wc_status status; enum ib_wc_opcode opcode; u32 vendor_err; u32 byte_len; struct ib_qp *qp; union { __be32 imm_data; u32 invalidate_rkey; } ex; u32 src_qp; int wc_flags; u16 pkey_index; u16 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ u8 smac[ETH_ALEN]; u16 vlan_id; u8 network_hdr_type; }; enum ib_cq_notify_flags { IB_CQ_SOLICITED = 1 << 0, IB_CQ_NEXT_COMP = 1 << 1, IB_CQ_SOLICITED_MASK = IB_CQ_SOLICITED | IB_CQ_NEXT_COMP, IB_CQ_REPORT_MISSED_EVENTS = 1 << 2, }; enum ib_srq_type { IB_SRQT_BASIC, IB_SRQT_XRC }; enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, }; struct ib_srq_attr { u32 max_wr; u32 max_sge; u32 srq_limit; }; struct ib_srq_init_attr { void (*event_handler)(struct ib_event *, void *); void *srq_context; struct ib_srq_attr attr; enum ib_srq_type srq_type; union { struct { struct ib_xrcd *xrcd; struct ib_cq *cq; } xrc; } ext; }; struct ib_qp_cap { u32 max_send_wr; u32 max_recv_wr; u32 max_send_sge; u32 max_recv_sge; u32 max_inline_data; /* * Maximum number of rdma_rw_ctx structures in flight at a time. * ib_create_qp() will calculate the right amount of neededed WRs * and MRs based on this. */ u32 max_rdma_ctxs; }; enum ib_sig_type { IB_SIGNAL_ALL_WR, IB_SIGNAL_REQ_WR }; enum ib_qp_type { /* * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries * here (and in that order) since the MAD layer uses them as * indices into a 2-entry table. */ IB_QPT_SMI, IB_QPT_GSI, IB_QPT_RC, IB_QPT_UC, IB_QPT_UD, IB_QPT_RAW_IPV6, IB_QPT_RAW_ETHERTYPE, IB_QPT_RAW_PACKET = 8, IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, IB_QPT_MAX, /* Reserve a range for qp types internal to the low level driver. * These qp types will not be visible at the IB core layer, so the * IB_QPT_MAX usages should not be affected in the core layer */ IB_QPT_RESERVED1 = 0x1000, IB_QPT_RESERVED2, IB_QPT_RESERVED3, IB_QPT_RESERVED4, IB_QPT_RESERVED5, IB_QPT_RESERVED6, IB_QPT_RESERVED7, IB_QPT_RESERVED8, IB_QPT_RESERVED9, IB_QPT_RESERVED10, }; enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, IB_QP_CREATE_CROSS_CHANNEL = 1 << 2, IB_QP_CREATE_MANAGED_SEND = 1 << 3, IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, IB_QP_CREATE_SCATTER_FCS = 1 << 8, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, }; /* * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler * callback to destroy the passed in QP. */ struct ib_qp_init_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; enum ib_qp_create_flags create_flags; /* * Only needed for special QP types, or when using the RW API. */ u8 port_num; struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_qp_open_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; enum ib_qp_type qp_type; }; enum ib_rnr_timeout { IB_RNR_TIMER_655_36 = 0, IB_RNR_TIMER_000_01 = 1, IB_RNR_TIMER_000_02 = 2, IB_RNR_TIMER_000_03 = 3, IB_RNR_TIMER_000_04 = 4, IB_RNR_TIMER_000_06 = 5, IB_RNR_TIMER_000_08 = 6, IB_RNR_TIMER_000_12 = 7, IB_RNR_TIMER_000_16 = 8, IB_RNR_TIMER_000_24 = 9, IB_RNR_TIMER_000_32 = 10, IB_RNR_TIMER_000_48 = 11, IB_RNR_TIMER_000_64 = 12, IB_RNR_TIMER_000_96 = 13, IB_RNR_TIMER_001_28 = 14, IB_RNR_TIMER_001_92 = 15, IB_RNR_TIMER_002_56 = 16, IB_RNR_TIMER_003_84 = 17, IB_RNR_TIMER_005_12 = 18, IB_RNR_TIMER_007_68 = 19, IB_RNR_TIMER_010_24 = 20, IB_RNR_TIMER_015_36 = 21, IB_RNR_TIMER_020_48 = 22, IB_RNR_TIMER_030_72 = 23, IB_RNR_TIMER_040_96 = 24, IB_RNR_TIMER_061_44 = 25, IB_RNR_TIMER_081_92 = 26, IB_RNR_TIMER_122_88 = 27, IB_RNR_TIMER_163_84 = 28, IB_RNR_TIMER_245_76 = 29, IB_RNR_TIMER_327_68 = 30, IB_RNR_TIMER_491_52 = 31 }; enum ib_qp_attr_mask { IB_QP_STATE = 1, IB_QP_CUR_STATE = (1<<1), IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), IB_QP_ACCESS_FLAGS = (1<<3), IB_QP_PKEY_INDEX = (1<<4), IB_QP_PORT = (1<<5), IB_QP_QKEY = (1<<6), IB_QP_AV = (1<<7), IB_QP_PATH_MTU = (1<<8), IB_QP_TIMEOUT = (1<<9), IB_QP_RETRY_CNT = (1<<10), IB_QP_RNR_RETRY = (1<<11), IB_QP_RQ_PSN = (1<<12), IB_QP_MAX_QP_RD_ATOMIC = (1<<13), IB_QP_ALT_PATH = (1<<14), IB_QP_MIN_RNR_TIMER = (1<<15), IB_QP_SQ_PSN = (1<<16), IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), IB_QP_PATH_MIG_STATE = (1<<18), IB_QP_CAP = (1<<19), IB_QP_DEST_QPN = (1<<20), IB_QP_RESERVED1 = (1<<21), IB_QP_RESERVED2 = (1<<22), IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), }; enum ib_qp_state { IB_QPS_RESET, IB_QPS_INIT, IB_QPS_RTR, IB_QPS_RTS, IB_QPS_SQD, IB_QPS_SQE, IB_QPS_ERR, IB_QPS_DUMMY = -1, /* force enum signed */ }; enum ib_mig_state { IB_MIG_MIGRATED, IB_MIG_REARM, IB_MIG_ARMED }; enum ib_mw_type { IB_MW_TYPE_1 = 1, IB_MW_TYPE_2 = 2 }; struct ib_qp_attr { enum ib_qp_state qp_state; enum ib_qp_state cur_qp_state; enum ib_mtu path_mtu; enum ib_mig_state path_mig_state; u32 qkey; u32 rq_psn; u32 sq_psn; u32 dest_qp_num; int qp_access_flags; struct ib_qp_cap cap; struct ib_ah_attr ah_attr; struct ib_ah_attr alt_ah_attr; u16 pkey_index; u16 alt_pkey_index; u8 en_sqd_async_notify; u8 sq_draining; u8 max_rd_atomic; u8 max_dest_rd_atomic; u8 min_rnr_timer; u8 port_num; u8 timeout; u8 retry_cnt; u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; }; enum ib_wr_opcode { IB_WR_RDMA_WRITE, IB_WR_RDMA_WRITE_WITH_IMM, IB_WR_SEND, IB_WR_SEND_WITH_IMM, IB_WR_RDMA_READ, IB_WR_ATOMIC_CMP_AND_SWP, IB_WR_ATOMIC_FETCH_AND_ADD, IB_WR_LSO, IB_WR_SEND_WITH_INV, IB_WR_RDMA_READ_WITH_INV, IB_WR_LOCAL_INV, IB_WR_REG_MR, IB_WR_MASKED_ATOMIC_CMP_AND_SWP, IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_REG_SIG_MR, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. */ IB_WR_RESERVED1 = 0xf0, IB_WR_RESERVED2, IB_WR_RESERVED3, IB_WR_RESERVED4, IB_WR_RESERVED5, IB_WR_RESERVED6, IB_WR_RESERVED7, IB_WR_RESERVED8, IB_WR_RESERVED9, IB_WR_RESERVED10, IB_WR_DUMMY = -1, /* force enum signed */ }; enum ib_send_flags { IB_SEND_FENCE = 1, IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), IB_SEND_INLINE = (1<<3), IB_SEND_IP_CSUM = (1<<4), /* reserve bits 26-31 for low level drivers' internal use */ IB_SEND_RESERVED_START = (1 << 26), IB_SEND_RESERVED_END = (1 << 31), }; struct ib_sge { u64 addr; u32 length; u32 lkey; }; struct ib_cqe { void (*done)(struct ib_cq *cq, struct ib_wc *wc); }; struct ib_send_wr { struct ib_send_wr *next; union { u64 wr_id; struct ib_cqe *wr_cqe; }; struct ib_sge *sg_list; int num_sge; enum ib_wr_opcode opcode; int send_flags; union { __be32 imm_data; u32 invalidate_rkey; } ex; }; struct ib_rdma_wr { struct ib_send_wr wr; u64 remote_addr; u32 rkey; }; static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr) { return container_of(wr, struct ib_rdma_wr, wr); } struct ib_atomic_wr { struct ib_send_wr wr; u64 remote_addr; u64 compare_add; u64 swap; u64 compare_add_mask; u64 swap_mask; u32 rkey; }; static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr) { return container_of(wr, struct ib_atomic_wr, wr); } struct ib_ud_wr { struct ib_send_wr wr; struct ib_ah *ah; void *header; int hlen; int mss; u32 remote_qpn; u32 remote_qkey; u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ }; static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr) { return container_of(wr, struct ib_ud_wr, wr); } struct ib_reg_wr { struct ib_send_wr wr; struct ib_mr *mr; u32 key; int access; }; static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr) { return container_of(wr, struct ib_reg_wr, wr); } struct ib_sig_handover_wr { struct ib_send_wr wr; struct ib_sig_attrs *sig_attrs; struct ib_mr *sig_mr; int access_flags; struct ib_sge *prot; }; static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) { return container_of(wr, struct ib_sig_handover_wr, wr); } struct ib_recv_wr { struct ib_recv_wr *next; union { u64 wr_id; struct ib_cqe *wr_cqe; }; struct ib_sge *sg_list; int num_sge; }; enum ib_access_flags { IB_ACCESS_LOCAL_WRITE = 1, IB_ACCESS_REMOTE_WRITE = (1<<1), IB_ACCESS_REMOTE_READ = (1<<2), IB_ACCESS_REMOTE_ATOMIC = (1<<3), IB_ACCESS_MW_BIND = (1<<4), IB_ZERO_BASED = (1<<5), IB_ACCESS_ON_DEMAND = (1<<6), }; struct ib_phys_buf { u64 addr; u64 size; }; /* * XXX: these are apparently used for ->rereg_user_mr, no idea why they * are hidden here instead of a uapi header! */ enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), IB_MR_REREG_ACCESS = (1<<2), IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) }; struct ib_fmr_attr { int max_pages; int max_maps; u8 page_shift; }; struct ib_umem; struct ib_ucontext { struct ib_device *device; struct list_head pd_list; struct list_head mr_list; struct list_head mw_list; struct list_head cq_list; struct list_head qp_list; struct list_head srq_list; struct list_head ah_list; struct list_head xrcd_list; struct list_head rule_list; struct list_head wq_list; struct list_head rwq_ind_tbl_list; int closing; pid_t tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct rb_root umem_tree; /* * Protects .umem_rbroot and tree, as well as odp_mrs_count and * mmu notifiers registration. */ struct rw_semaphore umem_rwsem; void (*invalidate_range)(struct ib_umem *umem, unsigned long start, unsigned long end); struct mmu_notifier mn; atomic_t notifier_count; /* A list of umems that don't have private mmu notifier counters yet. */ struct list_head no_private_counters; int odp_mrs_count; #endif }; struct ib_uobject { u64 user_handle; /* handle given to us by userspace */ struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ int id; /* index into kernel idr */ struct kref ref; struct rw_semaphore mutex; /* protects .live */ struct rcu_head rcu; /* kfree_rcu() overhead */ int live; }; struct ib_udata { const void __user *inbuf; void __user *outbuf; size_t inlen; size_t outlen; }; struct ib_pd { u32 local_dma_lkey; u32 flags; struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ u32 unsafe_global_rkey; /* * Implementation details of the RDMA core, don't use in drivers: */ struct ib_mr *__internal_mr; }; struct ib_xrcd { struct ib_device *device; atomic_t usecnt; /* count all exposed resources */ struct inode *inode; struct mutex tgt_qp_mutex; struct list_head tgt_qp_list; }; struct ib_ah { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; }; typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); enum ib_poll_context { IB_POLL_DIRECT, /* caller context, no hw completions */ IB_POLL_SOFTIRQ, /* poll from softirq context */ IB_POLL_WORKQUEUE, /* poll from workqueue */ }; struct ib_cq { struct ib_device *device; struct ib_uobject *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ enum ib_poll_context poll_ctx; struct work_struct work; }; struct ib_srq { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; enum ib_srq_type srq_type; atomic_t usecnt; union { struct { struct ib_xrcd *xrcd; struct ib_cq *cq; u32 srq_num; } xrc; } ext; }; enum ib_wq_type { IB_WQT_RQ }; enum ib_wq_state { IB_WQS_RESET, IB_WQS_RDY, IB_WQS_ERR }; struct ib_wq { struct ib_device *device; struct ib_uobject *uobject; void *wq_context; void (*event_handler)(struct ib_event *, void *); struct ib_pd *pd; struct ib_cq *cq; u32 wq_num; enum ib_wq_state state; enum ib_wq_type wq_type; atomic_t usecnt; }; struct ib_wq_init_attr { void *wq_context; enum ib_wq_type wq_type; u32 max_wr; u32 max_sge; struct ib_cq *cq; void (*event_handler)(struct ib_event *, void *); }; enum ib_wq_attr_mask { IB_WQ_STATE = 1 << 0, IB_WQ_CUR_STATE = 1 << 1, }; struct ib_wq_attr { enum ib_wq_state wq_state; enum ib_wq_state curr_wq_state; }; struct ib_rwq_ind_table { struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; u32 ind_tbl_num; u32 log_ind_tbl_size; struct ib_wq **ind_tbl; }; struct ib_rwq_ind_table_init_attr { u32 log_ind_tbl_size; /* Each entry is a pointer to Receive Work Queue */ struct ib_wq **ind_tbl; }; /* * @max_write_sge: Maximum SGE elements per RDMA WRITE request. * @max_read_sge: Maximum SGE elements per RDMA READ request. */ struct ib_qp { struct ib_device *device; struct ib_pd *pd; struct ib_cq *send_cq; struct ib_cq *recv_cq; spinlock_t mr_lock; struct ib_srq *srq; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; /* count times opened, mcast attaches, flow attaches */ atomic_t usecnt; struct list_head open_list; struct ib_qp *real_qp; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; u32 max_write_sge; u32 max_read_sge; enum ib_qp_type qp_type; struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_mr { struct ib_device *device; struct ib_pd *pd; u32 lkey; u32 rkey; u64 iova; u32 length; unsigned int page_size; bool need_inval; union { struct ib_uobject *uobject; /* user */ struct list_head qp_entry; /* FR */ }; }; struct ib_mw { struct ib_device *device; struct ib_pd *pd; struct ib_uobject *uobject; u32 rkey; enum ib_mw_type type; }; struct ib_fmr { struct ib_device *device; struct ib_pd *pd; struct list_head list; u32 lkey; u32 rkey; }; /* Supported steering options */ enum ib_flow_attr_type { /* steering according to rule specifications */ IB_FLOW_ATTR_NORMAL = 0x0, /* default unicast and multicast rule - * receive all Eth traffic which isn't steered to any QP */ IB_FLOW_ATTR_ALL_DEFAULT = 0x1, /* default multicast rule - * receive all Eth multicast traffic which isn't steered to any QP */ IB_FLOW_ATTR_MC_DEFAULT = 0x2, /* sniffer rule - receive all port traffic */ IB_FLOW_ATTR_SNIFFER = 0x3 }; /* Supported steering header types */ enum ib_flow_spec_type { /* L2 headers*/ IB_FLOW_SPEC_ETH = 0x20, IB_FLOW_SPEC_IB = 0x22, /* L3 header*/ IB_FLOW_SPEC_IPV4 = 0x30, IB_FLOW_SPEC_IPV6 = 0x31, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41 }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4 /* Flow steering rule priority is set according to it's domain. * Lower domain value means higher priority. */ enum ib_flow_domain { IB_FLOW_DOMAIN_USER, IB_FLOW_DOMAIN_ETHTOOL, IB_FLOW_DOMAIN_RFS, IB_FLOW_DOMAIN_NIC, IB_FLOW_DOMAIN_NUM /* Must be last */ }; enum ib_flow_flags { IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ IB_FLOW_ATTR_FLAGS_RESERVED = 1UL << 2 /* Must be last */ }; struct ib_flow_eth_filter { u8 dst_mac[6]; u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; /* Must be last */ u8 real_sz[0]; }; struct ib_flow_spec_eth { enum ib_flow_spec_type type; u16 size; struct ib_flow_eth_filter val; struct ib_flow_eth_filter mask; }; struct ib_flow_ib_filter { __be16 dlid; __u8 sl; /* Must be last */ u8 real_sz[0]; }; struct ib_flow_spec_ib { enum ib_flow_spec_type type; u16 size; struct ib_flow_ib_filter val; struct ib_flow_ib_filter mask; }; /* IPv4 header flags */ enum ib_ipv4_flags { IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */ IB_IPV4_MORE_FRAG = 0X4 /* For All fragmented packets except the last have this flag set */ }; struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; u8 proto; u8 tos; u8 ttl; u8 flags; /* Must be last */ u8 real_sz[0]; }; struct ib_flow_spec_ipv4 { enum ib_flow_spec_type type; u16 size; struct ib_flow_ipv4_filter val; struct ib_flow_ipv4_filter mask; }; struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; __be32 flow_label; u8 next_hdr; u8 traffic_class; u8 hop_limit; /* Must be last */ u8 real_sz[0]; }; struct ib_flow_spec_ipv6 { enum ib_flow_spec_type type; u16 size; struct ib_flow_ipv6_filter val; struct ib_flow_ipv6_filter mask; }; struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; /* Must be last */ u8 real_sz[0]; }; struct ib_flow_spec_tcp_udp { enum ib_flow_spec_type type; u16 size; struct ib_flow_tcp_udp_filter val; struct ib_flow_tcp_udp_filter mask; }; union ib_flow_spec { struct { enum ib_flow_spec_type type; u16 size; }; struct ib_flow_spec_eth eth; struct ib_flow_spec_ib ib; struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; struct ib_flow_spec_ipv6 ipv6; }; struct ib_flow_attr { enum ib_flow_attr_type type; u16 size; u16 priority; u32 flags; u8 num_of_specs; u8 port; /* Following are the optional layers according to user request * struct ib_flow_spec_xxx * struct ib_flow_spec_yyy */ }; struct ib_flow { struct ib_qp *qp; struct ib_uobject *uobject; }; struct ib_mad_hdr; struct ib_grh; enum ib_process_mad_flags { IB_MAD_IGNORE_MKEY = 1, IB_MAD_IGNORE_BKEY = 2, IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY }; enum ib_mad_result { IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ }; #define IB_DEVICE_NAME_MAX 64 struct ib_cache { rwlock_t lock; struct ib_event_handler event_handler; struct ib_pkey_cache **pkey_cache; struct ib_gid_table **gid_cache; u8 *lmc_cache; }; struct ib_dma_mapping_ops { int (*mapping_error)(struct ib_device *dev, u64 dma_addr); u64 (*map_single)(struct ib_device *dev, void *ptr, size_t size, enum dma_data_direction direction); void (*unmap_single)(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction); u64 (*map_page)(struct ib_device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction); void (*unmap_page)(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction); int (*map_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); int (*map_sg_attrs)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, struct dma_attrs *attrs); void (*unmap_sg_attrs)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, struct dma_attrs *attrs); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, enum dma_data_direction dir); void (*sync_single_for_device)(struct ib_device *dev, u64 dma_handle, size_t size, enum dma_data_direction dir); void *(*alloc_coherent)(struct ib_device *dev, size_t size, u64 *dma_handle, gfp_t flag); void (*free_coherent)(struct ib_device *dev, size_t size, void *cpu_addr, u64 dma_handle); }; struct iw_cm_verbs; struct ib_port_immutable { int pkey_tbl_len; int gid_tbl_len; u32 core_cap_flags; u32 max_mad_size; }; struct ib_device { struct device *dma_device; char name[IB_DEVICE_NAME_MAX]; struct list_head event_handler_list; spinlock_t event_handler_lock; spinlock_t client_data_lock; struct list_head core_list; /* Access to the client_data_list is protected by the client_data_lock * spinlock and the lists_rwsem read-write semaphore */ struct list_head client_data_list; struct ib_cache cache; /** * port_immutable is indexed by port number */ struct ib_port_immutable *port_immutable; int num_comp_vectors; struct iw_cm_verbs *iwcm; /** * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the * driver initialized data. The struct is kfree()'ed by the sysfs * core when the device is removed. A lifespan of -1 in the return * struct tells the core to set a default lifespan. */ struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device, u8 port_num); /** * get_hw_stats - Fill in the counter value(s) in the stats struct. * @index - The index in the value array we wish to have updated, or * num_counters if we want all stats updated * Return codes - * < 0 - Error, no counters updated * index - Updated the single counter pointed to by index * num_counters - Updated all counters (will reset the timestamp * and prevent further calls for lifespan milliseconds) * Drivers are allowed to update all counters in leiu of just the * one given in index at their option */ int (*get_hw_stats)(struct ib_device *device, struct rdma_hw_stats *stats, u8 port, int index); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr, struct ib_udata *udata); int (*query_port)(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer (*get_link_layer)(struct ib_device *device, u8 port_num); /* When calling get_netdev, the HW vendor's driver should return the * net device of device @device at port @port_num or NULL if such * a net device doesn't exist. The vendor driver should call dev_hold * on this net device. The HW vendor's device driver must guarantee * that this function returns NULL before the net device reaches * NETDEV_UNREGISTER_FINAL state. */ struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num); int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); /* When calling add_gid, the HW vendor's driver should * add the gid of device @device at gid index @index of * port @port_num to be @gid. Meta-info of that gid (for example, * the network device related to this gid is available * at @attr. @context allows the HW vendor driver to store extra * information together with a GID entry. The HW vendor may allocate * memory to contain this information and store it in @context when a * new GID entry is written to. Params are consistent until the next * call of add_gid or delete_gid. The function should return 0 on * success or error otherwise. The function could be called * concurrently for different ports. This function is only called * when roce_gid_table is used. */ int (*add_gid)(struct ib_device *device, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr, void **context); /* When calling del_gid, the HW vendor's driver should delete the * gid of device @device at gid index @index of port @port_num. * Upon the deletion of a GID entry, the HW vendor must free any * allocated memory. The caller will clear @context afterwards. * This function is only called when roce_gid_table is used. */ int (*del_gid)(struct ib_device *device, u8 port_num, unsigned int index, void **context); int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int (*modify_device)(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int (*modify_port)(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, struct ib_udata *udata); int (*dealloc_ucontext)(struct ib_ucontext *context); int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_pd * (*alloc_pd)(struct ib_device *device, struct ib_ucontext *context, struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, struct ib_ah_attr *ah_attr); int (*modify_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah); struct ib_srq * (*create_srq)(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); int (*destroy_srq)(struct ib_srq *srq); int (*post_srq_recv)(struct ib_srq *srq, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); struct ib_qp * (*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp); int (*post_send)(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr); int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); struct ib_cq * (*create_cq)(struct ib_device *device, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int (*peek_cq)(struct ib_cq *cq, int wc_cnt); int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags); int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt); struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, struct ib_phys_buf *phys_buf_array, int num_phys_buf, int mr_access_flags, u64 *iova_start); struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata); int (*dereg_mr)(struct ib_mr *mr); struct ib_mr * (*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); struct ib_mw * (*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); int (*map_phys_fmr)(struct ib_fmr *fmr, u64 *page_list, int list_len, u64 iova); int (*unmap_fmr)(struct list_head *fmr_list); int (*dealloc_fmr)(struct ib_fmr *fmr); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*process_mad)(struct ib_device *device, int process_mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in_mad, size_t in_mad_size, struct ib_mad_hdr *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, struct ib_ucontext *ucontext, struct ib_udata *udata); int (*dealloc_xrcd)(struct ib_xrcd *xrcd); struct ib_flow * (*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain); int (*destroy_flow)(struct ib_flow *flow_id); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); void (*drain_rq)(struct ib_qp *qp); void (*drain_sq)(struct ib_qp *qp); int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port, int state); int (*get_vf_config)(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); struct ib_wq * (*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); int (*destroy_wq)(struct ib_wq *wq); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table * (*create_rwq_ind_table)(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); struct ib_dma_mapping_ops *dma_ops; struct module *owner; struct device dev; struct kobject *ports_parent; struct list_head port_list; enum { IB_DEV_UNINITIALIZED, IB_DEV_REGISTERED, IB_DEV_UNREGISTERED } reg_state; int uverbs_abi_ver; u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; u8 node_type; u8 phys_port_cnt; struct ib_device_attr attrs; struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; /** * The following mandatory functions are used only at device * registration. Keep functions such as these at the end of this * structure to avoid cache line misses when accessing struct ib_device * in fast paths. */ int (*get_port_immutable)(struct ib_device *, u8, struct ib_port_immutable *); void (*get_dev_fw_str)(struct ib_device *, char *str, size_t str_len); }; struct ib_client { char *name; void (*add) (struct ib_device *); void (*remove)(struct ib_device *, void *client_data); /* Returns the net_dev belonging to this ib_client and matching the * given parameters. * @dev: An RDMA device that the net_dev use for communication. * @port: A physical port number on the RDMA device. * @pkey: P_Key that the net_dev uses if applicable. * @gid: A GID that the net_dev uses to communicate. * @addr: An IP address the net_dev is configured with. * @client_data: The device's client data set by ib_set_client_data(). * * An ib_client that implements a net_dev on top of RDMA devices * (such as IP over IB) should implement this callback, allowing the * rdma_cm module to find the right net_dev for a given request. * * The caller is responsible for calling dev_put on the returned * netdev. */ struct net_device *(*get_net_dev_by_params)( struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr, void *client_data); struct list_head list; }; struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str, size_t str_len); int ib_register_device(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)); void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client); void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) { return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; } static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) { return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; } static inline bool ib_is_udata_cleared(struct ib_udata *udata, size_t offset, size_t len) { const void __user *p = (const char __user *)udata->inbuf + offset; bool ret; u8 *buf; if (len > USHRT_MAX) return false; buf = memdup_user(p, len); if (IS_ERR(buf)) return false; ret = !memchr_inv(buf, 0, len); kfree(buf); return ret; } /** * ib_modify_qp_is_ok - Check that the supplied attribute mask * contains all required attributes and no attributes not allowed for * the given QP state transition. * @cur_state: Current QP state * @next_state: Next QP state * @type: QP type * @mask: Mask of supplied QP attributes * @ll : link layer of port * * This function is a helper function that a low-level driver's * modify_qp method can use to validate the consumer's input. It * checks that cur_state and next_state are valid QP states, that a * transition from cur_state to next_state is allowed by the IB spec, * and that the attribute mask supplied is allowed for the transition. */ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask, enum rdma_link_layer ll); int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num); /** * rdma_cap_ib_switch - Check if the device is IB switch * @device: Device to check * * Device driver is responsible for setting is_switch bit on * in ib_device structure at init time. * * Return: true if the device is IB switch. */ static inline bool rdma_cap_ib_switch(const struct ib_device *device) { return device->is_switch; } /** * rdma_start_port - Return the first valid port number for the device * specified * * @device: Device to be checked * * Return start port number */ static inline u8 rdma_start_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : 1; } /** * rdma_end_port - Return the last valid port number for the device * specified * * @device: Device to be checked * * Return last port number */ static inline u8 rdma_end_port(const struct ib_device *device) { return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt; } static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB; } static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP); } static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP; } static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE; } static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP; } static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num) { return rdma_protocol_ib(device, port_num) || rdma_protocol_roce(device, port_num); } /** * rdma_cap_ib_mad - Check if the port of a device supports Infiniband * Management Datagrams. * @device: Device to check * @port_num: Port number to check * * Management Datagrams (MAD) are a required part of the InfiniBand * specification and are supported on all InfiniBand devices. A slightly * extended version are also supported on OPA interfaces. * * Return: true if the port supports sending/receiving of MAD packets. */ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD; } /** * rdma_cap_opa_mad - Check if the port of device provides support for OPA * Management Datagrams. * @device: Device to check * @port_num: Port number to check * * Intel OmniPath devices extend and/or replace the InfiniBand Management * datagrams with their own versions. These OPA MADs share many but not all of * the characteristics of InfiniBand MADs. * * OPA MADs differ in the following ways: * * 1) MADs are variable size up to 2K * IBTA defined MADs remain fixed at 256 bytes * 2) OPA SMPs must carry valid PKeys * 3) OPA SMP packets are a different format * * Return: true if the port supports OPA MAD packet formats. */ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num) { return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD; } /** * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI). * @device: Device to check * @port_num: Port number to check * * Each InfiniBand node is required to provide a Subnet Management Agent * that the subnet manager can access. Prior to the fabric being fully * configured by the subnet manager, the SMA is accessed via a well known * interface called the Subnet Management Interface (SMI). This interface * uses directed route packets to communicate with the SM to get around the * chicken and egg problem of the SM needing to know what's on the fabric * in order to configure the fabric, and needing to configure the fabric in * order to send packets to the devices on the fabric. These directed * route packets do not need the fabric fully configured in order to reach * their destination. The SMI is the only method allowed to send * directed route packets on an InfiniBand fabric. * * Return: true if the port provides an SMI. */ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI; } /** * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband * Communication Manager. * @device: Device to check * @port_num: Port number to check * * The InfiniBand Communication Manager is one of many pre-defined General * Service Agents (GSA) that are accessed via the General Service * Interface (GSI). It's role is to facilitate establishment of connections * between nodes as well as other management related tasks for established * connections. * * Return: true if the port supports an IB CM (this does not guarantee that * a CM is actually running however). */ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM; } /** * rdma_cap_iw_cm - Check if the port of device has the capability IWARP * Communication Manager. * @device: Device to check * @port_num: Port number to check * * Similar to above, but specific to iWARP connections which have a different * managment protocol than InfiniBand. * * Return: true if the port supports an iWARP CM (this does not guarantee that * a CM is actually running however). */ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM; } /** * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband * Subnet Administration. * @device: Device to check * @port_num: Port number to check * * An InfiniBand Subnet Administration (SA) service is a pre-defined General * Service Agent (GSA) provided by the Subnet Manager (SM). On InfiniBand * fabrics, devices should resolve routes to other hosts by contacting the * SA to query the proper route. * * Return: true if the port should act as a client to the fabric Subnet * Administration interface. This does not imply that the SA service is * running locally. */ static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA; } /** * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband * Multicast. * @device: Device to check * @port_num: Port number to check * * InfiniBand multicast registration is more complex than normal IPv4 or * IPv6 multicast registration. Each Host Channel Adapter must register * with the Subnet Manager when it wishes to join a multicast group. It * should do so only once regardless of how many queue pairs it subscribes * to this group. And it should leave the group only after all queue pairs * attached to the group have been detached. * * Return: true if the port must undertake the additional adminstrative * overhead of registering/unregistering with the SM and tracking of the * total number of queue pairs attached to the multicast group. */ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num) { return rdma_cap_ib_sa(device, port_num); } /** * rdma_cap_af_ib - Check if the port of device has the capability * Native Infiniband Address. * @device: Device to check * @port_num: Port number to check * * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default * GID. RoCE uses a different mechanism, but still generates a GID via * a prescribed mechanism and port specific data. * * Return: true if the port uses a GID address to identify devices on the * network. */ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB; } /** * rdma_cap_eth_ah - Check if the port of device has the capability * Ethernet Address Handle. * @device: Device to check * @port_num: Port number to check * * RoCE is InfiniBand over Ethernet, and it uses a well defined technique * to fabricate GIDs over Ethernet/IP specific addresses native to the * port. Normally, packet headers are generated by the sending host * adapter, but when sending connectionless datagrams, we must manually * inject the proper headers for the fabric we are communicating over. * * Return: true if we are running as a RoCE port and must force the * addition of a Global Route Header built from our Ethernet Address * Handle into our header list for connectionless packets. */ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH; } /** * rdma_max_mad_size - Return the max MAD size required by this RDMA Port. * * @device: Device * @port_num: Port number * * This MAD size includes the MAD headers and MAD payload. No other headers * are included. * * Return the max MAD size required by the Port. Will return 0 if the port * does not support MADs */ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num) { return device->port_immutable[port_num].max_mad_size; } /** * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table * @device: Device to check * @port_num: Port number to check * * RoCE GID table mechanism manages the various GIDs for a device. * * NOTE: if allocating the port's GID table has failed, this call will still * return true, but any RoCE GID table API will fail. * * Return: true if the port uses RoCE GID table mechanism in order to manage * its GIDs. */ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device, u8 port_num) { return rdma_protocol_roce(device, port_num) && device->add_gid && device->del_gid; } /* * Check if the device supports READ W/ INVALIDATE. */ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) { /* * iWarp drivers must support READ W/ INVALIDATE. No other protocol * has support for it yet. */ return rdma_protocol_iwarp(dev, port_num); } int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid, struct ib_gid_attr *attr); int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); int ib_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int ib_modify_device(struct ib_device *device, int device_modify_mask, struct ib_device_modify *device_modify); int ib_modify_port(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, enum ib_gid_type gid_type, struct net_device *ndev, u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); enum ib_pd_flags { /* * Create a memory registration for all memory in the system and place * the rkey for it into pd->unsafe_global_rkey. This can be used by * ULPs to avoid the overhead of dynamic MRs. * * This flag is generally considered unsafe and must only be used in * extremly trusted environments. Every use of it will log a warning * in the kernel log. */ IB_PD_UNSAFE_GLOBAL_RKEY = 0x01, }; struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, const char *caller); #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), __func__) void ib_dealloc_pd(struct ib_pd *pd); /** * ib_create_ah - Creates an address handle for the given address vector. * @pd: The protection domain associated with the address handle. * @ah_attr: The attributes of the address vector. * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); /** * ib_init_ah_from_wc - Initializes address handle attributes from a * work completion. * @device: Device on which the received message arrived. * @port_num: Port on which the received message arrived. * @wc: Work completion associated with the received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. */ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, struct ib_ah_attr *ah_attr); /** * ib_create_ah_from_wc - Creates an address handle associated with the * sender of the specified work completion. * @pd: The protection domain associated with the address handle. * @wc: Work completion information associated with a received message. * @grh: References the received global route header. This parameter is * ignored unless the work completion indicates that the GRH is valid. * @port_num: The outbound port number to associate with the address. * * The address handle is used to reference a local or global destination * in all UD QP post sends. */ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, const struct ib_grh *grh, u8 port_num); /** * ib_modify_ah - Modifies the address vector associated with an address * handle. * @ah: The address handle to modify. * @ah_attr: The new address vector attributes to associate with the * address handle. */ int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); /** * ib_query_ah - Queries the address vector associated with an address * handle. * @ah: The address handle to query. * @ah_attr: The address vector attributes associated with the address * handle. */ int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); /** * ib_destroy_ah - Destroys an address handle. * @ah: The address handle to destroy. */ int ib_destroy_ah(struct ib_ah *ah); /** * ib_create_srq - Creates a SRQ associated with the specified protection * domain. * @pd: The protection domain associated with the SRQ. * @srq_init_attr: A list of initial attributes required to create the * SRQ. If SRQ creation succeeds, then the attributes are updated to * the actual capabilities of the created SRQ. * * srq_attr->max_wr and srq_attr->max_sge are read the determine the * requested size of the SRQ, and set to the actual values allocated * on return. If ib_create_srq() succeeds, then max_wr and max_sge * will always be at least as large as the requested values. */ struct ib_srq *ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr); /** * ib_modify_srq - Modifies the attributes for the specified SRQ. * @srq: The SRQ to modify. * @srq_attr: On input, specifies the SRQ attributes to modify. On output, * the current values of selected SRQ attributes are returned. * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ * are being modified. * * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or * IB_SRQ_LIMIT to set the SRQ's limit and request notification when * the number of receives queued drops below the limit. */ int ib_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask); /** * ib_query_srq - Returns the attribute list and current values for the * specified SRQ. * @srq: The SRQ to query. * @srq_attr: The attributes of the specified SRQ. */ int ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); /** * ib_destroy_srq - Destroys the specified SRQ. * @srq: The SRQ to destroy. */ int ib_destroy_srq(struct ib_srq *srq); /** * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. * @srq: The SRQ to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. */ static inline int ib_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); } /** * ib_create_qp - Creates a QP associated with the specified protection * domain. * @pd: The protection domain associated with the QP. * @qp_init_attr: A list of initial attributes required to create the * QP. If QP creation succeeds, then the attributes are updated to * the actual capabilities of the created QP. */ struct ib_qp *ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); /** * ib_modify_qp - Modifies the attributes for the specified QP and then * transitions the QP to the given state. * @qp: The QP to modify. * @qp_attr: On input, specifies the QP attributes to modify. On output, * the current values of selected QP attributes are returned. * @qp_attr_mask: A bit-mask used to specify which attributes of the QP * are being modified. */ int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask); /** * ib_query_qp - Returns the attribute list and current values for the * specified QP. * @qp: The QP to query. * @qp_attr: The attributes of the specified QP. * @qp_attr_mask: A bit-mask used to select specific attributes to query. * @qp_init_attr: Additional attributes of the selected QP. * * The qp_attr_mask may be used to limit the query to gathering only the * selected attributes. */ int ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); /** * ib_destroy_qp - Destroys the specified QP. * @qp: The QP to destroy. */ int ib_destroy_qp(struct ib_qp *qp); /** * ib_open_qp - Obtain a reference to an existing sharable QP. * @xrcd - XRC domain * @qp_open_attr: Attributes identifying the QP to open. * * Returns a reference to a sharable QP. */ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, struct ib_qp_open_attr *qp_open_attr); /** * ib_close_qp - Release an external reference to a QP. * @qp: The QP handle to release * * The opened QP handle is released by the caller. The underlying * shared QP is not destroyed until all internal references are released. */ int ib_close_qp(struct ib_qp *qp); /** * ib_post_send - Posts a list of work requests to the send queue of * the specified QP. * @qp: The QP to post the work request on. * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. * * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate * error is returned, the QP state shall not be affected, * ib_post_send() will return an immediate error after queueing any * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, struct ib_send_wr **bad_send_wr) { return qp->device->post_send(qp, send_wr, bad_send_wr); } /** * ib_post_recv - Posts a list of work requests to the receive queue of * the specified QP. * @qp: The QP to post the work request on. * @recv_wr: A list of work requests to post on the receive queue. * @bad_recv_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. */ static inline int ib_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); void ib_free_cq(struct ib_cq *cq); /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. * @comp_handler: A user-specified callback that is invoked when a * completion event occurs on the CQ. * @event_handler: A user-specified callback that is invoked when an * asynchronous event not associated with a completion occurs on the CQ. * @cq_context: Context associated with the CQ returned to the user via * the associated completion and event handlers. * @cq_attr: The attributes the CQ should be created upon. * * Users can examine the cq structure to determine the actual CQ size. */ struct ib_cq *ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler, void (*event_handler)(struct ib_event *, void *), void *cq_context, const struct ib_cq_init_attr *cq_attr); /** * ib_resize_cq - Modifies the capacity of the CQ. * @cq: The CQ to resize. * @cqe: The minimum size of the CQ. * * Users can examine the cq structure to determine the actual CQ size. */ int ib_resize_cq(struct ib_cq *cq, int cqe); /** * ib_modify_cq - Modifies moderation params of the CQ * @cq: The CQ to modify. * @cq_count: number of CQEs that will trigger an event * @cq_period: max period of time in usec before triggering an event * */ int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); /** * ib_destroy_cq - Destroys the specified CQ. * @cq: The CQ to destroy. */ int ib_destroy_cq(struct ib_cq *cq); /** * ib_poll_cq - poll a CQ for completion(s) * @cq:the CQ being polled * @num_entries:maximum number of completions to return * @wc:array of at least @num_entries &struct ib_wc where completions * will be returned * * Poll a CQ for (possibly multiple) completions. If the return value * is < 0, an error occurred. If the return value is >= 0, it is the * number of completions returned. If the return value is * non-negative and < num_entries, then the CQ was emptied. */ static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { return cq->device->poll_cq(cq, num_entries, wc); } /** * ib_peek_cq - Returns the number of unreaped completions currently * on the specified CQ. * @cq: The CQ to peek. * @wc_cnt: A minimum number of unreaped completions to check for. * * If the number of unreaped completions is greater than or equal to wc_cnt, * this function returns wc_cnt, otherwise, it returns the actual number of * unreaped completions. */ int ib_peek_cq(struct ib_cq *cq, int wc_cnt); /** * ib_req_notify_cq - Request completion notification on a CQ. * @cq: The CQ to generate an event for. * @flags: * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP * to request an event on the next solicited event or next work * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS * may also be |ed in to request a hint about missed events, as * described below. * * Return Value: * < 0 means an error occurred while requesting notification * == 0 means notification was requested successfully, and if * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events * were missed and it is safe to wait for another event. In * this case is it guaranteed that any work completions added * to the CQ since the last CQ poll will trigger a completion * notification event. * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed * in. It means that the consumer must poll the CQ again to * make sure it is empty to avoid missing an event because of a * race between requesting notification and an entry being * added to the CQ. This return value means it is possible * (but not guaranteed) that a work completion has been added * to the CQ since the last poll without triggering a * completion notification event. */ static inline int ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags) { return cq->device->req_notify_cq(cq, flags); } /** * ib_req_ncomp_notif - Request completion notification when there are * at least the specified number of unreaped completions on the CQ. * @cq: The CQ to generate an event for. * @wc_cnt: The number of unreaped completions that should be on the * CQ before an event is generated. */ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) { return cq->device->req_ncomp_notif ? cq->device->req_ncomp_notif(cq, wc_cnt) : -ENOSYS; } /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created * @dma_addr: The DMA address to check */ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (dev->dma_ops) return dev->dma_ops->mapping_error(dev, dma_addr); return dma_mapping_error(dev->dma_device, dma_addr); } /** * ib_dma_map_single - Map a kernel virtual address to DMA address * @dev: The device for which the dma_addr is to be created * @cpu_addr: The kernel virtual address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline u64 ib_dma_map_single(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) return dev->dma_ops->map_single(dev, cpu_addr, size, direction); return dma_map_single(dev->dma_device, cpu_addr, size, direction); } /** * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single() * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline void ib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) dev->dma_ops->unmap_single(dev, addr, size, direction); else dma_unmap_single(dev->dma_device, addr, size, direction); } static inline u64 ib_dma_map_single_attrs(struct ib_device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *dma_attrs) { return dma_map_single_attrs(dev->dma_device, cpu_addr, size, direction, dma_attrs); } static inline void ib_dma_unmap_single_attrs(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction, struct dma_attrs *dma_attrs) { return dma_unmap_single_attrs(dev->dma_device, addr, size, direction, dma_attrs); } /** * ib_dma_map_page - Map a physical page to DMA address * @dev: The device for which the dma_addr is to be created * @page: The page to be mapped * @offset: The offset within the page * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline u64 ib_dma_map_page(struct ib_device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) return dev->dma_ops->map_page(dev, page, offset, size, direction); return dma_map_page(dev->dma_device, page, offset, size, direction); } /** * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page() * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @direction: The direction of the DMA */ static inline void ib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction direction) { if (dev->dma_ops) dev->dma_ops->unmap_page(dev, addr, size, direction); else dma_unmap_page(dev->dma_device, addr, size, direction); } /** * ib_dma_map_sg - Map a scatter/gather list to DMA addresses * @dev: The device for which the DMA addresses are to be created * @sg: The array of scatter/gather entries * @nents: The number of scatter/gather entries * @direction: The direction of the DMA */ static inline int ib_dma_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { if (dev->dma_ops) return dev->dma_ops->map_sg(dev, sg, nents, direction); return dma_map_sg(dev->dma_device, sg, nents, direction); } /** * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses * @dev: The device for which the DMA addresses were created * @sg: The array of scatter/gather entries * @nents: The number of scatter/gather entries * @direction: The direction of the DMA */ static inline void ib_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { if (dev->dma_ops) dev->dma_ops->unmap_sg(dev, sg, nents, direction); else dma_unmap_sg(dev->dma_device, sg, nents, direction); } static inline int ib_dma_map_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, struct dma_attrs *dma_attrs) { if (dev->dma_ops) return dev->dma_ops->map_sg_attrs(dev, sg, nents, direction, dma_attrs); else return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction, struct dma_attrs *dma_attrs) { if (dev->dma_ops) return dev->dma_ops->unmap_sg_attrs(dev, sg, nents, direction, dma_attrs); else dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs); } /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry * * Note: this function is obsolete. To do: change all occurrences of * ib_sg_dma_address() into sg_dma_address(). */ static inline u64 ib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) { return sg_dma_address(sg); } /** * ib_sg_dma_len - Return the DMA length from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry * * Note: this function is obsolete. To do: change all occurrences of * ib_sg_dma_len() into sg_dma_len(). */ static inline unsigned int ib_sg_dma_len(struct ib_device *dev, struct scatterlist *sg) { return sg_dma_len(sg); } /** * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @dir: The direction of the DMA */ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { if (dev->dma_ops) dev->dma_ops->sync_single_for_cpu(dev, addr, size, dir); else dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); } /** * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device * @dev: The device for which the DMA address was created * @addr: The DMA address * @size: The size of the region in bytes * @dir: The direction of the DMA */ static inline void ib_dma_sync_single_for_device(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { if (dev->dma_ops) dev->dma_ops->sync_single_for_device(dev, addr, size, dir); else dma_sync_single_for_device(dev->dma_device, addr, size, dir); } /** * ib_dma_alloc_coherent - Allocate memory and map it for DMA * @dev: The device for which the DMA address is requested * @size: The size of the region to allocate in bytes * @dma_handle: A pointer for returning the DMA address of the region * @flag: memory allocator flags */ static inline void *ib_dma_alloc_coherent(struct ib_device *dev, size_t size, u64 *dma_handle, gfp_t flag) { if (dev->dma_ops) return dev->dma_ops->alloc_coherent(dev, size, dma_handle, flag); else { dma_addr_t handle; void *ret; ret = dma_alloc_coherent(dev->dma_device, size, &handle, flag); *dma_handle = handle; return ret; } } /** * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent() * @dev: The device for which the DMA addresses were allocated * @size: The size of the region * @cpu_addr: the address returned by ib_dma_alloc_coherent() * @dma_handle: the DMA address returned by ib_dma_alloc_coherent() */ static inline void ib_dma_free_coherent(struct ib_device *dev, size_t size, void *cpu_addr, u64 dma_handle) { if (dev->dma_ops) dev->dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); else dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle); } /** * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. * * This function can fail, if the memory region has memory windows bound to it. */ int ib_dereg_mr(struct ib_mr *mr); struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR * R_Key and L_Key. * @mr - struct ib_mr pointer to be updated. * @newkey - new key to be used. */ static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) { mr->lkey = (mr->lkey & 0xffffff00) | newkey; mr->rkey = (mr->rkey & 0xffffff00) | newkey; } /** * ib_inc_rkey - increments the key portion of the given rkey. Can be used * for calculating a new rkey for type 2 memory windows. * @rkey - the rkey to increment. */ static inline u32 ib_inc_rkey(u32 rkey) { const u32 mask = 0x000000ff; return ((rkey + 1) & mask) | (rkey & ~mask); } /** * ib_alloc_fmr - Allocates a unmapped fast memory region. * @pd: The protection domain associated with the unmapped region. * @mr_access_flags: Specifies the memory access rights. * @fmr_attr: Attributes of the unmapped region. * * A fast memory region must be mapped before it can be used as part of * a work request. */ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr); /** * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. * @fmr: The fast memory region to associate with the pages. * @page_list: An array of physical pages to map to the fast memory region. * @list_len: The number of pages in page_list. * @iova: The I/O virtual address to use with the mapped region. */ static inline int ib_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len, u64 iova) { return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); } /** * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. * @fmr_list: A linked list of fast memory regions to unmap. */ int ib_unmap_fmr(struct list_head *fmr_list); /** * ib_dealloc_fmr - Deallocates a fast memory region. * @fmr: The fast memory region to deallocate. */ int ib_dealloc_fmr(struct ib_fmr *fmr); /** * ib_attach_mcast - Attaches the specified QP to a multicast group. * @qp: QP to attach to the multicast group. The QP must be type * IB_QPT_UD. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. * * In order to send and receive multicast packets, subnet * administration must have created the multicast group and configured * the fabric appropriately. The port associated with the specified * QP must also be a member of the multicast group. */ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); /** * ib_detach_mcast - Detaches the specified QP from a multicast group. * @qp: QP to detach from the multicast group. * @gid: Multicast group GID. * @lid: Multicast group LID in host byte order. */ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); /** * ib_alloc_xrcd - Allocates an XRC domain. * @device: The device on which to allocate the XRC domain. */ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); /** * ib_dealloc_xrcd - Deallocates an XRC domain. * @xrcd: The XRC domain to deallocate. */ int ib_dealloc_xrcd(struct ib_xrcd *xrcd); struct ib_flow *ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain); int ib_destroy_flow(struct ib_flow *flow_id); static inline int ib_check_mr_access(int flags) { /* * Local write permission is required if remote write or * remote atomic permission is also requested. */ if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; return 0; } /** * ib_check_mr_status: lightweight check of MR status. * This routine may provide status checks on a selected * ib_mr. first use is for signature status check. * * @mr: A memory region. * @check_mask: Bitmask of which checks to perform from * ib_mr_status_check enumeration. * @mr_status: The container of relevant status checks. * failed checks will be indicated in the status bitmask * and the relevant info shall be in the error item. */ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey, const union ib_gid *gid, const struct sockaddr *addr); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); int ib_destroy_wq(struct ib_wq *wq); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr* wq_ind_table_init_attr); int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); static inline int ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size) { int n; n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size); mr->iova = 0; return n; } int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64)); void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); #endif /* IB_VERBS_H */ Property changes on: stable/11/sys/ofed/include/rdma/ib_verbs.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/iw_cm.h =================================================================== --- stable/11/sys/ofed/include/rdma/iw_cm.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/iw_cm.h (revision 331772) @@ -1,255 +1,258 @@ /* * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ + #ifndef IW_CM_H #define IW_CM_H #include #include struct iw_cm_id; enum iw_cm_event_type { IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */ IW_CM_EVENT_CONNECT_REPLY, /* reply from active connect request */ IW_CM_EVENT_ESTABLISHED, /* passive side accept successful */ IW_CM_EVENT_DISCONNECT, /* orderly shutdown */ IW_CM_EVENT_CLOSE /* close complete */ }; struct iw_cm_event { enum iw_cm_event_type event; int status; struct sockaddr_storage local_addr; struct sockaddr_storage remote_addr; void *private_data; void *provider_data; u8 private_data_len; u8 ord; u8 ird; }; /** * iw_cm_handler - Function to be called by the IW CM when delivering events * to the client. * * @cm_id: The IW CM identifier associated with the event. * @event: Pointer to the event structure. */ typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id, struct iw_cm_event *event); /** * iw_event_handler - Function called by the provider when delivering provider * events to the IW CM. Returns either 0 indicating the event was processed * or -errno if the event could not be processed. * * @cm_id: The IW CM identifier associated with the event. * @event: Pointer to the event structure. */ typedef int (*iw_event_handler)(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id { iw_cm_handler cm_handler; /* client callback function */ void *context; /* client cb context */ struct ib_device *device; struct sockaddr_storage local_addr; /* local addr */ struct sockaddr_storage remote_addr; struct sockaddr_storage m_local_addr; /* nmapped local addr */ struct sockaddr_storage m_remote_addr; /* nmapped rem addr */ void *provider_data; /* provider private data */ iw_event_handler event_handler; /* cb for provider events */ /* Used by provider to add and remove refs on IW cm_id */ void (*add_ref)(struct iw_cm_id *); void (*rem_ref)(struct iw_cm_id *); u8 tos; }; struct iw_cm_conn_param { const void *private_data; u16 private_data_len; u32 ord; u32 ird; u32 qpn; }; struct iw_cm_verbs { void (*add_ref)(struct ib_qp *qp); void (*rem_ref)(struct ib_qp *qp); struct ib_qp * (*get_qp)(struct ib_device *device, int qpn); int (*connect)(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int (*accept)(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int (*reject)(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int (*create_listen)(struct iw_cm_id *cm_id, int backlog); int (*destroy_listen)(struct iw_cm_id *cm_id); char ifname[IFNAMSIZ]; }; /** * iw_create_cm_id - Create an IW CM identifier. * * @device: The IB device on which to create the IW CM identier. * @event_handler: User callback invoked to report events associated with the * returned IW CM identifier. * @context: User specified context associated with the id. */ struct iw_cm_id *iw_create_cm_id(struct ib_device *device, iw_cm_handler cm_handler, void *context); /** * iw_destroy_cm_id - Destroy an IW CM identifier. * * @cm_id: The previously created IW CM identifier to destroy. * * The client can assume that no events will be delivered for the CM ID after * this function returns. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id); /** * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP * * @cm_id: The IW CM idenfier to unbind from the QP. * @qp: The QP * * This is called by the provider when destroying the QP to ensure * that any references held by the IWCM are released. It may also * be called by the IWCM when destroying a CM_ID to that any * references held by the provider are released. */ void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp); /** * iw_cm_get_qp - Return the ib_qp associated with a QPN * * @ib_device: The IB device * @qpn: The queue pair number */ struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn); /** * iw_cm_listen - Listen for incoming connection requests on the * specified IW CM id. * * @cm_id: The IW CM identifier. * @backlog: The maximum number of outstanding un-accepted inbound listen * requests to queue. * * The source address and port number are specified in the IW CM identifier * structure. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog); /** * iw_cm_accept - Called to accept an incoming connect request. * * @cm_id: The IW CM identifier associated with the connection request. * @iw_param: Pointer to a structure containing connection establishment * parameters. * * The specified cm_id will have been provided in the event data for a * CONNECT_REQUEST event. Subsequent events related to this connection will be * delivered to the specified IW CM identifier prior and may occur prior to * the return of this function. If this function returns a non-zero value, the * client can assume that no events will be delivered to the specified IW CM * identifier. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); /** * iw_cm_reject - Reject an incoming connection request. * * @cm_id: Connection identifier associated with the request. * @private_daa: Pointer to data to deliver to the remote peer as part of the * reject message. * @private_data_len: The number of bytes in the private_data parameter. * * The client can assume that no events will be delivered to the specified IW * CM identifier following the return of this function. The private_data * buffer is available for reuse when this function returns. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len); /** * iw_cm_connect - Called to request a connection to a remote peer. * * @cm_id: The IW CM identifier for the connection. * @iw_param: Pointer to a structure containing connection establishment * parameters. * * Events may be delivered to the specified IW CM identifier prior to the * return of this function. If this function returns a non-zero value, the * client can assume that no events will be delivered to the specified IW CM * identifier. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param); /** * iw_cm_disconnect - Close the specified connection. * * @cm_id: The IW CM identifier to close. * @abrupt: If 0, the connection will be closed gracefully, otherwise, the * connection will be reset. * * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is * delivered. */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt); /** * iw_cm_init_qp_attr - Called to initialize the attributes of the QP * associated with a IW CM identifier. * * @cm_id: The IW CM identifier associated with the QP * @qp_attr: Pointer to the QP attributes structure. * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are * valid. */ int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); #endif /* IW_CM_H */ Property changes on: stable/11/sys/ofed/include/rdma/iw_cm.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/iw_portmap.h =================================================================== --- stable/11/sys/ofed/include/rdma/iw_portmap.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/iw_portmap.h (revision 331772) @@ -1,70 +1,75 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Intel Corporation. All rights reserved. * Copyright (c) 2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ + #ifndef _IW_PORTMAP_H #define _IW_PORTMAP_H #define IWPM_ULIBNAME_SIZE 32 #define IWPM_DEVNAME_SIZE 32 #define IWPM_IFNAME_SIZE 16 #define IWPM_IPADDR_SIZE 16 enum { IWPM_INVALID_NLMSG_ERR = 10, IWPM_CREATE_MAPPING_ERR, IWPM_DUPLICATE_MAPPING_ERR, IWPM_UNKNOWN_MAPPING_ERR, IWPM_CLIENT_DEV_INFO_ERR, IWPM_USER_LIB_INFO_ERR, IWPM_REMOTE_QUERY_REJECT }; struct iwpm_dev_data { char dev_name[IWPM_DEVNAME_SIZE]; char if_name[IWPM_IFNAME_SIZE]; }; struct iwpm_sa_data { struct sockaddr_storage loc_addr; struct sockaddr_storage mapped_loc_addr; struct sockaddr_storage rem_addr; struct sockaddr_storage mapped_rem_addr; }; /** * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid * * Returns true if the pid is greater than zero, otherwise returns false */ int iwpm_valid_pid(void); #endif /* _IW_PORTMAP_H */ Index: stable/11/sys/ofed/include/rdma/opa_port_info.h =================================================================== --- stable/11/sys/ofed/include/rdma/opa_port_info.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/opa_port_info.h (revision 331772) @@ -1,417 +1,421 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(OPA_PORT_INFO_H) #define OPA_PORT_INFO_H #define OPA_PORT_LINK_MODE_NOP 0 /* No change */ #define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */ #define OPA_PORT_PACKET_FORMAT_NOP 0 /* No change */ #define OPA_PORT_PACKET_FORMAT_8B 1 /* Format 8B */ #define OPA_PORT_PACKET_FORMAT_9B 2 /* Format 9B */ #define OPA_PORT_PACKET_FORMAT_10B 4 /* Format 10B */ #define OPA_PORT_PACKET_FORMAT_16B 8 /* Format 16B */ #define OPA_PORT_LTP_CRC_MODE_NONE 0 /* No change */ #define OPA_PORT_LTP_CRC_MODE_14 1 /* 14-bit LTP CRC mode (optional) */ #define OPA_PORT_LTP_CRC_MODE_16 2 /* 16-bit LTP CRC mode */ #define OPA_PORT_LTP_CRC_MODE_48 4 /* 48-bit LTP CRC mode (optional) */ #define OPA_PORT_LTP_CRC_MODE_PER_LANE 8 /* 12/16-bit per lane LTP CRC mode */ /* Link Down / Neighbor Link Down Reason; indicated as follows: */ #define OPA_LINKDOWN_REASON_NONE 0 /* No specified reason */ #define OPA_LINKDOWN_REASON_RCV_ERROR_0 1 #define OPA_LINKDOWN_REASON_BAD_PKT_LEN 2 #define OPA_LINKDOWN_REASON_PKT_TOO_LONG 3 #define OPA_LINKDOWN_REASON_PKT_TOO_SHORT 4 #define OPA_LINKDOWN_REASON_BAD_SLID 5 #define OPA_LINKDOWN_REASON_BAD_DLID 6 #define OPA_LINKDOWN_REASON_BAD_L2 7 #define OPA_LINKDOWN_REASON_BAD_SC 8 #define OPA_LINKDOWN_REASON_RCV_ERROR_8 9 #define OPA_LINKDOWN_REASON_BAD_MID_TAIL 10 #define OPA_LINKDOWN_REASON_RCV_ERROR_10 11 #define OPA_LINKDOWN_REASON_PREEMPT_ERROR 12 #define OPA_LINKDOWN_REASON_PREEMPT_VL15 13 #define OPA_LINKDOWN_REASON_BAD_VL_MARKER 14 #define OPA_LINKDOWN_REASON_RCV_ERROR_14 15 #define OPA_LINKDOWN_REASON_RCV_ERROR_15 16 #define OPA_LINKDOWN_REASON_BAD_HEAD_DIST 17 #define OPA_LINKDOWN_REASON_BAD_TAIL_DIST 18 #define OPA_LINKDOWN_REASON_BAD_CTRL_DIST 19 #define OPA_LINKDOWN_REASON_BAD_CREDIT_ACK 20 #define OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER 21 #define OPA_LINKDOWN_REASON_BAD_PREEMPT 22 #define OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT 23 #define OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT 24 #define OPA_LINKDOWN_REASON_RCV_ERROR_24 25 #define OPA_LINKDOWN_REASON_RCV_ERROR_25 26 #define OPA_LINKDOWN_REASON_RCV_ERROR_26 27 #define OPA_LINKDOWN_REASON_RCV_ERROR_27 28 #define OPA_LINKDOWN_REASON_RCV_ERROR_28 29 #define OPA_LINKDOWN_REASON_RCV_ERROR_29 30 #define OPA_LINKDOWN_REASON_RCV_ERROR_30 31 #define OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN 32 #define OPA_LINKDOWN_REASON_UNKNOWN 33 /* 34 -reserved */ #define OPA_LINKDOWN_REASON_REBOOT 35 #define OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN 36 /* 37-38 reserved */ #define OPA_LINKDOWN_REASON_FM_BOUNCE 39 #define OPA_LINKDOWN_REASON_SPEED_POLICY 40 #define OPA_LINKDOWN_REASON_WIDTH_POLICY 41 /* 42-48 reserved */ #define OPA_LINKDOWN_REASON_DISCONNECTED 49 #define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED 50 #define OPA_LINKDOWN_REASON_NOT_INSTALLED 51 #define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52 /* 53 reserved */ #define OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED 54 /* 55 reserved */ #define OPA_LINKDOWN_REASON_POWER_POLICY 56 #define OPA_LINKDOWN_REASON_LINKSPEED_POLICY 57 #define OPA_LINKDOWN_REASON_LINKWIDTH_POLICY 58 /* 59 reserved */ #define OPA_LINKDOWN_REASON_SWITCH_MGMT 60 #define OPA_LINKDOWN_REASON_SMA_DISABLED 61 /* 62 reserved */ #define OPA_LINKDOWN_REASON_TRANSIENT 63 /* 64-255 reserved */ /* OPA Link Init reason; indicated as follows: */ /* 3-7; 11-15 reserved; 8-15 cleared on Polling->LinkUp */ #define OPA_LINKINIT_REASON_NOP 0 #define OPA_LINKINIT_REASON_LINKUP (1 << 4) #define OPA_LINKINIT_REASON_FLAPPING (2 << 4) #define OPA_LINKINIT_REASON_CLEAR (8 << 4) #define OPA_LINKINIT_OUTSIDE_POLICY (8 << 4) #define OPA_LINKINIT_QUARANTINED (9 << 4) #define OPA_LINKINIT_INSUFIC_CAPABILITY (10 << 4) #define OPA_LINK_SPEED_NOP 0x0000 /* Reserved (1-5 Gbps) */ #define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */ #define OPA_LINK_SPEED_25G 0x0002 /* 25.78125? Gbps (EDR) */ #define OPA_LINK_WIDTH_1X 0x0001 #define OPA_LINK_WIDTH_2X 0x0002 #define OPA_LINK_WIDTH_3X 0x0004 #define OPA_LINK_WIDTH_4X 0x0008 #define OPA_CAP_MASK3_IsSnoopSupported (1 << 7) #define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6) #define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5) #define OPA_CAP_MASK3_IsPassThroughSupported (1 << 4) #define OPA_CAP_MASK3_IsSharedSpaceSupported (1 << 3) /* reserved (1 << 2) */ #define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1) #define OPA_CAP_MASK3_IsVLrSupported (1 << 0) /** * new MTU values */ enum { OPA_MTU_8192 = 6, OPA_MTU_10240 = 7, }; enum { OPA_PORT_PHYS_CONF_DISCONNECTED = 0, OPA_PORT_PHYS_CONF_STANDARD = 1, OPA_PORT_PHYS_CONF_FIXED = 2, OPA_PORT_PHYS_CONF_VARIABLE = 3, OPA_PORT_PHYS_CONF_SI_PHOTO = 4 }; enum port_info_field_masks { /* vl.cap */ OPA_PI_MASK_VL_CAP = 0x1F, /* port_states.ledenable_offlinereason */ OPA_PI_MASK_OFFLINE_REASON = 0x0F, OPA_PI_MASK_LED_ENABLE = 0x40, /* port_states.unsleepstate_downdefstate */ OPA_PI_MASK_UNSLEEP_STATE = 0xF0, OPA_PI_MASK_DOWNDEF_STATE = 0x0F, /* port_states.portphysstate_portstate */ OPA_PI_MASK_PORT_PHYSICAL_STATE = 0xF0, OPA_PI_MASK_PORT_STATE = 0x0F, /* port_phys_conf */ OPA_PI_MASK_PORT_PHYSICAL_CONF = 0x0F, /* collectivemask_multicastmask */ OPA_PI_MASK_COLLECT_MASK = 0x38, OPA_PI_MASK_MULTICAST_MASK = 0x07, /* mkeyprotect_lmc */ OPA_PI_MASK_MKEY_PROT_BIT = 0xC0, OPA_PI_MASK_LMC = 0x0F, /* smsl */ OPA_PI_MASK_SMSL = 0x1F, /* partenforce_filterraw */ /* Filter Raw In/Out bits 1 and 2 were removed */ OPA_PI_MASK_LINKINIT_REASON = 0xF0, OPA_PI_MASK_PARTITION_ENFORCE_IN = 0x08, OPA_PI_MASK_PARTITION_ENFORCE_OUT = 0x04, /* operational_vls */ OPA_PI_MASK_OPERATIONAL_VL = 0x1F, /* sa_qp */ OPA_PI_MASK_SA_QP = 0x00FFFFFF, /* sm_trap_qp */ OPA_PI_MASK_SM_TRAP_QP = 0x00FFFFFF, /* localphy_overrun_errors */ OPA_PI_MASK_LOCAL_PHY_ERRORS = 0xF0, OPA_PI_MASK_OVERRUN_ERRORS = 0x0F, /* clientrereg_subnettimeout */ OPA_PI_MASK_CLIENT_REREGISTER = 0x80, OPA_PI_MASK_SUBNET_TIMEOUT = 0x1F, /* port_link_mode */ OPA_PI_MASK_PORT_LINK_SUPPORTED = (0x001F << 10), OPA_PI_MASK_PORT_LINK_ENABLED = (0x001F << 5), OPA_PI_MASK_PORT_LINK_ACTIVE = (0x001F << 0), /* port_link_crc_mode */ OPA_PI_MASK_PORT_LINK_CRC_SUPPORTED = 0x0F00, OPA_PI_MASK_PORT_LINK_CRC_ENABLED = 0x00F0, OPA_PI_MASK_PORT_LINK_CRC_ACTIVE = 0x000F, /* port_mode */ OPA_PI_MASK_PORT_MODE_SECURITY_CHECK = 0x0001, OPA_PI_MASK_PORT_MODE_16B_TRAP_QUERY = 0x0002, OPA_PI_MASK_PORT_MODE_PKEY_CONVERT = 0x0004, OPA_PI_MASK_PORT_MODE_SC2SC_MAPPING = 0x0008, OPA_PI_MASK_PORT_MODE_VL_MARKER = 0x0010, OPA_PI_MASK_PORT_PASS_THROUGH = 0x0020, OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE = 0x0040, /* flit_control.interleave */ OPA_PI_MASK_INTERLEAVE_DIST_SUP = (0x0003 << 12), OPA_PI_MASK_INTERLEAVE_DIST_ENABLE = (0x0003 << 10), OPA_PI_MASK_INTERLEAVE_MAX_NEST_TX = (0x001F << 5), OPA_PI_MASK_INTERLEAVE_MAX_NEST_RX = (0x001F << 0), /* port_error_action */ OPA_PI_MASK_EX_BUFFER_OVERRUN = 0x80000000, /* 7 bits reserved */ OPA_PI_MASK_FM_CFG_ERR_EXCEED_MULTICAST_LIMIT = 0x00800000, OPA_PI_MASK_FM_CFG_BAD_CONTROL_FLIT = 0x00400000, OPA_PI_MASK_FM_CFG_BAD_PREEMPT = 0x00200000, OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER = 0x00100000, OPA_PI_MASK_FM_CFG_BAD_CRDT_ACK = 0x00080000, OPA_PI_MASK_FM_CFG_BAD_CTRL_DIST = 0x00040000, OPA_PI_MASK_FM_CFG_BAD_TAIL_DIST = 0x00020000, OPA_PI_MASK_FM_CFG_BAD_HEAD_DIST = 0x00010000, /* 2 bits reserved */ OPA_PI_MASK_PORT_RCV_BAD_VL_MARKER = 0x00002000, OPA_PI_MASK_PORT_RCV_PREEMPT_VL15 = 0x00001000, OPA_PI_MASK_PORT_RCV_PREEMPT_ERROR = 0x00000800, /* 1 bit reserved */ OPA_PI_MASK_PORT_RCV_BAD_MidTail = 0x00000200, /* 1 bit reserved */ OPA_PI_MASK_PORT_RCV_BAD_SC = 0x00000080, OPA_PI_MASK_PORT_RCV_BAD_L2 = 0x00000040, OPA_PI_MASK_PORT_RCV_BAD_DLID = 0x00000020, OPA_PI_MASK_PORT_RCV_BAD_SLID = 0x00000010, OPA_PI_MASK_PORT_RCV_PKTLEN_TOOSHORT = 0x00000008, OPA_PI_MASK_PORT_RCV_PKTLEN_TOOLONG = 0x00000004, OPA_PI_MASK_PORT_RCV_BAD_PKTLEN = 0x00000002, OPA_PI_MASK_PORT_RCV_BAD_LT = 0x00000001, /* pass_through.res_drctl */ OPA_PI_MASK_PASS_THROUGH_DR_CONTROL = 0x01, /* buffer_units */ OPA_PI_MASK_BUF_UNIT_VL15_INIT = (0x00000FFF << 11), OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE = (0x0000001F << 6), OPA_PI_MASK_BUF_UNIT_CREDIT_ACK = (0x00000003 << 3), OPA_PI_MASK_BUF_UNIT_BUF_ALLOC = (0x00000003 << 0), /* neigh_mtu.pvlx_to_mtu */ OPA_PI_MASK_NEIGH_MTU_PVL0 = 0xF0, OPA_PI_MASK_NEIGH_MTU_PVL1 = 0x0F, /* neigh_mtu.vlstall_hoq_life */ OPA_PI_MASK_VL_STALL = (0x03 << 5), OPA_PI_MASK_HOQ_LIFE = (0x1F << 0), /* port_neigh_mode */ OPA_PI_MASK_NEIGH_MGMT_ALLOWED = (0x01 << 3), OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS = (0x01 << 2), OPA_PI_MASK_NEIGH_NODE_TYPE = (0x03 << 0), /* resptime_value */ OPA_PI_MASK_RESPONSE_TIME_VALUE = 0x1F, /* mtucap */ OPA_PI_MASK_MTU_CAP = 0x0F, }; struct opa_port_states { u8 reserved; u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */ u8 reserved2; u8 portphysstate_portstate; /* 4 bits, 4 bits */ }; struct opa_port_state_info { struct opa_port_states port_states; __be16 link_width_downgrade_tx_active; __be16 link_width_downgrade_rx_active; }; struct opa_port_info { __be32 lid; __be32 flow_control_mask; struct { u8 res; /* was inittype */ u8 cap; /* 3 res, 5 bits */ __be16 high_limit; __be16 preempt_limit; u8 arb_high_cap; u8 arb_low_cap; } vl; struct opa_port_states port_states; u8 port_phys_conf; /* 4 res, 4 bits */ u8 collectivemask_multicastmask; /* 2 res, 3, 3 */ u8 mkeyprotect_lmc; /* 2 bits, 2 res, 4 bits */ u8 smsl; /* 3 res, 5 bits */ u8 partenforce_filterraw; /* bit fields */ u8 operational_vls; /* 3 res, 5 bits */ __be16 pkey_8b; __be16 pkey_10b; __be16 mkey_violations; __be16 pkey_violations; __be16 qkey_violations; __be32 sm_trap_qp; /* 8 bits, 24 bits */ __be32 sa_qp; /* 8 bits, 24 bits */ u8 neigh_port_num; u8 link_down_reason; u8 neigh_link_down_reason; u8 clientrereg_subnettimeout; /* 1 bit, 2 bits, 5 */ struct { __be16 supported; __be16 enabled; __be16 active; } link_speed; struct { __be16 supported; __be16 enabled; __be16 active; } link_width; struct { __be16 supported; __be16 enabled; __be16 tx_active; __be16 rx_active; } link_width_downgrade; __be16 port_link_mode; /* 1 res, 5 bits, 5 bits, 5 bits */ __be16 port_ltp_crc_mode; /* 4 res, 4 bits, 4 bits, 4 bits */ __be16 port_mode; /* 9 res, bit fields */ struct { __be16 supported; __be16 enabled; } port_packet_format; struct { __be16 interleave; /* 2 res, 2,2,5,5 */ struct { __be16 min_initial; __be16 min_tail; u8 large_pkt_limit; u8 small_pkt_limit; u8 max_small_pkt_limit; u8 preemption_limit; } preemption; } flit_control; __be32 reserved4; __be32 port_error_action; /* bit field */ struct { u8 egress_port; u8 res_drctl; /* 7 res, 1 */ } pass_through; __be16 mkey_lease_period; __be32 buffer_units; /* 9 res, 12, 5, 3, 3 */ __be32 reserved5; __be32 sm_lid; __be64 mkey; __be64 subnet_prefix; struct { u8 pvlx_to_mtu[OPA_MAX_VLS/2]; /* 4 bits, 4 bits */ } neigh_mtu; struct { u8 vlstall_hoqlife; /* 3 bits, 5 bits */ } xmit_q[OPA_MAX_VLS]; struct { u8 addr[16]; } ipaddr_ipv6; struct { u8 addr[4]; } ipaddr_ipv4; u32 reserved6; u32 reserved7; u32 reserved8; __be64 neigh_node_guid; __be32 ib_cap_mask; __be16 reserved9; /* was ib_cap_mask2 */ __be16 opa_cap_mask; __be32 reserved10; /* was link_roundtrip_latency */ __be16 overall_buffer_space; __be16 reserved11; /* was max_credit_hint */ __be16 diag_code; struct { u8 buffer; u8 wire; } replay_depth; u8 port_neigh_mode; u8 mtucap; /* 4 res, 4 bits */ u8 resptimevalue; /* 3 res, 5 bits */ u8 local_port_num; u8 reserved12; u8 reserved13; /* was guid_cap */ } __attribute__ ((packed)); #endif /* OPA_PORT_INFO_H */ Index: stable/11/sys/ofed/include/rdma/opa_smi.h =================================================================== --- stable/11/sys/ofed/include/rdma/opa_smi.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/opa_smi.h (revision 331772) @@ -1,153 +1,157 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(OPA_SMI_H) #define OPA_SMI_H #include #include #define OPA_SMP_LID_DATA_SIZE 2016 #define OPA_SMP_DR_DATA_SIZE 1872 #define OPA_SMP_MAX_PATH_HOPS 64 #define OPA_MAX_VLS 32 #define OPA_MAX_SLS 32 #define OPA_MAX_SCS 32 #define OPA_SMI_CLASS_VERSION 0x80 #define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF) struct opa_smp { u8 base_version; u8 mgmt_class; u8 class_version; u8 method; __be16 status; u8 hop_ptr; u8 hop_cnt; __be64 tid; __be16 attr_id; __be16 resv; __be32 attr_mod; __be64 mkey; union { struct { uint8_t data[OPA_SMP_LID_DATA_SIZE]; } lid; struct { __be32 dr_slid; __be32 dr_dlid; u8 initial_path[OPA_SMP_MAX_PATH_HOPS]; u8 return_path[OPA_SMP_MAX_PATH_HOPS]; u8 reserved[8]; u8 data[OPA_SMP_DR_DATA_SIZE]; } dr; } route; } __packed; /* Subnet management attributes */ /* ... */ #define OPA_ATTRIB_ID_NODE_DESCRIPTION cpu_to_be16(0x0010) #define OPA_ATTRIB_ID_NODE_INFO cpu_to_be16(0x0011) #define OPA_ATTRIB_ID_PORT_INFO cpu_to_be16(0x0015) #define OPA_ATTRIB_ID_PARTITION_TABLE cpu_to_be16(0x0016) #define OPA_ATTRIB_ID_SL_TO_SC_MAP cpu_to_be16(0x0017) #define OPA_ATTRIB_ID_VL_ARBITRATION cpu_to_be16(0x0018) #define OPA_ATTRIB_ID_SM_INFO cpu_to_be16(0x0020) #define OPA_ATTRIB_ID_CABLE_INFO cpu_to_be16(0x0032) #define OPA_ATTRIB_ID_AGGREGATE cpu_to_be16(0x0080) #define OPA_ATTRIB_ID_SC_TO_SL_MAP cpu_to_be16(0x0082) #define OPA_ATTRIB_ID_SC_TO_VLR_MAP cpu_to_be16(0x0083) #define OPA_ATTRIB_ID_SC_TO_VLT_MAP cpu_to_be16(0x0084) #define OPA_ATTRIB_ID_SC_TO_VLNT_MAP cpu_to_be16(0x0085) /* ... */ #define OPA_ATTRIB_ID_PORT_STATE_INFO cpu_to_be16(0x0087) /* ... */ #define OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE cpu_to_be16(0x008A) /* ... */ struct opa_node_description { u8 data[64]; } __attribute__ ((packed)); struct opa_node_info { u8 base_version; u8 class_version; u8 node_type; u8 num_ports; __be32 reserved; __be64 system_image_guid; __be64 node_guid; __be64 port_guid; __be16 partition_cap; __be16 device_id; __be32 revision; u8 local_port_num; u8 vendor_id[3]; /* network byte order */ } __attribute__ ((packed)); #define OPA_PARTITION_TABLE_BLK_SIZE 32 static inline u8 opa_get_smp_direction(const struct opa_smp *smp) { return ib_get_smp_direction((const struct ib_smp *)smp); } static inline u8 *opa_get_smp_data(struct opa_smp *smp) { if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return smp->route.dr.data; return smp->route.lid.data; } static inline size_t opa_get_smp_data_size(const struct opa_smp *smp) { if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return sizeof(smp->route.dr.data); return sizeof(smp->route.lid.data); } static inline size_t opa_get_smp_header_size(const struct opa_smp *smp) { if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return sizeof(*smp) - sizeof(smp->route.dr.data); return sizeof(*smp) - sizeof(smp->route.lid.data); } #endif /* OPA_SMI_H */ Index: stable/11/sys/ofed/include/rdma/rdma_cm.h =================================================================== --- stable/11/sys/ofed/include/rdma/rdma_cm.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/rdma_cm.h (revision 331772) @@ -1,391 +1,393 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(RDMA_CM_H) #define RDMA_CM_H #include #include #include #include /* * Upon receiving a device removal event, users must destroy the associated * RDMA identifier and release all resources allocated with the device. */ enum rdma_cm_event_type { RDMA_CM_EVENT_ADDR_RESOLVED, RDMA_CM_EVENT_ADDR_ERROR, RDMA_CM_EVENT_ROUTE_RESOLVED, RDMA_CM_EVENT_ROUTE_ERROR, RDMA_CM_EVENT_CONNECT_REQUEST, RDMA_CM_EVENT_CONNECT_RESPONSE, RDMA_CM_EVENT_CONNECT_ERROR, RDMA_CM_EVENT_UNREACHABLE, RDMA_CM_EVENT_REJECTED, RDMA_CM_EVENT_ESTABLISHED, RDMA_CM_EVENT_DISCONNECTED, RDMA_CM_EVENT_DEVICE_REMOVAL, RDMA_CM_EVENT_MULTICAST_JOIN, RDMA_CM_EVENT_MULTICAST_ERROR, RDMA_CM_EVENT_ADDR_CHANGE, RDMA_CM_EVENT_TIMEWAIT_EXIT }; const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event); enum rdma_port_space { RDMA_PS_SDP = 0x0001, RDMA_PS_IPOIB = 0x0002, RDMA_PS_IB = 0x013F, RDMA_PS_TCP = 0x0106, RDMA_PS_UDP = 0x0111, }; #define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL #define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL #define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL #define RDMA_IB_IP_PS_IB 0x00000000013F0000ULL struct rdma_addr { struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr dev_addr; }; struct rdma_route { struct rdma_addr addr; struct ib_sa_path_rec *path_rec; int num_paths; }; struct rdma_conn_param { const void *private_data; u8 private_data_len; u8 responder_resources; u8 initiator_depth; u8 flow_control; u8 retry_count; /* ignored when accepting */ u8 rnr_retry_count; /* Fields below ignored if a QP is created on the rdma_cm_id. */ u8 srq; u32 qp_num; u32 qkey; }; struct rdma_ud_param { const void *private_data; u8 private_data_len; struct ib_ah_attr ah_attr; u32 qp_num; u32 qkey; }; struct rdma_cm_event { enum rdma_cm_event_type event; int status; union { struct rdma_conn_param conn; struct rdma_ud_param ud; } param; }; enum rdma_cm_state { RDMA_CM_IDLE, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT, RDMA_CM_DISCONNECT, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN, RDMA_CM_DEVICE_REMOVAL, RDMA_CM_DESTROYING }; struct rdma_cm_id; /** * rdma_cm_event_handler - Callback used to report user events. * * Notes: Users may not call rdma_destroy_id from this callback to destroy * the passed in id, or a corresponding listen id. Returning a * non-zero value from the callback will destroy the passed in id. */ typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id, struct rdma_cm_event *event); struct rdma_cm_id { struct ib_device *device; void *context; struct ib_qp *qp; rdma_cm_event_handler event_handler; struct rdma_route route; enum rdma_port_space ps; enum ib_qp_type qp_type; u8 port_num; }; /** * rdma_create_id - Create an RDMA identifier. * * @net: The network namespace in which to create the new id. * @event_handler: User callback invoked to report events associated with the * returned rdma_id. * @context: User specified context associated with the id. * @ps: RDMA port space. * @qp_type: type of queue pair associated with the id. * * The id holds a reference on the network namespace until it is destroyed. */ struct rdma_cm_id *rdma_create_id(struct vnet *net, rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type); /** * rdma_destroy_id - Destroys an RDMA identifier. * * @id: RDMA identifier. * * Note: calling this function has the effect of canceling in-flight * asynchronous operations associated with the id. */ void rdma_destroy_id(struct rdma_cm_id *id); /** * rdma_bind_addr - Bind an RDMA identifier to a source address and * associated RDMA device, if needed. * * @id: RDMA identifier. * @addr: Local address information. Wildcard values are permitted. * * This associates a source address with the RDMA identifier before calling * rdma_listen. If a specific local address is given, the RDMA identifier will * be bound to a local RDMA device. */ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr); /** * rdma_resolve_addr - Resolve destination and optional source addresses * from IP addresses to an RDMA address. If successful, the specified * rdma_cm_id will be bound to a local device. * * @id: RDMA identifier. * @src_addr: Source address information. This parameter may be NULL. * @dst_addr: Destination address information. * @timeout_ms: Time to wait for resolution to complete. */ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr, int timeout_ms); /** * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier * into route information needed to establish a connection. * * This is called on the client side of a connection. * Users must have first called rdma_resolve_addr to resolve a dst_addr * into an RDMA address before calling this routine. */ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms); /** * rdma_create_qp - Allocate a QP and associate it with the specified RDMA * identifier. * * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA * through their states. */ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr); /** * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA * identifier. * * Users must destroy any QP associated with an RDMA identifier before * destroying the RDMA ID. */ void rdma_destroy_qp(struct rdma_cm_id *id); /** * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning * to a specified QP state. * @id: Communication identifier associated with the QP attributes to * initialize. * @qp_attr: On input, specifies the desired QP state. On output, the * mandatory and desired optional attributes will be set in order to * modify the QP to the specified state. * @qp_attr_mask: The QP attribute mask that may be used to transition the * QP to the specified state. * * Users must set the @qp_attr->qp_state to the desired QP state. This call * will set all required attributes for the given transition, along with * known optional attributes. Users may override the attributes returned from * this call before calling ib_modify_qp. * * Users that wish to have their QP automatically transitioned through its * states can associate a QP with the rdma_cm_id by calling rdma_create_qp(). */ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask); /** * rdma_connect - Initiate an active connection request. * @id: Connection identifier to connect. * @conn_param: Connection information used for connected QPs. * * Users must have resolved a route for the rdma_cm_id to connect with * by having called rdma_resolve_route before calling this routine. * * This call will either connect to a remote QP or obtain remote QP * information for unconnected rdma_cm_id's. The actual operation is * based on the rdma_cm_id's port space. */ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); /** * rdma_listen - This function is called by the passive side to * listen for incoming connection requests. * * Users must have bound the rdma_cm_id to a local address by calling * rdma_bind_addr before calling this routine. */ int rdma_listen(struct rdma_cm_id *id, int backlog); /** * rdma_accept - Called to accept a connection request or response. * @id: Connection identifier associated with the request. * @conn_param: Information needed to establish the connection. This must be * provided if accepting a connection request. If accepting a connection * response, this parameter must be NULL. * * Typically, this routine is only called by the listener to accept a connection * request. It must also be called on the active side of a connection if the * user is performing their own QP transitions. * * In the case of error, a reject message is sent to the remote side and the * state of the qp associated with the id is modified to error, such that any * previously posted receive buffers would be flushed. */ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); /** * rdma_notify - Notifies the RDMA CM of an asynchronous event that has * occurred on the connection. * @id: Connection identifier to transition to established. * @event: Asynchronous event. * * This routine should be invoked by users to notify the CM of relevant * communication events. Events that should be reported to the CM and * when to report them are: * * IB_EVENT_COMM_EST - Used when a message is received on a connected * QP before an RTU has been received. */ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event); /** * rdma_reject - Called to reject a connection request or response. */ int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len); /** * rdma_disconnect - This function disconnects the associated QP and * transitions it into the error state. */ int rdma_disconnect(struct rdma_cm_id *id); /** * rdma_join_multicast - Join the multicast group specified by the given * address. * @id: Communication identifier associated with the request. * @addr: Multicast address identifying the group to join. * @join_state: Multicast JoinState bitmap requested by port. * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits. * @context: User-defined context associated with the join request, returned * to the user through the private_data pointer in multicast events. */ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, u8 join_state, void *context); /** * rdma_leave_multicast - Leave the multicast group specified by the given * address. */ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr); /** * rdma_set_service_type - Set the type of service associated with a * connection identifier. * @id: Communication identifier to associated with service type. * @tos: Type of service. * * The type of service is interpretted as a differentiated service * field (RFC 2474). The service type should be specified before * performing route resolution, as existing communication on the * connection identifier may be unaffected. The type of service * requested may not be supported by the network to all destinations. */ void rdma_set_service_type(struct rdma_cm_id *id, int tos); /** * rdma_set_reuseaddr - Allow the reuse of local addresses when binding * the rdma_cm_id. * @id: Communication identifier to configure. * @reuse: Value indicating if the bound address is reusable. * * Reuse must be set before an address is bound to the id. */ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse); /** * rdma_set_afonly - Specify that listens are restricted to the * bound address family only. * @id: Communication identifer to configure. * @afonly: Value indicating if listens are restricted. * * Must be set before identifier is in the listening state. */ int rdma_set_afonly(struct rdma_cm_id *id, int afonly); /** * rdma_get_service_id - Return the IB service ID for a specified address. * @id: Communication identifier associated with the address. * @addr: Address for the service ID. */ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr); #endif /* RDMA_CM_H */ Property changes on: stable/11/sys/ofed/include/rdma/rdma_cm.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/rdma_cm_ib.h =================================================================== --- stable/11/sys/ofed/include/rdma/rdma_cm_ib.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/rdma_cm_ib.h (revision 331772) @@ -1,54 +1,56 @@ /* * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #if !defined(RDMA_CM_IB_H) #define RDMA_CM_IB_H #include /** * rdma_set_ib_paths - Manually sets the path records used to establish a * connection. * @id: Connection identifier associated with the request. * @path_rec: Reference to the path record * * This call permits a user to specify routing information for rdma_cm_id's * bound to Infiniband devices. It is called on the client side of a * connection and replaces the call to rdma_resolve_route. */ int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths); /* Global qkey for UDP QPs and multicast groups. */ #define RDMA_UDP_QKEY 0x01234567 #endif /* RDMA_CM_IB_H */ Property changes on: stable/11/sys/ofed/include/rdma/rdma_cm_ib.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/rdma/rdma_vt.h =================================================================== --- stable/11/sys/ofed/include/rdma/rdma_vt.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/rdma_vt.h (revision 331772) @@ -1,500 +1,503 @@ #ifndef DEF_RDMA_VT_H #define DEF_RDMA_VT_H -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * $FreeBSD$ */ /* * Structure that low level drivers will populate in order to register with the * rdmavt layer. */ #include #include #include #include #include #include #define RVT_MAX_PKEY_VALUES 16 struct rvt_ibport { struct rvt_qp __rcu *qp[2]; struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ struct rb_root mcast_tree; spinlock_t lock; /* protect changes in this struct */ /* non-zero when timer is set */ unsigned long mkey_lease_timeout; unsigned long trap_timeout; __be64 gid_prefix; /* in network order */ __be64 mkey; u64 tid; u32 port_cap_flags; u32 pma_sample_start; u32 pma_sample_interval; __be16 pma_counter_select[5]; u16 pma_tag; u16 mkey_lease_period; u16 sm_lid; u8 sm_sl; u8 mkeyprot; u8 subnet_timeout; u8 vl_high_limit; /* * Driver is expected to keep these up to date. These * counters are informational only and not required to be * completely accurate. */ u64 n_rc_resends; u64 n_seq_naks; u64 n_rdma_seq; u64 n_rnr_naks; u64 n_other_naks; u64 n_loop_pkts; u64 n_pkt_drops; u64 n_vl15_dropped; u64 n_rc_timeouts; u64 n_dmawait; u64 n_unaligned; u64 n_rc_dupreq; u64 n_rc_seqnak; u16 pkey_violations; u16 qkey_violations; u16 mkey_violations; /* Hot-path per CPU counters to avoid cacheline trading to update */ u64 z_rc_acks; u64 z_rc_qacks; u64 z_rc_delayed_comp; u64 __percpu *rc_acks; u64 __percpu *rc_qacks; u64 __percpu *rc_delayed_comp; void *priv; /* driver private data */ /* * The pkey table is allocated and maintained by the driver. Drivers * need to have access to this before registering with rdmav. However * rdmavt will need access to it so drivers need to proviee this during * the attach port API call. */ u16 *pkey_table; struct rvt_ah *sm_ah; }; #define RVT_CQN_MAX 16 /* maximum length of cq name */ /* * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { struct ib_device_attr props; /* * Anything driver specific that is not covered by props * For instance special module parameters. Goes here. */ unsigned int lkey_table_size; unsigned int qp_table_size; int qpn_start; int qpn_inc; int qpn_res_start; int qpn_res_end; int nports; int npkeys; char cq_name[RVT_CQN_MAX]; int node; int psn_mask; int psn_shift; int psn_modify_mask; u32 core_cap_flags; u32 max_mad_size; u8 qos_shift; u8 max_rdma_atomic; u8 reserved_operations; }; /* Protection domain */ struct rvt_pd { struct ib_pd ibpd; int user; /* non-zero if created from user space */ }; /* Address handle */ struct rvt_ah { struct ib_ah ibah; struct ib_ah_attr attr; atomic_t refcount; u8 vl; u8 log_pmtu; }; struct rvt_dev_info; struct rvt_swqe; struct rvt_driver_provided { /* * Which functions are required depends on which verbs rdmavt is * providing and which verbs the driver is overriding. See * check_support() for details. */ /* Passed to ib core registration. Callback to create syfs files */ int (*port_callback)(struct ib_device *, u8, struct kobject *); /* * Returns a string to represent the device for which is being * registered. This is primarily used for error and debug messages on * the console. */ const char * (*get_card_name)(struct rvt_dev_info *rdi); /* * Returns a pointer to the undelying hardware's PCI device. This is * used to display information as to what hardware is being referenced * in an output message */ struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); /* * Allocate a private queue pair data structure for driver specific * information which is opaque to rdmavt. Errors are returned via * ERR_PTR(err). The driver is free to return NULL or a valid * pointer. */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); /* * Free the driver's private qp structure. */ void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); /* * Inform the driver the particular qp in quesiton has been reset so * that it can clean up anything it needs to. */ void (*notify_qp_reset)(struct rvt_qp *qp); /* * Give the driver a notice that there is send work to do. It is up to * the driver to generally push the packets out, this just queues the * work with the driver. There are two variants here. The no_lock * version requires the s_lock not to be held. The other assumes the * s_lock is held. */ void (*schedule_send)(struct rvt_qp *qp); void (*schedule_send_no_lock)(struct rvt_qp *qp); /* * Sometimes rdmavt needs to kick the driver's send progress. That is * done by this call back. */ void (*do_send)(struct rvt_qp *qp); /* * Get a path mtu from the driver based on qp attributes. */ int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); /* * Notify driver that it needs to flush any outstanding IO requests that * are waiting on a qp. */ void (*flush_qp_waiters)(struct rvt_qp *qp); /* * Notify driver to stop its queue of sending packets. Nothing else * should be posted to the queue pair after this has been called. */ void (*stop_send_queue)(struct rvt_qp *qp); /* * Have the drivr drain any in progress operations */ void (*quiesce_qp)(struct rvt_qp *qp); /* * Inform the driver a qp has went to error state. */ void (*notify_error_qp)(struct rvt_qp *qp); /* * Get an MTU for a qp. */ u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); /* * Convert an mtu to a path mtu */ int (*mtu_to_path_mtu)(u32 mtu); /* * Get the guid of a port in big endian byte order */ int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, int guid_index, __be64 *guid); /* * Query driver for the state of the port. */ int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num, struct ib_port_attr *props); /* * Tell driver to shutdown a port */ int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num); /* Tell driver to send a trap for changed port capabilities */ void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num); /* * The following functions can be safely ignored completely. Any use of * these is checked for NULL before blindly calling. Rdmavt should also * be functional if drivers omit these. */ /* Called to inform the driver that all qps should now be freed. */ unsigned (*free_all_qps)(struct rvt_dev_info *rdi); /* Driver specific AH validation */ int (*check_ah)(struct ib_device *, struct ib_ah_attr *); /* Inform the driver a new AH has been created */ void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); /* Let the driver pick the next queue pair number*/ int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port_num, gfp_t gfp); /* Determine if its safe or allowed to modify the qp */ int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); /* Driver specific QP modification/notification-of */ void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); /* Driver specific work request checking */ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); /* Notify driver a mad agent has been created */ void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); /* Notify driver a mad agent has been removed */ void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); }; struct rvt_dev_info { struct ib_device ibdev; /* Keep this first. Nothing above here */ /* * Prior to calling for registration the driver will be responsible for * allocating space for this structure. * * The driver will also be responsible for filling in certain members of * dparms.props. The driver needs to fill in dparms exactly as it would * want values reported to a ULP. This will be returned to the caller * in rdmavt's device. The driver should also therefore refrain from * modifying this directly after registration with rdmavt. */ /* Driver specific properties */ struct rvt_driver_params dparms; /* post send table */ const struct rvt_operation_params *post_parms; struct rvt_mregion __rcu *dma_mr; struct rvt_lkey_table lkey_table; /* Driver specific helper functions */ struct rvt_driver_provided driver_f; /* Internal use */ int n_pds_allocated; spinlock_t n_pds_lock; /* Protect pd allocated count */ int n_ahs_allocated; spinlock_t n_ahs_lock; /* Protect ah allocated count */ u32 n_srqs_allocated; spinlock_t n_srqs_lock; /* Protect srqs allocated count */ int flags; struct rvt_ibport **ports; /* QP */ struct rvt_qp_ibdev *qp_dev; u32 n_qps_allocated; /* number of QPs allocated for device */ u32 n_rc_qps; /* number of RC QPs allocated for device */ u32 busy_jiffies; /* timeout scaling based on RC QP count */ spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ /* memory maps */ struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* protect mmap_offset */ u32 mmap_offset; spinlock_t pending_lock; /* protect pending mmap list */ /* CQ */ struct kthread_worker *worker; /* per device cq worker */ u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; /* protect count of in use cqs */ /* Multicast */ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ spinlock_t n_mcast_grps_lock; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) { return container_of(ibpd, struct rvt_pd, ibpd); } static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah) { return container_of(ibah, struct rvt_ah, ibah); } static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) { return container_of(ibdev, struct rvt_dev_info, ibdev); } static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct rvt_srq, ibsrq); } static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) { return container_of(ibqp, struct rvt_qp, ibqp); } static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* * All ports have same number of pkeys. */ return rdi->dparms.npkeys; } /* * Return the max atomic suitable for determining * the size of the ack ring buffer in a QP. */ static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi) { return rdi->dparms.max_rdma_atomic + 1; } /* * Return the indexed PKEY from the port PKEY table. */ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, int port_index, unsigned index) { if (index >= rvt_get_npkeys(rdi)) return 0; else return rdi->ports[port_index]->pkey_table[index]; } /** * rvt_lookup_qpn - return the QP with the given QPN * @ibp: the ibport * @qpn: the QP number to look up * * The caller must hold the rcu_read_lock(), and keep the lock until * the returned qp is no longer in use. */ /* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, u32 qpn) __must_hold(RCU) { struct rvt_qp *qp = NULL; if (unlikely(qpn <= 1)) { qp = rcu_dereference(rvp->qp[qpn]); } else { u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; qp = rcu_dereference(qp->next)) if (qp->ibqp.qp_num == qpn) break; } return qp; } struct rvt_dev_info *rvt_alloc_device(size_t size, int nports); void rvt_dealloc_device(struct rvt_dev_info *rdi); int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table); int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, int access); int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey); int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); #endif /* DEF_RDMA_VT_H */ Index: stable/11/sys/ofed/include/rdma/rdmavt_cq.h =================================================================== --- stable/11/sys/ofed/include/rdma/rdmavt_cq.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/rdmavt_cq.h (revision 331772) @@ -1,99 +1,101 @@ #ifndef DEF_RDMAVT_INCCQ_H #define DEF_RDMAVT_INCCQ_H -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Copyright(c) 2015 Intel Corporation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * $FreeBSD$ */ #include #include /* * Define an ib_cq_notify value that is not valid so we know when CQ * notifications are armed. */ #define RVT_CQ_NONE (IB_CQ_NEXT_COMP + 1) /* * This structure is used to contain the head pointer, tail pointer, * and completion queue entries as a single memory allocation so * it can be mmap'ed into user space. */ struct rvt_cq_wc { u32 head; /* index of next entry to fill */ u32 tail; /* index of next ib_poll_cq() entry */ union { /* these are actually size ibcq.cqe + 1 */ struct ib_uverbs_wc uqueue[0]; struct ib_wc kqueue[0]; }; }; /* * The completion queue structure. */ struct rvt_cq { struct ib_cq ibcq; struct kthread_work comptask; spinlock_t lock; /* protect changes in this struct */ u8 notify; u8 triggered; struct rvt_dev_info *rdi; struct rvt_cq_wc *queue; struct rvt_mmap_info *ip; }; static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq) { return container_of(ibcq, struct rvt_cq, ibcq); } void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited); #endif /* DEF_RDMAVT_INCCQH */ Index: stable/11/sys/ofed/include/rdma/rdmavt_mr.h =================================================================== --- stable/11/sys/ofed/include/rdma/rdmavt_mr.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/rdmavt_mr.h (revision 331772) @@ -1,140 +1,143 @@ #ifndef DEF_RDMAVT_INCMR_H #define DEF_RDMAVT_INCMR_H -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * $FreeBSD$ */ /* * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once * drivers no longer need access to the MR directly. */ /* * A segment is a linear region of low physical memory. * Used by the verbs layer. */ struct rvt_seg { void *vaddr; size_t length; }; /* The number of rvt_segs that fit in a page. */ #define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) struct rvt_segarray { struct rvt_seg segs[RVT_SEGSZ]; }; struct rvt_mregion { struct ib_pd *pd; /* shares refcnt of ibmr.pd */ u64 user_base; /* User's address for this region */ u64 iova; /* IB start address of this region */ size_t length; u32 lkey; u32 offset; /* offset (bytes) to start of region */ int access_flags; u32 max_segs; /* number of rvt_segs in all the arrays */ u32 mapsz; /* size of the map array */ u8 page_shift; /* 0 - non unform/non powerof2 sizes */ u8 lkey_published; /* in global table */ atomic_t lkey_invalid; /* true if current lkey is invalid */ struct completion comp; /* complete when refcount goes to zero */ atomic_t refcount; struct rvt_segarray *map[0]; /* the segments */ }; #define RVT_MAX_LKEY_TABLE_BITS 23 struct rvt_lkey_table { spinlock_t lock; /* protect changes in this struct */ u32 next; /* next unused index (speeds search) */ u32 gen; /* generation count */ u32 max; /* size of the table */ struct rvt_mregion __rcu **table; }; /* * These keep track of the copy progress within a memory region. * Used by the verbs layer. */ struct rvt_sge { struct rvt_mregion *mr; void *vaddr; /* kernel virtual address of segment */ u32 sge_length; /* length of the SGE */ u32 length; /* remaining length of the segment */ u16 m; /* current index: mr->map[m] */ u16 n; /* current index: mr->map[m]->segs[n] */ }; struct rvt_sge_state { struct rvt_sge *sg_list; /* next SGE to be used if any */ struct rvt_sge sge; /* progress state for the current SGE */ u32 total_len; u8 num_sge; }; static inline void rvt_put_mr(struct rvt_mregion *mr) { if (unlikely(atomic_dec_and_test(&mr->refcount))) complete(&mr->comp); } static inline void rvt_get_mr(struct rvt_mregion *mr) { atomic_inc(&mr->refcount); } static inline void rvt_put_ss(struct rvt_sge_state *ss) { while (ss->num_sge) { rvt_put_mr(ss->sge.mr); if (--ss->num_sge) ss->sge = *ss->sg_list++; } } #endif /* DEF_RDMAVT_INCMRH */ Index: stable/11/sys/ofed/include/rdma/rdmavt_qp.h =================================================================== --- stable/11/sys/ofed/include/rdma/rdmavt_qp.h (revision 331771) +++ stable/11/sys/ofed/include/rdma/rdmavt_qp.h (revision 331772) @@ -1,535 +1,538 @@ #ifndef DEF_RDMAVT_INCQP_H #define DEF_RDMAVT_INCQP_H -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * BSD LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * - Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * $FreeBSD$ */ #include #include #include /* * Atomic bit definitions for r_aflags. */ #define RVT_R_WRID_VALID 0 #define RVT_R_REWIND_SGE 1 /* * Bit definitions for r_flags. */ #define RVT_R_REUSE_SGE 0x01 #define RVT_R_RDMAR_SEQ 0x02 #define RVT_R_RSP_NAK 0x04 #define RVT_R_RSP_SEND 0x08 #define RVT_R_COMM_EST 0x10 /* * Bit definitions for s_flags. * * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled * RVT_S_BUSY - send tasklet is processing the QP * RVT_S_TIMER - the RC retry timer is active * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs * before processing the next SWQE * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete * before processing the next SWQE * RVT_S_WAIT_RNR - waiting for RNR timeout * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating * next send completion entry not via send DMA * RVT_S_WAIT_PIO - waiting for a send buffer to be available * RVT_S_WAIT_PIO_DRAIN - waiting for a qp to drain pio packets * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available * RVT_S_WAIT_KMEM - waiting for kernel memory to be available * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK * RVT_S_ECN - a BECN was queued to the send engine */ #define RVT_S_SIGNAL_REQ_WR 0x0001 #define RVT_S_BUSY 0x0002 #define RVT_S_TIMER 0x0004 #define RVT_S_RESP_PENDING 0x0008 #define RVT_S_ACK_PENDING 0x0010 #define RVT_S_WAIT_FENCE 0x0020 #define RVT_S_WAIT_RDMAR 0x0040 #define RVT_S_WAIT_RNR 0x0080 #define RVT_S_WAIT_SSN_CREDIT 0x0100 #define RVT_S_WAIT_DMA 0x0200 #define RVT_S_WAIT_PIO 0x0400 #define RVT_S_WAIT_PIO_DRAIN 0x0800 #define RVT_S_WAIT_TX 0x1000 #define RVT_S_WAIT_DMA_DESC 0x2000 #define RVT_S_WAIT_KMEM 0x4000 #define RVT_S_WAIT_PSN 0x8000 #define RVT_S_WAIT_ACK 0x10000 #define RVT_S_SEND_ONE 0x20000 #define RVT_S_UNLIMITED_CREDIT 0x40000 #define RVT_S_AHG_VALID 0x80000 #define RVT_S_AHG_CLEAR 0x100000 #define RVT_S_ECN 0x200000 /* * Wait flags that would prevent any packet type from being sent. */ #define RVT_S_ANY_WAIT_IO \ (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \ RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) /* * Wait flags that would prevent send work requests from making progress. */ #define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) #define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) /* Number of bits to pay attention to in the opcode for checking qp type */ #define RVT_OPCODE_QP_MASK 0xE0 /* Flags for checking QP state (see ib_rvt_state_ops[]) */ #define RVT_POST_SEND_OK 0x01 #define RVT_POST_RECV_OK 0x02 #define RVT_PROCESS_RECV_OK 0x04 #define RVT_PROCESS_SEND_OK 0x08 #define RVT_PROCESS_NEXT_SEND_OK 0x10 #define RVT_FLUSH_SEND 0x20 #define RVT_FLUSH_RECV 0x40 #define RVT_PROCESS_OR_FLUSH_SEND \ (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) /* * Internal send flags */ #define RVT_SEND_RESERVE_USED IB_SEND_RESERVED_START #define RVT_SEND_COMPLETION_ONLY (IB_SEND_RESERVED_START << 1) /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored * in qp->s_max_sge. */ struct rvt_swqe { union { struct ib_send_wr wr; /* don't use wr.sg_list */ struct ib_ud_wr ud_wr; struct ib_reg_wr reg_wr; struct ib_rdma_wr rdma_wr; struct ib_atomic_wr atomic_wr; }; u32 psn; /* first packet sequence number */ u32 lpsn; /* last packet sequence number */ u32 ssn; /* send sequence number */ u32 length; /* total length of data in sg_list */ struct rvt_sge sg_list[0]; }; /* * Receive work request queue entry. * The size of the sg_list is determined when the QP (or SRQ) is created * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). */ struct rvt_rwqe { u64 wr_id; u8 num_sge; struct ib_sge sg_list[0]; }; /* * This structure is used to contain the head pointer, tail pointer, * and receive work queue entries as a single memory allocation so * it can be mmap'ed into user space. * Note that the wq array elements are variable size so you can't * just index into the array to get the N'th element; * use get_rwqe_ptr() instead. */ struct rvt_rwq { u32 head; /* new work requests posted to the head */ u32 tail; /* receives pull requests from here. */ struct rvt_rwqe wq[0]; }; struct rvt_rq { struct rvt_rwq *wq; u32 size; /* size of RWQE array */ u8 max_sge; /* protect changes in this struct */ spinlock_t lock ____cacheline_aligned_in_smp; }; /* * This structure is used by rvt_mmap() to validate an offset * when an mmap() request is made. The vm_area_struct then uses * this as its vm_private_data. */ struct rvt_mmap_info { struct list_head pending_mmaps; struct ib_ucontext *context; void *obj; __u64 offset; struct kref ref; unsigned size; }; /* * This structure holds the information that the send tasklet needs * to send a RDMA read response or atomic operation. */ struct rvt_ack_entry { struct rvt_sge rdma_sge; u64 atomic_data; u32 psn; u32 lpsn; u8 opcode; u8 sent; }; #define RC_QP_SCALING_INTERVAL 5 #define RVT_OPERATION_PRIV 0x00000001 #define RVT_OPERATION_ATOMIC 0x00000002 #define RVT_OPERATION_ATOMIC_SGE 0x00000004 #define RVT_OPERATION_LOCAL 0x00000008 #define RVT_OPERATION_USE_RESERVE 0x00000010 #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1) /** * rvt_operation_params - op table entry * @length - the length to copy into the swqe entry * @qpt_support - a bit mask indicating QP type support * @flags - RVT_OPERATION flags (see above) * * This supports table driven post send so that * the driver can have differing an potentially * different sets of operations. * **/ struct rvt_operation_params { size_t length; u32 qpt_support; u32 flags; }; /* * Common variables are protected by both r_rq.lock and s_lock in that order * which only happens in modify_qp() or changing the QP 'state'. */ struct rvt_qp { struct ib_qp ibqp; void *priv; /* Driver private data */ /* read mostly fields above and below */ struct ib_ah_attr remote_ah_attr; struct ib_ah_attr alt_ah_attr; struct rvt_qp __rcu *next; /* link list for QPN hash table */ struct rvt_swqe *s_wq; /* send work queue */ struct rvt_mmap_info *ip; unsigned long timeout_jiffies; /* computed from timeout */ enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ pid_t pid; /* pid for user mode QPs */ u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ u16 pmtu; /* decoded from path_mtu */ u8 log_pmtu; /* shift for pmtu */ u8 state; /* QP state */ u8 allowed_ops; /* high order bits of allowed opcodes */ u8 qp_access_flags; u8 alt_timeout; /* Alternate path timeout for this QP */ u8 timeout; /* Timeout for this QP */ u8 s_srate; u8 s_mig_state; u8 port_num; u8 s_pkey_index; /* PKEY index to use */ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ u8 s_retry_cnt; /* number of times to retry */ u8 s_rnr_retry_cnt; u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ u8 s_max_sge; /* size of s_wq->sg_list */ u8 s_draining; /* start of read/write fields */ atomic_t refcount ____cacheline_aligned_in_smp; wait_queue_head_t wait; struct rvt_ack_entry *s_ack_queue; struct rvt_sge_state s_rdma_read_sge; spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ u32 r_psn; /* expected rcv packet sequence number */ unsigned long r_aflags; u64 r_wr_id; /* ID for current receive WQE */ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ u32 r_len; /* total length of r_sge */ u32 r_rcv_len; /* receive data len processed */ u32 r_msn; /* message sequence number */ u8 r_state; /* opcode of last packet received */ u8 r_flags; u8 r_head_ack_queue; /* index into s_ack_queue[] */ struct list_head rspwait; /* link for waiting to respond */ struct rvt_sge_state r_sge; /* current receive data */ struct rvt_rq r_rq; /* receive work queue */ /* post send line */ spinlock_t s_hlock ____cacheline_aligned_in_smp; u32 s_head; /* new entries added here */ u32 s_next_psn; /* PSN for next request */ u32 s_avail; /* number of entries avail */ u32 s_ssn; /* SSN of tail entry */ atomic_t s_reserved_used; /* reserved entries in use */ spinlock_t s_lock ____cacheline_aligned_in_smp; u32 s_flags; struct rvt_sge_state *s_cur_sge; struct rvt_swqe *s_wqe; struct rvt_sge_state s_sge; /* current send request data */ struct rvt_mregion *s_rdma_mr; u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ u32 s_last_psn; /* last response PSN processed */ u32 s_sending_psn; /* lowest PSN that is being sent */ u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ u32 s_tail; /* next entry to process */ u32 s_cur; /* current work queue entry */ u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; s8 s_ahgidx; u8 s_state; /* opcode of last packet sent */ u8 s_ack_state; /* opcode of packet to ACK */ u8 s_nak_state; /* non-zero if NAK is pending */ u8 r_nak_state; /* non-zero if NAK is pending */ u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ u8 s_tail_ack_queue; /* index into s_ack_queue[] */ struct rvt_sge_state s_ack_rdma_sge; struct timer_list s_timer; atomic_t local_ops_pending; /* number of fast_reg/local_inv reqs */ /* * This sge list MUST be last. Do not add anything below here. */ struct rvt_sge r_sg_list[0] /* verified SGEs */ ____cacheline_aligned_in_smp; }; struct rvt_srq { struct ib_srq ibsrq; struct rvt_rq rq; struct rvt_mmap_info *ip; /* send signal when number of RWQEs < limit */ u32 limit; }; #define RVT_QPN_MAX BIT(24) #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) #define RVT_QPN_MASK 0xFFFFFF /* * QPN-map pages start out as NULL, they get allocated upon * first use and are never deallocated. This way, * large bitmaps are not allocated unless large numbers of QPs are used. */ struct rvt_qpn_map { void *page; }; struct rvt_qpn_table { spinlock_t lock; /* protect changes to the qp table */ unsigned flags; /* flags for QP0/1 allocated for each port */ u32 last; /* last QP number allocated */ u32 nmaps; /* size of the map table */ u16 limit; u8 incr; /* bit map of free QP numbers other than 0/1 */ struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; }; struct rvt_qp_ibdev { u32 qp_table_size; u32 qp_table_bits; struct rvt_qp __rcu **qp_table; spinlock_t qpt_lock; /* qptable lock */ struct rvt_qpn_table qpn_table; }; /* * There is one struct rvt_mcast for each multicast GID. * All attached QPs are then stored as a list of * struct rvt_mcast_qp. */ struct rvt_mcast_qp { struct list_head list; struct rvt_qp *qp; }; struct rvt_mcast { struct rb_node rb_node; union ib_gid mgid; struct list_head qp_list; wait_queue_head_t wait; atomic_t refcount; int n_attached; }; /* * Since struct rvt_swqe is not a fixed size, we can't simply index into * struct rvt_qp.s_wq. This function does the array index computation. */ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, unsigned n) { return (struct rvt_swqe *)((char *)qp->s_wq + (sizeof(struct rvt_swqe) + qp->s_max_sge * sizeof(struct rvt_sge)) * n); } /* * Since struct rvt_rwqe is not a fixed size, we can't simply index into * struct rvt_rwq.wq. This function does the array index computation. */ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) { return (struct rvt_rwqe *) ((char *)rq->wq->wq + (sizeof(struct rvt_rwqe) + rq->max_sge * sizeof(struct ib_sge)) * n); } /** * rvt_get_qp - get a QP reference * @qp - the QP to hold */ static inline void rvt_get_qp(struct rvt_qp *qp) { atomic_inc(&qp->refcount); } /** * rvt_put_qp - release a QP reference * @qp - the QP to release */ static inline void rvt_put_qp(struct rvt_qp *qp) { if (qp && atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); } /** * rvt_qp_wqe_reserve - reserve operation * @qp - the rvt qp * @wqe - the send wqe * * This routine used in post send to record * a wqe relative reserved operation use. */ static inline void rvt_qp_wqe_reserve( struct rvt_qp *qp, struct rvt_swqe *wqe) { wqe->wr.send_flags |= RVT_SEND_RESERVE_USED; atomic_inc(&qp->s_reserved_used); } /** * rvt_qp_wqe_unreserve - clean reserved operation * @qp - the rvt qp * @wqe - the send wqe * * This decrements the reserve use count. * * This call MUST precede the change to * s_last to insure that post send sees a stable * s_avail. * * An smp_mp__after_atomic() is used to insure * the compiler does not juggle the order of the s_last * ring index and the decrementing of s_reserved_used. */ static inline void rvt_qp_wqe_unreserve( struct rvt_qp *qp, struct rvt_swqe *wqe) { if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) { wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED; atomic_dec(&qp->s_reserved_used); /* insure no compiler re-order up to s_last change */ smp_mb__after_atomic(); } } extern const int ib_rvt_state_ops[]; struct rvt_dev_info; int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); #endif /* DEF_RDMAVT_INCQP_H */ Index: stable/11/sys/ofed/include/uapi/rdma/ib_user_cm.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/ib_user_cm.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/ib_user_cm.h (revision 331772) @@ -1,329 +1,333 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_USER_CM_H #define IB_USER_CM_H #ifdef _KERNEL #include #else #include #endif #include #define IB_USER_CM_ABI_VERSION 5 enum { IB_USER_CM_CMD_CREATE_ID, IB_USER_CM_CMD_DESTROY_ID, IB_USER_CM_CMD_ATTR_ID, IB_USER_CM_CMD_LISTEN, IB_USER_CM_CMD_NOTIFY, IB_USER_CM_CMD_SEND_REQ, IB_USER_CM_CMD_SEND_REP, IB_USER_CM_CMD_SEND_RTU, IB_USER_CM_CMD_SEND_DREQ, IB_USER_CM_CMD_SEND_DREP, IB_USER_CM_CMD_SEND_REJ, IB_USER_CM_CMD_SEND_MRA, IB_USER_CM_CMD_SEND_LAP, IB_USER_CM_CMD_SEND_APR, IB_USER_CM_CMD_SEND_SIDR_REQ, IB_USER_CM_CMD_SEND_SIDR_REP, IB_USER_CM_CMD_EVENT, IB_USER_CM_CMD_INIT_QP_ATTR, }; /* * command ABI structures. */ struct ib_ucm_cmd_hdr { __u32 cmd; __u16 in; __u16 out; }; struct ib_ucm_create_id { __u64 uid; __u64 response; }; struct ib_ucm_create_id_resp { __u32 id; }; struct ib_ucm_destroy_id { __u64 response; __u32 id; __u32 reserved; }; struct ib_ucm_destroy_id_resp { __u32 events_reported; }; struct ib_ucm_attr_id { __u64 response; __u32 id; __u32 reserved; }; struct ib_ucm_attr_id_resp { __be64 service_id; __be64 service_mask; __be32 local_id; __be32 remote_id; }; struct ib_ucm_init_qp_attr { __u64 response; __u32 id; __u32 qp_state; }; struct ib_ucm_listen { __be64 service_id; __be64 service_mask; __u32 id; __u32 reserved; }; struct ib_ucm_notify { __u32 id; __u32 event; }; struct ib_ucm_private_data { __u64 data; __u32 id; __u8 len; __u8 reserved[3]; }; struct ib_ucm_req { __u32 id; __u32 qpn; __u32 qp_type; __u32 psn; __be64 sid; __u64 data; __u64 primary_path; __u64 alternate_path; __u8 len; __u8 peer_to_peer; __u8 responder_resources; __u8 initiator_depth; __u8 remote_cm_response_timeout; __u8 flow_control; __u8 local_cm_response_timeout; __u8 retry_count; __u8 rnr_retry_count; __u8 max_cm_retries; __u8 srq; __u8 reserved[5]; }; struct ib_ucm_rep { __u64 uid; __u64 data; __u32 id; __u32 qpn; __u32 psn; __u8 len; __u8 responder_resources; __u8 initiator_depth; __u8 target_ack_delay; __u8 failover_accepted; __u8 flow_control; __u8 rnr_retry_count; __u8 srq; __u8 reserved[4]; }; struct ib_ucm_info { __u32 id; __u32 status; __u64 info; __u64 data; __u8 info_len; __u8 data_len; __u8 reserved[6]; }; struct ib_ucm_mra { __u64 data; __u32 id; __u8 len; __u8 timeout; __u8 reserved[2]; }; struct ib_ucm_lap { __u64 path; __u64 data; __u32 id; __u8 len; __u8 reserved[3]; }; struct ib_ucm_sidr_req { __u32 id; __u32 timeout; __be64 sid; __u64 data; __u64 path; __u16 reserved_pkey; __u8 len; __u8 max_cm_retries; __u8 reserved[4]; }; struct ib_ucm_sidr_rep { __u32 id; __u32 qpn; __u32 qkey; __u32 status; __u64 info; __u64 data; __u8 info_len; __u8 data_len; __u8 reserved[6]; }; /* * event notification ABI structures. */ struct ib_ucm_event_get { __u64 response; __u64 data; __u64 info; __u8 data_len; __u8 info_len; __u8 reserved[6]; }; struct ib_ucm_req_event_resp { struct ib_user_path_rec primary_path; struct ib_user_path_rec alternate_path; __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; __u32 qp_type; __u32 starting_psn; __u8 responder_resources; __u8 initiator_depth; __u8 local_cm_response_timeout; __u8 flow_control; __u8 remote_cm_response_timeout; __u8 retry_count; __u8 rnr_retry_count; __u8 srq; __u8 port; __u8 reserved[7]; }; struct ib_ucm_rep_event_resp { __be64 remote_ca_guid; __u32 remote_qkey; __u32 remote_qpn; __u32 starting_psn; __u8 responder_resources; __u8 initiator_depth; __u8 target_ack_delay; __u8 failover_accepted; __u8 flow_control; __u8 rnr_retry_count; __u8 srq; __u8 reserved[5]; }; struct ib_ucm_rej_event_resp { __u32 reason; /* ari in ib_ucm_event_get info field. */ }; struct ib_ucm_mra_event_resp { __u8 timeout; __u8 reserved[3]; }; struct ib_ucm_lap_event_resp { struct ib_user_path_rec path; }; struct ib_ucm_apr_event_resp { __u32 status; /* apr info in ib_ucm_event_get info field. */ }; struct ib_ucm_sidr_req_event_resp { __u16 pkey; __u8 port; __u8 reserved; }; struct ib_ucm_sidr_rep_event_resp { __u32 status; __u32 qkey; __u32 qpn; /* info in ib_ucm_event_get info field. */ }; #define IB_UCM_PRES_DATA 0x01 #define IB_UCM_PRES_INFO 0x02 #define IB_UCM_PRES_PRIMARY 0x04 #define IB_UCM_PRES_ALTERNATE 0x08 struct ib_ucm_event_resp { __u64 uid; __u32 id; __u32 event; __u32 present; __u32 reserved; union { struct ib_ucm_req_event_resp req_resp; struct ib_ucm_rep_event_resp rep_resp; struct ib_ucm_rej_event_resp rej_resp; struct ib_ucm_mra_event_resp mra_resp; struct ib_ucm_lap_event_resp lap_resp; struct ib_ucm_apr_event_resp apr_resp; struct ib_ucm_sidr_req_event_resp sidr_req_resp; struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; __u32 send_status; } u; }; #endif /* IB_USER_CM_H */ Index: stable/11/sys/ofed/include/uapi/rdma/ib_user_mad.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/ib_user_mad.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/ib_user_mad.h (revision 331772) @@ -1,250 +1,254 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_USER_MAD_H #define IB_USER_MAD_H #ifdef _KERNEL #include #include #else #include #include #endif /* * Increment this value if any changes that break userspace ABI * compatibility are made. */ #define IB_USER_MAD_ABI_VERSION 5 /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). */ /** * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index * @id - ID of agent MAD received with/to be sent with * @status - 0 on successful receive, ETIMEDOUT if no response * received (transaction ID in data[] will be set to TID of original * request) (ignored on send) * @timeout_ms - Milliseconds to wait for response (unset on receive) * @retries - Number of automatic retries to attempt * @qpn - Remote QP number received from/to be sent to * @qkey - Remote Q_Key to be sent with (unset on receive) * @lid - Remote lid received from/to be sent to * @sl - Service level received with/to be sent with * @path_bits - Local path bits received with/to be sent with * @grh_present - If set, GRH was received/should be sent * @gid_index - Local GID index to send with (unset on receive) * @hop_limit - Hop limit in GRH * @traffic_class - Traffic class in GRH * @gid - Remote GID in GRH * @flow_label - Flow label in GRH */ struct ib_user_mad_hdr_old { __u32 id; __u32 status; __u32 timeout_ms; __u32 retries; __u32 length; __be32 qpn; __be32 qkey; __be16 lid; __u8 sl; __u8 path_bits; __u8 grh_present; __u8 gid_index; __u8 hop_limit; __u8 traffic_class; __u8 gid[16]; __be32 flow_label; }; /** * ib_user_mad_hdr - MAD packet header * This layout allows specifying/receiving the P_Key index. To use * this capability, an application must call the * IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before * any other actions with the file handle. * @id - ID of agent MAD received with/to be sent with * @status - 0 on successful receive, ETIMEDOUT if no response * received (transaction ID in data[] will be set to TID of original * request) (ignored on send) * @timeout_ms - Milliseconds to wait for response (unset on receive) * @retries - Number of automatic retries to attempt * @qpn - Remote QP number received from/to be sent to * @qkey - Remote Q_Key to be sent with (unset on receive) * @lid - Remote lid received from/to be sent to * @sl - Service level received with/to be sent with * @path_bits - Local path bits received with/to be sent with * @grh_present - If set, GRH was received/should be sent * @gid_index - Local GID index to send with (unset on receive) * @hop_limit - Hop limit in GRH * @traffic_class - Traffic class in GRH * @gid - Remote GID in GRH * @flow_label - Flow label in GRH * @pkey_index - P_Key index */ struct ib_user_mad_hdr { __u32 id; __u32 status; __u32 timeout_ms; __u32 retries; __u32 length; __be32 qpn; __be32 qkey; __be16 lid; __u8 sl; __u8 path_bits; __u8 grh_present; __u8 gid_index; __u8 hop_limit; __u8 traffic_class; __u8 gid[16]; __be32 flow_label; __u16 pkey_index; __u8 reserved[6]; }; /** * ib_user_mad - MAD packet * @hdr - MAD packet header * @data - Contents of MAD * */ struct ib_user_mad { struct ib_user_mad_hdr hdr; __u64 data[0]; }; /* * Earlier versions of this interface definition declared the * method_mask[] member as an array of __u32 but treated it as a * bitmap made up of longs in the kernel. This ambiguity meant that * 32-bit big-endian applications that can run on both 32-bit and * 64-bit kernels had no consistent ABI to rely on, and 64-bit * big-endian applications that treated method_mask as being made up * of 32-bit words would have their bitmap misinterpreted. * * To clear up this confusion, we change the declaration of * method_mask[] to use unsigned long and handle the conversion from * 32-bit userspace to 64-bit kernel for big-endian systems in the * compat_ioctl method. Unfortunately, to keep the structure layout * the same, we need the method_mask[] array to be aligned only to 4 * bytes even when long is 64 bits, which forces us into this ugly * typedef. */ typedef unsigned long __attribute__((aligned(4))) packed_ulong; #define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long))) /** * ib_user_mad_reg_req - MAD registration request * @id - Set by the kernel; used to identify agent in future requests. * @qpn - Queue pair number; must be 0 or 1. * @method_mask - The caller will receive unsolicited MADs for any method * where @method_mask = 1. * @mgmt_class - Indicates which management class of MADs should be receive * by the caller. This field is only required if the user wishes to * receive unsolicited MADs, otherwise it should be 0. * @mgmt_class_version - Indicates which version of MADs for the given * management class to receive. * @oui: Indicates IEEE OUI when mgmt_class is a vendor class * in the range from 0x30 to 0x4f. Otherwise not used. * @rmpp_version: If set, indicates the RMPP version used. * */ struct ib_user_mad_reg_req { __u32 id; packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK]; __u8 qpn; __u8 mgmt_class; __u8 mgmt_class_version; __u8 oui[3]; __u8 rmpp_version; }; /** * ib_user_mad_reg_req2 - MAD registration request * * @id - Set by the _kernel_; used by userspace to identify the * registered agent in future requests. * @qpn - Queue pair number; must be 0 or 1. * @mgmt_class - Indicates which management class of MADs should be * receive by the caller. This field is only required if * the user wishes to receive unsolicited MADs, otherwise * it should be 0. * @mgmt_class_version - Indicates which version of MADs for the given * management class to receive. * @res - Ignored. * @flags - additional registration flags; Must be in the set of * flags defined in IB_USER_MAD_REG_FLAGS_CAP * @method_mask - The caller wishes to receive unsolicited MADs for the * methods whose bit(s) is(are) set. * @oui - Indicates IEEE OUI to use when mgmt_class is a vendor * class in the range from 0x30 to 0x4f. Otherwise not * used. * @rmpp_version - If set, indicates the RMPP version to use. */ enum { IB_USER_MAD_USER_RMPP = (1 << 0), }; #define IB_USER_MAD_REG_FLAGS_CAP (IB_USER_MAD_USER_RMPP) struct ib_user_mad_reg_req2 { __u32 id; __u32 qpn; __u8 mgmt_class; __u8 mgmt_class_version; __u16 res; __u32 flags; __u64 method_mask[2]; __u32 oui; __u8 rmpp_version; __u8 reserved[3]; }; #define IB_IOCTL_MAGIC 0x1b #define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ struct ib_user_mad_reg_req) #define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) #define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) #define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \ struct ib_user_mad_reg_req2) #endif /* IB_USER_MAD_H */ Index: stable/11/sys/ofed/include/uapi/rdma/ib_user_sa.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/ib_user_sa.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/ib_user_sa.h (revision 331772) @@ -1,80 +1,84 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_USER_SA_H #define IB_USER_SA_H #ifdef _KERNEL #include #else #include #endif enum { IB_PATH_GMP = 1, IB_PATH_PRIMARY = (1<<1), IB_PATH_ALTERNATE = (1<<2), IB_PATH_OUTBOUND = (1<<3), IB_PATH_INBOUND = (1<<4), IB_PATH_INBOUND_REVERSE = (1<<5), IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE }; struct ib_path_rec_data { __u32 flags; __u32 reserved; __u32 path_rec[16]; }; struct ib_user_path_rec { __u8 dgid[16]; __u8 sgid[16]; __be16 dlid; __be16 slid; __u32 raw_traffic; __be32 flow_label; __u32 reversible; __u32 mtu; __be16 pkey; __u8 hop_limit; __u8 traffic_class; __u8 numb_path; __u8 sl; __u8 mtu_selector; __u8 rate_selector; __u8 rate; __u8 packet_life_time_selector; __u8 packet_life_time; __u8 preference; }; #endif /* IB_USER_SA_H */ Index: stable/11/sys/ofed/include/uapi/rdma/ib_user_verbs.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/ib_user_verbs.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/ib_user_verbs.h (revision 331772) @@ -1,1071 +1,1075 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * Copyright (c) 2006 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef IB_USER_VERBS_H #define IB_USER_VERBS_H #ifdef _KERNEL #include #else #include #endif /* * Increment this value if any changes that break userspace ABI * compatibility are made. */ #define IB_USER_VERBS_ABI_VERSION 6 #define IB_USER_VERBS_CMD_THRESHOLD 50 enum { IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, IB_USER_VERBS_CMD_ALLOC_PD, IB_USER_VERBS_CMD_DEALLOC_PD, IB_USER_VERBS_CMD_CREATE_AH, IB_USER_VERBS_CMD_MODIFY_AH, IB_USER_VERBS_CMD_QUERY_AH, IB_USER_VERBS_CMD_DESTROY_AH, IB_USER_VERBS_CMD_REG_MR, IB_USER_VERBS_CMD_REG_SMR, IB_USER_VERBS_CMD_REREG_MR, IB_USER_VERBS_CMD_QUERY_MR, IB_USER_VERBS_CMD_DEREG_MR, IB_USER_VERBS_CMD_ALLOC_MW, IB_USER_VERBS_CMD_BIND_MW, IB_USER_VERBS_CMD_DEALLOC_MW, IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_CMD_RESIZE_CQ, IB_USER_VERBS_CMD_DESTROY_CQ, IB_USER_VERBS_CMD_POLL_CQ, IB_USER_VERBS_CMD_PEEK_CQ, IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, IB_USER_VERBS_CMD_CREATE_QP, IB_USER_VERBS_CMD_QUERY_QP, IB_USER_VERBS_CMD_MODIFY_QP, IB_USER_VERBS_CMD_DESTROY_QP, IB_USER_VERBS_CMD_POST_SEND, IB_USER_VERBS_CMD_POST_RECV, IB_USER_VERBS_CMD_ATTACH_MCAST, IB_USER_VERBS_CMD_DETACH_MCAST, IB_USER_VERBS_CMD_CREATE_SRQ, IB_USER_VERBS_CMD_MODIFY_SRQ, IB_USER_VERBS_CMD_QUERY_SRQ, IB_USER_VERBS_CMD_DESTROY_SRQ, IB_USER_VERBS_CMD_POST_SRQ_RECV, IB_USER_VERBS_CMD_OPEN_XRCD, IB_USER_VERBS_CMD_CLOSE_XRCD, IB_USER_VERBS_CMD_CREATE_XSRQ, IB_USER_VERBS_CMD_OPEN_QP, }; enum { IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW, IB_USER_VERBS_EX_CMD_CREATE_WQ, IB_USER_VERBS_EX_CMD_MODIFY_WQ, IB_USER_VERBS_EX_CMD_DESTROY_WQ, IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL, IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL }; /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). * Specifically: * - Do not use pointer types -- pass pointers in __u64 instead. * - Make sure that any structure larger than 4 bytes is padded to a * multiple of 8 bytes. Otherwise the structure size will be * different between 32-bit and 64-bit architectures. */ struct ib_uverbs_async_event_desc { __u64 element; __u32 event_type; /* enum ib_event_type */ __u32 reserved; }; struct ib_uverbs_comp_event_desc { __u64 cq_handle; }; /* * All commands from userspace should start with a __u32 command field * followed by __u16 in_words and out_words fields (which give the * length of the command block and response buffer if any in 32-bit * words). The kernel driver will read these fields first and read * the rest of the command struct based on these value. */ #define IB_USER_VERBS_CMD_COMMAND_MASK 0xff #define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u #define IB_USER_VERBS_CMD_FLAGS_SHIFT 24 #define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80 struct ib_uverbs_cmd_hdr { __u32 command; __u16 in_words; __u16 out_words; }; struct ib_uverbs_ex_cmd_hdr { __u64 response; __u16 provider_in_words; __u16 provider_out_words; __u32 cmd_hdr_reserved; }; struct ib_uverbs_get_context { __u64 response; __u64 driver_data[0]; }; struct ib_uverbs_get_context_resp { __u32 async_fd; __u32 num_comp_vectors; }; struct ib_uverbs_query_device { __u64 response; __u64 driver_data[0]; }; struct ib_uverbs_query_device_resp { __u64 fw_ver; __be64 node_guid; __be64 sys_image_guid; __u64 max_mr_size; __u64 page_size_cap; __u32 vendor_id; __u32 vendor_part_id; __u32 hw_ver; __u32 max_qp; __u32 max_qp_wr; __u32 device_cap_flags; __u32 max_sge; __u32 max_sge_rd; __u32 max_cq; __u32 max_cqe; __u32 max_mr; __u32 max_pd; __u32 max_qp_rd_atom; __u32 max_ee_rd_atom; __u32 max_res_rd_atom; __u32 max_qp_init_rd_atom; __u32 max_ee_init_rd_atom; __u32 atomic_cap; __u32 max_ee; __u32 max_rdd; __u32 max_mw; __u32 max_raw_ipv6_qp; __u32 max_raw_ethy_qp; __u32 max_mcast_grp; __u32 max_mcast_qp_attach; __u32 max_total_mcast_qp_attach; __u32 max_ah; __u32 max_fmr; __u32 max_map_per_fmr; __u32 max_srq; __u32 max_srq_wr; __u32 max_srq_sge; __u16 max_pkeys; __u8 local_ca_ack_delay; __u8 phys_port_cnt; __u8 reserved[4]; }; struct ib_uverbs_ex_query_device { __u32 comp_mask; __u32 reserved; }; struct ib_uverbs_odp_caps { __u64 general_caps; struct { __u32 rc_odp_caps; __u32 uc_odp_caps; __u32 ud_odp_caps; } per_transport_caps; __u32 reserved; }; struct ib_uverbs_rss_caps { /* Corresponding bit will be set if qp type from * 'enum ib_qp_type' is supported, e.g. * supported_qpts |= 1 << IB_QPT_UD */ __u32 supported_qpts; __u32 max_rwq_indirection_tables; __u32 max_rwq_indirection_table_size; __u32 reserved; }; struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_query_device_resp base; __u32 comp_mask; __u32 response_length; struct ib_uverbs_odp_caps odp_caps; __u64 timestamp_mask; __u64 hca_core_clock; /* in KHZ */ __u64 device_cap_flags_ex; struct ib_uverbs_rss_caps rss_caps; __u32 max_wq_type_rq; __u32 reserved; }; struct ib_uverbs_query_port { __u64 response; __u8 port_num; __u8 reserved[7]; __u64 driver_data[0]; }; struct ib_uverbs_query_port_resp { __u32 port_cap_flags; __u32 max_msg_sz; __u32 bad_pkey_cntr; __u32 qkey_viol_cntr; __u32 gid_tbl_len; __u16 pkey_tbl_len; __u16 lid; __u16 sm_lid; __u8 state; __u8 max_mtu; __u8 active_mtu; __u8 lmc; __u8 max_vl_num; __u8 sm_sl; __u8 subnet_timeout; __u8 init_type_reply; __u8 active_width; __u8 active_speed; __u8 phys_state; __u8 link_layer; __u8 reserved[2]; }; struct ib_uverbs_alloc_pd { __u64 response; __u64 driver_data[0]; }; struct ib_uverbs_alloc_pd_resp { __u32 pd_handle; }; struct ib_uverbs_dealloc_pd { __u32 pd_handle; }; struct ib_uverbs_open_xrcd { __u64 response; __u32 fd; __u32 oflags; __u64 driver_data[0]; }; struct ib_uverbs_open_xrcd_resp { __u32 xrcd_handle; }; struct ib_uverbs_close_xrcd { __u32 xrcd_handle; }; struct ib_uverbs_reg_mr { __u64 response; __u64 start; __u64 length; __u64 hca_va; __u32 pd_handle; __u32 access_flags; __u64 driver_data[0]; }; struct ib_uverbs_reg_mr_resp { __u32 mr_handle; __u32 lkey; __u32 rkey; }; struct ib_uverbs_rereg_mr { __u64 response; __u32 mr_handle; __u32 flags; __u64 start; __u64 length; __u64 hca_va; __u32 pd_handle; __u32 access_flags; }; struct ib_uverbs_rereg_mr_resp { __u32 lkey; __u32 rkey; }; struct ib_uverbs_dereg_mr { __u32 mr_handle; }; struct ib_uverbs_alloc_mw { __u64 response; __u32 pd_handle; __u8 mw_type; __u8 reserved[3]; }; struct ib_uverbs_alloc_mw_resp { __u32 mw_handle; __u32 rkey; }; struct ib_uverbs_dealloc_mw { __u32 mw_handle; }; struct ib_uverbs_create_comp_channel { __u64 response; }; struct ib_uverbs_create_comp_channel_resp { __u32 fd; }; struct ib_uverbs_create_cq { __u64 response; __u64 user_handle; __u32 cqe; __u32 comp_vector; __s32 comp_channel; __u32 reserved; __u64 driver_data[0]; }; struct ib_uverbs_ex_create_cq { __u64 user_handle; __u32 cqe; __u32 comp_vector; __s32 comp_channel; __u32 comp_mask; __u32 flags; __u32 reserved; }; struct ib_uverbs_create_cq_resp { __u32 cq_handle; __u32 cqe; }; struct ib_uverbs_ex_create_cq_resp { struct ib_uverbs_create_cq_resp base; __u32 comp_mask; __u32 response_length; }; struct ib_uverbs_resize_cq { __u64 response; __u32 cq_handle; __u32 cqe; __u64 driver_data[0]; }; struct ib_uverbs_resize_cq_resp { __u32 cqe; __u32 reserved; __u64 driver_data[0]; }; struct ib_uverbs_poll_cq { __u64 response; __u32 cq_handle; __u32 ne; }; struct ib_uverbs_wc { __u64 wr_id; __u32 status; __u32 opcode; __u32 vendor_err; __u32 byte_len; union { __u32 imm_data; __u32 invalidate_rkey; } ex; __u32 qp_num; __u32 src_qp; __u32 wc_flags; __u16 pkey_index; __u16 slid; __u8 sl; __u8 dlid_path_bits; __u8 port_num; __u8 reserved; }; struct ib_uverbs_poll_cq_resp { __u32 count; __u32 reserved; struct ib_uverbs_wc wc[0]; }; struct ib_uverbs_req_notify_cq { __u32 cq_handle; __u32 solicited_only; }; struct ib_uverbs_destroy_cq { __u64 response; __u32 cq_handle; __u32 reserved; }; struct ib_uverbs_destroy_cq_resp { __u32 comp_events_reported; __u32 async_events_reported; }; struct ib_uverbs_global_route { __u8 dgid[16]; __u32 flow_label; __u8 sgid_index; __u8 hop_limit; __u8 traffic_class; __u8 reserved; }; struct ib_uverbs_ah_attr { struct ib_uverbs_global_route grh; __u16 dlid; __u8 sl; __u8 src_path_bits; __u8 static_rate; __u8 is_global; __u8 port_num; __u8 reserved; }; struct ib_uverbs_qp_attr { __u32 qp_attr_mask; __u32 qp_state; __u32 cur_qp_state; __u32 path_mtu; __u32 path_mig_state; __u32 qkey; __u32 rq_psn; __u32 sq_psn; __u32 dest_qp_num; __u32 qp_access_flags; struct ib_uverbs_ah_attr ah_attr; struct ib_uverbs_ah_attr alt_ah_attr; /* ib_qp_cap */ __u32 max_send_wr; __u32 max_recv_wr; __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; __u16 pkey_index; __u16 alt_pkey_index; __u8 en_sqd_async_notify; __u8 sq_draining; __u8 max_rd_atomic; __u8 max_dest_rd_atomic; __u8 min_rnr_timer; __u8 port_num; __u8 timeout; __u8 retry_cnt; __u8 rnr_retry; __u8 alt_port_num; __u8 alt_timeout; __u8 reserved[5]; }; struct ib_uverbs_create_qp { __u64 response; __u64 user_handle; __u32 pd_handle; __u32 send_cq_handle; __u32 recv_cq_handle; __u32 srq_handle; __u32 max_send_wr; __u32 max_recv_wr; __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; __u8 sq_sig_all; __u8 qp_type; __u8 is_srq; __u8 reserved; __u64 driver_data[0]; }; enum ib_uverbs_create_qp_mask { IB_UVERBS_CREATE_QP_MASK_IND_TABLE = 1UL << 0, }; enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; struct ib_uverbs_ex_create_qp { __u64 user_handle; __u32 pd_handle; __u32 send_cq_handle; __u32 recv_cq_handle; __u32 srq_handle; __u32 max_send_wr; __u32 max_recv_wr; __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; __u8 sq_sig_all; __u8 qp_type; __u8 is_srq; __u8 reserved; __u32 comp_mask; __u32 create_flags; __u32 rwq_ind_tbl_handle; __u32 reserved1; }; struct ib_uverbs_open_qp { __u64 response; __u64 user_handle; __u32 pd_handle; __u32 qpn; __u8 qp_type; __u8 reserved[7]; __u64 driver_data[0]; }; /* also used for open response */ struct ib_uverbs_create_qp_resp { __u32 qp_handle; __u32 qpn; __u32 max_send_wr; __u32 max_recv_wr; __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; __u32 reserved; }; struct ib_uverbs_ex_create_qp_resp { struct ib_uverbs_create_qp_resp base; __u32 comp_mask; __u32 response_length; }; /* * This struct needs to remain a multiple of 8 bytes to keep the * alignment of the modify QP parameters. */ struct ib_uverbs_qp_dest { __u8 dgid[16]; __u32 flow_label; __u16 dlid; __u16 reserved; __u8 sgid_index; __u8 hop_limit; __u8 traffic_class; __u8 sl; __u8 src_path_bits; __u8 static_rate; __u8 is_global; __u8 port_num; }; struct ib_uverbs_query_qp { __u64 response; __u32 qp_handle; __u32 attr_mask; __u64 driver_data[0]; }; struct ib_uverbs_query_qp_resp { struct ib_uverbs_qp_dest dest; struct ib_uverbs_qp_dest alt_dest; __u32 max_send_wr; __u32 max_recv_wr; __u32 max_send_sge; __u32 max_recv_sge; __u32 max_inline_data; __u32 qkey; __u32 rq_psn; __u32 sq_psn; __u32 dest_qp_num; __u32 qp_access_flags; __u16 pkey_index; __u16 alt_pkey_index; __u8 qp_state; __u8 cur_qp_state; __u8 path_mtu; __u8 path_mig_state; __u8 sq_draining; __u8 max_rd_atomic; __u8 max_dest_rd_atomic; __u8 min_rnr_timer; __u8 port_num; __u8 timeout; __u8 retry_cnt; __u8 rnr_retry; __u8 alt_port_num; __u8 alt_timeout; __u8 sq_sig_all; __u8 reserved[5]; __u64 driver_data[0]; }; struct ib_uverbs_modify_qp { struct ib_uverbs_qp_dest dest; struct ib_uverbs_qp_dest alt_dest; __u32 qp_handle; __u32 attr_mask; __u32 qkey; __u32 rq_psn; __u32 sq_psn; __u32 dest_qp_num; __u32 qp_access_flags; __u16 pkey_index; __u16 alt_pkey_index; __u8 qp_state; __u8 cur_qp_state; __u8 path_mtu; __u8 path_mig_state; __u8 en_sqd_async_notify; __u8 max_rd_atomic; __u8 max_dest_rd_atomic; __u8 min_rnr_timer; __u8 port_num; __u8 timeout; __u8 retry_cnt; __u8 rnr_retry; __u8 alt_port_num; __u8 alt_timeout; __u8 reserved[2]; __u64 driver_data[0]; }; struct ib_uverbs_modify_qp_resp { }; struct ib_uverbs_destroy_qp { __u64 response; __u32 qp_handle; __u32 reserved; }; struct ib_uverbs_destroy_qp_resp { __u32 events_reported; }; /* * The ib_uverbs_sge structure isn't used anywhere, since we assume * the ib_sge structure is packed the same way on 32-bit and 64-bit * architectures in both kernel and user space. It's just here to * document the ABI. */ struct ib_uverbs_sge { __u64 addr; __u32 length; __u32 lkey; }; struct ib_uverbs_send_wr { __u64 wr_id; __u32 num_sge; __u32 opcode; __u32 send_flags; union { __u32 imm_data; __u32 invalidate_rkey; } ex; union { struct { __u64 remote_addr; __u32 rkey; __u32 reserved; } rdma; struct { __u64 remote_addr; __u64 compare_add; __u64 swap; __u32 rkey; __u32 reserved; } atomic; struct { __u32 ah; __u32 remote_qpn; __u32 remote_qkey; __u32 reserved; } ud; } wr; }; struct ib_uverbs_post_send { __u64 response; __u32 qp_handle; __u32 wr_count; __u32 sge_count; __u32 wqe_size; struct ib_uverbs_send_wr send_wr[0]; }; struct ib_uverbs_post_send_resp { __u32 bad_wr; }; struct ib_uverbs_recv_wr { __u64 wr_id; __u32 num_sge; __u32 reserved; }; struct ib_uverbs_post_recv { __u64 response; __u32 qp_handle; __u32 wr_count; __u32 sge_count; __u32 wqe_size; struct ib_uverbs_recv_wr recv_wr[0]; }; struct ib_uverbs_post_recv_resp { __u32 bad_wr; }; struct ib_uverbs_post_srq_recv { __u64 response; __u32 srq_handle; __u32 wr_count; __u32 sge_count; __u32 wqe_size; struct ib_uverbs_recv_wr recv[0]; }; struct ib_uverbs_post_srq_recv_resp { __u32 bad_wr; }; struct ib_uverbs_create_ah { __u64 response; __u64 user_handle; __u32 pd_handle; __u32 reserved; struct ib_uverbs_ah_attr attr; }; struct ib_uverbs_create_ah_resp { __u32 ah_handle; }; struct ib_uverbs_destroy_ah { __u32 ah_handle; }; struct ib_uverbs_attach_mcast { __u8 gid[16]; __u32 qp_handle; __u16 mlid; __u16 reserved; __u64 driver_data[0]; }; struct ib_uverbs_detach_mcast { __u8 gid[16]; __u32 qp_handle; __u16 mlid; __u16 reserved; __u64 driver_data[0]; }; struct ib_uverbs_flow_spec_hdr { __u32 type; __u16 size; __u16 reserved; /* followed by flow_spec */ __u64 flow_spec_data[0]; }; struct ib_uverbs_flow_eth_filter { __u8 dst_mac[6]; __u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; }; struct ib_uverbs_flow_spec_eth { union { struct ib_uverbs_flow_spec_hdr hdr; struct { __u32 type; __u16 size; __u16 reserved; }; }; struct ib_uverbs_flow_eth_filter val; struct ib_uverbs_flow_eth_filter mask; }; struct ib_uverbs_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; __u8 proto; __u8 tos; __u8 ttl; __u8 flags; }; struct ib_uverbs_flow_spec_ipv4 { union { struct ib_uverbs_flow_spec_hdr hdr; struct { __u32 type; __u16 size; __u16 reserved; }; }; struct ib_uverbs_flow_ipv4_filter val; struct ib_uverbs_flow_ipv4_filter mask; }; struct ib_uverbs_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; }; struct ib_uverbs_flow_spec_tcp_udp { union { struct ib_uverbs_flow_spec_hdr hdr; struct { __u32 type; __u16 size; __u16 reserved; }; }; struct ib_uverbs_flow_tcp_udp_filter val; struct ib_uverbs_flow_tcp_udp_filter mask; }; struct ib_uverbs_flow_ipv6_filter { __u8 src_ip[16]; __u8 dst_ip[16]; __be32 flow_label; __u8 next_hdr; __u8 traffic_class; __u8 hop_limit; __u8 reserved; }; struct ib_uverbs_flow_spec_ipv6 { union { struct ib_uverbs_flow_spec_hdr hdr; struct { __u32 type; __u16 size; __u16 reserved; }; }; struct ib_uverbs_flow_ipv6_filter val; struct ib_uverbs_flow_ipv6_filter mask; }; struct ib_uverbs_flow_attr { __u32 type; __u16 size; __u16 priority; __u8 num_of_specs; __u8 reserved[2]; __u8 port; __u32 flags; /* Following are the optional layers according to user request * struct ib_flow_spec_xxx * struct ib_flow_spec_yyy */ struct ib_uverbs_flow_spec_hdr flow_specs[0]; }; struct ib_uverbs_create_flow { __u32 comp_mask; __u32 qp_handle; struct ib_uverbs_flow_attr flow_attr; }; struct ib_uverbs_create_flow_resp { __u32 comp_mask; __u32 flow_handle; }; struct ib_uverbs_destroy_flow { __u32 comp_mask; __u32 flow_handle; }; struct ib_uverbs_create_srq { __u64 response; __u64 user_handle; __u32 pd_handle; __u32 max_wr; __u32 max_sge; __u32 srq_limit; __u64 driver_data[0]; }; struct ib_uverbs_create_xsrq { __u64 response; __u64 user_handle; __u32 srq_type; __u32 pd_handle; __u32 max_wr; __u32 max_sge; __u32 srq_limit; __u32 reserved; __u32 xrcd_handle; __u32 cq_handle; __u64 driver_data[0]; }; struct ib_uverbs_create_srq_resp { __u32 srq_handle; __u32 max_wr; __u32 max_sge; __u32 srqn; }; struct ib_uverbs_modify_srq { __u32 srq_handle; __u32 attr_mask; __u32 max_wr; __u32 srq_limit; __u64 driver_data[0]; }; struct ib_uverbs_query_srq { __u64 response; __u32 srq_handle; __u32 reserved; __u64 driver_data[0]; }; struct ib_uverbs_query_srq_resp { __u32 max_wr; __u32 max_sge; __u32 srq_limit; __u32 reserved; }; struct ib_uverbs_destroy_srq { __u64 response; __u32 srq_handle; __u32 reserved; }; struct ib_uverbs_destroy_srq_resp { __u32 events_reported; }; struct ib_uverbs_ex_create_wq { __u32 comp_mask; __u32 wq_type; __u64 user_handle; __u32 pd_handle; __u32 cq_handle; __u32 max_wr; __u32 max_sge; }; struct ib_uverbs_ex_create_wq_resp { __u32 comp_mask; __u32 response_length; __u32 wq_handle; __u32 max_wr; __u32 max_sge; __u32 wqn; }; struct ib_uverbs_ex_destroy_wq { __u32 comp_mask; __u32 wq_handle; }; struct ib_uverbs_ex_destroy_wq_resp { __u32 comp_mask; __u32 response_length; __u32 events_reported; __u32 reserved; }; struct ib_uverbs_ex_modify_wq { __u32 attr_mask; __u32 wq_handle; __u32 wq_state; __u32 curr_wq_state; }; /* Prevent memory allocation rather than max expected size */ #define IB_USER_VERBS_MAX_LOG_IND_TBL_SIZE 0x0d struct ib_uverbs_ex_create_rwq_ind_table { __u32 comp_mask; __u32 log_ind_tbl_size; /* Following are the wq handles according to log_ind_tbl_size * wq_handle1 * wq_handle2 */ __u32 wq_handles[0]; }; struct ib_uverbs_ex_create_rwq_ind_table_resp { __u32 comp_mask; __u32 response_length; __u32 ind_tbl_handle; __u32 ind_tbl_num; }; struct ib_uverbs_ex_destroy_rwq_ind_table { __u32 comp_mask; __u32 ind_tbl_handle; }; #endif /* IB_USER_VERBS_H */ Index: stable/11/sys/ofed/include/uapi/rdma/mlx4-abi.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/mlx4-abi.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/mlx4-abi.h (revision 331772) @@ -1,111 +1,115 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef MLX4_ABI_USER_H #define MLX4_ABI_USER_H #ifdef _KERNEL #include #else #include #endif /* * Increment this value if any changes that break userspace ABI * compatibility are made. */ #define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3 #define MLX4_IB_UVERBS_ABI_VERSION 4 /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). * In particular do not use pointer types -- pass pointers in __u64 * instead. */ struct mlx4_ib_alloc_ucontext_resp_v3 { __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; }; struct mlx4_ib_alloc_ucontext_resp { __u32 dev_caps; __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; __u32 cqe_size; }; struct mlx4_ib_alloc_pd_resp { __u32 pdn; __u32 reserved; }; struct mlx4_ib_create_cq { __u64 buf_addr; __u64 db_addr; }; struct mlx4_ib_create_cq_resp { __u32 cqn; __u32 reserved; }; struct mlx4_ib_resize_cq { __u64 buf_addr; }; struct mlx4_ib_create_srq { __u64 buf_addr; __u64 db_addr; }; struct mlx4_ib_create_srq_resp { __u32 srqn; __u32 reserved; }; struct mlx4_ib_create_qp { __u64 buf_addr; __u64 db_addr; __u8 log_sq_bb_count; __u8 log_sq_stride; __u8 sq_no_prefetch; __u8 reserved[5]; }; #endif /* MLX4_ABI_USER_H */ Index: stable/11/sys/ofed/include/uapi/rdma/mlx5-abi.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/mlx5-abi.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/mlx5-abi.h (revision 331772) @@ -1,253 +1,257 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef MLX5_ABI_USER_H #define MLX5_ABI_USER_H #ifdef _KERNEL #include #else #include #endif enum { MLX5_QP_FLAG_SIGNATURE = 1 << 0, MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, }; enum { MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, }; enum { MLX5_WQ_FLAG_SIGNATURE = 1 << 0, }; /* Increment this value if any changes that break userspace ABI * compatibility are made. */ #define MLX5_IB_UVERBS_ABI_VERSION 1 /* Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). * In particular do not use pointer types -- pass pointers in __u64 * instead. */ struct mlx5_ib_alloc_ucontext_req { __u32 total_num_uuars; __u32 num_low_latency_uuars; }; struct mlx5_ib_alloc_ucontext_req_v2 { __u32 total_num_uuars; __u32 num_low_latency_uuars; __u32 flags; __u32 comp_mask; __u8 max_cqe_version; __u8 reserved0; __u16 reserved1; __u32 reserved2; }; enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, }; enum mlx5_user_cmds_supp_uhw { MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, }; struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; __u32 tot_uuars; __u32 cache_line_size; __u16 max_sq_desc_sz; __u16 max_rq_desc_sz; __u32 max_send_wqebb; __u32 max_recv_wr; __u32 max_srq_recv_wr; __u16 num_ports; __u16 reserved1; __u32 comp_mask; __u32 response_length; __u8 cqe_version; __u8 cmds_supp_uhw; __u16 reserved2; __u64 hca_core_clock_offset; }; struct mlx5_ib_alloc_pd_resp { __u32 pdn; }; struct mlx5_ib_tso_caps { __u32 max_tso; /* Maximum tso payload size in bytes */ /* Corresponding bit will be set if qp type from * 'enum ib_qp_type' is supported, e.g. * supported_qpts |= 1 << IB_QPT_UD */ __u32 supported_qpts; }; struct mlx5_ib_rss_caps { __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ __u8 reserved[7]; }; struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; struct mlx5_ib_tso_caps tso_caps; struct mlx5_ib_rss_caps rss_caps; }; struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_cq_resp { __u32 cqn; __u32 reserved; }; struct mlx5_ib_resize_cq { __u64 buf_addr; __u16 cqe_size; __u16 reserved0; __u32 reserved1; }; struct mlx5_ib_create_srq { __u64 buf_addr; __u64 db_addr; __u32 flags; __u32 reserved0; /* explicit padding (optional on i386) */ __u32 uidx; __u32 reserved1; }; struct mlx5_ib_create_srq_resp { __u32 srqn; __u32 reserved; }; struct mlx5_ib_create_qp { __u64 buf_addr; __u64 db_addr; __u32 sq_wqe_count; __u32 rq_wqe_count; __u32 rq_wqe_shift; __u32 flags; __u32 uidx; __u32 reserved0; __u64 sq_buf_addr; }; /* RX Hash function flags */ enum mlx5_rx_hash_function_flags { MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0, }; /* * RX Hash flags, these flags allows to set which incoming packet's field should * participates in RX Hash. Each flag represent certain packet's field, * when the flag is set the field that is represented by the flag will * participate in RX Hash calculation. * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP * and *TCP and *UDP flags can't be enabled together on the same QP. */ enum mlx5_rx_hash_fields { MLX5_RX_HASH_SRC_IPV4 = 1 << 0, MLX5_RX_HASH_DST_IPV4 = 1 << 1, MLX5_RX_HASH_SRC_IPV6 = 1 << 2, MLX5_RX_HASH_DST_IPV6 = 1 << 3, MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 }; struct mlx5_ib_create_qp_rss { __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ __u8 rx_key_len; /* valid only for Toeplitz */ __u8 reserved[6]; __u8 rx_hash_key[128]; /* valid only for Toeplitz */ __u32 comp_mask; __u32 reserved1; }; struct mlx5_ib_create_qp_resp { __u32 uuar_index; }; struct mlx5_ib_alloc_mw { __u32 comp_mask; __u8 num_klms; __u8 reserved1; __u16 reserved2; }; struct mlx5_ib_create_wq { __u64 buf_addr; __u64 db_addr; __u32 rq_wqe_count; __u32 rq_wqe_shift; __u32 user_index; __u32 flags; __u32 comp_mask; __u32 reserved; }; struct mlx5_ib_create_wq_resp { __u32 response_length; __u32 reserved; }; struct mlx5_ib_create_rwq_ind_tbl_resp { __u32 response_length; __u32 reserved; }; struct mlx5_ib_modify_wq { __u32 comp_mask; __u32 reserved; }; #endif /* MLX5_ABI_USER_H */ Index: stable/11/sys/ofed/include/uapi/rdma/mthca-abi.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/mthca-abi.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/mthca-abi.h (revision 331772) @@ -1,111 +1,115 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef MTHCA_ABI_USER_H #define MTHCA_ABI_USER_H #include /* * Increment this value if any changes that break userspace ABI * compatibility are made. */ #define MTHCA_UVERBS_ABI_VERSION 1 /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to * avoid incompatibility between 32-bit userspace and 64-bit kernels). * In particular do not use pointer types -- pass pointers in __u64 * instead. */ struct mthca_alloc_ucontext_resp { __u32 qp_tab_size; __u32 uarc_size; }; struct mthca_alloc_pd_resp { __u32 pdn; __u32 reserved; }; /* * Mark the memory region with a DMA attribute that causes * in-flight DMA to be flushed when the region is written to: */ #define MTHCA_MR_DMASYNC 0x1 struct mthca_reg_mr { __u32 mr_attrs; __u32 reserved; }; struct mthca_create_cq { __u32 lkey; __u32 pdn; __u64 arm_db_page; __u64 set_db_page; __u32 arm_db_index; __u32 set_db_index; }; struct mthca_create_cq_resp { __u32 cqn; __u32 reserved; }; struct mthca_resize_cq { __u32 lkey; __u32 reserved; }; struct mthca_create_srq { __u32 lkey; __u32 db_index; __u64 db_page; }; struct mthca_create_srq_resp { __u32 srqn; __u32 reserved; }; struct mthca_create_qp { __u32 lkey; __u32 reserved; __u64 sq_db_page; __u64 rq_db_page; __u32 sq_db_index; __u32 rq_db_index; }; #endif /* MTHCA_ABI_USER_H */ Property changes on: stable/11/sys/ofed/include/uapi/rdma/mthca-abi.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: stable/11/sys/ofed/include/uapi/rdma/rdma_user_cm.h =================================================================== --- stable/11/sys/ofed/include/uapi/rdma/rdma_user_cm.h (revision 331771) +++ stable/11/sys/ofed/include/uapi/rdma/rdma_user_cm.h (revision 331772) @@ -1,316 +1,320 @@ -/* +/*- + * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 + * * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. + * + * $FreeBSD$ */ #ifndef RDMA_USER_CM_H #define RDMA_USER_CM_H #ifdef _KERNEL #include #include #include #else #include #include #endif #include #include #define RDMA_USER_CM_ABI_VERSION 4 #define RDMA_MAX_PRIVATE_DATA 256 enum { RDMA_USER_CM_CMD_CREATE_ID, RDMA_USER_CM_CMD_DESTROY_ID, RDMA_USER_CM_CMD_BIND_IP, RDMA_USER_CM_CMD_RESOLVE_IP, RDMA_USER_CM_CMD_RESOLVE_ROUTE, RDMA_USER_CM_CMD_QUERY_ROUTE, RDMA_USER_CM_CMD_CONNECT, RDMA_USER_CM_CMD_LISTEN, RDMA_USER_CM_CMD_ACCEPT, RDMA_USER_CM_CMD_REJECT, RDMA_USER_CM_CMD_DISCONNECT, RDMA_USER_CM_CMD_INIT_QP_ATTR, RDMA_USER_CM_CMD_GET_EVENT, RDMA_USER_CM_CMD_GET_OPTION, RDMA_USER_CM_CMD_SET_OPTION, RDMA_USER_CM_CMD_NOTIFY, RDMA_USER_CM_CMD_JOIN_IP_MCAST, RDMA_USER_CM_CMD_LEAVE_MCAST, RDMA_USER_CM_CMD_MIGRATE_ID, RDMA_USER_CM_CMD_QUERY, RDMA_USER_CM_CMD_BIND, RDMA_USER_CM_CMD_RESOLVE_ADDR, RDMA_USER_CM_CMD_JOIN_MCAST }; /* * command ABI structures. */ struct rdma_ucm_cmd_hdr { __u32 cmd; __u16 in; __u16 out; }; struct rdma_ucm_create_id { __u64 uid; __u64 response; __u16 ps; __u8 qp_type; __u8 reserved[5]; }; struct rdma_ucm_create_id_resp { __u32 id; }; struct rdma_ucm_destroy_id { __u64 response; __u32 id; __u32 reserved; }; struct rdma_ucm_destroy_id_resp { __u32 events_reported; }; struct rdma_ucm_bind_ip { __u64 response; struct sockaddr_in6 addr; __u32 id; }; struct rdma_ucm_bind { __u32 id; __u16 addr_size; __u16 reserved; struct sockaddr_storage addr; }; struct rdma_ucm_resolve_ip { struct sockaddr_in6 src_addr; struct sockaddr_in6 dst_addr; __u32 id; __u32 timeout_ms; }; struct rdma_ucm_resolve_addr { __u32 id; __u32 timeout_ms; __u16 src_size; __u16 dst_size; __u32 reserved; struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; }; struct rdma_ucm_resolve_route { __u32 id; __u32 timeout_ms; }; enum { RDMA_USER_CM_QUERY_ADDR, RDMA_USER_CM_QUERY_PATH, RDMA_USER_CM_QUERY_GID }; struct rdma_ucm_query { __u64 response; __u32 id; __u32 option; }; struct rdma_ucm_query_route_resp { __u64 node_guid; struct ib_user_path_rec ib_route[2]; struct sockaddr_in6 src_addr; struct sockaddr_in6 dst_addr; __u32 num_paths; __u8 port_num; __u8 reserved[3]; }; struct rdma_ucm_query_addr_resp { __u64 node_guid; __u8 port_num; __u8 reserved; __u16 pkey; __u16 src_size; __u16 dst_size; struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; }; struct rdma_ucm_query_path_resp { __u32 num_paths; __u32 reserved; struct ib_path_rec_data path_data[0]; }; struct rdma_ucm_conn_param { __u32 qp_num; __u32 qkey; __u8 private_data[RDMA_MAX_PRIVATE_DATA]; __u8 private_data_len; __u8 srq; __u8 responder_resources; __u8 initiator_depth; __u8 flow_control; __u8 retry_count; __u8 rnr_retry_count; __u8 valid; }; struct rdma_ucm_ud_param { __u32 qp_num; __u32 qkey; struct ib_uverbs_ah_attr ah_attr; __u8 private_data[RDMA_MAX_PRIVATE_DATA]; __u8 private_data_len; __u8 reserved[7]; }; struct rdma_ucm_connect { struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; }; struct rdma_ucm_listen { __u32 id; __u32 backlog; }; struct rdma_ucm_accept { __u64 uid; struct rdma_ucm_conn_param conn_param; __u32 id; __u32 reserved; }; struct rdma_ucm_reject { __u32 id; __u8 private_data_len; __u8 reserved[3]; __u8 private_data[RDMA_MAX_PRIVATE_DATA]; }; struct rdma_ucm_disconnect { __u32 id; }; struct rdma_ucm_init_qp_attr { __u64 response; __u32 id; __u32 qp_state; }; struct rdma_ucm_notify { __u32 id; __u32 event; }; struct rdma_ucm_join_ip_mcast { __u64 response; /* rdma_ucm_create_id_resp */ __u64 uid; struct sockaddr_in6 addr; __u32 id; }; /* Multicast join flags */ enum { RDMA_MC_JOIN_FLAG_FULLMEMBER, RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER, RDMA_MC_JOIN_FLAG_RESERVED, }; struct rdma_ucm_join_mcast { __u64 response; /* rdma_ucma_create_id_resp */ __u64 uid; __u32 id; __u16 addr_size; __u16 join_flags; struct sockaddr_storage addr; }; struct rdma_ucm_get_event { __u64 response; }; struct rdma_ucm_event_resp { __u64 uid; __u32 id; __u32 event; __u32 status; union { struct rdma_ucm_conn_param conn; struct rdma_ucm_ud_param ud; } param; }; /* Option levels */ enum { RDMA_OPTION_ID = 0, RDMA_OPTION_IB = 1 }; /* Option details */ enum { RDMA_OPTION_ID_TOS = 0, RDMA_OPTION_ID_REUSEADDR = 1, RDMA_OPTION_ID_AFONLY = 2, RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { __u64 optval; __u32 id; __u32 level; __u32 optname; __u32 optlen; }; struct rdma_ucm_migrate_id { __u64 response; __u32 id; __u32 fd; }; struct rdma_ucm_migrate_resp { __u32 events_reported; }; #endif /* RDMA_USER_CM_H */ Index: stable/11 =================================================================== --- stable/11 (revision 331771) +++ stable/11 (revision 331772) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r330490