Index: sys/ofed/include/rdma/ib.h =================================================================== --- /dev/null +++ sys/ofed/include/rdma/ib.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2010 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_RDMA_IB_H) +#define _RDMA_IB_H + +#include + +/* + * Configuring a native Infiniband addr as in Linux upstream + * 8d36eb01da5d371feffa280e501377b5c450f5a5 + */ +#define AF_IB 41 + +struct ib_addr { + union { + __u8 uib_addr8[16]; + __be16 uib_addr16[8]; + __be32 uib_addr32[4]; + __be64 uib_addr64[2]; + } ib_u; +#define sib_addr8 ib_u.uib_addr8 +#define sib_addr16 ib_u.uib_addr16 +#define sib_addr32 ib_u.uib_addr32 +#define sib_addr64 ib_u.uib_addr64 +#define sib_raw ib_u.uib_addr8 +#define sib_subnet_prefix ib_u.uib_addr64[0] +#define sib_interface_id ib_u.uib_addr64[1] +}; + +static inline int ib_addr_any(const struct ib_addr *a) +{ + return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0); +} + +static inline int ib_addr_loopback(const struct ib_addr *a) +{ + return ((a->sib_addr32[0] | a->sib_addr32[1] | + a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0); +} + +static inline void ib_addr_set(struct ib_addr *addr, + __be32 w1, __be32 w2, __be32 w3, __be32 w4) +{ + addr->sib_addr32[0] = w1; + addr->sib_addr32[1] = w2; + addr->sib_addr32[2] = w3; + addr->sib_addr32[3] = w4; +} + +static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2) +{ + return memcmp(a1, a2, sizeof(struct ib_addr)); +} + +struct sockaddr_ib { + unsigned short int sib_family; /* AF_IB */ + __be16 sib_pkey; + __be32 sib_flowinfo; + struct ib_addr sib_addr; + __be64 sib_sid; + __be64 sib_sid_mask; + __u64 sib_scope_id; +}; + +#endif /* _RDMA_IB_H */ Index: sys/ofed/include/rdma/ib_addr.h =================================================================== --- sys/ofed/include/rdma/ib_addr.h +++ sys/ofed/include/rdma/ib_addr.h @@ -41,10 +41,10 @@ #include #include #include +#include +#include #include #include -#include -#include struct rdma_addr_client { atomic_t refcount; @@ -69,6 +69,7 @@ unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; + enum rdma_network_type network; }; /** @@ -105,17 +106,14 @@ int 
rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr); + +int rdma_addr_size(struct sockaddr *addr); + int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id, u32 scope_id); int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac, u16 *vlan_id, u32 scope_id); -static inline int ip_addr_size(struct sockaddr *addr) -{ - return addr->sa_family == AF_INET6 ? - sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); -} - static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) { return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9]; @@ -147,16 +145,16 @@ return tag; } -static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid) +static inline int rdma_ip2gid(const struct sockaddr *addr, union ib_gid *gid) { switch (addr->sa_family) { case AF_INET: - ipv6_addr_set_v4mapped(((struct sockaddr_in *)addr)->sin_addr.s_addr, + ipv6_addr_set_v4mapped(((const struct sockaddr_in *) + addr)->sin_addr.s_addr, (struct in6_addr *)gid); break; case AF_INET6: - memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, - 16); + memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16); break; default: return -EINVAL; @@ -180,9 +178,10 @@ out_in->sin6_len = sizeof(*out_in); out_in->sin6_family = AF_INET6; memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16); - if (scope_id < 256 && - IN6_IS_SCOPE_LINKLOCAL(&out_in->sin6_addr)) + if (IN6_IS_SCOPE_LINKLOCAL(&out_in->sin6_addr)) { + out_in->sin6_addr.s6_addr16[1] = 0; out_in->sin6_scope_id = scope_id; + } } return 0; } @@ -246,13 +245,19 @@ static inline int iboe_get_rate(struct net_device *dev) { - if (dev->if_baudrate >= IF_Gbps(40)) + uint64_t baudrate = dev->if_baudrate; +#ifdef if_baudrate_pf + int exp; + for (exp = dev->if_baudrate_pf; exp > 0; exp--) + baudrate *= 10; +#endif + if (baudrate >= IF_Gbps(40)) return IB_RATE_40_GBPS; - else if (dev->if_baudrate >= IF_Gbps(30)) + else if (baudrate >= IF_Gbps(30)) return IB_RATE_30_GBPS; - else if (dev->if_baudrate >= IF_Gbps(20)) + else if (baudrate >= IF_Gbps(20)) return IB_RATE_20_GBPS; - else if (dev->if_baudrate >= IF_Gbps(10)) + else if (baudrate >= IF_Gbps(10)) return IB_RATE_10_GBPS; else return IB_RATE_PORT_CURRENT; @@ -279,20 +284,6 @@ return addr->s6_addr[0] == 0xff; } -static inline void resolve_mcast_mac(struct in6_addr *addr, u8 *mac) -{ - if (addr->s6_addr[0] != 0xff) - return; - -#ifdef DUAL_MODE_MCAST_MAC - if (addr->s6_addr[1] == 0x0e) /* IPv4 */ - ip_eth_mc_map(addr->s6_addr32[3], mac); - else -#endif - ipv6_eth_mc_map(addr, mac); -} - - static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac) { int i; @@ -308,7 +299,7 @@ u16 vid; vid = dgid->raw[11] << 8 | dgid->raw[12]; - return vid < 0x1000 ? vid : 0xffff; + return vid < 0x1000 ? vid : 0xffff; } static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev) Index: sys/ofed/include/rdma/ib_cache.h =================================================================== --- sys/ofed/include/rdma/ib_cache.h +++ sys/ofed/include/rdma/ib_cache.h @@ -38,11 +38,22 @@ #include /** + * ib_cache_use_roce_gid_cache - Returns whether the device uses roce gid cache + * @device: The device to query + * @port_num: The port number of the device to query. + * + * ib_cache_use_roce_gid_cache() returns 0 if this port uses the roce_gid_cache + * to store GIDs and error otherwise. 
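A minimal caller sketch building on this check (illustrative only, not part of the patch; device, port_num and ret are assumed to be in scope, and ib_get_cached_gid() with the extra attr argument is declared just below):

	union ib_gid gid;
	struct ib_gid_attr attr;
	int is_roce = !ib_cache_use_roce_gid_cache(device, port_num);

	/* attr is only meaningful for RoCE ports */
	ret = ib_get_cached_gid(device, port_num, 0, &gid,
				is_roce ? &attr : NULL);
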
+ */ +int ib_cache_use_roce_gid_cache(struct ib_device *device, u8 port_num); + +/** * ib_get_cached_gid - Returns a cached GID table entry * @device: The device to query. * @port_num: The port number of the device to query. * @index: The index into the cached GID table to query. * @gid: The GID value found at the specified index. + * @attr: The GID attribute found at the specified index (only in RoCE). * * ib_get_cached_gid() fetches the specified GID table entry stored in * the local software cache. @@ -50,13 +61,17 @@ int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index, - union ib_gid *gid); + union ib_gid *gid, + struct ib_gid_attr *attr); /** * ib_find_cached_gid - Returns the port number and GID table index where * a specified GID value occurs. * @device: The device to query. * @gid: The GID value to search for. + * @gid_type: The GID type to search for. + * @net: In RoCE, the namespace (currently not supported) of the device. + * @if_index: In RoCE, the if_index of the device. Zero means ignore. * @port_num: The port number of the device where the GID value was found. * @index: The index into the cached GID table where the GID was found. This * parameter may be NULL. @@ -66,10 +81,63 @@ */ int ib_find_cached_gid(struct ib_device *device, union ib_gid *gid, + enum ib_gid_type gid_type, + struct net *net, + int if_index, u8 *port_num, u16 *index); /** + * ib_find_cached_gid_by_port - Returns the GID table index where a specified + * GID value occurs + * @device: The device to query. + * @gid: The GID value to search for. + * @gid_type: The GID type to search for. + * @port_num: The port number of the device where the GID value should be + * searched. + * @net: In RoCE, the namespace (currently not supported) of the device. + * @if_index: In RoCE, the if_index of the device. Zero means ignore. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid_by_port() searches for the specified GID value in + * the local software cache. + */ +int ib_find_cached_gid_by_port(struct ib_device *device, + union ib_gid *gid, + enum ib_gid_type gid_type, + u8 port_num, + struct net *net, + int if_index, + u16 *index); + +/** + * ib_find_gid_by_filter - Returns the GID table index where a specified + * GID value occurs + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value should be + * searched. + * @filter: The filter function is executed on any matching GID in the table. + * If the filter function returns true, the corresponding index is returned, + * otherwise, we continue searching the GID table. It's guaranteed that + * while filter is executed, the ndev field is valid and the structure won't + * change. filter is executed in an atomic context. filter must be NULL + * when RoCE GID cache isn't supported on the respective device's port. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_gid_by_filter() searches for the specified GID value in + * the local software cache. + */ +int ib_find_gid_by_filter(struct ib_device *device, + union ib_gid *gid, + u8 port_num, + bool (*filter)(const union ib_gid *gid, + const struct ib_gid_attr *, + void *), + void *context, u16 *index); +/** * ib_get_cached_pkey - Returns a cached PKey table entry * @device: The device to query. * @port_num: The port number of the device to query. 
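Before the next header, a sketch of how the filter variant above might be driven (illustrative only; match_ndev is a hypothetical helper, and attr->ndev is the ib_gid_attr field this patch adds in ib_verbs.h):

	static bool match_ndev(const union ib_gid *gid,
			       const struct ib_gid_attr *attr, void *context)
	{
		/* returning true stops the search at this index */
		return attr->ndev == (struct net_device *)context;
	}

	/* device, gid, port_num and ndev assumed to be in scope */
	u16 index;
	int ret = ib_find_gid_by_filter(device, &gid, port_num,
					match_ndev, ndev, &index);
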
Index: sys/ofed/include/rdma/ib_pack.h =================================================================== --- sys/ofed/include/rdma/ib_pack.h +++ sys/ofed/include/rdma/ib_pack.h @@ -34,12 +34,15 @@ #define IB_PACK_H #include +#include enum { IB_LRH_BYTES = 8, IB_ETH_BYTES = 14, IB_VLAN_BYTES = 4, IB_GRH_BYTES = 40, + IB_IP4_BYTES = 20, + IB_UDP_BYTES = 8, IB_BTH_BYTES = 12, IB_DETH_BYTES = 8 }; @@ -220,6 +223,26 @@ __be16 type; }; +struct ib_unpacked_ip4 { + u8 ver_len; + u8 tos; + __be16 tot_len; + __be16 id; + __be16 frag_off; + u8 ttl; + u8 protocol; + __be16 check; + __be32 saddr; + __be32 daddr; +}; + +struct ib_unpacked_udp { + __be16 sport; + __be16 dport; + __be16 length; + __be16 csum; +}; + struct ib_unpacked_vlan { __be16 tag; __be16 type; @@ -228,16 +251,20 @@ struct ib_ud_header { int lrh_present; struct ib_unpacked_lrh lrh; - int eth_present; - struct ib_unpacked_eth eth; + int eth_present; + struct ib_unpacked_eth eth; int vlan_present; struct ib_unpacked_vlan vlan; - int grh_present; - struct ib_unpacked_grh grh; - struct ib_unpacked_bth bth; + int grh_present; + struct ib_unpacked_grh grh; + int ipv4_present; + struct ib_unpacked_ip4 ip4; + int udp_present; + struct ib_unpacked_udp udp; + struct ib_unpacked_bth bth; struct ib_unpacked_deth deth; - int immediate_present; - __be32 immediate_data; + int immediate_present; + __be32 immediate_data; }; void ib_pack(const struct ib_field *desc, @@ -250,13 +277,17 @@ void *buf, void *structure); -void ib_ud_header_init(int payload_bytes, - int lrh_present, - int eth_present, - int vlan_present, - int grh_present, - int immediate_present, - struct ib_ud_header *header); +u16 ib_ud_ip4_csum(struct ib_ud_header *header); + +int ib_ud_header_init(int payload_bytes, + int lrh_present, + int eth_present, + int vlan_present, + int grh_present, + int ip_version, + int udp_present, + int immediate_present, + struct ib_ud_header *header); int ib_ud_header_pack(struct ib_ud_header *header, void *buf); Index: sys/ofed/include/rdma/ib_peer_mem.h =================================================================== --- sys/ofed/include/rdma/ib_peer_mem.h +++ sys/ofed/include/rdma/ib_peer_mem.h @@ -48,8 +48,8 @@ void *peer_client_context, int srcu_key); -unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client, - void *context); +int ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client, + void *context, unsigned long *context_ticket); int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client, unsigned long key); struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client, Index: sys/ofed/include/rdma/ib_pma.h =================================================================== --- sys/ofed/include/rdma/ib_pma.h +++ sys/ofed/include/rdma/ib_pma.h @@ -51,9 +51,9 @@ /* Counters should be saturate once they reach their maximum value */ #define ASSIGN_16BIT_COUNTER(counter, value) do { \ if ((value) > MAX_U16) \ - counter = cpu_to_be16(MAX_U16); \ + counter = cpu_to_be16(MAX_U16); \ else \ - counter = cpu_to_be16(value); \ + counter = cpu_to_be16(value); \ } while (0) /* Index: sys/ofed/include/rdma/ib_sa.h =================================================================== --- sys/ofed/include/rdma/ib_sa.h +++ sys/ofed/include/rdma/ib_sa.h @@ -154,9 +154,10 @@ u8 packet_life_time_selector; u8 packet_life_time; u8 preference; - u8 smac[ETH_ALEN]; - u8 dmac[6]; - __be16 vlan_id; + u8 dmac[ETH_ALEN]; + int ifindex; + struct net *net; + enum ib_gid_type gid_type; }; #define 
IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) @@ -197,6 +198,9 @@ u8 scope; u8 join_state; int proxy_join; + int ifindex; + struct net *net; + enum ib_gid_type gid_type; }; /* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ @@ -300,7 +304,7 @@ struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, + int timeout_ms, int retries, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_path_rec *resp, void *context), @@ -312,7 +316,7 @@ u8 method, struct ib_sa_service_rec *rec, ib_sa_comp_mask comp_mask, - int timeout_ms, gfp_t gfp_mask, + int timeout_ms, int retries, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_service_rec *resp, void *context), @@ -405,6 +409,12 @@ struct ib_ah_attr *ah_attr); /** + * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec + * to IB MAD wire format. + */ +void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute); + +/** * ib_sa_unpack_path - Convert a path record from MAD format to struct * ib_sa_path_rec. */ @@ -412,13 +422,13 @@ /* Support GuidInfoRecord */ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, - struct ib_device *device, u8 port_num, - struct ib_sa_guidinfo_rec *rec, - ib_sa_comp_mask comp_mask, u8 method, - int timeout_ms, gfp_t gfp_mask, - void (*callback)(int status, - struct ib_sa_guidinfo_rec *resp, - void *context), - void *context, - struct ib_sa_query **sa_query); + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, int retries, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); #endif /* IB_SA_H */ Index: sys/ofed/include/rdma/ib_umem.h =================================================================== --- sys/ofed/include/rdma/ib_umem.h +++ sys/ofed/include/rdma/ib_umem.h @@ -69,7 +69,7 @@ unsigned long diff; unsigned long start; struct sg_table sg_head; - int nmap; + int nmap; int npages; /* peer memory that manages this umem*/ struct ib_peer_memory_client *ib_peer_mem; Index: sys/ofed/include/rdma/ib_user_mad.h =================================================================== --- sys/ofed/include/rdma/ib_user_mad.h +++ sys/ofed/include/rdma/ib_user_mad.h @@ -191,6 +191,15 @@ __u8 rmpp_version; }; + +/** + * ib_user_mad_thresh_req - Change receive list threshold request + * @threshold: The new threshold value for the receive list. 
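A userspace sketch of driving the ioctl defined just below (illustrative only; umad_fd is an already-open umad device descriptor, and 256 is an arbitrary example value):

	struct ib_user_mad_thresh_req req = { .threshold = 256 };

	if (ioctl(umad_fd, IB_USER_MAD_UPDATE_THRESHOLD, &req) != 0)
		warn("could not update receive list threshold");
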
+ */ +struct ib_user_mad_thresh_req { + __u32 threshold; +}; + #define IB_IOCTL_MAGIC 0x1b #define IB_USER_MAD_REGISTER_AGENT _IO(IB_IOCTL_MAGIC, 1) @@ -199,4 +208,7 @@ #define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) +#define IB_USER_MAD_UPDATE_THRESHOLD _IOW(IB_IOCTL_MAGIC, 5, \ + struct ib_user_mad_thresh_req) + #endif /* IB_USER_MAD_H */ Index: sys/ofed/include/rdma/ib_user_verbs.h =================================================================== --- sys/ofed/include/rdma/ib_user_verbs.h +++ sys/ofed/include/rdma/ib_user_verbs.h @@ -45,12 +45,6 @@ #define IB_USER_VERBS_ABI_VERSION 6 #define IB_USER_VERBS_CMD_THRESHOLD 50 -/* - * To support 6 legacy commands using the old extension style - */ -#define IB_USER_VERBS_LEGACY_CMD_FIRST 52 -#define IB_USER_VERBS_LEGACY_EX_CMD_LAST 56 - enum { IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, @@ -96,11 +90,11 @@ }; enum { + IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, IB_USER_VERBS_EX_CMD_DESTROY_FLOW }; - /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to @@ -115,7 +109,7 @@ struct ib_uverbs_async_event_desc { __u64 element; __u32 event_type; /* enum ib_event_type */ - __u32 reserved; + __u32 rsc_type; }; struct ib_uverbs_comp_event_desc { @@ -137,6 +131,13 @@ #define IBV_RESP_TO_VERBS_RESP_EX(ex_ptr, ex_type, ibv_type) \ IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, comp_mask) +enum ib_event_rsc_type { + IB_EVENT_RSC_CQ, + IB_EVENT_RSC_QP, + IB_EVENT_RSC_DCT, + IB_EVENT_RSC_SRQ, + IB_EVENT_RSC_DEVICE, +}; #define IB_USER_VERBS_CMD_COMMAND_MASK 0xff #define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u @@ -221,6 +222,17 @@ __u8 reserved[4]; }; +struct ib_uverbs_ex_query_device { + __u32 comp_mask; + __u32 reserved; +}; + +struct ib_uverbs_ex_query_device_resp { + struct ib_uverbs_query_device_resp base; + __u32 comp_mask; + __u32 response_length; +}; + struct ib_uverbs_query_port { __u64 response; __u8 port_num; @@ -296,6 +308,22 @@ __u32 rkey; }; +struct ib_uverbs_rereg_mr { + __u64 response; + __u32 mr_handle; + __u32 flags; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; +}; + +struct ib_uverbs_rereg_mr_resp { + __u32 lkey; + __u32 rkey; +}; + struct ib_uverbs_dereg_mr { __u32 mr_handle; }; @@ -339,30 +367,6 @@ __u32 cqe; }; -enum ib_uverbs_create_cq_ex_comp_mask { - IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS = (u64)1 << 0, -}; - -struct ib_uverbs_create_cq_ex { - __u64 comp_mask; - __u64 user_handle; - __u32 cqe; - __u32 comp_vector; - __s32 comp_channel; - __u32 reserved; - __u64 create_flags; - __u64 driver_data[0]; -}; - -struct ib_uverbs_modify_cq_ex { - __u64 comp_mask; - __u32 cq_handle; - __u32 attr_mask; - __u16 cq_count; - __u16 cq_period; - __u32 cq_cap_flags; -}; - struct ib_uverbs_resize_cq { __u64 response; __u32 cq_handle; @@ -614,42 +618,6 @@ __u64 driver_data[0]; }; -enum ib_uverbs_modify_qp_ex_comp_mask { - IB_UVERBS_QP_ATTR_DCT_KEY = 1ULL << 0, -}; - -struct ib_uverbs_modify_qp_ex { - __u32 comp_mask; - struct ib_uverbs_qp_dest dest; - struct ib_uverbs_qp_dest alt_dest; - __u32 qp_handle; - __u32 attr_mask; - __u32 qkey; - __u32 rq_psn; - __u32 sq_psn; - __u32 dest_qp_num; - __u32 qp_access_flags; - __u16 pkey_index; - __u16 alt_pkey_index; - __u8 qp_state; - __u8 cur_qp_state; - __u8 path_mtu; - __u8 path_mig_state; - __u8 en_sqd_async_notify; - __u8 max_rd_atomic; - __u8 
max_dest_rd_atomic; - __u8 min_rnr_timer; - __u8 port_num; - __u8 timeout; - __u8 retry_cnt; - __u8 rnr_retry; - __u8 alt_port_num; - __u8 alt_timeout; - __u8 reserved[2]; - __u64 dct_key; - __u64 driver_data[0]; -}; - struct ib_uverbs_modify_qp_resp { }; @@ -784,18 +752,18 @@ }; struct ib_uverbs_flow_spec_hdr { - __u32 type; + __u32 type; __u16 size; __u16 reserved; /* followed by flow_spec */ __u64 flow_spec_data[0]; }; -struct ib_kern_eth_filter { - __u8 dst_mac[6]; - __u8 src_mac[6]; - __be16 ether_type; - __be16 vlan_tag; +struct ib_uverbs_flow_eth_filter { + __u8 dst_mac[6]; + __u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; }; struct ib_uverbs_flow_spec_eth { @@ -807,8 +775,13 @@ __u16 reserved; }; }; - struct ib_kern_eth_filter val; - struct ib_kern_eth_filter mask; + struct ib_uverbs_flow_eth_filter val; + struct ib_uverbs_flow_eth_filter mask; +}; + +struct ib_uverbs_flow_ipv4_filter { + __be32 src_ip; + __be32 dst_ip; }; struct ib_kern_ib_filter { @@ -829,11 +802,6 @@ struct ib_kern_ib_filter mask; }; -struct ib_kern_ipv4_filter { - __be32 src_ip; - __be32 dst_ip; -}; - struct ib_uverbs_flow_spec_ipv4 { union { struct ib_uverbs_flow_spec_hdr hdr; @@ -843,13 +811,13 @@ __u16 reserved; }; }; - struct ib_kern_ipv4_filter val; - struct ib_kern_ipv4_filter mask; + struct ib_uverbs_flow_ipv4_filter val; + struct ib_uverbs_flow_ipv4_filter mask; }; -struct ib_kern_tcp_udp_filter { +struct ib_uverbs_flow_tcp_udp_filter { __be16 dst_port; - __be16 src_port; + __be16 src_port; }; struct ib_uverbs_flow_spec_tcp_udp { @@ -861,18 +829,18 @@ __u16 reserved; }; }; - struct ib_kern_tcp_udp_filter val; - struct ib_kern_tcp_udp_filter mask; + struct ib_uverbs_flow_tcp_udp_filter val; + struct ib_uverbs_flow_tcp_udp_filter mask; }; struct ib_uverbs_flow_attr { - __u32 type; - __u16 size; - __u16 priority; - __u8 num_of_specs; - __u8 reserved[2]; - __u8 port; - __u32 flags; + __u32 type; + __u16 size; + __u16 priority; + __u8 num_of_specs; + __u8 reserved[2]; + __u8 port; + __u32 flags; /* Following are the optional layers according to user request * struct ib_flow_spec_xxx * struct ib_flow_spec_yyy @@ -959,22 +927,4 @@ __u32 events_reported; }; - -/* - * Legacy extended verbs related structures - */ -struct ib_uverbs_ex_cmd_hdr_legacy { - __u32 command; - __u16 in_words; - __u16 out_words; - __u16 provider_in_words; - __u16 provider_out_words; - __u32 cmd_hdr_reserved; -}; - -struct ib_uverbs_ex_cmd_resp1_legacy { - __u64 comp_mask; - __u64 response; -}; - #endif /* IB_USER_VERBS_H */ Index: sys/ofed/include/rdma/ib_user_verbs_exp.h =================================================================== --- sys/ofed/include/rdma/ib_user_verbs_exp.h +++ sys/ofed/include/rdma/ib_user_verbs_exp.h @@ -37,6 +37,13 @@ #define IB_USER_VERBS_EXP_H #include +#include + +enum ibv_exp_start_values { + IBV_EXP_START_ENUM = 0x40, + IBV_EXP_START_FLAG_LOC = 0x20, + IBV_EXP_START_FLAG = (1ULL << IBV_EXP_START_FLAG_LOC), +}; enum { IB_USER_VERBS_EXP_CMD_FIRST = 64 @@ -51,6 +58,28 @@ IB_USER_VERBS_EXP_CMD_CREATE_DCT, IB_USER_VERBS_EXP_CMD_DESTROY_DCT, IB_USER_VERBS_EXP_CMD_QUERY_DCT, + IB_USER_VERBS_EXP_CMD_ARM_DCT, + IB_USER_VERBS_EXP_CMD_CREATE_MR, + IB_USER_VERBS_EXP_CMD_QUERY_MKEY, + IB_USER_VERBS_EXP_CMD_REG_MR_EX, + IB_USER_VERBS_EXP_CMD_PREFETCH_MR, + IB_USER_VERBS_EXP_CMD_REREG_MR, + IB_USER_VERBS_EXP_CMD_CREATE_WQ, + IB_USER_VERBS_EXP_CMD_MODIFY_WQ, + IB_USER_VERBS_EXP_CMD_DESTROY_WQ, + IB_USER_VERBS_EXP_CMD_CREATE_RWQ_IND_TBL, + IB_USER_VERBS_EXP_CMD_DESTROY_RWQ_IND_TBL, + 
IB_USER_VERBS_EXP_CMD_CREATE_FLOW, +}; + +struct ib_uverbs_exp_hash_conf { + /* enum ib_rx_hash_fields */ + __u64 rx_hash_fields_mask; + __u32 rwq_ind_tbl_handle; + __u8 rx_hash_function; /* enum ib_rx_hash_function_flags */ + __u8 rx_key_len; /* valid only for Toeplitz */ + __u8 rx_hash_key[128]; /* valid only for Toeplitz */ + __u8 reserved[2]; }; /* @@ -67,7 +96,8 @@ enum ib_uverbs_exp_create_qp_comp_mask { IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS = (1ULL << 0), IB_UVERBS_EXP_CREATE_QP_INL_RECV = (1ULL << 1), - IB_UVERBS_EXP_CREATE_QP_QPG = (1ULL << 2) + IB_UVERBS_EXP_CREATE_QP_QPG = (1ULL << 2), + IB_UVERBS_EXP_CREATE_QP_MAX_INL_KLMS = (1ULL << 3) }; struct ib_uverbs_qpg_init_attrib { @@ -87,6 +117,13 @@ __u32 reserved2; }; +enum ib_uverbs_exp_create_qp_flags { + IBV_UVERBS_EXP_CREATE_QP_FLAGS = IB_QP_CREATE_CROSS_CHANNEL | + IB_QP_CREATE_MANAGED_SEND | + IB_QP_CREATE_MANAGED_RECV | + IB_QP_CREATE_ATOMIC_BE_REPLY +}; + struct ib_uverbs_exp_create_qp { __u64 comp_mask; __u64 user_handle; @@ -107,6 +144,10 @@ __u32 max_inl_recv; __u32 reserved1; struct ib_uverbs_qpg qpg; + __u64 max_inl_send_klms; + struct ib_uverbs_exp_hash_conf rx_hash_conf; + uint8_t port_num; + __u8 reserved_2[7]; __u64 driver_data[0]; }; @@ -133,8 +174,8 @@ __u32 cq_handle; __u32 srq_handle; __u32 access_flags; - __u32 flow_label; __u64 dc_key; + __u32 flow_label; __u8 min_rnr_timer; __u8 tclass; __u8 port; @@ -150,20 +191,26 @@ struct ib_uverbs_create_dct_resp { __u32 dct_handle; __u32 dctn; + __u32 inline_size; + __u32 rsvd; }; struct ib_uverbs_destroy_dct { __u64 comp_mask; - __u64 user_handle; + __u64 dct_handle; + __u32 rsvd; + __u64 driver_data[0]; }; struct ib_uverbs_destroy_dct_resp { - __u64 reserved; + __u32 events_reported; + __u32 reserved; }; struct ib_uverbs_query_dct { __u64 comp_mask; __u64 dct_handle; + __u32 reserved; __u64 driver_data[0]; }; @@ -184,6 +231,55 @@ __u64 driver_data[0]; }; +struct ib_uverbs_arm_dct { + __u64 comp_mask; + __u32 dct_handle; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_arm_dct_resp { + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_umr_caps { + __u32 max_reg_descriptors; + __u32 max_send_wqe_inline_klms; + __u32 max_umr_recursion_depth; + __u32 max_umr_stride_dimenson; +}; + +struct ib_uverbs_exp_odp_caps { + __u64 general_odp_caps; + struct { + __u32 rc_odp_caps; + __u32 uc_odp_caps; + __u32 ud_odp_caps; + __u32 dc_odp_caps; + __u32 xrc_odp_caps; + __u32 raw_eth_odp_caps; + } per_transport_caps; +}; + +struct ib_uverbs_exp_rx_hash_caps { + __u32 max_rwq_indirection_tables; + __u32 max_rwq_indirection_table_size; + __u64 supported_packet_fields; + __u32 supported_qps; + __u8 supported_hash_functions; + __u8 reserved[3]; +}; + +struct ib_uverbs_exp_mp_rq_caps { + __u32 supported_qps; /* use ib_exp_supported_qp_types */ + __u32 allowed_shifts; /* use ib_mp_rq_shifts */ + __u8 min_single_wqe_log_num_of_strides; + __u8 max_single_wqe_log_num_of_strides; + __u8 min_single_stride_log_num_of_bytes; + __u8 max_single_stride_log_num_of_bytes; + __u32 reserved; +}; + struct ib_uverbs_exp_query_device { __u64 comp_mask; __u64 driver_data[0]; @@ -199,6 +295,304 @@ __u32 dc_rd_res; __u32 inline_recv_sz; __u32 max_rss_tbl_sz; + __u64 atomic_arg_sizes; + __u32 max_fa_bit_boudary; + __u32 log_max_atomic_inline_arg; + struct ib_uverbs_exp_umr_caps umr_caps; + struct ib_uverbs_exp_odp_caps odp_caps; + __u32 max_dct; + __u32 max_ctx_res_domain; + struct ib_uverbs_exp_rx_hash_caps rx_hash; + __u32 max_wq_type_rq; + __u32 max_device_ctx; + struct ib_uverbs_exp_mp_rq_caps 
mp_rq_caps; +}; + +enum ib_uverbs_exp_modify_cq_comp_mask { + /* set supported bits for validity check */ + IB_UVERBS_EXP_CQ_ATTR_RESERVED = 1 << 0 +}; + +struct ib_uverbs_exp_modify_cq { + __u32 cq_handle; + __u32 attr_mask; + __u16 cq_count; + __u16 cq_period; + __u32 cq_cap_flags; + __u32 comp_mask; + __u32 rsvd; +}; + +/* + * Flags for exp_attr_mask field in ibv_exp_qp_attr struct + */ +enum ibv_exp_qp_attr_mask { + IBV_EXP_QP_GROUP_RSS = IB_QP_GROUP_RSS, + IBV_EXP_QP_DC_KEY = IB_QP_DC_KEY, + IBV_EXP_QP_FLOW_ENTROPY = IB_QP_FLOW_ENTROPY, + IBV_EXP_QP_ATTR_MASK = IB_QP_GROUP_RSS | IB_QP_DC_KEY | IB_QP_FLOW_ENTROPY +}; + +enum ib_uverbs_exp_modify_qp_comp_mask { + IB_UVERBS_EXP_QP_ATTR_FLOW_ENTROPY = 1UL << 0, + IB_UVERBS_EXP_QP_ATTR_RESERVED = 1UL << 1, +}; + +struct ib_uverbs_exp_modify_qp { + __u32 comp_mask; + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[6]; + __u64 dct_key; + __u32 exp_attr_mask; + __u32 flow_entropy; + __u64 driver_data[0]; +}; + +enum ib_uverbs_exp_create_cq_comp_mask { + IB_UVERBS_EXP_CREATE_CQ_CAP_FLAGS = (u64)1 << 0, + IB_UVERBS_EXP_CREATE_CQ_ATTR_RESERVED = (u64)1 << 1, +}; + +struct ib_uverbs_exp_create_cq { + __u64 comp_mask; + __u64 user_handle; + __u32 cqe; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; + __u64 create_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_create_mr { + __u64 comp_mask; + __u32 pd_handle; + __u32 max_reg_descriptors; + __u64 exp_access_flags; + __u32 create_flags; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_create_mr_resp { + __u64 comp_mask; + __u32 handle; + __u32 lkey; + __u32 rkey; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_query_mkey { + __u64 comp_mask; + __u32 handle; + __u32 lkey; + __u32 rkey; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_query_mkey_resp { + __u64 comp_mask; + __u32 max_reg_descriptors; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_exp_query_odp_caps { + __u64 comp_mask; +}; + +struct ib_uverbs_exp_query_odp_caps_resp { + __u64 comp_mask; + __u64 general_caps; + struct { + __u32 rc_odp_caps; + __u32 uc_odp_caps; + __u32 ud_odp_caps; + __u32 dc_odp_caps; + __u32 xrc_odp_caps; + __u32 raw_eth_odp_caps; + } per_transport_caps; +}; + +enum ib_uverbs_exp_access_flags { + IB_UVERBS_EXP_ACCESS_MW_ZERO_BASED = (IBV_EXP_START_FLAG << 13), + IB_UVERBS_EXP_ACCESS_ON_DEMAND = (IBV_EXP_START_FLAG << 14), +}; + +enum ib_uverbs_exp_reg_mr_ex_comp_mask { + IB_UVERBS_EXP_REG_MR_EX_RESERVED = (u64)1 << 0, +}; + +struct ib_uverbs_exp_reg_mr_ex { + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 reserved; + __u64 exp_access_flags; + __u64 comp_mask; +}; + +struct ib_uverbs_exp_rereg_mr { + __u32 comp_mask; + __u32 mr_handle; + __u32 flags; + __u32 reserved; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; +}; + +struct ib_uverbs_exp_rereg_mr_resp { + __u32 comp_mask; + __u32 lkey; + __u32 rkey; + __u32 reserved; +}; + +struct 
ib_uverbs_exp_reg_mr_resp_ex { + __u32 mr_handle; + __u32 lkey; + __u32 rkey; + __u32 reserved; + __u64 comp_mask; +}; + +struct ib_uverbs_exp_prefetch_mr { + __u64 comp_mask; + __u32 mr_handle; + __u32 flags; + __u64 start; + __u64 length; +}; + +struct ib_uverbs_exp_wq_mp_rq { + __u32 use_shift; /* use ib_mp_rq_shifts */ + __u8 single_wqe_log_num_of_strides; + __u8 single_stride_log_num_of_bytes; + __u16 reserved; +}; + +struct ib_uverbs_exp_create_wq { + __u32 comp_mask; + __u32 wq_type; /* enum ib_wq_type */ + __u64 user_handle; + __u32 pd_handle; + __u32 cq_handle; + __u32 srq_handle; + __u32 max_recv_wr; + __u32 max_recv_sge; + __u32 reserved; + struct ib_uverbs_exp_wq_mp_rq mp_rq; +}; + +struct ib_uverbs_exp_create_wq_resp { + __u32 comp_mask; + __u32 response_length; + __u32 wq_handle; + __u32 max_recv_wr; + __u32 max_recv_sge; + __u32 wqn; +}; + +struct ib_uverbs_exp_destroy_wq { + __u32 comp_mask; + __u32 wq_handle; +}; + +struct ib_uverbs_exp_modify_wq { + __u32 comp_mask; + __u32 wq_handle; + __u32 wq_state; + __u32 curr_wq_state; +}; + +struct ib_uverbs_exp_create_rwq_ind_table { + __u32 comp_mask; + __u32 pd_handle; + __u32 log_ind_tbl_size; + __u32 reserved; + /* Following are the wq handles according to log_ind_tbl_size + * wq_handle1 + * wq_handle2 + */ + __u32 wq_handles[0]; +}; + +struct ib_uverbs_exp_create_rwq_ind_table_resp { + __u32 comp_mask; + __u32 response_length; + __u32 ind_tbl_handle; + __u32 ind_tbl_num; +}; + +struct ib_uverbs_exp_destroy_rwq_ind_table { + __u32 comp_mask; + __u32 ind_tbl_handle; +}; + +struct ib_uverbs_exp_flow_ipv6_filter { + __u8 src_ip[16]; + __u8 dst_ip[16]; +}; + +struct ib_uverbs_exp_flow_spec_ipv6 { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_exp_flow_ipv6_filter val; + struct ib_uverbs_exp_flow_ipv6_filter mask; +}; + +struct ib_uverbs_exp_flow_spec { + union { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_spec_eth eth; + struct ib_uverbs_flow_spec_ib ib; + struct ib_uverbs_flow_spec_ipv4 ipv4; + struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + struct ib_uverbs_exp_flow_spec_ipv6 ipv6; + }; }; #endif /* IB_USER_VERBS_EXP_H */ Index: sys/ofed/include/rdma/ib_verbs.h =================================================================== --- sys/ofed/include/rdma/ib_verbs.h +++ sys/ofed/include/rdma/ib_verbs.h @@ -50,7 +50,13 @@ #include #include #include +#include +#include +#include +#include + +#include #include extern struct workqueue_struct *ib_wq; @@ -63,23 +69,91 @@ } global; }; +extern union ib_gid zgid; + +enum ib_gid_type { + /* If link layer is Ethernet, this is RoCE V1 */ + IB_GID_TYPE_IB = 0, + IB_GID_TYPE_ROCE_V2 = 1, + IB_GID_TYPE_ROCE_V1_5 = 2, + IB_GID_TYPE_SIZE +}; + +#define ROCE_V2_UDP_DPORT 4791 + +enum { + IB_SA_WELL_KNOWN_GID_PREFIX = 0xfe80000000000000ull, + IB_SA_WELL_KNOWN_GUID = 2, +}; + +struct ib_gid_attr { + enum ib_gid_type gid_type; + struct net_device *ndev; +}; + +struct ib_roce_gid_cache_entry { + /* seq number of 0 indicates entry being changed. */ + unsigned int seq; + union ib_gid gid; + struct ib_gid_attr attr; + void *context; +}; + +struct ib_roce_gid_cache { + int active; + int sz; + /* locking against multiple writes in data_vec */ + struct mutex lock; + struct ib_roce_gid_cache_entry *data_vec; +}; + enum rdma_node_type { /* IB values map to NodeInfo:NodeType. 
*/ RDMA_NODE_IB_CA = 1, RDMA_NODE_IB_SWITCH, RDMA_NODE_IB_ROUTER, RDMA_NODE_RNIC, - RDMA_NODE_MIC }; enum rdma_transport_type { RDMA_TRANSPORT_IB, RDMA_TRANSPORT_IWARP, - RDMA_TRANSPORT_SCIF + RDMA_TRANSPORT_SCIF, +}; + +__attribute_const__ enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type); + +enum rdma_network_type { + RDMA_NETWORK_IB, + RDMA_NETWORK_IPV4, + RDMA_NETWORK_IPV6 }; -enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; +static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type, void *grh) +{ + if (network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) { + const struct ip *ip4h = (struct ip *)((u8 *)grh + 20); + const struct ip6_hdr *ip6h = (struct ip6_hdr *)grh; + __u8 next = (network_type == RDMA_NETWORK_IPV4) ? ip4h->ip_p : ip6h->ip6_nxt; + + return (next == IPPROTO_UDP) ? IB_GID_TYPE_ROCE_V2 : IB_GID_TYPE_ROCE_V1_5; + } + return IB_GID_TYPE_IB; +} + +static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type, + union ib_gid *gid) +{ + if (gid_type == IB_GID_TYPE_IB) + return RDMA_NETWORK_IB; + + if (ipv6_addr_v4mapped((const struct in6_addr *)gid)) + return RDMA_NETWORK_IPV4; + else + return RDMA_NETWORK_IPV6; +} enum rdma_link_layer { IB_LINK_LAYER_UNSPECIFIED, @@ -89,24 +163,24 @@ }; enum ib_device_cap_flags { - IB_DEVICE_RESIZE_MAX_WR = 1, - IB_DEVICE_BAD_PKEY_CNTR = (1<<1), - IB_DEVICE_BAD_QKEY_CNTR = (1<<2), - IB_DEVICE_RAW_MULTI = (1<<3), - IB_DEVICE_AUTO_PATH_MIG = (1<<4), - IB_DEVICE_CHANGE_PHY_PORT = (1<<5), - IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), - IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), - IB_DEVICE_SHUTDOWN_PORT = (1<<8), - IB_DEVICE_INIT_TYPE = (1<<9), - IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), - IB_DEVICE_SYS_IMAGE_GUID = (1<<11), - IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), - IB_DEVICE_SRQ_RESIZE = (1<<13), - IB_DEVICE_N_NOTIFY_CQ = (1<<14), - IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), - IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ - IB_DEVICE_MEM_WINDOW = (1<<17), + IB_DEVICE_RESIZE_MAX_WR = 1U, + IB_DEVICE_BAD_PKEY_CNTR = (1U<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1U<<2), + IB_DEVICE_RAW_MULTI = (1U<<3), + IB_DEVICE_AUTO_PATH_MIG = (1U<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1U<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1U<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1U<<7), + IB_DEVICE_SHUTDOWN_PORT = (1U<<8), + IB_DEVICE_INIT_TYPE = (1U<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1U<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1U<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1U<<12), + IB_DEVICE_SRQ_RESIZE = (1U<<13), + IB_DEVICE_N_NOTIFY_CQ = (1U<<14), + IB_DEVICE_LOCAL_DMA_LKEY = (1U<<15), + IB_DEVICE_RESERVED = (1U<<16), /* old SEND_W_INV */ + IB_DEVICE_MEM_WINDOW = (1U<<17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support * insertion of UDP and TCP checksum on outgoing UD IPoIB @@ -114,27 +188,31 @@ * incoming messages. Setting this flag implies that the * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. 
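A short sketch of how the two helpers above compose (illustrative only; gid assumed to hold a RoCE v2 GID):

	union ib_gid gid;	/* e.g. filled in by ib_query_gid() */
	enum rdma_network_type nt;

	nt = ib_gid_to_network_type(IB_GID_TYPE_ROCE_V2, &gid);
	/* RDMA_NETWORK_IPV4 here means the GID is an IPv4-mapped address,
	 * so the on-wire L3 header is IPv4 (plus UDP for RoCE v2). */
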
*/ - IB_DEVICE_UD_IP_CSUM = (1<<18), - IB_DEVICE_UD_TSO = (1<<19), - IB_DEVICE_XRC = (1<<20), - IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), - IB_DEVICE_MR_ALLOCATE = (1<<23), - IB_DEVICE_SHARED_MR = (1<<24), - IB_DEVICE_QPG = (1<<25), - IB_DEVICE_UD_RSS = (1<<26), - IB_DEVICE_UD_TSS = (1<<27), - IB_DEVICE_CROSS_CHANNEL = (1<<28), - IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), + IB_DEVICE_UD_IP_CSUM = (1U<<18), + IB_DEVICE_UD_TSO = (1U<<19), + IB_DEVICE_XRC = (1U<<20), + IB_DEVICE_MEM_MGT_EXTENSIONS = (1U<<21), + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1U<<22), + IB_DEVICE_MR_ALLOCATE = (1U<<23), + IB_DEVICE_SHARED_MR = (1U<<24), + IB_DEVICE_QPG = (1U<<25), + IB_DEVICE_UD_RSS = (1U<<26), + IB_DEVICE_UD_TSS = (1U<<27), + IB_DEVICE_CROSS_CHANNEL = (1U<<28), + IB_DEVICE_MANAGED_FLOW_STEERING = (1U<<29), /* * Devices can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or * IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B * memory windows. It can set neither to indicate it doesn't support * type 2 windows at all. */ - IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<30), - IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<31), - IB_DEVICE_SIGNATURE_HANDOVER = (1LL<<32) + IB_DEVICE_MEM_WINDOW_TYPE_2A = (1U<<30), + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1U<<31), + IB_DEVICE_SIGNATURE_HANDOVER = (1ULL<<32), + IB_DEVICE_ROCE_MODE_1_5 = (1ULL<<34), + IB_DEVICE_ROCE_MODE_2 = (1ULL<<35), + IB_DEVICE_INDIR_REGISTRATION = (1ULL<<36) + }; enum ib_signature_prot_cap { @@ -199,19 +277,20 @@ int max_srq_wr; int max_srq_sge; unsigned int max_fast_reg_page_list_len; + unsigned int max_indir_reg_mr_list_len; int max_rss_tbl_sz; u16 max_pkeys; u8 local_ca_ack_delay; int comp_mask; - uint64_t timestamp_mask; - uint64_t hca_core_clock; - unsigned int sig_prot_cap; - unsigned int sig_guard_cap; + u64 timestamp_mask; + u64 hca_core_clock; + int sig_prot_cap; + int sig_guard_cap; }; enum ib_device_attr_comp_mask { IB_DEVICE_ATTR_WITH_TIMESTAMP_MASK = 1ULL << 1, - IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2 + IB_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = 1ULL << 2, }; enum ib_mtu { @@ -240,8 +319,7 @@ IB_PORT_INIT = 2, IB_PORT_ARMED = 3, IB_PORT_ACTIVE = 4, - IB_PORT_ACTIVE_DEFER = 5, - IB_PORT_DUMMY = -1 /* force enum signed */ + IB_PORT_ACTIVE_DEFER = 5 }; enum ib_port_cap_flags { @@ -267,7 +345,11 @@ IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, - IB_PORT_CLIENT_REG_SUP = 1 << 25 + IB_PORT_CLIENT_REG_SUP = 1 << 25, + IB_PORT_IP_BASED_GIDS = 1 << 26, + IB_PORT_ROCE = 1 << 27, + IB_PORT_ROCE_V2 = 1 << 28, + IB_PORT_ROCE_V1_5 = 1 << 29, }; enum ib_port_width { @@ -294,7 +376,8 @@ IB_SPEED_QDR = 4, IB_SPEED_FDR10 = 8, IB_SPEED_FDR = 16, - IB_SPEED_EDR = 32 + IB_SPEED_EDR = 32, + IB_SPEED_HDR = 64 }; struct ib_protocol_stats { @@ -368,6 +451,7 @@ u8 active_width; u8 active_speed; u8 phys_state; + bool grh_required; }; enum ib_device_modify_flags { @@ -455,11 +539,19 @@ union ib_gid dgid; }; +#define IPV6_DEFAULT_HOPLIMIT 64 + +union rdma_network_hdr { + struct ib_grh ibgrh; + struct ip roce4grh; +}; + enum { IB_MULTICAST_QPN = 0xffffff }; #define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) +#define IB_SL_INVALID 0xFF enum ib_ah_flags { IB_AH_GRH = 1 @@ -487,7 +579,8 @@ }; enum ib_mr_create_flags { - IB_MR_SIGNATURE_EN = 1, + IB_MR_SIGNATURE_EN = 1 << 0, + IB_MR_INDIRECT_REG = 1 << 1 }; /** @@ -500,6 +593,7 @@ struct ib_mr_init_attr { int max_reg_descriptors; u32 flags; + u64 exp_access_flags; /* region's access rights */ }; /** @@ -508,14 +602,14 @@ * converted 
to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ -int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate); /** * ib_rate_to_mbps - Convert the IB rate enum to Mbps. * For example, IB_RATE_2_5_GBPS will be converted to 2500. * @rate: rate to convert. */ -int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); struct ib_cq_init_attr { int cqe; @@ -523,20 +617,14 @@ u32 flags; }; -enum ib_signature_type { - IB_SIG_TYPE_T10_DIF, -}; - /** - * T10-DIF Signature types - * T10-DIF types are defined by SCSI - * specifications. + * Signature types + * IB_SIG_TYPE_NONE: Unprotected. + * IB_SIG_TYPE_T10_DIF: Type T10-DIF */ -enum ib_t10_dif_type { - IB_T10DIF_NONE, - IB_T10DIF_TYPE1, - IB_T10DIF_TYPE2, - IB_T10DIF_TYPE3 +enum ib_signature_type { + IB_SIG_TYPE_NONE, + IB_SIG_TYPE_T10_DIF, }; /** @@ -552,24 +640,26 @@ /** * struct ib_t10_dif_domain - Parameters specific for T10-DIF * domain. - * @type: T10-DIF type (0|1|2|3) * @bg_type: T10-DIF block guard type (CRC|CSUM) * @pi_interval: protection information interval. * @bg: seed of guard computation. * @app_tag: application tag of guard block * @ref_tag: initial guard block reference tag. - * @type3_inc_reftag: T10-DIF type 3 does not state - * about the reference tag, it is the user - * choice to increment it or not. + * @ref_remap: Indicate whether the reftag increments each block + * @app_escape: Indicate to skip block check if apptag=0xffff + * @ref_escape: Indicate to skip block check if reftag=0xffffffff + * @apptag_check_mask: check bitmask of application tag. */ struct ib_t10_dif_domain { - enum ib_t10_dif_type type; enum ib_t10_dif_bg_type bg_type; u32 pi_interval; u16 bg; u16 app_tag; u32 ref_tag; - bool type3_inc_reftag; + bool ref_remap; + bool app_escape; + bool ref_escape; + u16 apptag_check_mask; }; /** @@ -636,7 +726,7 @@ * enum. * @mult: multiple to convert. */ -enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct ib_ah_attr { struct ib_global_route grh; @@ -646,8 +736,7 @@ u8 static_rate; u8 ah_flags; u8 port_num; - u8 dmac[6]; - u16 vlan_id; + u8 dmac[ETH_ALEN]; }; enum ib_wc_status { @@ -687,12 +776,14 @@ IB_WC_FAST_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, + IB_WC_REG_INDIR_MR, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). 
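For instance (illustrative only; handle_receive is a hypothetical consumer hook):

	struct ib_wc *wc;	/* a polled completion */

	if (wc->opcode & IB_WC_RECV)
		handle_receive(wc);	/* matches any receive opcode */
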
*/ IB_WC_RECV = 1 << 7, - IB_WC_RECV_RDMA_WITH_IMM + IB_WC_RECV_RDMA_WITH_IMM, + IB_WC_DUMMY = -1 /* force enum signed */ }; enum ib_wc_flags { @@ -700,11 +791,10 @@ IB_WC_WITH_IMM = (1<<1), IB_WC_WITH_INVALIDATE = (1<<2), IB_WC_IP_CSUM_OK = (1<<3), - IB_WC_WITH_SL = (1<<4), - IB_WC_WITH_SLID = (1<<5), - IB_WC_WITH_TIMESTAMP = (1<<6), - IB_WC_WITH_SMAC = (1<<7), - IB_WC_WITH_VLAN = (1<<8), + IB_WC_WITH_SMAC = (1<<4), + IB_WC_WITH_VLAN = (1<<5), + IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6), + IB_WC_WITH_TIMESTAMP = (1<<7), }; struct ib_wc { @@ -725,12 +815,13 @@ u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ - int csum_ok; + int csum_ok; + u8 smac[ETH_ALEN]; + u16 vlan_id; + u8 network_hdr_type; struct { uint64_t timestamp; /* timestamp = 0 indicates error*/ } ts; - u8 smac[6]; - u16 vlan_id; }; enum ib_cq_notify_flags { @@ -827,6 +918,9 @@ IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, + IB_QP_CREATE_ATOMIC_BE_REPLY = 1 << 8, + IB_QP_CREATE_SIGNATURE_PIPELINE = 1 << 9, /* reserve bits 26-31 for low level drivers' internal use */ IB_QP_CREATE_RESERVED_START = 1 << 26, IB_QP_CREATE_RESERVED_END = 1 << 31, @@ -966,10 +1060,7 @@ IB_QP_DEST_QPN = (1<<20), IB_QP_GROUP_RSS = (1<<21), IB_QP_DC_KEY = (1<<22), - IB_QP_SMAC = (1<<23), - IB_QP_ALT_SMAC = (1<<24), - IB_QP_VID = (1<<25), - IB_QP_ALT_VID = (1<<26) + IB_QP_FLOW_ENTROPY = (1<<23), }; enum ib_qp_state { @@ -979,8 +1070,7 @@ IB_QPS_RTS, IB_QPS_SQD, IB_QPS_SQE, - IB_QPS_ERR, - IB_QPS_DUMMY = -1 /* force enum signed */ + IB_QPS_ERR }; enum ib_mig_state { @@ -1020,40 +1110,8 @@ u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; - u8 smac[ETH_ALEN]; - u8 alt_smac[ETH_ALEN]; - u16 vlan_id; - u16 alt_vlan_id; - -}; - -struct ib_qp_attr_ex { - enum ib_qp_state qp_state; - enum ib_qp_state cur_qp_state; - enum ib_mtu path_mtu; - enum ib_mig_state path_mig_state; - u32 qkey; - u32 rq_psn; - u32 sq_psn; - u32 dest_qp_num; - int qp_access_flags; - struct ib_qp_cap cap; - struct ib_ah_attr ah_attr; - struct ib_ah_attr alt_ah_attr; - u16 pkey_index; - u16 alt_pkey_index; - u8 en_sqd_async_notify; - u8 sq_draining; - u8 max_rd_atomic; - u8 max_dest_rd_atomic; - u8 min_rnr_timer; - u8 port_num; - u8 timeout; - u8 retry_cnt; - u8 rnr_retry; - u8 alt_port_num; - u8 alt_timeout; u64 dct_key; + u32 flow_entropy; }; enum ib_wr_opcode { @@ -1073,6 +1131,7 @@ IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, IB_WR_BIND_MW, IB_WR_REG_SIG_MR, + IB_WR_REG_INDIR_MR, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. */ @@ -1086,6 +1145,7 @@ IB_WR_RESERVED8, IB_WR_RESERVED9, IB_WR_RESERVED10, + IB_WR_DUMMY = -1 /* force enum signed */ }; enum ib_send_flags { @@ -1098,7 +1158,6 @@ /* reserve bits 26-31 for low level drivers' internal use */ IB_SEND_RESERVED_START = (1 << 26), IB_SEND_RESERVED_END = (1 << 31), - IB_SEND_UMR_UNREG = (1<<5) }; struct ib_sge { @@ -1113,6 +1172,12 @@ unsigned int max_page_list_len; }; +struct ib_indir_reg_list { + struct ib_device *device; + struct ib_sge *sg_list; + unsigned int max_indir_list_len; +}; + /** * struct ib_mw_bind_info - Parameters for a memory window bind operation. * @mr: A memory region to bind the memory window to. 
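At this hunk boundary, a consumer-side sketch of the reworked completion metadata above (flag and field names as added by this patch; wc, mac, vlan and net_type assumed to be in scope):

	if (wc->wc_flags & IB_WC_WITH_SMAC)
		memcpy(mac, wc->smac, ETH_ALEN);	/* mac: caller's buffer */
	if (wc->wc_flags & IB_WC_WITH_VLAN)
		vlan = wc->vlan_id;
	if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
		net_type = wc->network_hdr_type;	/* enum rdma_network_type */
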
@@ -1169,7 +1234,7 @@ struct ib_fast_reg_page_list *page_list; unsigned int page_shift; unsigned int page_list_len; - u32 length; + u64 length; int access_flags; u32 rkey; } fast_reg; @@ -1194,6 +1259,14 @@ int access_flags; struct ib_sge *prot; } sig_handover; + struct { + u64 iova_start; + struct ib_indir_reg_list *indir_list; + unsigned int indir_list_len; + u64 length; + unsigned int access_flags; + u32 mkey; + } indir_reg; } wr; u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; @@ -1232,7 +1305,8 @@ enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), - IB_MR_REREG_ACCESS = (1<<2) + IB_MR_REREG_ACCESS = (1<<2), + IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1), }; /** @@ -1242,8 +1316,8 @@ * @bind_info: More parameters of the bind operation. */ struct ib_mw_bind { - u64 wr_id; - int send_flags; + u64 wr_id; + int send_flags; struct ib_mw_bind_info bind_info; }; @@ -1265,6 +1339,8 @@ struct list_head xrcd_list; struct list_head rule_list; struct list_head dct_list; + struct list_head wq_list; + struct list_head rwq_ind_tbl_list; int closing; void *peer_mem_private_data; char *peer_mem_name; @@ -1289,12 +1365,19 @@ size_t len); }; +enum ib_udate_src { + IB_UDATA_LEGACY_CMD, + IB_UDATA_EX_CMD, + IB_UDATA_EXP_CMD = 32, +}; + struct ib_udata { struct ib_udata_ops *ops; - void __user *inbuf; - void __user *outbuf; - size_t inlen; - size_t outlen; + void __user *inbuf; + void __user *outbuf; + size_t inlen; + size_t outlen; + enum ib_udate_src src; }; struct ib_pd { @@ -1307,7 +1390,7 @@ struct ib_device *device; atomic_t usecnt; /* count all exposed resources */ struct inode *inode; - + struct mutex tgt_qp_mutex; struct list_head tgt_qp_list; }; @@ -1365,6 +1448,69 @@ } ext; }; +enum ib_wq_type { + IB_WQT_RQ, + IB_WQT_SRQ +}; + +enum ib_wq_state { + IB_WQS_RESET, + IB_WQS_RDY, + IB_WQS_ERR, + IB_WQS_UNKNOWN +}; + +struct ib_wq { + struct ib_device *device; + struct ib_uobject *uobject; + void *wq_context; + void (*event_handler)(struct ib_event *, void *); + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_srq *srq; /* IB_WQT_SRQ only */ + u32 wq_num; + enum ib_wq_state state; + enum ib_wq_type wq_type; + atomic_t usecnt; +}; + +struct ib_wq_init_attr { + void *wq_context; + enum ib_wq_type wq_type; + u32 max_recv_wr; /* IB_WQT_RQ only */ + u32 max_recv_sge; /* IB_WQT_RQ only */ + struct ib_cq *cq; + struct ib_srq *srq; /* IB_WQT_SRQ only */ + void (*event_handler)(struct ib_event *, void *); +}; + +enum ib_wq_attr_mask { + IB_WQ_STATE = 1 << 0, + IB_WQ_CUR_STATE = 1 << 1, +}; + +struct ib_wq_attr { + enum ib_wq_state wq_state; + enum ib_wq_state curr_wq_state; +}; + +struct ib_rwq_ind_table { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + atomic_t usecnt; + u32 ind_tbl_num; + u32 log_ind_tbl_size; + struct ib_wq **ind_tbl; +}; + +struct ib_rwq_ind_table_init_attr { + struct ib_pd *pd; + u32 log_ind_tbl_size; + /* Each entry is a pointer to Receive Work Queue */ + struct ib_wq **ind_tbl; +}; + struct ib_qp { struct ib_device *device; struct ib_pd *pd; @@ -1384,6 +1530,7 @@ enum ib_qp_type qp_type; enum ib_qpg_type qpg_type; u8 port_num; + struct ib_rwq_ind_table *rwq_ind_tbl; }; struct ib_dct { @@ -1395,6 +1542,41 @@ u32 dct_num; }; +/* + * RX Hash Function flags. + */ +enum ib_rx_hash_function_flags { + IB_EX_RX_HASH_FUNC_TOEPLITZ = 1 << 0, + IB_EX_RX_HASH_FUNC_XOR = 1 << 1 +}; + +/* + * RX Hash flags, these flags allow setting which incoming packet fields should + * participate in RX Hash. 
Each flag represents a certain packet field; + * when the flag is set, the field that it represents will + * participate in the RX Hash calculation. + * Notice: *IPV4 and *IPV6 flags can't be enabled together on the same QP + * and *TCP and *UDP flags can't be enabled together on the same QP. + */ +enum ib_rx_hash_fields { + IB_RX_HASH_SRC_IPV4 = 1 << 0, + IB_RX_HASH_DST_IPV4 = 1 << 1, + IB_RX_HASH_SRC_IPV6 = 1 << 2, + IB_RX_HASH_DST_IPV6 = 1 << 3, + IB_RX_HASH_SRC_PORT_TCP = 1 << 4, + IB_RX_HASH_DST_PORT_TCP = 1 << 5, + IB_RX_HASH_SRC_PORT_UDP = 1 << 6, + IB_RX_HASH_DST_PORT_UDP = 1 << 7 +}; + +struct ib_rx_hash_conf { + enum ib_rx_hash_function_flags rx_hash_function; + u8 rx_key_len; /* valid only for Toeplitz */ + u8 *rx_hash_key; + uint64_t rx_hash_fields_mask; /* enum ib_rx_hash_fields */ + struct ib_rwq_ind_table *rwq_ind_tbl; +}; + struct ib_mr { struct ib_device *device; struct ib_pd *pd; @@ -1443,11 +1625,12 @@ IB_FLOW_SPEC_IB = 0x21, /* L3 header*/ IB_FLOW_SPEC_IPV4 = 0x30, + IB_FLOW_SPEC_IPV6 = 0x31, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41 }; - +#define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4 /* Flow steering rule priority is set according to it's domain. @@ -1492,8 +1675,8 @@ }; struct ib_flow_ipv4_filter { - __be32 src_ip; - __be32 dst_ip; + __be32 src_ip; + __be32 dst_ip; }; struct ib_flow_spec_ipv4 { @@ -1503,8 +1686,20 @@ struct ib_flow_ipv4_filter val; struct ib_flow_ipv4_filter mask; }; +struct ib_flow_ipv6_filter { + u8 src_ip[16]; + u8 dst_ip[16]; +}; + +struct ib_flow_spec_ipv6 { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ipv6_filter val; + struct ib_flow_ipv6_filter mask; +}; + struct ib_flow_tcp_udp_filter { - __be16 dst_port; + __be16 dst_port; __be16 src_port; }; @@ -1520,10 +1715,11 @@ enum ib_flow_spec_type type; u16 size; }; - struct ib_flow_spec_ib ib; - struct ib_flow_spec_eth eth; - struct ib_flow_spec_ipv4 ipv4; - struct ib_flow_spec_tcp_udp tcp_udp; + struct ib_flow_spec_eth eth; + struct ib_flow_spec_ib ib; + struct ib_flow_spec_ipv4 ipv4; + struct ib_flow_spec_tcp_udp tcp_udp; + struct ib_flow_spec_ipv6 ipv6; }; struct ib_flow_attr { @@ -1544,6 +1740,18 @@ struct ib_uobject *uobject; }; +struct ib_vf_stats { + u64 rx_frames; + u64 tx_frames; + u64 rx_bytes; + u64 tx_bytes; + u64 rx_errors; + u64 tx_errors; + u64 rx_dropped; + u64 tx_dropped; + u64 rx_mcast; +}; + struct ib_mad; struct ib_grh; @@ -1568,9 +1776,11 @@ struct ib_pkey_cache **pkey_cache; struct ib_gid_cache **gid_cache; u8 *lmc_cache; + struct ib_roce_gid_cache **roce_gid_cache; + struct work_struct roce_gid_cache_cleanup_work; }; -enum verbs_values_mask { +enum ib_values_mask { IBV_VALUES_HW_CLOCK = 1 << 0 }; @@ -1666,6 +1876,31 @@ int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); + /* When calling modify_gid, the HW vendor's driver should + * modify the gid of device @device at gid index @index of + * port @port to be @gid. Meta-info of that gid (for example, + * the network device related to this gid) is available + * at @attr. @context allows the HW vendor driver to store extra + * information together with a GID entry. The HW vendor may allocate + * memory to contain this information and store it in @context when a + * new GID entry is written to. Upon the deletion of a GID entry, + * the HW vendor must free any allocated memory. The caller will clear + * @context afterwards. GID deletion is done by passing the zero gid. + * Params are consistent until the next call of modify_gid. 
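A skeleton of a vendor implementation honoring this contract (hypothetical driver code; the foo_* names and foo_gid_ctx type are invented for illustration, and zgid is the all-zero GID exported above):

	static int foo_modify_gid(struct ib_device *device, u8 port_num,
				  unsigned int index, const union ib_gid *gid,
				  const struct ib_gid_attr *attr, void **context)
	{
		struct foo_gid_ctx *ctx = *context;

		if (!memcmp(gid, &zgid, sizeof(*gid))) {
			/* zero gid == deletion: release per-entry state */
			kfree(ctx);
			return foo_hw_clear_gid(device, port_num, index);
		}
		if (!ctx)
			*context = ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
		return ctx ? foo_hw_set_gid(device, port_num, index, gid, attr)
			   : -ENOMEM;
	}
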
+ * The function should return 0 on success or error otherwise. + * The function could be called concurrently for different ports. + */ + int (*modify_gid)(struct ib_device *device, + u8 port_num, + unsigned int index, + const union ib_gid *gid, + const struct ib_gid_attr *attr, + void **context); + int (*set_vf_port_guid)(struct ib_device *device, + u8 port_num, u64 guid); + int (*set_vf_node_guid)(struct ib_device *device, + u16 vf, u64 guid); + int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index, u16 *pkey); int (*modify_device)(struct ib_device *device, @@ -1750,7 +1985,14 @@ u64 virt_addr, int mr_access_flags, struct ib_udata *udata, - int mr_id); + int mr_id); + int (*rereg_user_mr)(struct ib_mr *mr, + int flags, + u64 start, u64 length, + u64 virt_addr, + int mr_access_flags, + struct ib_pd *pd, + struct ib_udata *udata); int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); @@ -1762,6 +2004,9 @@ struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, int page_list_len); void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); + struct ib_indir_reg_list * (*alloc_indir_reg_list)(struct ib_device *device, + unsigned int indir_list_len); + void (*free_indir_reg_list)(struct ib_indir_reg_list *indir_list); int (*rereg_phys_mr)(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, @@ -1807,17 +2052,19 @@ int (*destroy_flow)(struct ib_flow *flow_id); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); - + void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); unsigned long (*get_unmapped_area)(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); + int (*get_vf_stats)(struct ib_device *device, u16 vf, + struct ib_vf_stats *stats); int (*ioctl)(struct ib_ucontext *context, unsigned int cmd, unsigned long arg); - int (*query_values)(struct ib_device *device, - int q_values, - struct ib_device_values *values); + int (*query_values)(struct ib_device *device, + int q_values, + struct ib_device_values *values); struct ib_dma_mapping_ops *dma_ops; struct module *owner; @@ -1828,6 +2075,7 @@ enum { IB_DEV_UNINITIALIZED, IB_DEV_REGISTERED, + IB_DEV_UNREGISTERING, IB_DEV_UNREGISTERED } reg_state; @@ -1840,14 +2088,23 @@ u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; - int cmd_perf; - u64 cmd_avg; - u32 cmd_n; - spinlock_t cmd_perf_lock; - + struct kref refcount; + struct completion free; /* * Experimental data and functions */ + struct ib_wq * (*create_wq)(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_wq)(struct ib_wq *wq); + int (*modify_wq)(struct ib_wq *wq, + struct ib_wq_attr *attr, + enum ib_wq_attr_mask attr_mask, + struct ib_udata *udata); + struct ib_rwq_ind_table *(*create_rwq_ind_table)(struct ib_device *device, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); int (*exp_query_device)(struct ib_device *device, struct ib_exp_device_attr *device_attr); struct ib_qp * (*exp_create_qp)(struct ib_pd *pd, @@ -1858,7 +2115,39 @@ struct ib_udata *udata); int (*exp_destroy_dct)(struct ib_dct *dct); int (*exp_query_dct)(struct ib_dct *dct, struct ib_dct_attr *attr); - + /** + * exp_rereg_user_mr - Modifies the attributes of an existing memory region. 
@@ -1858,7 +2115,39 @@
 						    struct ib_udata *udata);
 	int			   (*exp_destroy_dct)(struct ib_dct *dct);
 	int			   (*exp_query_dct)(struct ib_dct *dct, struct ib_dct_attr *attr);
-
+	/**
+	 * exp_rereg_user_mr - Modifies the attributes of an existing memory region.
+	 * Conceptually, this call performs a deregister followed by a
+	 * re-register of the memory region. Where possible,
+	 * resources are reused instead of deallocated and reallocated.
+	 * @mr: The memory region to modify.
+	 * @flags: A bit-mask used to indicate which of the following
+	 *   properties of the memory region are being modified.
+	 * @start: If %IB_MR_REREG_TRANS is set in flags, this
+	 *   field specifies the start of the virtual address to use in the new
+	 *   translation; otherwise, this parameter is ignored.
+	 * @length: If %IB_MR_REREG_TRANS is set in flags, this
+	 *   field specifies the length of the virtual address to use in the new
+	 *   translation; otherwise, this parameter is ignored.
+	 * @virt_addr: If %IB_MR_REREG_TRANS is set in flags, this
+	 *   field specifies the start of the virtual address in the HCA to use
+	 *   in the new translation; otherwise, this parameter is ignored.
+	 * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in flags, this
+	 *   field specifies the new memory access rights; otherwise, this
+	 *   parameter is ignored.
+	 * @pd: If %IB_MR_REREG_PD is set in flags, this field specifies
+	 *   the new protection domain to associate with the memory region;
+	 *   otherwise, this parameter is ignored.
+	 */
+	int			   (*exp_rereg_user_mr)(struct ib_mr *mr,
+						    int flags,
+						    u64 start, u64 length,
+						    u64 virt_addr,
+						    int mr_access_flags,
+						    struct ib_pd *pd);
+	int			   (*exp_prefetch_mr)(struct ib_mr *mr,
+						      u64 start, u64 length,
+						      u32 flags);
 	u64			   uverbs_exp_cmd_mask;
 };
 
@@ -1873,6 +2162,9 @@
 struct ib_device *ib_alloc_device(size_t size);
 void ib_dealloc_device(struct ib_device *device);
 
+void ib_device_hold(struct ib_device *device);
+int ib_device_put(struct ib_device *device);
+
 int ib_register_device(struct ib_device *device,
 		       int (*port_callback)(struct ib_device *,
 					    u8, struct kobject *));
@@ -1887,12 +2179,12 @@
 static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
 {
-	return udata->ops->copy_from(dest, udata, len);
+	return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
 }
 
 static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
 {
-	return udata->ops->copy_to(udata, src, len);
+	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
 }
 
 /**
@@ -1929,7 +2221,13 @@
 	      u8 port_num);
 
 int ib_query_gid(struct ib_device *device,
-		 u8 port_num, int index, union ib_gid *gid);
+		 u8 port_num, int index, union ib_gid *gid,
+		 struct ib_gid_attr *attr);
+
+int ib_set_vf_port_guid(struct ib_device *device, u8 port_num, u64 guid);
+int ib_set_vf_node_guid(struct ib_device *device, int vf, u64 guid);
+int ib_get_vf_stats(struct ib_device *device, int vf,
+		    struct ib_vf_stats *stats);
 
 int ib_query_pkey(struct ib_device *device,
 		  u8 port_num, u16 index, u16 *pkey);
@@ -1943,7 +2241,8 @@
 		   struct ib_port_modify *port_modify);
 
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-		u8 *port_num, u16 *index);
+		enum ib_gid_type gid_type, struct net *net,
+		int if_index, u8 *port_num, u16 *index);
 
 int ib_find_pkey(struct ib_device *device,
		 u8 port_num, u16 pkey, u16 *index);
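
Since ib_copy_from_udata()/ib_copy_to_udata() are now thin wrappers around
copy_from_user()/copy_to_user(), a command handler keeps its usual shape; a
minimal sketch (struct my_cmd and struct my_resp are hypothetical driver ABI
structs, not defined by this header):

	static int my_handler(struct ib_udata *udata)
	{
		struct my_cmd  cmd;
		struct my_resp resp;

		if (ib_copy_from_udata(&cmd, udata, sizeof(cmd)))
			return -EFAULT;		/* user buffer was unreadable */

		memset(&resp, 0, sizeof(resp));
		resp.value = cmd.value;		/* ... real work goes here ... */

		return ib_copy_to_udata(udata, &resp, sizeof(resp));
	}
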
@@ -1974,6 +2273,17 @@
 struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
 
 /**
+ * ib_get_gids_from_grh - Get the sgid and dgid from a GRH or IPv4 header.
+ * @grh: The L3 header to parse.
+ * @net_type: Type of header to parse.
+ * @sgid: Place to store the source gid.
+ * @dgid: Place to store the destination gid.
+ */
+int ib_get_gids_from_grh(struct ib_grh *grh, enum rdma_network_type net_type,
+			 union ib_gid *sgid, union ib_gid *dgid);
+
+/**
  * ib_init_ah_from_wc - Initializes address handle attributes from a
  *   work completion.
  * @device: Device on which the received message arrived.
@@ -2691,6 +3001,32 @@
 void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
 
 /**
+ * ib_alloc_indir_reg_list() - Allocates an indirect list array
+ * @device: ib device pointer
+ * @indir_list_len: size of the list array to be allocated
+ *
+ * Allocate a struct ib_indir_reg_list and a sg_list array
+ * that is at least indir_list_len in size. The actual size is
+ * returned in max_indir_list_len. The caller is responsible for
+ * initializing the contents of the sg_list array before posting
+ * a send work request with the IB_WC_INDIR_REG_MR opcode.
+ *
+ * The sg_list array entries should be set exactly the same way as
+ * the ib_send_wr sg_list entries {lkey, addr, length}.
+ */
+struct ib_indir_reg_list *
+ib_alloc_indir_reg_list(struct ib_device *device,
+			unsigned int indir_list_len);
+
+/**
+ * ib_free_indir_reg_list() - Deallocates a previously allocated
+ *   indirect list array
+ * @indir_list: pointer to be deallocated
+ */
+void
+ib_free_indir_reg_list(struct ib_indir_reg_list *indir_list);
+
+/**
  * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
  *   R_Key and L_Key.
  * @mr - struct ib_mr pointer to be updated.
@@ -2834,6 +3170,69 @@
 int ib_query_values(struct ib_device *device,
 		    int q_values, struct ib_device_values *values);
 
+/**
+ * ib_create_wq - Creates a WQ associated with the specified protection
+ *   domain.
+ * @pd: The protection domain associated with the WQ.
+ * @wq_init_attr: A list of initial attributes required to create the
+ *   WQ. If WQ creation succeeds, then the attributes are updated to
+ *   the actual capabilities of the created WQ.
+ *
+ * wq_init_attr->max_recv_wr and wq_init_attr->max_recv_sge determine
+ * the requested size of the WQ's receive queue, and are set to the actual
+ * values allocated on return.
+ * If ib_create_wq() succeeds, then max_recv_wr and max_recv_sge will always
+ * be at least as large as the requested values.
+ *
+ * Return Value
+ * ib_create_wq() returns a pointer to the created WQ, or NULL if the request
+ * fails.
+ */
+struct ib_wq *ib_create_wq(struct ib_pd *pd,
+			   struct ib_wq_init_attr *init_attr);
+
+/**
+ * ib_destroy_wq - Destroys the specified WQ.
+ * @wq: The WQ to destroy.
+ */
+int ib_destroy_wq(struct ib_wq *wq);
+
+/**
+ * ib_modify_wq - Modifies the specified WQ.
+ * @wq: The WQ to modify.
+ * @wq_attr: On input, specifies the WQ attributes to modify; on output,
+ *   the current values of the selected WQ attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the WQ
+ *   are being modified.
+ */
+int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
+		 enum ib_wq_attr_mask attr_mask);
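
A hedged sketch of the documented ib_create_wq()/ib_modify_wq() sequence,
creating a receive WQ and moving it to ready. Only max_recv_wr and max_recv_sge
are named above; the cq member, the IB_WQS_RDY state, and the IB_WQ_STATE mask
bit follow the common verbs convention and are assumptions here:

	static struct ib_wq *
	my_create_ready_wq(struct ib_pd *pd, struct ib_cq *cq)
	{
		struct ib_wq_init_attr init_attr;
		struct ib_wq_attr wq_attr;
		struct ib_wq *wq;

		memset(&init_attr, 0, sizeof(init_attr));
		init_attr.max_recv_wr  = 256;	/* may be rounded up on return */
		init_attr.max_recv_sge = 1;
		init_attr.cq = cq;		/* where receive completions land */

		wq = ib_create_wq(pd, &init_attr);
		if (wq == NULL)			/* documented to return NULL on failure */
			return NULL;

		memset(&wq_attr, 0, sizeof(wq_attr));
		wq_attr.wq_state = IB_WQS_RDY;	/* assumed state/mask names */
		if (ib_modify_wq(wq, &wq_attr, IB_WQ_STATE) != 0) {
			ib_destroy_wq(wq);
			return NULL;
		}
		return wq;
	}
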
+/*
+ * ib_create_rwq_ind_table - Creates an RQ Indirection Table associated
+ *   with the specified protection domain.
+ * @device: The device on which to create the rwq indirection table.
+ * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
+ *   create the Indirection Table.
+ *
+ * Note: The lifetime of ib_rwq_ind_table_init_attr->ind_tbl must not be
+ * shorter than that of the created ib_rwq_ind_table object; the caller is
+ * responsible for its memory allocation and free.
+ *
+ * Return Value:
+ * ib_create_rwq_ind_table returns a pointer to the created
+ * Indirection Table, or NULL if the request fails.
+ */
+struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
+						 struct ib_rwq_ind_table_init_attr *wq_ind_table_init_attr);
+
+/*
+ * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
+ * @wq_ind_table: The Indirection Table to destroy.
+ */
+int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+
+
 static inline void ib_active_speed_enum_to_rate(u8 active_speed,
 						int *rate,
 						char **speed)
@@ -2859,6 +3258,10 @@
 		*speed = " EDR";
 		*rate = 250;
 		break;
+	case IB_SPEED_HDR:
+		*speed = " HDR";
+		*rate = 500;
+		break;
 	case IB_SPEED_SDR:
 	default:		/* default to SDR for invalid rates */
 		*rate = 25;
@@ -2880,6 +3283,8 @@
 	return 0;
 }
 
+int ib_roce_mode_is_over_ip(struct ib_device *ibdev, int port_num);
+
 /**
  * ib_check_mr_status: lightweight check of MR status.
  *   This routine may provide status checks on a selected
@@ -2895,4 +3300,14 @@
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 		       struct ib_mr_status *mr_status);
 
+/**
+ * ib_query_values - Query values from the HCA
+ * @device: The device to query the values from
+ * @q_values: Combination of enum ib_values_mask flags to query
+ * @values: The response
+ */
+int ib_query_values(struct ib_device *device,
+		    int q_values, struct ib_device_values *values);
+
+int ib_get_grh_header_version(const void *h);
+
 #endif /* IB_VERBS_H */
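
Building on the WQ sketch above, wiring several WQs into an indirection table
gives the receive set that an RSS QP's rx_hash_conf (see ib_verbs_exp.h below)
can reference. The init-attr member names (log_ind_tbl_size, ind_tbl) follow
the upstream convention and are assumptions here; note the documented lifetime
rule — the wqs array must outlive the returned table:

	static struct ib_rwq_ind_table *
	my_create_rss_table(struct ib_device *device, struct ib_wq *wqs[4])
	{
		struct ib_rwq_ind_table_init_attr ind_attr;

		memset(&ind_attr, 0, sizeof(ind_attr));
		ind_attr.log_ind_tbl_size = 2;	/* 2^2 == 4 entries */
		ind_attr.ind_tbl = wqs;		/* caller-owned, must outlive the table */

		return ib_create_rwq_ind_table(device, &ind_attr);
	}
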
Index: sys/ofed/include/rdma/ib_verbs_exp.h
===================================================================
--- sys/ofed/include/rdma/ib_verbs_exp.h
+++ sys/ofed/include/rdma/ib_verbs_exp.h
@@ -46,7 +46,76 @@
 	IB_EXP_DEVICE_DC_TRANSPORT		= 1 << 0,
 	IB_EXP_DEVICE_QPG			= 1 << 1,
 	IB_EXP_DEVICE_UD_RSS			= 1 << 2,
-	IB_EXP_DEVICE_UD_TSS			= 1 << 3
+	IB_EXP_DEVICE_UD_TSS			= 1 << 3,
+	IB_EXP_DEVICE_EXT_ATOMICS		= 1 << 4,
+	IB_EXP_DEVICE_NOP			= 1 << 5,
+	IB_EXP_DEVICE_UMR			= 1 << 6,
+	IB_EXP_DEVICE_ODP			= 1 << 7,
+	IB_EXP_DEVICE_ROCE_MODE_1_5		= 1 << 8,
+	IB_EXP_DEVICE_ROCE_MODE_2		= 1 << 9,
+	IB_EXP_DEVICE_VXLAN_SUPPORT		= 1 << 10,
+	IB_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT	= 1 << 11,
+	IB_EXP_DEVICE_RX_CSUM_IP_PKT		= 1 << 12,
+	IB_EXP_DEVICE_MEM_WINDOW		= IB_DEVICE_MEM_WINDOW,
+	IB_EXP_DEVICE_MEM_MGT_EXTENSIONS	= IB_DEVICE_MEM_MGT_EXTENSIONS,
+	/* Jumping to 23 as of next capability in include/rdma/ib_verbs.h */
+	/*
+	 * A device can set either IB_DEVICE_MEM_WINDOW_TYPE_2A or
+	 * IB_DEVICE_MEM_WINDOW_TYPE_2B if it supports type 2A or type 2B
+	 * memory windows. It can set neither to indicate it doesn't support
+	 * type 2 windows at all.
+	 */
+	IB_EXP_DEVICE_MEM_WINDOW_TYPE_2A	= IB_DEVICE_MEM_WINDOW_TYPE_2A,
+	IB_EXP_DEVICE_MEM_WINDOW_TYPE_2B	= IB_DEVICE_MEM_WINDOW_TYPE_2B,
+	IB_EXP_DEVICE_CROSS_CHANNEL		= IB_DEVICE_CROSS_CHANNEL,
+	IB_EXP_DEVICE_MANAGED_FLOW_STEERING	= IB_DEVICE_MANAGED_FLOW_STEERING,
+	IB_EXP_DEVICE_MR_ALLOCATE		= IB_DEVICE_MR_ALLOCATE,
+	IB_EXP_DEVICE_SHARED_MR			= IB_DEVICE_SHARED_MR,
+	IB_EXP_DEVICE_MASK = IB_EXP_DEVICE_MEM_WINDOW |
+			     IB_EXP_DEVICE_MEM_MGT_EXTENSIONS |
+			     IB_EXP_DEVICE_MEM_WINDOW_TYPE_2A |
+			     IB_EXP_DEVICE_MEM_WINDOW_TYPE_2B |
+			     IB_EXP_DEVICE_MR_ALLOCATE |
+			     IB_EXP_DEVICE_SHARED_MR |
+			     IB_EXP_DEVICE_CROSS_CHANNEL |
+			     IB_EXP_DEVICE_MANAGED_FLOW_STEERING
+};
+
+struct ib_exp_umr_caps {
+	u32 max_reg_descriptors;
+	u32 max_send_wqe_inline_klms;
+	u32 max_umr_recursion_depth;
+	u32 max_umr_stride_dimenson;
+};
+
+
+struct ib_exp_odp_caps {
+	uint64_t general_odp_caps;
+	struct {
+		uint32_t rc_odp_caps;
+		uint32_t uc_odp_caps;
+		uint32_t ud_odp_caps;
+		uint32_t dc_odp_caps;
+		uint32_t xrc_odp_caps;
+		uint32_t raw_eth_odp_caps;
+	} per_transport_caps;
+};
+
+enum ib_exp_supported_qp_types {
+	IB_EXP_QPT_RC		= 1ULL << 0,
+	IB_EXP_QPT_UC		= 1ULL << 1,
+	IB_EXP_QPT_UD		= 1ULL << 2,
+	IB_EXP_QPT_XRC_INIT	= 1ULL << 3,
+	IB_EXP_QPT_XRC_TGT	= 1ULL << 4,
+	IB_EXP_QPT_RAW_PACKET	= 1ULL << 5,
+};
+
+struct ib_exp_rx_hash_caps {
+	uint32_t max_rwq_indirection_tables;
+	uint32_t max_rwq_indirection_table_size;
+	uint8_t  supported_hash_functions;	/* from ib_rx_hash_function_flags */
+	uint64_t supported_packet_fields;	/* from ib_rx_hash_fields */
+	uint32_t supported_qps;			/* from ib_exp_supported_qp_types */
 };
 
 enum ib_exp_device_attr_comp_mask {
@@ -57,6 +126,14 @@
 	IB_EXP_DEVICE_ATTR_DC_RES_RD		= 1ULL << 5,
 	IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ	= 1ULL << 6,
 	IB_EXP_DEVICE_ATTR_RSS_TBL_SZ		= 1ULL << 7,
+	IB_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS	= 1ULL << 8,
+	IB_EXP_DEVICE_ATTR_UMR			= 1ULL << 9,
+	IB_EXP_DEVICE_ATTR_ODP			= 1ULL << 10,
+	IB_EXP_DEVICE_ATTR_MAX_DCT		= 1ULL << 11,
+	IB_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN	= 1ULL << 12,
+	IB_EXP_DEVICE_ATTR_RX_HASH		= 1ULL << 13,
+	IB_EXP_DEVICE_ATTR_MAX_WQ_TYPE_RQ	= 1ULL << 14,
+	IB_EXP_DEVICE_ATTR_MAX_DEVICE_CTX	= 1ULL << 15,
 };
 
 struct ib_exp_device_attr {
@@ -68,6 +145,21 @@
 	uint32_t	dc_rd_res;
 	uint32_t	inline_recv_sz;
 	uint32_t	max_rss_tbl_sz;
+	/*
+	 * This field is a bit mask of the supported atomic argument sizes.
+	 * A set bit signifies that an argument of size 2 ^ bit_number bytes
+	 * is supported.
+	 */
+	u64		atomic_arg_sizes;
+	u32		max_fa_bit_boudary;
+	u32		log_max_atomic_inline_arg;
+	struct ib_exp_umr_caps umr_caps;
+	struct ib_exp_odp_caps odp_caps;
+	uint32_t	max_dct;
+	uint32_t	max_ctx_res_domain;
+	struct ib_exp_rx_hash_caps rx_hash_caps;
+	uint32_t	max_wq_type_rq;
+	uint32_t	max_device_ctx;
 };
 
 struct ib_exp_qp_init_attr {
@@ -88,6 +180,7 @@
 	enum ib_qpg_type	qpg_type;
 	u8			port_num; /* special QP types only */
 	u32			max_inl_recv;
+	struct ib_rx_hash_conf	*rx_hash_conf;
 };
 
 
@@ -95,6 +188,12 @@
 			struct ib_exp_device_attr *device_attr);
 
 
+enum ib_exp_mr_rereg_flags {
+	IB_EXP_MR_REREG_TRANS		= IB_MR_REREG_TRANS,
+	IB_EXP_MR_REREG_PD		= IB_MR_REREG_PD,
+	IB_EXP_MR_REREG_ACCESS		= IB_MR_REREG_ACCESS,
+	IB_EXP_MR_REREG_SUPPORTED	= ((IB_EXP_MR_REREG_ACCESS << 1) - 1)
+};
 
 #endif /* IB_VERBS_EXP_H */
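
For example, with the atomic_arg_sizes encoding above, checking whether 8-byte
extended-atomic arguments are supported looks roughly like this (a sketch: the
exp_comp_mask member name is an assumption, and ib_exp_query_device() is assumed
to return 0 on success):

	static int my_has_8byte_ext_atomics(struct ib_device *device)
	{
		struct ib_exp_device_attr attr;

		if (ib_exp_query_device(device, &attr) != 0)
			return 0;
		if ((attr.exp_comp_mask & IB_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS) == 0)
			return 0;
		/* bit n set in atomic_arg_sizes => 2^n-byte arguments supported */
		return (attr.atomic_arg_sizes & (1ULL << 3)) != 0;	/* 2^3 == 8 bytes */
	}
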
Index: sys/ofed/include/rdma/iw_cm.h
===================================================================
--- sys/ofed/include/rdma/iw_cm.h
+++ sys/ofed/include/rdma/iw_cm.h
@@ -1,7 +1,6 @@
 /*
  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- * Copyright (c) 2016 Chelsio Communications. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -54,8 +53,8 @@
 	struct sockaddr_in remote_addr;
 	void			*private_data;
 	void			*provider_data;
-	u8			 private_data_len;
 	struct socket		*so;
+	u8			 private_data_len;
 	u8			 ord;
 	u8			 ird;
 };
@@ -93,7 +92,7 @@
 	/* Used by provider to add and remove refs on IW cm_id */
 	void (*add_ref)(struct iw_cm_id *);
 	void (*rem_ref)(struct iw_cm_id *);
-	struct socket *so;
+	struct socket		*so;
 };
 
 struct iw_cm_conn_param {
Index: sys/ofed/include/rdma/rdma_cm.h
===================================================================
--- sys/ofed/include/rdma/rdma_cm.h
+++ sys/ofed/include/rdma/rdma_cm.h
@@ -1,7 +1,6 @@
 /*
  * Copyright (c) 2005 Voltaire Inc. All rights reserved.
  * Copyright (c) 2005 Intel Corporation. All rights reserved.
- * Copyright (c) 2016 Chelsio Communications. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses. You may choose to be licensed under the terms of the GNU
@@ -75,6 +74,11 @@
 	RDMA_PS_UDP   = 0x0111,
 };
 
+#define RDMA_IB_IP_PS_MASK	0xFFFFFFFFFFFF0000ULL
+#define RDMA_IB_IP_PS_TCP	0x0000000001060000ULL
+#define RDMA_IB_IP_PS_UDP	0x0000000001110000ULL
+#define RDMA_IB_IP_PS_IB	0x00000000013F0000ULL
+
 enum alt_path_type {
 	RDMA_ALT_PATH_NONE,
 	RDMA_ALT_PATH_PORT,
@@ -105,6 +109,7 @@
 	/* Fields below ignored if a QP is created on the rdma_cm_id. */
 	u8 srq;
 	u32 qp_num;
+	u32 qkey;
 };
 
 struct rdma_ud_param {
@@ -394,6 +399,13 @@
  */
 int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 
+/**
+ * rdma_get_service_id - Return the IB service ID for a specified address.
+ * @id: Communication identifier associated with the address.
+ * @addr: Address for the service ID.
+ */
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
+
 /**
  * rdma_set_timeout - Set the QP timeout associated with a connection
  * identifier.
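
The RDMA_IB_IP_PS_* constants above place the 16-bit RDMA_PS_* port-space value
left-shifted by 16 into an IB service ID (e.g. RDMA_PS_UDP == 0x0111 gives
0x0000000001110000ULL), leaving the low 16 bits for the TCP/UDP port;
rdma_get_service_id() returns IDs with this layout. A sketch of the encoding
(illustrative, not the exact implementation):

	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
	__be64 sid;

	/* TCP port 4791 -> service ID 0x00000000010612B7 */
	sid = cpu_to_be64(RDMA_IB_IP_PS_TCP | (u64)ntohs(sin->sin_port));

	/* and the port space is recoverable by masking off the port bits:
	 * (be64_to_cpu(sid) & RDMA_IB_IP_PS_MASK) == RDMA_IB_IP_PS_TCP
	 */
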
@@ -404,4 +416,5 @@
 int rdma_cma_any_addr(struct sockaddr *addr);
 int rdma_find_cmid_laddr(struct sockaddr_in *local_addr,
 			 unsigned short dev_type, void **cm_id);
+
 #endif /* RDMA_CM_H */
Index: sys/ofed/include/rdma/rdma_user_cm.h
===================================================================
--- sys/ofed/include/rdma/rdma_user_cm.h
+++ sys/ofed/include/rdma/rdma_user_cm.h
@@ -34,6 +34,7 @@
 #define RDMA_USER_CM_H
 
 #include
+#include
 #include
 #include
 #include
@@ -45,8 +46,8 @@
 enum {
 	RDMA_USER_CM_CMD_CREATE_ID,
 	RDMA_USER_CM_CMD_DESTROY_ID,
-	RDMA_USER_CM_CMD_BIND_ADDR,
-	RDMA_USER_CM_CMD_RESOLVE_ADDR,
+	RDMA_USER_CM_CMD_BIND_IP,
+	RDMA_USER_CM_CMD_RESOLVE_IP,
 	RDMA_USER_CM_CMD_RESOLVE_ROUTE,
 	RDMA_USER_CM_CMD_QUERY_ROUTE,
 	RDMA_USER_CM_CMD_CONNECT,
@@ -59,9 +60,13 @@
 	RDMA_USER_CM_CMD_GET_OPTION,
 	RDMA_USER_CM_CMD_SET_OPTION,
 	RDMA_USER_CM_CMD_NOTIFY,
-	RDMA_USER_CM_CMD_JOIN_MCAST,
+	RDMA_USER_CM_CMD_JOIN_IP_MCAST,
 	RDMA_USER_CM_CMD_LEAVE_MCAST,
-	RDMA_USER_CM_CMD_MIGRATE_ID
+	RDMA_USER_CM_CMD_MIGRATE_ID,
+	RDMA_USER_CM_CMD_QUERY,
+	RDMA_USER_CM_CMD_BIND,
+	RDMA_USER_CM_CMD_RESOLVE_ADDR,
+	RDMA_USER_CM_CMD_JOIN_MCAST
 };
 
 /*
@@ -95,28 +100,51 @@
 	__u32 events_reported;
 };
 
-struct rdma_ucm_bind_addr {
+struct rdma_ucm_bind_ip {
 	__u64 response;
 	struct sockaddr_in6 addr;
 	__u32 id;
 };
 
-struct rdma_ucm_resolve_addr {
+struct rdma_ucm_bind {
+	__u32 id;
+	__u16 addr_size;
+	__u16 reserved;
+	struct sockaddr_storage addr;
+};
+
+struct rdma_ucm_resolve_ip {
 	struct sockaddr_in6 src_addr;
 	struct sockaddr_in6 dst_addr;
 	__u32 id;
 	__u32 timeout_ms;
 };
 
+struct rdma_ucm_resolve_addr {
+	__u32 id;
+	__u32 timeout_ms;
+	__u16 src_size;
+	__u16 dst_size;
+	__u32 reserved;
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
+};
+
 struct rdma_ucm_resolve_route {
 	__u32 id;
 	__u32 timeout_ms;
 };
 
-struct rdma_ucm_query_route {
+enum {
+	RDMA_USER_CM_QUERY_ADDR,
+	RDMA_USER_CM_QUERY_PATH,
+	RDMA_USER_CM_QUERY_GID
+};
+
+struct rdma_ucm_query {
 	__u64 response;
 	__u32 id;
-	__u32 reserved;
+	__u32 option;
 };
 
 struct rdma_ucm_query_route_resp {
@@ -129,9 +157,26 @@
 	__u8 reserved[3];
 };
 
+struct rdma_ucm_query_addr_resp {
+	__u64 node_guid;
+	__u8  port_num;
+	__u8  reserved;
+	__u16 pkey;
+	__u16 src_size;
+	__u16 dst_size;
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
+};
+
+struct rdma_ucm_query_path_resp {
+	__u32 num_paths;
+	__u32 reserved;
+	struct ib_path_rec_data path_data[0];
+};
+
 struct rdma_ucm_conn_param {
 	__u32 qp_num;
-	__u32 reserved;
+	__u32 qkey;
 	__u8  private_data[RDMA_MAX_PRIVATE_DATA];
 	__u8  private_data_len;
 	__u8  srq;
@@ -192,13 +237,22 @@
 	__u32 event;
 };
 
-struct rdma_ucm_join_mcast {
+struct rdma_ucm_join_ip_mcast {
 	__u64 response;		/* rdma_ucm_create_id_resp */
 	__u64 uid;
 	struct sockaddr_in6 addr;
 	__u32 id;
 };
 
+struct rdma_ucm_join_mcast {
+	__u64 response;		/* rdma_ucm_create_id_resp */
+	__u64 uid;
+	__u32 id;
+	__u16 addr_size;
+	__u16 reserved;
+	struct sockaddr_storage addr;
+};
+
 struct rdma_ucm_get_event {
 	__u64 response;
 };
@@ -222,7 +276,7 @@
 
 /* Option details */
 enum {
-	RDMA_OPTION_ID_TOS	= 0,
+	RDMA_OPTION_ID_TOS	 = 0,
 	RDMA_OPTION_ID_REUSEADDR = 1,
 	RDMA_OPTION_ID_AFONLY	 = 2,
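
The new commands parallel the old IP-only ones but carry a struct
sockaddr_storage plus an explicit size, so AF_IB addresses (see rdma/ib.h above)
fit as well. A userspace-side sketch of filling the
RDMA_USER_CM_CMD_RESOLVE_ADDR payload (cm_id and the write()/ABI plumbing around
it are elided):

	struct rdma_ucm_resolve_addr cmd;
	struct sockaddr_in *dst = (struct sockaddr_in *)&cmd.dst_addr;

	memset(&cmd, 0, sizeof(cmd));
	cmd.id         = cm_id;			/* from CREATE_ID */
	cmd.timeout_ms = 2000;
	cmd.src_size   = 0;			/* 0: let the kernel pick a source */
	cmd.dst_size   = sizeof(*dst);

	dst->sin_family      = AF_INET;
	dst->sin_port        = htons(4791);
	dst->sin_addr.s_addr = inet_addr("192.0.2.1");
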
Index: sys/ofed/include/rdma/sdp_socket.h
===================================================================
--- sys/ofed/include/rdma/sdp_socket.h
+++ sys/ofed/include/rdma/sdp_socket.h
@@ -8,6 +8,11 @@
 #define AF_INET_SDP 27
 #define PF_INET_SDP AF_INET_SDP
 #endif
+
+#ifndef AF_INET6_SDP
+#define AF_INET6_SDP 28
+#define PF_INET6_SDP AF_INET6_SDP
+#endif
 #endif
 
 #ifndef SDP_ZCOPY_THRESH
@@ -18,6 +23,4 @@
 #define SDP_LAST_BIND_ERR 81
 #endif
 
-/* TODO: AF_INET6_SDP ? */
-
 #endif
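
With AF_INET6_SDP defined, selecting SDP over IPv6 mirrors the existing IPv4
usage: pass the SDP address family to socket() and use the descriptor as an
ordinary stream socket. A hedged userspace sketch:

	#include <sys/socket.h>
	#include <err.h>

	int fd = socket(AF_INET6_SDP, SOCK_STREAM, 0);
	if (fd < 0)
		err(1, "socket(AF_INET6_SDP)");	/* SDP support not present */
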