Index: head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h =================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h (revision 289577) +++ head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h (revision 289578) @@ -1,1044 +1,1042 @@ /* * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * $FreeBSD$ */ #ifndef __IW_CXGB4_H__ #define __IW_CXGB4_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #undef prefetch #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_tcb.h" #include "t4_l2t.h" #define DRV_NAME "iw_cxgbe" #define MOD DRV_NAME ":" #define KTR_IW_CXGBE KTR_SPARE3 extern int c4iw_debug; #define PDBG(fmt, args...) \ do { \ if (c4iw_debug) \ printf(MOD fmt, ## args); \ } while (0) #include "t4.h" static inline void *cplhdr(struct mbuf *m) { return mtod(m, void*); } #define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->adap->vres.pbl.start) #define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->adap->vres.rq.start) #define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */ #define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */ struct c4iw_id_table { u32 flags; u32 start; /* logical minimal id */ u32 last; /* hint for find */ u32 max; spinlock_t lock; unsigned long *table; }; struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; }; struct c4iw_qid_list { struct list_head entry; u32 qid; }; struct c4iw_dev_ucontext { struct list_head qpids; struct list_head cqids; struct mutex lock; }; enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), }; struct c4iw_stat { u64 total; u64 cur; u64 max; u64 fail; }; struct c4iw_stats { struct mutex lock; struct c4iw_stat qid; struct c4iw_stat pd; struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; u64 db_full; u64 db_empty; u64 db_drop; u64 db_state_transitions; }; struct c4iw_rdev { struct adapter *adap; struct c4iw_resource resource; unsigned long qpshift; u32 qpmask; unsigned long cqshift; u32 cqmask; struct c4iw_dev_ucontext uctx; struct gen_pool *pbl_pool; struct gen_pool *rqt_pool; u32 flags; struct c4iw_stats stats; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) { return rdev->flags & T4_FATAL_ERROR; } static inline int c4iw_num_stags(struct c4iw_rdev *rdev) { return min((int)T4_MAX_NUM_STAG, (int)(rdev->adap->vres.stag.size >> 5)); } #define C4IW_WR_TO (10*HZ) struct c4iw_wr_wait { int ret; atomic_t completion; }; static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) { wr_waitp->ret = 0; atomic_set(&wr_waitp->completion, 0); } static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) { wr_waitp->ret = ret; atomic_set(&wr_waitp->completion, 1); wakeup(wr_waitp); } static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, struct c4iw_wr_wait *wr_waitp, u32 hwtid, u32 qpid, const char *func) { struct adapter *sc = rdev->adap; unsigned to = C4IW_WR_TO; while (!atomic_read(&wr_waitp->completion)) { tsleep(wr_waitp, 0, "c4iw_wait", to); if (SIGPENDING(curthread)) { printf("%s - Device %s not responding - " "tid %u qpid %u\n", func, device_get_nameunit(sc->dev), hwtid, qpid); if (c4iw_fatal_error(rdev)) { wr_waitp->ret = -EIO; break; } to = to << 2; } } if (wr_waitp->ret) CTR4(KTR_IW_CXGBE, "%s: FW reply %d tid %u qpid %u", device_get_nameunit(sc->dev), wr_waitp->ret, hwtid, qpid); return (wr_waitp->ret); } enum db_state { NORMAL = 0, FLOW_CONTROL = 1, RECOVERY = 2 }; struct c4iw_dev { struct ib_device ibdev; struct c4iw_rdev rdev; u32 device_cap_flags; struct idr cqidr; struct idr qpidr; struct idr mmidr; spinlock_t lock; struct dentry *debugfs_root; enum db_state db_state; int qpcnt; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) { return container_of(ibdev, struct c4iw_dev, ibdev); } static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev) { return container_of(rdev, struct c4iw_dev, rdev); } static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) { return idr_find(&rhp->cqidr, cqid); } static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) { return idr_find(&rhp->qpidr, qpid); } static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) { return idr_find(&rhp->mmidr, mmid); } static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id, int lock) { int ret; int newid; do { if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) return -ENOMEM; if (lock) spin_lock_irq(&rhp->lock); ret = idr_get_new_above(idr, handle, id, &newid); BUG_ON(!ret && newid != id); if (lock) spin_unlock_irq(&rhp->lock); } while (ret == -EAGAIN); return ret; } static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id) { return _insert_handle(rhp, idr, handle, id, 1); } static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id) { return _insert_handle(rhp, idr, handle, id, 0); } static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id, int lock) { if (lock) spin_lock_irq(&rhp->lock); idr_remove(idr, id); if (lock) spin_unlock_irq(&rhp->lock); } static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) { _remove_handle(rhp, idr, id, 1); } static inline void remove_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, u32 id) { _remove_handle(rhp, idr, id, 0); } struct c4iw_pd { struct ib_pd ibpd; u32 pdid; struct c4iw_dev *rhp; }; static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd) { return container_of(ibpd, struct c4iw_pd, ibpd); } struct tpt_attributes { u64 len; u64 va_fbo; enum fw_ri_mem_perms perms; u32 stag; u32 pdid; u32 qpid; u32 pbl_addr; u32 pbl_size; u32 state:1; u32 type:2; u32 rsvd:1; u32 remote_invaliate_disable:1; u32 zbva:1; u32 mw_bind_enable:1; u32 page_size:5; }; struct c4iw_mr { struct ib_mr ibmr; struct ib_umem *umem; struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; }; static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) { return container_of(ibmr, struct c4iw_mr, ibmr); } struct c4iw_mw { struct ib_mw ibmw; struct c4iw_dev *rhp; u64 kva; struct tpt_attributes attr; }; static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) { return container_of(ibmw, struct c4iw_mw, ibmw); } struct c4iw_fr_page_list { struct ib_fast_reg_page_list ibpl; DECLARE_PCI_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; int size; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( struct ib_fast_reg_page_list *ibpl) { return container_of(ibpl, struct c4iw_fr_page_list, ibpl); } struct c4iw_cq { struct ib_cq ibcq; struct c4iw_dev *rhp; struct t4_cq cq; spinlock_t lock; spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; }; static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq) { return container_of(ibcq, struct c4iw_cq, ibcq); } struct c4iw_mpa_attributes { u8 initiator; u8 recv_marker_enabled; u8 xmit_marker_enabled; u8 crc_enabled; u8 enhanced_rdma_conn; u8 version; u8 p2p_type; }; struct c4iw_qp_attributes { u32 scq; u32 rcq; u32 sq_num_entries; u32 rq_num_entries; u32 sq_max_sges; u32 sq_max_sges_rdma_write; u32 rq_max_sges; u32 state; u8 enable_rdma_read; u8 enable_rdma_write; u8 enable_bind; u8 enable_mmid0_fastreg; u32 max_ord; u32 max_ird; u32 pd; u32 next_state; char terminate_buffer[52]; u32 terminate_msg_len; u8 is_terminate_local; struct c4iw_mpa_attributes mpa_attr; struct c4iw_ep *llp_stream_handle; u8 layer_etype; u8 ecode; u16 sq_db_inc; u16 rq_db_inc; }; struct c4iw_qp { struct ib_qp ibqp; struct c4iw_dev *rhp; struct c4iw_ep *ep; struct c4iw_qp_attributes attr; struct t4_wq wq; spinlock_t lock; struct mutex mutex; atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) { return container_of(ibqp, struct c4iw_qp, ibqp); } struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; u32 key; spinlock_t mmap_lock; struct list_head mmaps; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) { return container_of(c, struct c4iw_ucontext, ibucontext); } struct c4iw_mm_entry { struct list_head entry; u64 addr; u32 key; unsigned len; }; static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext, u32 key, unsigned len) { struct list_head *pos, *nxt; struct c4iw_mm_entry *mm; spin_lock(&ucontext->mmap_lock); list_for_each_safe(pos, nxt, &ucontext->mmaps) { mm = list_entry(pos, struct c4iw_mm_entry, entry); if (mm->key == key && mm->len == len) { list_del_init(&mm->entry); spin_unlock(&ucontext->mmap_lock); CTR4(KTR_IW_CXGBE, "%s key 0x%x addr 0x%llx len %d", __func__, key, (unsigned long long) mm->addr, mm->len); return mm; } } spin_unlock(&ucontext->mmap_lock); return NULL; } static inline void insert_mmap(struct c4iw_ucontext *ucontext, struct c4iw_mm_entry *mm) { spin_lock(&ucontext->mmap_lock); CTR4(KTR_IW_CXGBE, "%s key 0x%x addr 0x%llx len %d", __func__, mm->key, (unsigned long long) mm->addr, mm->len); list_add_tail(&mm->entry, &ucontext->mmaps); spin_unlock(&ucontext->mmap_lock); } enum c4iw_qp_attr_mask { C4IW_QP_ATTR_NEXT_STATE = 1 << 0, C4IW_QP_ATTR_SQ_DB = 1<<1, C4IW_QP_ATTR_RQ_DB = 1<<2, C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, C4IW_QP_ATTR_MAX_ORD = 1 << 11, C4IW_QP_ATTR_MAX_IRD = 1 << 12, C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, C4IW_QP_ATTR_MPA_ATTR = 1 << 24, C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ | C4IW_QP_ATTR_ENABLE_RDMA_WRITE | C4IW_QP_ATTR_MAX_ORD | C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_STREAM_MSG_BUFFER | C4IW_QP_ATTR_MPA_ATTR | C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE) }; int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, int internal); enum c4iw_qp_state { C4IW_QP_STATE_IDLE, C4IW_QP_STATE_RTS, C4IW_QP_STATE_ERROR, C4IW_QP_STATE_TERMINATE, C4IW_QP_STATE_CLOSING, C4IW_QP_STATE_TOT }; static inline int c4iw_convert_state(enum ib_qp_state ib_state) { switch (ib_state) { case IB_QPS_RESET: case IB_QPS_INIT: return C4IW_QP_STATE_IDLE; case IB_QPS_RTS: return C4IW_QP_STATE_RTS; case IB_QPS_SQD: return C4IW_QP_STATE_CLOSING; case IB_QPS_SQE: return C4IW_QP_STATE_TERMINATE; case IB_QPS_ERR: return C4IW_QP_STATE_ERROR; default: return -1; } } static inline int to_ib_qp_state(int c4iw_qp_state) { switch (c4iw_qp_state) { case C4IW_QP_STATE_IDLE: return IB_QPS_INIT; case C4IW_QP_STATE_RTS: return IB_QPS_RTS; case C4IW_QP_STATE_CLOSING: return IB_QPS_SQD; case C4IW_QP_STATE_TERMINATE: return IB_QPS_SQE; case C4IW_QP_STATE_ERROR: return IB_QPS_ERR; } return IB_QPS_ERR; } static inline u32 c4iw_ib_to_tpt_access(int a) { return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | (a & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0) | (a & IB_ACCESS_LOCAL_WRITE ? FW_RI_MEM_ACCESS_LOCAL_WRITE : 0) | FW_RI_MEM_ACCESS_LOCAL_READ; } static inline u32 c4iw_ib_to_tpt_bind_access(int acc) { return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0); } enum c4iw_mmid_state { C4IW_STAG_STATE_VALID, C4IW_STAG_STATE_INVALID }; #define C4IW_NODE_DESC "iw_cxgbe Chelsio Communications" #define MPA_KEY_REQ "MPA ID Req Frame" #define MPA_KEY_REP "MPA ID Rep Frame" #define MPA_MAX_PRIVATE_DATA 256 #define MPA_ENHANCED_RDMA_CONN 0x10 #define MPA_REJECT 0x20 #define MPA_CRC 0x40 #define MPA_MARKERS 0x80 #define MPA_FLAGS_MASK 0xE0 #define MPA_V2_PEER2PEER_MODEL 0x8000 #define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000 #define MPA_V2_RDMA_WRITE_RTR 0x8000 #define MPA_V2_RDMA_READ_RTR 0x4000 #define MPA_V2_IRD_ORD_MASK 0x3FFF -/* Fixme: Use atomic_read for kref.count as same as Linux */ #define c4iw_put_ep(ep) { \ CTR4(KTR_IW_CXGBE, "put_ep (%s:%u) ep %p, refcnt %d", \ - __func__, __LINE__, ep, (ep)->kref.count); \ - WARN_ON((ep)->kref.count < 1); \ + __func__, __LINE__, ep, atomic_read(&(ep)->kref.refcount)); \ + WARN_ON(atomic_read(&(ep)->kref.refcount) < 1); \ kref_put(&((ep)->kref), _c4iw_free_ep); \ } -/* Fixme: Use atomic_read for kref.count as same as Linux */ #define c4iw_get_ep(ep) { \ CTR4(KTR_IW_CXGBE, "get_ep (%s:%u) ep %p, refcnt %d", \ - __func__, __LINE__, ep, (ep)->kref.count); \ + __func__, __LINE__, ep, atomic_read(&(ep)->kref.refcount)); \ kref_get(&((ep)->kref)); \ } void _c4iw_free_ep(struct kref *kref); struct mpa_message { u8 key[16]; u8 flags; u8 revision; __be16 private_data_size; u8 private_data[0]; }; struct mpa_v2_conn_params { __be16 ird; __be16 ord; }; struct terminate_message { u8 layer_etype; u8 ecode; __be16 hdrct_rsvd; u8 len_hdrs[0]; }; #define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) enum c4iw_layers_types { LAYER_RDMAP = 0x00, LAYER_DDP = 0x10, LAYER_MPA = 0x20, RDMAP_LOCAL_CATA = 0x00, RDMAP_REMOTE_PROT = 0x01, RDMAP_REMOTE_OP = 0x02, DDP_LOCAL_CATA = 0x00, DDP_TAGGED_ERR = 0x01, DDP_UNTAGGED_ERR = 0x02, DDP_LLP = 0x03 }; enum c4iw_rdma_ecodes { RDMAP_INV_STAG = 0x00, RDMAP_BASE_BOUNDS = 0x01, RDMAP_ACC_VIOL = 0x02, RDMAP_STAG_NOT_ASSOC = 0x03, RDMAP_TO_WRAP = 0x04, RDMAP_INV_VERS = 0x05, RDMAP_INV_OPCODE = 0x06, RDMAP_STREAM_CATA = 0x07, RDMAP_GLOBAL_CATA = 0x08, RDMAP_CANT_INV_STAG = 0x09, RDMAP_UNSPECIFIED = 0xff }; enum c4iw_ddp_ecodes { DDPT_INV_STAG = 0x00, DDPT_BASE_BOUNDS = 0x01, DDPT_STAG_NOT_ASSOC = 0x02, DDPT_TO_WRAP = 0x03, DDPT_INV_VERS = 0x04, DDPU_INV_QN = 0x01, DDPU_INV_MSN_NOBUF = 0x02, DDPU_INV_MSN_RANGE = 0x03, DDPU_INV_MO = 0x04, DDPU_MSG_TOOBIG = 0x05, DDPU_INV_VERS = 0x06 }; enum c4iw_mpa_ecodes { MPA_CRC_ERR = 0x02, MPA_MARKER_ERR = 0x03, MPA_LOCAL_CATA = 0x05, MPA_INSUFF_IRD = 0x06, MPA_NOMATCH_RTR = 0x07, }; enum c4iw_ep_state { IDLE = 0, LISTEN, CONNECTING, MPA_REQ_WAIT, MPA_REQ_SENT, MPA_REQ_RCVD, MPA_REP_SENT, FPDU_MODE, ABORTING, CLOSING, MORIBUND, DEAD, }; enum c4iw_ep_flags { PEER_ABORT_IN_PROGRESS = 0, ABORT_REQ_IN_PROGRESS = 1, RELEASE_RESOURCES = 2, CLOSE_SENT = 3, TIMEOUT = 4 }; enum c4iw_ep_history { ACT_OPEN_REQ = 0, ACT_OFLD_CONN = 1, ACT_OPEN_RPL = 2, ACT_ESTAB = 3, PASS_ACCEPT_REQ = 4, PASS_ESTAB = 5, ABORT_UPCALL = 6, ESTAB_UPCALL = 7, CLOSE_UPCALL = 8, ULP_ACCEPT = 9, ULP_REJECT = 10, TIMEDOUT = 11, PEER_ABORT = 12, PEER_CLOSE = 13, CONNREQ_UPCALL = 14, ABORT_CONN = 15, DISCONN_UPCALL = 16, EP_DISC_CLOSE = 17, EP_DISC_ABORT = 18, CONN_RPL_UPCALL = 19, ACT_RETRY_NOMEM = 20, ACT_RETRY_INUSE = 21 }; struct c4iw_ep_common { TAILQ_ENTRY(c4iw_ep_common) entry; /* Work queue attachment */ struct iw_cm_id *cm_id; struct c4iw_qp *qp; struct c4iw_dev *dev; enum c4iw_ep_state state; struct kref kref; struct mutex mutex; struct sockaddr_in local_addr; struct sockaddr_in remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; int rpl_err; int rpl_done; struct thread *thread; struct socket *so; }; struct c4iw_listen_ep { struct c4iw_ep_common com; unsigned int stid; int backlog; }; struct c4iw_ep { struct c4iw_ep_common com; struct c4iw_ep *parent_ep; struct timer_list timer; struct list_head entry; unsigned int atid; u32 hwtid; u32 snd_seq; u32 rcv_seq; struct l2t_entry *l2t; struct dst_entry *dst; struct c4iw_mpa_attributes mpa_attr; u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; unsigned int mpa_pkt_len; u32 ird; u32 ord; u32 smac_idx; u32 tx_chan; u32 mtu; u16 mss; u16 emss; u16 plen; u16 rss_qid; u16 txq_idx; u16 ctrlq_idx; u8 tos; u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; }; static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; } static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) { return cm_id->provider_data; } static inline int compute_wscale(int win) { int wscale = 0; while (wscale < 14 && (65535< struct gen_pool { blist_t gen_list; daddr_t gen_base; int gen_chunk_shift; struct mutex gen_lock; }; static __inline struct gen_pool * gen_pool_create(daddr_t base, u_int chunk_shift, u_int len) { struct gen_pool *gp; gp = malloc(sizeof(struct gen_pool), M_DEVBUF, M_NOWAIT); if (gp == NULL) return (NULL); memset(gp, 0, sizeof(struct gen_pool)); gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT); if (gp->gen_list == NULL) { free(gp, M_DEVBUF); return (NULL); } blist_free(gp->gen_list, 0, len >> chunk_shift); gp->gen_base = base; gp->gen_chunk_shift = chunk_shift; //mutex_init(&gp->gen_lock, "genpool", NULL, MTX_DUPOK|MTX_DEF); mutex_init(&gp->gen_lock); return (gp); } static __inline unsigned long gen_pool_alloc(struct gen_pool *gp, int size) { int chunks; daddr_t blkno; chunks = (size + (1<gen_chunk_shift) - 1) >> gp->gen_chunk_shift; mutex_lock(&gp->gen_lock); blkno = blist_alloc(gp->gen_list, chunks); mutex_unlock(&gp->gen_lock); if (blkno == SWAPBLK_NONE) return (0); return (gp->gen_base + ((1 << gp->gen_chunk_shift) * blkno)); } static __inline void gen_pool_free(struct gen_pool *gp, daddr_t address, int size) { int chunks; daddr_t blkno; chunks = (size + (1<gen_chunk_shift) - 1) >> gp->gen_chunk_shift; blkno = (address - gp->gen_base) / (1 << gp->gen_chunk_shift); mutex_lock(&gp->gen_lock); blist_free(gp->gen_list, blkno, chunks); mutex_unlock(&gp->gen_lock); } static __inline void gen_pool_destroy(struct gen_pool *gp) { blist_destroy(gp->gen_list); free(gp, M_DEVBUF); } #if defined(__i386__) || defined(__amd64__) #define L1_CACHE_BYTES 128 #else #define L1_CACHE_BYTES 32 #endif static inline int idr_for_each(struct idr *idp, int (*fn)(int id, void *p, void *data), void *data) { int n, id, max, error = 0; struct idr_layer *p; struct idr_layer *pa[MAX_LEVEL]; struct idr_layer **paa = &pa[0]; n = idp->layers * IDR_BITS; p = idp->top; max = 1 << n; id = 0; while (id < max) { while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; p = p->ary[(id >> n) & IDR_MASK]; } if (p) { error = fn(id, (void *)p, data); if (error) break; } id += 1 << n; while (n < fls(id)) { n += IDR_BITS; p = *--paa; } } return error; } void c4iw_cm_init_cpl(struct adapter *); void c4iw_cm_term_cpl(struct adapter *); void your_reg_device(struct c4iw_dev *dev); #define SGE_CTRLQ_NUM 0 extern int spg_creds;/* Status Page size in credit units(1 unit = 64) */ #endif Index: head/sys/dev/cxgbe/iw_cxgbe/qp.c =================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/qp.c (revision 289577) +++ head/sys/dev/cxgbe/iw_cxgbe/qp.c (revision 289578) @@ -1,1706 +1,1705 @@ /* * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct sge_iq; struct rss_header; #include #include "offload.h" #include "tom/t4_tom.h" #include "iw_cxgbe.h" #include "user.h" extern int db_delay_usecs; extern int db_fc_threshold; static void creds(struct toepcb *toep, size_t wrsize); static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { unsigned long flag; spin_lock_irqsave(&qhp->lock, flag); qhp->attr.state = state; spin_unlock_irqrestore(&qhp->lock, flag); } static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { contigfree(sq->queue, sq->memsize, M_DEVBUF); } static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { dealloc_host_sq(rdev, sq); } static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { sq->queue = contigmalloc(sq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (sq->queue) sq->dma_addr = vtophys(sq->queue); else return -ENOMEM; sq->phys_addr = vtophys(sq->queue); pci_unmap_addr_set(sq, mapping, sq->dma_addr); CTR4(KTR_IW_CXGBE, "%s sq %p dma_addr %p phys_addr %p", __func__, sq->queue, sq->dma_addr, sq->phys_addr); return 0; } static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. */ contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); dealloc_sq(rdev, &wq->sq); c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); c4iw_put_qpid(rdev, wq->rq.qid, uctx); c4iw_put_qpid(rdev, wq->sq.qid, uctx); return 0; } static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx) { struct adapter *sc = rdev->adap; int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; struct fw_ri_res *res; int wr_len; struct c4iw_wr_wait wr_wait; int ret; int eqsize; struct wrqe *wr; wq->sq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->sq.qid) return -ENOMEM; wq->rq.qid = c4iw_get_qpid(rdev, uctx); if (!wq->rq.qid) goto err1; if (!user) { wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, GFP_KERNEL); if (!wq->sq.sw_sq) goto err2; wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, GFP_KERNEL); if (!wq->rq.sw_rq) goto err3; } /* RQT must be a power of 2. */ wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); if (!wq->rq.rqt_hwaddr) goto err4; if (alloc_host_sq(rdev, &wq->sq)) goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = contigmalloc(wq->rq.memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); if (wq->rq.queue) wq->rq.dma_addr = vtophys(wq->rq.queue); else goto err6; CTR5(KTR_IW_CXGBE, "%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", __func__, wq->sq.queue, (unsigned long long)vtophys(wq->sq.queue), wq->rq.queue, (unsigned long long)vtophys(wq->rq.queue)); memset(wq->rq.queue, 0, wq->rq.memsize); pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = (void *)((unsigned long)rman_get_virtual(sc->regs_res) + MYPF_REG(SGE_PF_KDOORBELL)); wq->gts = (void *)((unsigned long)rman_get_virtual(rdev->adap->regs_res) + MYPF_REG(SGE_PF_GTS)); if (user) { wq->sq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->sq.qid << rdev->qpshift)); wq->sq.udb &= PAGE_MASK; wq->rq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) + (wq->rq.qid << rdev->qpshift)); wq->rq.udb &= PAGE_MASK; } wq->rdev = rdev; wq->rq.msn = 1; /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; wr = alloc_wrqe(wr_len, &sc->sge.mgmtq); if (wr == NULL) return (0); res_wr = wrtod(wr); memset(res_wr, 0, wr_len); res_wr->op_nres = cpu_to_be32( V_FW_WR_OP(FW_RI_RES_WR) | V_FW_RI_RES_WR_NRES(2) | F_FW_WR_COMPL); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (unsigned long) &wr_wait; res = res_wr->res; res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + spg_creds; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(scq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); res++; res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; res->u.sqrq.op = FW_RI_RES_OP_WRITE; /* eqsize is the number of 64B entries plus the status page size. */ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + spg_creds ; res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ V_FW_RI_RES_WR_IQID(rcq->cqid)); res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( V_FW_RI_RES_WR_DCAEN(0) | V_FW_RI_RES_WR_DCACPU(0) | V_FW_RI_RES_WR_FBMIN(2) | V_FW_RI_RES_WR_FBMAX(2) | V_FW_RI_RES_WR_CIDXFTHRESHO(0) | V_FW_RI_RES_WR_CIDXFTHRESH(0) | V_FW_RI_RES_WR_EQSIZE(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); c4iw_init_wr_wait(&wr_wait); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); if (ret) goto err7; CTR6(KTR_IW_CXGBE, "%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx", __func__, wq->sq.qid, wq->rq.qid, wq->db, (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); return 0; err7: contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF); err6: dealloc_sq(rdev, &wq->sq); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: kfree(wq->rq.sw_rq); err3: kfree(wq->sq.sw_sq); err2: c4iw_put_qpid(rdev, wq->rq.qid, uctx); err1: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return -ENOMEM; } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, struct ib_send_wr *wr, int max, u32 *plenp) { u8 *dstp, *srcp; u32 plen = 0; int i; int rem, len; dstp = (u8 *)immdp->data; for (i = 0; i < wr->num_sge; i++) { if ((plen + wr->sg_list[i].length) > max) return -EMSGSIZE; srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; plen += wr->sg_list[i].length; rem = wr->sg_list[i].length; while (rem) { if (dstp == (u8 *)&sq->queue[sq->size]) dstp = (u8 *)sq->queue; if (rem <= (u8 *)&sq->queue[sq->size] - dstp) len = rem; else len = (u8 *)&sq->queue[sq->size] - dstp; memcpy(dstp, srcp, len); dstp += len; srcp += len; rem -= len; } } len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); if (len) memset(dstp, 0, len); immdp->op = FW_RI_DATA_IMMD; immdp->r1 = 0; immdp->r2 = 0; immdp->immdlen = cpu_to_be32(plen); *plenp = plen; return 0; } static int build_isgl(__be64 *queue_start, __be64 *queue_end, struct fw_ri_isgl *isglp, struct ib_sge *sg_list, int num_sge, u32 *plenp) { int i; u32 plen = 0; __be64 *flitp = (__be64 *)isglp->sge; for (i = 0; i < num_sge; i++) { if ((plen + sg_list[i].length) < plen) return -EMSGSIZE; plen += sg_list[i].length; *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | sg_list[i].length); if (++flitp == queue_end) flitp = queue_start; *flitp = cpu_to_be64(sg_list[i].addr); if (++flitp == queue_end) flitp = queue_start; } *flitp = (__force __be64)0; isglp->op = FW_RI_DATA_ISGL; isglp->r1 = 0; isglp->nsge = cpu_to_be16(num_sge); isglp->r2 = 0; if (plenp) *plenp = plen; return 0; } static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; int ret; if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; switch (wr->opcode) { case IB_WR_SEND: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE)); else wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND)); wqe->send.stag_inv = 0; break; case IB_WR_SEND_WITH_INV: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE_INV)); else wqe->send.sendop_pkd = cpu_to_be32( V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_INV)); wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); break; default: return -EINVAL; } plen = 0; if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->send.u.immd_src, wr, T4_MAX_SEND_INLINE, &plen); if (ret) return ret; size = sizeof wqe->send + sizeof(struct fw_ri_immd) + plen; } else { ret = build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], wqe->send.u.isgl_src, wr->sg_list, wr->num_sge, &plen); if (ret) return ret; size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } } else { wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; wqe->send.u.immd_src[0].r1 = 0; wqe->send.u.immd_src[0].r2 = 0; wqe->send.u.immd_src[0].immdlen = 0; size = sizeof wqe->send + sizeof(struct fw_ri_immd); plen = 0; } *len16 = DIV_ROUND_UP(size, 16); wqe->send.plen = cpu_to_be32(plen); return 0; } static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; int ret; if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; wqe->write.r2 = 0; wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); if (wr->num_sge) { if (wr->send_flags & IB_SEND_INLINE) { ret = build_immd(sq, wqe->write.u.immd_src, wr, T4_MAX_WRITE_INLINE, &plen); if (ret) return ret; size = sizeof wqe->write + sizeof(struct fw_ri_immd) + plen; } else { ret = build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], wqe->write.u.isgl_src, wr->sg_list, wr->num_sge, &plen); if (ret) return ret; size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + wr->num_sge * sizeof(struct fw_ri_sge); } } else { wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; wqe->write.u.immd_src[0].r1 = 0; wqe->write.u.immd_src[0].r2 = 0; wqe->write.u.immd_src[0].immdlen = 0; size = sizeof wqe->write + sizeof(struct fw_ri_immd); plen = 0; } *len16 = DIV_ROUND_UP(size, 16); wqe->write.plen = cpu_to_be32(plen); return 0; } static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { if (wr->num_sge > 1) return -EINVAL; if (wr->num_sge) { wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr >> 32)); wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr >> 32)); wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr)); } else { wqe->read.stag_src = cpu_to_be32(2); wqe->read.to_src_hi = 0; wqe->read.to_src_lo = 0; wqe->read.stag_sink = cpu_to_be32(2); wqe->read.plen = 0; wqe->read.to_sink_hi = 0; wqe->read.to_sink_lo = 0; } wqe->read.r2 = 0; wqe->read.r5 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->read, 16); return 0; } static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, struct ib_recv_wr *wr, u8 *len16) { int ret; ret = build_isgl((__be64 *)qhp->wq.rq.queue, (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); if (ret) return ret; *len16 = DIV_ROUND_UP(sizeof wqe->recv + wr->num_sge * sizeof(struct fw_ri_sge), 16); return 0; } static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { struct fw_ri_immd *imdp; __be64 *p; int i; int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); int rem; if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; wqe->fr.addr_type = FW_RI_VA_BASED_TO; wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); wqe->fr.len_hi = 0; wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); WARN_ON(pbllen > T4_MAX_FR_IMMD); imdp = (struct fw_ri_immd *)(&wqe->fr + 1); imdp->op = FW_RI_DATA_IMMD; imdp->r1 = 0; imdp->r2 = 0; imdp->immdlen = cpu_to_be32(pbllen); p = (__be64 *)(imdp + 1); rem = pbllen; for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); rem -= sizeof *p; if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; } BUG_ON(rem < 0); while (rem) { *p = 0; rem -= sizeof *p; if (++p == (__be64 *)&sq->queue[sq->size]) p = (__be64 *)sq->queue; } *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); return 0; } static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); return 0; } void c4iw_qp_add_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp); atomic_inc(&(to_c4iw_qp(qp)->refcnt)); } void c4iw_qp_rem_ref(struct ib_qp *qp) { CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp); if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) wake_up(&(to_c4iw_qp(qp)->wait)); } int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; u8 len16 = 0; enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; union t4_wr *wqe; u32 num_wrs; struct t4_swsqe *swsqe; unsigned long flag; u16 idx = 0; qhp = to_c4iw_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); return -EINVAL; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } while (wr) { if (num_wrs == 0) { err = -ENOMEM; *bad_wr = wr; break; } wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { case IB_WR_SEND_WITH_INV: case IB_WR_SEND: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_READ_FENCE_FLAG; fw_opcode = FW_RI_SEND_WR; if (wr->opcode == IB_WR_SEND) swsqe->opcode = FW_RI_SEND; else swsqe->opcode = FW_RI_SEND_WITH_INV; err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) fw_flags = FW_RI_RDMA_READ_INVALIDATE; else fw_flags = 0; err = build_rdma_read(wqe, wr, &len16); if (err) break; swsqe->read_len = wr->sg_list[0].length; if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); break; default: CTR2(KTR_IW_CXGBE, "%s post of type =%d TBD!", __func__, wr->opcode); err = -EINVAL; } if (err) { *bad_wr = wr; break; } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); swsqe->wr_id = wr->wr_id; init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); CTR5(KTR_IW_CXGBE, "%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u", __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, swsqe->opcode, swsqe->read_len); wr = wr->next; num_wrs--; t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } if (t4_wq_db_enabled(&qhp->wq)) t4_ring_sq_db(&qhp->wq, idx); spin_unlock_irqrestore(&qhp->lock, flag); return err; } int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { int err = 0; struct c4iw_qp *qhp; union t4_recv_wr *wqe; u32 num_wrs; u8 len16 = 0; unsigned long flag; u16 idx = 0; qhp = to_c4iw_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); return -EINVAL; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } while (wr) { if (wr->num_sge > T4_MAX_RECV_SGE) { err = -EINVAL; *bad_wr = wr; break; } wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + qhp->wq.rq.wq_pidx * T4_EQ_ENTRY_SIZE); if (num_wrs) err = build_rdma_recv(qhp, wqe, wr, &len16); else err = -ENOMEM; if (err) { *bad_wr = wr; break; } qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; wqe->recv.opcode = FW_RI_RECV_WR; wqe->recv.r1 = 0; wqe->recv.wrid = qhp->wq.rq.pidx; wqe->recv.r2[0] = 0; wqe->recv.r2[1] = 0; wqe->recv.r2[2] = 0; wqe->recv.len16 = len16; CTR3(KTR_IW_CXGBE, "%s cookie 0x%llx pidx %u", __func__, (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); t4_rq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); wr = wr->next; num_wrs--; } if (t4_wq_db_enabled(&qhp->wq)) t4_ring_rq_db(&qhp->wq, idx); spin_unlock_irqrestore(&qhp->lock, flag); return err; } int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) { return -ENOSYS; } static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { int status; int tagged; int opcode; int rqtype; int send_inv; if (!err_cqe) { *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; *ecode = 0; return; } status = CQE_STATUS(err_cqe); opcode = CQE_OPCODE(err_cqe); rqtype = RQ_TYPE(err_cqe); send_inv = (opcode == FW_RI_SEND_WITH_INV) || (opcode == FW_RI_SEND_WITH_SE_INV); tagged = (opcode == FW_RI_RDMA_WRITE) || (rqtype && (opcode == FW_RI_READ_RESP)); switch (status) { case T4_ERR_STAG: if (send_inv) { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_CANT_INV_STAG; } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_INV_STAG; } break; case T4_ERR_PDID: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; if ((opcode == FW_RI_SEND_WITH_INV) || (opcode == FW_RI_SEND_WITH_SE_INV)) *ecode = RDMAP_CANT_INV_STAG; else *ecode = RDMAP_STAG_NOT_ASSOC; break; case T4_ERR_QPID: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_STAG_NOT_ASSOC; break; case T4_ERR_ACCESS: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_ACC_VIOL; break; case T4_ERR_WRAP: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_TO_WRAP; break; case T4_ERR_BOUND: if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; } else { *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; *ecode = RDMAP_BASE_BOUNDS; } break; case T4_ERR_INVALIDATE_SHARED_MR: case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_CANT_INV_STAG; break; case T4_ERR_ECC: case T4_ERR_ECC_PSTAG: case T4_ERR_INTERNAL_ERR: *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; *ecode = 0; break; case T4_ERR_OUT_OF_RQE: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MSN_NOBUF; break; case T4_ERR_PBL_ADDR_BOUND: *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_BASE_BOUNDS; break; case T4_ERR_CRC: *layer_type = LAYER_MPA|DDP_LLP; *ecode = MPA_CRC_ERR; break; case T4_ERR_MARKER: *layer_type = LAYER_MPA|DDP_LLP; *ecode = MPA_MARKER_ERR; break; case T4_ERR_PDU_LEN_ERR: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_MSG_TOOBIG; break; case T4_ERR_DDP_VERSION: if (tagged) { *layer_type = LAYER_DDP|DDP_TAGGED_ERR; *ecode = DDPT_INV_VERS; } else { *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_VERS; } break; case T4_ERR_RDMA_VERSION: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_INV_VERS; break; case T4_ERR_OPCODE: *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; *ecode = RDMAP_INV_OPCODE; break; case T4_ERR_DDP_QUEUE_NUM: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_QN; break; case T4_ERR_MSN: case T4_ERR_MSN_GAP: case T4_ERR_MSN_RANGE: case T4_ERR_IRD_OVERFLOW: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MSN_RANGE; break; case T4_ERR_TBIT: *layer_type = LAYER_DDP|DDP_LOCAL_CATA; *ecode = 0; break; case T4_ERR_MO: *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; *ecode = DDPU_INV_MO; break; default: *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; *ecode = 0; break; } } static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, gfp_t gfp) { struct fw_ri_wr *wqe; struct terminate_message *term; struct wrqe *wr; struct socket *so = qhp->ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, qhp->wq.sq.qid, qhp->ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return; wqe = wrtod(wr); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32(V_FW_WR_OP(FW_RI_WR)); wqe->flowid_len16 = cpu_to_be32( V_FW_WR_FLOWID(qhp->ep->hwtid) | V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); term = (struct terminate_message *)wqe->u.terminate.termmsg; if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { term->layer_etype = qhp->attr.layer_etype; term->ecode = qhp->attr.ecode; } else build_term_codes(err_cqe, &term->layer_etype, &term->ecode); creds(toep, sizeof(*wqe)); t4_wrq_tx(qhp->rhp->rdev.adap, wr); } /* Assumes qhp lock is held. */ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; int flushed; unsigned long flag; CTR4(KTR_IW_CXGBE, "%s qhp %p rchp %p schp %p", __func__, qhp, rchp, schp); /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&rchp->cq); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); if (flushed) { spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); c4iw_flush_hw_cq(&schp->cq); c4iw_count_scqes(&schp->cq, &qhp->wq, &count); flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); if (flushed) { spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } } static void flush_qp(struct c4iw_qp *qhp) { struct c4iw_cq *rchp, *schp; unsigned long flag; rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); if (qhp->ibqp.uobject) { t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); if (schp != rchp) { t4_set_cq_in_error(&schp->cq); spin_lock_irqsave(&schp->comp_handler_lock, flag); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); spin_unlock_irqrestore(&schp->comp_handler_lock, flag); } return; } __flush_qp(qhp, rchp, schp); } static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep) { struct c4iw_rdev *rdev = &rhp->rdev; struct adapter *sc = rdev->adap; struct fw_ri_wr *wqe; int ret; struct wrqe *wr; struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; KASSERT(rhp == qhp->rhp && ep == qhp->ep, ("%s: EDOOFUS", __func__)); CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return (0); wqe = wrtod(wr); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32(V_FW_WR_OP(FW_RI_WR) | F_FW_WR_COMPL); wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(ep->hwtid) | V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.fini.type = FW_RI_TYPE_FINI; c4iw_init_wr_wait(&ep->com.wr_wait); creds(toep, sizeof(*wqe)); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid, qhp->wq.sq.qid, __func__); return ret; } static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) { CTR2(KTR_IW_CXGBE, "%s p2p_type = %d", __func__, p2p_type); memset(&init->u, 0, sizeof init->u); switch (p2p_type) { case FW_RI_INIT_P2PTYPE_RDMA_WRITE: init->u.write.opcode = FW_RI_RDMA_WRITE_WR; init->u.write.stag_sink = cpu_to_be32(1); init->u.write.to_sink = cpu_to_be64(1); init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD; init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write + sizeof(struct fw_ri_immd), 16); break; case FW_RI_INIT_P2PTYPE_READ_REQ: init->u.write.opcode = FW_RI_RDMA_READ_WR; init->u.read.stag_src = cpu_to_be32(1); init->u.read.to_src_lo = cpu_to_be32(1); init->u.read.stag_sink = cpu_to_be32(1); init->u.read.to_sink_lo = cpu_to_be32(1); init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16); break; } } static void creds(struct toepcb *toep, size_t wrsize) { struct ofld_tx_sdesc *txsd; CTR3(KTR_IW_CXGBE, "%s:creB %p %u", __func__, toep , wrsize); INP_WLOCK(toep->inp); txsd = &toep->txsd[toep->txsd_pidx]; txsd->tx_credits = howmany(wrsize, 16); txsd->plen = 0; KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, ("%s: not enough credits (%d)", __func__, toep->tx_credits)); toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; INP_WUNLOCK(toep->inp); CTR5(KTR_IW_CXGBE, "%s:creE %p %u %u %u", __func__, toep , txsd->tx_credits, toep->tx_credits, toep->txsd_pidx); } static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) { struct fw_ri_wr *wqe; int ret; struct wrqe *wr; struct c4iw_ep *ep = qhp->ep; struct c4iw_rdev *rdev = &qhp->rhp->rdev; struct adapter *sc = rdev->adap; struct socket *so = ep->com.so; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp, qhp->wq.sq.qid, ep->hwtid); wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq); if (wr == NULL) return (0); wqe = wrtod(wr); memset(wqe, 0, sizeof *wqe); wqe->op_compl = cpu_to_be32( V_FW_WR_OP(FW_RI_WR) | F_FW_WR_COMPL); wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(ep->hwtid) | V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); wqe->cookie = (unsigned long) &ep->com.wr_wait; wqe->u.init.type = FW_RI_TYPE_INIT; wqe->u.init.mpareqbit_p2ptype = V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) | V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type); wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE; if (qhp->attr.mpa_attr.recv_marker_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE; if (qhp->attr.mpa_attr.xmit_marker_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE; if (qhp->attr.mpa_attr.crc_enabled) wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE; wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE | FW_RI_QP_RDMA_WRITE_ENABLE | FW_RI_QP_BIND_ENABLE; if (!qhp->ibqp.uobject) wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE | FW_RI_QP_STAG0_ENABLE; wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq)); wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); wqe->u.init.iss = cpu_to_be32(ep->snd_seq); wqe->u.init.irs = cpu_to_be32(ep->rcv_seq); wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - sc->vres.rq.start); if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); c4iw_init_wr_wait(&ep->com.wr_wait); creds(toep, sizeof(*wqe)); t4_wrq_tx(sc, wr); ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid, qhp->wq.sq.qid, __func__); toep->ulp_mode = ULP_MODE_RDMA; return ret; } int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, int internal) { int ret = 0; struct c4iw_qp_attributes newattr = qhp->attr; int disconnect = 0; int terminate = 0; int abort = 0; int free = 0; struct c4iw_ep *ep = NULL; CTR5(KTR_IW_CXGBE, "%s qhp %p sqid 0x%x rqid 0x%x ep %p", __func__, qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep); CTR3(KTR_IW_CXGBE, "%s state %d -> %d", __func__, qhp->attr.state, (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); mutex_lock(&qhp->mutex); /* Process attr changes if in IDLE */ if (mask & C4IW_QP_ATTR_VALID_MODIFY) { if (qhp->attr.state != C4IW_QP_STATE_IDLE) { ret = -EIO; goto out; } if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ) newattr.enable_rdma_read = attrs->enable_rdma_read; if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE) newattr.enable_rdma_write = attrs->enable_rdma_write; if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND) newattr.enable_bind = attrs->enable_bind; if (mask & C4IW_QP_ATTR_MAX_ORD) { if (attrs->max_ord > c4iw_max_read_depth) { ret = -EINVAL; goto out; } newattr.max_ord = attrs->max_ord; } if (mask & C4IW_QP_ATTR_MAX_IRD) { if (attrs->max_ird > c4iw_max_read_depth) { ret = -EINVAL; goto out; } newattr.max_ird = attrs->max_ird; } qhp->attr = newattr; } if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) goto out; if (qhp->attr.state == attrs->next_state) goto out; switch (qhp->attr.state) { case C4IW_QP_STATE_IDLE: switch (attrs->next_state) { case C4IW_QP_STATE_RTS: if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) { ret = -EINVAL; goto out; } if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) { ret = -EINVAL; goto out; } qhp->attr.mpa_attr = attrs->mpa_attr; qhp->attr.llp_stream_handle = attrs->llp_stream_handle; qhp->ep = qhp->attr.llp_stream_handle; set_state(qhp, C4IW_QP_STATE_RTS); /* * Ref the endpoint here and deref when we * disassociate the endpoint from the QP. This * happens in CLOSING->IDLE transition or *->ERROR * transition. */ c4iw_get_ep(&qhp->ep->com); ret = rdma_init(rhp, qhp); if (ret) goto err; break; case C4IW_QP_STATE_ERROR: set_state(qhp, C4IW_QP_STATE_ERROR); flush_qp(qhp); break; default: ret = -EINVAL; goto out; } break; case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - //Fixme: Use atomic_read as same as Linux - BUG_ON(qhp->ep->com.kref.count < 2); + BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { abort = 0; disconnect = 1; c4iw_get_ep(&qhp->ep->com); } if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; if (!internal) terminate = 1; disconnect = 1; c4iw_get_ep(&qhp->ep->com); break; case C4IW_QP_STATE_ERROR: set_state(qhp, C4IW_QP_STATE_ERROR); if (qhp->ibqp.uobject) t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; ep = qhp->ep; c4iw_get_ep(&qhp->ep->com); } goto err; break; default: ret = -EINVAL; goto out; } break; case C4IW_QP_STATE_CLOSING: if (!internal) { ret = -EINVAL; goto out; } switch (attrs->next_state) { case C4IW_QP_STATE_IDLE: flush_qp(qhp); set_state(qhp, C4IW_QP_STATE_IDLE); qhp->attr.llp_stream_handle = NULL; c4iw_put_ep(&qhp->ep->com); qhp->ep = NULL; wake_up(&qhp->wait); break; case C4IW_QP_STATE_ERROR: goto err; default: ret = -EINVAL; goto err; } break; case C4IW_QP_STATE_ERROR: if (attrs->next_state != C4IW_QP_STATE_IDLE) { ret = -EINVAL; goto out; } if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) { ret = -EINVAL; goto out; } set_state(qhp, C4IW_QP_STATE_IDLE); break; case C4IW_QP_STATE_TERMINATE: if (!internal) { ret = -EINVAL; goto out; } goto err; break; default: printf("%s in a bad state %d\n", __func__, qhp->attr.state); ret = -EINVAL; goto err; break; } goto out; err: CTR3(KTR_IW_CXGBE, "%s disassociating ep %p qpid 0x%x", __func__, qhp->ep, qhp->wq.sq.qid); /* disassociate the LLP connection */ qhp->attr.llp_stream_handle = NULL; if (!ep) ep = qhp->ep; qhp->ep = NULL; set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; BUG_ON(!ep); flush_qp(qhp); wake_up(&qhp->wait); out: mutex_unlock(&qhp->mutex); if (terminate) post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); /* * If disconnect is 1, then we need to initiate a disconnect * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). */ if (disconnect) { c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : GFP_KERNEL); c4iw_put_ep(&ep->com); } /* * If free is 1, then we've disassociated the EP from the QP * and we need to dereference the EP. */ if (free) c4iw_put_ep(&ep->com); CTR2(KTR_IW_CXGBE, "%s exit state %d", __func__, qhp->attr.state); return ret; } static int enable_qp_db(int id, void *p, void *data) { struct c4iw_qp *qp = p; t4_enable_wq_db(&qp->wq); return 0; } int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_qp_attributes attrs; struct c4iw_ucontext *ucontext; CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ib_qp); qhp = to_c4iw_qp(ib_qp); rhp = qhp->rhp; attrs.next_state = C4IW_QP_STATE_ERROR; if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); else c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); spin_lock_irq(&rhp->lock); remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); rhp->qpcnt--; BUG_ON(rhp->qpcnt < 0); if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { rhp->rdev.stats.db_state_transitions++; rhp->db_state = NORMAL; idr_for_each(&rhp->qpidr, enable_qp_db, NULL); } spin_unlock_irq(&rhp->lock); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); CTR3(KTR_IW_CXGBE, "%s ib_qp %p qpid 0x%0x", __func__, ib_qp, qhp->wq.sq.qid); kfree(qhp); return 0; } static int disable_qp_db(int id, void *p, void *data) { struct c4iw_qp *qp = p; t4_disable_wq_db(&qp->wq); return 0; } struct ib_qp * c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; struct c4iw_pd *php; struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4; CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd); if (attrs->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); php = to_c4iw_pd(pd); rhp = php->rhp; schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid); rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid); if (!schp || !rchp) return ERR_PTR(-EINVAL); if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); if (rqsize > T4_MAX_RQ_SIZE) return ERR_PTR(-E2BIG); sqsize = roundup(attrs->cap.max_send_wr + 1, 16); if (sqsize > T4_MAX_SQ_SIZE) return ERR_PTR(-E2BIG); ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.sq.size = sqsize; qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; qhp->wq.rq.size = rqsize; qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); } CTR5(KTR_IW_CXGBE, "%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu", __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize); ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); if (ret) goto err1; attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize - 1; attrs->cap.max_inline_data = T4_MAX_SEND_INLINE; qhp->rhp = rhp; qhp->attr.pd = php->pdid; qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; qhp->attr.state = C4IW_QP_STATE_IDLE; qhp->attr.next_state = C4IW_QP_STATE_IDLE; qhp->attr.enable_rdma_read = 1; qhp->attr.enable_rdma_write = 1; qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); spin_lock_irq(&rhp->lock); if (rhp->db_state != NORMAL) t4_disable_wq_db(&qhp->wq); if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { rhp->rdev.stats.db_state_transitions++; rhp->db_state = FLOW_CONTROL; idr_for_each(&rhp->qpidr, disable_qp_db, NULL); } ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); spin_unlock_irq(&rhp->lock); if (ret) goto err2; if (udata) { mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); if (!mm1) { ret = -ENOMEM; goto err3; } mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); if (!mm2) { ret = -ENOMEM; goto err4; } mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); if (!mm3) { ret = -ENOMEM; goto err5; } mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); if (!mm4) { ret = -ENOMEM; goto err6; } uresp.flags = 0; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; uresp.sq_memsize = qhp->wq.sq.memsize; uresp.rqid = qhp->wq.rq.qid; uresp.rq_size = qhp->wq.rq.size; uresp.rq_memsize = qhp->wq.rq.memsize; spin_lock(&ucontext->mmap_lock); uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.sq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; uresp.rq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) goto err7; mm1->key = uresp.sq_key; mm1->addr = qhp->wq.sq.phys_addr; mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); CTR4(KTR_IW_CXGBE, "%s mm1 %x, %x, %d", __func__, mm1->key, mm1->addr, mm1->len); insert_mmap(ucontext, mm1); mm2->key = uresp.rq_key; mm2->addr = vtophys(qhp->wq.rq.queue); mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); CTR4(KTR_IW_CXGBE, "%s mm2 %x, %x, %d", __func__, mm2->key, mm2->addr, mm2->len); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; mm3->addr = qhp->wq.sq.udb; mm3->len = PAGE_SIZE; CTR4(KTR_IW_CXGBE, "%s mm3 %x, %x, %d", __func__, mm3->key, mm3->addr, mm3->len); insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; mm4->addr = qhp->wq.rq.udb; mm4->len = PAGE_SIZE; CTR4(KTR_IW_CXGBE, "%s mm4 %x, %x, %d", __func__, mm4->key, mm4->addr, mm4->len); insert_mmap(ucontext, mm4); } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); CTR5(KTR_IW_CXGBE, "%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x", __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); return &qhp->ibqp; err7: kfree(mm4); err6: kfree(mm3); err5: kfree(mm2); err4: kfree(mm1); err3: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err2: destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); err1: kfree(qhp); return ERR_PTR(ret); } int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_qp *qhp; enum c4iw_qp_attr_mask mask = 0; struct c4iw_qp_attributes attrs; CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ibqp); /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) attr_mask &= ~IB_QP_STATE; /* Make sure we still have something left to do */ if (!attr_mask) return 0; memset(&attrs, 0, sizeof attrs); qhp = to_c4iw_qp(ibqp); rhp = qhp->rhp; attrs.next_state = c4iw_convert_state(attr->qp_state); attrs.enable_rdma_read = (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) ? 1 : 0; attrs.enable_rdma_write = (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0; mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? (C4IW_QP_ATTR_ENABLE_RDMA_READ | C4IW_QP_ATTR_ENABLE_RDMA_WRITE | C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) { CTR3(KTR_IW_CXGBE, "%s ib_dev %p qpn 0x%x", __func__, dev, qpn); return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { struct c4iw_qp *qhp = to_c4iw_qp(ibqp); memset(attr, 0, sizeof *attr); memset(init_attr, 0, sizeof *init_attr); attr->qp_state = to_ib_qp_state(qhp->attr.state); return 0; } #endif Index: head/sys/ofed/drivers/infiniband/core/uverbs_main.c =================================================================== --- head/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 289577) +++ head/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 289578) @@ -1,1435 +1,1435 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "uverbs.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand userspace verbs access"); MODULE_LICENSE("Dual BSD/GPL"); enum { IB_UVERBS_MAJOR = 231, IB_UVERBS_BASE_MINOR = 192, IB_UVERBS_MAX_DEVICES = 32 }; #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static int uverbs_copy_from_udata_ex(void *dest, struct ib_udata *udata, size_t len) { return copy_from_user(dest, udata->inbuf, min(udata->inlen, len)) ? -EFAULT : 0; } static int uverbs_copy_to_udata_ex(struct ib_udata *udata, void *src, size_t len) { return copy_to_user(udata->outbuf, src, min(udata->outlen, len)) ? -EFAULT : 0; } static struct ib_udata_ops uverbs_copy_ex = { .copy_from = uverbs_copy_from_udata_ex, .copy_to = uverbs_copy_to_udata_ex }; #define INIT_UDATA_EX(udata, ibuf, obuf, ilen, olen) \ do { \ (udata)->ops = &uverbs_copy_ex; \ (udata)->inbuf = (void __user *)(ibuf); \ (udata)->outbuf = (void __user *)(obuf); \ (udata)->inlen = (ilen); \ (udata)->outlen = (olen); \ } while (0) static struct class *uverbs_class; DEFINE_SPINLOCK(ib_uverbs_idr_lock); DEFINE_IDR(ib_uverbs_pd_idr); DEFINE_IDR(ib_uverbs_mr_idr); DEFINE_IDR(ib_uverbs_mw_idr); DEFINE_IDR(ib_uverbs_ah_idr); DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); DEFINE_IDR(ib_uverbs_rule_idr); DEFINE_IDR(ib_uverbs_dct_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send, [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv, [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv, [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah, [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, }; static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow, }; static ssize_t (*uverbs_exp_cmd_table[])(struct ib_uverbs_file *file, struct ib_udata *ucore, struct ib_udata *uhw) = { [IB_USER_VERBS_EXP_CMD_CREATE_QP] = ib_uverbs_exp_create_qp, [IB_USER_VERBS_EXP_CMD_MODIFY_CQ] = ib_uverbs_exp_modify_cq, [IB_USER_VERBS_EXP_CMD_MODIFY_QP] = ib_uverbs_exp_modify_qp, [IB_USER_VERBS_EXP_CMD_CREATE_CQ] = ib_uverbs_exp_create_cq, [IB_USER_VERBS_EXP_CMD_QUERY_DEVICE] = ib_uverbs_exp_query_device, [IB_USER_VERBS_EXP_CMD_CREATE_DCT] = ib_uverbs_exp_create_dct, [IB_USER_VERBS_EXP_CMD_DESTROY_DCT] = ib_uverbs_exp_destroy_dct, [IB_USER_VERBS_EXP_CMD_QUERY_DCT] = ib_uverbs_exp_query_dct, }; static void ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device); static void ib_uverbs_release_dev(struct kref *ref) { struct ib_uverbs_device *dev = container_of(ref, struct ib_uverbs_device, ref); complete(&dev->comp); } static void ib_uverbs_release_event_file(struct kref *ref) { struct ib_uverbs_event_file *file = container_of(ref, struct ib_uverbs_event_file, ref); kfree(file); } void ib_uverbs_release_ucq(struct ib_uverbs_file *file, struct ib_uverbs_event_file *ev_file, struct ib_ucq_object *uobj) { struct ib_uverbs_event *evt, *tmp; if (ev_file) { spin_lock_irq(&ev_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&ev_file->lock); kref_put(&ev_file->ref, ib_uverbs_release_event_file); } spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } void ib_uverbs_release_uevent(struct ib_uverbs_file *file, struct ib_uevent_object *uobj) { struct ib_uverbs_event *evt, *tmp; spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } spin_unlock_irq(&file->async_file->lock); } static void ib_uverbs_detach_umcast(struct ib_qp *qp, struct ib_uqp_object *uobj) { struct ib_uverbs_mcast_entry *mcast, *tmp; list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) { ib_detach_mcast(qp, &mcast->gid, mcast->lid); list_del(&mcast->list); kfree(mcast); } } static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, struct ib_ucontext *context) { struct ib_uobject *uobj, *tmp; int err; if (!context) return 0; context->closing = 1; list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { struct ib_ah *ah = uobj->object; idr_remove_uobj(&ib_uverbs_ah_idr, uobj); ib_destroy_ah(ah); kfree(uobj); } /* Remove MWs before QPs, in order to support type 2A MWs. */ list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { struct ib_mw *mw = uobj->object; idr_remove_uobj(&ib_uverbs_mw_idr, uobj); err = ib_dealloc_mw(mw); if (err) { pr_info("user_verbs: couldn't deallocate MW during cleanup.\n"); pr_info("user_verbs: the system may have become unstable.\n"); } kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { struct ib_flow *flow_id = uobj->object; idr_remove_uobj(&ib_uverbs_rule_idr, uobj); ib_destroy_flow(flow_id); kfree(uobj); } list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); ib_uverbs_detach_umcast(qp, uqp); err = ib_destroy_qp(qp); if (err) pr_info("destroying uverbs qp failed: err %d\n", err); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } list_for_each_entry_safe(uobj, tmp, &context->dct_list, list) { struct ib_dct *dct = uobj->object; struct ib_udct_object *udct = container_of(uobj, struct ib_udct_object, uobject); idr_remove_uobj(&ib_uverbs_dct_idr, uobj); err = ib_destroy_dct(dct); if (err) pr_info("destroying uverbs dct failed: err %d\n", err); kfree(udct); } list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) { struct ib_srq *srq = uobj->object; struct ib_uevent_object *uevent = container_of(uobj, struct ib_uevent_object, uobject); idr_remove_uobj(&ib_uverbs_srq_idr, uobj); err = ib_destroy_srq(srq); if (err) pr_info("destroying uverbs srq failed: err %d\n", err); ib_uverbs_release_uevent(file, uevent); kfree(uevent); } list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { struct ib_cq *cq = uobj->object; struct ib_uverbs_event_file *ev_file = cq->cq_context; struct ib_ucq_object *ucq = container_of(uobj, struct ib_ucq_object, uobject); idr_remove_uobj(&ib_uverbs_cq_idr, uobj); err = ib_destroy_cq(cq); if (err) pr_info("destroying uverbs cq failed: err %d\n", err); ib_uverbs_release_ucq(file, ev_file, ucq); kfree(ucq); } list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; idr_remove_uobj(&ib_uverbs_mr_idr, uobj); err = ib_dereg_mr(mr); if (err) { pr_info("user_verbs: couldn't deregister an MR during cleanup.\n"); pr_info("user_verbs: the system may have become unstable.\n"); } kfree(uobj); } mutex_lock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { struct ib_xrcd *xrcd = uobj->object; struct ib_uxrcd_object *uxrcd = container_of(uobj, struct ib_uxrcd_object, uobject); idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); ib_uverbs_dealloc_xrcd(file->device, xrcd); kfree(uxrcd); } mutex_unlock(&file->device->xrcd_tree_mutex); list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { struct ib_pd *pd = uobj->object; idr_remove_uobj(&ib_uverbs_pd_idr, uobj); ib_dealloc_pd(pd); kfree(uobj); } return context->device->dealloc_ucontext(context); } static void ib_uverbs_release_file(struct kref *ref) { struct ib_uverbs_file *file = container_of(ref, struct ib_uverbs_file, ref); module_put(file->device->ib_dev->owner); kref_put(&file->device->ref, ib_uverbs_release_dev); kfree(file); } static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *event; int eventsz; int ret = 0; spin_lock_irq(&file->lock); while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) eventsz = sizeof (struct ib_uverbs_async_event_desc); else eventsz = sizeof (struct ib_uverbs_comp_event_desc); if (eventsz > count) { ret = -EINVAL; event = NULL; } else { list_del(file->event_list.next); if (event->counter) { ++(*event->counter); list_del(&event->obj_list); } } spin_unlock_irq(&file->lock); if (event) { if (copy_to_user(buf, event, eventsz)) ret = -EFAULT; else ret = eventsz; } kfree(event); return ret; } static unsigned int ib_uverbs_event_poll(struct file *filp, struct poll_table_struct *wait) { unsigned int pollflags = 0; struct ib_uverbs_event_file *file = filp->private_data; file->filp = filp; poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) { struct ib_uverbs_event_file *file = filp->private_data; return fasync_helper(fd, filp, on, &file->async_queue); } static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; struct ib_uverbs_event *entry, *tmp; spin_lock_irq(&file->lock); file->is_closed = 1; list_for_each_entry_safe(entry, tmp, &file->event_list, list) { if (entry->counter) list_del(&entry->obj_list); kfree(entry); } spin_unlock_irq(&file->lock); if (file->is_async) { ib_unregister_event_handler(&file->uverbs_file->event_handler); kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); } kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static const struct file_operations uverbs_event_fops = { .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, .fasync = ib_uverbs_event_fasync, .llseek = no_llseek, }; void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { struct ib_uverbs_event_file *file = cq_context; struct ib_ucq_object *uobj; struct ib_uverbs_event *entry; unsigned long flags; if (!file) return; spin_lock_irqsave(&file->lock, flags); if (file->is_closed) { spin_unlock_irqrestore(&file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->lock, flags); return; } uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); spin_unlock_irqrestore(&file->lock, flags); wake_up_interruptible(&file->poll_wait); if (file->filp) selwakeup(&file->filp->f_selinfo); kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, __u64 element, __u64 event, struct list_head *obj_list, u32 *counter) { struct ib_uverbs_event *entry; unsigned long flags; spin_lock_irqsave(&file->async_file->lock, flags); if (file->async_file->is_closed) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) { spin_unlock_irqrestore(&file->async_file->lock, flags); return; } entry->desc.async.element = element; entry->desc.async.event_type = event; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); spin_unlock_irqrestore(&file->async_file->lock, flags); wake_up_interruptible(&file->async_file->poll_wait); if (file->async_file->filp) selwakeup(&file->async_file->filp->f_selinfo); kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_ucq_object *uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); } void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.qp->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) { struct ib_uevent_object *uobj; uobj = container_of(event->element.srq->uobject, struct ib_uevent_object, uobject); ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, event->event, &uobj->event_list, &uobj->events_reported); } void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); ib_uverbs_async_handler(file, event->element.port_num, event->event, NULL, NULL); } struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, int is_async) { struct ib_uverbs_event_file *ev_file; struct file *filp; ev_file = kzalloc(sizeof *ev_file, GFP_KERNEL); if (!ev_file) return ERR_PTR(-ENOMEM); kref_init(&ev_file->ref); spin_lock_init(&ev_file->lock); INIT_LIST_HEAD(&ev_file->event_list); init_waitqueue_head(&ev_file->poll_wait); ev_file->uverbs_file = uverbs_file; ev_file->is_async = is_async; /* * fops_get() can't fail here, because we're coming from a * system call on a uverbs file, which will already have a * module reference. */ filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops)); if (IS_ERR(filp)) { kfree(ev_file); } else { filp->private_data = ev_file; } return filp; } /* * Look up a completion event file by FD. If lookup is successful, * takes a ref to the event file struct that it returns; if * unsuccessful, returns NULL. */ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; struct fd f = fdget(fd); if (!f.file) return NULL; if (f.file->f_op != &uverbs_event_fops) goto out; ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; } kref_get(&ev_file->ref); out: fdput(f); return ev_file; } static const char *verbs_cmd_str(__u32 cmd) { switch (cmd) { case IB_USER_VERBS_CMD_GET_CONTEXT: return "GET_CONTEXT"; case IB_USER_VERBS_CMD_QUERY_DEVICE: return "QUERY_DEVICE"; case IB_USER_VERBS_CMD_QUERY_PORT: return "QUERY_PORT"; case IB_USER_VERBS_CMD_ALLOC_PD: return "ALLOC_PD"; case IB_USER_VERBS_CMD_DEALLOC_PD: return "DEALLOC_PD"; case IB_USER_VERBS_CMD_REG_MR: return "REG_MR"; case IB_USER_VERBS_CMD_DEREG_MR: return "DEREG_MR"; case IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL: return "CREATE_COMP_CHANNEL"; case IB_USER_VERBS_CMD_CREATE_CQ: return "CREATE_CQ"; case IB_USER_VERBS_CMD_RESIZE_CQ: return "RESIZE_CQ"; case IB_USER_VERBS_CMD_POLL_CQ: return "POLL_CQ"; case IB_USER_VERBS_CMD_REQ_NOTIFY_CQ: return "REQ_NOTIFY_CQ"; case IB_USER_VERBS_CMD_DESTROY_CQ: return "DESTROY_CQ"; case IB_USER_VERBS_CMD_CREATE_QP: return "CREATE_QP"; case IB_USER_VERBS_CMD_QUERY_QP: return "QUERY_QP"; case IB_USER_VERBS_CMD_MODIFY_QP: return "MODIFY_QP"; case IB_USER_VERBS_CMD_DESTROY_QP: return "DESTROY_QP"; case IB_USER_VERBS_CMD_POST_SEND: return "POST_SEND"; case IB_USER_VERBS_CMD_POST_RECV: return "POST_RECV"; case IB_USER_VERBS_CMD_POST_SRQ_RECV: return "POST_SRQ_RECV"; case IB_USER_VERBS_CMD_CREATE_AH: return "CREATE_AH"; case IB_USER_VERBS_CMD_DESTROY_AH: return "DESTROY_AH"; case IB_USER_VERBS_CMD_ATTACH_MCAST: return "ATTACH_MCAST"; case IB_USER_VERBS_CMD_DETACH_MCAST: return "DETACH_MCAST"; case IB_USER_VERBS_CMD_CREATE_SRQ: return "CREATE_SRQ"; case IB_USER_VERBS_CMD_MODIFY_SRQ: return "MODIFY_SRQ"; case IB_USER_VERBS_CMD_QUERY_SRQ: return "QUERY_SRQ"; case IB_USER_VERBS_CMD_DESTROY_SRQ: return "DESTROY_SRQ"; case IB_USER_VERBS_CMD_OPEN_XRCD: return "OPEN_XRCD"; case IB_USER_VERBS_CMD_CLOSE_XRCD: return "CLOSE_XRCD"; case IB_USER_VERBS_CMD_CREATE_XSRQ: return "CREATE_XSRQ"; case IB_USER_VERBS_CMD_OPEN_QP: return "OPEN_QP"; } return "Unknown command"; } enum { COMMAND_INFO_MASK = 0x1000, }; static ssize_t ib_uverbs_exp_handle_cmd(struct ib_uverbs_file *file, const char __user *buf, struct ib_device *dev, struct ib_uverbs_cmd_hdr *hdr, size_t count, int legacy_ex_cmd) { struct ib_udata ucore; struct ib_udata uhw; struct ib_uverbs_ex_cmd_hdr ex_hdr; __u32 command = hdr->command - IB_USER_VERBS_EXP_CMD_FIRST; if (hdr->command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | IB_USER_VERBS_CMD_COMMAND_MASK)) return -EINVAL; if (command >= ARRAY_SIZE(uverbs_exp_cmd_table) || !uverbs_exp_cmd_table[command]) return -EINVAL; if (!file->ucontext) return -EINVAL; if (!(dev->uverbs_exp_cmd_mask & (1ull << command))) return -ENOSYS; if (legacy_ex_cmd) { struct ib_uverbs_ex_cmd_hdr_legacy hxl; struct ib_uverbs_ex_cmd_resp1_legacy resp1; __u64 response; ssize_t ret; if (count < sizeof(hxl)) return -EINVAL; if (copy_from_user(&hxl, buf, sizeof(hxl))) return -EFAULT; if (((hxl.in_words + hxl.provider_in_words) * 4) != count) return -EINVAL; count -= sizeof(hxl); buf += sizeof(hxl); if (hxl.out_words || hxl.provider_out_words) { if (count < sizeof(resp1)) return -EINVAL; if (copy_from_user(&resp1, buf, sizeof(resp1))) return -EFAULT; response = resp1.response; if (!response) return -EINVAL; /* * Change user buffer to comply with new extension format. */ if (sizeof(resp1.comp_mask) != sizeof(resp1.response)) return -EFAULT; buf += sizeof(resp1.comp_mask); if (copy_to_user(__DECONST(void __user *, buf), &resp1.comp_mask, sizeof(resp1.response))) return -EFAULT; } else { response = 0; } INIT_UDATA_EX(&ucore, (hxl.in_words) ? buf : 0, response, hxl.in_words * 4, hxl.out_words * 4); INIT_UDATA_EX(&uhw, (hxl.provider_in_words) ? buf + ucore.inlen : 0, (hxl.provider_out_words) ? response + ucore.outlen : 0, hxl.provider_in_words * 4, hxl.provider_out_words * 4); ret = uverbs_exp_cmd_table[command](file, &ucore, &uhw); /* * UnChange user buffer */ if (response && copy_to_user(__DECONST(void __user *, buf), &resp1.response, sizeof(resp1.response))) return -EFAULT; return ret; } else { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT; buf += sizeof(hdr) + sizeof(ex_hdr); if ((hdr->in_words + ex_hdr.provider_in_words) * 8 != count) return -EINVAL; if (ex_hdr.response) { if (!hdr->out_words && !ex_hdr.provider_out_words) return -EINVAL; } else { if (hdr->out_words || ex_hdr.provider_out_words) return -EINVAL; } INIT_UDATA_EX(&ucore, (hdr->in_words) ? buf : 0, (unsigned long)ex_hdr.response, hdr->in_words * 8, hdr->out_words * 8); INIT_UDATA_EX(&uhw, (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0, (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); return uverbs_exp_cmd_table[command](file, &ucore, &uhw); } } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, size_t count, loff_t *pos) { struct ib_uverbs_file *file = filp->private_data; struct ib_device *dev = file->device->ib_dev; struct ib_uverbs_cmd_hdr hdr; struct timespec ts1; struct timespec ts2; ktime_t t1, t2, delta; s64 ds; ssize_t ret; u64 dividend; u32 divisor; __u32 flags; __u32 command; int legacy_ex_cmd = 0; size_t written_count = count; if (count < sizeof hdr) return -EINVAL; if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; /* * For BWD compatibility change old style extension verbs commands * to their equivalent experimental command. */ if ((hdr.command >= IB_USER_VERBS_LEGACY_CMD_FIRST) && (hdr.command <= IB_USER_VERBS_LEGACY_EX_CMD_LAST)) { hdr.command += IB_USER_VERBS_EXP_CMD_FIRST - IB_USER_VERBS_LEGACY_CMD_FIRST; legacy_ex_cmd = 1; } flags = (hdr.command & IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; ktime_get_ts(&ts1); if (!flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST)) { ret = ib_uverbs_exp_handle_cmd(file, buf, dev, &hdr, count, legacy_ex_cmd); } else if (!flags) { if (command >= ARRAY_SIZE(uverbs_cmd_table) || !uverbs_cmd_table[command]) return -EINVAL; if (!file->ucontext && command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; if (!(dev->uverbs_cmd_mask & (1ull << command))) return -ENOSYS; if (hdr.in_words * 4 != count) return -EINVAL; ret = uverbs_cmd_table[command](file, buf + sizeof(hdr), hdr.in_words * 4, hdr.out_words * 4); } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { struct ib_udata ucore; struct ib_udata uhw; struct ib_uverbs_ex_cmd_hdr ex_hdr; if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | IB_USER_VERBS_CMD_COMMAND_MASK)) return -EINVAL; if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || !uverbs_ex_cmd_table[command]) return -EINVAL; if (!file->ucontext) return -EINVAL; if (!(dev->uverbs_ex_cmd_mask & (1ull << command))) return -ENOSYS; if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT; count -= sizeof(hdr) + sizeof(ex_hdr); buf += sizeof(hdr) + sizeof(ex_hdr); if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) return -EINVAL; if (ex_hdr.response) { if (!hdr.out_words && !ex_hdr.provider_out_words) return -EINVAL; } else { if (hdr.out_words || ex_hdr.provider_out_words) return -EINVAL; } INIT_UDATA_EX(&ucore, (hdr.in_words) ? buf : 0, (unsigned long)ex_hdr.response, hdr.in_words * 8, hdr.out_words * 8); INIT_UDATA_EX(&uhw, (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0, (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0, ex_hdr.provider_in_words * 8, ex_hdr.provider_out_words * 8); ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw); if (ret) return ret; return written_count; } else { return -EFAULT; } if ((dev->cmd_perf & (COMMAND_INFO_MASK - 1)) == hdr.command) { ktime_get_ts(&ts2); t1 = timespec_to_ktime(ts1); t2 = timespec_to_ktime(ts2); delta = ktime_sub(t2, t1); ds = ktime_to_ns(delta); spin_lock(&dev->cmd_perf_lock); dividend = dev->cmd_avg * dev->cmd_n + ds; ++dev->cmd_n; divisor = dev->cmd_n; do_div(dividend, divisor); dev->cmd_avg = dividend; spin_unlock(&dev->cmd_perf_lock); if (dev->cmd_perf & COMMAND_INFO_MASK) { pr_info("%s: %s execution time = %lld nsec\n", file->device->ib_dev->name, verbs_cmd_str(hdr.command), (long long)ds); } } return ret; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) { struct ib_uverbs_file *file = filp->private_data; if (!file->ucontext) return -ENODEV; else return file->device->ib_dev->mmap(file->ucontext, vma); } /* XXX Not supported in FreeBSD */ #if 0 static unsigned long ib_uverbs_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct ib_uverbs_file *file = filp->private_data; if (!file->ucontext) return -ENODEV; else { if (!file->device->ib_dev->get_unmapped_area) return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); return file->device->ib_dev->get_unmapped_area(filp, addr, len, pgoff, flags); } } #endif static long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct ib_uverbs_file *file = filp->private_data; if (!file->device->ib_dev->ioctl) return -ENOTSUPP; if (!file->ucontext) return -ENODEV; else /* provider should provide it's own locking mechanism */ return file->device->ib_dev->ioctl(file->ucontext, cmd, arg); } /* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the open method will either immediately run -ENXIO, or all * required initialization will be done. */ static int ib_uverbs_open(struct inode *inode, struct file *filp) { struct ib_uverbs_device *dev; struct ib_uverbs_file *file; int ret; dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev); if (dev) kref_get(&dev->ref); else return -ENXIO; if (!try_module_get(dev->ib_dev->owner)) { ret = -ENODEV; goto err; } file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { ret = -ENOMEM; goto err_module; } file->device = dev; file->ucontext = NULL; file->async_file = NULL; kref_init(&file->ref); mutex_init(&file->mutex); filp->private_data = file; return nonseekable_open(inode, filp); err_module: module_put(dev->ib_dev->owner); err: kref_put(&dev->ref, ib_uverbs_release_dev); return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; ib_uverbs_cleanup_ucontext(file, file->ucontext); if (file->async_file) kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; } static const struct file_operations uverbs_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, .unlocked_ioctl = ib_uverbs_ioctl, }; static const struct file_operations uverbs_mmap_fops = { .owner = THIS_MODULE, .write = ib_uverbs_write, .mmap = ib_uverbs_mmap, .open = ib_uverbs_open, .release = ib_uverbs_close, .llseek = no_llseek, /* XXX Not supported in FreeBSD */ #if 0 .get_unmapped_area = ib_uverbs_get_unmapped_area, #endif .unlocked_ioctl = ib_uverbs_ioctl, }; static struct ib_client uverbs_client = { .name = "uverbs", .add = ib_uverbs_add_one, .remove = ib_uverbs_remove_one }; static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "%s\n", dev->ib_dev->name); } static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static ssize_t show_dev_ref_cnt(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; - return sprintf(buf, "%d\n", dev->ref.count); + return sprintf(buf, "%d\n", atomic_read(&dev->ref.refcount)); } static DEVICE_ATTR(ref_cnt, S_IRUGO, show_dev_ref_cnt, NULL); static ssize_t show_dev_abi_version(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver); } static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf) { return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); } static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); static dev_t overflow_maj; static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES); /* * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by * requesting a new major number and doubling the number of max devices we * support. It's stupid, but simple. */ static int find_overflow_devnum(void) { int ret; if (!overflow_maj) { ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n"); return ret; } } ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES); if (ret >= IB_UVERBS_MAX_DEVICES) return -1; return ret; } #include static ssize_t show_dev_device(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->device); } static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL); static ssize_t show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf) { struct ib_uverbs_device *dev = dev_get_drvdata(device); if (!dev) return -ENODEV; return sprintf(buf, "0x%04x\n", ((struct pci_dev *)dev->ib_dev->dma_device)->vendor); } static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL); struct attribute *device_attrs[] = { &dev_attr_device.attr, &dev_attr_vendor.attr, NULL }; static struct attribute_group device_group = { .name = "device", .attrs = device_attrs }; static void ib_uverbs_add_one(struct ib_device *device) { int devnum; dev_t base; struct ib_uverbs_device *uverbs_dev; if (!device->alloc_ucontext) return; uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL); if (!uverbs_dev) return; kref_init(&uverbs_dev->ref); init_completion(&uverbs_dev->comp); uverbs_dev->xrcd_tree = RB_ROOT; mutex_init(&uverbs_dev->xrcd_tree_mutex); spin_lock(&map_lock); devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); if (devnum >= IB_UVERBS_MAX_DEVICES) { spin_unlock(&map_lock); devnum = find_overflow_devnum(); if (devnum < 0) goto err; spin_lock(&map_lock); uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES; base = devnum + overflow_maj; set_bit(devnum, overflow_map); } else { uverbs_dev->devnum = devnum; base = devnum + IB_UVERBS_BASE_DEV; set_bit(devnum, dev_map); } spin_unlock(&map_lock); uverbs_dev->ib_dev = device; uverbs_dev->num_comp_vectors = device->num_comp_vectors; cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops; kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum); if (cdev_add(&uverbs_dev->cdev, base, 1)) goto err_cdev; uverbs_dev->dev = device_create(uverbs_class, device->dma_device, uverbs_dev->cdev.dev, uverbs_dev, "uverbs%d", uverbs_dev->devnum); if (IS_ERR(uverbs_dev->dev)) goto err_cdev; if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_ref_cnt)) goto err_class; if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version)) goto err_class; if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group)) goto err_class; ib_set_client_data(device, &uverbs_client, uverbs_dev); return; err_class: device_destroy(uverbs_class, uverbs_dev->cdev.dev); err_cdev: cdev_del(&uverbs_dev->cdev); if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(devnum, dev_map); else clear_bit(devnum, overflow_map); err: kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); return; } static void ib_uverbs_remove_one(struct ib_device *device) { struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); if (!uverbs_dev) return; sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group); dev_set_drvdata(uverbs_dev->dev, NULL); device_destroy(uverbs_class, uverbs_dev->cdev.dev); cdev_del(&uverbs_dev->cdev); if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES) clear_bit(uverbs_dev->devnum, dev_map); else clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map); kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); wait_for_completion(&uverbs_dev->comp); kfree(uverbs_dev); } static char *uverbs_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } static int __init ib_uverbs_init(void) { int ret; ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, "infiniband_verbs"); if (ret) { printk(KERN_ERR "user_verbs: couldn't register device number\n"); goto out; } uverbs_class = class_create(THIS_MODULE, "infiniband_verbs"); if (IS_ERR(uverbs_class)) { ret = PTR_ERR(uverbs_class); printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } uverbs_class->devnode = uverbs_devnode; ret = class_create_file(uverbs_class, &class_attr_abi_version); if (ret) { printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&uverbs_client); if (ret) { printk(KERN_ERR "user_verbs: couldn't register client\n"); goto out_class; } return 0; out_class: class_destroy(uverbs_class); out_chrdev: unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); out: return ret; } static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); class_destroy(uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); if (overflow_maj) unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES); idr_destroy(&ib_uverbs_pd_idr); idr_destroy(&ib_uverbs_mr_idr); idr_destroy(&ib_uverbs_mw_idr); idr_destroy(&ib_uverbs_ah_idr); idr_destroy(&ib_uverbs_cq_idr); idr_destroy(&ib_uverbs_qp_idr); idr_destroy(&ib_uverbs_srq_idr); } module_init(ib_uverbs_init); module_exit(ib_uverbs_cleanup); Index: head/sys/ofed/include/linux/kref.h =================================================================== --- head/sys/ofed/include/linux/kref.h (revision 289577) +++ head/sys/ofed/include/linux/kref.h (revision 289578) @@ -1,64 +1,88 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2015 Mellanox Technologies, Ltd. + * Copyright (c) 2013 François Tigeot * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _LINUX_KREF_H_ #define _LINUX_KREF_H_ #include #include +#include + struct kref { - volatile u_int count; + atomic_t refcount; }; static inline void kref_init(struct kref *kref) { - refcount_init(&kref->count, 1); + refcount_init(&kref->refcount.counter, 1); } static inline void kref_get(struct kref *kref) { - refcount_acquire(&kref->count); + refcount_acquire(&kref->refcount.counter); } static inline int kref_put(struct kref *kref, void (*rel)(struct kref *kref)) { - if (refcount_release(&kref->count)) { + if (refcount_release(&kref->refcount.counter)) { rel(kref); return 1; } return 0; +} + +static inline int +kref_sub(struct kref *kref, unsigned int count, + void (*rel)(struct kref *kref)) +{ + + while (count--) { + if (refcount_release(&kref->refcount.counter)) { + rel(kref); + return 1; + } + } + return 0; +} + +static inline int __must_check +kref_get_unless_zero(struct kref *kref) +{ + + return atomic_add_unless(&kref->refcount, 1, 0); } #endif /* _LINUX_KREF_H_ */