diff --git a/sys/dev/bnxt/bnxt_re/bnxt_re.h b/sys/dev/bnxt/bnxt_re/bnxt_re.h index 56b3c8c0488c..fe7a27f4e216 100644 --- a/sys/dev/bnxt/bnxt_re/bnxt_re.h +++ b/sys/dev/bnxt/bnxt_re/bnxt_re.h @@ -1,1077 +1,1077 @@ /* * Copyright (c) 2015-2024, Broadcom. All rights reserved. The term * Broadcom refers to Broadcom Limited and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Description: main (header) */ #ifndef __BNXT_RE_H__ #define __BNXT_RE_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bnxt.h" #include "bnxt_ulp.h" #include "hsi_struct_def.h" #include "qplib_res.h" #include "qplib_sp.h" #include "qplib_fp.h" #include "qplib_rcfw.h" #include "ib_verbs.h" #include "stats.h" #define ROCE_DRV_MODULE_NAME "bnxt_re" #define ROCE_DRV_MODULE_VERSION "230.0.133.0" #define ROCE_DRV_MODULE_RELDATE "April 22, 2024" #define BNXT_RE_REF_WAIT_COUNT 20 #define BNXT_RE_ROCE_V1_ETH_TYPE 0x8915 #define BNXT_RE_ROCE_V2_PORT_NO 4791 #define BNXT_RE_RES_FREE_WAIT_COUNT 1000 #define BNXT_RE_PAGE_SHIFT_4K (12) #define BNXT_RE_PAGE_SHIFT_8K (13) #define BNXT_RE_PAGE_SHIFT_64K (16) #define BNXT_RE_PAGE_SHIFT_2M (21) #define BNXT_RE_PAGE_SHIFT_8M (23) #define BNXT_RE_PAGE_SHIFT_1G (30) #define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K) #define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K) #define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K) #define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M) #define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M) #define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G) #define BNXT_RE_MAX_MR_SIZE_LOW BIT(BNXT_RE_PAGE_SHIFT_1G) #define BNXT_RE_MAX_MR_SIZE_HIGH BIT(39) #define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH /* Number of MRs to reserve for PF, leaving remainder for VFs */ #define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024) #define BNXT_RE_MAX_GID_PER_VF 128 #define BNXT_RE_MAX_VF_QPS_PER_PF (6 * 1024) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero * @x: value1 * @y: value2 */ #ifndef min_not_zero #define min_not_zero(x, y) ({ \ typeof(x) __x = (x); \ typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) #endif struct ib_mr_init_attr { int max_reg_descriptors; u32 flags; }; struct bnxt_re_dev; int bnxt_re_register_netdevice_notifier(struct notifier_block *nb); int bnxt_re_unregister_netdevice_notifier(struct notifier_block *nb); int ib_register_device_compat(struct bnxt_re_dev *rdev); #ifndef __struct_group #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ union { \ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ } #endif /* __struct_group */ #ifndef struct_group_attr #define struct_group_attr(NAME, ATTRS, MEMBERS...) \ __struct_group(/* no tag */, NAME, ATTRS, MEMBERS) #endif /* struct_group_attr */ /* * Percentage of resources of each type reserved for PF. * Remaining resources are divided equally among VFs. * [0, 100] */ #define BNXT_RE_RQ_WQE_THRESHOLD 32 #define BNXT_RE_UD_QP_HW_STALL 0x400000 /* * Setting the default ack delay value to 16, which means * the default timeout is approx. 260ms(4 usec * 2 ^(timeout)) */ #define BNXT_RE_DEFAULT_ACK_DELAY 16 #define BNXT_RE_BOND_PF_MAX 2 #define BNXT_RE_STATS_CTX_UPDATE_TIMER 250 #define BNXT_RE_30SEC_MSEC (30 * 1000) #define BNXT_RE_BOND_RESCHED_CNT 10 #define BNXT_RE_CHIP_NUM_57454 0xC454 #define BNXT_RE_CHIP_NUM_57452 0xC452 #define BNXT_RE_CHIP_NUM_5745X(chip_num) \ ((chip_num) == BNXT_RE_CHIP_NUM_57454 || \ (chip_num) == BNXT_RE_CHIP_NUM_57452) #define BNXT_RE_MIN_KERNEL_QP_TX_DEPTH 4096 #define BNXT_RE_STOP_QPS_BUDGET 200 #define BNXT_RE_HWRM_CMD_TIMEOUT(rdev) \ ((rdev)->chip_ctx->hwrm_cmd_max_timeout * 1000) extern unsigned int min_tx_depth; extern struct mutex bnxt_re_dev_lock; extern struct mutex bnxt_re_mutex; extern struct list_head bnxt_re_dev_list; struct bnxt_re_ring_attr { dma_addr_t *dma_arr; int pages; int type; u32 depth; u32 lrid; /* Logical ring id */ u16 flags; u8 mode; u8 rsvd; }; #define BNXT_RE_MAX_DEVICES 256 #define BNXT_RE_MSIX_FROM_MOD_PARAM -1 #define BNXT_RE_MIN_MSIX 2 #define BNXT_RE_MAX_MSIX_VF 2 #define BNXT_RE_MAX_MSIX_PF 9 #define BNXT_RE_MAX_MSIX_NPAR_PF 5 #define BNXT_RE_MAX_MSIX 64 #define BNXT_RE_MAX_MSIX_GEN_P5_PF BNXT_RE_MAX_MSIX #define BNXT_RE_GEN_P5_MAX_VF 64 struct bnxt_re_nq_record { struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX]; /* FP Notification Queue (CQ & SRQ) */ struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; int num_msix; int max_init; struct mutex load_lock; }; struct bnxt_re_work { struct work_struct work; unsigned long event; struct bnxt_re_dev *rdev; struct ifnet *vlan_dev; bool do_lag; /* netdev where we received the event */ struct ifnet *netdev; struct auxiliary_device *adev; }; /* * Data structure and defines to handle * recovery */ #define BNXT_RE_RECOVERY_IB_UNINIT_WAIT_RETRY 20 #define BNXT_RE_RECOVERY_IB_UNINIT_WAIT_TIME_MS 30000 /* 30sec timeout */ #define BNXT_RE_PRE_RECOVERY_REMOVE 0x1 #define BNXT_RE_COMPLETE_REMOVE 0x2 #define BNXT_RE_POST_RECOVERY_INIT 0x4 #define BNXT_RE_COMPLETE_INIT 0x8 #define BNXT_RE_COMPLETE_SHUTDOWN 0x10 /* QP1 SQ entry data strucutre */ struct bnxt_re_sqp_entries { u64 wrid; struct bnxt_qplib_sge sge; /* For storing the actual qp1 cqe */ struct bnxt_qplib_cqe cqe; struct bnxt_re_qp *qp1_qp; }; /* GSI QP mode enum */ enum bnxt_re_gsi_mode { BNXT_RE_GSI_MODE_INVALID = 0, BNXT_RE_GSI_MODE_ALL = 1, BNXT_RE_GSI_MODE_ROCE_V1, BNXT_RE_GSI_MODE_ROCE_V2_IPV4, BNXT_RE_GSI_MODE_ROCE_V2_IPV6, BNXT_RE_GSI_MODE_UD }; enum bnxt_re_roce_cap { BNXT_RE_FLAG_ROCEV1_CAP = 1, BNXT_RE_FLAG_ROCEV2_CAP, BNXT_RE_FLAG_ROCEV1_V2_CAP, }; #define BNXT_RE_MAX_GSI_SQP_ENTRIES 1024 struct bnxt_re_gsi_context { u8 gsi_qp_mode; 
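A brief aside on two helpers defined above, illustration only: min_not_zero() returns the smaller operand unless one of them is zero, and BNXT_RE_DEFAULT_ACK_DELAY plugs into the timeout formula quoted in its comment, 4 usec * 2^(timeout), so a value of 16 gives 4 * 65536 = 262144 usec, i.e. the "approx. 260ms" mentioned there. ack_delay_to_usec() is a hypothetical name used only for this sketch, not part of the driver:

static inline u64 ack_delay_to_usec(u32 ack_delay)
{
	/* 4 usec * 2^ack_delay, per the BNXT_RE_DEFAULT_ACK_DELAY comment */
	return 4ULL << ack_delay;
}

/*
 * ack_delay_to_usec(16) == 262144 usec (~260 ms).
 * min_not_zero(0, 8) == 8, min_not_zero(4, 8) == 4, min_not_zero(0, 0) == 0.
 */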
bool first_cq_created; /* Start: used only in gsi_mode_all */ struct bnxt_re_qp *gsi_qp; struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; struct bnxt_re_sqp_entries *sqp_tbl; /* End: used only in gsi_mode_all */ }; struct bnxt_re_tc_rec { u8 cos_id_roce; u8 tc_roce; u8 cos_id_cnp; u8 tc_cnp; u8 tc_def; u8 cos_id_def; u8 max_tc; u8 roce_prio; u8 cnp_prio; u8 roce_dscp; u8 cnp_dscp; u8 prio_valid; u8 dscp_valid; bool ecn_enabled; bool serv_type_enabled; u64 cnp_dscp_bv; u64 roce_dscp_bv; }; struct bnxt_re_dscp2pri { u8 dscp; u8 mask; u8 pri; }; struct bnxt_re_cos2bw_cfg { u8 pad[3]; struct_group_attr(cfg, __packed, u8 queue_id; __le32 min_bw; __le32 max_bw; u8 tsa; u8 pri_lvl; u8 bw_weight; ); u8 unused; }; #define BNXT_RE_AEQ_IDX 0 #define BNXT_RE_MAX_SGID_ENTRIES 256 #define BNXT_RE_DBGFS_FILE_MEM 65536 enum { BNXT_RE_STATS_QUERY = 1, BNXT_RE_QP_QUERY = 2, BNXT_RE_SERVICE_FN_QUERY = 3, }; struct bnxt_re_dbg_file { struct bnxt_re_dev *rdev; u32 type; union { struct bnxt_qplib_query_stats_info sinfo; struct bnxt_qplib_query_fn_info fninfo; }params; char dbg_buf[BNXT_RE_DBGFS_FILE_MEM]; }; struct bnxt_re_debug_entries { /* Dir entries */ struct dentry *qpinfo_dir; struct dentry *service_fn_dir; /* file entries */ struct dentry *stat_query; struct bnxt_re_dbg_file stat_file; struct dentry *qplist_query; struct bnxt_re_dbg_file qp_file; struct dentry *service_fn_query; struct bnxt_re_dbg_file service_fn_file; }; struct bnxt_re_en_dev_info { struct list_head en_list; struct bnxt_en_dev *en_dev; struct bnxt_re_dev *rdev; unsigned long flags; #define BNXT_RE_FLAG_EN_DEV_NETDEV_REG 0 #define BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV 1 #define BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV 2 u8 wqe_mode; u8 gsi_mode; bool te_bypass; bool ib_uninit_done; u32 num_msix_requested; wait_queue_head_t waitq; }; #define BNXT_RE_DB_FIFO_ROOM_MASK_P5 0x1FFF8000 #define BNXT_RE_MAX_FIFO_DEPTH_P5 0x2c00 #define BNXT_RE_DB_FIFO_ROOM_SHIFT 15 #define BNXT_RE_DB_FIFO_ROOM_MASK_P7 0x3FFF8000 #define BNXT_RE_MAX_FIFO_DEPTH_P7 0x8000 #define BNXT_RE_DB_FIFO_ROOM_MASK(ctx) \ (_is_chip_p7((ctx)) ? \ BNXT_RE_DB_FIFO_ROOM_MASK_P7 :\ BNXT_RE_DB_FIFO_ROOM_MASK_P5) #define BNXT_RE_MAX_FIFO_DEPTH(ctx) \ (_is_chip_p7((ctx)) ? 
\ BNXT_RE_MAX_FIFO_DEPTH_P7 :\ BNXT_RE_MAX_FIFO_DEPTH_P5) struct bnxt_dbq_nq_list { int num_nql_entries; u16 nq_id[16]; }; #define BNXT_RE_ASYNC_ERR_REP_BASE(_type) \ (ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_##_type) #define BNXT_RE_ASYNC_ERR_DBR_TRESH(_type) \ (ASYNC_EVENT_CMPL_ERROR_REPORT_DOORBELL_DROP_THRESHOLD_##_type) #define BNXT_RE_EVENT_DBR_EPOCH(data) \ (((data) & \ BNXT_RE_ASYNC_ERR_DBR_TRESH(EVENT_DATA1_EPOCH_MASK)) >> \ BNXT_RE_ASYNC_ERR_DBR_TRESH(EVENT_DATA1_EPOCH_SFT)) #define BNXT_RE_EVENT_ERROR_REPORT_TYPE(data1) \ (((data1) & \ BNXT_RE_ASYNC_ERR_REP_BASE(TYPE_MASK)) >> \ BNXT_RE_ASYNC_ERR_REP_BASE(TYPE_SFT)) #define BNXT_RE_DBR_LIST_ADD(_rdev, _res, _type) \ { \ spin_lock(&(_rdev)->res_list[_type].lock); \ list_add_tail(&(_res)->dbr_list, \ &(_rdev)->res_list[_type].head); \ spin_unlock(&(_rdev)->res_list[_type].lock); \ } #define BNXT_RE_DBR_LIST_DEL(_rdev, _res, _type) \ { \ spin_lock(&(_rdev)->res_list[_type].lock); \ list_del(&(_res)->dbr_list); \ spin_unlock(&(_rdev)->res_list[_type].lock); \ } #define BNXT_RE_CQ_PAGE_LIST_ADD(_uctx, _cq) \ { \ mutex_lock(&(_uctx)->cq_lock); \ list_add_tail(&(_cq)->cq_list, &(_uctx)->cq_list); \ mutex_unlock(&(_uctx)->cq_lock); \ } #define BNXT_RE_CQ_PAGE_LIST_DEL(_uctx, _cq) \ { \ mutex_lock(&(_uctx)->cq_lock); \ list_del(&(_cq)->cq_list); \ mutex_unlock(&(_uctx)->cq_lock); \ } #define BNXT_RE_NETDEV_EVENT(event, x) \ do { \ if ((event) == (x)) \ return #x; \ } while (0) /* Do not change the seq of this enum which is followed by dbr recov */ enum { BNXT_RE_RES_TYPE_CQ = 0, BNXT_RE_RES_TYPE_UCTX, BNXT_RE_RES_TYPE_QP, BNXT_RE_RES_TYPE_SRQ, BNXT_RE_RES_TYPE_MAX }; struct bnxt_re_dbr_res_list { struct list_head head; spinlock_t lock; }; struct bnxt_re_dbr_drop_recov_work { struct work_struct work; struct bnxt_re_dev *rdev; u32 curr_epoch; }; struct bnxt_re_aer_work { struct work_struct work; struct bnxt_re_dev *rdev; }; struct bnxt_re_dbq_stats { u64 fifo_occup_slab_1; u64 fifo_occup_slab_2; u64 fifo_occup_slab_3; u64 fifo_occup_slab_4; u64 fifo_occup_water_mark; u64 do_pacing_slab_1; u64 do_pacing_slab_2; u64 do_pacing_slab_3; u64 do_pacing_slab_4; u64 do_pacing_slab_5; u64 do_pacing_water_mark; }; /* Device debug statistics */ struct bnxt_re_drv_dbg_stats { struct bnxt_re_dbq_stats dbq; }; /* DB pacing counters */ struct bnxt_re_dbr_sw_stats { u64 dbq_int_recv; u64 dbq_int_en; u64 dbq_pacing_resched; u64 dbq_pacing_complete; u64 dbq_pacing_alerts; u64 dbr_drop_recov_events; u64 dbr_drop_recov_timeouts; u64 dbr_drop_recov_timeout_users; u64 dbr_drop_recov_event_skips; }; struct bnxt_re_dev { struct ib_device ibdev; struct list_head list; atomic_t ref_count; atomic_t sched_count; unsigned long flags; #define BNXT_RE_FLAG_NETDEV_REGISTERED 0 #define BNXT_RE_FLAG_IBDEV_REGISTERED 1 #define BNXT_RE_FLAG_GOT_MSIX 2 #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_ALLOC_RCFW 4 #define BNXT_RE_FLAG_NET_RING_ALLOC 5 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 6 #define BNXT_RE_FLAG_ALLOC_CTX 7 #define BNXT_RE_FLAG_STATS_CTX_ALLOC 8 #define BNXT_RE_FLAG_STATS_CTX2_ALLOC 9 #define BNXT_RE_FLAG_RCFW_CHANNEL_INIT 10 #define BNXT_RE_FLAG_WORKER_REG 11 #define BNXT_RE_FLAG_TBLS_ALLOCINIT 12 #define BNXT_RE_FLAG_SETUP_NQ 13 #define BNXT_RE_FLAG_BOND_DEV_REGISTERED 14 #define BNXT_RE_FLAG_PER_PORT_DEBUG_INFO 15 #define BNXT_RE_FLAG_DEV_LIST_INITIALIZED 16 #define BNXT_RE_FLAG_ERR_DEVICE_DETACHED 17 #define BNXT_RE_FLAG_INIT_DCBX_CC_PARAM 18 #define BNXT_RE_FLAG_STOP_IN_PROGRESS 20 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 #define 
BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS 30 struct ifnet *netdev; struct auxiliary_device *adev; struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; struct bnxt_re_nq_record nqr; int id; struct delayed_work worker; u16 worker_30s; struct bnxt_re_tc_rec tc_rec[2]; u8 cur_prio_map; /* RCFW Channel */ struct bnxt_qplib_rcfw rcfw; /* Device Resources */ struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_res qplib_res; struct bnxt_qplib_dpi dpi_privileged; struct bnxt_qplib_cc_param cc_param; struct mutex cc_lock; struct mutex qp_lock; struct list_head qp_list; u8 roce_mode; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; /* Start: QP for handling QP1 packets */ struct bnxt_re_gsi_context gsi_ctx; /* End: QP for handling QP1 packets */ bool is_virtfn; u32 num_vfs; u32 espeed; /* * For storing the speed of slave interfaces. * Same as espeed when bond is not configured */ u32 sl_espeed; /* To be used for a workaround for ISER stack */ u32 min_tx_depth; /* To enable qp debug info. Disabled during driver load */ u32 en_qp_dbg; /* Array to handle gid mapping */ char *gid_map; struct bnxt_re_device_stats stats; struct bnxt_re_drv_dbg_stats *dbg_stats; /* debugfs to expose per port information*/ struct dentry *port_debug_dir; struct dentry *info; struct dentry *drv_dbg_stats; struct dentry *sp_perf_stats; struct dentry *pdev_debug_dir; struct dentry *pdev_qpinfo_dir; struct bnxt_re_debug_entries *dbg_ent; struct workqueue_struct *resolve_wq; struct list_head mac_wq_list; struct workqueue_struct *dcb_wq; struct workqueue_struct *aer_wq; u32 event_bitmap[3]; bool unreg_sched; u64 dbr_throttling_reg_off; u64 dbr_aeq_arm_reg_off; u64 dbr_db_fifo_reg_off; void *dbr_page; u64 dbr_bar_addr; u32 pacing_algo_th; u32 pacing_en_int_th; u32 do_pacing_save; struct workqueue_struct *dbq_wq; struct workqueue_struct *dbr_drop_recov_wq; struct work_struct dbq_fifo_check_work; struct delayed_work dbq_pacing_work; /* protect DB pacing */ struct mutex dbq_lock; /* Control DBR pacing feature. Set if enabled */ bool dbr_pacing; /* Control DBR recovery feature. Set if enabled */ bool dbr_drop_recov; bool user_dbr_drop_recov; /* DBR recovery feature. 
Set if running */ bool dbr_recovery_on; u32 user_dbr_drop_recov_timeout; /* * Value used for pacing algo when pacing is active */ #define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF u32 dbr_do_pacing; u32 dbq_watermark; /* Current watermark set in HW registers */ u32 dbq_nq_id; /* Current NQ ID for DBQ events */ u32 dbq_pacing_time; /* ms */ u32 dbr_def_do_pacing; /* do_pacing when no congestion */ u32 dbr_evt_curr_epoch; bool dbq_int_disable; bool mod_exit; struct bnxt_re_dbr_sw_stats *dbr_sw_stats; struct bnxt_re_dbr_res_list res_list[BNXT_RE_RES_TYPE_MAX]; struct bnxt_dbq_nq_list nq_list; char dev_name[IB_DEVICE_NAME_MAX]; atomic_t dbq_intr_running; u32 num_msix_requested; unsigned char *dev_addr; /* For netdev->dev_addr */ }; #define BNXT_RE_RESOLVE_RETRY_COUNT_US 5000000 /* 5 sec */ struct bnxt_re_resolve_dmac_work{ struct work_struct work; struct list_head list; struct bnxt_re_dev *rdev; struct ib_ah_attr *ah_attr; struct bnxt_re_ah_info *ah_info; atomic_t status_wait; }; static inline u8 bnxt_re_get_prio(u8 prio_map) { u8 prio = 0xFF; for (prio = 0; prio < 8; prio++) if (prio_map & (1UL << prio)) break; return prio; } /* This should be called with bnxt_re_dev_lock mutex held */ static inline bool __bnxt_re_is_rdev_valid(struct bnxt_re_dev *rdev) { struct bnxt_re_dev *tmp_rdev; list_for_each_entry(tmp_rdev, &bnxt_re_dev_list, list) { if (rdev == tmp_rdev) return true; } return false; } static inline bool bnxt_re_is_rdev_valid(struct bnxt_re_dev *rdev) { struct bnxt_re_dev *tmp_rdev; mutex_lock(&bnxt_re_dev_lock); list_for_each_entry(tmp_rdev, &bnxt_re_dev_list, list) { if (rdev == tmp_rdev) { mutex_unlock(&bnxt_re_dev_lock); return true; } } mutex_unlock(&bnxt_re_dev_lock); pr_debug("bnxt_re: %s : Invalid rdev received rdev = %p\n", __func__, rdev); return false; } int bnxt_re_send_hwrm_cmd(struct bnxt_re_dev *rdev, void *cmd, int cmdlen); void bnxt_re_stopqps_and_ib_uninit(struct bnxt_re_dev *rdev); int bnxt_re_set_hwrm_dscp2pri(struct bnxt_re_dev *rdev, struct bnxt_re_dscp2pri *d2p, u16 count, u16 target_id); int bnxt_re_query_hwrm_dscp2pri(struct bnxt_re_dev *rdev, struct bnxt_re_dscp2pri *d2p, u16 *count, u16 target_id); int bnxt_re_query_hwrm_qportcfg(struct bnxt_re_dev *rdev, struct bnxt_re_tc_rec *cnprec, u16 tid); int bnxt_re_hwrm_cos2bw_qcfg(struct bnxt_re_dev *rdev, u16 target_id, struct bnxt_re_cos2bw_cfg *cfg); int bnxt_re_hwrm_cos2bw_cfg(struct bnxt_re_dev *rdev, u16 target_id, struct bnxt_re_cos2bw_cfg *cfg); int bnxt_re_hwrm_pri2cos_cfg(struct bnxt_re_dev *rdev, u16 target_id, u16 port_id, u8 *cos_id_map, u8 pri_map); int bnxt_re_prio_vlan_tx_update(struct bnxt_re_dev *rdev); int bnxt_re_get_slot_pf_count(struct bnxt_re_dev *rdev); struct bnxt_re_dev *bnxt_re_get_peer_pf(struct bnxt_re_dev *rdev); struct bnxt_re_dev *bnxt_re_from_netdev(struct ifnet *netdev); u8 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev, u8 selector); struct bnxt_qplib_nq * bnxt_re_get_nq(struct bnxt_re_dev *rdev); void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq); #define to_bnxt_re(ptr, type, member) \ container_of(ptr, type, member) #define to_bnxt_re_dev(ptr, member) \ container_of((ptr), struct bnxt_re_dev, member) /* Even number functions from port 0 and odd number from port 1 */ #define BNXT_RE_IS_PORT0(rdev) (!(rdev->en_dev->pdev->devfn & 1)) #define BNXT_RE_ROCE_V1_PACKET 0 #define BNXT_RE_ROCEV2_IPV4_PACKET 2 #define BNXT_RE_ROCEV2_IPV6_PACKET 3 #define BNXT_RE_ACTIVE_MAP_PORT1 0x1 /*port-1 active */ #define BNXT_RE_ACTIVE_MAP_PORT2 0x2 /*port-2 active */ #define 
BNXT_RE_MEMBER_PORT_MAP (BNXT_RE_ACTIVE_MAP_PORT1 | \ BNXT_RE_ACTIVE_MAP_PORT2) #define rdev_to_dev(rdev) ((rdev) ? (&(rdev)->ibdev.dev) : NULL) void bnxt_re_set_dma_device(struct ib_device *ibdev, struct bnxt_re_dev *rdev); bool bnxt_re_is_rdev_valid(struct bnxt_re_dev *rdev); #define bnxt_re_rdev_ready(rdev) (bnxt_re_is_rdev_valid(rdev) && \ (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))) #define BNXT_RE_SRIOV_CFG_TIMEOUT 6 int bnxt_re_get_device_stats(struct bnxt_re_dev *rdev); void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 removal_type, struct auxiliary_device *aux_dev); void bnxt_re_destroy_lag(struct bnxt_re_dev **rdev); int bnxt_re_add_device(struct bnxt_re_dev **rdev, struct ifnet *netdev, u8 qp_mode, u8 op_type, u8 wqe_mode, u32 num_msix_requested, struct auxiliary_device *aux_dev); void bnxt_re_create_base_interface(bool primary); int bnxt_re_schedule_work(struct bnxt_re_dev *rdev, unsigned long event, struct ifnet *vlan_dev, struct ifnet *netdev, struct auxiliary_device *aux_dev); void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev); int _bnxt_re_ib_init(struct bnxt_re_dev *rdev); int _bnxt_re_ib_init2(struct bnxt_re_dev *rdev); void bnxt_re_init_resolve_wq(struct bnxt_re_dev *rdev); void bnxt_re_uninit_resolve_wq(struct bnxt_re_dev *rdev); /* The rdev ref_count is to protect immature removal of the device */ static inline void bnxt_re_hold(struct bnxt_re_dev *rdev) { atomic_inc(&rdev->ref_count); dev_dbg(rdev_to_dev(rdev), "Hold ref_count = 0x%x", atomic_read(&rdev->ref_count)); } static inline void bnxt_re_put(struct bnxt_re_dev *rdev) { atomic_dec(&rdev->ref_count); dev_dbg(rdev_to_dev(rdev), "Put ref_count = 0x%x", atomic_read(&rdev->ref_count)); } /* * Responder Error reason codes * FIXME: Remove these when the defs * are properly included in hsi header */ enum res_err_state_reason { /* No error. */ CFCQ_RES_ERR_STATE_REASON_NO_ERROR = 0, /* * Incoming Send, RDMA write, or RDMA read exceeds the maximum * transfer length. Detected on RX first and only packets for * write. Detected on RX request for read. This is an RX * Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_EXCEED_MAX, /* * RDMA write payload size does not match write length. Detected * when total write payload is not equal to the RDMA write * length that was given in the first or only packet of the * request. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH, /* * Send payload exceeds RQ/SRQ WQE buffer capacity. The total * send payload that arrived is more than the size of the WQE * buffer that was fetched from the RQ/SRQ. This is an RX * Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE, /* * Responder detected opcode error. * First, only, middle, last * for incoming requests are improperly ordered with respect to * previous (PSN) packet. * First or middle packet is not full * MTU size. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_OPCODE_ERROR, /* * PSN sequence error retry limit exceeded. The responder * encountered a PSN sequence error for the same PSN too many * times. This can occur via implicit or explicit NAK. This is * an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT, /* * Invalid R_Key. An incoming request contained an R_Key that * did not reference a valid MR/MW. This error may be detected * by the RX engine for RDMA write or by the TX engine for RDMA * read (detected while servicing IRRQ). This is an RX Detected * Error. 
*/ CFCQ_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY, /* * Domain error. An incoming request specified an R_Key which * referenced a MR/MW that was not in the same PD as the QP on * which the request arrived. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR, /* * No permission. An incoming request contained an R_Key that * referenced a MR/MW which did not have the access permission * needed for the operation. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION, /* * Range error. An incoming request had a combination of R_Key, * VA, and length that was out of bounds of the associated * MR/MW. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR, /* * Invalid R_Key. An incoming request contained an R_Key that * did not reference a valid MR/MW. This error may be detected * by the RX engine for RDMA write or by the TX engine for RDMA * read (detected while servicing IRRQ). This is a TX Detected * Error. */ CFCQ_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY, /* * Domain error. An incoming request specified an R_Key which * referenced a MR/MW that was not in the same PD as the QP on * which the request arrived. This is a TX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR, /* * No permission. An incoming request contained an R_Key that * referenced a MR/MW which did not have the access permission * needed for the operation. This is a TX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION, /* * Range error. An incoming request had a combination of R_Key, * VA, and length that was out of bounds of the associated * MR/MW. This is a TX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR, /* * IRRQ overflow. The peer sent us more RDMA read or atomic * requests than the negotiated maximum. This is an RX Detected * Error. */ CFCQ_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW, /* * Unsupported opcode. The peer sent us a request with an opcode * for a request type that is not supported on this QP. This is * an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE, /* * Unaligned atomic operation. The VA of an atomic request is on * a memory boundary that prevents atomic execution. This is an * RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC, /* * Remote invalidate error. A send with invalidate request * arrived in which the R_Key to invalidate did not describe a * MR/MW which could be invalidated. RQ WQE completes with error * status. This error is only reported if the send operation did * not fail. If the send operation failed then the remote * invalidate error is not reported. This is an RX Detected * Error. */ CFCQ_RES_ERR_STATE_REASON_RES_REM_INVALIDATE, /* * Local memory error. An RQ/SRQ SGE described an inaccessible * memory. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_MEMORY_ERROR, /* * SRQ in error. The QP is moving to error state because it * found SRQ it uses in error. This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_SRQ_ERROR, /* * Completion error. No CQE space available on queue or CQ not * in VALID state. This is a Completion Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_CMP_ERROR, /* * Invalid R_Key while resending responses to duplicate request. * This is a TX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_IVALID_DUP_RKEY, /* * Problem was found in the format of a WQE in the RQ/SRQ. This * is an RX Detected Error. 
*/ CFCQ_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR, /* * A load error occurred on an attempt to load the CQ Context. * This is a Completion Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR = 0x18, /* * A load error occurred on an attempt to load the SRQ Context. * This is an RX Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR, /* * A fatal error was detected on an attempt to read from or * write to PCIe on the transmit side. This error is detected by * the TX side, but has the priority of a Completion Detected * Error. */ CFCQ_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR = 0x1b, /* * A fatal error was detected on an attempt to read from or * write to PCIe on the receive side. This error is detected by * the RX side (or CAGR), but has the priority of a Completion * Detected Error. */ CFCQ_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR = 0x1c }; int bnxt_re_host_pf_id_query(struct bnxt_re_dev *rdev, struct bnxt_qplib_query_fn_info *fn_info, u32 *pf_mask, u32 *first_pf); /* Default DCBx and CC values */ #define BNXT_RE_DEFAULT_CNP_DSCP 48 #define BNXT_RE_DEFAULT_CNP_PRI 7 #define BNXT_RE_DEFAULT_ROCE_DSCP 26 #define BNXT_RE_DEFAULT_ROCE_PRI 3 #define BNXT_RE_DEFAULT_L2_BW 50 #define BNXT_RE_DEFAULT_ROCE_BW 50 #define ROCE_PRIO_VALID 0x0 #define CNP_PRIO_VALID 0x1 #define ROCE_DSCP_VALID 0x0 #define CNP_DSCP_VALID 0x1 int bnxt_re_get_pri_dscp_settings(struct bnxt_re_dev *rdev, u16 target_id, struct bnxt_re_tc_rec *tc_rec); int bnxt_re_setup_dscp(struct bnxt_re_dev *rdev); int bnxt_re_clear_dscp(struct bnxt_re_dev *rdev); int bnxt_re_setup_cnp_cos(struct bnxt_re_dev *rdev, bool reset); static inline enum ib_port_state bnxt_re_get_link_state(struct bnxt_re_dev *rdev) { - if (rdev->netdev->if_drv_flags & IFF_DRV_RUNNING && - rdev->netdev->if_link_state == LINK_STATE_UP) + if (if_getdrvflags(rdev->netdev) & IFF_DRV_RUNNING && + if_getlinkstate(rdev->netdev) == LINK_STATE_UP) return IB_PORT_ACTIVE; return IB_PORT_DOWN; } static inline int bnxt_re_link_state(struct bnxt_re_dev *rdev) { return bnxt_re_get_link_state(rdev) == IB_PORT_ACTIVE ? 
1:0; } static inline int is_cc_enabled(struct bnxt_re_dev *rdev) { return rdev->cc_param.enable; } static inline void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr, u16 opcd, u16 crid, u16 trid) { hdr->req_type = cpu_to_le16(opcd); hdr->cmpl_ring = cpu_to_le16(crid); hdr->target_id = cpu_to_le16(trid); } static inline void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg, int msg_len, void *resp, int resp_max_len, int timeout) { fw_msg->msg = msg; fw_msg->msg_len = msg_len; fw_msg->resp = resp; fw_msg->resp_max_len = resp_max_len; fw_msg->timeout = timeout; } static inline bool is_qport_service_type_supported(struct bnxt_re_dev *rdev) { return rdev->tc_rec[0].serv_type_enabled; } static inline bool is_bnxt_roce_queue(struct bnxt_re_dev *rdev, u8 ser_prof, u8 prof_type) { if (is_qport_service_type_supported(rdev)) return (prof_type & HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_TYPE_ROCE); else return (ser_prof == HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_ROCE); } static inline bool is_bnxt_cnp_queue(struct bnxt_re_dev *rdev, u8 ser_prof, u8 prof_type) { if (is_qport_service_type_supported(rdev)) return (prof_type & HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_TYPE_CNP); else return (ser_prof == HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSY_ROCE_CNP); } #define BNXT_RE_MAP_SH_PAGE 0x0 #define BNXT_RE_MAP_WC 0x1 #define BNXT_RE_DBR_PAGE 0x2 #define BNXT_RE_MAP_DB_RECOVERY_PAGE 0x3 #define BNXT_RE_DBR_RECOV_USERLAND_TIMEOUT (20) /* 20 ms */ #define BNXT_RE_DBR_INT_TIME 5 /* ms */ #define BNXT_RE_PACING_EN_INT_THRESHOLD 50 /* Entries in DB FIFO */ #define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */ /* Percentage of DB FIFO depth */ #define BNXT_RE_PACING_DBQ_THRESHOLD BNXT_RE_PACING_DBQ_HIGH_WATERMARK #define BNXT_RE_PACING_ALARM_TH_MULTIPLE(ctx) (_is_chip_p7(ctx) ? 0 : 2) /* * Maximum Percentage of configurable DB FIFO depth. * The Doorbell FIFO depth is 0x2c00. But the DBR_REG_DB_THROTTLING register has only 12 bits * to program the high watermark. This means user can configure maximum 36% only(4095/11264). 
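 * Concretely: 0x2c00 is 11264 FIFO entries, a 12-bit watermark field caps
 * out at 4095, and 4095 / 11264 is roughly 36.3%, which is where the value
 * of BNXT_RE_PACING_DBQ_HIGH_WATERMARK below comes from.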
*/ #define BNXT_RE_PACING_DBQ_HIGH_WATERMARK 36 /* Default do_pacing value when there is no congestion */ #define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */ enum { BNXT_RE_DBQ_EVENT_SCHED = 0, BNXT_RE_DBR_PACING_EVENT = 1, BNXT_RE_DBR_NQ_PACING_NOTIFICATION = 2, }; struct bnxt_re_dbq_work { struct work_struct work; struct bnxt_re_dev *rdev; struct hwrm_async_event_cmpl cmpl; u32 event; }; int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); int bnxt_re_enable_dbr_pacing(struct bnxt_re_dev *rdev); int bnxt_re_disable_dbr_pacing(struct bnxt_re_dev *rdev); int bnxt_re_set_dbq_throttling_reg(struct bnxt_re_dev *rdev, u16 nq_id, u32 throttle); void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev); int bnxt_re_hwrm_pri2cos_qcfg(struct bnxt_re_dev *rdev, struct bnxt_re_tc_rec *tc_rec, u16 target_id); void writel_fbsd(struct bnxt_softc *bp, u32, u8, u32); u32 readl_fbsd(struct bnxt_softc *bp, u32, u8); static inline unsigned int bnxt_re_get_total_mr_mw_count(struct bnxt_re_dev *rdev) { return (atomic_read(&rdev->stats.rsors.mr_count) + atomic_read(&rdev->stats.rsors.mw_count)); } static inline void bnxt_re_set_def_pacing_threshold(struct bnxt_re_dev *rdev) { rdev->qplib_res.pacing_data->pacing_th = rdev->pacing_algo_th; rdev->qplib_res.pacing_data->alarm_th = rdev->pacing_algo_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx); } static inline void bnxt_re_set_def_do_pacing(struct bnxt_re_dev *rdev) { rdev->qplib_res.pacing_data->do_pacing = rdev->dbr_def_do_pacing; } static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev) { rdev->qplib_res.pacing_data->dev_err_state = test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); } #endif diff --git a/sys/dev/bnxt/bnxt_re/ib_verbs.c b/sys/dev/bnxt/bnxt_re/ib_verbs.c index 8d43fa96c048..0383a16757aa 100644 --- a/sys/dev/bnxt/bnxt_re/ib_verbs.c +++ b/sys/dev/bnxt/bnxt_re/ib_verbs.c @@ -1,5498 +1,5498 @@ /* * Copyright (c) 2015-2024, Broadcom. All rights reserved. The term * Broadcom refers to Broadcom Limited and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * Description: IB Verbs interpreter */ #include #include #include #include "bnxt_re.h" #include "ib_verbs.h" static inline struct scatterlist *get_ib_umem_sgl(struct ib_umem *umem, u32 *nmap) { *nmap = umem->nmap; return umem->sg_head.sgl; } static inline void bnxt_re_peer_mem_release(struct ib_umem *umem) { dev_dbg(NULL, "ib_umem_release getting invoked \n"); ib_umem_release(umem); } void bnxt_re_resolve_dmac_task(struct work_struct *work) { int rc = -1; struct bnxt_re_dev *rdev; struct ib_ah_attr *ah_attr; struct bnxt_re_resolve_dmac_work *dmac_work = container_of(work, struct bnxt_re_resolve_dmac_work, work); rdev = dmac_work->rdev; ah_attr = dmac_work->ah_attr; rc = ib_resolve_eth_dmac(&rdev->ibdev, ah_attr); if (rc) dev_err(rdev_to_dev(dmac_work->rdev), "Failed to resolve dest mac rc = %d\n", rc); atomic_set(&dmac_work->status_wait, rc << 8); } static int __from_ib_access_flags(int iflags) { int qflags = 0; if (iflags & IB_ACCESS_LOCAL_WRITE) qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE; if (iflags & IB_ACCESS_REMOTE_READ) qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ; if (iflags & IB_ACCESS_REMOTE_WRITE) qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE; if (iflags & IB_ACCESS_REMOTE_ATOMIC) qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC; if (iflags & IB_ACCESS_MW_BIND) qflags |= BNXT_QPLIB_ACCESS_MW_BIND; if (iflags & IB_ZERO_BASED) qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED; if (iflags & IB_ACCESS_ON_DEMAND) qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND; return qflags; }; static enum ib_access_flags __to_ib_access_flags(int qflags) { enum ib_access_flags iflags = 0; if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE) iflags |= IB_ACCESS_LOCAL_WRITE; if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE) iflags |= IB_ACCESS_REMOTE_WRITE; if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ) iflags |= IB_ACCESS_REMOTE_READ; if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC) iflags |= IB_ACCESS_REMOTE_ATOMIC; if (qflags & BNXT_QPLIB_ACCESS_MW_BIND) iflags |= IB_ACCESS_MW_BIND; if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED) iflags |= IB_ZERO_BASED; if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND) iflags |= IB_ACCESS_ON_DEMAND; return iflags; }; static int bnxt_re_copy_to_udata(struct bnxt_re_dev *rdev, void *data, int len, struct ib_udata *udata) { int rc; rc = ib_copy_to_udata(udata, data, len); if (rc) dev_err(rdev_to_dev(rdev), "ucontext copy failed from %ps rc %d\n", __builtin_return_address(0), rc); return rc; } struct ifnet *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct ifnet *netdev = NULL; rcu_read_lock(); if (!rdev || !rdev->netdev) goto end; netdev = rdev->netdev; /* In case of active-backup bond mode, return active slave */ if (netdev) dev_hold(netdev); end: rcu_read_unlock(); return netdev; } int bnxt_re_query_device(struct ib_device *ibdev, struct ib_device_attr *ib_attr, struct ib_udata *udata) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; memset(ib_attr, 0, sizeof(*ib_attr)); memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, 4); bnxt_qplib_get_guid(rdev->dev_addr, (u8 *)&ib_attr->sys_image_guid); ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE; ib_attr->page_size_cap = dev_attr->page_size_cap; ib_attr->vendor_id = rdev->en_dev->pdev->vendor; ib_attr->vendor_part_id = rdev->en_dev->pdev->device; ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device; ib_attr->max_qp = dev_attr->max_qp; ib_attr->max_qp_wr = dev_attr->max_qp_wqes; /* * Read and set from the module param 'min_tx_depth' * only once after the driver load */ 
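The functional change in this diff replaces direct struct ifnet field dereferences with the if_get*() accessors, as seen in the bnxt_re_get_link_state() and bnxt_re_query_port() hunks (if_getdrvflags(), if_getlinkstate(), if_getmtu()). A minimal sketch of that pattern, for illustration only; link_usable_sketch() is a hypothetical helper, not part of the driver:

static inline bool link_usable_sketch(struct ifnet *ifp, int *mtu)
{
	/* accessors instead of ifp->if_mtu / ifp->if_drv_flags / ifp->if_link_state */
	*mtu = if_getmtu(ifp);
	return ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0 &&
		if_getlinkstate(ifp) == LINK_STATE_UP);
}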
if (rdev->min_tx_depth == 1 && min_tx_depth < dev_attr->max_qp_wqes) rdev->min_tx_depth = min_tx_depth; ib_attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_SHUTDOWN_PORT | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_RESIZE_MAX_WR | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_N_NOTIFY_CQ | IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_WINDOW_TYPE_2B | IB_DEVICE_MEM_MGT_EXTENSIONS; ib_attr->max_send_sge = dev_attr->max_qp_sges; ib_attr->max_recv_sge = dev_attr->max_qp_sges; ib_attr->max_sge_rd = dev_attr->max_qp_sges; ib_attr->max_cq = dev_attr->max_cq; ib_attr->max_cqe = dev_attr->max_cq_wqes; ib_attr->max_mr = dev_attr->max_mr; ib_attr->max_pd = dev_attr->max_pd; ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom; ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom; if (dev_attr->is_atomic) { ib_attr->atomic_cap = IB_ATOMIC_GLOB; ib_attr->masked_atomic_cap = IB_ATOMIC_GLOB; } ib_attr->max_ee_rd_atom = 0; ib_attr->max_res_rd_atom = 0; ib_attr->max_ee_init_rd_atom = 0; ib_attr->max_ee = 0; ib_attr->max_rdd = 0; ib_attr->max_mw = dev_attr->max_mw; ib_attr->max_raw_ipv6_qp = 0; ib_attr->max_raw_ethy_qp = dev_attr->max_raw_ethy_qp; ib_attr->max_mcast_grp = 0; ib_attr->max_mcast_qp_attach = 0; ib_attr->max_total_mcast_qp_attach = 0; ib_attr->max_ah = dev_attr->max_ah; ib_attr->max_srq = dev_attr->max_srq; ib_attr->max_srq_wr = dev_attr->max_srq_wqes; ib_attr->max_srq_sge = dev_attr->max_srq_sges; ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS; ib_attr->max_pkeys = 1; ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY; ib_attr->sig_prot_cap = 0; ib_attr->sig_guard_cap = 0; ib_attr->odp_caps.general_caps = 0; return 0; } int bnxt_re_modify_device(struct ib_device *ibdev, int device_modify_mask, struct ib_device_modify *device_modify) { dev_dbg(rdev_to_dev(rdev), "Modify device with mask 0x%x\n", device_modify_mask); switch (device_modify_mask) { case IB_DEVICE_MODIFY_SYS_IMAGE_GUID: /* Modify the GUID requires the modification of the GID table */ /* GUID should be made as READ-ONLY */ break; case IB_DEVICE_MODIFY_NODE_DESC: /* Node Desc should be made as READ-ONLY */ break; default: break; } return 0; } static void __to_ib_speed_width(u32 espeed, u8 *speed, u8 *width) { switch (espeed) { case SPEED_1000: *speed = IB_SPEED_SDR; *width = IB_WIDTH_1X; break; case SPEED_10000: *speed = IB_SPEED_QDR; *width = IB_WIDTH_1X; break; case SPEED_20000: *speed = IB_SPEED_DDR; *width = IB_WIDTH_4X; break; case SPEED_25000: *speed = IB_SPEED_EDR; *width = IB_WIDTH_1X; break; case SPEED_40000: *speed = IB_SPEED_QDR; *width = IB_WIDTH_4X; break; case SPEED_50000: *speed = IB_SPEED_EDR; *width = IB_WIDTH_2X; break; case SPEED_100000: *speed = IB_SPEED_EDR; *width = IB_WIDTH_4X; break; case SPEED_200000: *speed = IB_SPEED_HDR; *width = IB_WIDTH_4X; break; default: *speed = IB_SPEED_SDR; *width = IB_WIDTH_1X; break; } } /* Port */ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *port_attr) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; u8 active_speed = 0, active_width = 0; dev_dbg(rdev_to_dev(rdev), "QUERY PORT with port_num 0x%x\n", port_num); memset(port_attr, 0, sizeof(*port_attr)); port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; port_attr->state = bnxt_re_get_link_state(rdev); if (port_attr->state == IB_PORT_ACTIVE) port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; port_attr->max_mtu = IB_MTU_4096; - port_attr->active_mtu = 
iboe_get_mtu(rdev->netdev->if_mtu); + port_attr->active_mtu = iboe_get_mtu(if_getmtu(rdev->netdev)); port_attr->gid_tbl_len = dev_attr->max_sgid; port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS; port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW; port_attr->bad_pkey_cntr = 0; port_attr->qkey_viol_cntr = 0; port_attr->pkey_tbl_len = dev_attr->max_pkey; port_attr->lid = 0; port_attr->sm_lid = 0; port_attr->lmc = 0; port_attr->max_vl_num = 4; port_attr->sm_sl = 0; port_attr->subnet_timeout = 0; port_attr->init_type_reply = 0; rdev->espeed = rdev->en_dev->espeed; if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) __to_ib_speed_width(rdev->espeed, &active_speed, &active_width); port_attr->active_speed = active_speed; port_attr->active_width = active_width; return 0; } int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify) { dev_dbg(rdev_to_dev(rdev), "Modify port with mask 0x%x\n", port_modify_mask); switch (port_modify_mask) { case IB_PORT_SHUTDOWN: break; case IB_PORT_INIT_TYPE: break; case IB_PORT_RESET_QKEY_CNTR: break; default: break; } return 0; } int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct ib_port_attr port_attr; if (bnxt_re_query_port(ibdev, port_num, &port_attr)) return -EINVAL; immutable->pkey_tbl_len = port_attr.pkey_tbl_len; immutable->gid_tbl_len = port_attr.gid_tbl_len; if (rdev->roce_mode == BNXT_RE_FLAG_ROCEV1_CAP) immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; else if (rdev->roce_mode == BNXT_RE_FLAG_ROCEV2_CAP) immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; else immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } void bnxt_re_compat_qfwstr(void) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); sprintf(str, "%d.%d.%d.%d", rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1], rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]); } int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey) { if (index > 0) return -EINVAL; *pkey = IB_DEFAULT_PKEY_FULL; return 0; } int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, int index, union ib_gid *gid) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); int rc = 0; /* Ignore port_num */ memset(gid, 0, sizeof(*gid)); rc = bnxt_qplib_get_sgid(&rdev->qplib_res, &rdev->qplib_res.sgid_tbl, index, (struct bnxt_qplib_gid *)gid); return rc; } int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, unsigned int index, void **context) { int rc = 0; struct bnxt_re_gid_ctx *ctx, **ctx_tbl; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; struct bnxt_qplib_gid *gid_to_del; u16 vlan_id = 0xFFFF; /* Delete the entry from the hardware */ ctx = *context; if (!ctx) { dev_err(rdev_to_dev(rdev), "GID entry has no ctx?!\n"); return -EINVAL; } if (sgid_tbl && sgid_tbl->active) { if (ctx->idx >= sgid_tbl->max) { dev_dbg(rdev_to_dev(rdev), "GID index out of range?!\n"); return -EINVAL; } gid_to_del = &sgid_tbl->tbl[ctx->idx].gid; vlan_id = sgid_tbl->tbl[ctx->idx].vlan_id; ctx->refcnt--; /* DEL_GID is called via WQ context(netdevice_event_work_handler) * or via the ib_unregister_device path. 
In the former case QP1 * may not be destroyed yet, in which case just return as FW * needs that entry to be present and will fail it's deletion. * We could get invoked again after QP1 is destroyed OR get an * ADD_GID call with a different GID value for the same index * where we issue MODIFY_GID cmd to update the GID entry -- TBD */ if (ctx->idx == 0 && rdma_link_local_addr((struct in6_addr *)gid_to_del) && (rdev->gsi_ctx.gsi_sqp || rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_UD)) { dev_dbg(rdev_to_dev(rdev), "Trying to delete GID0 while QP1 is alive\n"); if (!ctx->refcnt) { rdev->gid_map[index] = -1; ctx_tbl = sgid_tbl->ctx; ctx_tbl[ctx->idx] = NULL; kfree(ctx); } return 0; } rdev->gid_map[index] = -1; if (!ctx->refcnt) { rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, vlan_id, true); if (!rc) { dev_dbg(rdev_to_dev(rdev), "GID remove success\n"); ctx_tbl = sgid_tbl->ctx; ctx_tbl[ctx->idx] = NULL; kfree(ctx); } else { dev_err(rdev_to_dev(rdev), "Remove GID failed rc = 0x%x\n", rc); } } } else { dev_dbg(rdev_to_dev(rdev), "GID sgid_tbl does not exist!\n"); return -EINVAL; } return rc; } int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr, void **context) { int rc; u32 tbl_idx = 0; u16 vlan_id = 0xFFFF; struct bnxt_re_gid_ctx *ctx, **ctx_tbl; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; if ((attr->ndev) && is_vlan_dev(attr->ndev)) vlan_id = vlan_dev_vlan_id(attr->ndev); rc = bnxt_qplib_add_sgid(sgid_tbl, gid, rdev->dev_addr, vlan_id, true, &tbl_idx); if (rc == -EALREADY) { dev_dbg(rdev_to_dev(rdev), "GID %pI6 is already present\n", gid); ctx_tbl = sgid_tbl->ctx; if (!ctx_tbl[tbl_idx]) { ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->idx = tbl_idx; ctx->refcnt = 1; ctx_tbl[tbl_idx] = ctx; } else { ctx_tbl[tbl_idx]->refcnt++; } *context = ctx_tbl[tbl_idx]; /* tbl_idx is the HW table index and index is the stack index */ rdev->gid_map[index] = tbl_idx; return 0; } else if (rc < 0) { dev_err(rdev_to_dev(rdev), "Add GID failed rc = 0x%x\n", rc); return rc; } else { ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) { dev_err(rdev_to_dev(rdev), "Add GID ctx failed\n"); return -ENOMEM; } ctx_tbl = sgid_tbl->ctx; ctx->idx = tbl_idx; ctx->refcnt = 1; ctx_tbl[tbl_idx] = ctx; /* tbl_idx is the HW table index and index is the stack index */ rdev->gid_map[index] = tbl_idx; *context = ctx; } return rc; } enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num) { return IB_LINK_LAYER_ETHERNET; } static void bnxt_re_legacy_create_fence_wqe(struct bnxt_re_pd *pd) { struct bnxt_re_legacy_fence_data *fence = &pd->fence; struct ib_mr *ib_mr = &fence->mr->ib_mr; struct bnxt_qplib_swqe *wqe = &fence->bind_wqe; struct bnxt_re_dev *rdev = pd->rdev; if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) return; memset(wqe, 0, sizeof(*wqe)); wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW; wqe->wr_id = BNXT_QPLIB_FENCE_WRID; wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; wqe->bind.zero_based = false; wqe->bind.parent_l_key = ib_mr->lkey; wqe->bind.va = (u64)fence->va; wqe->bind.length = fence->size; wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ); wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1; /* Save the initial rkey in fence structure for now; * wqe->bind.r_key will be set at (re)bind time. 
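 * bnxt_re_legacy_bind_fence_mw() below copies this template WQE, plugs the
 * saved bind_rkey into wqe.bind.r_key, advances bind_rkey again with
 * ib_inc_rkey(), and posts the WQE, so every fence rebind presents a fresh
 * rkey.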
*/ fence->bind_rkey = ib_inc_rkey(fence->mw->rkey); } static int bnxt_re_legacy_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp) { struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp, qplib_qp); struct ib_pd *ib_pd = qp->ib_qp.pd; struct bnxt_re_pd *pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); struct bnxt_re_legacy_fence_data *fence = &pd->fence; struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe; struct bnxt_qplib_swqe wqe; int rc; /* TODO: Need SQ locking here when Fence WQE * posting moves up into bnxt_re from bnxt_qplib. */ memcpy(&wqe, fence_wqe, sizeof(wqe)); wqe.bind.r_key = fence->bind_rkey; fence->bind_rkey = ib_inc_rkey(fence->bind_rkey); dev_dbg(rdev_to_dev(qp->rdev), "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n", wqe.bind.r_key, qp->qplib_qp.id, pd); rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); if (rc) { dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n"); return rc; } bnxt_qplib_post_send_db(&qp->qplib_qp); return rc; } static int bnxt_re_legacy_create_fence_mr(struct bnxt_re_pd *pd) { int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND; struct bnxt_re_legacy_fence_data *fence = &pd->fence; struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_qplib_mrinfo mrinfo; struct bnxt_re_mr *mr = NULL; struct ib_mw *ib_mw = NULL; dma_addr_t dma_addr = 0; u32 max_mr_count; u64 pbl_tbl; int rc; if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) return 0; memset(&mrinfo, 0, sizeof(mrinfo)); /* Allocate a small chunk of memory and dma-map it */ fence->va = kzalloc(BNXT_RE_LEGACY_FENCE_BYTES, GFP_KERNEL); if (!fence->va) return -ENOMEM; dma_addr = ib_dma_map_single(&rdev->ibdev, fence->va, BNXT_RE_LEGACY_FENCE_BYTES, DMA_BIDIRECTIONAL); rc = ib_dma_mapping_error(&rdev->ibdev, dma_addr); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n"); rc = -EIO; fence->dma_addr = 0; goto free_va; } fence->dma_addr = dma_addr; /* Allocate a MR */ mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) goto free_dma_addr; fence->mr = mr; mr->rdev = rdev; mr->qplib_mr.pd = &pd->qplib_pd; mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags); if (!_is_alloc_mr_unified(rdev->qplib_res.dattr)) { rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n"); goto free_mr; } /* Register MR */ mr->ib_mr.lkey = mr->qplib_mr.lkey; } mr->qplib_mr.va = (u64)fence->va; mr->qplib_mr.total_size = BNXT_RE_LEGACY_FENCE_BYTES; pbl_tbl = dma_addr; mrinfo.mrw = &mr->qplib_mr; mrinfo.ptes = &pbl_tbl; mrinfo.sg.npages = BNXT_RE_LEGACY_FENCE_PBL_SIZE; mrinfo.sg.nmap = 0; mrinfo.sg.sghead = 0; mrinfo.sg.pgshft = PAGE_SHIFT; mrinfo.sg.pgsize = PAGE_SIZE; rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mrinfo, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n"); goto free_mr; } mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.rkey; atomic_inc(&rdev->stats.rsors.mr_count); max_mr_count = atomic_read(&rdev->stats.rsors.mr_count); if (max_mr_count > (atomic_read(&rdev->stats.rsors.max_mr_count))) atomic_set(&rdev->stats.rsors.max_mr_count, max_mr_count); ib_mw = bnxt_re_alloc_mw(&pd->ibpd, IB_MW_TYPE_1, NULL); /* Create a fence MW only for kernel consumers */ if (!ib_mw) { dev_err(rdev_to_dev(rdev), "Failed to create fence-MW for PD: %p\n", pd); rc = -EINVAL; goto free_mr; } fence->mw = ib_mw; bnxt_re_legacy_create_fence_wqe(pd); return 0; free_mr: if (mr->ib_mr.lkey) { bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); 
atomic_dec(&rdev->stats.rsors.mr_count); } kfree(mr); fence->mr = NULL; free_dma_addr: ib_dma_unmap_single(&rdev->ibdev, fence->dma_addr, BNXT_RE_LEGACY_FENCE_BYTES, DMA_BIDIRECTIONAL); fence->dma_addr = 0; free_va: kfree(fence->va); fence->va = NULL; return rc; } static void bnxt_re_legacy_destroy_fence_mr(struct bnxt_re_pd *pd) { struct bnxt_re_legacy_fence_data *fence = &pd->fence; struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr = fence->mr; if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) return; if (fence->mw) { bnxt_re_dealloc_mw(fence->mw); fence->mw = NULL; } if (mr) { if (mr->ib_mr.rkey) bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr, false); if (mr->ib_mr.lkey) bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); kfree(mr); fence->mr = NULL; atomic_dec(&rdev->stats.rsors.mr_count); } if (fence->dma_addr) { ib_dma_unmap_single(&rdev->ibdev, fence->dma_addr, BNXT_RE_LEGACY_FENCE_BYTES, DMA_BIDIRECTIONAL); fence->dma_addr = 0; } kfree(fence->va); fence->va = NULL; } static int bnxt_re_get_user_dpi(struct bnxt_re_dev *rdev, struct bnxt_re_ucontext *cntx) { struct bnxt_qplib_chip_ctx *cctx = rdev->chip_ctx; int ret = 0; u8 type; /* Allocate DPI in alloc_pd or in create_cq to avoid failing of * ibv_devinfo and family of application when DPIs are depleted. */ type = BNXT_QPLIB_DPI_TYPE_UC; ret = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &cntx->dpi, cntx, type); if (ret) { dev_err(rdev_to_dev(rdev), "Alloc doorbell page failed!\n"); goto out; } if (cctx->modes.db_push) { type = BNXT_QPLIB_DPI_TYPE_WC; ret = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &cntx->wcdpi, cntx, type); if (ret) dev_err(rdev_to_dev(rdev), "push dp alloc failed\n"); } out: return ret; } /* Protection Domains */ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_pd *pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); struct bnxt_re_dev *rdev = pd->rdev; int rc; bnxt_re_legacy_destroy_fence_mr(pd); rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd); if (rc) dev_err_ratelimited(rdev_to_dev(rdev), "%s failed rc = %d\n", __func__, rc); atomic_dec(&rdev->stats.rsors.pd_count); return; } int bnxt_re_alloc_pd(struct ib_pd *pd_in, struct ib_udata *udata) { struct ib_pd *ibpd = pd_in; struct ib_device *ibdev = ibpd->device; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ibucontext); u32 max_pd_count; int rc; struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ibpd); pd->rdev = rdev; if (bnxt_qplib_alloc_pd(&rdev->qplib_res, &pd->qplib_pd)) { dev_err(rdev_to_dev(rdev), "Allocate HW Protection Domain failed!\n"); rc = -ENOMEM; goto fail; } if (udata) { struct bnxt_re_pd_resp resp = {}; if (!ucntx->dpi.dbr) { rc = bnxt_re_get_user_dpi(rdev, ucntx); if (rc) goto dbfail; } resp.pdid = pd->qplib_pd.id; /* Still allow mapping this DBR to the new user PD. 
*/ resp.dpi = ucntx->dpi.dpi; resp.dbr = (u64)ucntx->dpi.umdbr; /* Copy only on a valid wcpdi */ if (ucntx->wcdpi.dpi) { resp.wcdpi = ucntx->wcdpi.dpi; resp.comp_mask = BNXT_RE_COMP_MASK_PD_HAS_WC_DPI; } if (rdev->dbr_pacing) { WARN_ON(!rdev->dbr_bar_addr); resp.dbr_bar_addr = (u64)rdev->dbr_bar_addr; resp.comp_mask |= BNXT_RE_COMP_MASK_PD_HAS_DBR_BAR_ADDR; } rc = bnxt_re_copy_to_udata(rdev, &resp, min(udata->outlen, sizeof(resp)), udata); if (rc) goto dbfail; } if (!udata) if (bnxt_re_legacy_create_fence_mr(pd)) dev_warn(rdev_to_dev(rdev), "Failed to create Fence-MR\n"); atomic_inc(&rdev->stats.rsors.pd_count); max_pd_count = atomic_read(&rdev->stats.rsors.pd_count); if (max_pd_count > atomic_read(&rdev->stats.rsors.max_pd_count)) atomic_set(&rdev->stats.rsors.max_pd_count, max_pd_count); return 0; dbfail: (void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd); fail: return rc; } /* Address Handles */ void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = to_bnxt_re(ib_ah, struct bnxt_re_ah, ibah); struct bnxt_re_dev *rdev = ah->rdev; int rc = 0; bool block = true; block = !(flags & RDMA_DESTROY_AH_SLEEPABLE); rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block); if (rc) dev_err_ratelimited(rdev_to_dev(rdev), "%s id = %d blocking %d failed rc = %d\n", __func__, ah->qplib_ah.id, block, rc); atomic_dec(&rdev->stats.rsors.ah_count); return; } static u8 _to_bnxt_re_nw_type(enum rdma_network_type ntype) { u8 nw_type; switch (ntype) { case RDMA_NETWORK_IPV4: nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4; break; case RDMA_NETWORK_IPV6: nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6; break; default: nw_type = CMDQ_CREATE_AH_TYPE_V1; break; } return nw_type; } static inline int bnxt_re_get_cached_gid(struct ib_device *dev, u8 port_num, int index, union ib_gid *sgid, struct ib_gid_attr **sgid_attr, struct ib_global_route *grh, struct ib_ah *ah) { int ret = 0; ret = ib_get_cached_gid(dev, port_num, index, sgid, *sgid_attr); return ret; } static inline enum rdma_network_type bnxt_re_gid_to_network_type(struct ib_gid_attr *sgid_attr, union ib_gid *sgid) { return ib_gid_to_network_type(sgid_attr->gid_type, sgid); } static int bnxt_re_get_ah_info(struct bnxt_re_dev *rdev, struct ib_ah_attr *ah_attr, struct bnxt_re_ah_info *ah_info) { struct ib_gid_attr *gattr; enum rdma_network_type ib_ntype; u8 ntype; union ib_gid *gid; int rc = 0; gid = &ah_info->sgid; gattr = &ah_info->sgid_attr; rc = bnxt_re_get_cached_gid(&rdev->ibdev, 1, ah_attr->grh.sgid_index, gid, &gattr, &ah_attr->grh, NULL); if (rc) return rc; /* Get vlan tag */ if (gattr->ndev) { if (is_vlan_dev(gattr->ndev)) ah_info->vlan_tag = vlan_dev_vlan_id(gattr->ndev); if_rele(gattr->ndev); } /* Get network header type for this GID */ ib_ntype = bnxt_re_gid_to_network_type(gattr, gid); ntype = _to_bnxt_re_nw_type(ib_ntype); ah_info->nw_type = ntype; return rc; } static u8 _get_sgid_index(struct bnxt_re_dev *rdev, u8 gindx) { gindx = rdev->gid_map[gindx]; return gindx; } static int bnxt_re_init_dmac(struct bnxt_re_dev *rdev, struct ib_ah_attr *ah_attr, struct bnxt_re_ah_info *ah_info, bool is_user, struct bnxt_re_ah *ah) { int rc = 0; u8 *dmac; if (is_user && !rdma_is_multicast_addr((struct in6_addr *) ah_attr->grh.dgid.raw) && !rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { u32 retry_count = BNXT_RE_RESOLVE_RETRY_COUNT_US; struct bnxt_re_resolve_dmac_work *resolve_dmac_work; resolve_dmac_work = kzalloc(sizeof(*resolve_dmac_work), GFP_ATOMIC); resolve_dmac_work->rdev = rdev; 
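		/*
		 * The loop below busy-polls status_wait in udelay(1) steps for
		 * up to BNXT_RE_RESOLVE_RETRY_COUNT_US (5,000,000) iterations,
		 * i.e. roughly the 5 second budget noted at the definition of
		 * that constant.
		 */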
resolve_dmac_work->ah_attr = ah_attr; resolve_dmac_work->ah_info = ah_info; atomic_set(&resolve_dmac_work->status_wait, 1); INIT_WORK(&resolve_dmac_work->work, bnxt_re_resolve_dmac_task); queue_work(rdev->resolve_wq, &resolve_dmac_work->work); do { rc = atomic_read(&resolve_dmac_work->status_wait) & 0xFF; if (!rc) break; udelay(1); } while (--retry_count); if (atomic_read(&resolve_dmac_work->status_wait)) { INIT_LIST_HEAD(&resolve_dmac_work->list); list_add_tail(&resolve_dmac_work->list, &rdev->mac_wq_list); return -EFAULT; } kfree(resolve_dmac_work); } dmac = ROCE_DMAC(ah_attr); if (dmac) memcpy(ah->qplib_ah.dmac, dmac, ETH_ALEN); return rc; } int bnxt_re_create_ah(struct ib_ah *ah_in, struct ib_ah_attr *attr, u32 flags, struct ib_udata *udata) { struct ib_ah *ib_ah = ah_in; struct ib_pd *ib_pd = ib_ah->pd; struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ibah); struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ibpd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_ah_info ah_info; u32 max_ah_count; bool is_user; int rc; bool block = true; struct ib_ah_attr *ah_attr = attr; block = !(flags & RDMA_CREATE_AH_SLEEPABLE); if (!(ah_attr->ah_flags & IB_AH_GRH)) dev_err(rdev_to_dev(rdev), "ah_attr->ah_flags GRH is not set\n"); ah->rdev = rdev; ah->qplib_ah.pd = &pd->qplib_pd; is_user = ib_pd->uobject ? true : false; /* Supply the configuration for the HW */ memcpy(ah->qplib_ah.dgid.data, ah_attr->grh.dgid.raw, sizeof(union ib_gid)); ah->qplib_ah.sgid_index = _get_sgid_index(rdev, ah_attr->grh.sgid_index); if (ah->qplib_ah.sgid_index == 0xFF) { dev_err(rdev_to_dev(rdev), "invalid sgid_index!\n"); rc = -EINVAL; goto fail; } ah->qplib_ah.host_sgid_index = ah_attr->grh.sgid_index; ah->qplib_ah.traffic_class = ah_attr->grh.traffic_class; ah->qplib_ah.flow_label = ah_attr->grh.flow_label; ah->qplib_ah.hop_limit = ah_attr->grh.hop_limit; ah->qplib_ah.sl = ah_attr->sl; rc = bnxt_re_get_ah_info(rdev, ah_attr, &ah_info); if (rc) goto fail; ah->qplib_ah.nw_type = ah_info.nw_type; rc = bnxt_re_init_dmac(rdev, ah_attr, &ah_info, is_user, ah); if (rc) goto fail; rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, block); if (rc) { dev_err(rdev_to_dev(rdev), "Allocate HW Address Handle failed!\n"); goto fail; } /* Write AVID to shared page. */ if (ib_pd->uobject) { struct ib_ucontext *ib_uctx = ib_pd->uobject->context; struct bnxt_re_ucontext *uctx; unsigned long flag; u32 *wrptr; uctx = to_bnxt_re(ib_uctx, struct bnxt_re_ucontext, ibucontext); spin_lock_irqsave(&uctx->sh_lock, flag); wrptr = (u32 *)((u8 *)uctx->shpg + BNXT_RE_AVID_OFFT); *wrptr = ah->qplib_ah.id; wmb(); /* make sure cache is updated. 
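 * The barrier orders the AVID store to the shared page ahead of the
 * unlock below, so the userspace provider reads the new AH id.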
*/ spin_unlock_irqrestore(&uctx->sh_lock, flag); } atomic_inc(&rdev->stats.rsors.ah_count); max_ah_count = atomic_read(&rdev->stats.rsors.ah_count); if (max_ah_count > atomic_read(&rdev->stats.rsors.max_ah_count)) atomic_set(&rdev->stats.rsors.max_ah_count, max_ah_count); return 0; fail: return rc; } int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr) { return 0; } int bnxt_re_query_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr) { struct bnxt_re_ah *ah = to_bnxt_re(ib_ah, struct bnxt_re_ah, ibah); memcpy(ah_attr->grh.dgid.raw, ah->qplib_ah.dgid.data, sizeof(union ib_gid)); ah_attr->grh.sgid_index = ah->qplib_ah.host_sgid_index; ah_attr->grh.traffic_class = ah->qplib_ah.traffic_class; ah_attr->sl = ah->qplib_ah.sl; memcpy(ROCE_DMAC(ah_attr), ah->qplib_ah.dmac, ETH_ALEN); ah_attr->ah_flags = IB_AH_GRH; ah_attr->port_num = 1; ah_attr->static_rate = 0; return 0; } /* Shared Receive Queues */ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = to_bnxt_re(ib_srq, struct bnxt_re_srq, ibsrq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; int rc = 0; rc = bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); if (rc) dev_err_ratelimited(rdev_to_dev(rdev), "%s id = %d failed rc = %d\n", __func__, qplib_srq->id, rc); if (srq->umem && !IS_ERR(srq->umem)) ib_umem_release(srq->umem); atomic_dec(&rdev->stats.rsors.srq_count); return; } static u16 _max_rwqe_sz(int nsge) { return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge)); } static u16 bnxt_re_get_rwqe_size(struct bnxt_qplib_qp *qplqp, int rsge, int max) { if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) rsge = max; return _max_rwqe_sz(rsge); } static inline struct ib_umem *ib_umem_get_compat(struct bnxt_re_dev *rdev, struct ib_ucontext *ucontext, struct ib_udata *udata, unsigned long addr, size_t size, int access, int dmasync) { return ib_umem_get(ucontext, addr, size, access, dmasync); } static inline struct ib_umem *ib_umem_get_flags_compat(struct bnxt_re_dev *rdev, struct ib_ucontext *ucontext, struct ib_udata *udata, unsigned long addr, size_t size, int access, int dmasync) { return ib_umem_get_compat(rdev, ucontext, udata, addr, size, access, 0); } static inline size_t ib_umem_num_pages_compat(struct ib_umem *umem) { return ib_umem_num_pages(umem); } static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_srq *srq, struct ib_udata *udata) { struct bnxt_qplib_sg_info *sginfo; struct bnxt_qplib_srq *qplib_srq; struct bnxt_re_ucontext *cntx; struct ib_ucontext *context; struct bnxt_re_srq_req ureq; struct ib_umem *umem; int rc, bytes = 0; context = pd->ibpd.uobject->context; cntx = to_bnxt_re(context, struct bnxt_re_ucontext, ibucontext); qplib_srq = &srq->qplib_srq; sginfo = &qplib_srq->sginfo; if (udata->inlen < sizeof(ureq)) dev_warn(rdev_to_dev(rdev), "Update the library ulen %d klen %d\n", (unsigned int)udata->inlen, (unsigned int)sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); if (rc) return rc; bytes = (qplib_srq->max_wqe * qplib_srq->wqe_size); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get_compat(rdev, context, udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed with %ld\n", __func__, PTR_ERR(umem)); return PTR_ERR(umem); } srq->umem = umem; sginfo->sghead = get_ib_umem_sgl(umem, &sginfo->nmap); sginfo->npages = ib_umem_num_pages_compat(umem); qplib_srq->srq_handle = 
ureq.srq_handle; qplib_srq->dpi = &cntx->dpi; qplib_srq->is_user = true; return 0; } int bnxt_re_create_srq(struct ib_srq *srq_in, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_re_ucontext *cntx = NULL; struct ib_ucontext *context; struct bnxt_re_dev *rdev; struct bnxt_re_pd *pd; int rc, entries; struct ib_srq *ib_srq = srq_in; struct ib_pd *ib_pd = ib_srq->pd; struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ibsrq); u32 max_srq_count; pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); rdev = pd->rdev; dev_attr = rdev->dev_attr; if (rdev->mod_exit) { dev_dbg(rdev_to_dev(rdev), "%s(): in mod_exit, just return!\n", __func__); rc = -EIO; goto exit; } if (srq_init_attr->srq_type != IB_SRQT_BASIC) { dev_err(rdev_to_dev(rdev), "SRQ type not supported\n"); rc = -ENOTSUPP; goto exit; } if (udata) { context = pd->ibpd.uobject->context; cntx = to_bnxt_re(context, struct bnxt_re_ucontext, ibucontext); } if (atomic_read(&rdev->stats.rsors.srq_count) >= dev_attr->max_srq) { dev_err(rdev_to_dev(rdev), "Create SRQ failed - max exceeded(SRQs)\n"); rc = -EINVAL; goto exit; } if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { dev_err(rdev_to_dev(rdev), "Create SRQ failed - max exceeded(SRQ_WQs)\n"); rc = -EINVAL; goto exit; } srq->rdev = rdev; srq->qplib_srq.pd = &pd->qplib_pd; srq->qplib_srq.dpi = &rdev->dpi_privileged; /* Allocate 1 more than what's provided so posting max doesn't mean empty */ entries = srq_init_attr->attr.max_wr + 1; entries = bnxt_re_init_depth(entries, cntx); if (entries > dev_attr->max_srq_wqes + 1) entries = dev_attr->max_srq_wqes + 1; srq->qplib_srq.wqe_size = _max_rwqe_sz(6); /* 128 byte wqe size */ srq->qplib_srq.max_wqe = entries; srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge; srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nqr.nq[0].ring_id; srq->qplib_srq.sginfo.pgsize = PAGE_SIZE; srq->qplib_srq.sginfo.pgshft = PAGE_SHIFT; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); if (rc) goto fail; } rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq); if (rc) { dev_err(rdev_to_dev(rdev), "Create HW SRQ failed!\n"); goto fail; } if (udata) { struct bnxt_re_srq_resp resp; resp.srqid = srq->qplib_srq.id; rc = bnxt_re_copy_to_udata(rdev, &resp, min(udata->outlen, sizeof(resp)), udata); if (rc) { bnxt_qplib_destroy_srq(&rdev->qplib_res, &srq->qplib_srq); goto fail; } } atomic_inc(&rdev->stats.rsors.srq_count); max_srq_count = atomic_read(&rdev->stats.rsors.srq_count); if (max_srq_count > atomic_read(&rdev->stats.rsors.max_srq_count)) atomic_set(&rdev->stats.rsors.max_srq_count, max_srq_count); spin_lock_init(&srq->lock); return 0; fail: if (udata && srq->umem && !IS_ERR(srq->umem)) { ib_umem_release(srq->umem); srq->umem = NULL; } exit: return rc; } int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata) { struct bnxt_re_srq *srq = to_bnxt_re(ib_srq, struct bnxt_re_srq, ibsrq); struct bnxt_re_dev *rdev = srq->rdev; int rc; switch (srq_attr_mask) { case IB_SRQ_MAX_WR: /* SRQ resize is not supported */ break; case IB_SRQ_LIMIT: /* Change the SRQ threshold */ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe) return -EINVAL; srq->qplib_srq.threshold = srq_attr->srq_limit; rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq); if (rc) { dev_err(rdev_to_dev(rdev), "Modify HW SRQ 
failed!\n"); return rc; } /* On success, update the shadow */ srq->srq_limit = srq_attr->srq_limit; if (udata) { /* Build and send response back to udata */ rc = bnxt_re_copy_to_udata(rdev, srq, 0, udata); if (rc) return rc; } break; default: dev_err(rdev_to_dev(rdev), "Unsupported srq_attr_mask 0x%x\n", srq_attr_mask); return -EINVAL; } return 0; } int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr) { struct bnxt_re_srq *srq = to_bnxt_re(ib_srq, struct bnxt_re_srq, ibsrq); struct bnxt_re_dev *rdev = srq->rdev; int rc; rc = bnxt_qplib_query_srq(&rdev->qplib_res, &srq->qplib_srq); if (rc) { dev_err(rdev_to_dev(rdev), "Query HW SRQ (0x%x) failed! rc = %d\n", srq->qplib_srq.id, rc); return rc; } srq_attr->max_wr = srq->qplib_srq.max_wqe; srq_attr->max_sge = srq->qplib_srq.max_sge; srq_attr->srq_limit = srq->qplib_srq.threshold; return 0; } int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct bnxt_re_srq *srq = to_bnxt_re(ib_srq, struct bnxt_re_srq, ibsrq); struct bnxt_qplib_swqe wqe = {}; unsigned long flags; int rc = 0; spin_lock_irqsave(&srq->lock, flags); while (wr) { /* Transcribe each ib_recv_wr to qplib_swqe */ wqe.num_sge = wr->num_sge; wqe.sg_list = (struct bnxt_qplib_sge *)wr->sg_list; wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV; rc = bnxt_qplib_post_srq_recv(&srq->qplib_srq, &wqe); if (rc) { *bad_wr = wr; break; } wr = wr->next; } spin_unlock_irqrestore(&srq->lock, flags); return rc; } unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp) { unsigned long flags; spin_lock_irqsave(&qp->scq->cq_lock, flags); if (qp->rcq && qp->rcq != qp->scq) spin_lock(&qp->rcq->cq_lock); return flags; } void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags) { if (qp->rcq && qp->rcq != qp->scq) spin_unlock(&qp->rcq->cq_lock); spin_unlock_irqrestore(&qp->scq->cq_lock, flags); } /* Queue Pairs */ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) { struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; struct bnxt_re_dev *rdev; unsigned long flags; int rc = 0; rdev = qp->rdev; gsi_sqp = rdev->gsi_ctx.gsi_sqp; gsi_sah = rdev->gsi_ctx.gsi_sah; /* remove from active qp list */ mutex_lock(&rdev->qp_lock); list_del(&gsi_sqp->list); mutex_unlock(&rdev->qp_lock); if (gsi_sah) { dev_dbg(rdev_to_dev(rdev), "Destroy the shadow AH\n"); rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &gsi_sah->qplib_ah, true); if (rc) dev_err(rdev_to_dev(rdev), "Destroy HW AH for shadow QP failed!\n"); atomic_dec(&rdev->stats.rsors.ah_count); } dev_dbg(rdev_to_dev(rdev), "Destroy the shadow QP\n"); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp); if (rc) dev_err(rdev_to_dev(rdev), "Destroy Shadow QP failed\n"); /* Clean the CQ for shadow QP completions */ flags = bnxt_re_lock_cqs(gsi_sqp); bnxt_qplib_clean_qp(&gsi_sqp->qplib_qp); bnxt_re_unlock_cqs(gsi_sqp, flags); bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp); bnxt_qplib_free_hdr_buf(&rdev->qplib_res, &gsi_sqp->qplib_qp); kfree(rdev->gsi_ctx.sqp_tbl); kfree(gsi_sah); kfree(gsi_sqp); rdev->gsi_ctx.gsi_sqp = NULL; rdev->gsi_ctx.gsi_sah = NULL; rdev->gsi_ctx.sqp_tbl = NULL; atomic_dec(&rdev->stats.rsors.qp_count); return 0; } static void bnxt_re_dump_debug_stats(struct bnxt_re_dev *rdev, u32 active_qps) { u32 total_qp = 0; u64 avg_time = 0; int i; if (!rdev->rcfw.sp_perf_stats_enabled) return; switch (active_qps) { case 1: /* Potential hint for Test Stop */ for (i = 0; i < RCFW_MAX_STAT_INDEX; i++) { if 
(rdev->rcfw.qp_destroy_stats[i]) { total_qp++; avg_time += rdev->rcfw.qp_destroy_stats[i]; } } if (total_qp >= 0 || avg_time >= 0) dev_dbg(rdev_to_dev(rdev), "Perf Debug: %ps Total (%d) QP destroyed in (%ld) msec\n", __builtin_return_address(0), total_qp, (long)jiffies_to_msecs(avg_time)); break; case 2: /* Potential hint for Test Start */ dev_dbg(rdev_to_dev(rdev), "Perf Debug: %ps active_qps = %d\n", __builtin_return_address(0), active_qps); break; default: /* Potential hint to know latency of QP destroy. * Average time taken for 1K QP Destroy. */ if (active_qps > 1024 && !(active_qps % 1024)) dev_dbg(rdev_to_dev(rdev), "Perf Debug: %ps Active QP (%d) Watermark (%d)\n", __builtin_return_address(0), active_qps, atomic_read(&rdev->stats.rsors.max_qp_count)); break; } } int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) { struct bnxt_re_qp *qp = to_bnxt_re(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; unsigned long flags; u32 active_qps; int rc; mutex_lock(&rdev->qp_lock); list_del(&qp->list); active_qps = atomic_dec_return(&rdev->stats.rsors.qp_count); if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC) atomic_dec(&rdev->stats.rsors.rc_qp_count); else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) atomic_dec(&rdev->stats.rsors.ud_qp_count); mutex_unlock(&rdev->qp_lock); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) dev_err_ratelimited(rdev_to_dev(rdev), "%s id = %d failed rc = %d\n", __func__, qp->qplib_qp.id, rc); if (!ib_qp->uobject) { flags = bnxt_re_lock_cqs(qp); bnxt_qplib_clean_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); } bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp); if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_qp_mode != BNXT_RE_GSI_MODE_UD) { if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL && rdev->gsi_ctx.gsi_sqp) { bnxt_re_destroy_gsi_sqp(qp); } bnxt_qplib_free_hdr_buf(&rdev->qplib_res, &qp->qplib_qp); } if (qp->rumem && !IS_ERR(qp->rumem)) ib_umem_release(qp->rumem); if (qp->sumem && !IS_ERR(qp->sumem)) ib_umem_release(qp->sumem); kfree(qp); bnxt_re_dump_debug_stats(rdev, active_qps); return 0; } static u8 __from_ib_qp_type(enum ib_qp_type type) { switch (type) { case IB_QPT_GSI: return CMDQ_CREATE_QP1_TYPE_GSI; case IB_QPT_RC: return CMDQ_CREATE_QP_TYPE_RC; case IB_QPT_UD: return CMDQ_CREATE_QP_TYPE_UD; case IB_QPT_RAW_ETHERTYPE: return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE; default: return IB_QPT_MAX; } } static u16 _get_swqe_sz(int nsge) { return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge); } static int bnxt_re_get_swqe_size(int ilsize, int nsge) { u16 wqe_size, calc_ils; wqe_size = _get_swqe_sz(nsge); if (ilsize) { calc_ils = (sizeof(struct sq_send_hdr) + ilsize); wqe_size = max_t(int, calc_ils, wqe_size); wqe_size = ALIGN(wqe_size, 32); } return wqe_size; } static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, struct ib_qp_init_attr *init_attr) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *sq; int align, ilsize; rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; dev_attr = rdev->dev_attr; align = sizeof(struct sq_send_hdr); ilsize = ALIGN(init_attr->cap.max_inline_data, align); sq->wqe_size = bnxt_re_get_swqe_size(ilsize, sq->max_sge); if (sq->wqe_size > _get_swqe_sz(dev_attr->max_qp_sges)) return -EINVAL; /* For Cu/Wh and gen p5 backward compatibility mode * wqe size is fixed to 128 bytes */ if (sq->wqe_size < _get_swqe_sz(dev_attr->max_qp_sges) && qplqp->wqe_mode == 
BNXT_QPLIB_WQE_MODE_STATIC) sq->wqe_size = _get_swqe_sz(dev_attr->max_qp_sges); if (init_attr->cap.max_inline_data) { qplqp->max_inline_data = sq->wqe_size - sizeof(struct sq_send_hdr); init_attr->cap.max_inline_data = qplqp->max_inline_data; if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) sq->max_sge = qplqp->max_inline_data / sizeof(struct sq_sge); } return 0; } static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, struct bnxt_re_qp *qp, struct ib_udata *udata) { struct bnxt_qplib_sg_info *sginfo; struct bnxt_qplib_qp *qplib_qp; struct bnxt_re_ucontext *cntx; struct ib_ucontext *context; struct bnxt_re_qp_req ureq; struct ib_umem *umem; int rc, bytes = 0; int psn_nume; int psn_sz; qplib_qp = &qp->qplib_qp; context = pd->ibpd.uobject->context; cntx = to_bnxt_re(context, struct bnxt_re_ucontext, ibucontext); sginfo = &qplib_qp->sq.sginfo; if (udata->inlen < sizeof(ureq)) dev_warn(rdev_to_dev(rdev), "Update the library ulen %d klen %d\n", (unsigned int)udata->inlen, (unsigned int)sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); if (rc) return rc; bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size); /* Consider mapping PSN search memory only for RC QPs. */ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) { psn_sz = _is_chip_gen_p5_p7(rdev->chip_ctx) ? sizeof(struct sq_psn_search_ext) : sizeof(struct sq_psn_search); if (rdev->dev_attr && BNXT_RE_HW_RETX(rdev->dev_attr->dev_cap_flags)) psn_sz = sizeof(struct sq_msn_search); psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ? qplib_qp->sq.max_wqe : ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) / sizeof(struct bnxt_qplib_sge)); if (BNXT_RE_HW_RETX(rdev->dev_attr->dev_cap_flags)) psn_nume = roundup_pow_of_two(psn_nume); bytes += (psn_nume * psn_sz); } bytes = PAGE_ALIGN(bytes); umem = ib_umem_get_compat(rdev, context, udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed with %ld\n", __func__, PTR_ERR(umem)); return PTR_ERR(umem); } qp->sumem = umem; /* pgsize and pgshft were initialize already. */ sginfo->sghead = get_ib_umem_sgl(umem, &sginfo->nmap); sginfo->npages = ib_umem_num_pages_compat(umem); qplib_qp->qp_handle = ureq.qp_handle; if (!qp->qplib_qp.srq) { sginfo = &qplib_qp->rq.sginfo; bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size); bytes = PAGE_ALIGN(bytes); umem = ib_umem_get_compat(rdev, context, udata, ureq.qprva, bytes, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(umem)) { dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed ret =%ld\n", __func__, PTR_ERR(umem)); goto rqfail; } qp->rumem = umem; /* pgsize and pgshft were initialize already. 
*/ sginfo->sghead = get_ib_umem_sgl(umem, &sginfo->nmap); sginfo->npages = ib_umem_num_pages_compat(umem); } qplib_qp->dpi = &cntx->dpi; qplib_qp->is_user = true; return 0; rqfail: ib_umem_release(qp->sumem); qp->sumem = NULL; qplib_qp->sq.sginfo.sghead = NULL; qplib_qp->sq.sginfo.nmap = 0; return PTR_ERR(umem); } static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah(struct bnxt_re_pd *pd, struct bnxt_qplib_res *qp1_res, struct bnxt_qplib_qp *qp1_qp) { struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_ah *ah; union ib_gid sgid; int rc; ah = kzalloc(sizeof(*ah), GFP_KERNEL); if (!ah) { dev_err(rdev_to_dev(rdev), "Allocate Address Handle failed!\n"); return NULL; } memset(ah, 0, sizeof(*ah)); ah->rdev = rdev; ah->qplib_ah.pd = &pd->qplib_pd; rc = bnxt_re_query_gid(&rdev->ibdev, 1, 0, &sgid); if (rc) goto fail; /* supply the dgid data same as sgid */ memcpy(ah->qplib_ah.dgid.data, &sgid.raw, sizeof(union ib_gid)); ah->qplib_ah.sgid_index = 0; ah->qplib_ah.traffic_class = 0; ah->qplib_ah.flow_label = 0; ah->qplib_ah.hop_limit = 1; ah->qplib_ah.sl = 0; /* Have DMAC same as SMAC */ ether_addr_copy(ah->qplib_ah.dmac, rdev->dev_addr); dev_dbg(rdev_to_dev(rdev), "ah->qplib_ah.dmac = %x:%x:%x:%x:%x:%x\n", ah->qplib_ah.dmac[0], ah->qplib_ah.dmac[1], ah->qplib_ah.dmac[2], ah->qplib_ah.dmac[3], ah->qplib_ah.dmac[4], ah->qplib_ah.dmac[5]); rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, true); if (rc) { dev_err(rdev_to_dev(rdev), "Allocate HW AH for Shadow QP failed!\n"); goto fail; } dev_dbg(rdev_to_dev(rdev), "AH ID = %d\n", ah->qplib_ah.id); atomic_inc(&rdev->stats.rsors.ah_count); return ah; fail: kfree(ah); return NULL; } void bnxt_re_update_shadow_ah(struct bnxt_re_dev *rdev) { struct bnxt_re_qp *gsi_qp; struct bnxt_re_ah *sah; struct bnxt_re_pd *pd; struct ib_pd *ib_pd; int rc; if (!rdev) return; sah = rdev->gsi_ctx.gsi_sah; dev_dbg(rdev_to_dev(rdev), "Updating the AH\n"); if (sah) { /* Check if the AH created with current mac address */ if (!compare_ether_header(sah->qplib_ah.dmac, rdev->dev_addr)) { dev_dbg(rdev_to_dev(rdev), "Not modifying shadow AH during AH update\n"); return; } gsi_qp = rdev->gsi_ctx.gsi_qp; ib_pd = gsi_qp->ib_qp.pd; pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &sah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy shadow AH during AH update\n"); return; } atomic_dec(&rdev->stats.rsors.ah_count); kfree(sah); rdev->gsi_ctx.gsi_sah = NULL; sah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res, &gsi_qp->qplib_qp); if (!sah) { dev_err(rdev_to_dev(rdev), "Failed to update AH for ShadowQP\n"); return; } rdev->gsi_ctx.gsi_sah = sah; atomic_inc(&rdev->stats.rsors.ah_count); } } static struct bnxt_re_qp *bnxt_re_create_shadow_qp(struct bnxt_re_pd *pd, struct bnxt_qplib_res *qp1_res, struct bnxt_qplib_qp *qp1_qp) { struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_qp *qp; int rc; qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) { dev_err(rdev_to_dev(rdev), "Allocate internal UD QP failed!\n"); return NULL; } memset(qp, 0, sizeof(*qp)); qp->rdev = rdev; /* Initialize the shadow QP structure from the QP1 values */ ether_addr_copy(qp->qplib_qp.smac, rdev->dev_addr); qp->qplib_qp.pd = &pd->qplib_pd; qp->qplib_qp.qp_handle = (u64)&qp->qplib_qp; qp->qplib_qp.type = IB_QPT_UD; qp->qplib_qp.max_inline_data = 0; qp->qplib_qp.sig_type = true; /* Shadow QP SQ depth should be same as QP1 RQ depth */ qp->qplib_qp.sq.wqe_size = bnxt_re_get_swqe_size(0, 6); qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe; 
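/*
 * The shadow QP is an internal UD QP used only in BNXT_RE_GSI_MODE_ALL;
 * it shares QP1's send and receive CQs (assigned below) and its queue
 * depths mirror the QP1 RQ.
 */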
qp->qplib_qp.sq.max_sge = 2; /* Q full delta can be 1 since it is internal QP */ qp->qplib_qp.sq.q_full_delta = 1; qp->qplib_qp.sq.sginfo.pgsize = PAGE_SIZE; qp->qplib_qp.sq.sginfo.pgshft = PAGE_SHIFT; qp->qplib_qp.scq = qp1_qp->scq; qp->qplib_qp.rcq = qp1_qp->rcq; qp->qplib_qp.rq.wqe_size = _max_rwqe_sz(6); /* 128 Byte wqe size */ qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe; qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge; qp->qplib_qp.rq.sginfo.pgsize = PAGE_SIZE; qp->qplib_qp.rq.sginfo.pgshft = PAGE_SHIFT; /* Q full delta can be 1 since it is internal QP */ qp->qplib_qp.rq.q_full_delta = 1; qp->qplib_qp.mtu = qp1_qp->mtu; qp->qplib_qp.dpi = &rdev->dpi_privileged; rc = bnxt_qplib_alloc_hdr_buf(qp1_res, &qp->qplib_qp, 0, BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6); if (rc) goto fail; rc = bnxt_qplib_create_qp(qp1_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "create HW QP failed!\n"); goto qp_fail; } dev_dbg(rdev_to_dev(rdev), "Created shadow QP with ID = %d\n", qp->qplib_qp.id); spin_lock_init(&qp->sq_lock); INIT_LIST_HEAD(&qp->list); mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); atomic_inc(&rdev->stats.rsors.qp_count); mutex_unlock(&rdev->qp_lock); return qp; qp_fail: bnxt_qplib_free_hdr_buf(qp1_res, &qp->qplib_qp); fail: kfree(qp); return NULL; } static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, struct ib_qp_init_attr *init_attr, void *cntx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *rq; int entries; rdev = qp->rdev; qplqp = &qp->qplib_qp; rq = &qplqp->rq; dev_attr = rdev->dev_attr; if (init_attr->srq) { struct bnxt_re_srq *srq; srq = to_bnxt_re(init_attr->srq, struct bnxt_re_srq, ibsrq); if (!srq) { dev_err(rdev_to_dev(rdev), "SRQ not found\n"); return -EINVAL; } qplqp->srq = &srq->qplib_srq; rq->max_wqe = 0; } else { rq->max_sge = init_attr->cap.max_recv_sge; if (rq->max_sge > dev_attr->max_qp_sges) rq->max_sge = dev_attr->max_qp_sges; init_attr->cap.max_recv_sge = rq->max_sge; rq->wqe_size = bnxt_re_get_rwqe_size(qplqp, rq->max_sge, dev_attr->max_qp_sges); /* Allocate 1 more than what's provided so posting max doesn't mean empty */ entries = init_attr->cap.max_recv_wr + 1; entries = bnxt_re_init_depth(entries, cntx); rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); rq->q_full_delta = 0; rq->sginfo.pgsize = PAGE_SIZE; rq->sginfo.pgshft = PAGE_SHIFT; } return 0; } static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; rdev = qp->rdev; qplqp = &qp->qplib_qp; dev_attr = rdev->dev_attr; if (rdev->gsi_ctx.gsi_qp_mode != BNXT_RE_GSI_MODE_UD) qplqp->rq.max_sge = dev_attr->max_qp_sges; } static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, struct ib_qp_init_attr *init_attr, void *cntx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_qplib_q *sq; int diff = 0; int entries; int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; dev_attr = rdev->dev_attr; sq->max_sge = init_attr->cap.max_send_sge; if (sq->max_sge > dev_attr->max_qp_sges) { sq->max_sge = dev_attr->max_qp_sges; init_attr->cap.max_send_sge = sq->max_sge; } rc = bnxt_re_setup_swqe_size(qp, init_attr); if (rc) return rc; /* * Change the SQ depth if user has requested minimum using * configfs. Only supported for kernel consumers. 
Setting * min_tx_depth to 4096 to handle iser SQ full condition * in most of the newer OS distros */ entries = init_attr->cap.max_send_wr; if (!cntx && rdev->min_tx_depth && init_attr->qp_type != IB_QPT_GSI) { /* * If users specify any value greater than 1 use min_tx_depth * provided by user for comparison. Else, compare it with the * BNXT_RE_MIN_KERNEL_QP_TX_DEPTH and adjust it accordingly. */ if (rdev->min_tx_depth > 1 && entries < rdev->min_tx_depth) entries = rdev->min_tx_depth; else if (entries < BNXT_RE_MIN_KERNEL_QP_TX_DEPTH) entries = BNXT_RE_MIN_KERNEL_QP_TX_DEPTH; } diff = bnxt_re_get_diff(cntx, rdev->chip_ctx); entries = bnxt_re_init_depth(entries + diff + 1, cntx); sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1); sq->q_full_delta = diff + 1; /* * Reserving one slot for Phantom WQE. Application can * post one extra entry in this case. But allowing this to avoid * unexpected Queue full condition */ sq->q_full_delta -= 1; /* becomes 0 for gen-p5 */ sq->sginfo.pgsize = PAGE_SIZE; sq->sginfo.pgshft = PAGE_SHIFT; return 0; } static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, struct ib_qp_init_attr *init_attr, void *cntx) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; int entries; rdev = qp->rdev; qplqp = &qp->qplib_qp; dev_attr = rdev->dev_attr; if (rdev->gsi_ctx.gsi_qp_mode != BNXT_RE_GSI_MODE_UD) { entries = init_attr->cap.max_send_wr + 1; entries = bnxt_re_init_depth(entries, cntx); qplqp->sq.max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1); qplqp->sq.q_full_delta = qplqp->sq.max_wqe - init_attr->cap.max_send_wr; qplqp->sq.max_sge++; /* Need one extra sge to put UD header */ if (qplqp->sq.max_sge > dev_attr->max_qp_sges) qplqp->sq.max_sge = dev_attr->max_qp_sges; } } static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev, struct ib_qp_init_attr *init_attr) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_re_gsi_context *gsi_ctx; int qptype; chip_ctx = rdev->chip_ctx; gsi_ctx = &rdev->gsi_ctx; qptype = __from_ib_qp_type(init_attr->qp_type); if (qptype == IB_QPT_MAX) { dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported\n", qptype); qptype = -EINVAL; goto out; } if (_is_chip_gen_p5_p7(chip_ctx) && init_attr->qp_type == IB_QPT_GSI) { /* For Thor always force UD mode. */ qptype = CMDQ_CREATE_QP_TYPE_GSI; gsi_ctx->gsi_qp_mode = BNXT_RE_GSI_MODE_UD; } out: return qptype; } static int bnxt_re_init_qp_wqe_mode(struct bnxt_re_dev *rdev) { return rdev->chip_ctx->modes.wqe_mode; } static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_re_ucontext *cntx = NULL; struct ib_ucontext *context; struct bnxt_qplib_qp *qplqp; struct bnxt_re_dev *rdev; struct bnxt_re_cq *cq; int rc = 0, qptype; rdev = qp->rdev; qplqp = &qp->qplib_qp; dev_attr = rdev->dev_attr; if (udata) { context = pd->ibpd.uobject->context; cntx = to_bnxt_re(context, struct bnxt_re_ucontext, ibucontext); } /* Setup misc params */ qplqp->is_user = false; qplqp->pd = &pd->qplib_pd; qplqp->qp_handle = (u64)qplqp; qplqp->sig_type = ((init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
true : false); qptype = bnxt_re_init_qp_type(rdev, init_attr); if (qptype < 0) { rc = qptype; goto out; } qplqp->type = (u8)qptype; qplqp->wqe_mode = bnxt_re_init_qp_wqe_mode(rdev); ether_addr_copy(qplqp->smac, rdev->dev_addr); if (init_attr->qp_type == IB_QPT_RC) { qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom; qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom; } - qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->if_mtu)); + qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(if_getmtu(rdev->netdev))); qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */ if (init_attr->create_flags) { dev_dbg(rdev_to_dev(rdev), "QP create flags 0x%x not supported\n", init_attr->create_flags); return -EOPNOTSUPP; } /* Setup CQs */ if (init_attr->send_cq) { cq = to_bnxt_re(init_attr->send_cq, struct bnxt_re_cq, ibcq); if (!cq) { dev_err(rdev_to_dev(rdev), "Send CQ not found\n"); rc = -EINVAL; goto out; } qplqp->scq = &cq->qplib_cq; qp->scq = cq; } if (init_attr->recv_cq) { cq = to_bnxt_re(init_attr->recv_cq, struct bnxt_re_cq, ibcq); if (!cq) { dev_err(rdev_to_dev(rdev), "Receive CQ not found\n"); rc = -EINVAL; goto out; } qplqp->rcq = &cq->qplib_cq; qp->rcq = cq; } /* Setup RQ/SRQ */ rc = bnxt_re_init_rq_attr(qp, init_attr, cntx); if (rc) goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_rq_attr(qp); /* Setup SQ */ rc = bnxt_re_init_sq_attr(qp, init_attr, cntx); if (rc) goto out; if (init_attr->qp_type == IB_QPT_GSI) bnxt_re_adjust_gsi_sq_attr(qp, init_attr, cntx); if (udata) /* This will update DPI and qp_handle */ rc = bnxt_re_init_user_qp(rdev, pd, qp, udata); out: return rc; } static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd) { struct bnxt_re_sqp_entries *sqp_tbl = NULL; struct bnxt_re_dev *rdev; struct bnxt_re_qp *sqp; struct bnxt_re_ah *sah; int rc = 0; rdev = qp->rdev; /* Create a shadow QP to handle the QP1 traffic */ sqp_tbl = kzalloc(sizeof(*sqp_tbl) * BNXT_RE_MAX_GSI_SQP_ENTRIES, GFP_KERNEL); if (!sqp_tbl) return -ENOMEM; rdev->gsi_ctx.sqp_tbl = sqp_tbl; sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res, &qp->qplib_qp); if (!sqp) { rc = -ENODEV; dev_err(rdev_to_dev(rdev), "Failed to create Shadow QP for QP1\n"); goto out; } rdev->gsi_ctx.gsi_sqp = sqp; sqp->rcq = qp->rcq; sqp->scq = qp->scq; sah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res, &qp->qplib_qp); if (!sah) { bnxt_qplib_destroy_qp(&rdev->qplib_res, &sqp->qplib_qp); rc = -ENODEV; dev_err(rdev_to_dev(rdev), "Failed to create AH entry for ShadowQP\n"); goto out; } rdev->gsi_ctx.gsi_sah = sah; return 0; out: kfree(sqp_tbl); return rc; } static int __get_rq_hdr_buf_size(u8 gsi_mode) { return (gsi_mode == BNXT_RE_GSI_MODE_ALL) ? BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2 : BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE; } static int __get_sq_hdr_buf_size(u8 gsi_mode) { return (gsi_mode != BNXT_RE_GSI_MODE_ROCE_V1) ? 
BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2 : BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE; } static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd) { struct bnxt_qplib_qp *qplqp; struct bnxt_qplib_res *res; struct bnxt_re_dev *rdev; u32 sstep, rstep; u8 gsi_mode; int rc = 0; rdev = qp->rdev; qplqp = &qp->qplib_qp; res = &rdev->qplib_res; gsi_mode = rdev->gsi_ctx.gsi_qp_mode; rstep = __get_rq_hdr_buf_size(gsi_mode); sstep = __get_sq_hdr_buf_size(gsi_mode); rc = bnxt_qplib_alloc_hdr_buf(res, qplqp, sstep, rstep); if (rc) goto out; rc = bnxt_qplib_create_qp1(res, qplqp); if (rc) { dev_err(rdev_to_dev(rdev), "create HW QP1 failed!\n"); goto out; } if (gsi_mode == BNXT_RE_GSI_MODE_ALL) rc = bnxt_re_create_shadow_gsi(qp, pd); out: return rc; } static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev, struct ib_qp_init_attr *init_attr, struct bnxt_qplib_dev_attr *dev_attr) { bool rc = true; int ilsize; ilsize = ALIGN(init_attr->cap.max_inline_data, sizeof(struct sq_sge)); if ((init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) || (init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) || (init_attr->cap.max_send_sge > dev_attr->max_qp_sges) || (init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) || (ilsize > dev_attr->max_inline_data)) { dev_err(rdev_to_dev(rdev), "Create QP failed - max exceeded! " "0x%x/0x%x 0x%x/0x%x 0x%x/0x%x " "0x%x/0x%x 0x%x/0x%x\n", init_attr->cap.max_send_wr, dev_attr->max_qp_wqes, init_attr->cap.max_recv_wr, dev_attr->max_qp_wqes, init_attr->cap.max_send_sge, dev_attr->max_qp_sges, init_attr->cap.max_recv_sge, dev_attr->max_qp_sges, init_attr->cap.max_inline_data, dev_attr->max_inline_data); rc = false; } return rc; } static inline struct bnxt_re_qp *__get_qp_from_qp_in(struct ib_pd *qp_in, struct bnxt_re_dev *rdev) { struct bnxt_re_qp *qp; qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) dev_err(rdev_to_dev(rdev), "Allocate QP failed!\n"); return qp; } struct ib_qp *bnxt_re_create_qp(struct ib_pd *qp_in, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata) { struct bnxt_re_pd *pd; struct ib_pd *ib_pd = qp_in; struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_re_dev *rdev; u32 active_qps, tmp_qps; struct bnxt_re_qp *qp; int rc; pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); rdev = pd->rdev; dev_attr = rdev->dev_attr; if (rdev->mod_exit) { rc = -EIO; dev_dbg(rdev_to_dev(rdev), "%s(): in mod_exit, just return!\n", __func__); goto exit; } if (atomic_read(&rdev->stats.rsors.qp_count) >= dev_attr->max_qp) { dev_err(rdev_to_dev(rdev), "Create QP failed - max exceeded(QPs Alloc'd %u of max %u)\n", atomic_read(&rdev->stats.rsors.qp_count), dev_attr->max_qp); rc = -EINVAL; goto exit; } rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr); if (!rc) { rc = -EINVAL; goto exit; } qp = __get_qp_from_qp_in(qp_in, rdev); if (!qp) { rc = -ENOMEM; goto exit; } qp->rdev = rdev; rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, udata); if (rc) goto fail; if (qp_init_attr->qp_type == IB_QPT_GSI && !_is_chip_gen_p5_p7(rdev->chip_ctx)) { rc = bnxt_re_create_gsi_qp(qp, pd); if (rc == -ENODEV) goto qp_destroy; if (rc) goto fail; } else { rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "create HW QP failed!\n"); goto free_umem; } if (udata) { struct bnxt_re_qp_resp resp; resp.qpid = qp->qplib_qp.id; rc = bnxt_re_copy_to_udata(rdev, &resp, min(udata->outlen, sizeof(resp)), udata); if (rc) goto qp_destroy; } } qp->ib_qp.qp_num = qp->qplib_qp.id; if (qp_init_attr->qp_type == IB_QPT_GSI) rdev->gsi_ctx.gsi_qp = qp; 
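/*
 * Common post-create bookkeeping: initialize the per-QP locks, link the
 * QP into rdev->qp_list under qp_lock, and bump the active-QP and
 * per-type (RC/UD) counters along with their high-water marks.
 */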
spin_lock_init(&qp->sq_lock); spin_lock_init(&qp->rq_lock); INIT_LIST_HEAD(&qp->list); mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); mutex_unlock(&rdev->qp_lock); atomic_inc(&rdev->stats.rsors.qp_count); active_qps = atomic_read(&rdev->stats.rsors.qp_count); if (active_qps > atomic_read(&rdev->stats.rsors.max_qp_count)) atomic_set(&rdev->stats.rsors.max_qp_count, active_qps); bnxt_re_dump_debug_stats(rdev, active_qps); /* Get the counters for RC QPs and UD QPs */ if (qp_init_attr->qp_type == IB_QPT_RC) { tmp_qps = atomic_inc_return(&rdev->stats.rsors.rc_qp_count); if (tmp_qps > atomic_read(&rdev->stats.rsors.max_rc_qp_count)) atomic_set(&rdev->stats.rsors.max_rc_qp_count, tmp_qps); } else if (qp_init_attr->qp_type == IB_QPT_UD) { tmp_qps = atomic_inc_return(&rdev->stats.rsors.ud_qp_count); if (tmp_qps > atomic_read(&rdev->stats.rsors.max_ud_qp_count)) atomic_set(&rdev->stats.rsors.max_ud_qp_count, tmp_qps); } return &qp->ib_qp; qp_destroy: bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp); free_umem: if (udata) { if (qp->rumem && !IS_ERR(qp->rumem)) ib_umem_release(qp->rumem); if (qp->sumem && !IS_ERR(qp->sumem)) ib_umem_release(qp->sumem); } fail: kfree(qp); exit: return ERR_PTR(rc); } static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp1_qp, int qp_attr_mask) { struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp; int rc = 0; if (qp_attr_mask & IB_QP_STATE) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; qp->qplib_qp.state = qp1_qp->qplib_qp.state; } if (qp_attr_mask & IB_QP_PKEY_INDEX) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; qp->qplib_qp.pkey_index = qp1_qp->qplib_qp.pkey_index; } if (qp_attr_mask & IB_QP_QKEY) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY; /* Using a Random QKEY */ qp->qplib_qp.qkey = BNXT_RE_QP_RANDOM_QKEY; } if (qp_attr_mask & IB_QP_SQ_PSN) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN; qp->qplib_qp.sq.psn = qp1_qp->qplib_qp.sq.psn; } rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) dev_err(rdev_to_dev(rdev), "Modify Shadow QP for QP1 failed\n"); return rc; } static u32 ipv4_from_gid(u8 *gid) { return (gid[15] << 24 | gid[14] << 16 | gid[13] << 8 | gid[12]); } static u16 get_source_port(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) { u8 ip_off, data[48], smac[ETH_ALEN]; u16 crc = 0, buf_len = 0, i; u8 addr_len; u32 qpn; if (qp->qplib_qp.nw_type == CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6) { addr_len = 6; ip_off = 10; } else { addr_len = 4; ip_off = 12; } memcpy(smac, qp->qplib_qp.smac, ETH_ALEN); memset(data, 0, 48); memcpy(data, qp->qplib_qp.ah.dmac, ETH_ALEN); buf_len += ETH_ALEN; memcpy(data + buf_len, smac, ETH_ALEN); buf_len += ETH_ALEN; memcpy(data + buf_len, qp->qplib_qp.ah.dgid.data + ip_off, addr_len); buf_len += addr_len; memcpy(data + buf_len, qp->qp_info_entry.sgid.raw + ip_off, addr_len); buf_len += addr_len; qpn = htonl(qp->qplib_qp.dest_qpn); memcpy(data + buf_len, (u8 *)&qpn + 1, 3); buf_len += 3; for (i = 0; i < buf_len; i++) crc = crc16(crc, (data + i), 1); return crc; } static void bnxt_re_update_qp_info(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) { u16 type; type = __from_hw_to_ib_qp_type(qp->qplib_qp.type); /* User-space can extract ip address with sgid_index. 
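 * The addresses, and for RoCE v2 RC connections the derived UDP source
 * port, are recorded in qp_info_entry below.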
*/ if (ipv6_addr_v4mapped((struct in6_addr *)&qp->qplib_qp.ah.dgid)) { qp->qp_info_entry.s_ip.ipv4_addr = ipv4_from_gid(qp->qp_info_entry.sgid.raw); qp->qp_info_entry.d_ip.ipv4_addr = ipv4_from_gid(qp->qplib_qp.ah.dgid.data); } else { memcpy(&qp->qp_info_entry.s_ip.ipv6_addr, qp->qp_info_entry.sgid.raw, sizeof(qp->qp_info_entry.s_ip.ipv6_addr)); memcpy(&qp->qp_info_entry.d_ip.ipv6_addr, qp->qplib_qp.ah.dgid.data, sizeof(qp->qp_info_entry.d_ip.ipv6_addr)); } if (type == IB_QPT_RC && (qp->qplib_qp.nw_type == CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4 || qp->qplib_qp.nw_type == CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6)) { qp->qp_info_entry.s_port = get_source_port(rdev, qp); } qp->qp_info_entry.d_port = BNXT_RE_QP_DEST_PORT; } static void bnxt_qplib_manage_flush_qp(struct bnxt_re_qp *qp) { struct bnxt_qplib_q *rq, *sq; unsigned long flags; if (qp->sumem) return; if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { rq = &qp->qplib_qp.rq; sq = &qp->qplib_qp.sq; dev_dbg(rdev_to_dev(qp->rdev), "Move QP = %p to flush list\n", qp); flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); if (sq->hwq.prod != sq->hwq.cons) bnxt_re_handle_cqn(&qp->scq->qplib_cq); if (qp->rcq && (qp->rcq != qp->scq) && (rq->hwq.prod != rq->hwq.cons)) bnxt_re_handle_cqn(&qp->rcq->qplib_cq); } if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) { dev_dbg(rdev_to_dev(qp->rdev), "Move QP = %p out of flush list\n", qp); flags = bnxt_re_lock_cqs(qp); bnxt_qplib_clean_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); } } bool ib_modify_qp_is_ok_compat(enum ib_qp_state cur_state, enum ib_qp_state next_state, enum ib_qp_type type, enum ib_qp_attr_mask mask) { return (ib_modify_qp_is_ok(cur_state, next_state, type, mask)); } int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata) { enum ib_qp_state curr_qp_state, new_qp_state; struct bnxt_re_modify_qp_ex_resp resp = {}; struct bnxt_re_modify_qp_ex_req ureq = {}; struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_ppp *ppp = NULL; struct bnxt_re_dev *rdev; struct bnxt_re_qp *qp; struct ib_gid_attr *sgid_attr; struct ib_gid_attr gid_attr; union ib_gid sgid, *gid_ptr = NULL; u8 nw_type; int rc, entries, status; bool is_copy_to_udata = false; bool is_qpmtu_high = false; qp = to_bnxt_re(ib_qp, struct bnxt_re_qp, ib_qp); rdev = qp->rdev; dev_attr = rdev->dev_attr; qp->qplib_qp.modify_flags = 0; ppp = &qp->qplib_qp.ppp; if (qp_attr_mask & IB_QP_STATE) { curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); new_qp_state = qp_attr->qp_state; if (!ib_modify_qp_is_ok_compat(curr_qp_state, new_qp_state, ib_qp->qp_type, qp_attr_mask)) { dev_err(rdev_to_dev(rdev),"invalid attribute mask=0x%x" " specified for qpn=0x%x of type=0x%x" " current_qp_state=0x%x, new_qp_state=0x%x\n", qp_attr_mask, ib_qp->qp_num, ib_qp->qp_type, curr_qp_state, new_qp_state); return -EINVAL; } dev_dbg(rdev_to_dev(rdev), "%s:%d INFO attribute mask=0x%x qpn=0x%x " "of type=0x%x current_qp_state=0x%x, new_qp_state=0x%x\n", __func__, __LINE__, qp_attr_mask, ib_qp->qp_num, ib_qp->qp_type, curr_qp_state, new_qp_state); qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state); if (udata && curr_qp_state == IB_QPS_RESET && new_qp_state == IB_QPS_INIT) { if (!ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { if (ureq.comp_mask & BNXT_RE_COMP_MASK_MQP_EX_PPP_REQ_EN_MASK) { ppp->req = BNXT_QPLIB_PPP_REQ; ppp->dpi = ureq.dpi; } } } } if 
(qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY; qp->qplib_qp.en_sqd_async_notify = true; } if (qp_attr_mask & IB_QP_ACCESS_FLAGS) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS; qp->qplib_qp.access = __from_ib_access_flags(qp_attr->qp_access_flags); /* LOCAL_WRITE access must be set to allow RC receive */ qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE; qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE; qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ; } if (qp_attr_mask & IB_QP_PKEY_INDEX) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY; qp->qplib_qp.pkey_index = qp_attr->pkey_index; } if (qp_attr_mask & IB_QP_QKEY) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY; qp->qplib_qp.qkey = qp_attr->qkey; } if (qp_attr_mask & IB_QP_AV) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID | CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL | CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX | CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT | CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS | CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC | CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID; memcpy(qp->qplib_qp.ah.dgid.data, qp_attr->ah_attr.grh.dgid.raw, sizeof(qp->qplib_qp.ah.dgid.data)); qp->qplib_qp.ah.flow_label = qp_attr->ah_attr.grh.flow_label; qp->qplib_qp.ah.sgid_index = _get_sgid_index(rdev, qp_attr->ah_attr.grh.sgid_index); qp->qplib_qp.ah.host_sgid_index = qp_attr->ah_attr.grh.sgid_index; qp->qplib_qp.ah.hop_limit = qp_attr->ah_attr.grh.hop_limit; qp->qplib_qp.ah.traffic_class = qp_attr->ah_attr.grh.traffic_class; qp->qplib_qp.ah.sl = qp_attr->ah_attr.sl; ether_addr_copy(qp->qplib_qp.ah.dmac, ROCE_DMAC(&qp_attr->ah_attr)); sgid_attr = &gid_attr; status = bnxt_re_get_cached_gid(&rdev->ibdev, 1, qp_attr->ah_attr.grh.sgid_index, &sgid, &sgid_attr, &qp_attr->ah_attr.grh, NULL); if (!status) if_rele(sgid_attr->ndev); gid_ptr = &sgid; if (sgid_attr->ndev) { memcpy(qp->qplib_qp.smac, rdev->dev_addr, ETH_ALEN); nw_type = bnxt_re_gid_to_network_type(sgid_attr, &sgid); dev_dbg(rdev_to_dev(rdev), "Connection using the nw_type %d\n", nw_type); switch (nw_type) { case RDMA_NETWORK_IPV4: qp->qplib_qp.nw_type = CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4; break; case RDMA_NETWORK_IPV6: qp->qplib_qp.nw_type = CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6; break; default: qp->qplib_qp.nw_type = CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1; break; } } memcpy(&qp->qp_info_entry.sgid, gid_ptr, sizeof(qp->qp_info_entry.sgid)); } /* MTU settings allowed only during INIT -> RTR */ if (qp_attr->qp_state == IB_QPS_RTR) { - bnxt_re_init_qpmtu(qp, rdev->netdev->if_mtu, qp_attr_mask, qp_attr, + bnxt_re_init_qpmtu(qp, if_getmtu(rdev->netdev), qp_attr_mask, qp_attr, &is_qpmtu_high); if (udata && !ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { if (ureq.comp_mask & BNXT_RE_COMP_MASK_MQP_EX_PATH_MTU_MASK) { resp.comp_mask |= BNXT_RE_COMP_MASK_MQP_EX_PATH_MTU_MASK; resp.path_mtu = qp->qplib_qp.mtu; is_copy_to_udata = true; } else if (is_qpmtu_high) { dev_err(rdev_to_dev(rdev), "qp %#x invalid mtu\n", qp->qplib_qp.id); return -EINVAL; } } } if (qp_attr_mask & IB_QP_TIMEOUT) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT; qp->qplib_qp.timeout = qp_attr->timeout; } if (qp_attr_mask & IB_QP_RETRY_CNT) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT; qp->qplib_qp.retry_cnt = qp_attr->retry_cnt; } if (qp_attr_mask & IB_QP_RNR_RETRY) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY; 
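/* An rnr_retry value of 7 requests unlimited RNR retries, per the IB spec. */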
qp->qplib_qp.rnr_retry = qp_attr->rnr_retry; } if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER; qp->qplib_qp.min_rnr_timer = qp_attr->min_rnr_timer; } if (qp_attr_mask & IB_QP_RQ_PSN) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN; qp->qplib_qp.rq.psn = qp_attr->rq_psn; } if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC; /* Cap the max_rd_atomic to device max */ if (qp_attr->max_rd_atomic > dev_attr->max_qp_rd_atom) dev_dbg(rdev_to_dev(rdev), "max_rd_atomic requested %d is > device max %d\n", qp_attr->max_rd_atomic, dev_attr->max_qp_rd_atom); qp->qplib_qp.max_rd_atomic = min_t(u32, qp_attr->max_rd_atomic, dev_attr->max_qp_rd_atom); } if (qp_attr_mask & IB_QP_SQ_PSN) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN; qp->qplib_qp.sq.psn = qp_attr->sq_psn; } if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (qp_attr->max_dest_rd_atomic > dev_attr->max_qp_init_rd_atom) { dev_err(rdev_to_dev(rdev), "max_dest_rd_atomic requested %d is > device max %d\n", qp_attr->max_dest_rd_atomic, dev_attr->max_qp_init_rd_atom); return -EINVAL; } qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC; qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic; } if (qp_attr_mask & IB_QP_CAP) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE | CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE | CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE | CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE | CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA; if ((qp_attr->cap.max_send_wr >= dev_attr->max_qp_wqes) || (qp_attr->cap.max_recv_wr >= dev_attr->max_qp_wqes) || (qp_attr->cap.max_send_sge >= dev_attr->max_qp_sges) || (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) || (qp_attr->cap.max_inline_data >= dev_attr->max_inline_data)) { dev_err(rdev_to_dev(rdev), "Create QP failed - max exceeded\n"); return -EINVAL; } entries = roundup_pow_of_two(qp_attr->cap.max_send_wr); if (entries > dev_attr->max_qp_wqes) entries = dev_attr->max_qp_wqes; entries = min_t(u32, entries, dev_attr->max_qp_wqes); qp->qplib_qp.sq.max_wqe = entries; qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe - qp_attr->cap.max_send_wr; /* * Reserving one slot for Phantom WQE. Some application can * post one extra entry in this case. 
Allowing this to avoid * unexpected Queue full condition */ qp->qplib_qp.sq.q_full_delta -= 1; qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge; if (qp->qplib_qp.rq.max_wqe) { entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr); if (entries > dev_attr->max_qp_wqes) entries = dev_attr->max_qp_wqes; qp->qplib_qp.rq.max_wqe = entries; qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe - qp_attr->cap.max_recv_wr; qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge; } else { /* SRQ was used prior, just ignore the RQ caps */ } } if (qp_attr_mask & IB_QP_DEST_QPN) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID; qp->qplib_qp.dest_qpn = qp_attr->dest_qp_num; } rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Modify HW QP failed!\n"); return rc; } if (qp_attr_mask & IB_QP_STATE) bnxt_qplib_manage_flush_qp(qp); if (ureq.comp_mask & BNXT_RE_COMP_MASK_MQP_EX_PPP_REQ_EN_MASK && ppp->st_idx_en & CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_ENABLED) { resp.comp_mask |= BNXT_RE_COMP_MASK_MQP_EX_PPP_REQ_EN; resp.ppp_st_idx = ppp->st_idx_en >> BNXT_QPLIB_PPP_ST_IDX_SHIFT; is_copy_to_udata = true; } if (is_copy_to_udata) { rc = bnxt_re_copy_to_udata(rdev, &resp, min(udata->outlen, sizeof(resp)), udata); if (rc) return rc; } if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL && rdev->gsi_ctx.gsi_sqp) rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask); /* * Update info when qp_info_info */ bnxt_re_update_qp_info(rdev, qp); return rc; } int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { struct bnxt_re_qp *qp = to_bnxt_re(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; struct bnxt_qplib_qp *qplib_qp; int rc; qplib_qp = kcalloc(1, sizeof(*qplib_qp), GFP_KERNEL); if (!qplib_qp) return -ENOMEM; qplib_qp->id = qp->qplib_qp.id; qplib_qp->ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index; rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Query HW QP (0x%x) failed! rc = %d\n", qplib_qp->id, rc); goto free_mem; } qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state); qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); qp_attr->pkey_index = qplib_qp->pkey_index; qp_attr->qkey = qplib_qp->qkey; memcpy(qp_attr->ah_attr.grh.dgid.raw, qplib_qp->ah.dgid.data, sizeof(qplib_qp->ah.dgid.data)); qp_attr->ah_attr.grh.flow_label = qplib_qp->ah.flow_label; qp_attr->ah_attr.grh.sgid_index = qplib_qp->ah.host_sgid_index; qp_attr->ah_attr.grh.hop_limit = qplib_qp->ah.hop_limit; qp_attr->ah_attr.grh.traffic_class = qplib_qp->ah.traffic_class; qp_attr->ah_attr.sl = qplib_qp->ah.sl; ether_addr_copy(ROCE_DMAC(&qp_attr->ah_attr), qplib_qp->ah.dmac); qp_attr->path_mtu = __to_ib_mtu(qplib_qp->path_mtu); qp_attr->timeout = qplib_qp->timeout; qp_attr->retry_cnt = qplib_qp->retry_cnt; qp_attr->rnr_retry = qplib_qp->rnr_retry; qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer; qp_attr->rq_psn = qplib_qp->rq.psn; qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic; qp_attr->sq_psn = qplib_qp->sq.psn; qp_attr->max_dest_rd_atomic = qplib_qp->max_dest_rd_atomic; qp_init_attr->sq_sig_type = qplib_qp->sig_type ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; qp_attr->dest_qp_num = qplib_qp->dest_qpn; qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe; qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge; qp_attr->cap.max_recv_wr = qp->qplib_qp.rq.max_wqe; qp_attr->cap.max_recv_sge = qp->qplib_qp.rq.max_sge; qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data; qp_init_attr->cap = qp_attr->cap; free_mem: kfree(qplib_qp); return rc; } /* Builders */ /* For Raw, the application is responsible to build the entire packet */ static void bnxt_re_build_raw_send(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->send_flags) { case IB_SEND_IP_CSUM: wqe->rawqp1.lflags |= SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM; break; default: /* Pad HW RoCE iCRC */ wqe->rawqp1.lflags |= SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC; break; } } /* For QP1, the driver must build the entire RoCE (v1/v2) packet hdr * as according to the sgid and AV */ static int bnxt_re_build_qp1_send(struct bnxt_re_qp *qp, const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe, int payload_size) { struct bnxt_re_ah *ah = to_bnxt_re(ud_wr(wr)->ah, struct bnxt_re_ah, ibah); struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah; struct bnxt_qplib_sge sge; int i, rc = 0; union ib_gid sgid; u16 vlan_id; u8 *ptmac; void *buf; memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); /* Get sgid */ rc = bnxt_re_query_gid(&qp->rdev->ibdev, 1, qplib_ah->sgid_index, &sgid); if (rc) return rc; /* ETH */ qp->qp1_hdr.eth_present = 1; ptmac = ah->qplib_ah.dmac; memcpy(qp->qp1_hdr.eth.dmac_h, ptmac, 4); ptmac += 4; memcpy(qp->qp1_hdr.eth.dmac_l, ptmac, 2); ptmac = qp->qplib_qp.smac; memcpy(qp->qp1_hdr.eth.smac_h, ptmac, 2); ptmac += 2; memcpy(qp->qp1_hdr.eth.smac_l, ptmac, 4); qp->qp1_hdr.eth.type = cpu_to_be16(BNXT_QPLIB_ETHTYPE_ROCEV1); /* For vlan, check the sgid for vlan existence */ vlan_id = rdma_get_vlan_id(&sgid); if (vlan_id && vlan_id < 0x1000) { qp->qp1_hdr.vlan_present = 1; qp->qp1_hdr.eth.type = cpu_to_be16(ETH_P_8021Q); } /* GRH */ qp->qp1_hdr.grh_present = 1; qp->qp1_hdr.grh.ip_version = 6; qp->qp1_hdr.grh.payload_length = cpu_to_be16((IB_BTH_BYTES + IB_DETH_BYTES + payload_size + 7) & ~3); qp->qp1_hdr.grh.next_header = 0x1b; memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid)); memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data, sizeof(sgid)); /* BTH */ if (wr->opcode == IB_WR_SEND_WITH_IMM) { qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; qp->qp1_hdr.immediate_present = 1; } else { qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY; } if (wr->send_flags & IB_SEND_SOLICITED) qp->qp1_hdr.bth.solicited_event = 1; qp->qp1_hdr.bth.pad_count = (4 - payload_size) & 3; /* P_key for QP1 is for all members */ qp->qp1_hdr.bth.pkey = cpu_to_be16(0xFFFF); qp->qp1_hdr.bth.destination_qpn = IB_QP1; qp->qp1_hdr.bth.ack_req = 0; qp->send_psn++; qp->send_psn &= BTH_PSN_MASK; qp->qp1_hdr.bth.psn = cpu_to_be32(qp->send_psn); /* DETH */ /* Use the priviledged Q_Key for QP1 */ qp->qp1_hdr.deth.qkey = cpu_to_be32(IB_QP1_QKEY); qp->qp1_hdr.deth.source_qpn = IB_QP1; /* Pack the QP1 to the transmit buffer */ buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge); if (!buf) { dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!\n"); rc = -ENOMEM; } for (i = wqe->num_sge; i; i--) { wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr; wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey; wqe->sg_list[i].size = wqe->sg_list[i - 1].size; } wqe->sg_list[0].addr = sge.addr; wqe->sg_list[0].lkey = sge.lkey; wqe->sg_list[0].size = sge.size; wqe->num_sge++; return 
rc; } static int bnxt_re_build_gsi_send(struct bnxt_re_qp *qp, const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_dev *rdev; int rc, indx, len = 0; rdev = qp->rdev; /* Mode UD is applicable to Gen P5 only */ if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_UD) return 0; for (indx = 0; indx < wr->num_sge; indx++) { wqe->sg_list[indx].addr = wr->sg_list[indx].addr; wqe->sg_list[indx].lkey = wr->sg_list[indx].lkey; wqe->sg_list[indx].size = wr->sg_list[indx].length; len += wr->sg_list[indx].length; } rc = bnxt_re_build_qp1_send(qp, wr, wqe, len); wqe->rawqp1.lflags |= SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC; return rc; } /* For the MAD layer, it only provides the recv SGE the size of ib_grh + MAD datagram. No Ethernet headers, Ethertype, BTH, DETH, nor RoCE iCRC. The Cu+ solution must provide buffer for the entire receive packet (334 bytes) with no VLAN and then copy the GRH and the MAD datagram out to the provided SGE. */ static int bnxt_re_build_qp1_recv(struct bnxt_re_qp *qp, const struct ib_recv_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_dev *rdev = qp->rdev; struct bnxt_qplib_sge ref, sge; u8 udp_hdr_size = 0; u8 ip_hdr_size = 0; int rc = 0; int size; if (bnxt_qplib_get_qp1_rq_buf(&qp->qplib_qp, &sge)) { /* Create 5 SGEs as according to the following: * Ethernet header (14) * ib_grh (40) - as provided from the wr * ib_bth + ib_deth + UDP(RoCE v2 only) (28) * MAD (256) - as provided from the wr * iCRC (4) */ /* Set RoCE v2 header size and offsets */ if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ROCE_V2_IPV4) ip_hdr_size = 20; if (rdev->gsi_ctx.gsi_qp_mode != BNXT_RE_GSI_MODE_ROCE_V1) udp_hdr_size = 8; /* Save the reference from ULP */ ref.addr = wr->sg_list[0].addr; ref.lkey = wr->sg_list[0].lkey; ref.size = wr->sg_list[0].length; /* SGE 1 */ size = sge.size; wqe->sg_list[0].addr = sge.addr; wqe->sg_list[0].lkey = sge.lkey; wqe->sg_list[0].size = BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE; size -= wqe->sg_list[0].size; if (size <= 0) { dev_err(rdev_to_dev(qp->rdev),"QP1 rq buffer is empty!\n"); rc = -ENOMEM; goto done; } sge.size = (u32)size; sge.addr += wqe->sg_list[0].size; /* SGE 2 */ /* In case of RoCE v2 ipv4 lower 20 bytes should have IP hdr */ wqe->sg_list[1].addr = ref.addr + ip_hdr_size; wqe->sg_list[1].lkey = ref.lkey; wqe->sg_list[1].size = sizeof(struct ib_grh) - ip_hdr_size; ref.size -= wqe->sg_list[1].size; if (ref.size <= 0) { dev_err(rdev_to_dev(qp->rdev), "QP1 ref buffer is empty!\n"); rc = -ENOMEM; goto done; } ref.addr += wqe->sg_list[1].size + ip_hdr_size; /* SGE 3 */ wqe->sg_list[2].addr = sge.addr; wqe->sg_list[2].lkey = sge.lkey; wqe->sg_list[2].size = BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE + udp_hdr_size; size -= wqe->sg_list[2].size; if (size <= 0) { dev_err(rdev_to_dev(qp->rdev), "QP1 rq buffer is empty!\n"); rc = -ENOMEM; goto done; } sge.size = (u32)size; sge.addr += wqe->sg_list[2].size; /* SGE 4 */ wqe->sg_list[3].addr = ref.addr; wqe->sg_list[3].lkey = ref.lkey; wqe->sg_list[3].size = ref.size; ref.size -= wqe->sg_list[3].size; if (ref.size) { dev_err(rdev_to_dev(qp->rdev), "QP1 ref buffer is incorrect!\n"); rc = -ENOMEM; goto done; } /* SGE 5 */ wqe->sg_list[4].addr = sge.addr; wqe->sg_list[4].lkey = sge.lkey; wqe->sg_list[4].size = sge.size; size -= wqe->sg_list[4].size; if (size) { dev_err(rdev_to_dev(qp->rdev), "QP1 rq buffer is incorrect!\n"); rc = -ENOMEM; goto done; } sge.size = (u32)size; wqe->num_sge = 5; } else { dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!\n"); rc = -ENOMEM; } done: return rc; } static int 
bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp, const struct ib_recv_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_sqp_entries *sqp_entry; struct bnxt_qplib_sge sge; struct bnxt_re_dev *rdev; u32 rq_prod_index; int rc = 0; rdev = qp->rdev; rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp); if (bnxt_qplib_get_qp1_rq_buf(&qp->qplib_qp, &sge)) { /* Create 1 SGE to receive the entire * ethernet packet */ /* SGE 1 */ wqe->sg_list[0].addr = sge.addr; /* TODO check the lkey to be used */ wqe->sg_list[0].lkey = sge.lkey; wqe->sg_list[0].size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2; if (sge.size < wqe->sg_list[0].size) { dev_err(rdev_to_dev(qp->rdev), "QP1 rq buffer is empty!\n"); rc = -ENOMEM; goto done; } sqp_entry = &rdev->gsi_ctx.sqp_tbl[rq_prod_index]; sqp_entry->sge.addr = wr->sg_list[0].addr; sqp_entry->sge.lkey = wr->sg_list[0].lkey; sqp_entry->sge.size = wr->sg_list[0].length; /* Store the wrid for reporting completion */ sqp_entry->wrid = wqe->wr_id; /* change the wqe->wrid to table index */ wqe->wr_id = rq_prod_index; } done: return rc; } static bool is_ud_qp(struct bnxt_re_qp *qp) { return (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD || qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI); } static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp, const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_ah *ah = NULL; if(is_ud_qp(qp)) { ah = to_bnxt_re(ud_wr(wr)->ah, struct bnxt_re_ah, ibah); wqe->send.q_key = ud_wr(wr)->remote_qkey; wqe->send.dst_qp = ud_wr(wr)->remote_qpn; wqe->send.avid = ah->qplib_ah.id; } switch (wr->opcode) { case IB_WR_SEND: wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND; break; case IB_WR_SEND_WITH_IMM: wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM; wqe->send.imm_data = wr->ex.imm_data; break; case IB_WR_SEND_WITH_INV: wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV; wqe->send.inv_key = wr->ex.invalidate_rkey; break; default: dev_err(rdev_to_dev(qp->rdev), "%s Invalid opcode %d!\n", __func__, wr->opcode); return -EINVAL; } if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_FENCE) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; if (wr->send_flags & IB_SEND_SOLICITED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT; if (wr->send_flags & IB_SEND_INLINE) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE; return 0; } static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { case IB_WR_RDMA_WRITE: wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE; break; case IB_WR_RDMA_WRITE_WITH_IMM: wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM; wqe->rdma.imm_data = wr->ex.imm_data; break; case IB_WR_RDMA_READ: wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ; wqe->rdma.inv_key = wr->ex.invalidate_rkey; break; default: return -EINVAL; } wqe->rdma.remote_va = rdma_wr(wr)->remote_addr; wqe->rdma.r_key = rdma_wr(wr)->rkey; if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_FENCE) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; if (wr->send_flags & IB_SEND_SOLICITED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT; if (wr->send_flags & IB_SEND_INLINE) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE; return 0; } static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP; wqe->atomic.cmp_data = atomic_wr(wr)->compare_add; wqe->atomic.swap_data = 
atomic_wr(wr)->swap; break; case IB_WR_ATOMIC_FETCH_AND_ADD: wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD; wqe->atomic.cmp_data = atomic_wr(wr)->compare_add; break; default: return -EINVAL; } wqe->atomic.remote_va = atomic_wr(wr)->remote_addr; wqe->atomic.r_key = atomic_wr(wr)->rkey; if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_FENCE) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; if (wr->send_flags & IB_SEND_SOLICITED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT; return 0; } static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV; wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey; if (wr->send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (wr->send_flags & IB_SEND_FENCE) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; if (wr->send_flags & IB_SEND_SOLICITED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT; return 0; } static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_mr *mr = to_bnxt_re(wr->mr, struct bnxt_re_mr, ib_mr); struct bnxt_qplib_frpl *qplib_frpl = &mr->qplib_frpl; int reg_len, i, access = wr->access; if (mr->npages > qplib_frpl->max_pg_ptrs) { dev_err_ratelimited(rdev_to_dev(mr->rdev), " %s: failed npages %d > %d\n", __func__, mr->npages, qplib_frpl->max_pg_ptrs); return -EINVAL; } wqe->frmr.pbl_ptr = (__le64 *)qplib_frpl->hwq.pbl_ptr[0]; wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0]; wqe->frmr.levels = qplib_frpl->hwq.level; wqe->frmr.page_list = mr->pages; wqe->frmr.page_list_len = mr->npages; wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR; if (wr->wr.send_flags & IB_SEND_SIGNALED) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP; if (access & IB_ACCESS_LOCAL_WRITE) wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE; if (access & IB_ACCESS_REMOTE_READ) wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ; if (access & IB_ACCESS_REMOTE_WRITE) wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE; if (access & IB_ACCESS_REMOTE_ATOMIC) wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC; if (access & IB_ACCESS_MW_BIND) wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND; /* TODO: OFED provides the rkey of the MR instead of the lkey */ wqe->frmr.l_key = wr->key; wqe->frmr.length = wr->mr->length; wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K); wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K); wqe->frmr.va = wr->mr->iova; reg_len = wqe->frmr.page_list_len * wr->mr->page_size; if (wqe->frmr.length > reg_len) { dev_err_ratelimited(rdev_to_dev(mr->rdev), "%s: bnxt_re_mr 0x%px len (%d > %d)\n", __func__, (void *)mr, wqe->frmr.length, reg_len); for (i = 0; i < mr->npages; i++) dev_dbg(rdev_to_dev(mr->rdev), "%s: build_reg_wqe page[%d] = 0x%llx\n", __func__, i, mr->pages[i]); return -EINVAL; } return 0; } static void bnxt_re_set_sg_list(const struct ib_send_wr *wr, struct bnxt_qplib_swqe *wqe) { wqe->sg_list = (struct bnxt_qplib_sge *)wr->sg_list; wqe->num_sge = wr->num_sge; } static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp) { if ((qp->ib_qp.qp_type == IB_QPT_UD || qp->ib_qp.qp_type == IB_QPT_GSI || qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) && qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) { int qp_attr_mask; struct ib_qp_attr qp_attr; qp_attr_mask = IB_QP_STATE; qp_attr.qp_state = IB_QPS_RTS; bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL); 
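/*
 * The modify-to-RTS call above is the workaround action taken once the SQ
 * WQE count reaches BNXT_RE_UD_QP_HW_STALL; clear the counter here so the
 * workaround can trigger again after the next batch of posts.
 */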
qp->qplib_qp.wqe_cnt = 0; } } static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, const struct ib_send_wr *wr) { struct bnxt_qplib_swqe wqe; unsigned long flags; int rc = 0; spin_lock_irqsave(&qp->sq_lock, flags); while (wr) { /* House keeping */ memset(&wqe, 0, sizeof(wqe)); /* Common */ if (wr->num_sge > qp->qplib_qp.sq.max_sge) { dev_err(rdev_to_dev(rdev), "Limit exceeded for Send SGEs\n"); rc = -EINVAL; break; } bnxt_re_set_sg_list(wr, &wqe); wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_SEND; rc = bnxt_re_build_send_wqe(qp, wr, &wqe); if (rc) break; rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); if (rc) { dev_err(rdev_to_dev(rdev), "bad_wr seen with opcode = 0x%x rc = %d\n", wr->opcode, rc); break; } wr = wr->next; } bnxt_qplib_post_send_db(&qp->qplib_qp); bnxt_ud_qp_hw_stall_workaround(qp); spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; } static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe) { /* Need unconditional fence for non-wire memory opcode * to work as expected. */ if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV || wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR || wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR || wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW) wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE; } int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct bnxt_re_qp *qp = to_bnxt_re(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_sge sge[6]; struct bnxt_qplib_swqe wqe; struct bnxt_re_dev *rdev; unsigned long flags; int rc = 0; rdev = qp->rdev; spin_lock_irqsave(&qp->sq_lock, flags); while (wr) { /* House keeping */ memset(&wqe, 0, sizeof(wqe)); /* Common */ if (wr->num_sge > qp->qplib_qp.sq.max_sge) { dev_err(rdev_to_dev(rdev), "Limit exceeded for Send SGEs\n"); rc = -EINVAL; goto bad; } bnxt_re_set_sg_list(wr, &wqe); wqe.wr_id = wr->wr_id; switch (wr->opcode) { case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_qp_mode != BNXT_RE_GSI_MODE_UD) { memset(sge, 0, sizeof(sge)); wqe.sg_list = sge; rc = bnxt_re_build_gsi_send(qp, wr, &wqe); if (rc) goto bad; } else if (ib_qp->qp_type == IB_QPT_RAW_ETHERTYPE) { bnxt_re_build_raw_send(wr, &wqe); } switch (wr->send_flags) { case IB_SEND_IP_CSUM: wqe.rawqp1.lflags |= SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM; break; default: break; } fallthrough; case IB_WR_SEND_WITH_INV: rc = bnxt_re_build_send_wqe(qp, wr, &wqe); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_READ: rc = bnxt_re_build_rdma_wqe(wr, &wqe); break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: rc = bnxt_re_build_atomic_wqe(wr, &wqe); break; case IB_WR_RDMA_READ_WITH_INV: dev_err(rdev_to_dev(rdev), "RDMA Read with Invalidate is not supported\n"); rc = -EINVAL; goto bad; case IB_WR_LOCAL_INV: rc = bnxt_re_build_inv_wqe(wr, &wqe); break; case IB_WR_REG_MR: rc = bnxt_re_build_reg_wqe(reg_wr(wr), &wqe); break; default: /* Unsupported WRs */ dev_err(rdev_to_dev(rdev), "WR (0x%x) is not supported\n", wr->opcode); rc = -EINVAL; goto bad; } if (likely(!rc)) { if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) bnxt_re_legacy_set_uc_fence(&wqe); rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe); } bad: if (unlikely(rc)) { dev_err(rdev_to_dev(rdev), "bad_wr seen with opcode = 0x%x\n", wr->opcode); *bad_wr = wr; break; } wr = wr->next; } bnxt_qplib_post_send_db(&qp->qplib_qp); if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) bnxt_ud_qp_hw_stall_workaround(qp); 
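/*
 * SQ doorbell has been rung for everything posted in this call and, on
 * pre-Gen-P5 chips only, the UD stall workaround applied; release the SQ lock.
 */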
spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; } static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, struct ib_recv_wr *wr) { struct bnxt_qplib_swqe wqe; int rc = 0; /* rq lock can be pardoned here. */ while (wr) { /* House keeping */ memset(&wqe, 0, sizeof(wqe)); /* Common */ if (wr->num_sge > qp->qplib_qp.rq.max_sge) { dev_err(rdev_to_dev(rdev), "Limit exceeded for Receive SGEs\n"); rc = -EINVAL; goto bad; } wqe.sg_list = (struct bnxt_qplib_sge *)wr->sg_list; wqe.num_sge = wr->num_sge; wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV; rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe); bad: if (rc) { dev_err(rdev_to_dev(rdev), "bad_wr seen with RQ post\n"); break; } wr = wr->next; } bnxt_qplib_post_recv_db(&qp->qplib_qp); return rc; } static int bnxt_re_build_gsi_recv(struct bnxt_re_qp *qp, const struct ib_recv_wr *wr, struct bnxt_qplib_swqe *wqe) { struct bnxt_re_dev *rdev = qp->rdev; int rc = 0; if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL) rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, wqe); else rc = bnxt_re_build_qp1_recv(qp, wr, wqe); return rc; } int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct bnxt_re_qp *qp = to_bnxt_re(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_qplib_sge sge[6]; struct bnxt_qplib_swqe wqe; unsigned long flags; u32 count = 0; int rc = 0; spin_lock_irqsave(&qp->rq_lock, flags); while (wr) { memset(&wqe, 0, sizeof(wqe)); if (wr->num_sge > qp->qplib_qp.rq.max_sge) { dev_err(rdev_to_dev(qp->rdev), "Limit exceeded for Receive SGEs\n"); rc = -EINVAL; goto bad; } wqe.num_sge = wr->num_sge; wqe.sg_list = (struct bnxt_qplib_sge *)wr->sg_list; wqe.wr_id = wr->wr_id; wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV; if (ib_qp->qp_type == IB_QPT_GSI && qp->rdev->gsi_ctx.gsi_qp_mode != BNXT_RE_GSI_MODE_UD) { memset(sge, 0, sizeof(sge)); wqe.sg_list = sge; rc = bnxt_re_build_gsi_recv(qp, wr, &wqe); if (rc) goto bad; } rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe); bad: if (rc) { dev_err(rdev_to_dev(qp->rdev), "bad_wr seen with RQ post\n"); *bad_wr = wr; break; } /* Ring DB if the RQEs posted reaches a threshold value */ if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) { bnxt_qplib_post_recv_db(&qp->qplib_qp); count = 0; } wr = wr->next; } if (count) bnxt_qplib_post_recv_db(&qp->qplib_qp); spin_unlock_irqrestore(&qp->rq_lock, flags); return rc; } /* Completion Queues */ void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct bnxt_re_cq *cq = to_bnxt_re(ib_cq, struct bnxt_re_cq, ibcq); struct bnxt_re_dev *rdev = cq->rdev; int rc = 0; if (cq->uctx_cq_page) { BNXT_RE_CQ_PAGE_LIST_DEL(cq->uctx, cq); free_page((u64)cq->uctx_cq_page); cq->uctx_cq_page = NULL; } if (cq->is_dbr_soft_cq && cq->uctx) { void *dbr_page; if (cq->uctx->dbr_recov_cq) { dbr_page = cq->uctx->dbr_recov_cq_page; cq->uctx->dbr_recov_cq_page = NULL; cq->uctx->dbr_recov_cq = NULL; free_page((unsigned long)dbr_page); } goto end; } /* CQ getting destroyed. 
Set this state for cqn handler */ spin_lock_bh(&cq->qplib_cq.compl_lock); cq->qplib_cq.destroyed = true; spin_unlock_bh(&cq->qplib_cq.compl_lock); if (ib_cq->poll_ctx == IB_POLL_WORKQUEUE || ib_cq->poll_ctx == IB_POLL_UNBOUND_WORKQUEUE) cancel_work_sync(&ib_cq->work); rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) dev_err_ratelimited(rdev_to_dev(rdev), "%s id = %d failed rc = %d\n", __func__, cq->qplib_cq.id, rc); bnxt_re_put_nq(rdev, cq->qplib_cq.nq); if (cq->umem && !IS_ERR(cq->umem)) ib_umem_release(cq->umem); kfree(cq->cql); atomic_dec(&rdev->stats.rsors.cq_count); end: return; } static inline struct bnxt_re_cq *__get_cq_from_cq_in(struct ib_cq *cq_in, struct bnxt_re_dev *rdev) { struct bnxt_re_cq *cq; cq = container_of(cq_in, struct bnxt_re_cq, ibcq); return cq; } int bnxt_re_create_cq(struct ib_cq *cq_in, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_re_ucontext *uctx = NULL; struct ib_ucontext *context = NULL; struct bnxt_qplib_cq *qplcq; struct bnxt_re_cq_req ureq; struct bnxt_re_dev *rdev; int rc, entries; struct bnxt_re_cq *cq; u32 max_active_cqs; int cqe = attr->cqe; if (attr->flags) return -EOPNOTSUPP; rdev = rdev_from_cq_in(cq_in); if (rdev->mod_exit) { rc = -EIO; dev_dbg(rdev_to_dev(rdev), "%s(): in mod_exit, just return!\n", __func__); goto exit; } if (udata) { uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ibucontext); context = &uctx->ibucontext; } dev_attr = rdev->dev_attr; if (atomic_read(&rdev->stats.rsors.cq_count) >= dev_attr->max_cq) { dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded(CQs)\n"); rc = -EINVAL; goto exit; } /* Validate CQ fields */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded(CQ_WQs)\n"); rc = -EINVAL; goto exit; } cq = __get_cq_from_cq_in(cq_in, rdev); if (!cq) { rc = -ENOMEM; goto exit; } cq->rdev = rdev; cq->uctx = uctx; qplcq = &cq->qplib_cq; qplcq->cq_handle = (u64)qplcq; /* * Since CQ is for QP1 is shared with Shadow CQ, the size * should be double the size. There is no way to identify * whether this CQ is for GSI QP. So assuming that the first * CQ created is for QP1 */ if (!udata && !rdev->gsi_ctx.first_cq_created && rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL) { rdev->gsi_ctx.first_cq_created = true; /* * Total CQE required for the CQ = CQE for QP1 RQ + * CQE for Shadow QP SQEs + CQE for Shadow QP RQEs. 
* Max entries of shadow QP SQ and RQ = QP1 RQEs = cqe */ cqe *= 3; } entries = bnxt_re_init_depth(cqe + 1, uctx); if (entries > dev_attr->max_cq_wqes + 1) entries = dev_attr->max_cq_wqes + 1; qplcq->sginfo.pgshft = PAGE_SHIFT; qplcq->sginfo.pgsize = PAGE_SIZE; if (udata) { if (udata->inlen < sizeof(ureq)) dev_warn(rdev_to_dev(rdev), "Update the library ulen %d klen %d\n", (unsigned int)udata->inlen, (unsigned int)sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); if (rc) goto fail; if (BNXT_RE_IS_DBR_PACING_NOTIFY_CQ(ureq)) { cq->is_dbr_soft_cq = true; goto success; } if (BNXT_RE_IS_DBR_RECOV_CQ(ureq)) { void *dbr_page; u32 *epoch; dbr_page = (void *)__get_free_page(GFP_KERNEL); if (!dbr_page) { dev_err(rdev_to_dev(rdev), "DBR recov CQ page allocation failed!"); rc = -ENOMEM; goto fail; } /* memset the epoch and epoch_ack to 0 */ epoch = dbr_page; epoch[0] = 0x0; epoch[1] = 0x0; uctx->dbr_recov_cq = cq; uctx->dbr_recov_cq_page = dbr_page; cq->is_dbr_soft_cq = true; goto success; } cq->umem = ib_umem_get_compat (rdev, context, udata, ureq.cq_va, entries * sizeof(struct cq_base), IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->umem)) { rc = PTR_ERR(cq->umem); dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed! rc = %d\n", __func__, rc); goto fail; } qplcq->sginfo.sghead = get_ib_umem_sgl(cq->umem, &qplcq->sginfo.nmap); qplcq->sginfo.npages = ib_umem_num_pages_compat(cq->umem); if (!uctx->dpi.dbr) { rc = bnxt_re_get_user_dpi(rdev, uctx); if (rc) goto c2fail; } qplcq->dpi = &uctx->dpi; } else { cq->max_cql = entries > MAX_CQL_PER_POLL ? MAX_CQL_PER_POLL : entries; cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe), GFP_KERNEL); if (!cq->cql) { dev_err(rdev_to_dev(rdev), "Allocate CQL for %d failed!\n", cq->max_cql); rc = -ENOMEM; goto fail; } qplcq->dpi = &rdev->dpi_privileged; } /* * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a * used for getting the NQ index. 
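* Each CQ is bound to a single NQ (qplcq->nq / cnq_hw_ring_id below), so the
* round-robin pick spreads CQ notifications across the available NQs.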
*/ qplcq->max_wqe = entries; qplcq->nq = bnxt_re_get_nq(rdev); qplcq->cnq_hw_ring_id = qplcq->nq->ring_id; rc = bnxt_qplib_create_cq(&rdev->qplib_res, qplcq); if (rc) { dev_err(rdev_to_dev(rdev), "Create HW CQ failed!\n"); goto fail; } INIT_LIST_HEAD(&cq->cq_list); cq->ibcq.cqe = entries; cq->cq_period = qplcq->period; atomic_inc(&rdev->stats.rsors.cq_count); max_active_cqs = atomic_read(&rdev->stats.rsors.cq_count); if (max_active_cqs > atomic_read(&rdev->stats.rsors.max_cq_count)) atomic_set(&rdev->stats.rsors.max_cq_count, max_active_cqs); spin_lock_init(&cq->cq_lock); if (udata) { struct bnxt_re_cq_resp resp; resp.cqid = qplcq->id; resp.tail = qplcq->hwq.cons; resp.phase = qplcq->period; resp.comp_mask = 0; resp.dbr = (u64)uctx->dpi.umdbr; resp.dpi = uctx->dpi.dpi; resp.comp_mask |= BNXT_RE_COMP_MASK_CQ_HAS_DB_INFO; /* Copy only on a valid wcpdi */ if (uctx->wcdpi.dpi) { resp.wcdpi = uctx->wcdpi.dpi; resp.comp_mask |= BNXT_RE_COMP_MASK_CQ_HAS_WC_DPI; } if (_is_chip_p7(rdev->chip_ctx)) { cq->uctx_cq_page = (void *)__get_free_page(GFP_KERNEL); if (!cq->uctx_cq_page) { dev_err(rdev_to_dev(rdev), "CQ page allocation failed!\n"); bnxt_qplib_destroy_cq(&rdev->qplib_res, qplcq); rc = -ENOMEM; goto c2fail; } resp.uctx_cq_page = (u64)cq->uctx_cq_page; resp.comp_mask |= BNXT_RE_COMP_MASK_CQ_HAS_CQ_PAGE; } rc = bnxt_re_copy_to_udata(rdev, &resp, min(udata->outlen, sizeof(resp)), udata); if (rc) { free_page((u64)cq->uctx_cq_page); cq->uctx_cq_page = NULL; bnxt_qplib_destroy_cq(&rdev->qplib_res, qplcq); goto c2fail; } if (cq->uctx_cq_page) BNXT_RE_CQ_PAGE_LIST_ADD(uctx, cq); } success: return 0; c2fail: if (udata && cq->umem && !IS_ERR(cq->umem)) ib_umem_release(cq->umem); fail: if (cq) { if (cq->cql) kfree(cq->cql); } exit: return rc; } int bnxt_re_modify_cq(struct ib_cq *ib_cq, u16 cq_count, u16 cq_period) { struct bnxt_re_cq *cq = to_bnxt_re(ib_cq, struct bnxt_re_cq, ibcq); struct bnxt_re_dev *rdev = cq->rdev; int rc; if ((cq->cq_count != cq_count) || (cq->cq_period != cq_period)) { cq->qplib_cq.count = cq_count; cq->qplib_cq.period = cq_period; rc = bnxt_qplib_modify_cq(&rdev->qplib_res, &cq->qplib_cq); if (rc) { dev_err(rdev_to_dev(rdev), "Modify HW CQ %#x failed!\n", cq->qplib_cq.id); return rc; } /* On success, update the shadow */ cq->cq_count = cq_count; cq->cq_period = cq_period; } return 0; } static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq) { struct bnxt_re_dev *rdev = cq->rdev; bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq); cq->qplib_cq.max_wqe = cq->resize_cqe; if (cq->resize_umem) { ib_umem_release(cq->umem); cq->umem = cq->resize_umem; cq->resize_umem = NULL; cq->resize_cqe = 0; } } int bnxt_re_resize_cq(struct ib_cq *ib_cq, int cqe, struct ib_udata *udata) { struct bnxt_qplib_sg_info sginfo = {}; struct bnxt_qplib_dpi *orig_dpi = NULL; struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_re_ucontext *uctx = NULL; struct bnxt_re_resize_cq_req ureq; struct ib_ucontext *context = NULL; struct bnxt_re_dev *rdev; struct bnxt_re_cq *cq; int rc, entries; /* Don't allow more than one resize request at the same time. * TODO: need a mutex here when we support kernel consumers of resize. 
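* For user CQs an in-flight resize is tracked via cq->resize_umem, and a
* second request is rejected with -EBUSY below.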
*/ cq = to_bnxt_re(ib_cq, struct bnxt_re_cq, ibcq); rdev = cq->rdev; dev_attr = rdev->dev_attr; if (ib_cq->uobject) { uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ibucontext); context = &uctx->ibucontext; } if (cq->resize_umem) { dev_err(rdev_to_dev(rdev), "Resize CQ %#x failed - Busy\n", cq->qplib_cq.id); return -EBUSY; } /* Check the requested cq depth out of supported depth */ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) { dev_err(rdev_to_dev(rdev), "Resize CQ %#x failed - max exceeded\n", cq->qplib_cq.id); return -EINVAL; } entries = bnxt_re_init_depth(cqe + 1, uctx); entries = min_t(u32, (u32)entries, dev_attr->max_cq_wqes + 1); /* Check to see if the new requested size can be handled by already * existing CQ */ if (entries == cq->ibcq.cqe) { dev_info(rdev_to_dev(rdev), "CQ is already at size %d\n", cqe); return 0; } if (ib_cq->uobject && udata) { if (udata->inlen < sizeof(ureq)) dev_warn(rdev_to_dev(rdev), "Update the library ulen %d klen %d\n", (unsigned int)udata->inlen, (unsigned int)sizeof(ureq)); rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); if (rc) goto fail; dev_dbg(rdev_to_dev(rdev), "%s: va %p\n", __func__, (void *)ureq.cq_va); cq->resize_umem = ib_umem_get_compat (rdev, context, udata, ureq.cq_va, entries * sizeof(struct cq_base), IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(cq->resize_umem)) { rc = PTR_ERR(cq->resize_umem); cq->resize_umem = NULL; dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed! rc = %d\n", __func__, rc); goto fail; } cq->resize_cqe = entries; dev_dbg(rdev_to_dev(rdev), "%s: ib_umem_get() success\n", __func__); memcpy(&sginfo, &cq->qplib_cq.sginfo, sizeof(sginfo)); orig_dpi = cq->qplib_cq.dpi; cq->qplib_cq.sginfo.sghead = get_ib_umem_sgl(cq->resize_umem, &cq->qplib_cq.sginfo.nmap); cq->qplib_cq.sginfo.npages = ib_umem_num_pages_compat(cq->resize_umem); cq->qplib_cq.sginfo.pgsize = PAGE_SIZE; cq->qplib_cq.sginfo.pgshft = PAGE_SHIFT; cq->qplib_cq.dpi = &uctx->dpi; } else { /* TODO: kernel consumer */ } rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries); if (rc) { dev_err(rdev_to_dev(rdev), "Resize HW CQ %#x failed!\n", cq->qplib_cq.id); goto fail; } cq->ibcq.cqe = cq->resize_cqe; /* For kernel consumers complete resize here. For uverbs consumers, * we complete it in the context of ibv_poll_cq(). 
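* (bnxt_re_poll_cq() invokes bnxt_re_resize_cq_complete() once it sees
* cq->resize_umem set.)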
*/ if (!cq->resize_umem) bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq); atomic_inc(&rdev->stats.rsors.resize_count); return 0; fail: if (cq->resize_umem) { ib_umem_release(cq->resize_umem); cq->resize_umem = NULL; cq->resize_cqe = 0; memcpy(&cq->qplib_cq.sginfo, &sginfo, sizeof(sginfo)); cq->qplib_cq.dpi = orig_dpi; } return rc; } static enum ib_wc_status __req_to_ib_wc_status(u8 qstatus) { switch(qstatus) { case CQ_REQ_STATUS_OK: return IB_WC_SUCCESS; case CQ_REQ_STATUS_BAD_RESPONSE_ERR: return IB_WC_BAD_RESP_ERR; case CQ_REQ_STATUS_LOCAL_LENGTH_ERR: return IB_WC_LOC_LEN_ERR; case CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR: return IB_WC_LOC_QP_OP_ERR; case CQ_REQ_STATUS_LOCAL_PROTECTION_ERR: return IB_WC_LOC_PROT_ERR; case CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR: return IB_WC_GENERAL_ERR; case CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR: return IB_WC_REM_INV_REQ_ERR; case CQ_REQ_STATUS_REMOTE_ACCESS_ERR: return IB_WC_REM_ACCESS_ERR; case CQ_REQ_STATUS_REMOTE_OPERATION_ERR: return IB_WC_REM_OP_ERR; case CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR: return IB_WC_RNR_RETRY_EXC_ERR; case CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR: return IB_WC_RETRY_EXC_ERR; case CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR: return IB_WC_WR_FLUSH_ERR; default: return IB_WC_GENERAL_ERR; } return 0; } static enum ib_wc_status __rawqp1_to_ib_wc_status(u8 qstatus) { switch(qstatus) { case CQ_RES_RAWETH_QP1_STATUS_OK: return IB_WC_SUCCESS; case CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR: return IB_WC_LOC_ACCESS_ERR; case CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR: return IB_WC_LOC_LEN_ERR; case CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR: return IB_WC_LOC_PROT_ERR; case CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR: return IB_WC_LOC_QP_OP_ERR; case CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR: return IB_WC_GENERAL_ERR; case CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR: return IB_WC_WR_FLUSH_ERR; case CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR: return IB_WC_WR_FLUSH_ERR; default: return IB_WC_GENERAL_ERR; } } static enum ib_wc_status __rc_to_ib_wc_status(u8 qstatus) { switch(qstatus) { case CQ_RES_RC_STATUS_OK: return IB_WC_SUCCESS; case CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR: return IB_WC_LOC_ACCESS_ERR; case CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR: return IB_WC_LOC_LEN_ERR; case CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR: return IB_WC_LOC_PROT_ERR; case CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR: return IB_WC_LOC_QP_OP_ERR; case CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR: return IB_WC_GENERAL_ERR; case CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR: return IB_WC_REM_INV_REQ_ERR; case CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR: return IB_WC_WR_FLUSH_ERR; case CQ_RES_RC_STATUS_HW_FLUSH_ERR: return IB_WC_WR_FLUSH_ERR; default: return IB_WC_GENERAL_ERR; } } static void bnxt_re_process_req_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { switch (cqe->type) { case BNXT_QPLIB_SWQE_TYPE_SEND: wc->opcode = IB_WC_SEND; break; case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM: wc->opcode = IB_WC_SEND; wc->wc_flags |= IB_WC_WITH_IMM; break; case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV: wc->opcode = IB_WC_SEND; wc->wc_flags |= IB_WC_WITH_INVALIDATE; break; case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM: wc->opcode = IB_WC_RDMA_WRITE; wc->wc_flags |= IB_WC_WITH_IMM; break; case BNXT_QPLIB_SWQE_TYPE_RDMA_READ: wc->opcode = IB_WC_RDMA_READ; break; case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP: wc->opcode = IB_WC_COMP_SWAP; break; case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD: wc->opcode = 
IB_WC_FETCH_ADD; break; case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV: wc->opcode = IB_WC_LOCAL_INV; break; case BNXT_QPLIB_SWQE_TYPE_REG_MR: wc->opcode = IB_WC_REG_MR; break; default: wc->opcode = IB_WC_SEND; break; } wc->status = __req_to_ib_wc_status(cqe->status); } static int bnxt_re_check_packet_type(u16 raweth_qp1_flags, u16 raweth_qp1_flags2) { bool is_ipv6 = false, is_ipv4 = false; /* raweth_qp1_flags Bit 9-6 indicates itype */ if ((raweth_qp1_flags & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE) != CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE) return -1; if (raweth_qp1_flags2 & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC && raweth_qp1_flags2 & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC) { /* raweth_qp1_flags2 Bit 8 indicates ip_type. 0-v4 1 - v6 */ (raweth_qp1_flags2 & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE) ? (is_ipv6 = true) : (is_ipv4 = true); return ((is_ipv6) ? BNXT_RE_ROCEV2_IPV6_PACKET : BNXT_RE_ROCEV2_IPV4_PACKET); } else { return BNXT_RE_ROCE_V1_PACKET; } } static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev, void *rq_hdr_buf) { u8 *tmp_buf = NULL; struct ethhdr *eth_hdr; u16 eth_type; bool rc = false; tmp_buf = (u8 *)rq_hdr_buf; /* * If dest mac is not same as I/F mac, this could be a * loopback address or multicast address, check whether * it is a loopback packet */ if (!ether_addr_equal(tmp_buf, rdev->dev_addr)) { tmp_buf += 4; /* Check the ether type */ eth_hdr = (struct ethhdr *)tmp_buf; eth_type = ntohs(eth_hdr->h_proto); switch (eth_type) { case BNXT_QPLIB_ETHTYPE_ROCEV1: rc = true; break; default: break; } } return rc; } static bool bnxt_re_is_vlan_in_packet(struct bnxt_re_dev *rdev, void *rq_hdr_buf, struct bnxt_qplib_cqe *cqe) { struct vlan_hdr *vlan_hdr; struct ethhdr *eth_hdr; u8 *tmp_buf = NULL; u16 eth_type; tmp_buf = (u8 *)rq_hdr_buf; /* Check the ether type */ eth_hdr = (struct ethhdr *)tmp_buf; eth_type = ntohs(eth_hdr->h_proto); if (eth_type == ETH_P_8021Q) { tmp_buf += sizeof(struct ethhdr); vlan_hdr = (struct vlan_hdr *)tmp_buf; cqe->raweth_qp1_metadata = ntohs(vlan_hdr->h_vlan_TCI) | (eth_type << CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT); cqe->raweth_qp1_flags2 |= CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN; return true; } return false; } static int bnxt_re_process_raw_qp_packet_receive(struct bnxt_re_qp *gsi_qp, struct bnxt_qplib_cqe *cqe) { struct bnxt_re_sqp_entries *sqp_entry = NULL; struct bnxt_qplib_hdrbuf *hdr_buf; dma_addr_t shrq_hdr_buf_map; struct ib_sge s_sge[2] = {}; struct ib_sge r_sge[2] = {}; struct ib_recv_wr rwr = {}; struct bnxt_re_ah *gsi_sah; struct bnxt_re_qp *gsi_sqp; dma_addr_t rq_hdr_buf_map; struct bnxt_re_dev *rdev; struct ib_send_wr *swr; u32 skip_bytes = 0; void *rq_hdr_buf; int pkt_type = 0; u32 offset = 0; u32 tbl_idx; int rc; struct ib_ud_wr udwr = {}; swr = &udwr.wr; rdev = gsi_qp->rdev; gsi_sqp = rdev->gsi_ctx.gsi_sqp; tbl_idx = cqe->wr_id; hdr_buf = gsi_qp->qplib_qp.rq_hdr_buf; rq_hdr_buf = (u8 *) hdr_buf->va + tbl_idx * hdr_buf->step; rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp, tbl_idx); /* Shadow QP header buffer */ shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_sqp->qplib_qp, tbl_idx); sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx]; /* Find packet type from the cqe */ pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags, cqe->raweth_qp1_flags2); if (pkt_type < 0) { dev_err(rdev_to_dev(rdev), "Not handling this packet\n"); return -EINVAL; } /* Adjust the offset for the user buffer and post in the rq */ if (pkt_type == BNXT_RE_ROCEV2_IPV4_PACKET) 
offset = 20; /* * QP1 loopback packet has 4 bytes of internal header before * ether header. Skip these four bytes. */ if (bnxt_re_is_loopback_packet(rdev, rq_hdr_buf)) skip_bytes = 4; if (bnxt_re_is_vlan_in_packet(rdev, rq_hdr_buf, cqe)) skip_bytes += VLAN_HLEN; /* Store this cqe */ memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe)); sqp_entry->qp1_qp = gsi_qp; /* First send SGE . Skip the ether header*/ s_sge[0].addr = rq_hdr_buf_map + BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE + skip_bytes; s_sge[0].lkey = 0xFFFFFFFF; s_sge[0].length = offset ? BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 : BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6; /* Second Send SGE */ s_sge[1].addr = s_sge[0].addr + s_sge[0].length + BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE; if (pkt_type != BNXT_RE_ROCE_V1_PACKET) s_sge[1].addr += 8; s_sge[1].lkey = 0xFFFFFFFF; s_sge[1].length = 256; /* First recv SGE */ r_sge[0].addr = shrq_hdr_buf_map; r_sge[0].lkey = 0xFFFFFFFF; r_sge[0].length = 40; r_sge[1].addr = sqp_entry->sge.addr + offset; r_sge[1].lkey = sqp_entry->sge.lkey; r_sge[1].length = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 + 256 - offset; /* Create receive work request */ rwr.num_sge = 2; rwr.sg_list = r_sge; rwr.wr_id = tbl_idx; rwr.next = NULL; rc = bnxt_re_post_recv_shadow_qp(rdev, gsi_sqp, &rwr); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to post Rx buffers to shadow QP\n"); return -ENOMEM; } swr->num_sge = 2; swr->sg_list = s_sge; swr->wr_id = tbl_idx; swr->opcode = IB_WR_SEND; swr->next = NULL; gsi_sah = rdev->gsi_ctx.gsi_sah; udwr.ah = &gsi_sah->ibah; udwr.remote_qpn = gsi_sqp->qplib_qp.id; udwr.remote_qkey = gsi_sqp->qplib_qp.qkey; /* post data received in the send queue */ rc = bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr); return rc; } static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { wc->opcode = IB_WC_RECV; wc->status = __rawqp1_to_ib_wc_status(cqe->status); wc->wc_flags |= IB_WC_GRH; } static void bnxt_re_process_res_rc_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); if (cqe->flags & CQ_RES_RC_FLAGS_IMM) wc->wc_flags |= IB_WC_WITH_IMM; if (cqe->flags & CQ_RES_RC_FLAGS_INV) wc->wc_flags |= IB_WC_WITH_INVALIDATE; if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) == (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; } /* Returns TRUE if pkt has valid VLAN and if VLAN id is non-zero */ static bool bnxt_re_is_nonzero_vlanid_pkt(struct bnxt_qplib_cqe *orig_cqe, u16 *vid, u8 *sl) { u32 metadata; u16 tpid; bool ret = false; metadata = orig_cqe->raweth_qp1_metadata; if (orig_cqe->raweth_qp1_flags2 & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN) { tpid = ((metadata & CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK) >> CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT); if (tpid == ETH_P_8021Q) { *vid = metadata & CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK; *sl = (metadata & CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK) >> CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT; ret = !!(*vid); } } return ret; } static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { u32 tbl_idx; struct bnxt_re_dev *rdev = gsi_sqp->rdev; struct bnxt_re_qp *gsi_qp = NULL; struct bnxt_qplib_cqe *orig_cqe = NULL; struct bnxt_re_sqp_entries *sqp_entry = NULL; int nw_type; u16 vlan_id; u8 sl; tbl_idx = cqe->wr_id; sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx]; gsi_qp = sqp_entry->qp1_qp; orig_cqe = &sqp_entry->cqe; wc->wr_id = 
sqp_entry->wrid; wc->byte_len = orig_cqe->length; wc->qp = &gsi_qp->ib_qp; wc->ex.imm_data = orig_cqe->immdata; wc->src_qp = orig_cqe->src_qp; memcpy(wc->smac, orig_cqe->smac, ETH_ALEN); if (bnxt_re_is_nonzero_vlanid_pkt(orig_cqe, &vlan_id, &sl)) { if (bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { wc->sl = sl; wc->vlan_id = vlan_id; wc->wc_flags |= IB_WC_WITH_VLAN; } } wc->port_num = 1; wc->vendor_err = orig_cqe->status; wc->opcode = IB_WC_RECV; wc->status = __rawqp1_to_ib_wc_status(orig_cqe->status); wc->wc_flags |= IB_WC_GRH; nw_type = bnxt_re_check_packet_type(orig_cqe->raweth_qp1_flags, orig_cqe->raweth_qp1_flags2); if(nw_type >= 0) dev_dbg(rdev_to_dev(rdev), "%s nw_type = %d\n", __func__, nw_type); } static void bnxt_re_process_res_ud_wc(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { u16 vlan_id = 0; wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); if (cqe->flags & CQ_RES_UD_FLAGS_IMM) wc->wc_flags |= IB_WC_WITH_IMM; if (cqe->flags & CQ_RES_RC_FLAGS_INV) wc->wc_flags |= IB_WC_WITH_INVALIDATE; /* report only on GSI QP for Thor */ if (rdev->gsi_ctx.gsi_qp->qplib_qp.id == qp->qplib_qp.id && rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_UD) { wc->wc_flags |= IB_WC_GRH; memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= IB_WC_WITH_SMAC; if (_is_cqe_v2_supported(rdev->dev_attr->dev_cap_flags)) { if (cqe->flags & CQ_RES_UD_V2_FLAGS_META_FORMAT_MASK) { if (cqe->cfa_meta & BNXT_QPLIB_CQE_CFA_META1_VALID) vlan_id = (cqe->cfa_meta & 0xFFF); } } else if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { vlan_id = (cqe->cfa_meta & 0xFFF); } /* Mark only if vlan_id is non zero */ if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { wc->vlan_id = vlan_id; wc->wc_flags |= IB_WC_WITH_VLAN; } } } static int bnxt_re_legacy_send_phantom_wqe(struct bnxt_re_qp *qp) { struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp; unsigned long flags; int rc = 0; spin_lock_irqsave(&qp->sq_lock, flags); rc = bnxt_re_legacy_bind_fence_mw(lib_qp); if (!rc) { lib_qp->sq.phantom_wqe_cnt++; dev_dbg(&lib_qp->sq.hwq.pdev->dev, "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n", lib_qp->id, lib_qp->sq.hwq.prod, HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq), lib_qp->sq.phantom_wqe_cnt); } spin_unlock_irqrestore(&qp->sq_lock, flags); return rc; } int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc) { struct bnxt_re_cq *cq = to_bnxt_re(ib_cq, struct bnxt_re_cq, ibcq); struct bnxt_re_dev *rdev = cq->rdev; struct bnxt_re_qp *qp; struct bnxt_qplib_cqe *cqe; int i, ncqe, budget, init_budget; struct bnxt_qplib_q *sq; struct bnxt_qplib_qp *lib_qp; u32 tbl_idx; struct bnxt_re_sqp_entries *sqp_entry = NULL; unsigned long flags; u8 gsi_mode; /* * DB recovery CQ; only process the door bell pacing alert from * the user lib */ if (cq->is_dbr_soft_cq) { bnxt_re_pacing_alert(rdev); return 0; } /* User CQ; the only processing we do is to * complete any pending CQ resize operation. 
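* The CQEs of a user CQ are consumed directly by the user library, so the
* kernel returns 0 here without polling.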
*/ if (cq->umem) { if (cq->resize_umem) bnxt_re_resize_cq_complete(cq); return 0; } spin_lock_irqsave(&cq->cq_lock, flags); budget = min_t(u32, num_entries, cq->max_cql); init_budget = budget; if (!cq->cql) { dev_err(rdev_to_dev(rdev), "POLL CQ no CQL to use\n"); goto exit; } cqe = &cq->cql[0]; gsi_mode = rdev->gsi_ctx.gsi_qp_mode; while (budget) { lib_qp = NULL; ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp); if (lib_qp) { sq = &lib_qp->sq; if (sq->legacy_send_phantom == true) { qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp); if (bnxt_re_legacy_send_phantom_wqe(qp) == -ENOMEM) dev_err(rdev_to_dev(rdev), "Phantom failed! Scheduled to send again\n"); else sq->legacy_send_phantom = false; } } if (ncqe < budget) ncqe += bnxt_qplib_process_flush_list(&cq->qplib_cq, cqe + ncqe, budget - ncqe); if (!ncqe) break; for (i = 0; i < ncqe; i++, cqe++) { /* Transcribe each qplib_wqe back to ib_wc */ memset(wc, 0, sizeof(*wc)); wc->wr_id = cqe->wr_id; wc->byte_len = cqe->length; qp = to_bnxt_re((struct bnxt_qplib_qp *)cqe->qp_handle, struct bnxt_re_qp, qplib_qp); if (!qp) { dev_err(rdev_to_dev(rdev), "POLL CQ bad QP handle\n"); continue; } wc->qp = &qp->ib_qp; wc->ex.imm_data = cqe->immdata; wc->src_qp = cqe->src_qp; memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->port_num = 1; wc->vendor_err = cqe->status; switch(cqe->opcode) { case CQ_BASE_CQE_TYPE_REQ: if (gsi_mode == BNXT_RE_GSI_MODE_ALL && qp->qplib_qp.id == rdev->gsi_ctx.gsi_sqp->qplib_qp.id) { /* Handle this completion with * the stored completion */ dev_dbg(rdev_to_dev(rdev), "Skipping this UD Send CQ\n"); memset(wc, 0, sizeof(*wc)); continue; } bnxt_re_process_req_wc(wc, cqe); break; case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1: if (gsi_mode == BNXT_RE_GSI_MODE_ALL) { if (!cqe->status) { int rc = 0; rc = bnxt_re_process_raw_qp_packet_receive(qp, cqe); if (!rc) { memset(wc, 0, sizeof(*wc)); continue; } cqe->status = -1; } /* Errors need not be looped back. 
* But change the wr_id to the one * stored in the table */ tbl_idx = cqe->wr_id; sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx]; wc->wr_id = sqp_entry->wrid; } bnxt_re_process_res_rawqp1_wc(wc, cqe); break; case CQ_BASE_CQE_TYPE_RES_RC: bnxt_re_process_res_rc_wc(wc, cqe); break; case CQ_BASE_CQE_TYPE_RES_UD: if (gsi_mode == BNXT_RE_GSI_MODE_ALL && qp->qplib_qp.id == rdev->gsi_ctx.gsi_sqp->qplib_qp.id) { /* Handle this completion with * the stored completion */ dev_dbg(rdev_to_dev(rdev), "Handling the UD receive CQ\n"); if (cqe->status) { /* TODO handle this completion as a failure in * loopback porocedure */ continue; } else { bnxt_re_process_res_shadow_qp_wc(qp, wc, cqe); break; } } bnxt_re_process_res_ud_wc(rdev, qp, wc, cqe); break; default: dev_err(rdev_to_dev(cq->rdev), "POLL CQ type 0x%x not handled, skip!\n", cqe->opcode); continue; } wc++; budget--; } } exit: spin_unlock_irqrestore(&cq->cq_lock, flags); return init_budget - budget; } int bnxt_re_req_notify_cq(struct ib_cq *ib_cq, enum ib_cq_notify_flags ib_cqn_flags) { struct bnxt_re_cq *cq = to_bnxt_re(ib_cq, struct bnxt_re_cq, ibcq); int type = 0, rc = 0; unsigned long flags; spin_lock_irqsave(&cq->cq_lock, flags); /* Trigger on the very next completion */ if (ib_cqn_flags & IB_CQ_NEXT_COMP) type = DBC_DBC_TYPE_CQ_ARMALL; /* Trigger on the next solicited completion */ else if (ib_cqn_flags & IB_CQ_SOLICITED) type = DBC_DBC_TYPE_CQ_ARMSE; bnxt_qplib_req_notify_cq(&cq->qplib_cq, type); /* Poll to see if there are missed events */ if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) && !(bnxt_qplib_is_cq_empty(&cq->qplib_cq))) rc = 1; spin_unlock_irqrestore(&cq->cq_lock, flags); return rc; } /* Memory Regions */ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) { struct bnxt_qplib_mrinfo mrinfo; struct bnxt_re_dev *rdev; struct bnxt_re_mr *mr; struct bnxt_re_pd *pd; u32 max_mr_count; u64 pbl = 0; int rc; memset(&mrinfo, 0, sizeof(mrinfo)); pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); rdev = pd->rdev; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { dev_err(rdev_to_dev(rdev), "Allocate memory for DMA MR failed!\n"); return ERR_PTR(-ENOMEM); } mr->rdev = rdev; mr->qplib_mr.pd = &pd->qplib_pd; mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags); mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; /* Allocate and register 0 as the address */ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { dev_err(rdev_to_dev(rdev), "Allocate DMA MR failed!\n"); goto fail; } mr->qplib_mr.total_size = -1; /* Infinite length */ mrinfo.ptes = &pbl; mrinfo.sg.npages = 0; mrinfo.sg.pgsize = PAGE_SIZE; mrinfo.sg.pgshft = PAGE_SHIFT; mrinfo.sg.pgsize = PAGE_SIZE; mrinfo.mrw = &mr->qplib_mr; mrinfo.is_dma = true; rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mrinfo, false); if (rc) { dev_err(rdev_to_dev(rdev), "Register DMA MR failed!\n"); goto fail_mr; } mr->ib_mr.lkey = mr->qplib_mr.lkey; if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC)) mr->ib_mr.rkey = mr->ib_mr.lkey; atomic_inc(&rdev->stats.rsors.mr_count); max_mr_count = atomic_read(&rdev->stats.rsors.mr_count); if (max_mr_count > atomic_read(&rdev->stats.rsors.max_mr_count)) atomic_set(&rdev->stats.rsors.max_mr_count, max_mr_count); return &mr->ib_mr; fail_mr: bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); fail: kfree(mr); return ERR_PTR(rc); } int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { struct bnxt_re_mr *mr = to_bnxt_re(ib_mr, struct bnxt_re_mr, ib_mr); struct bnxt_re_dev 
*rdev = mr->rdev; int rc = 0; rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) dev_err(rdev_to_dev(rdev), "Dereg MR failed (%d): rc - %#x\n", mr->qplib_mr.lkey, rc); if (mr->pages) { bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res, &mr->qplib_frpl); kfree(mr->pages); mr->npages = 0; mr->pages = NULL; } if (!IS_ERR(mr->ib_umem) && mr->ib_umem) { mr->is_invalcb_active = false; bnxt_re_peer_mem_release(mr->ib_umem); } kfree(mr); atomic_dec(&rdev->stats.rsors.mr_count); return 0; } static int bnxt_re_set_page(struct ib_mr *ib_mr, u64 addr) { struct bnxt_re_mr *mr = to_bnxt_re(ib_mr, struct bnxt_re_mr, ib_mr); if (unlikely(mr->npages == mr->qplib_frpl.max_pg_ptrs)) return -ENOMEM; mr->pages[mr->npages++] = addr; dev_dbg(NULL, "%s: ibdev %p Set MR pages[%d] = 0x%lx\n", ROCE_DRV_MODULE_NAME, ib_mr->device, mr->npages - 1, mr->pages[mr->npages - 1]); return 0; } int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct bnxt_re_mr *mr = to_bnxt_re(ib_mr, struct bnxt_re_mr, ib_mr); mr->npages = 0; return ib_sg_to_pages(ib_mr, sg, sg_nents, sg_offset, bnxt_re_set_page); } struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, u32 max_num_sg, struct ib_udata *udata) { struct bnxt_re_pd *pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; u32 max_mr_count; int rc; dev_dbg(rdev_to_dev(rdev), "Alloc MR\n"); if (type != IB_MR_TYPE_MEM_REG) { dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported\n", type); return ERR_PTR(-EINVAL); } if (max_num_sg > MAX_PBL_LVL_1_PGS) { dev_dbg(rdev_to_dev(rdev), "Max SG exceeded\n"); return ERR_PTR(-EINVAL); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { dev_err(rdev_to_dev(rdev), "Allocate MR mem failed!\n"); return ERR_PTR(-ENOMEM); } mr->rdev = rdev; mr->qplib_mr.pd = &pd->qplib_pd; mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR; mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { dev_err(rdev_to_dev(rdev), "Allocate MR failed!\n"); goto fail; } mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->ib_mr.lkey; mr->pages = kzalloc(sizeof(u64) * max_num_sg, GFP_KERNEL); if (!mr->pages) { dev_err(rdev_to_dev(rdev), "Allocate MR page list mem failed!\n"); rc = -ENOMEM; goto fail_mr; } rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res, &mr->qplib_frpl, max_num_sg); if (rc) { dev_err(rdev_to_dev(rdev), "Allocate HW Fast reg page list failed!\n"); goto free_page; } dev_dbg(rdev_to_dev(rdev), "Alloc MR pages = 0x%p\n", mr->pages); atomic_inc(&rdev->stats.rsors.mr_count); max_mr_count = atomic_read(&rdev->stats.rsors.mr_count); if (max_mr_count > atomic_read(&rdev->stats.rsors.max_mr_count)) atomic_set(&rdev->stats.rsors.max_mr_count, max_mr_count); return &mr->ib_mr; free_page: kfree(mr->pages); fail_mr: bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); fail: kfree(mr); return ERR_PTR(rc); } /* Memory Windows */ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct ib_udata *udata) { struct bnxt_re_pd *pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mw *mw; u32 max_mw_count; int rc; mw = kzalloc(sizeof(*mw), GFP_KERNEL); if (!mw) { dev_err(rdev_to_dev(rdev), "Allocate MW failed!\n"); rc = -ENOMEM; goto exit; } mw->rdev = rdev; mw->qplib_mw.pd = &pd->qplib_pd; mw->qplib_mw.type = (type == IB_MW_TYPE_1 ? 
CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 : CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B); rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw); if (rc) { dev_err(rdev_to_dev(rdev), "Allocate MW failed!\n"); goto fail; } mw->ib_mw.rkey = mw->qplib_mw.rkey; atomic_inc(&rdev->stats.rsors.mw_count); max_mw_count = atomic_read(&rdev->stats.rsors.mw_count); if (max_mw_count > atomic_read(&rdev->stats.rsors.max_mw_count)) atomic_set(&rdev->stats.rsors.max_mw_count, max_mw_count); return &mw->ib_mw; fail: kfree(mw); exit: return ERR_PTR(rc); } int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) { struct bnxt_re_mw *mw = to_bnxt_re(ib_mw, struct bnxt_re_mw, ib_mw); struct bnxt_re_dev *rdev = mw->rdev; int rc; rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw); if (rc) { dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc); return rc; } kfree(mw); atomic_dec(&rdev->stats.rsors.mw_count); return rc; } static int bnxt_re_page_size_ok(int page_shift) { switch (page_shift) { case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256MB: case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G: return 1; default: return 0; } } static int bnxt_re_get_page_shift(struct ib_umem *umem, u64 va, u64 st, u64 cmask) { int pgshft; pgshft = ilog2(umem->page_size); return pgshft; } static int bnxt_re_get_num_pages(struct ib_umem *umem, u64 start, u64 length, int page_shift) { int npages = 0; if (page_shift == PAGE_SHIFT) { npages = ib_umem_num_pages_compat(umem); } else { npages = ALIGN(length, BIT(page_shift)) / BIT(page_shift); if (start % BIT(page_shift)) npages++; } return npages; } /* uverbs */ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata) { struct bnxt_re_pd *pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_qplib_mrinfo mrinfo; int umem_pgs, page_shift, rc; struct bnxt_re_mr *mr; struct ib_umem *umem; u32 max_mr_count; int npages; dev_dbg(rdev_to_dev(rdev), "Reg user MR\n"); if (bnxt_re_get_total_mr_mw_count(rdev) >= rdev->dev_attr->max_mr) return ERR_PTR(-ENOMEM); if (rdev->mod_exit) { dev_dbg(rdev_to_dev(rdev), "%s(): in mod_exit, just return!\n", __func__); return ERR_PTR(-EIO); } memset(&mrinfo, 0, sizeof(mrinfo)); if (length > BNXT_RE_MAX_MR_SIZE) { dev_err(rdev_to_dev(rdev), "Requested MR Size: %lu " "> Max supported: %ld\n", length, BNXT_RE_MAX_MR_SIZE); return ERR_PTR(-ENOMEM); } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { dev_err(rdev_to_dev(rdev), "Allocate MR failed!\n"); return ERR_PTR (-ENOMEM); } mr->rdev = rdev; mr->qplib_mr.pd = &pd->qplib_pd; mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags); mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR; if (!_is_alloc_mr_unified(rdev->qplib_res.dattr)) { rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { dev_err(rdev_to_dev(rdev), "Alloc MR failed!\n"); goto fail; } /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; } umem = ib_umem_get_flags_compat(rdev, ib_pd->uobject->context, udata, start, length, mr_access_flags, 0); if (IS_ERR(umem)) { rc = PTR_ERR(umem); dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed! 
rc = %d\n", __func__, rc); goto free_mr; } mr->ib_umem = umem; mr->qplib_mr.va = virt_addr; umem_pgs = ib_umem_num_pages_compat(umem); if (!umem_pgs) { dev_err(rdev_to_dev(rdev), "umem is invalid!\n"); rc = -EINVAL; goto free_umem; } mr->qplib_mr.total_size = length; page_shift = bnxt_re_get_page_shift(umem, virt_addr, start, rdev->dev_attr->page_size_cap); if (!bnxt_re_page_size_ok(page_shift)) { dev_err(rdev_to_dev(rdev), "umem page size unsupported!\n"); rc = -EFAULT; goto free_umem; } npages = bnxt_re_get_num_pages(umem, start, length, page_shift); /* Map umem buf ptrs to the PBL */ mrinfo.sg.npages = npages; mrinfo.sg.sghead = get_ib_umem_sgl(umem, &mrinfo.sg.nmap); mrinfo.sg.pgshft = page_shift; mrinfo.sg.pgsize = BIT(page_shift); mrinfo.mrw = &mr->qplib_mr; rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mrinfo, false); if (rc) { dev_err(rdev_to_dev(rdev), "Reg user MR failed!\n"); goto free_umem; } mr->ib_mr.lkey = mr->ib_mr.rkey = mr->qplib_mr.lkey; atomic_inc(&rdev->stats.rsors.mr_count); max_mr_count = atomic_read(&rdev->stats.rsors.mr_count); if (max_mr_count > atomic_read(&rdev->stats.rsors.max_mr_count)) atomic_set(&rdev->stats.rsors.max_mr_count, max_mr_count); return &mr->ib_mr; free_umem: bnxt_re_peer_mem_release(mr->ib_umem); free_mr: if (!_is_alloc_mr_unified(rdev->qplib_res.dattr)) bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); fail: kfree(mr); return ERR_PTR(rc); } int bnxt_re_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_mr *mr = to_bnxt_re(ib_mr, struct bnxt_re_mr, ib_mr); struct bnxt_re_pd *pd = to_bnxt_re(ib_pd, struct bnxt_re_pd, ibpd); int umem_pgs = 0, page_shift = PAGE_SHIFT, rc; struct bnxt_re_dev *rdev = mr->rdev; struct bnxt_qplib_mrinfo mrinfo; struct ib_umem *umem; u32 npages; /* TODO: Must decipher what to modify based on the flags */ memset(&mrinfo, 0, sizeof(mrinfo)); if (flags & IB_MR_REREG_TRANS) { umem = ib_umem_get_flags_compat(rdev, ib_pd->uobject->context, udata, start, length, mr_access_flags, 0); if (IS_ERR(umem)) { rc = PTR_ERR(umem); dev_err(rdev_to_dev(rdev), "%s: ib_umem_get failed! 
ret = %d\n", __func__, rc); goto fail; } mr->ib_umem = umem; mr->qplib_mr.va = virt_addr; umem_pgs = ib_umem_num_pages_compat(umem); if (!umem_pgs) { dev_err(rdev_to_dev(rdev), "umem is invalid!\n"); rc = -EINVAL; goto fail_free_umem; } mr->qplib_mr.total_size = length; page_shift = bnxt_re_get_page_shift(umem, virt_addr, start, rdev->dev_attr->page_size_cap); if (!bnxt_re_page_size_ok(page_shift)) { dev_err(rdev_to_dev(rdev), "umem page size unsupported!\n"); rc = -EFAULT; goto fail_free_umem; } npages = bnxt_re_get_num_pages(umem, start, length, page_shift); /* Map umem buf ptrs to the PBL */ mrinfo.sg.npages = npages; mrinfo.sg.sghead = get_ib_umem_sgl(umem, &mrinfo.sg.nmap); mrinfo.sg.pgshft = page_shift; mrinfo.sg.pgsize = BIT(page_shift); } mrinfo.mrw = &mr->qplib_mr; if (flags & IB_MR_REREG_PD) mr->qplib_mr.pd = &pd->qplib_pd; if (flags & IB_MR_REREG_ACCESS) mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mrinfo, false); if (rc) { dev_err(rdev_to_dev(rdev), "Rereg user MR failed!\n"); goto fail_free_umem; } mr->ib_mr.rkey = mr->qplib_mr.rkey; return 0; fail_free_umem: bnxt_re_peer_mem_release(mr->ib_umem); fail: return rc; } static int bnxt_re_check_abi_version(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; u32 uverbs_abi_ver; uverbs_abi_ver = GET_UVERBS_ABI_VERSION(ibdev); dev_dbg(rdev_to_dev(rdev), "ABI version requested %d\n", uverbs_abi_ver); if (uverbs_abi_ver != BNXT_RE_ABI_VERSION) { dev_dbg(rdev_to_dev(rdev), " is different from the device %d \n", BNXT_RE_ABI_VERSION); return -EPERM; } return 0; } int bnxt_re_alloc_ucontext(struct ib_ucontext *uctx_in, struct ib_udata *udata) { struct ib_ucontext *ctx = uctx_in; struct ib_device *ibdev = ctx->device; struct bnxt_re_ucontext *uctx = container_of(ctx, struct bnxt_re_ucontext, ibucontext); struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; struct bnxt_re_uctx_resp resp = {}; struct bnxt_re_uctx_req ureq = {}; struct bnxt_qplib_chip_ctx *cctx; u32 chip_met_rev_num; bool genp5 = false; int rc; cctx = rdev->chip_ctx; rc = bnxt_re_check_abi_version(rdev); if (rc) goto fail; uctx->rdev = rdev; uctx->shpg = (void *)__get_free_page(GFP_KERNEL); if (!uctx->shpg) { dev_err(rdev_to_dev(rdev), "shared memory allocation failed!\n"); rc = -ENOMEM; goto fail; } spin_lock_init(&uctx->sh_lock); if (BNXT_RE_ABI_VERSION >= 4) { chip_met_rev_num = cctx->chip_num; chip_met_rev_num |= ((u32)cctx->chip_rev & 0xFF) << BNXT_RE_CHIP_ID0_CHIP_REV_SFT; chip_met_rev_num |= ((u32)cctx->chip_metal & 0xFF) << BNXT_RE_CHIP_ID0_CHIP_MET_SFT; resp.chip_id0 = chip_met_rev_num; resp.chip_id1 = 0; /* future extension of chip info */ } if (BNXT_RE_ABI_VERSION != 4) { /*Temp, Use idr_alloc instead*/ resp.dev_id = rdev->en_dev->pdev->devfn; resp.max_qp = rdev->qplib_res.hctx->qp_ctx.max; } genp5 = _is_chip_gen_p5_p7(cctx); if (BNXT_RE_ABI_VERSION > 5) { resp.modes = genp5 ? 
cctx->modes.wqe_mode : 0; if (rdev->dev_attr && BNXT_RE_HW_RETX(rdev->dev_attr->dev_cap_flags)) resp.comp_mask = BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED; } resp.pg_size = PAGE_SIZE; resp.cqe_sz = sizeof(struct cq_base); resp.max_cqd = dev_attr->max_cq_wqes; if (genp5 && cctx->modes.db_push) { resp.comp_mask |= BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED; if (_is_chip_p7(cctx) && !(dev_attr->dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_PINGPONG_PUSH_MODE)) resp.comp_mask &= ~BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED; } resp.comp_mask |= BNXT_RE_COMP_MASK_UCNTX_MQP_EX_SUPPORTED; if (rdev->dbr_pacing) resp.comp_mask |= BNXT_RE_COMP_MASK_UCNTX_DBR_PACING_ENABLED; if (rdev->dbr_drop_recov && rdev->user_dbr_drop_recov) resp.comp_mask |= BNXT_RE_COMP_MASK_UCNTX_DBR_RECOVERY_ENABLED; if (udata->inlen >= sizeof(ureq)) { rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); if (rc) goto cfail; if (bnxt_re_init_pow2_flag(&ureq, &resp)) dev_warn(rdev_to_dev(rdev), "Enabled roundup logic. Library bug?\n"); if (bnxt_re_init_rsvd_wqe_flag(&ureq, &resp, genp5)) dev_warn(rdev_to_dev(rdev), "Rsvd wqe in use! Try the updated library.\n"); } else { dev_warn(rdev_to_dev(rdev), "Enabled roundup logic. Update the library!\n"); resp.comp_mask &= ~BNXT_RE_COMP_MASK_UCNTX_POW2_DISABLED; dev_warn(rdev_to_dev(rdev), "Rsvd wqe in use. Update the library!\n"); resp.comp_mask &= ~BNXT_RE_COMP_MASK_UCNTX_RSVD_WQE_DISABLED; } uctx->cmask = (uint64_t)resp.comp_mask; rc = bnxt_re_copy_to_udata(rdev, &resp, min(udata->outlen, sizeof(resp)), udata); if (rc) goto cfail; INIT_LIST_HEAD(&uctx->cq_list); mutex_init(&uctx->cq_lock); return 0; cfail: free_page((u64)uctx->shpg); uctx->shpg = NULL; fail: return rc; } void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx) { struct bnxt_re_ucontext *uctx = to_bnxt_re(ib_uctx, struct bnxt_re_ucontext, ibucontext); struct bnxt_re_dev *rdev = uctx->rdev; int rc = 0; if (uctx->shpg) free_page((u64)uctx->shpg); if (uctx->dpi.dbr) { /* Free DPI only if this is the first PD allocated by the * application and mark the context dpi as NULL */ if (_is_chip_gen_p5_p7(rdev->chip_ctx) && uctx->wcdpi.dbr) { rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi); if (rc) dev_err(rdev_to_dev(rdev), "dealloc push dp failed\n"); uctx->wcdpi.dbr = NULL; } rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->dpi); if (rc) dev_err(rdev_to_dev(rdev), "Deallocte HW DPI failed!\n"); /* Don't fail, continue*/ uctx->dpi.dbr = NULL; } return; } static struct bnxt_re_cq *is_bnxt_re_cq_page(struct bnxt_re_ucontext *uctx, u64 pg_off) { struct bnxt_re_cq *cq = NULL, *tmp_cq; if (!_is_chip_p7(uctx->rdev->chip_ctx)) return NULL; mutex_lock(&uctx->cq_lock); list_for_each_entry(tmp_cq, &uctx->cq_list, cq_list) { if (((u64)tmp_cq->uctx_cq_page >> PAGE_SHIFT) == pg_off) { cq = tmp_cq; break; } } mutex_unlock(&uctx->cq_lock); return cq; } /* Helper function to mmap the virtual memory from user app */ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) { struct bnxt_re_ucontext *uctx = to_bnxt_re(ib_uctx, struct bnxt_re_ucontext, ibucontext); struct bnxt_re_dev *rdev = uctx->rdev; struct bnxt_re_cq *cq = NULL; int rc = 0; u64 pfn; switch (vma->vm_pgoff) { case BNXT_RE_MAP_SH_PAGE: pfn = vtophys(uctx->shpg) >> PAGE_SHIFT; return rdma_user_mmap_io(&uctx->ibucontext, vma, pfn, PAGE_SIZE, vma->vm_page_prot, NULL); dev_dbg(rdev_to_dev(rdev), "%s:%d uctx->shpg 0x%lx, vtophys(uctx->shpg) 0x%lx, pfn = 0x%lx \n", __func__, __LINE__, (u64) uctx->shpg, vtophys(uctx->shpg), pfn); if (rc) { 
dev_err(rdev_to_dev(rdev), "Shared page mapping failed!\n"); rc = -EAGAIN; } return rc; case BNXT_RE_MAP_WC: vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); pfn = (uctx->wcdpi.umdbr >> PAGE_SHIFT); if (!pfn) return -EFAULT; break; case BNXT_RE_DBR_PAGE: /* Driver doesn't expect write access request */ if (vma->vm_flags & VM_WRITE) return -EFAULT; pfn = vtophys(rdev->dbr_page) >> PAGE_SHIFT; if (!pfn) return -EFAULT; break; case BNXT_RE_MAP_DB_RECOVERY_PAGE: pfn = vtophys(uctx->dbr_recov_cq_page) >> PAGE_SHIFT; if (!pfn) return -EFAULT; break; default: cq = is_bnxt_re_cq_page(uctx, vma->vm_pgoff); if (cq) { pfn = vtophys((void *)cq->uctx_cq_page) >> PAGE_SHIFT; rc = rdma_user_mmap_io(&uctx->ibucontext, vma, pfn, PAGE_SIZE, vma->vm_page_prot, NULL); if (rc) { dev_err(rdev_to_dev(rdev), "CQ page mapping failed!\n"); rc = -EAGAIN; } goto out; } else { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pfn = vma->vm_pgoff; } break; } rc = rdma_user_mmap_io(&uctx->ibucontext, vma, pfn, PAGE_SIZE, vma->vm_page_prot, NULL); if (rc) { dev_err(rdev_to_dev(rdev), "DPI mapping failed!\n"); return -EAGAIN; } rc = __bnxt_re_set_vma_data(uctx, vma); out: return rc; } int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, const struct ib_mad_hdr *in_mad, size_t in_mad_size, struct ib_mad_hdr *out_mad, size_t *out_mad_size, u16 *out_mad_pkey_index) { return IB_MAD_RESULT_SUCCESS; } void bnxt_re_disassociate_ucntx(struct ib_ucontext *ib_uctx) { } diff --git a/sys/dev/bnxt/bnxt_re/main.c b/sys/dev/bnxt/bnxt_re/main.c index e6c6f754ea47..3d26d21f3fc7 100644 --- a/sys/dev/bnxt/bnxt_re/main.c +++ b/sys/dev/bnxt/bnxt_re/main.c @@ -1,4467 +1,4467 @@ /* * Copyright (c) 2015-2024, Broadcom. All rights reserved. The term * Broadcom refers to Broadcom Limited and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * Description: Main component of the bnxt_re driver */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bnxt_re.h" #include "ib_verbs.h" #include "bnxt_re-abi.h" #include "bnxt.h" static char drv_version[] = "Broadcom NetXtreme-C/E RoCE Driver " ROCE_DRV_MODULE_NAME \ " v" ROCE_DRV_MODULE_VERSION " (" ROCE_DRV_MODULE_RELDATE ")\n"; #define BNXT_RE_DESC "Broadcom NetXtreme RoCE" #define BNXT_ADEV_NAME "if_bnxt" MODULE_DESCRIPTION("Broadcom NetXtreme-C/E RoCE Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DEPEND(bnxt_re, linuxkpi, 1, 1, 1); MODULE_DEPEND(bnxt_re, ibcore, 1, 1, 1); MODULE_DEPEND(bnxt_re, if_bnxt, 1, 1, 1); MODULE_VERSION(bnxt_re, 1); DEFINE_MUTEX(bnxt_re_mutex); /* mutex lock for driver */ static unsigned int restrict_mrs = 0; module_param(restrict_mrs, uint, 0); MODULE_PARM_DESC(restrict_mrs, " Restrict the no. of MRs 0 = 256K , 1 = 64K"); unsigned int restrict_stats = 0; module_param(restrict_stats, uint, 0); MODULE_PARM_DESC(restrict_stats, "Restrict stats query frequency to ethtool coalesce value. Disabled by default"); unsigned int enable_fc = 1; module_param(enable_fc, uint, 0); MODULE_PARM_DESC(enable_fc, "Enable default PFC, CC,ETS during driver load. 1 - fc enable, 0 - fc disable - Default is 1"); unsigned int min_tx_depth = 1; module_param(min_tx_depth, uint, 0); MODULE_PARM_DESC(min_tx_depth, "Minimum TX depth - Default is 1"); static uint8_t max_msix_vec[BNXT_RE_MAX_DEVICES] = {0}; static unsigned int max_msix_vec_argc; module_param_array(max_msix_vec, byte, &max_msix_vec_argc, 0444); MODULE_PARM_DESC(max_msix_vec, "Max MSI-x vectors per PF (2 - 64) - Default is 64"); unsigned int cmdq_shadow_qd = RCFW_CMD_NON_BLOCKING_SHADOW_QD; module_param_named(cmdq_shadow_qd, cmdq_shadow_qd, uint, 0644); MODULE_PARM_DESC(cmdq_shadow_qd, "Perf Stat Debug: Shadow QD Range (1-64) - Default is 64"); /* globals */ struct list_head bnxt_re_dev_list = LINUX_LIST_HEAD_INIT(bnxt_re_dev_list); static int bnxt_re_probe_count; DEFINE_MUTEX(bnxt_re_dev_lock); static u32 gmod_exit; static u32 gadd_dev_inprogress; static void bnxt_re_task(struct work_struct *work_task); static struct workqueue_struct *bnxt_re_wq; static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset); static int bnxt_re_ib_init(struct bnxt_re_dev *rdev); static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev); void _bnxt_re_remove(struct auxiliary_device *adev); void writel_fbsd(struct bnxt_softc *bp, u32, u8, u32); u32 readl_fbsd(struct bnxt_softc *bp, u32, u8); static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev); int bnxt_re_register_netdevice_notifier(struct notifier_block *nb) { int rc; rc = register_netdevice_notifier(nb); return rc; } int bnxt_re_unregister_netdevice_notifier(struct notifier_block *nb) { int rc; rc = unregister_netdevice_notifier(nb); return rc; } void bnxt_re_set_dma_device(struct ib_device *ibdev, struct bnxt_re_dev *rdev) { ibdev->dma_device = &rdev->en_dev->pdev->dev; } void bnxt_re_init_resolve_wq(struct bnxt_re_dev *rdev) { rdev->resolve_wq = create_singlethread_workqueue("bnxt_re_resolve_wq"); INIT_LIST_HEAD(&rdev->mac_wq_list); } void bnxt_re_uninit_resolve_wq(struct bnxt_re_dev *rdev) { struct bnxt_re_resolve_dmac_work *tmp_work = NULL, *tmp_st; if (!rdev->resolve_wq) return; flush_workqueue(rdev->resolve_wq); list_for_each_entry_safe(tmp_work, 
tmp_st, &rdev->mac_wq_list, list) { list_del(&tmp_work->list); kfree(tmp_work); } destroy_workqueue(rdev->resolve_wq); rdev->resolve_wq = NULL; } u32 readl_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx) { if (bar_idx) return bus_space_read_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off); else return bus_space_read_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off); } void writel_fbsd(struct bnxt_softc *bp, u32 reg_off, u8 bar_idx, u32 val) { if (bar_idx) bus_space_write_8(bp->doorbell_bar.tag, bp->doorbell_bar.handle, reg_off, htole32(val)); else bus_space_write_8(bp->hwrm_bar.tag, bp->hwrm_bar.handle, reg_off, htole32(val)); } static void bnxt_re_update_fifo_occup_slabs(struct bnxt_re_dev *rdev, u32 fifo_occup) { if (fifo_occup > rdev->dbg_stats->dbq.fifo_occup_water_mark) rdev->dbg_stats->dbq.fifo_occup_water_mark = fifo_occup; if (fifo_occup > 8 * rdev->pacing_algo_th) rdev->dbg_stats->dbq.fifo_occup_slab_4++; else if (fifo_occup > 4 * rdev->pacing_algo_th) rdev->dbg_stats->dbq.fifo_occup_slab_3++; else if (fifo_occup > 2 * rdev->pacing_algo_th) rdev->dbg_stats->dbq.fifo_occup_slab_2++; else if (fifo_occup > rdev->pacing_algo_th) rdev->dbg_stats->dbq.fifo_occup_slab_1++; } static void bnxt_re_update_do_pacing_slabs(struct bnxt_re_dev *rdev) { struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; if (pacing_data->do_pacing > rdev->dbg_stats->dbq.do_pacing_water_mark) rdev->dbg_stats->dbq.do_pacing_water_mark = pacing_data->do_pacing; if (pacing_data->do_pacing > 16 * rdev->dbr_def_do_pacing) rdev->dbg_stats->dbq.do_pacing_slab_5++; else if (pacing_data->do_pacing > 8 * rdev->dbr_def_do_pacing) rdev->dbg_stats->dbq.do_pacing_slab_4++; else if (pacing_data->do_pacing > 4 * rdev->dbr_def_do_pacing) rdev->dbg_stats->dbq.do_pacing_slab_3++; else if (pacing_data->do_pacing > 2 * rdev->dbr_def_do_pacing) rdev->dbg_stats->dbq.do_pacing_slab_2++; else if (pacing_data->do_pacing > rdev->dbr_def_do_pacing) rdev->dbg_stats->dbq.do_pacing_slab_1++; } static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp) { return qp->ib_qp.qp_type == IB_QPT_GSI; } static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev) { struct bnxt_re_qp *qp; mutex_lock(&rdev->qp_lock); list_for_each_entry(qp, &rdev->qp_list, list) { if (bnxt_re_is_qp1_qp(qp)) { mutex_unlock(&rdev->qp_lock); return qp; } } mutex_unlock(&rdev->qp_lock); return NULL; } /* Set the maximum number of each resource that the driver actually wants * to allocate. This may be up to the maximum number the firmware has * reserved for the function. The driver may choose to allocate fewer * resources than the firmware maximum. */ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) { struct bnxt_qplib_max_res dev_res = {}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_dev_attr *attr; struct bnxt_qplib_ctx *hctx; int i; attr = rdev->dev_attr; hctx = rdev->qplib_res.hctx; cctx = rdev->chip_ctx; bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, false); if (!_is_chip_gen_p5_p7(cctx)) { hctx->qp_ctx.max = min_t(u32, dev_res.max_qp, attr->max_qp); hctx->mrw_ctx.max = min_t(u32, dev_res.max_mr, attr->max_mr); /* To accommodate 16k MRs and 16k AHs, * driver has to allocate 32k backing store memory */ hctx->mrw_ctx.max *= 2; hctx->srq_ctx.max = min_t(u32, dev_res.max_srq, attr->max_srq); hctx->cq_ctx.max = min_t(u32, dev_res.max_cq, attr->max_cq); for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) hctx->tqm_ctx.qcount[i] = attr->tqm_alloc_reqs[i]; } else { hctx->qp_ctx.max = attr->max_qp ? 
attr->max_qp : dev_res.max_qp; hctx->mrw_ctx.max = attr->max_mr ? attr->max_mr : dev_res.max_mr; hctx->srq_ctx.max = attr->max_srq ? attr->max_srq : dev_res.max_srq; hctx->cq_ctx.max = attr->max_cq ? attr->max_cq : dev_res.max_cq; } } static void bnxt_re_limit_vf_res(struct bnxt_re_dev *rdev, struct bnxt_qplib_vf_res *vf_res, u32 num_vf) { struct bnxt_qplib_chip_ctx *cctx = rdev->chip_ctx; struct bnxt_qplib_max_res dev_res = {}; bnxt_qplib_max_res_supported(cctx, &rdev->qplib_res, &dev_res, true); vf_res->max_qp = dev_res.max_qp / num_vf; vf_res->max_srq = dev_res.max_srq / num_vf; vf_res->max_cq = dev_res.max_cq / num_vf; /* * MR and AH shares the same backing store, the value specified * for max_mrw is split into half by the FW for MR and AH */ vf_res->max_mrw = dev_res.max_mr * 2 / num_vf; vf_res->max_gid = BNXT_RE_MAX_GID_PER_VF; } static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev) { struct bnxt_qplib_ctx *hctx; hctx = rdev->qplib_res.hctx; memset(&hctx->vf_res, 0, sizeof(struct bnxt_qplib_vf_res)); bnxt_re_limit_pf_res(rdev); if (rdev->num_vfs) bnxt_re_limit_vf_res(rdev, &hctx->vf_res, rdev->num_vfs); } static void bnxt_re_dettach_irq(struct bnxt_re_dev *rdev) { struct bnxt_qplib_rcfw *rcfw = NULL; struct bnxt_qplib_nq *nq; int indx; rcfw = &rdev->rcfw; for (indx = 0; indx < rdev->nqr.max_init; indx++) { nq = &rdev->nqr.nq[indx]; mutex_lock(&nq->lock); bnxt_qplib_nq_stop_irq(nq, false); mutex_unlock(&nq->lock); } bnxt_qplib_rcfw_stop_irq(rcfw, false); } static void bnxt_re_detach_err_device(struct bnxt_re_dev *rdev) { /* Free the MSIx vectors only so that L2 can proceed with MSIx disable */ bnxt_re_dettach_irq(rdev); /* Set the state as detached to prevent sending any more commands */ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); wake_up_all(&rdev->rcfw.cmdq.waitq); } #define MAX_DSCP_PRI_TUPLE 64 struct bnxt_re_dcb_work { struct work_struct work; struct bnxt_re_dev *rdev; struct hwrm_async_event_cmpl cmpl; }; static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev) { rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq"); } static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev) { if (!rdev->dcb_wq) return; flush_workqueue(rdev->dcb_wq); destroy_workqueue(rdev->dcb_wq); rdev->dcb_wq = NULL; } static void bnxt_re_init_aer_wq(struct bnxt_re_dev *rdev) { rdev->aer_wq = create_singlethread_workqueue("bnxt_re_aer_wq"); } static void bnxt_re_uninit_aer_wq(struct bnxt_re_dev *rdev) { if (!rdev->aer_wq) return; flush_workqueue(rdev->aer_wq); destroy_workqueue(rdev->aer_wq); rdev->aer_wq = NULL; } static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev) { struct bnxt_re_qp *qp; if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) return 0; qp = bnxt_re_get_qp1_qp(rdev); if (!qp) return 0; qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP; qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp; return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); } static void bnxt_re_reconfigure_dscp(struct bnxt_re_dev *rdev) { struct bnxt_qplib_cc_param *cc_param; struct bnxt_re_tc_rec *tc_rec; bool update_cc = false; u8 dscp_user; int rc; cc_param = &rdev->cc_param; tc_rec = &rdev->tc_rec[0]; if (!(cc_param->roce_dscp_user || cc_param->cnp_dscp_user)) return; if (cc_param->cnp_dscp_user) { dscp_user = (cc_param->cnp_dscp_user & 0x3f); if ((tc_rec->cnp_dscp_bv & (1ul << dscp_user)) && (cc_param->alt_tos_dscp != dscp_user)) { cc_param->alt_tos_dscp = dscp_user; cc_param->mask |= 
CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP; update_cc = true; } } if (cc_param->roce_dscp_user) { dscp_user = (cc_param->roce_dscp_user & 0x3f); if ((tc_rec->roce_dscp_bv & (1ul << dscp_user)) && (cc_param->tos_dscp != dscp_user)) { cc_param->tos_dscp = dscp_user; cc_param->mask |= CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP; update_cc = true; } } if (update_cc) { rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param); if (rc) dev_err(rdev_to_dev(rdev), "Failed to apply cc settings\n"); } } static void bnxt_re_dcb_wq_task(struct work_struct *work) { struct bnxt_qplib_cc_param *cc_param; struct bnxt_re_tc_rec *tc_rec; struct bnxt_re_dev *rdev; struct bnxt_re_dcb_work *dcb_work = container_of(work, struct bnxt_re_dcb_work, work); int rc; rdev = dcb_work->rdev; if (!rdev) goto exit; mutex_lock(&rdev->cc_lock); cc_param = &rdev->cc_param; rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query ccparam rc:%d", rc); goto fail; } tc_rec = &rdev->tc_rec[0]; /* * Upon the receival of DCB Async event: * If roce_dscp or cnp_dscp or both (which user configured using configfs) * is in the list, re-program the value using modify_roce_cc command */ bnxt_re_reconfigure_dscp(rdev); cc_param->roce_pri = tc_rec->roce_prio; if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { cc_param->qp1_tos_dscp = cc_param->tos_dscp; rc = bnxt_re_update_qp1_tos_dscp(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1 rc:%d", __func__, rc); goto fail; } } fail: mutex_unlock(&rdev->cc_lock); exit: kfree(dcb_work); } static int bnxt_re_hwrm_dbr_pacing_broadcast_event(struct bnxt_re_dev *rdev) { struct hwrm_func_dbr_pacing_broadcast_event_output resp = {0}; struct hwrm_func_dbr_pacing_broadcast_event_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_fw_msg fw_msg; int rc; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_DBR_PACING_BROADCAST_EVENT, -1, -1); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_dbg(rdev_to_dev(rdev), "Failed to send dbr pacing broadcast event rc:%d", rc); return rc; } return 0; } static int bnxt_re_hwrm_dbr_pacing_nqlist_query(struct bnxt_re_dev *rdev) { struct hwrm_func_dbr_pacing_nqlist_query_output resp = {0}; struct hwrm_func_dbr_pacing_nqlist_query_input req = {0}; struct bnxt_dbq_nq_list *nq_list = &rdev->nq_list; struct bnxt_en_dev *en_dev = rdev->en_dev; bool primary_found = false; struct bnxt_fw_msg fw_msg; struct bnxt_qplib_nq *nq; int rc, i, j = 1; u16 *nql_ptr; nq = &rdev->nqr.nq[0]; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_DBR_PACING_NQLIST_QUERY, -1, -1); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to send dbr pacing nq list query rc:%d", rc); return rc; } nq_list->num_nql_entries = le32_to_cpu(resp.num_nqs); nql_ptr = &resp.nq_ring_id0; /* populate the nq_list of the primary function with list received * from FW. Fill the NQ IDs of secondary functions from index 1 to * num_nql_entries - 1. 
 * Fill the nq_list->nq_id[0] with the nq_id of the primary PF.
 */
	for (i = 0; i < nq_list->num_nql_entries; i++) {
		u16 nq_id = *nql_ptr;

		dev_dbg(rdev_to_dev(rdev),
			"nq_list->nq_id[%d] = %d\n", i, nq_id);
		if (nq_id != nq->ring_id) {
			nq_list->nq_id[j] = nq_id;
			j++;
		} else {
			primary_found = true;
			nq_list->nq_id[0] = nq->ring_id;
		}
		nql_ptr++;
	}
	if (primary_found)
		bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1);

	return 0;
}

static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
{
	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
	u32 read_val, fifo_occup;
	bool first_read = true;

	/* The loop shouldn't run infinitely as the occupancy usually goes
	 * below the pacing algo threshold as soon as pacing kicks in.
	 */
	while (1) {
		read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_db_fifo_reg_off, 0);
		fifo_occup = pacing_data->fifo_max_depth -
			     ((read_val & pacing_data->fifo_room_mask) >>
			      pacing_data->fifo_room_shift);
		/* FIFO occupancy cannot be greater than the MAX FIFO depth */
		if (fifo_occup > pacing_data->fifo_max_depth)
			break;

		if (first_read) {
			bnxt_re_update_fifo_occup_slabs(rdev, fifo_occup);
			first_read = false;
		}
		if (fifo_occup < pacing_data->pacing_th)
			break;
	}
}

static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
{
	struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;

	pacing_data->do_pacing = rdev->dbr_def_do_pacing;
	pacing_data->pacing_th = rdev->pacing_algo_th;
	pacing_data->alarm_th =
		pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx);
}

#define CAG_RING_MASK 0x7FF
#define CAG_RING_SHIFT 17
#define WATERMARK_MASK 0xFFF
#define WATERMARK_SHIFT 0

static bool bnxt_re_check_if_dbq_intr_triggered(struct bnxt_re_dev *rdev)
{
	u32 read_val;
	int j;

	for (j = 0; j < 10; j++) {
		read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0);
		dev_dbg(rdev_to_dev(rdev), "AEQ ARM status = 0x%x\n",
			read_val);
		if (!read_val)
			return true;
	}
	return false;
}

int bnxt_re_set_dbq_throttling_reg(struct bnxt_re_dev *rdev, u16 nq_id, u32 throttle)
{
	u32 cag_ring_water_mark = 0, read_val;
	u32 throttle_val;

	/* Convert the throttle percentage to a register value */
	throttle_val = (rdev->qplib_res.pacing_data->fifo_max_depth * throttle) / 100;
	if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) {
		cag_ring_water_mark = (nq_id & CAG_RING_MASK) << CAG_RING_SHIFT |
				      (throttle_val & WATERMARK_MASK);
		writel_fbsd(rdev->en_dev->softc, rdev->dbr_throttling_reg_off, 0,
			    cag_ring_water_mark);
		read_val = readl_fbsd(rdev->en_dev->softc, rdev->dbr_throttling_reg_off, 0);
		dev_dbg(rdev_to_dev(rdev),
			"%s: dbr_throttling_reg_off read_val = 0x%x\n",
			__func__, read_val);
		if (read_val != cag_ring_water_mark) {
			dev_dbg(rdev_to_dev(rdev),
				"nq_id = %d write_val=0x%x read_val=0x%x\n",
				nq_id, cag_ring_water_mark, read_val);
			return 1;
		}
	}
	writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off, 0, 1);
	return 0;
}

static void bnxt_re_set_dbq_throttling_for_non_primary(struct bnxt_re_dev *rdev)
{
	struct bnxt_dbq_nq_list *nq_list;
	struct bnxt_qplib_nq *nq;
	int i;

	nq_list = &rdev->nq_list;
	/* Run a loop for the other active functions if this is the primary function */
	if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) {
		dev_dbg(rdev_to_dev(rdev),
			"%s: nq_list->num_nql_entries = %d\n",
			__func__, nq_list->num_nql_entries);
		nq = &rdev->nqr.nq[0];
		for (i = nq_list->num_nql_entries - 1; i > 0; i--) {
			u16 nq_id = nq_list->nq_id[i];

			if (nq)
				dev_dbg(rdev_to_dev(rdev),
					"%s: nq_id = %d cur_fn_ring_id = %d\n",
					__func__, nq_id, nq->ring_id);
			if
(bnxt_re_set_dbq_throttling_reg (rdev, nq_id, 0)) break; bnxt_re_check_if_dbq_intr_triggered(rdev); } } } static void bnxt_re_handle_dbr_nq_pacing_notification(struct bnxt_re_dev *rdev) { struct bnxt_qplib_nq *nq; int rc = 0; nq = &rdev->nqr.nq[0]; /* Query the NQ list*/ rc = bnxt_re_hwrm_dbr_pacing_nqlist_query(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to Query NQ list rc= %d", rc); return; } /*Configure GRC access for Throttling and aeq_arm register */ writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28, 0, rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK); rdev->dbr_throttling_reg_off = (rdev->chip_ctx->dbr_throttling_reg & BNXT_GRC_OFFSET_MASK) + 0x8000; rdev->dbr_aeq_arm_reg_off = (rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_OFFSET_MASK) + 0x8000; bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id, rdev->dbq_watermark); } static void bnxt_re_dbq_wq_task(struct work_struct *work) { struct bnxt_re_dbq_work *dbq_work = container_of(work, struct bnxt_re_dbq_work, work); struct bnxt_re_dev *rdev; rdev = dbq_work->rdev; if (!rdev) goto exit; switch (dbq_work->event) { case BNXT_RE_DBQ_EVENT_SCHED: dev_dbg(rdev_to_dev(rdev), "%s: Handle DBQ Pacing event\n", __func__); if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev); else bnxt_re_pacing_alert(rdev); break; case BNXT_RE_DBR_PACING_EVENT: dev_dbg(rdev_to_dev(rdev), "%s: Sched interrupt/pacing worker\n", __func__); if (_is_chip_p7(rdev->chip_ctx)) bnxt_re_pacing_alert(rdev); else if (!rdev->chip_ctx->modes.dbr_pacing_v0) bnxt_re_hwrm_dbr_pacing_qcfg(rdev); break; case BNXT_RE_DBR_NQ_PACING_NOTIFICATION: bnxt_re_handle_dbr_nq_pacing_notification(rdev); /* Issue a broadcast event to notify other functions * that primary changed */ bnxt_re_hwrm_dbr_pacing_broadcast_event(rdev); break; } exit: kfree(dbq_work); } static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_re_dcb_work *dcb_work; struct bnxt_re_dbq_work *dbq_work; struct bnxt_re_dev *rdev; u16 event_id; u32 data1; u32 data2 = 0; if (!cmpl) { pr_err("Async event, bad completion\n"); return; } if (!en_info || !en_info->en_dev) { pr_err("Async event, bad en_info or en_dev\n"); return; } rdev = en_info->rdev; event_id = le16_to_cpu(cmpl->event_id); data1 = le32_to_cpu(cmpl->event_data1); data2 = le32_to_cpu(cmpl->event_data2); if (!rdev || !rdev_to_dev(rdev)) { dev_dbg(NULL, "Async event, bad rdev or netdev\n"); return; } if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags) || !test_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) { dev_dbg(NULL, "Async event, device already detached\n"); return; } if (data2 >= 0) dev_dbg(rdev_to_dev(rdev), "Async event_id = %d data1 = %d data2 = %d", event_id, data1, data2); switch (event_id) { case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: /* Not handling the event in older FWs */ if (!is_qport_service_type_supported(rdev)) break; if (!rdev->dcb_wq) break; dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC); if (!dcb_work) break; dcb_work->rdev = rdev; memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl)); INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task); queue_work(rdev->dcb_wq, &dcb_work->work); break; case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { /* Set rcfw flag to control commands send to Bono */ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); /* Set bnxt_re flag to control commands send via L2 driver */ 
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); wake_up_all(&rdev->rcfw.cmdq.waitq); } break; case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD: if (!rdev->dbr_pacing) break; dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC); if (!dbq_work) goto unlock; dbq_work->rdev = rdev; dbq_work->event = BNXT_RE_DBR_PACING_EVENT; INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task); queue_work(rdev->dbq_wq, &dbq_work->work); rdev->dbr_sw_stats->dbq_int_recv++; break; case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE: if (!rdev->dbr_pacing) break; dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC); if (!dbq_work) goto unlock; dbq_work->rdev = rdev; dbq_work->event = BNXT_RE_DBR_NQ_PACING_NOTIFICATION; INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task); queue_work(rdev->dbq_wq, &dbq_work->work); break; default: break; } unlock: return; } static void bnxt_re_db_fifo_check(struct work_struct *work) { struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, dbq_fifo_check_work); struct bnxt_qplib_db_pacing_data *pacing_data; u32 pacing_save; if (!mutex_trylock(&rdev->dbq_lock)) return; pacing_data = rdev->qplib_res.pacing_data; pacing_save = rdev->do_pacing_save; __wait_for_fifo_occupancy_below_th(rdev); cancel_delayed_work_sync(&rdev->dbq_pacing_work); if (rdev->dbr_recovery_on) goto recovery_on; if (pacing_save > rdev->dbr_def_do_pacing) { /* Double the do_pacing value during the congestion */ pacing_save = pacing_save << 1; } else { /* * when a new congestion is detected increase the do_pacing * by 8 times. And also increase the pacing_th by 4 times. The * reason to increase pacing_th is to give more space for the * queue to oscillate down without getting empty, but also more * room for the queue to increase without causing another alarm. */ pacing_save = pacing_save << 3; pacing_data->pacing_th = rdev->pacing_algo_th * 4; } if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING) pacing_save = BNXT_RE_MAX_DBR_DO_PACING; pacing_data->do_pacing = pacing_save; rdev->do_pacing_save = pacing_data->do_pacing; pacing_data->alarm_th = pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE(rdev->chip_ctx); recovery_on: schedule_delayed_work(&rdev->dbq_pacing_work, msecs_to_jiffies(rdev->dbq_pacing_time)); rdev->dbr_sw_stats->dbq_pacing_alerts++; mutex_unlock(&rdev->dbq_lock); } static void bnxt_re_pacing_timer_exp(struct work_struct *work) { struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, dbq_pacing_work.work); struct bnxt_qplib_db_pacing_data *pacing_data; u32 read_val, fifo_occup; struct bnxt_qplib_nq *nq; if (!mutex_trylock(&rdev->dbq_lock)) return; pacing_data = rdev->qplib_res.pacing_data; read_val = readl_fbsd(rdev->en_dev->softc , rdev->dbr_db_fifo_reg_off, 0); fifo_occup = pacing_data->fifo_max_depth - ((read_val & pacing_data->fifo_room_mask) >> pacing_data->fifo_room_shift); if (fifo_occup > pacing_data->pacing_th) goto restart_timer; /* * Instead of immediately going back to the default do_pacing * reduce it by 1/8 times and restart the timer. */ pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3); pacing_data->do_pacing = max_t(u32, rdev->dbr_def_do_pacing, pacing_data->do_pacing); /* * If the fifo_occup is less than the interrupt enable threshold * enable the interrupt on the primary PF. 
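 * (Re-arming is done through bnxt_re_set_dbq_throttling_reg(), which also
 * writes the AEQ arm register so firmware can raise the next pacing event;
 * dbq_int_disable was set when the previous pacing alert was scheduled.)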
*/ if (rdev->dbq_int_disable && fifo_occup < rdev->pacing_en_int_th) { if (bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) { if (!rdev->chip_ctx->modes.dbr_pacing_v0) { nq = &rdev->nqr.nq[0]; bnxt_re_set_dbq_throttling_reg(rdev, nq->ring_id, rdev->dbq_watermark); rdev->dbr_sw_stats->dbq_int_en++; rdev->dbq_int_disable = false; } } } if (pacing_data->do_pacing <= rdev->dbr_def_do_pacing) { bnxt_re_set_default_pacing_data(rdev); rdev->dbr_sw_stats->dbq_pacing_complete++; goto dbq_unlock; } restart_timer: schedule_delayed_work(&rdev->dbq_pacing_work, msecs_to_jiffies(rdev->dbq_pacing_time)); bnxt_re_update_do_pacing_slabs(rdev); rdev->dbr_sw_stats->dbq_pacing_resched++; dbq_unlock: rdev->do_pacing_save = pacing_data->do_pacing; mutex_unlock(&rdev->dbq_lock); } void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) { struct bnxt_qplib_db_pacing_data *pacing_data; if (!rdev->dbr_pacing) return; mutex_lock(&rdev->dbq_lock); pacing_data = rdev->qplib_res.pacing_data; /* * Increase the alarm_th to max so that other user lib instances do not * keep alerting the driver. */ pacing_data->alarm_th = pacing_data->fifo_max_depth; pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING; cancel_work_sync(&rdev->dbq_fifo_check_work); schedule_work(&rdev->dbq_fifo_check_work); mutex_unlock(&rdev->dbq_lock); } void bnxt_re_schedule_dbq_event(struct bnxt_qplib_res *res) { struct bnxt_re_dbq_work *dbq_work; struct bnxt_re_dev *rdev; rdev = container_of(res, struct bnxt_re_dev, qplib_res); atomic_set(&rdev->dbq_intr_running, 1); if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) goto exit; /* Run the loop to send dbq event to other functions * for newer FW */ if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) && !rdev->chip_ctx->modes.dbr_pacing_v0) bnxt_re_set_dbq_throttling_for_non_primary(rdev); dbq_work = kzalloc(sizeof(*dbq_work), GFP_ATOMIC); if (!dbq_work) goto exit; dbq_work->rdev = rdev; dbq_work->event = BNXT_RE_DBQ_EVENT_SCHED; INIT_WORK(&dbq_work->work, bnxt_re_dbq_wq_task); queue_work(rdev->dbq_wq, &dbq_work->work); rdev->dbr_sw_stats->dbq_int_recv++; rdev->dbq_int_disable = true; exit: atomic_set(&rdev->dbq_intr_running, 0); } static void bnxt_re_free_msix(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; int rc; rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP); if (rc) dev_err(rdev_to_dev(rdev), "netdev %p free_msix failed! rc = 0x%x", rdev->netdev, rc); } static int bnxt_re_request_msix(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; int rc = 0, num_msix_want, num_msix_got; struct bnxt_msix_entry *entry; /* * Request MSIx based on the function type. This is * a temporory solution to enable max VFs when NPAR is * enabled. * TODO - change the scheme with an adapter specific check * as the latest adapters can support more NQs. For now * this change satisfy all adapter versions. */ if (rdev->is_virtfn) num_msix_want = BNXT_RE_MAX_MSIX_VF; else if (BNXT_EN_NPAR(en_dev)) num_msix_want = BNXT_RE_MAX_MSIX_NPAR_PF; else if (_is_chip_gen_p5_p7(rdev->chip_ctx)) num_msix_want = rdev->num_msix_requested ?: BNXT_RE_MAX_MSIX_GEN_P5_PF; else num_msix_want = BNXT_RE_MAX_MSIX_PF; /* * Since MSIX vectors are used for both NQs and CREQ, we should try to * allocate num_online_cpus + 1 by taking into account the CREQ. 
This * leaves the number of MSIX vectors for NQs match the number of CPUs * and allows the system to be fully utilized */ num_msix_want = min_t(u32, num_msix_want, num_online_cpus() + 1); num_msix_want = min_t(u32, num_msix_want, BNXT_RE_MAX_MSIX); num_msix_want = max_t(u32, num_msix_want, BNXT_RE_MIN_MSIX); entry = rdev->nqr.msix_entries; num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP, entry, num_msix_want); if (num_msix_got < BNXT_RE_MIN_MSIX) { rc = -EINVAL; goto done; } if (num_msix_got != num_msix_want) dev_warn(rdev_to_dev(rdev), "bnxt_request_msix: wanted %d vectors, got %d\n", num_msix_want, num_msix_got); rdev->nqr.num_msix = num_msix_got; return 0; done: if (num_msix_got) bnxt_re_free_msix(rdev); return rc; } static int __wait_for_ib_unregister(struct bnxt_re_dev *rdev, struct bnxt_re_en_dev_info *en_info) { u64 timeout = 0; u32 cur_prod = 0, cur_cons = 0; int retry = 0, rc = 0, ret = 0; cur_prod = rdev->rcfw.cmdq.hwq.prod; cur_cons = rdev->rcfw.cmdq.hwq.cons; timeout = msecs_to_jiffies(BNXT_RE_RECOVERY_IB_UNINIT_WAIT_TIME_MS); retry = BNXT_RE_RECOVERY_IB_UNINIT_WAIT_RETRY; /* During module exit, increase timeout ten-fold to 100 mins to wait * as long as possible for ib_unregister() to complete */ if (rdev->mod_exit) retry *= 10; do { /* * Since the caller of this function invokes with bnxt_re_mutex held, * release it to avoid holding a lock while in wait / sleep mode. */ mutex_unlock(&bnxt_re_mutex); rc = wait_event_timeout(en_info->waitq, en_info->ib_uninit_done, timeout); mutex_lock(&bnxt_re_mutex); if (!bnxt_re_is_rdev_valid(rdev)) break; if (rc) break; if (!RCFW_NO_FW_ACCESS(&rdev->rcfw)) { /* No need to check for cmdq stall during module exit, * wait for ib unregister to complete */ if (!rdev->mod_exit) ret = __check_cmdq_stall(&rdev->rcfw, &cur_prod, &cur_cons); if (ret || en_info->ib_uninit_done) break; } } while (retry--); return rc; } static int bnxt_re_handle_start(struct auxiliary_device *adev) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); struct bnxt_re_dev *rdev = NULL; struct ifnet *real_dev; struct bnxt_en_dev *en_dev; struct ifnet *netdev; int rc = 0; if (!en_info || !en_info->en_dev) { pr_err("Start, bad en_info or en_dev\n"); return -EINVAL; } netdev = en_info->en_dev->net; if (en_info->rdev) { dev_info(rdev_to_dev(en_info->rdev), "%s: Device is already added adev %p rdev: %p\n", __func__, adev, en_info->rdev); return 0; } en_dev = en_info->en_dev; real_dev = rdma_vlan_dev_real_dev(netdev); if (!real_dev) real_dev = netdev; rc = bnxt_re_add_device(&rdev, real_dev, en_info->gsi_mode, BNXT_RE_POST_RECOVERY_INIT, en_info->wqe_mode, en_info->num_msix_requested, adev); if (rc) { /* Add device failed. Unregister the device. 
* This has to be done explicitly as * bnxt_re_stop would not have unregistered */ rtnl_lock(); en_dev->en_ops->bnxt_unregister_device(en_dev, BNXT_ROCE_ULP); rtnl_unlock(); mutex_lock(&bnxt_re_dev_lock); gadd_dev_inprogress--; mutex_unlock(&bnxt_re_dev_lock); return rc; } rdev->adev = adev; rtnl_lock(); bnxt_re_get_link_speed(rdev); rtnl_unlock(); rc = bnxt_re_ib_init(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "Failed ib_init\n"); return rc; } bnxt_re_ib_init_2(rdev); return rc; } static void bnxt_re_stop(void *handle) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct ifnet *netdev; struct bnxt_re_dev *rdev; struct bnxt_en_dev *en_dev; int rc = 0; rtnl_unlock(); mutex_lock(&bnxt_re_mutex); if (!en_info || !en_info->en_dev) { pr_err("Stop, bad en_info or en_dev\n"); goto exit; } netdev = en_info->en_dev->net; rdev = en_info->rdev; if (!rdev) goto exit; if (!bnxt_re_is_rdev_valid(rdev)) goto exit; /* * Check if fw has undergone reset or is in a fatal condition. * If so, set flags so that no further commands are sent down to FW */ en_dev = rdev->en_dev; if (en_dev->en_state & BNXT_STATE_FW_FATAL_COND || en_dev->en_state & BNXT_STATE_FW_RESET_DET) { /* Set rcfw flag to control commands send to Bono */ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); /* Set bnxt_re flag to control commands send via L2 driver */ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); wake_up_all(&rdev->rcfw.cmdq.waitq); } if (test_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags)) goto exit; set_bit(BNXT_RE_FLAG_STOP_IN_PROGRESS, &rdev->flags); en_info->wqe_mode = rdev->chip_ctx->modes.wqe_mode; en_info->gsi_mode = rdev->gsi_ctx.gsi_qp_mode; en_info->num_msix_requested = rdev->num_msix_requested; en_info->ib_uninit_done = false; if (rdev->dbr_pacing) bnxt_re_set_pacing_dev_state(rdev); dev_info(rdev_to_dev(rdev), "%s: L2 driver notified to stop." 
"Attempting to stop and Dispatching event " "to inform the stack\n", __func__); init_waitqueue_head(&en_info->waitq); /* Schedule a work item to handle IB UNINIT for recovery */ bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER, NULL, netdev, rdev->adev); rc = __wait_for_ib_unregister(rdev, en_info); if (!bnxt_re_is_rdev_valid(rdev)) goto exit; if (!rc) { dev_info(rdev_to_dev(rdev), "%s: Attempt to stop failed\n", __func__); bnxt_re_detach_err_device(rdev); goto exit; } bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, rdev->adev); exit: mutex_unlock(&bnxt_re_mutex); /* Take rtnl_lock before return, bnxt_re_stop is called with rtnl_lock */ rtnl_lock(); return; } static void bnxt_re_start(void *handle) { rtnl_unlock(); mutex_lock(&bnxt_re_mutex); if (bnxt_re_handle_start((struct auxiliary_device *)handle)) pr_err("Failed to start RoCE device"); mutex_unlock(&bnxt_re_mutex); /* Take rtnl_lock before return, bnxt_re_start is called with rtnl_lock */ rtnl_lock(); return; } static void bnxt_re_shutdown(void *p) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(p); struct bnxt_re_dev *rdev; if (!en_info) { pr_err("Shutdown, bad en_info\n"); return; } rtnl_unlock(); mutex_lock(&bnxt_re_mutex); rdev = en_info->rdev; if (!rdev || !bnxt_re_is_rdev_valid(rdev)) goto exit; /* rtnl_lock held by L2 before coming here */ bnxt_re_stopqps_and_ib_uninit(rdev); bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, rdev->adev); exit: mutex_unlock(&bnxt_re_mutex); rtnl_lock(); return; } static void bnxt_re_stop_irq(void *handle) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_qplib_rcfw *rcfw = NULL; struct bnxt_re_dev *rdev; struct bnxt_qplib_nq *nq; int indx; if (!en_info) { pr_err("Stop irq, bad en_info\n"); return; } rdev = en_info->rdev; if (!rdev) return; rcfw = &rdev->rcfw; for (indx = 0; indx < rdev->nqr.max_init; indx++) { nq = &rdev->nqr.nq[indx]; mutex_lock(&nq->lock); bnxt_qplib_nq_stop_irq(nq, false); mutex_unlock(&nq->lock); } if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) bnxt_qplib_rcfw_stop_irq(rcfw, false); } static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_msix_entry *msix_ent = NULL; struct bnxt_qplib_rcfw *rcfw = NULL; struct bnxt_re_dev *rdev; struct bnxt_qplib_nq *nq; int indx, rc, vec; if (!en_info) { pr_err("Start irq, bad en_info\n"); return; } rdev = en_info->rdev; if (!rdev) return; if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return; msix_ent = rdev->nqr.msix_entries; rcfw = &rdev->rcfw; if (!ent) { /* Not setting the f/w timeout bit in rcfw. * During the driver unload the first command * to f/w will timeout and that will set the * timeout bit. */ dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n"); return; } /* Vectors may change after restart, so update with new vectors * in device structure. 
*/ for (indx = 0; indx < rdev->nqr.num_msix; indx++) rdev->nqr.msix_entries[indx].vector = ent[indx].vector; if (test_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) { rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, false); if (rc) { dev_warn(rdev_to_dev(rdev), "Failed to reinit CREQ\n"); return; } } for (indx = 0 ; indx < rdev->nqr.max_init; indx++) { nq = &rdev->nqr.nq[indx]; vec = indx + 1; rc = bnxt_qplib_nq_start_irq(nq, indx, msix_ent[vec].vector, false); if (rc) { dev_warn(rdev_to_dev(rdev), "Failed to reinit NQ index %d\n", indx); return; } } } /* * Except for ulp_async_notifier, the remaining ulp_ops * below are called with rtnl_lock held */ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { .ulp_async_notifier = bnxt_re_async_notifier, .ulp_stop = bnxt_re_stop, .ulp_start = bnxt_re_start, .ulp_shutdown = bnxt_re_shutdown, .ulp_irq_stop = bnxt_re_stop_irq, .ulp_irq_restart = bnxt_re_start_irq, }; static inline const char *bnxt_re_netevent(unsigned long event) { BNXT_RE_NETDEV_EVENT(event, NETDEV_UP); BNXT_RE_NETDEV_EVENT(event, NETDEV_DOWN); BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGE); BNXT_RE_NETDEV_EVENT(event, NETDEV_REGISTER); BNXT_RE_NETDEV_EVENT(event, NETDEV_UNREGISTER); BNXT_RE_NETDEV_EVENT(event, NETDEV_CHANGEADDR); return "Unknown"; } /* RoCE -> Net driver */ /* Driver registration routines used to let the networking driver (bnxt_en) * to know that the RoCE driver is now installed */ static void bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; int rc; rtnl_lock(); rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev, BNXT_ROCE_ULP); rtnl_unlock(); if (rc) dev_err(rdev_to_dev(rdev), "netdev %p unregister failed! rc = 0x%x", rdev->en_dev->net, rc); clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); } static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; int rc = 0; rtnl_lock(); rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP, &bnxt_re_ulp_ops, rdev->adev); rtnl_unlock(); if (rc) { dev_err(rdev_to_dev(rdev), "netdev %p register failed! rc = 0x%x", rdev->netdev, rc); return rc; } return rc; } static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *cctx; struct bnxt_en_dev *en_dev; struct bnxt_qplib_res *res; u32 l2db_len = 0; u32 offset = 0; u32 barlen; int rc; res = &rdev->qplib_res; en_dev = rdev->en_dev; cctx = rdev->chip_ctx; /* Issue qcfg */ rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset); if (rc) dev_info(rdev_to_dev(rdev), "Couldn't get DB bar size, Low latency framework is disabled\n"); /* set register offsets for both UC and WC */ if (_is_chip_p7(cctx)) res->dpi_tbl.ucreg.offset = offset; else res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET : BNXT_QPLIB_DBR_PF_DB_OFFSET; res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset; /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size * is equal to the DB-Bar actual size. This indicates that L2 * is mapping entire bar as UC-. RoCE driver can't enable WC mapping * in such cases and DB-push will be disabled. 
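 * For instance (illustrative sizes only): with a 1 MB doorbell BAR where L2
 * maps just the first 512 KB as UC, l2_db_size (512 KB) differs from the BAR
 * length, so the WC register offset is moved up to 512 KB and push mode can
 * remain enabled.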
*/ barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION); if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) { res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n"); } return; } static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) { struct bnxt_qplib_chip_ctx *cctx; struct bnxt_en_dev *en_dev; en_dev = rdev->en_dev; cctx = rdev->chip_ctx; cctx->modes.wqe_mode = _is_chip_gen_p5_p7(rdev->chip_ctx) ? mode : BNXT_QPLIB_WQE_MODE_STATIC; cctx->modes.te_bypass = false; if (bnxt_re_hwrm_qcaps(rdev)) dev_err(rdev_to_dev(rdev), "Failed to query hwrm qcaps\n"); /* * TODO: Need a better mechanism for spreading of the * 512 extended PPP pages in the presence of VF and * NPAR, until then not enabling push */ if (_is_chip_p7(rdev->chip_ctx) && cctx->modes.db_push) { if (rdev->is_virtfn || BNXT_EN_NPAR(en_dev)) cctx->modes.db_push = false; } rdev->roce_mode = en_dev->flags & BNXT_EN_FLAG_ROCE_CAP; dev_dbg(rdev_to_dev(rdev), "RoCE is supported on the device - caps:0x%x", rdev->roce_mode); if (!_is_chip_gen_p5_p7(rdev->chip_ctx)) rdev->roce_mode = BNXT_RE_FLAG_ROCEV2_CAP; cctx->hw_stats_size = en_dev->hw_ring_stats_size; } static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_qplib_res *res; if (!rdev->chip_ctx) return; res = &rdev->qplib_res; bnxt_qplib_unmap_db_bar(res); kfree(res->hctx); res->rcfw = NULL; kfree(rdev->dev_attr); rdev->dev_attr = NULL; chip_ctx = rdev->chip_ctx; rdev->chip_ctx = NULL; res->cctx = NULL; res->hctx = NULL; res->pdev = NULL; res->netdev = NULL; kfree(chip_ctx); } static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; int rc; en_dev = rdev->en_dev; /* Supply pci device to qplib */ rdev->qplib_res.pdev = en_dev->pdev; rdev->qplib_res.netdev = rdev->netdev; rdev->qplib_res.en_dev = en_dev; chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL); if (!chip_ctx) return -ENOMEM; rdev->chip_ctx = chip_ctx; rdev->qplib_res.cctx = chip_ctx; rc = bnxt_re_query_hwrm_intf_version(rdev); if (rc) goto fail; rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); if (!rdev->dev_attr) { rc = -ENOMEM; goto fail; } rdev->qplib_res.dattr = rdev->dev_attr; rdev->qplib_res.rcfw = &rdev->rcfw; rdev->qplib_res.is_vf = rdev->is_virtfn; rdev->qplib_res.hctx = kzalloc(sizeof(*rdev->qplib_res.hctx), GFP_KERNEL); if (!rdev->qplib_res.hctx) { rc = -ENOMEM; goto fail; } bnxt_re_set_drv_mode(rdev, wqe_mode); bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); if (rc) goto fail; rc = bnxt_qplib_enable_atomic_ops_to_root(en_dev->pdev); if (rc) dev_dbg(rdev_to_dev(rdev), "platform doesn't support global atomics"); return 0; fail: kfree(rdev->chip_ctx); rdev->chip_ctx = NULL; kfree(rdev->dev_attr); rdev->dev_attr = NULL; kfree(rdev->qplib_res.hctx); rdev->qplib_res.hctx = NULL; return rc; } static u16 bnxt_re_get_rtype(struct bnxt_re_dev *rdev) { return _is_chip_gen_p5_p7(rdev->chip_ctx) ? HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ : HWRM_RING_ALLOC_INPUT_RING_TYPE_ROCE_CMPL; } static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id) { int rc = -EINVAL; struct hwrm_ring_free_input req = {0}; struct hwrm_ring_free_output resp; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_fw_msg fw_msg; if (!en_dev) return rc; /* To avoid unnecessary error messages during recovery. * HW is anyway in error state. 
So dont send down the command */ if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; /* allocation had failed, no need to issue hwrm */ if (fw_ring_id == 0xffff) return 0; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1); req.ring_type = bnxt_re_get_rtype(rdev); req.ring_id = cpu_to_le16(fw_ring_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to free HW ring with rc = 0x%x", rc); return rc; } dev_dbg(rdev_to_dev(rdev), "HW ring freed with id = 0x%x\n", fw_ring_id); return rc; } static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, struct bnxt_re_ring_attr *ring_attr, u16 *fw_ring_id) { int rc = -EINVAL; struct hwrm_ring_alloc_input req = {0}; struct hwrm_ring_alloc_output resp; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_fw_msg fw_msg; if (!en_dev) return rc; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1); req.flags = cpu_to_le16(ring_attr->flags); req.enables = 0; req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]); if (ring_attr->pages > 1) { /* Page size is in log2 units */ req.page_size = BNXT_PAGE_SHIFT; req.page_tbl_depth = 1; } else { req.page_size = 4; req.page_tbl_depth = 0; } req.fbo = 0; /* Association of ring index with doorbell index and MSIX number */ req.logical_id = cpu_to_le16(ring_attr->lrid); req.length = cpu_to_le32(ring_attr->depth + 1); req.ring_type = ring_attr->type; req.int_mode = ring_attr->mode; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW ring with rc = 0x%x", rc); return rc; } *fw_ring_id = le16_to_cpu(resp.ring_id); dev_dbg(rdev_to_dev(rdev), "HW ring allocated with id = 0x%x at slot 0x%x", resp.ring_id, ring_attr->lrid); return rc; } static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev, u32 fw_stats_ctx_id, u16 tid) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_stat_ctx_free_input req = {0}; struct hwrm_stat_ctx_free_output resp; struct bnxt_fw_msg fw_msg; int rc = -EINVAL; if (!en_dev) return rc; /* To avoid unnecessary error messages during recovery. * HW is anyway in error state. So dont send down the command */ if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, tid); req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to free HW stats ctx with rc = 0x%x", rc); return rc; } dev_dbg(rdev_to_dev(rdev), "HW stats ctx freed with id = 0x%x", fw_stats_ctx_id); return rc; } static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, u16 tid) { struct hwrm_stat_ctx_alloc_output resp = {}; struct hwrm_stat_ctx_alloc_input req = {}; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_qplib_stats *stat; struct bnxt_qplib_ctx *hctx; struct bnxt_fw_msg fw_msg; int rc = 0; hctx = rdev->qplib_res.hctx; stat = (tid == 0xffff) ? 
&hctx->stats : &hctx->stats2; stat->fw_id = INVALID_STATS_CTX_ID; if (!en_dev) return -EINVAL; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, tid); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_length = rdev->chip_ctx->hw_stats_size; req.stats_dma_addr = cpu_to_le64(stat->dma_map); req.stat_ctx_flags = HWRM_STAT_CTX_ALLOC_INPUT_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW stats ctx, rc = 0x%x", rc); return rc; } stat->fw_id = le32_to_cpu(resp.stat_ctx_id); dev_dbg(rdev_to_dev(rdev), "HW stats ctx allocated with id = 0x%x", stat->fw_id); return rc; } static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev) { const struct bnxt_en_ops *en_ops; if (rdev->is_virtfn || test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return; memset(rdev->event_bitmap, 0, sizeof(rdev->event_bitmap)); en_ops = rdev->en_dev->en_ops; if (en_ops->bnxt_register_fw_async_events (rdev->en_dev, BNXT_ROCE_ULP, (unsigned long *)rdev->event_bitmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE)) dev_err(rdev_to_dev(rdev), "Failed to unregister async event"); } static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) { const struct bnxt_en_ops *en_ops; if (rdev->is_virtfn) return; rdev->event_bitmap[0] |= BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE) | BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY); rdev->event_bitmap[2] |= BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_REPORT - 64); rdev->event_bitmap[2] |= BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_THRESHOLD - 64) | BIT(HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE - 64); en_ops = rdev->en_dev->en_ops; if (en_ops->bnxt_register_fw_async_events (rdev->en_dev, BNXT_ROCE_ULP, (unsigned long *)rdev->event_bitmap, HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DOORBELL_PACING_NQ_UPDATE)) dev_err(rdev_to_dev(rdev), "Failed to reg Async event"); } static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_ver_get_output resp = {0}; struct hwrm_ver_get_input req = {0}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg; int rc = 0; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_VER_GET, -1, -1); req.hwrm_intf_maj = HWRM_VERSION_MAJOR; req.hwrm_intf_min = HWRM_VERSION_MINOR; req.hwrm_intf_upd = HWRM_VERSION_UPDATE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query HW version, rc = 0x%x", rc); return rc; } cctx = rdev->chip_ctx; cctx->hwrm_intf_ver = (u64) le16_to_cpu(resp.hwrm_intf_major) << 48 | (u64) le16_to_cpu(resp.hwrm_intf_minor) << 32 | (u64) le16_to_cpu(resp.hwrm_intf_build) << 16 | le16_to_cpu(resp.hwrm_intf_patch); cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout); if (!cctx->hwrm_cmd_max_timeout) cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT; cctx->chip_num = le16_to_cpu(resp.chip_num); cctx->chip_rev = resp.chip_rev; cctx->chip_metal = resp.chip_metal; return 0; } /* Query device config using common hwrm */ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset) { struct bnxt_en_dev *en_dev = 
rdev->en_dev; struct hwrm_func_qcfg_output resp = {0}; struct hwrm_func_qcfg_input req = {0}; struct bnxt_fw_msg fw_msg; int rc; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_QCFG, -1, -1); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query config, rc = %#x", rc); return rc; } *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024); *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024); return 0; } /* Query function capabilities using common hwrm */ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_func_qcaps_output resp = {0}; struct hwrm_func_qcaps_input req = {0}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg; u8 push_enable = false; int rc; cctx = rdev->chip_ctx; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_QCAPS, -1, -1); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query capabilities, rc = %#x", rc); return rc; } if (_is_chip_p7(rdev->chip_ctx)) push_enable = (resp.flags_ext & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED) ? true : false; else push_enable = (resp.flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_WCB_PUSH_MODE) ? true : false; cctx->modes.db_push = push_enable; cctx->modes.dbr_pacing = resp.flags_ext & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_DBR_PACING_SUPPORTED ? true : false; cctx->modes.dbr_pacing_ext = resp.flags_ext2 & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ? true : false; cctx->modes.dbr_drop_recov = (resp.flags_ext2 & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_SW_DBR_DROP_RECOVERY_SUPPORTED) ? true : false; cctx->modes.dbr_pacing_v0 = (resp.flags_ext2 & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED) ? 
true : false; dev_dbg(rdev_to_dev(rdev), "%s: cctx->modes.dbr_pacing = %d cctx->modes.dbr_pacing_ext = %d, dbr_drop_recov %d\n", __func__, cctx->modes.dbr_pacing, cctx->modes.dbr_pacing_ext, cctx->modes.dbr_drop_recov); return 0; } static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) { struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; struct hwrm_func_dbr_pacing_qcfg_output resp = {0}; struct hwrm_func_dbr_pacing_qcfg_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg; u32 primary_nq_id; int rc; cctx = rdev->chip_ctx; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_DBR_PACING_QCFG, -1, -1); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_dbg(rdev_to_dev(rdev), "Failed to query dbr pacing config, rc = %#x", rc); return rc; } primary_nq_id = le32_to_cpu(resp.primary_nq_id); if (primary_nq_id == 0xffffffff && !bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { dev_err(rdev_to_dev(rdev), "%s:%d Invoke bnxt_qplib_dbr_pacing_set_primary_pf with 1\n", __func__, __LINE__); bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 1); } if (bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { struct bnxt_qplib_nq *nq; nq = &rdev->nqr.nq[0]; /* Reset the primary capability */ if (nq->ring_id != primary_nq_id) bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0); } if ((resp.dbr_stat_db_fifo_reg & HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) == HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC) cctx->dbr_stat_db_fifo = resp.dbr_stat_db_fifo_reg & ~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; if ((resp.dbr_throttling_aeq_arm_reg & HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK) == HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC) { cctx->dbr_aeq_arm_reg = resp.dbr_throttling_aeq_arm_reg & ~HWRM_FUNC_DBR_PACING_QCFG_OUTPUT_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; cctx->dbr_throttling_reg = cctx->dbr_aeq_arm_reg - 4; } pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth); if (!pacing_data->fifo_max_depth) pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx); pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask); pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift; dev_dbg(rdev_to_dev(rdev), "%s: nq:0x%x primary_pf:%d db_fifo:0x%x aeq_arm:0x%x i" "fifo_max_depth 0x%x , resp.dbr_stat_db_max_fifo_depth 0x%x);\n", __func__, resp.primary_nq_id, cctx->modes.dbr_primary_pf, cctx->dbr_stat_db_fifo, cctx->dbr_aeq_arm_reg, pacing_data->fifo_max_depth, le32_to_cpu(resp.dbr_stat_db_max_fifo_depth)); return 0; } static int bnxt_re_hwrm_dbr_pacing_cfg(struct bnxt_re_dev *rdev, bool enable) { struct hwrm_func_dbr_pacing_cfg_output resp = {0}; struct hwrm_func_dbr_pacing_cfg_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_fw_msg fw_msg; int rc; if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) return 0; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_DBR_PACING_CFG, -1, -1); if (enable) { req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_ENABLE; req.enables = cpu_to_le32(HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PRIMARY_NQ_ID_VALID | 
HWRM_FUNC_DBR_PACING_CFG_INPUT_ENABLES_PACING_THRESHOLD_VALID); } else { req.flags = HWRM_FUNC_DBR_PACING_CFG_INPUT_FLAGS_DBR_NQ_EVENT_DISABLE; } req.primary_nq_id = cpu_to_le32(rdev->dbq_nq_id); req.pacing_threshold = cpu_to_le32(rdev->dbq_watermark); dev_dbg(rdev_to_dev(rdev), "%s: nq_id = 0x%x pacing_threshold = 0x%x", __func__, req.primary_nq_id, req.pacing_threshold); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev)); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) { dev_dbg(rdev_to_dev(rdev), "Failed to set dbr pacing config, rc = %#x", rc); return rc; } return 0; } /* Net -> RoCE driver */ /* Device */ struct bnxt_re_dev *bnxt_re_from_netdev(struct ifnet *netdev) { struct bnxt_re_dev *rdev; rcu_read_lock(); list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) { if (rdev->netdev == netdev) { rcu_read_unlock(); dev_dbg(rdev_to_dev(rdev), "netdev (%p) found, ref_count = 0x%x", netdev, atomic_read(&rdev->ref_count)); return rdev; } } rcu_read_unlock(); return NULL; } static ssize_t show_rev(struct device *device, struct device_attribute *attr, char *buf) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor); } static ssize_t show_hca(struct device *device, struct device_attribute *attr, char *buf) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev); return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc); } static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL); static DEVICE_ATTR(hca_type, 0444, show_hca, NULL); static struct device_attribute *bnxt_re_attributes[] = { &dev_attr_hw_rev, &dev_attr_hca_type }; int ib_register_device_compat(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; char name[IB_DEVICE_NAME_MAX]; memset(name, 0, IB_DEVICE_NAME_MAX); strlcpy(name, "bnxt_re%d", IB_DEVICE_NAME_MAX); strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); return ib_register_device(ibdev, NULL); } static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; int ret = 0; /* ib device init */ ibdev->owner = THIS_MODULE; ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION; ibdev->node_type = RDMA_NODE_IB_CA; strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", strlen(BNXT_RE_DESC) + 5); ibdev->phys_port_cnt = 1; bnxt_qplib_get_guid(rdev->dev_addr, (u8 *)&ibdev->node_guid); /* Data path irqs is one less than the max msix vectors */ ibdev->num_comp_vectors = rdev->nqr.num_msix - 1; bnxt_re_set_dma_device(ibdev, rdev); ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; /* User space */ ibdev->uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | (1ull << IB_USER_VERBS_CMD_QUERY_QP) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << 
IB_USER_VERBS_CMD_DEALLOC_MW) | (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | (1ull << IB_USER_VERBS_CMD_QUERY_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); ibdev->uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); ibdev->uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_POLL_CQ); #define bnxt_re_ib_ah bnxt_re_ah #define bnxt_re_ib_cq bnxt_re_cq #define bnxt_re_ib_pd bnxt_re_pd #define bnxt_re_ib_srq bnxt_re_srq #define bnxt_re_ib_ucontext bnxt_re_ucontext INIT_IB_DEVICE_OPS(&ibdev->ops, bnxt_re, BNXT_RE); ibdev->query_device = bnxt_re_query_device; ibdev->modify_device = bnxt_re_modify_device; ibdev->query_port = bnxt_re_query_port; ibdev->modify_port = bnxt_re_modify_port; ibdev->get_port_immutable = bnxt_re_get_port_immutable; ibdev->query_pkey = bnxt_re_query_pkey; ibdev->query_gid = bnxt_re_query_gid; ibdev->get_netdev = bnxt_re_get_netdev; ibdev->add_gid = bnxt_re_add_gid; ibdev->del_gid = bnxt_re_del_gid; ibdev->get_link_layer = bnxt_re_get_link_layer; ibdev->alloc_pd = bnxt_re_alloc_pd; ibdev->dealloc_pd = bnxt_re_dealloc_pd; ibdev->create_ah = bnxt_re_create_ah; ibdev->modify_ah = bnxt_re_modify_ah; ibdev->query_ah = bnxt_re_query_ah; ibdev->destroy_ah = bnxt_re_destroy_ah; ibdev->create_srq = bnxt_re_create_srq; ibdev->modify_srq = bnxt_re_modify_srq; ibdev->query_srq = bnxt_re_query_srq; ibdev->destroy_srq = bnxt_re_destroy_srq; ibdev->post_srq_recv = bnxt_re_post_srq_recv; ibdev->create_qp = bnxt_re_create_qp; ibdev->modify_qp = bnxt_re_modify_qp; ibdev->query_qp = bnxt_re_query_qp; ibdev->destroy_qp = bnxt_re_destroy_qp; ibdev->post_send = bnxt_re_post_send; ibdev->post_recv = bnxt_re_post_recv; ibdev->create_cq = bnxt_re_create_cq; ibdev->modify_cq = bnxt_re_modify_cq; ibdev->destroy_cq = bnxt_re_destroy_cq; ibdev->resize_cq = bnxt_re_resize_cq; ibdev->poll_cq = bnxt_re_poll_cq; ibdev->req_notify_cq = bnxt_re_req_notify_cq; ibdev->get_dma_mr = bnxt_re_get_dma_mr; ibdev->get_hw_stats = bnxt_re_get_hw_stats; ibdev->alloc_hw_stats = bnxt_re_alloc_hw_port_stats; ibdev->dereg_mr = bnxt_re_dereg_mr; ibdev->alloc_mr = bnxt_re_alloc_mr; ibdev->map_mr_sg = bnxt_re_map_mr_sg; ibdev->alloc_mw = bnxt_re_alloc_mw; ibdev->dealloc_mw = bnxt_re_dealloc_mw; ibdev->reg_user_mr = bnxt_re_reg_user_mr; ibdev->rereg_user_mr = bnxt_re_rereg_user_mr; ibdev->disassociate_ucontext = bnxt_re_disassociate_ucntx; ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; ibdev->mmap = bnxt_re_mmap; ibdev->process_mad = bnxt_re_process_mad; ret = ib_register_device_compat(rdev); return ret; } static void bnxt_re_dev_dealloc(struct bnxt_re_dev *rdev) { int i = BNXT_RE_REF_WAIT_COUNT; dev_dbg(rdev_to_dev(rdev), "%s:Remove the device %p\n", __func__, rdev); /* Wait for rdev refcount to come down */ while ((atomic_read(&rdev->ref_count) > 1) && i--) msleep(100); if (atomic_read(&rdev->ref_count) > 1) dev_err(rdev_to_dev(rdev), "Failed waiting for ref count to deplete %d", atomic_read(&rdev->ref_count)); atomic_set(&rdev->ref_count, 0); if_rele(rdev->netdev); rdev->netdev = NULL; synchronize_rcu(); kfree(rdev->gid_map); kfree(rdev->dbg_stats); ib_dealloc_device(&rdev->ibdev); } static struct bnxt_re_dev *bnxt_re_dev_alloc(struct ifnet *netdev, struct bnxt_en_dev *en_dev) { struct bnxt_re_dev *rdev; u32 count; /* Allocate bnxt_re_dev instance here */ rdev = (struct bnxt_re_dev *)compat_ib_alloc_device(sizeof(*rdev)); if (!rdev) { pr_err("%s: bnxt_re_dev allocation failure!", ROCE_DRV_MODULE_NAME); return NULL; } /* Default values 
*/ atomic_set(&rdev->ref_count, 0); rdev->netdev = netdev; dev_hold(rdev->netdev); rdev->en_dev = en_dev; rdev->id = rdev->en_dev->pdev->devfn; INIT_LIST_HEAD(&rdev->qp_list); mutex_init(&rdev->qp_lock); mutex_init(&rdev->cc_lock); mutex_init(&rdev->dbq_lock); bnxt_re_clear_rsors_stat(&rdev->stats.rsors); rdev->cosq[0] = rdev->cosq[1] = 0xFFFF; rdev->min_tx_depth = 1; rdev->stats.stats_query_sec = 1; /* Disable priority vlan as the default mode is DSCP based PFC */ rdev->cc_param.disable_prio_vlan_tx = 1; /* Initialize worker for DBR Pacing */ INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check); INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp); rdev->gid_map = kzalloc(sizeof(*(rdev->gid_map)) * BNXT_RE_MAX_SGID_ENTRIES, GFP_KERNEL); if (!rdev->gid_map) { ib_dealloc_device(&rdev->ibdev); return NULL; } for(count = 0; count < BNXT_RE_MAX_SGID_ENTRIES; count++) rdev->gid_map[count] = -1; rdev->dbg_stats = kzalloc(sizeof(*rdev->dbg_stats), GFP_KERNEL); if (!rdev->dbg_stats) { ib_dealloc_device(&rdev->ibdev); return NULL; } return rdev; } static int bnxt_re_handle_unaffi_async_event( struct creq_func_event *unaffi_async) { switch (unaffi_async->event) { case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR: case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR: case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR: case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR: case CREQ_FUNC_EVENT_EVENT_CQ_ERROR: case CREQ_FUNC_EVENT_EVENT_TQM_ERROR: case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR: case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR: case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR: case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR: case CREQ_FUNC_EVENT_EVENT_TIM_ERROR: break; default: return -EINVAL; } return 0; } static int bnxt_re_handle_qp_async_event(void *qp_event, struct bnxt_re_qp *qp) { struct creq_qp_error_notification *err_event; struct ib_event event; unsigned int flags; if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && !qp->qplib_qp.is_user) { flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); } memset(&event, 0, sizeof(event)); event.device = &qp->rdev->ibdev; event.element.qp = &qp->ib_qp; event.event = IB_EVENT_QP_FATAL; err_event = qp_event; switch(err_event->res_err_state_reason) { case CFCQ_RES_ERR_STATE_REASON_RES_EXCEED_MAX: case CFCQ_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH: case CFCQ_RES_ERR_STATE_REASON_RES_OPCODE_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT: case CFCQ_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY: case CFCQ_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION: case CFCQ_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY: case CFCQ_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION: case CFCQ_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_IVALID_DUP_RKEY: case CFCQ_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC: event.event = IB_EVENT_QP_ACCESS_ERR; break; case CFCQ_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE: case CFCQ_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE: case CFCQ_RES_ERR_STATE_REASON_RES_REM_INVALIDATE: event.event = IB_EVENT_QP_REQ_ERR; break; case CFCQ_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW: case CFCQ_RES_ERR_STATE_REASON_RES_CMP_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR: 
case CFCQ_RES_ERR_STATE_REASON_RES_MEMORY_ERROR: case CFCQ_RES_ERR_STATE_REASON_RES_SRQ_ERROR: event.event = IB_EVENT_QP_FATAL; break; default: if (qp->qplib_qp.srq) event.event = IB_EVENT_QP_LAST_WQE_REACHED; break; } if (err_event->res_err_state_reason) dev_err(rdev_to_dev(qp->rdev), "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n", __func__, qp->qplib_qp.is_user ? "user" : "kernel", qp->qplib_qp.id, err_event->sq_cons_idx, err_event->rq_cons_idx, err_event->req_slow_path_state, err_event->req_err_state_reason, err_event->res_slow_path_state, err_event->res_err_state_reason); if (event.device && qp->ib_qp.event_handler) qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); return 0; } static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq) { struct creq_cq_error_notification *cqerr; bool send = false; cqerr = event; switch (cqerr->cq_err_reason) { case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR: case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR: case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR: case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR: case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR: case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR: send = true; default: break; } if (send && cq->ibcq.event_handler) { struct ib_event ibevent = {}; ibevent.event = IB_EVENT_CQ_ERR; ibevent.element.cq = &cq->ibcq; ibevent.device = &cq->rdev->ibdev; dev_err(rdev_to_dev(cq->rdev), "%s err reason %d\n", __func__, cqerr->cq_err_reason); cq->ibcq.event_handler(&ibevent, cq->ibcq.cq_context); } cq->qplib_cq.is_cq_err_event = true; return 0; } static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async, void *obj) { struct bnxt_qplib_qp *qplqp; struct bnxt_qplib_cq *qplcq; struct bnxt_re_qp *qp; struct bnxt_re_cq *cq; int rc = 0; u8 event; if (!obj) return rc; /* QP was already dead, still return success */ event = affi_async->event; switch (event) { case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION: qplqp = obj; qp = container_of(qplqp, struct bnxt_re_qp, qplib_qp); rc = bnxt_re_handle_qp_async_event(affi_async, qp); break; case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION: qplcq = obj; cq = container_of(qplcq, struct bnxt_re_cq, qplib_cq); rc = bnxt_re_handle_cq_async_error(affi_async, cq); break; default: rc = -EINVAL; } return rc; } static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw, void *aeqe, void *obj) { struct creq_func_event *unaffi_async; struct creq_qp_event *affi_async; u8 type; int rc; type = ((struct creq_base *)aeqe)->type; if (type == CREQ_BASE_TYPE_FUNC_EVENT) { unaffi_async = aeqe; rc = bnxt_re_handle_unaffi_async_event(unaffi_async); } else { affi_async = aeqe; rc = bnxt_re_handle_affi_async_event(affi_async, obj); } return rc; } static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq, struct bnxt_qplib_srq *handle, u8 event) { struct bnxt_re_srq *srq = to_bnxt_re(handle, struct bnxt_re_srq, qplib_srq); struct ib_event ib_event; if (srq == NULL) { pr_err("%s: SRQ is NULL, SRQN not handled", ROCE_DRV_MODULE_NAME); return -EINVAL; } ib_event.device = &srq->rdev->ibdev; ib_event.element.srq = &srq->ibsrq; if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT) ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED; else ib_event.event = IB_EVENT_SRQ_ERR; if (srq->ibsrq.event_handler) { /* Lock event_handler? 
*/ (*srq->ibsrq.event_handler)(&ib_event, srq->ibsrq.srq_context); } return 0; } static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *handle) { struct bnxt_re_cq *cq = to_bnxt_re(handle, struct bnxt_re_cq, qplib_cq); u32 *cq_ptr; if (cq == NULL) { pr_err("%s: CQ is NULL, CQN not handled", ROCE_DRV_MODULE_NAME); return -EINVAL; } /* CQ already in destroy path. Do not handle any more events */ if (handle->destroyed || !atomic_read(&cq->ibcq.usecnt)) { if (!handle->destroyed) dev_dbg(NULL, "%s: CQ being destroyed, CQN not handled", ROCE_DRV_MODULE_NAME); return 0; } if (cq->ibcq.comp_handler) { if (cq->uctx_cq_page) { cq_ptr = (u32 *)cq->uctx_cq_page; *cq_ptr = cq->qplib_cq.toggle; } /* Lock comp_handler? */ (*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context); } return 0; } struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev) { int min, indx; mutex_lock(&rdev->nqr.load_lock); for (indx = 0, min = 0; indx < (rdev->nqr.num_msix - 1); indx++) { if (rdev->nqr.nq[min].load > rdev->nqr.nq[indx].load) min = indx; } rdev->nqr.nq[min].load++; mutex_unlock(&rdev->nqr.load_lock); return &rdev->nqr.nq[min]; } void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq) { mutex_lock(&rdev->nqr.load_lock); nq->load--; mutex_unlock(&rdev->nqr.load_lock); } static bool bnxt_re_check_min_attr(struct bnxt_re_dev *rdev) { struct bnxt_qplib_dev_attr *attr; bool rc = true; attr = rdev->dev_attr; if (!attr->max_cq || !attr->max_qp || !attr->max_sgid || !attr->max_mr) { dev_err(rdev_to_dev(rdev),"Insufficient RoCE resources"); dev_dbg(rdev_to_dev(rdev), "max_cq = %d, max_qp = %d, max_dpi = %d, max_sgid = %d, max_mr = %d", attr->max_cq, attr->max_qp, attr->max_dpi, attr->max_sgid, attr->max_mr); rc = false; } return rc; } static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, u8 port_num, enum ib_event_type event) { struct ib_event ib_event; ib_event.device = ibdev; if (qp) { ib_event.element.qp = qp; ib_event.event = event; if (qp->event_handler) qp->event_handler(&ib_event, qp->qp_context); } else { ib_event.element.port_num = port_num; ib_event.event = event; ib_dispatch_event(&ib_event); } dev_dbg(rdev_to_dev(to_bnxt_re_dev(ibdev, ibdev)), "ibdev %p Event 0x%x port_num 0x%x", ibdev, event, port_num); } static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp) { if (rdev->gsi_ctx.gsi_qp_mode == BNXT_RE_GSI_MODE_ALL) return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->gsi_ctx.gsi_sqp); else return (qp->ib_qp.qp_type == IB_QPT_GSI); } static void bnxt_re_stop_all_nonqp1_nonshadow_qps(struct bnxt_re_dev *rdev) { struct bnxt_qplib_qp *qpl_qp; bool dev_detached = false; struct ib_qp_attr qp_attr; int num_qps_stopped = 0; int mask = IB_QP_STATE; struct bnxt_re_qp *qp; unsigned long flags; if (!rdev) return; restart: if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags)) dev_detached = true; qp_attr.qp_state = IB_QPS_ERR; mutex_lock(&rdev->qp_lock); list_for_each_entry(qp, &rdev->qp_list, list) { qpl_qp = &qp->qplib_qp; if (dev_detached || !bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) { if (qpl_qp->state != CMDQ_MODIFY_QP_NEW_STATE_RESET && qpl_qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) { if (dev_detached) { /* * Cant actually send the command down, * marking the state for bookkeeping */ qpl_qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; qpl_qp->cur_qp_state = qpl_qp->state; if (!qpl_qp->is_user) { /* Add to flush list */ flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(qpl_qp); bnxt_re_unlock_cqs(qp, flags); 
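/*
 * Note: the device is detached here, so no MODIFY_QP command can be
 * sent to firmware; the kernel QP has only been marked ERR and queued
 * on the flush list above so its outstanding work requests can later
 * be completed in error by the CQ processing path.
 */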
} } else { num_qps_stopped++; bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask, NULL); } bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp, 1, IB_EVENT_QP_FATAL); /* * 1. Release qp_lock after a budget to unblock other verb * requests (like qp_destroy) from stack. * 2. Traverse through the qp_list freshly as addition / deletion * might have happened since qp_lock is getting released here. */ if (num_qps_stopped % BNXT_RE_STOP_QPS_BUDGET == 0) { mutex_unlock(&rdev->qp_lock); goto restart; } } } } mutex_unlock(&rdev->qp_lock); } static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) { struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; struct bnxt_qplib_gid gid; u16 gid_idx, index; int rc = 0; if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) return 0; if (sgid_tbl == NULL) { dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated"); return -EINVAL; } for (index = 0; index < sgid_tbl->active; index++) { gid_idx = sgid_tbl->hw_id[index]; if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero, sizeof(bnxt_qplib_gid_zero))) continue; /* Need to modify the VLAN enable setting of non VLAN GID only * as setting is done for VLAN GID while adding GID * * If disable_prio_vlan_tx is enable, then we'll need to remove the * vlan entry from the sgid_tbl. */ if (sgid_tbl->vlan[index] == true) continue; memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid)); rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, rdev->dev_addr); } return rc; } static void bnxt_re_clear_cc(struct bnxt_re_dev *rdev) { struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param; if (_is_chip_p7(rdev->chip_ctx)) { cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP; } else { cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); if (!is_qport_service_type_supported(rdev)) cc_param->mask |= (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP); } cc_param->cur_mask = cc_param->mask; if (bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param)) dev_err(rdev_to_dev(rdev), "Failed to modify cc\n"); } static int bnxt_re_setup_cc(struct bnxt_re_dev *rdev) { struct bnxt_qplib_cc_param *cc_param = &rdev->cc_param; int rc; if (_is_chip_p7(rdev->chip_ctx)) { cc_param->enable = 0x0; cc_param->mask = CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP; } else { cc_param->enable = 0x1; cc_param->mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); if (!is_qport_service_type_supported(rdev)) cc_param->mask |= (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP | CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP); } cc_param->cur_mask = cc_param->mask; rc = bnxt_qplib_modify_cc(&rdev->qplib_res, cc_param); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to modify cc\n"); return rc; } /* Reset the programming mask */ cc_param->mask = 0; if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { cc_param->qp1_tos_dscp = cc_param->tos_dscp; rc = bnxt_re_update_qp1_tos_dscp(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "%s:Failed to modify QP1:%d", __func__, rc); goto clear; } } return 0; clear: bnxt_re_clear_cc(rdev); return rc; } int bnxt_re_query_hwrm_dscp2pri(struct bnxt_re_dev *rdev, struct bnxt_re_dscp2pri *d2p, u16 *count, u16 target_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_queue_dscp2pri_qcfg_input req; struct hwrm_queue_dscp2pri_qcfg_output resp; 
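/*
 * Firmware returns the DSCP-to-priority table by DMA into a coherent
 * bounce buffer allocated below; the entries are then copied out to
 * the caller's array (bounded by the caller-supplied count) and
 * *count is updated to the total reported by firmware.
 */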
struct bnxt_re_dscp2pri *dscp2pri; struct bnxt_fw_msg fw_msg; u16 in_count = *count; dma_addr_t dma_handle; int rc = 0, i; u16 data_len; u8 *kmem; data_len = *count * sizeof(*dscp2pri); memset(&fw_msg, 0, sizeof(fw_msg)); memset(&req, 0, sizeof(req)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_DSCP2PRI_QCFG, -1, target_id); req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1; kmem = dma_zalloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle, GFP_KERNEL); if (!kmem) { dev_err(rdev_to_dev(rdev), "dma_zalloc_coherent failure, length = %u\n", (unsigned)data_len); return -ENOMEM; } req.dest_data_addr = cpu_to_le64(dma_handle); req.dest_data_buffer_size = cpu_to_le16(data_len); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) goto out; /* Upload the DSCP-MASK-PRI tuple(s) */ dscp2pri = (struct bnxt_re_dscp2pri *)kmem; for (i = 0; i < le16_to_cpu(resp.entry_cnt) && i < in_count; i++) { d2p[i].dscp = dscp2pri->dscp; d2p[i].mask = dscp2pri->mask; d2p[i].pri = dscp2pri->pri; dscp2pri++; } *count = le16_to_cpu(resp.entry_cnt); out: dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle); return rc; } int bnxt_re_prio_vlan_tx_update(struct bnxt_re_dev *rdev) { /* Remove the VLAN from the GID entry */ if (rdev->cc_param.disable_prio_vlan_tx) rdev->qplib_res.prio = false; else rdev->qplib_res.prio = true; return bnxt_re_update_gid(rdev); } int bnxt_re_set_hwrm_dscp2pri(struct bnxt_re_dev *rdev, struct bnxt_re_dscp2pri *d2p, u16 count, u16 target_id) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_queue_dscp2pri_cfg_input req; struct hwrm_queue_dscp2pri_cfg_output resp; struct bnxt_fw_msg fw_msg; struct bnxt_re_dscp2pri *dscp2pri; int i, rc, data_len = 3 * 256; dma_addr_t dma_handle; u8 *kmem; memset(&req, 0, sizeof(req)); memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_DSCP2PRI_CFG, -1, target_id); req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1; kmem = dma_alloc_coherent(&en_dev->pdev->dev, data_len, &dma_handle, GFP_KERNEL); if (!kmem) { dev_err(rdev_to_dev(rdev), "dma_alloc_coherent failure, length = %u\n", (unsigned)data_len); return -ENOMEM; } req.src_data_addr = cpu_to_le64(dma_handle); /* Download the DSCP-MASK-PRI tuple(s) */ dscp2pri = (struct bnxt_re_dscp2pri *)kmem; for (i = 0; i < count; i++) { dscp2pri->dscp = d2p[i].dscp; dscp2pri->mask = d2p[i].mask; dscp2pri->pri = d2p[i].pri; dscp2pri++; } req.entry_cnt = cpu_to_le16(count); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); dma_free_coherent(&en_dev->pdev->dev, data_len, kmem, dma_handle); return rc; } int bnxt_re_query_hwrm_qportcfg(struct bnxt_re_dev *rdev, struct bnxt_re_tc_rec *tc_rec, u16 tid) { u8 max_tc, tc, *qptr, *type_ptr0, *type_ptr1; struct hwrm_queue_qportcfg_output resp = {0}; struct hwrm_queue_qportcfg_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_fw_msg fw_msg; bool def_init = false; u8 *tmp_type; u8 cos_id; int rc; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_QPORTCFG, -1, tid); req.port_id = (tid == 0xFFFF) ? 
en_dev->pf_port_id : 1; if (BNXT_EN_ASYM_Q(en_dev)) req.flags = htole32(HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) return rc; if (!resp.max_configurable_queues) return -EINVAL; max_tc = resp.max_configurable_queues; tc_rec->max_tc = max_tc; if (resp.queue_cfg_info & HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_USE_PROFILE_TYPE) tc_rec->serv_type_enabled = true; qptr = &resp.queue_id0; type_ptr0 = &resp.queue_id0_service_profile_type; type_ptr1 = &resp.queue_id1_service_profile_type; for (tc = 0; tc < max_tc; tc++) { tmp_type = tc ? type_ptr1 + (tc - 1) : type_ptr0; cos_id = *qptr++; /* RoCE CoS queue is the first cos queue. * For MP12 and MP17 order is 405 and 141015. */ if (is_bnxt_roce_queue(rdev, *qptr, *tmp_type)) { tc_rec->cos_id_roce = cos_id; tc_rec->tc_roce = tc; } else if (is_bnxt_cnp_queue(rdev, *qptr, *tmp_type)) { tc_rec->cos_id_cnp = cos_id; tc_rec->tc_cnp = tc; } else if (!def_init) { def_init = true; tc_rec->tc_def = tc; tc_rec->cos_id_def = cos_id; } qptr++; } return rc; } int bnxt_re_hwrm_cos2bw_qcfg(struct bnxt_re_dev *rdev, u16 target_id, struct bnxt_re_cos2bw_cfg *cfg) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_queue_cos2bw_qcfg_output resp; struct hwrm_queue_cos2bw_qcfg_input req = {0}; struct bnxt_fw_msg fw_msg; int rc, indx; void *data; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_COS2BW_QCFG, -1, target_id); req.port_id = (target_id == 0xFFFF) ? en_dev->pf_port_id : 1; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); if (rc) return rc; data = &resp.queue_id0 + offsetof(struct bnxt_re_cos2bw_cfg, queue_id); for (indx = 0; indx < 8; indx++, data += (sizeof(cfg->cfg))) { memcpy(&cfg->cfg, data, sizeof(cfg->cfg)); if (indx == 0) cfg->queue_id = resp.queue_id0; cfg++; } return rc; } int bnxt_re_hwrm_cos2bw_cfg(struct bnxt_re_dev *rdev, u16 target_id, struct bnxt_re_cos2bw_cfg *cfg) { struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_queue_cos2bw_cfg_input req = {0}; struct hwrm_queue_cos2bw_cfg_output resp = {0}; struct bnxt_fw_msg fw_msg; void *data; int indx; int rc; memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_QUEUE_COS2BW_CFG, -1, target_id); req.port_id = (target_id == 0xFFFF) ? 
en_dev->pf_port_id : 1; /* Chimp wants enable bit to retain previous * config done by L2 driver */ for (indx = 0; indx < 8; indx++) { if (cfg[indx].queue_id < 40) { req.enables |= cpu_to_le32( HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID << indx); } data = (char *)&req.unused_0 + indx * (sizeof(*cfg) - 4); memcpy(data, &cfg[indx].queue_id, sizeof(*cfg) - 4); if (indx == 0) { req.queue_id0 = cfg[0].queue_id; req.unused_0 = 0; } } memset(&resp, 0, sizeof(resp)); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); return rc; } int bnxt_re_host_pf_id_query(struct bnxt_re_dev *rdev, struct bnxt_qplib_query_fn_info *fn_info, u32 *pf_mask, u32 *first_pf) { struct hwrm_func_host_pf_ids_query_output resp = {0}; struct hwrm_func_host_pf_ids_query_input req; struct bnxt_en_dev *en_dev = rdev->en_dev; struct bnxt_fw_msg fw_msg; int rc; memset(&fw_msg, 0, sizeof(fw_msg)); memset(&req, 0, sizeof(req)); bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_FUNC_HOST_PF_IDS_QUERY, -1, -1); /* To query the info from the host EPs */ switch (fn_info->host) { case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_SOC: case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0: case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_1: case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_2: case HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_3: req.host = fn_info->host; break; default: req.host = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_HOST_EP_0; break; } req.filter = fn_info->filter; if (req.filter > HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ROCE) req.filter = HWRM_FUNC_HOST_PF_IDS_QUERY_INPUT_FILTER_ALL; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); *first_pf = le16_to_cpu(resp.first_pf_id); *pf_mask = le16_to_cpu(resp.pf_ordinal_mask); return rc; } static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_ctx *hctx; struct bnxt_qplib_res *res; u16 tid = 0xffff; res = &rdev->qplib_res; hctx = res->hctx; if (test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags)) { bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id, tid); bnxt_qplib_free_stat_mem(res, &hctx->stats); } } static void bnxt_re_put_stats2_ctx(struct bnxt_re_dev *rdev) { test_and_clear_bit(BNXT_RE_FLAG_STATS_CTX2_ALLOC, &rdev->flags); } static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_ctx *hctx; struct bnxt_qplib_res *res; u16 tid = 0xffff; int rc; res = &rdev->qplib_res; hctx = res->hctx; rc = bnxt_qplib_alloc_stat_mem(res->pdev, rdev->chip_ctx, &hctx->stats); if (rc) return -ENOMEM; rc = bnxt_re_net_stats_ctx_alloc(rdev, tid); if (rc) goto free_stat_mem; set_bit(BNXT_RE_FLAG_STATS_CTX_ALLOC, &rdev->flags); return 0; free_stat_mem: bnxt_qplib_free_stat_mem(res, &hctx->stats); return rc; } static int bnxt_re_update_dev_attr(struct bnxt_re_dev *rdev) { int rc; rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) return rc; if (!bnxt_re_check_min_attr(rdev)) return -EINVAL; return 0; } static void bnxt_re_free_tbls(struct bnxt_re_dev *rdev) { bnxt_qplib_clear_tbls(&rdev->qplib_res); bnxt_qplib_free_tbls(&rdev->qplib_res); } static int bnxt_re_alloc_init_tbls(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; u8 pppp_factor = 0; int rc; /* * TODO: Need a better mechanism for spreading of the * 512 extended PPP pages. 
For now, spreading it * based on port_count */ if (_is_chip_p7(chip_ctx) && chip_ctx->modes.db_push) pppp_factor = rdev->en_dev->port_count; rc = bnxt_qplib_alloc_tbls(&rdev->qplib_res, pppp_factor); if (rc) return rc; bnxt_qplib_init_tbls(&rdev->qplib_res); set_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags); return 0; } static void bnxt_re_clean_nqs(struct bnxt_re_dev *rdev) { struct bnxt_qplib_nq *nq; int i; if (!rdev->nqr.max_init) return; for (i = (rdev->nqr.max_init - 1) ; i >= 0; i--) { nq = &rdev->nqr.nq[i]; bnxt_qplib_disable_nq(nq); bnxt_re_net_ring_free(rdev, nq->ring_id); bnxt_qplib_free_nq_mem(nq); } rdev->nqr.max_init = 0; } static int bnxt_re_setup_nqs(struct bnxt_re_dev *rdev) { struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_nq *nq; int rc, i; int depth; u32 offt; u16 vec; mutex_init(&rdev->nqr.load_lock); /* * TODO: Optimize the depth based on the * number of NQs. */ depth = BNXT_QPLIB_NQE_MAX_CNT; for (i = 0; i < rdev->nqr.num_msix - 1; i++) { nq = &rdev->nqr.nq[i]; vec = rdev->nqr.msix_entries[i + 1].vector; offt = rdev->nqr.msix_entries[i + 1].db_offset; nq->hwq.max_elements = depth; rc = bnxt_qplib_alloc_nq_mem(&rdev->qplib_res, nq); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to get mem for NQ %d, rc = 0x%x", i, rc); goto fail_mem; } rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; rattr.pages = nq->hwq.pbl[rdev->nqr.nq[i].hwq.level].pg_count; rattr.type = bnxt_re_get_rtype(rdev); rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX; rattr.depth = nq->hwq.max_elements - 1; rattr.lrid = rdev->nqr.msix_entries[i + 1].ring_idx; /* Set DBR pacing capability on the first NQ ring only */ if (!i && bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) rattr.flags = HWRM_RING_ALLOC_INPUT_FLAGS_NQ_DBR_PACING; else rattr.flags = 0; rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); if (rc) { nq->ring_id = 0xffff; /* Invalid ring-id */ dev_err(rdev_to_dev(rdev), "Failed to get fw id for NQ %d, rc = 0x%x", i, rc); goto fail_ring; } rc = bnxt_qplib_enable_nq(nq, i, vec, offt, &bnxt_re_cqn_handler, &bnxt_re_srqn_handler); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to enable NQ %d, rc = 0x%x", i, rc); goto fail_en; } } rdev->nqr.max_init = i; return 0; fail_en: /* *nq was i'th nq */ bnxt_re_net_ring_free(rdev, nq->ring_id); fail_ring: bnxt_qplib_free_nq_mem(nq); fail_mem: rdev->nqr.max_init = i; return rc; } static void bnxt_re_sysfs_destroy_file(struct bnxt_re_dev *rdev) { int i; for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) device_remove_file(&rdev->ibdev.dev, bnxt_re_attributes[i]); } static int bnxt_re_sysfs_create_file(struct bnxt_re_dev *rdev) { int i, j, rc = 0; for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) { rc = device_create_file(&rdev->ibdev.dev, bnxt_re_attributes[i]); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to create IB sysfs with rc = 0x%x", rc); /* Must clean up all created device files */ for (j = 0; j < i; j++) device_remove_file(&rdev->ibdev.dev, bnxt_re_attributes[j]); clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); ib_unregister_device(&rdev->ibdev); return 1; } } return 0; } /* worker thread for polling periodic events. Now used for QoS programming*/ static void bnxt_re_worker(struct work_struct *work) { struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, worker.work); int rc; /* QoS is in 30s cadence for PFs*/ if (!rdev->is_virtfn && !rdev->worker_30s--) rdev->worker_30s = 30; /* Use trylock for bnxt_re_dev_lock as this can be * held for long time by debugfs show path while issuing * HWRMS. 
If the debugfs name update is not done in this * iteration, the driver will check for the same in the * next schedule of the worker i.e. after 1 sec. */ if (mutex_trylock(&bnxt_re_dev_lock)) mutex_unlock(&bnxt_re_dev_lock); if (!rdev->stats.stats_query_sec) goto resched; if (test_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags) && (rdev->is_virtfn || !_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags))) { if (!(rdev->stats.stats_query_counter++ % rdev->stats.stats_query_sec)) { rc = bnxt_re_get_qos_stats(rdev); if (rc && rc != -ENOMEM) clear_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags); } } resched: schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000)); } static int bnxt_re_alloc_dbr_sw_stats_mem(struct bnxt_re_dev *rdev) { if (!(rdev->dbr_drop_recov || rdev->dbr_pacing)) return 0; rdev->dbr_sw_stats = kzalloc(sizeof(*rdev->dbr_sw_stats), GFP_KERNEL); if (!rdev->dbr_sw_stats) return -ENOMEM; return 0; } static void bnxt_re_free_dbr_sw_stats_mem(struct bnxt_re_dev *rdev) { kfree(rdev->dbr_sw_stats); rdev->dbr_sw_stats = NULL; } static int bnxt_re_initialize_dbr_drop_recov(struct bnxt_re_dev *rdev) { rdev->dbr_drop_recov_wq = create_singlethread_workqueue("bnxt_re_dbr_drop_recov"); if (!rdev->dbr_drop_recov_wq) { dev_err(rdev_to_dev(rdev), "DBR Drop Recov wq alloc failed!"); return -EINVAL; } rdev->dbr_drop_recov = true; /* Enable configfs setting dbr_drop_recov by default */ rdev->user_dbr_drop_recov = true; rdev->user_dbr_drop_recov_timeout = BNXT_RE_DBR_RECOV_USERLAND_TIMEOUT; return 0; } static void bnxt_re_deinitialize_dbr_drop_recov(struct bnxt_re_dev *rdev) { if (rdev->dbr_drop_recov_wq) { flush_workqueue(rdev->dbr_drop_recov_wq); destroy_workqueue(rdev->dbr_drop_recov_wq); rdev->dbr_drop_recov_wq = NULL; } rdev->dbr_drop_recov = false; } static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) { int rc; /* Allocate a page for app use */ rdev->dbr_page = (void *)__get_free_page(GFP_KERNEL); if (!rdev->dbr_page) { dev_err(rdev_to_dev(rdev), "DBR page allocation failed!"); return -ENOMEM; } memset((u8 *)rdev->dbr_page, 0, PAGE_SIZE); rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->dbr_page; rc = bnxt_re_hwrm_dbr_pacing_qcfg(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query dbr pacing config %d\n", rc); goto fail; } /* Create a work queue for scheduling dbq event */ rdev->dbq_wq = create_singlethread_workqueue("bnxt_re_dbq"); if (!rdev->dbq_wq) { dev_err(rdev_to_dev(rdev), "DBQ wq alloc failed!"); rc = -ENOMEM; goto fail; } /* MAP grc window 2 for reading db fifo depth */ writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4, 0, rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK); rdev->dbr_db_fifo_reg_off = (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) + 0x2000; rdev->qplib_res.pacing_data->grc_reg_offset = rdev->dbr_db_fifo_reg_off; rdev->dbr_bar_addr = pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->dbr_db_fifo_reg_off; /* Percentage of DB FIFO */ rdev->dbq_watermark = BNXT_RE_PACING_DBQ_THRESHOLD; rdev->pacing_en_int_th = BNXT_RE_PACING_EN_INT_THRESHOLD; rdev->pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD; rdev->dbq_pacing_time = BNXT_RE_DBR_INT_TIME; rdev->dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION; rdev->do_pacing_save = rdev->dbr_def_do_pacing; bnxt_re_set_default_pacing_data(rdev); dev_dbg(rdev_to_dev(rdev), "Initialized db pacing\n"); return 0; fail: free_page((u64)rdev->dbr_page); rdev->dbr_page = NULL; return rc; } static void bnxt_re_deinitialize_dbr_pacing(struct
bnxt_re_dev *rdev) { if (rdev->dbq_wq) flush_workqueue(rdev->dbq_wq); cancel_work_sync(&rdev->dbq_fifo_check_work); cancel_delayed_work_sync(&rdev->dbq_pacing_work); if (rdev->dbq_wq) { destroy_workqueue(rdev->dbq_wq); rdev->dbq_wq = NULL; } if (rdev->dbr_page) free_page((u64)rdev->dbr_page); rdev->dbr_page = NULL; rdev->dbr_pacing = false; } /* enable_dbr_pacing needs to be done only for older FWs * where host selects primary function. ie. pacing_ext * flags is not set. */ int bnxt_re_enable_dbr_pacing(struct bnxt_re_dev *rdev) { struct bnxt_qplib_nq *nq; nq = &rdev->nqr.nq[0]; rdev->dbq_nq_id = nq->ring_id; if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) && bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) { if (bnxt_re_hwrm_dbr_pacing_cfg(rdev, true)) { dev_err(rdev_to_dev(rdev), "Failed to set dbr pacing config\n"); return -EIO; } /* MAP grc window 8 for ARMing the NQ DBQ */ writel_fbsd(rdev->en_dev->softc, BNXT_GRCPF_REG_WINDOW_BASE_OUT + 28 , 0, rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_BASE_MASK); rdev->dbr_aeq_arm_reg_off = (rdev->chip_ctx->dbr_aeq_arm_reg & BNXT_GRC_OFFSET_MASK) + 0x8000; writel_fbsd(rdev->en_dev->softc, rdev->dbr_aeq_arm_reg_off , 0, 1); } return 0; } /* disable_dbr_pacing needs to be done only for older FWs * where host selects primary function. ie. pacing_ext * flags is not set. */ int bnxt_re_disable_dbr_pacing(struct bnxt_re_dev *rdev) { int rc = 0; if (!bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx) && bnxt_qplib_dbr_pacing_is_primary_pf(rdev->chip_ctx)) rc = bnxt_re_hwrm_dbr_pacing_cfg(rdev, false); return rc; } static void bnxt_re_ib_uninit(struct bnxt_re_dev *rdev) { if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { bnxt_re_sysfs_destroy_file(rdev); /* Cleanup ib dev */ ib_unregister_device(&rdev->ibdev); clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); return; } } static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { struct bnxt_qplib_dpi *kdpi; int rc, wait_count = BNXT_RE_RES_FREE_WAIT_COUNT; bnxt_re_net_unregister_async_event(rdev); bnxt_re_put_stats2_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED, &rdev->flags)) { /* did the caller hold the lock? */ mutex_lock(&bnxt_re_dev_lock); list_del_rcu(&rdev->list); mutex_unlock(&bnxt_re_dev_lock); } bnxt_re_uninit_resolve_wq(rdev); bnxt_re_uninit_dcb_wq(rdev); bnxt_re_uninit_aer_wq(rdev); bnxt_re_deinitialize_dbr_drop_recov(rdev); if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) (void)bnxt_re_disable_dbr_pacing(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags)) { cancel_delayed_work_sync(&rdev->worker); } /* Wait for ULPs to release references */ while (atomic_read(&rdev->stats.rsors.cq_count) && --wait_count) usleep_range(500, 1000); if (!wait_count) dev_err(rdev_to_dev(rdev), "CQ resources not freed by stack, count = 0x%x", atomic_read(&rdev->stats.rsors.cq_count)); kdpi = &rdev->dpi_privileged; if (kdpi->umdbr) { /* kernel DPI was allocated with success */ (void)bnxt_qplib_dealloc_dpi(&rdev->qplib_res, kdpi); /* * Driver just need to know no command had failed * during driver load sequence and below command is * required indeed. Piggybacking dpi allocation status. 
*/ } /* Protect the device uninitialization and start_irq/stop_irq L2 * callbacks with rtnl lock to avoid race condition between these calls */ rtnl_lock(); if (test_and_clear_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags)) bnxt_re_clean_nqs(rdev); rtnl_unlock(); if (test_and_clear_bit(BNXT_RE_FLAG_TBLS_ALLOCINIT, &rdev->flags)) bnxt_re_free_tbls(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags)) { rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to deinitialize fw, rc = 0x%x", rc); } bnxt_re_put_stats_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags)) bnxt_qplib_free_hwctx(&rdev->qplib_res); rtnl_lock(); if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) bnxt_qplib_disable_rcfw_channel(&rdev->rcfw); if (rdev->dbr_pacing) bnxt_re_deinitialize_dbr_pacing(rdev); bnxt_re_free_dbr_sw_stats_mem(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_NET_RING_ALLOC, &rdev->flags)) bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id); if (test_and_clear_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags)) bnxt_qplib_free_rcfw_channel(&rdev->qplib_res); if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) bnxt_re_free_msix(rdev); rtnl_unlock(); bnxt_re_destroy_chip_ctx(rdev); if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) { if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) bnxt_re_unregister_netdev(rdev); } } static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type, u8 wqe_mode) { struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_creq_ctx *creq; int vec, offset; int rc = 0; if (op_type != BNXT_RE_POST_RECOVERY_INIT) { /* Registered a new RoCE device instance to netdev */ rc = bnxt_re_register_netdev(rdev); if (rc) return -EINVAL; } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to get chip context rc 0x%x", rc); bnxt_re_unregister_netdev(rdev); clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); rc = -EINVAL; return rc; } /* Protect the device initialization and start_irq/stop_irq L2 callbacks * with rtnl lock to avoid race condition between these calls */ rtnl_lock(); rc = bnxt_re_request_msix(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "Requesting MSI-X vectors failed with rc = 0x%x", rc); rc = -EINVAL; goto release_rtnl; } set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); /* Establish RCFW Communication Channel to initialize the context memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to alloc mem for rcfw, rc = %#x\n", rc); goto release_rtnl; } set_bit(BNXT_RE_FLAG_ALLOC_RCFW, &rdev->flags); creq = &rdev->rcfw.creq; rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr; rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count; rattr.type = bnxt_re_get_rtype(rdev); rattr.mode = HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX; rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; rattr.lrid = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].ring_idx; rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); if (rc) { creq->ring_id = 0xffff; dev_err(rdev_to_dev(rdev), "Failed to allocate CREQ fw id with rc = 0x%x", rc); goto release_rtnl; } if (!rdev->chip_ctx) goto release_rtnl; /* Program the NQ ID for DBQ notification */ if (rdev->chip_ctx->modes.dbr_pacing_v0 || bnxt_qplib_dbr_pacing_en(rdev->chip_ctx) || bnxt_qplib_dbr_pacing_ext_en(rdev->chip_ctx)) { rc = bnxt_re_initialize_dbr_pacing(rdev); if (!rc) rdev->dbr_pacing = true; 
else rdev->dbr_pacing = false; dev_dbg(rdev_to_dev(rdev), "%s: initialize db pacing ret %d\n", __func__, rc); } vec = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].vector; offset = rdev->nqr.msix_entries[BNXT_RE_AEQ_IDX].db_offset; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vec, offset, &bnxt_re_aeq_handler); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to enable RCFW channel with rc = 0x%x", rc); goto release_rtnl; } set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags); rc = bnxt_re_update_dev_attr(rdev); if (rc) goto release_rtnl; bnxt_re_set_resource_limits(rdev); if (!rdev->is_virtfn && !_is_chip_gen_p5_p7(rdev->chip_ctx)) { rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to alloc hw contexts, rc = 0x%x", rc); goto release_rtnl; } set_bit(BNXT_RE_FLAG_ALLOC_CTX, &rdev->flags); } rc = bnxt_re_get_stats_ctx(rdev); if (rc) goto release_rtnl; rc = bnxt_qplib_init_rcfw(&rdev->rcfw, rdev->is_virtfn); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to initialize fw with rc = 0x%x", rc); goto release_rtnl; } set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_INIT, &rdev->flags); /* Based resource count on the 'new' device caps */ rc = bnxt_re_update_dev_attr(rdev); if (rc) goto release_rtnl; rc = bnxt_re_alloc_init_tbls(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "tbls alloc-init failed rc = %#x", rc); goto release_rtnl; } rc = bnxt_re_setup_nqs(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "NQs alloc-init failed rc = %#x\n", rc); if (rdev->nqr.max_init == 0) goto release_rtnl; dev_warn(rdev_to_dev(rdev), "expected nqs %d available nqs %d\n", rdev->nqr.num_msix, rdev->nqr.max_init); } set_bit(BNXT_RE_FLAG_SETUP_NQ, &rdev->flags); rtnl_unlock(); rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged, rdev, BNXT_QPLIB_DPI_TYPE_KERNEL); if (rc) goto fail; if (rdev->dbr_pacing) bnxt_re_enable_dbr_pacing(rdev); if (rdev->chip_ctx->modes.dbr_drop_recov) bnxt_re_initialize_dbr_drop_recov(rdev); rc = bnxt_re_alloc_dbr_sw_stats_mem(rdev); if (rc) goto fail; /* This block of code is needed for error recovery support */ if (!rdev->is_virtfn) { struct bnxt_re_tc_rec *tc_rec; tc_rec = &rdev->tc_rec[0]; rc = bnxt_re_query_hwrm_qportcfg(rdev, tc_rec, 0xFFFF); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query port config rc:%d", rc); return rc; } /* Query f/w defaults of CC params */ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param); if (rc) dev_warn(rdev_to_dev(rdev), "Failed to query CC defaults\n"); if (1) { rdev->num_vfs = pci_num_vf(rdev->en_dev->pdev); if (rdev->num_vfs) { bnxt_re_set_resource_limits(rdev); bnxt_qplib_set_func_resources(&rdev->qplib_res); } } } INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker); set_bit(BNXT_RE_FLAG_WORKER_REG, &rdev->flags); schedule_delayed_work(&rdev->worker, msecs_to_jiffies(1000)); bnxt_re_init_dcb_wq(rdev); bnxt_re_init_aer_wq(rdev); bnxt_re_init_resolve_wq(rdev); mutex_lock(&bnxt_re_dev_lock); list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list); /* Added to the list, not in progress anymore */ gadd_dev_inprogress--; set_bit(BNXT_RE_FLAG_DEV_LIST_INITIALIZED, &rdev->flags); mutex_unlock(&bnxt_re_dev_lock); return rc; release_rtnl: rtnl_unlock(); fail: bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); return rc; } static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) { int rc = 0; rc = bnxt_re_register_ib(rdev); if (rc) { dev_err(rdev_to_dev(rdev), "Register IB failed with rc = 0x%x", rc); goto fail; } if (bnxt_re_sysfs_create_file(rdev)) { bnxt_re_stopqps_and_ib_uninit(rdev); goto fail; } 
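/*
 * IB registration and the sysfs attributes are in place: mark the
 * device as registered, enable the stats query flags, and report the
 * port as active to the IB stack.
 */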
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); set_bit(BNXT_RE_FLAG_ISSUE_CFA_FLOW_STATS, &rdev->flags); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE); return rc; fail: bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); return rc; } /* wrapper for ib_init funcs */ int _bnxt_re_ib_init(struct bnxt_re_dev *rdev) { return bnxt_re_ib_init(rdev); } /* wrapper for aux init funcs */ int _bnxt_re_ib_init2(struct bnxt_re_dev *rdev) { bnxt_re_ib_init_2(rdev); return 0; /* add return for future proof */ } static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev) { bnxt_re_dev_dealloc(rdev); } static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct ifnet *netdev, struct bnxt_en_dev *en_dev) { struct ifnet *realdev = NULL; realdev = netdev; if (realdev) dev_dbg(NULL, "%s: realdev = %p netdev = %p\n", __func__, realdev, netdev); /* * Note: * The first argument to bnxt_re_dev_alloc() is 'netdev' and * not 'realdev', since in the case of bonding we want to * register the bonded virtual netdev (master) to the ib stack. * And 'en_dev' (for L2/PCI communication) is the first slave * device (PF0 on the card). * In the case of a regular netdev, both netdev and the en_dev * correspond to the same device. */ *rdev = bnxt_re_dev_alloc(netdev, en_dev); if (!*rdev) { pr_err("%s: netdev %p not handled", ROCE_DRV_MODULE_NAME, netdev); return -ENOMEM; } bnxt_re_hold(*rdev); return 0; } void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev) { rdev->espeed = rdev->en_dev->espeed; return; } void bnxt_re_stopqps_and_ib_uninit(struct bnxt_re_dev *rdev) { dev_dbg(rdev_to_dev(rdev), "%s: Stopping QPs, IB uninit on rdev: %p\n", __func__, rdev); bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev); bnxt_re_ib_uninit(rdev); } void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, struct auxiliary_device *aux_dev) { struct bnxt_re_en_dev_info *en_info; struct bnxt_qplib_cmdq_ctx *cmdq; struct bnxt_qplib_rcfw *rcfw; rcfw = &rdev->rcfw; cmdq = &rcfw->cmdq; if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags)) set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); dev_dbg(rdev_to_dev(rdev), "%s: Removing rdev: %p\n", __func__, rdev); bnxt_re_dev_uninit(rdev, op_type); en_info = auxiliary_get_drvdata(aux_dev); if (en_info) { rtnl_lock(); en_info->rdev = NULL; rtnl_unlock(); if (op_type != BNXT_RE_PRE_RECOVERY_REMOVE) { clear_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags); clear_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags); clear_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags); } } bnxt_re_dev_unreg(rdev); } int bnxt_re_add_device(struct bnxt_re_dev **rdev, struct ifnet *netdev, u8 qp_mode, u8 op_type, u8 wqe_mode, u32 num_msix_requested, struct auxiliary_device *aux_dev) { struct bnxt_re_en_dev_info *en_info; struct bnxt_en_dev *en_dev; int rc = 0; en_info = auxiliary_get_drvdata(aux_dev); en_dev = en_info->en_dev; mutex_lock(&bnxt_re_dev_lock); /* Check if driver already in mod exit and aux_dev is valid */ if (gmod_exit || !aux_dev) { mutex_unlock(&bnxt_re_dev_lock); return -ENODEV; } /* Add device in progress */ gadd_dev_inprogress++; mutex_unlock(&bnxt_re_dev_lock); rc = bnxt_re_dev_reg(rdev, netdev, en_dev); if (rc) { dev_dbg(NULL, "Failed to create add device for netdev %p\n", netdev); /* * For BNXT_RE_POST_RECOVERY_INIT special case * called from bnxt_re_start, the work is * complete only after, bnxt_re_start completes * bnxt_unregister_device in case 
of failure. * So bnxt_re_start will decrement gadd_dev_inprogress * in case of failure. */ if (op_type != BNXT_RE_POST_RECOVERY_INIT) { mutex_lock(&bnxt_re_dev_lock); gadd_dev_inprogress--; mutex_unlock(&bnxt_re_dev_lock); } return rc; } if (rc != 0) goto ref_error; /* * num_msix_requested = BNXT_RE_MSIX_FROM_MOD_PARAM indicates fresh driver load. * Otherwise, this invocation can be the result of lag create / destroy, * err recovery, hot fw upgrade, etc. */ if (num_msix_requested == BNXT_RE_MSIX_FROM_MOD_PARAM) { if (bnxt_re_probe_count < BNXT_RE_MAX_DEVICES) num_msix_requested = max_msix_vec[bnxt_re_probe_count++]; else /* Consider as default when probe_count exceeds its limit */ num_msix_requested = 0; /* if user specifies only one value, use the same for all PFs */ if (max_msix_vec_argc == 1) num_msix_requested = max_msix_vec[0]; } (*rdev)->num_msix_requested = num_msix_requested; (*rdev)->gsi_ctx.gsi_qp_mode = qp_mode; (*rdev)->adev = aux_dev; (*rdev)->dev_addr = en_dev->softc->func.mac_addr; /* Before updating the rdev pointer in bnxt_re_en_dev_info structure, * take the rtnl lock to avoid accessing invalid rdev pointer from * L2 ULP callbacks. This is applicable in all the places where rdev * pointer is updated in bnxt_re_en_dev_info. */ rtnl_lock(); en_info->rdev = *rdev; rtnl_unlock(); rc = bnxt_re_dev_init(*rdev, op_type, wqe_mode); if (rc) { ref_error: bnxt_re_dev_unreg(*rdev); *rdev = NULL; /* * For BNXT_RE_POST_RECOVERY_INIT special case * called from bnxt_re_start, the work is * complete only after, bnxt_re_start completes * bnxt_unregister_device in case of failure. * So bnxt_re_start will decrement gadd_dev_inprogress * in case of failure. */ if (op_type != BNXT_RE_POST_RECOVERY_INIT) { mutex_lock(&bnxt_re_dev_lock); gadd_dev_inprogress--; mutex_unlock(&bnxt_re_dev_lock); } } dev_dbg(rdev_to_dev(*rdev), "%s: Adding rdev: %p\n", __func__, *rdev); if (!rc) { set_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags); } return rc; } struct bnxt_re_dev *bnxt_re_get_peer_pf(struct bnxt_re_dev *rdev) { struct pci_dev *pdev_in = rdev->en_dev->pdev; int tmp_bus_num, bus_num = pdev_in->bus->number; int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn); int tmp_func_num, func_num = PCI_FUNC(pdev_in->devfn); struct bnxt_re_dev *tmp_rdev; rcu_read_lock(); list_for_each_entry_rcu(tmp_rdev, &bnxt_re_dev_list, list) { tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number; tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn); tmp_func_num = PCI_FUNC(tmp_rdev->en_dev->pdev->devfn); if (bus_num == tmp_bus_num && dev_num == tmp_dev_num && func_num != tmp_func_num) { rcu_read_unlock(); return tmp_rdev; } } rcu_read_unlock(); return NULL; } int bnxt_re_schedule_work(struct bnxt_re_dev *rdev, unsigned long event, struct ifnet *vlan_dev, struct ifnet *netdev, struct auxiliary_device *adev) { struct bnxt_re_work *re_work; /* Allocate for the deferred task */ re_work = kzalloc(sizeof(*re_work), GFP_KERNEL); if (!re_work) return -ENOMEM; re_work->rdev = rdev; re_work->event = event; re_work->vlan_dev = vlan_dev; re_work->adev = adev; INIT_WORK(&re_work->work, bnxt_re_task); if (rdev) atomic_inc(&rdev->sched_count); re_work->netdev = netdev; queue_work(bnxt_re_wq, &re_work->work); return 0; } int bnxt_re_get_slot_pf_count(struct bnxt_re_dev *rdev) { struct pci_dev *pdev_in = rdev->en_dev->pdev; int tmp_bus_num, bus_num = pdev_in->bus->number; int tmp_dev_num, dev_num = PCI_SLOT(pdev_in->devfn); struct bnxt_re_dev *tmp_rdev; int pf_cnt = 0; rcu_read_lock(); list_for_each_entry_rcu(tmp_rdev,
&bnxt_re_dev_list, list) { tmp_bus_num = tmp_rdev->en_dev->pdev->bus->number; tmp_dev_num = PCI_SLOT(tmp_rdev->en_dev->pdev->devfn); if (bus_num == tmp_bus_num && dev_num == tmp_dev_num) pf_cnt++; } rcu_read_unlock(); return pf_cnt; } /* Handle all deferred netevents tasks */ static void bnxt_re_task(struct work_struct *work) { struct bnxt_re_en_dev_info *en_info; struct auxiliary_device *aux_dev; struct bnxt_re_work *re_work; struct bnxt_re_dev *rdev; re_work = container_of(work, struct bnxt_re_work, work); mutex_lock(&bnxt_re_mutex); rdev = re_work->rdev; /* * If the previous rdev is deleted due to bond creation * do not handle the event */ if (!bnxt_re_is_rdev_valid(rdev)) goto exit; /* Ignore the event, if the device is not registred with IB stack. This * is to avoid handling any event while the device is added/removed. */ if (rdev && !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { dev_dbg(rdev_to_dev(rdev), "%s: Ignoring netdev event 0x%lx", __func__, re_work->event); goto done; } /* Extra check to silence coverity. We shouldn't handle any event * when rdev is NULL. */ if (!rdev) goto exit; dev_dbg(rdev_to_dev(rdev), "Scheduled work for event 0x%lx", re_work->event); switch (re_work->event) { case NETDEV_UP: bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); bnxt_re_net_register_async_event(rdev); break; case NETDEV_DOWN: bnxt_qplib_dbr_pacing_set_primary_pf(rdev->chip_ctx, 0); bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ERR); break; case NETDEV_CHANGE: if (bnxt_re_get_link_state(rdev) == IB_PORT_DOWN) { bnxt_re_stop_all_nonqp1_nonshadow_qps(rdev); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ERR); break; } else if (bnxt_re_get_link_state(rdev) == IB_PORT_ACTIVE) { bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); } /* temporarily disable the check for SR2 */ if (!bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param) && !_is_chip_p7(rdev->chip_ctx)) { /* * Disable CC for 10G speed * for non p5 devices */ if (rdev->sl_espeed == SPEED_10000 && !_is_chip_gen_p5_p7(rdev->chip_ctx)) { if (rdev->cc_param.enable) bnxt_re_clear_cc(rdev); } else { if (!rdev->cc_param.enable && rdev->cc_param.admin_enable) bnxt_re_setup_cc(rdev); } } break; case NETDEV_UNREGISTER: bnxt_re_stopqps_and_ib_uninit(rdev); aux_dev = rdev->adev; if (re_work->adev) goto done; bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, aux_dev); break; default: break; } done: if (rdev) { /* memory barrier to guarantee task completion * before decrementing sched count */ mmiowb(); atomic_dec(&rdev->sched_count); } exit: if (re_work->adev && re_work->event == NETDEV_UNREGISTER) { en_info = auxiliary_get_drvdata(re_work->adev); en_info->ib_uninit_done = true; wake_up(&en_info->waitq); } kfree(re_work); mutex_unlock(&bnxt_re_mutex); } /* "Notifier chain callback can be invoked for the same chain from different CPUs at the same time". For cases when the netdev is already present, our call to the register_netdevice_notifier() will actually get the rtnl_lock() before sending NETDEV_REGISTER and (if up) NETDEV_UP events. But for cases when the netdev is not already present, the notifier chain is subjected to be invoked from different CPUs simultaneously. This is protected by the netdev_mutex. 
 */
static int bnxt_re_netdev_event(struct notifier_block *notifier,
                                unsigned long event, void *ptr)
{
    struct ifnet *real_dev, *netdev;
    struct bnxt_re_dev *rdev = NULL;

    netdev = netdev_notifier_info_to_ifp(ptr);
    real_dev = rdma_vlan_dev_real_dev(netdev);
    if (!real_dev)
        real_dev = netdev;

    /* In case of bonding, this will be the bond's rdev */
    rdev = bnxt_re_from_netdev(real_dev);
    if (!rdev)
        goto exit;

    dev_info(rdev_to_dev(rdev), "%s: Event = %s (0x%lx), rdev %s (real_dev %s)\n",
             __func__, bnxt_re_netevent(event), event,
-            rdev ? rdev->netdev ? rdev->netdev->if_dname : "->netdev = NULL" : "= NULL",
-            (real_dev == netdev) ? "= netdev" : real_dev->if_dname);
+            rdev ? rdev->netdev ? if_getdname(rdev->netdev) : "->netdev = NULL" : "= NULL",
+            (real_dev == netdev) ? "= netdev" : if_getdname(real_dev));

    if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
        goto exit;

    bnxt_re_hold(rdev);

    if (real_dev != netdev) {
        switch (event) {
        case NETDEV_UP:
            bnxt_re_schedule_work(rdev, event, netdev, NULL, NULL);
            break;
        case NETDEV_DOWN:
            break;
        default:
            break;
        }
        goto done;
    }

    switch (event) {
    case NETDEV_CHANGEADDR:
        if (!_is_chip_gen_p5_p7(rdev->chip_ctx))
            bnxt_re_update_shadow_ah(rdev);
        bnxt_qplib_get_guid(rdev->dev_addr,
                            (u8 *)&rdev->ibdev.node_guid);
        break;
    case NETDEV_CHANGE:
        bnxt_re_get_link_speed(rdev);
        bnxt_re_schedule_work(rdev, event, NULL, NULL, NULL);
        break;
    case NETDEV_UNREGISTER:
        /* netdev notifier will call NETDEV_UNREGISTER again later since
         * we are still holding the reference to the netdev
         */

        /*
         * Workaround to avoid ib_unregister hang. Check for module
         * reference and don't free up the device if the reference
         * is non zero. Checking only for PF functions.
         */
        if (rdev) {
            dev_info(rdev_to_dev(rdev),
                     "bnxt_re:Unreg recvd when module refcnt > 0");
            dev_info(rdev_to_dev(rdev),
                     "bnxt_re:Close all apps using bnxt_re devs");
            dev_info(rdev_to_dev(rdev),
                     "bnxt_re:Remove the configfs entry created for the device");
            dev_info(rdev_to_dev(rdev),
                     "bnxt_re:Refer documentation for details");
            goto done;
        }

        if (atomic_read(&rdev->sched_count) > 0)
            goto done;
        if (!rdev->unreg_sched) {
            bnxt_re_schedule_work(rdev, NETDEV_UNREGISTER,
                                  NULL, NULL, NULL);
            rdev->unreg_sched = true;
            goto done;
        }
        break;
    default:
        break;
    }
done:
    if (rdev)
        bnxt_re_put(rdev);
exit:
    return NOTIFY_DONE;
}

static struct notifier_block bnxt_re_netdev_notifier = {
    .notifier_call = bnxt_re_netdev_event
};

static void bnxt_re_remove_base_interface(struct bnxt_re_dev *rdev,
                                          struct auxiliary_device *adev)
{
    bnxt_re_stopqps_and_ib_uninit(rdev);
    bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
    auxiliary_set_drvdata(adev, NULL);
}

/*
 * bnxt_re_remove - Removes the roce aux device
 * @adev - aux device pointer
 *
 * This function removes the roce device. This gets
 * called in the mod exit path and pci unbind path.
 * If the rdev is a bond interface, it destroys the lag
 * in the module exit path; in the pci unbind case it
 * destroys the lag and recreates the other base interface.
 * If the device is already removed in the error recovery
 * path, it just unregisters from the L2.
 */
static void bnxt_re_remove(struct auxiliary_device *adev)
{
    struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
    struct bnxt_en_dev *en_dev;
    struct bnxt_re_dev *rdev;
    bool primary_dev = false;
    bool secondary_dev = false;

    if (!en_info)
        return;

    mutex_lock(&bnxt_re_mutex);
    en_dev = en_info->en_dev;
    rdev = en_info->rdev;

    if (rdev && bnxt_re_is_rdev_valid(rdev)) {
        if (pci_channel_offline(rdev->rcfw.pdev))
            set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);

        if (test_bit(BNXT_RE_FLAG_EN_DEV_PRIMARY_DEV, &en_info->flags))
            primary_dev = true;
        if (test_bit(BNXT_RE_FLAG_EN_DEV_SECONDARY_DEV, &en_info->flags))
            secondary_dev = true;

        /*
         * en_dev_info of primary device and secondary device have the
         * same rdev pointer when LAG is configured. This rdev pointer
         * is rdev of bond interface.
         */
        if (!primary_dev && !secondary_dev) {
            /* removal of non bond interface */
            bnxt_re_remove_base_interface(rdev, adev);
        } else {
            /*
             * removal of bond primary/secondary interface. In this
             * case bond device is already removed, so rdev->binfo
             * is NULL.
             */
            auxiliary_set_drvdata(adev, NULL);
        }
    } else {
        /* device is removed from ulp stop, unregister the net dev */
        if (test_bit(BNXT_RE_FLAG_EN_DEV_NETDEV_REG, &en_info->flags)) {
            rtnl_lock();
            en_dev->en_ops->bnxt_unregister_device(en_dev, BNXT_ROCE_ULP);
            rtnl_unlock();
        }
    }
    mutex_unlock(&bnxt_re_mutex);
    return;
}

/* wrapper for all external user context callers */
void _bnxt_re_remove(struct auxiliary_device *adev)
{
    bnxt_re_remove(adev);
}

static void bnxt_re_ib_init_2(struct bnxt_re_dev *rdev)
{
    int rc;

    rc = bnxt_re_get_device_stats(rdev);
    if (rc)
        dev_err(rdev_to_dev(rdev),
                "Failed initial device stat query");

    bnxt_re_net_register_async_event(rdev);
}

static int bnxt_re_probe(struct auxiliary_device *adev,
                         const struct auxiliary_device_id *id)
{
    struct bnxt_aux_dev *aux_dev =
        container_of(adev, struct bnxt_aux_dev, aux_dev);
    struct bnxt_re_en_dev_info *en_info;
    struct bnxt_en_dev *en_dev = NULL;
    struct bnxt_re_dev *rdev;
    int rc = -ENODEV;

    if (aux_dev)
        en_dev = aux_dev->edev;

    if (!en_dev)
        return rc;

    if (en_dev->ulp_version != BNXT_ULP_VERSION) {
        pr_err("%s: probe error: bnxt_en ulp version magic %x is not compatible!\n",
               ROCE_DRV_MODULE_NAME, en_dev->ulp_version);
        return -EINVAL;
    }

    en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
    if (!en_info)
        return -ENOMEM;
    memset(en_info, 0, sizeof(struct bnxt_re_en_dev_info));
    en_info->en_dev = en_dev;

    auxiliary_set_drvdata(adev, en_info);

    mutex_lock(&bnxt_re_mutex);
    rc = bnxt_re_add_device(&rdev, en_dev->net,
                            BNXT_RE_GSI_MODE_ALL,
                            BNXT_RE_COMPLETE_INIT,
                            BNXT_QPLIB_WQE_MODE_STATIC,
                            BNXT_RE_MSIX_FROM_MOD_PARAM, adev);
    if (rc) {
        mutex_unlock(&bnxt_re_mutex);
        return rc;
    }

    rc = bnxt_re_ib_init(rdev);
    if (rc)
        goto err;

    bnxt_re_ib_init_2(rdev);

    dev_dbg(rdev_to_dev(rdev), "%s: adev: %p\n", __func__, adev);
    rdev->adev = adev;

    mutex_unlock(&bnxt_re_mutex);

    return 0;

err:
    mutex_unlock(&bnxt_re_mutex);
    bnxt_re_remove(adev);

    return rc;
}

static const struct auxiliary_device_id bnxt_re_id_table[] = {
    { .name = BNXT_ADEV_NAME ".rdma", },
    {},
};

MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);

static struct auxiliary_driver bnxt_re_driver = {
    .name = "rdma",
    .probe = bnxt_re_probe,
    .remove = bnxt_re_remove,
    .id_table = bnxt_re_id_table,
};

static int __init bnxt_re_mod_init(void)
{
    int rc = 0;

    pr_info("%s: %s", ROCE_DRV_MODULE_NAME, drv_version);

    bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
    if (!bnxt_re_wq)
        return -ENOMEM;

    rc = bnxt_re_register_netdevice_notifier(&bnxt_re_netdev_notifier);
    if (rc) {
        pr_err("%s: Cannot register to netdevice_notifier",
               ROCE_DRV_MODULE_NAME);
        goto err_netdev;
    }

    INIT_LIST_HEAD(&bnxt_re_dev_list);

    rc = auxiliary_driver_register(&bnxt_re_driver);
    if (rc) {
        pr_err("%s: Failed to register auxiliary driver\n",
               ROCE_DRV_MODULE_NAME);
        goto err_auxdrv;
    }

    return 0;

err_auxdrv:
    bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier);

err_netdev:
    destroy_workqueue(bnxt_re_wq);

    return rc;
}

static void __exit bnxt_re_mod_exit(void)
{
    gmod_exit = 1;
    auxiliary_driver_unregister(&bnxt_re_driver);

    bnxt_re_unregister_netdevice_notifier(&bnxt_re_netdev_notifier);

    if (bnxt_re_wq)
        destroy_workqueue(bnxt_re_wq);
}

module_init(bnxt_re_mod_init);
module_exit(bnxt_re_mod_exit);