Index: stable/11/sys/dev/mlx5/driver.h
===================================================================
--- stable/11/sys/dev/mlx5/driver.h	(revision 368225)
+++ stable/11/sys/dev/mlx5/driver.h	(revision 368226)
@@ -1,1155 +1,1176 @@
/*-
 * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef MLX5_DRIVER_H
#define MLX5_DRIVER_H

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
+#include
#include
#include
#include

#define MLX5_QCOUNTER_SETS_NETDEV 64
#define MLX5_MAX_NUMBER_OF_VFS 128

enum {
        MLX5_BOARD_ID_LEN = 64,
        MLX5_MAX_NAME_LEN = 16,
};

enum {
        MLX5_CMD_TIMEOUT_MSEC = 60 * 1000,
};

enum {
        CMD_OWNER_SW = 0x0,
        CMD_OWNER_HW = 0x1,
        CMD_STATUS_SUCCESS = 0,
};

enum mlx5_sqp_t {
        MLX5_SQP_SMI = 0,
        MLX5_SQP_GSI = 1,
        MLX5_SQP_IEEE_1588 = 2,
        MLX5_SQP_SNIFFER = 3,
        MLX5_SQP_SYNC_UMR = 4,
};

enum {
        MLX5_MAX_PORTS = 2,
};

enum {
        MLX5_EQ_VEC_PAGES = 0,
        MLX5_EQ_VEC_CMD = 1,
        MLX5_EQ_VEC_ASYNC = 2,
        MLX5_EQ_VEC_COMP_BASE,
};

enum {
        MLX5_ATOMIC_MODE_OFF = 16,
        MLX5_ATOMIC_MODE_NONE = 0 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_IB_COMP = 1 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_CX = 2 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_8B = 3 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_16B = 4 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_32B = 5 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_64B = 6 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_128B = 7 << MLX5_ATOMIC_MODE_OFF,
        MLX5_ATOMIC_MODE_256B = 8 << MLX5_ATOMIC_MODE_OFF,
};

enum {
        MLX5_ATOMIC_MODE_DCT_OFF = 20,
        MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_8B = 3 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_16B = 4 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_32B = 5 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_64B = 6 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_128B = 7 << MLX5_ATOMIC_MODE_DCT_OFF,
        MLX5_ATOMIC_MODE_DCT_256B = 8 << MLX5_ATOMIC_MODE_DCT_OFF,
};

enum {
        MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
        MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
        MLX5_ATOMIC_OPS_MASKED_CMP_SWAP = 1 << 2,
        MLX5_ATOMIC_OPS_MASKED_FETCH_ADD = 1 << 3,
};

enum {
        MLX5_REG_QPTS = 0x4002,
        MLX5_REG_QETCR = 0x4005,
MLX5_REG_QPDP = 0x4007, MLX5_REG_QTCT = 0x400A, MLX5_REG_QPDPM = 0x4013, MLX5_REG_QHLL = 0x4016, MLX5_REG_QCAM = 0x4019, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_FPGA_CAP = 0x4022, MLX5_REG_FPGA_CTRL = 0x4023, MLX5_REG_FPGA_ACCESS_REG = 0x4024, MLX5_REG_FPGA_SHELL_CNTR = 0x4025, MLX5_REG_PCAP = 0x5001, MLX5_REG_PMLP = 0x5002, MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, MLX5_REG_PAOS = 0x5006, MLX5_REG_PFCC = 0x5007, MLX5_REG_PPCNT = 0x5008, MLX5_REG_PUDE = 0x5009, MLX5_REG_PPTB = 0x500B, MLX5_REG_PBMC = 0x500C, MLX5_REG_PELC = 0x500E, MLX5_REG_PVLC = 0x500F, MLX5_REG_PMPE = 0x5010, MLX5_REG_PMAOS = 0x5012, MLX5_REG_PPLM = 0x5023, MLX5_REG_PDDR = 0x5031, MLX5_REG_PBSR = 0x5038, MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, MLX5_REG_MTMP = 0x900a, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, MLX5_REG_MPCNT = 0x9051, MLX5_REG_MCQI = 0x9061, MLX5_REG_MCC = 0x9062, MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, }; enum dbg_rsc_type { MLX5_DBG_RSC_QP, MLX5_DBG_RSC_EQ, MLX5_DBG_RSC_CQ, }; enum { MLX5_INTERFACE_PROTOCOL_IB = 0, MLX5_INTERFACE_PROTOCOL_ETH = 1, MLX5_INTERFACE_NUMBER = 2, }; struct mlx5_field_desc { struct dentry *dent; int i; }; struct mlx5_rsc_debug { struct mlx5_core_dev *dev; void *object; enum dbg_rsc_type type; struct dentry *root; struct mlx5_field_desc fields[0]; }; enum mlx5_dev_event { MLX5_DEV_EVENT_SYS_ERROR, MLX5_DEV_EVENT_PORT_UP, MLX5_DEV_EVENT_PORT_DOWN, MLX5_DEV_EVENT_PORT_INITIALIZED, MLX5_DEV_EVENT_LID_CHANGE, MLX5_DEV_EVENT_PKEY_CHANGE, MLX5_DEV_EVENT_GUID_CHANGE, MLX5_DEV_EVENT_CLIENT_REREG, MLX5_DEV_EVENT_VPORT_CHANGE, MLX5_DEV_EVENT_ERROR_STATE_DCBX, MLX5_DEV_EVENT_REMOTE_CONFIG_CHANGE, MLX5_DEV_EVENT_LOCAL_OPER_CHANGE, MLX5_DEV_EVENT_REMOTE_CONFIG_APPLICATION_PRIORITY_CHANGE, }; enum mlx5_port_status { MLX5_PORT_UP = 1 << 0, MLX5_PORT_DOWN = 1 << 1, }; enum { MLX5_VSC_SPACE_SUPPORTED = 0x1, MLX5_VSC_SPACE_OFFSET = 0x4, MLX5_VSC_COUNTER_OFFSET = 0x8, MLX5_VSC_SEMA_OFFSET = 0xC, MLX5_VSC_ADDR_OFFSET = 0x10, MLX5_VSC_DATA_OFFSET = 0x14, MLX5_VSC_MAX_RETRIES = 0x1000, }; #define MLX5_PROT_MASK(link_mode) (1 << link_mode) struct mlx5_uuar_info { struct mlx5_uar *uars; int num_uars; int num_low_latency_uuars; unsigned long *bitmap; unsigned int *count; struct mlx5_bf *bfs; /* * protect uuar allocation data structs */ struct mutex lock; u32 ver; }; struct mlx5_bf { void __iomem *reg; void __iomem *regreg; int buf_size; struct mlx5_uar *uar; unsigned long offset; int need_lock; /* protect blue flame buffer selection when needed */ spinlock_t lock; /* serialize 64 bit writes when done as two 32 bit accesses */ spinlock_t lock32; int uuarn; }; struct mlx5_cmd_first { __be32 data[4]; }; struct cache_ent; struct mlx5_fw_page { union { struct rb_node rb_node; struct list_head list; }; struct mlx5_cmd_first first; struct mlx5_core_dev *dev; bus_dmamap_t dma_map; bus_addr_t dma_addr; void *virt_addr; struct cache_ent *cache; u32 numpages; u16 load_done; #define MLX5_LOAD_ST_NONE 0 #define MLX5_LOAD_ST_SUCCESS 1 #define MLX5_LOAD_ST_FAILURE 2 u16 func_id; }; #define mlx5_cmd_msg mlx5_fw_page struct mlx5_cmd_debug { struct dentry *dbg_root; struct dentry *dbg_in; struct dentry *dbg_out; struct dentry *dbg_outlen; struct dentry *dbg_status; struct dentry *dbg_run; void *in_msg; void *out_msg; u8 status; u16 inlen; u16 outlen; }; struct cache_ent { /* protect block chain allocations */ spinlock_t lock; struct list_head head; }; struct cmd_msg_cache { struct cache_ent large; struct cache_ent med; }; 
struct mlx5_traffic_counter { u64 packets; u64 octets; }; enum mlx5_cmd_mode { MLX5_CMD_MODE_POLLING, MLX5_CMD_MODE_EVENTS }; struct mlx5_cmd_stats { u64 sum; u64 n; struct dentry *root; struct dentry *avg; struct dentry *count; /* protect command average calculations */ spinlock_t lock; }; struct mlx5_cmd { struct mlx5_fw_page *cmd_page; bus_dma_tag_t dma_tag; struct sx dma_sx; struct mtx dma_mtx; #define MLX5_DMA_OWNED(dev) mtx_owned(&(dev)->cmd.dma_mtx) #define MLX5_DMA_LOCK(dev) mtx_lock(&(dev)->cmd.dma_mtx) #define MLX5_DMA_UNLOCK(dev) mtx_unlock(&(dev)->cmd.dma_mtx) struct cv dma_cv; #define MLX5_DMA_DONE(dev) cv_broadcast(&(dev)->cmd.dma_cv) #define MLX5_DMA_WAIT(dev) cv_wait(&(dev)->cmd.dma_cv, &(dev)->cmd.dma_mtx) void *cmd_buf; dma_addr_t dma; u16 cmdif_rev; u8 log_sz; u8 log_stride; int max_reg_cmds; int events; u32 __iomem *vector; /* protect command queue allocations */ spinlock_t alloc_lock; /* protect token allocations */ spinlock_t token_lock; u8 token; unsigned long bitmask; struct semaphore sem; struct semaphore pages_sem; enum mlx5_cmd_mode mode; struct mlx5_cmd_work_ent * volatile ent_arr[MLX5_MAX_COMMANDS]; volatile enum mlx5_cmd_mode ent_mode[MLX5_MAX_COMMANDS]; struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache; int checksum_disabled; struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_port_caps { int gid_table_len; int pkey_table_len; u8 ext_port_cap; }; struct mlx5_buf { bus_dma_tag_t dma_tag; bus_dmamap_t dma_map; struct mlx5_core_dev *dev; struct { void *buf; } direct; u64 *page_list; int npages; int size; u8 page_shift; u8 load_done; }; struct mlx5_frag_buf { struct mlx5_buf_list *frags; int npages; int size; u8 page_shift; }; struct mlx5_eq { struct mlx5_core_dev *dev; __be32 __iomem *doorbell; u32 cons_index; struct mlx5_buf buf; int size; u8 irqn; u8 eqn; int nent; u64 mask; struct list_head list; int index; struct mlx5_rsc_debug *dbg; }; struct mlx5_core_psv { u32 psv_idx; struct psv_layout { u32 pd; u16 syndrome; u16 reserved; u16 bg; u16 app_tag; u32 ref_tag; } psv; }; struct mlx5_core_sig_ctx { struct mlx5_core_psv psv_memory; struct mlx5_core_psv psv_wire; #if (__FreeBSD_version >= 1100000) struct ib_sig_err err_item; #endif bool sig_status_checked; bool sig_err_exists; u32 sigerr_count; }; enum { MLX5_MKEY_MR = 1, MLX5_MKEY_MW, MLX5_MKEY_MR_USER, }; struct mlx5_core_mkey { u64 iova; u64 size; u32 key; u32 pd; u32 type; }; struct mlx5_core_mr { u64 iova; u64 size; u32 key; u32 pd; }; enum mlx5_res_type { MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ, MLX5_RES_SRQ = 3, MLX5_RES_XSRQ = 4, MLX5_RES_DCT = 5, }; struct mlx5_core_rsc_common { enum mlx5_res_type res; atomic_t refcount; struct completion free; }; struct mlx5_core_srq { struct mlx5_core_rsc_common common; /* must be first */ u32 srqn; int max; size_t max_gs; size_t max_avail_gather; int wqe_shift; void (*event)(struct mlx5_core_srq *, int); atomic_t refcount; struct completion free; }; struct mlx5_eq_table { void __iomem *update_ci; void __iomem *update_arm_ci; struct list_head comp_eqs_list; struct mlx5_eq pages_eq; struct mlx5_eq async_eq; struct mlx5_eq cmd_eq; int num_comp_vectors; /* protect EQs list */ spinlock_t lock; }; struct mlx5_uar { u32 index; void __iomem *bf_map; void __iomem *map; }; struct mlx5_core_health { struct mlx5_health_buffer __iomem *health; __be32 __iomem *health_counter; struct timer_list timer; u32 prev; int miss_counter; u32 fatal_error; struct workqueue_struct *wq_watchdog; struct 
work_struct work_watchdog; /* wq spinlock to synchronize draining */ spinlock_t wq_lock; struct workqueue_struct *wq; unsigned long flags; struct work_struct work; struct delayed_work recover_work; unsigned int last_reset_req; struct work_struct work_cmd_completion; struct workqueue_struct *wq_cmd; }; #define MLX5_CQ_LINEAR_ARRAY_SIZE 1024 struct mlx5_cq_linear_array_entry { spinlock_t lock; struct mlx5_core_cq * volatile cq; }; struct mlx5_cq_table { /* protect radix tree */ spinlock_t lock; struct radix_tree_root tree; struct mlx5_cq_linear_array_entry linear_array[MLX5_CQ_LINEAR_ARRAY_SIZE]; }; struct mlx5_qp_table { /* protect radix tree */ spinlock_t lock; struct radix_tree_root tree; }; struct mlx5_srq_table { /* protect radix tree */ spinlock_t lock; struct radix_tree_root tree; }; struct mlx5_mr_table { /* protect radix tree */ spinlock_t lock; struct radix_tree_root tree; }; struct mlx5_pme_stats { u64 status_counters[MLX5_MODULE_STATUS_NUM]; u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; }; struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; struct msix_entry *msix_arr; struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); int disable_irqs; struct io_mapping *bf_mapping; /* pages stuff */ struct workqueue_struct *pg_wq; struct rb_root page_root; s64 fw_pages; atomic_t reg_pages; s64 pages_per_func[MLX5_MAX_NUMBER_OF_VFS]; struct mlx5_core_health health; struct mlx5_srq_table srq_table; /* start: qp staff */ struct mlx5_qp_table qp_table; struct dentry *qp_debugfs; struct dentry *eq_debugfs; struct dentry *cq_debugfs; struct dentry *cmdif_debugfs; /* end: qp staff */ /* start: cq staff */ struct mlx5_cq_table cq_table; /* end: cq staff */ /* start: mr staff */ struct mlx5_mr_table mr_table; /* end: mr staff */ /* start: alloc staff */ int numa_node; struct mutex pgdir_mutex; struct list_head pgdir_list; /* end: alloc staff */ struct dentry *dbg_root; /* protect mkey key part */ spinlock_t mkey_lock; u8 mkey_key; struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; unsigned long pci_dev_data; struct mlx5_pme_stats pme_stats; struct mlx5_eswitch *eswitch; }; enum mlx5_device_state { MLX5_DEVICE_STATE_UP, MLX5_DEVICE_STATE_INTERNAL_ERROR, }; enum mlx5_interface_state { MLX5_INTERFACE_STATE_UP = 0x1, MLX5_INTERFACE_STATE_TEARDOWN = 0x2, }; enum mlx5_pci_status { MLX5_PCI_STATUS_DISABLED, MLX5_PCI_STATUS_ENABLED, }; #define MLX5_MAX_RESERVED_GIDS 8 struct mlx5_rsvd_gids { unsigned int start; unsigned int count; struct ida ida; }; struct mlx5_special_contexts { int resd_lkey; }; struct mlx5_flow_root_namespace; struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ struct mutex pci_status_mutex; enum mlx5_pci_status pci_status; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; u32 hca_caps_cur[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; struct { u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; unsigned long intf_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); struct mlx5_priv priv; struct mlx5_profile *profile; atomic_t num_qps; u32 vsc_addr; u32 issi; struct mlx5_special_contexts special_contexts; 
unsigned int module_status[MLX5_MAX_PORTS]; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; struct mlx5_flow_root_namespace *esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_rx_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; u32 num_q_counter_allocated[MLX5_INTERFACE_NUMBER]; struct mlx5_crspace_regmap *dump_rege; uint32_t *dump_data; unsigned dump_size; bool dump_valid; bool dump_copyout; struct mtx dump_lock; struct sysctl_ctx_list sysctl_ctx; int msix_eqvec; int pwr_status; int pwr_value; struct { struct mlx5_rsvd_gids reserved_gids; atomic_t roce_en; } roce; struct { spinlock_t spinlock; #define MLX5_MPFS_TABLE_MAX 32 long bitmap[BITS_TO_LONGS(MLX5_MPFS_TABLE_MAX)]; } mpfs; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif }; enum { MLX5_WOL_DISABLE = 0, MLX5_WOL_SECURED_MAGIC = 1 << 1, MLX5_WOL_MAGIC = 1 << 2, MLX5_WOL_ARP = 1 << 3, MLX5_WOL_BROADCAST = 1 << 4, MLX5_WOL_MULTICAST = 1 << 5, MLX5_WOL_UNICAST = 1 << 6, MLX5_WOL_PHY_ACTIVITY = 1 << 7, }; struct mlx5_db { __be32 *db; union { struct mlx5_db_pgdir *pgdir; struct mlx5_ib_user_db_page *user_page; } u; dma_addr_t dma; int index; }; struct mlx5_net_counters { u64 packets; u64 octets; }; struct mlx5_ptys_reg { u8 an_dis_admin; u8 an_dis_ap; u8 local_port; u8 proto_mask; u32 eth_proto_cap; u16 ib_link_width_cap; u16 ib_proto_cap; u32 eth_proto_admin; u16 ib_link_width_admin; u16 ib_proto_admin; u32 eth_proto_oper; u16 ib_link_width_oper; u16 ib_proto_oper; u32 eth_proto_lp_advertise; }; struct mlx5_pvlc_reg { u8 local_port; u8 vl_hw_cap; u8 vl_admin; u8 vl_operational; }; struct mlx5_pmtu_reg { u8 local_port; u16 max_mtu; u16 admin_mtu; u16 oper_mtu; }; struct mlx5_vport_counters { struct mlx5_net_counters received_errors; struct mlx5_net_counters transmit_errors; struct mlx5_net_counters received_ib_unicast; struct mlx5_net_counters transmitted_ib_unicast; struct mlx5_net_counters received_ib_multicast; struct mlx5_net_counters transmitted_ib_multicast; struct mlx5_net_counters received_eth_broadcast; struct mlx5_net_counters transmitted_eth_broadcast; struct mlx5_net_counters received_eth_unicast; struct mlx5_net_counters transmitted_eth_unicast; struct mlx5_net_counters received_eth_multicast; struct mlx5_net_counters transmitted_eth_multicast; }; enum { MLX5_DB_PER_PAGE = MLX5_ADAPTER_PAGE_SIZE / L1_CACHE_BYTES, }; struct mlx5_core_dct { struct mlx5_core_rsc_common common; /* must be first */ void (*event)(struct mlx5_core_dct *, int); int dctn; struct completion drained; struct mlx5_rsc_debug *dbg; int pid; }; enum { MLX5_COMP_EQ_SIZE = 1024, }; enum { MLX5_PTYS_IB = 1 << 0, MLX5_PTYS_EN = 1 << 2, }; struct mlx5_db_pgdir { struct list_head list; DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); struct mlx5_fw_page *fw_page; __be32 *db_page; dma_addr_t db_dma; }; typedef void (*mlx5_cmd_cbk_t)(int status, void *context); struct mlx5_cmd_work_ent { struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; int uin_size; void *uout; int uout_size; mlx5_cmd_cbk_t callback; struct delayed_work cb_timeout_work; void *context; int idx; struct completion done; struct mlx5_cmd *cmd; struct work_struct work; struct mlx5_cmd_layout *lay; int ret; int page_queue; u8 status; u8 token; u64 ts1; u64 ts2; u16 op; u8 busy; bool polling; }; struct mlx5_pas { u64 pa; u8 log_sz; }; enum port_state_policy { MLX5_POLICY_DOWN = 0, MLX5_POLICY_UP = 1, MLX5_POLICY_FOLLOW = 2, MLX5_POLICY_INVALID = 0xffffffff }; static inline void * 
mlx5_buf_offset(struct mlx5_buf *buf, int offset)
{
        return ((char *)buf->direct.buf + offset);
}

extern struct workqueue_struct *mlx5_core_wq;

#define STRUCT_FIELD(header, field) \
        .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
        .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field

static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
{
        return pci_get_drvdata(pdev);
}

extern struct dentry *mlx5_debugfs_root;

static inline u16 fw_rev_maj(struct mlx5_core_dev *dev)
{
        return ioread32be(&dev->iseg->fw_rev) & 0xffff;
}

static inline u16 fw_rev_min(struct mlx5_core_dev *dev)
{
        return ioread32be(&dev->iseg->fw_rev) >> 16;
}

static inline u16 fw_rev_sub(struct mlx5_core_dev *dev)
{
        return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff;
}

static inline u16 cmdif_rev_get(struct mlx5_core_dev *dev)
{
        return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
}

static inline int mlx5_get_gid_table_len(u16 param)
{
        if (param > 4) {
                printf("M4_CORE_DRV_NAME: WARN: ""gid table length is zero\n");
                return 0;
        }

        return 8 * (1 << param);
}

static inline void *mlx5_vzalloc(unsigned long size)
{
        void *rtn;

        rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
        return rtn;
}

static inline void *mlx5_vmalloc(unsigned long size)
{
        void *rtn;

        rtn = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
        if (!rtn)
                rtn = vmalloc(size);
        return rtn;
}

static inline u32 mlx5_base_mkey(const u32 key)
{
        return key & 0xffffff00u;
}

int mlx5_cmd_init(struct mlx5_core_dev *dev);
void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
+
+struct mlx5_async_ctx {
+        struct mlx5_core_dev *dev;
+        atomic_t num_inflight;
+        struct wait_queue_head wait;
+};
+
+struct mlx5_async_work;
+
+typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context);
+
+struct mlx5_async_work {
+        struct mlx5_async_ctx *ctx;
+        mlx5_async_cbk_t user_callback;
+};
+
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+                             struct mlx5_async_ctx *ctx);
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx);
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+                     void *out, int out_size, mlx5_async_cbk_t callback,
+                     struct mlx5_async_work *work);
int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size,
                  void *out, int out_size);
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-                     void *out, int out_size, mlx5_cmd_cbk_t callback,
-                     void *context);
int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
                          void *out, int out_size);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
void mlx5_health_cleanup(struct mlx5_core_dev *dev);
int mlx5_health_init(struct mlx5_core_dev *dev);
void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
void mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev);

#define mlx5_buf_alloc_node(dev, size, direct, buf, node) \
        mlx5_buf_alloc(dev, size, direct, buf)

int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
                   struct mlx5_buf *buf);
void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                         struct mlx5_srq_attr *in);
int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                        struct mlx5_srq_attr *out);
int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
                      u16 lwm, int is_srq);
void mlx5_init_mr_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev);
int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
                             struct mlx5_core_mr *mkey,
-                            u32 *in, int inlen,
-                            u32 *out, int outlen,
-                            mlx5_cmd_cbk_t callback, void *context);
+                            struct mlx5_async_ctx *async_ctx, u32 *in,
+                            int inlen, u32 *out, int outlen,
+                            mlx5_async_cbk_t callback,
+                            struct mlx5_async_work *context);
int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
                          u32 *in, int inlen);
int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey);
int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey,
                         u32 *out, int outlen);
int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
                             u32 *mkey);
int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
                      u16 opmod, u8 port);
void mlx5_fwp_flush(struct mlx5_fw_page *fwp);
void mlx5_fwp_invalidate(struct mlx5_fw_page *fwp);
struct mlx5_fw_page *mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags,
                                    unsigned num);
void mlx5_fwp_free(struct mlx5_fw_page *fwp);
u64 mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset);
void *mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset);
void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
                                 s32 npages);
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev);
void mlx5_register_debugfs(void);
void mlx5_unregister_debugfs(void);
int mlx5_eq_init(struct mlx5_core_dev *dev);
void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vector,
                           enum mlx5_cmd_mode mode);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       int nent, u64 mask, struct mlx5_uar *uar);
int
mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_set_dc_cnak_trace(struct mlx5_core_dev *dev, int enable, u64 addr); int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, u16 reg_num, int arg, int write); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen); int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); u8 mlx5_is_wol_supported(struct mlx5_core_dev *dev); int mlx5_set_wol(struct mlx5_core_dev *dev, u8 wol_mode); int mlx5_set_dropless_mode(struct mlx5_core_dev *dev, u16 timeout); int mlx5_query_dropless_mode(struct mlx5_core_dev *dev, u16 *timeout); int mlx5_query_wol(struct mlx5_core_dev *dev, u8 *wol_mode); int mlx5_core_access_pvlc(struct mlx5_core_dev *dev, struct mlx5_pvlc_reg *pvlc, int write); int mlx5_core_access_ptys(struct mlx5_core_dev *dev, struct mlx5_ptys_reg *ptys, int write); int mlx5_core_access_pmtu(struct mlx5_core_dev *dev, struct mlx5_pmtu_reg *pmtu, int write); int mlx5_vxlan_udp_port_add(struct mlx5_core_dev *dev, u16 port); int mlx5_vxlan_udp_port_delete(struct mlx5_core_dev *dev, u16 port); int mlx5_query_port_cong_status(struct mlx5_core_dev *mdev, int protocol, int priority, int *is_enable); int mlx5_modify_port_cong_status(struct mlx5_core_dev *mdev, int protocol, int priority, int enable); int mlx5_query_port_cong_params(struct mlx5_core_dev *mdev, int protocol, void *out, int out_size); int mlx5_modify_port_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_query_port_cong_statistics(struct mlx5_core_dev *mdev, int clear, void *out, int out_size); int mlx5_set_diagnostic_params(struct mlx5_core_dev *mdev, void *in, int in_size); int mlx5_query_diagnostic_counters(struct mlx5_core_dev *mdev, u8 num_of_samples, u16 sample_index, void *out, int out_size); int mlx5_vsc_find_cap(struct mlx5_core_dev *mdev); int mlx5_vsc_lock(struct mlx5_core_dev *mdev); void mlx5_vsc_unlock(struct mlx5_core_dev *mdev); int mlx5_vsc_set_space(struct mlx5_core_dev *mdev, u16 space); int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *mdev, u32 expected); 
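Note (not part of the diff): a minimal usage sketch of the asynchronous command
context introduced above. It assumes the nop_in/nop_out layouts and the
MLX5_CMD_OP_NOP opcode from mlx5_ifc.h are available to the caller; the
my_nop_request wrapper and the function names are hypothetical illustrations,
not code from this commit.

/*
 * Hypothetical example only -- not part of this commit.  It shows how a
 * consumer might drive the new mlx5_async_ctx interface: the context is
 * bound to the device once, each request embeds a struct mlx5_async_work,
 * and the callback recovers its wrapper with container_of().
 */
struct my_nop_request {
        struct mlx5_async_work work;            /* passed back to the callback */
        u32 out[MLX5_ST_SZ_DW(nop_out)];        /* assumed nop_out layout */
};

static void
my_nop_done(int status, struct mlx5_async_work *work)
{
        struct my_nop_request *req =
            container_of(work, struct my_nop_request, work);

        /* "status" carries the command result; req->out holds the mailbox output. */
        (void)status;
        (void)req;
}

static int
my_issue_nop(struct mlx5_core_dev *dev, struct mlx5_async_ctx *ctx,
    struct my_nop_request *req)
{
        u32 in[MLX5_ST_SZ_DW(nop_in)] = {0};    /* assumed nop_in layout */

        MLX5_SET(nop_in, in, opcode, MLX5_CMD_OP_NOP);
        return (mlx5_cmd_exec_cb(ctx, in, sizeof(in), req->out,
            sizeof(req->out), my_nop_done, &req->work));
}

/*
 * Typical lifetime: mlx5_cmd_init_async_ctx(dev, &ctx); issue requests;
 * mlx5_cmd_cleanup_async_ctx(&ctx).  Given the num_inflight/wait members
 * above, cleanup is presumably expected to wait for outstanding callbacks
 * to drain before returning.
 */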
int mlx5_vsc_write(struct mlx5_core_dev *mdev, u32 addr, const u32 *data); int mlx5_vsc_read(struct mlx5_core_dev *mdev, u32 addr, u32 *data); int mlx5_vsc_lock_addr_space(struct mlx5_core_dev *mdev, u32 addr); int mlx5_vsc_unlock_addr_space(struct mlx5_core_dev *mdev, u32 addr); int mlx5_pci_read_power_status(struct mlx5_core_dev *mdev, u16 *p_power, u8 *p_status); static inline u32 mlx5_mkey_to_idx(u32 mkey) { return mkey >> 8; } static inline u32 mlx5_idx_to_mkey(u32 mkey_idx) { return mkey_idx << 8; } static inline u8 mlx5_mkey_variant(u32 mkey) { return mkey & 0xff; } enum { MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0, MLX5_PROF_MASK_MR_CACHE = (u64)1 << 1, }; enum { MAX_MR_CACHE_ENTRIES = 15, }; struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); int protocol; struct list_head list; }; void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, const u8 *mac, bool vlan, u16 vlan_id); struct mlx5_profile { u64 mask; u8 log_max_qp; struct { int size; int limit; } mr_cache[MAX_MR_CACHE_ENTRIES]; }; enum { MLX5_PCI_DEV_IS_VF = 1 << 0, }; enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) { return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } void mlx5_disable_interrupts(struct mlx5_core_dev *); void mlx5_poll_interrupts(struct mlx5_core_dev *); #endif /* MLX5_DRIVER_H */ Index: stable/11/sys/dev/mlx5/mlx5_core/mlx5_cmd.c =================================================================== --- stable/11/sys/dev/mlx5/mlx5_core/mlx5_cmd.c (revision 368225) +++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_cmd.c (revision 368226) @@ -1,1617 +1,1663 @@ /*- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include "mlx5_core.h" static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size); static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); enum { CMD_IF_REV = 5, }; enum { NUM_LONG_LISTS = 2, NUM_MED_LISTS = 64, LONG_LIST_SIZE = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 + MLX5_CMD_DATA_BLOCK_SIZE, MED_LIST_SIZE = 16 + MLX5_CMD_DATA_BLOCK_SIZE, }; enum { MLX5_CMD_DELIVERY_STAT_OK = 0x0, MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1, MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2, MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3, MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4, MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5, MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6, MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7, MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8, MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9, MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; struct mlx5_ifc_mbox_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; u8 syndrome[0x20]; u8 reserved_at_40[0x40]; }; struct mlx5_ifc_mbox_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; u8 reserved_at_20[0x10]; u8 op_mod[0x10]; u8 reserved_at_40[0x40]; }; static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, int uin_size, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t cbk, void *context, int page_queue) { gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; struct mlx5_cmd_work_ent *ent; ent = kzalloc(sizeof(*ent), alloc_flags); if (!ent) return ERR_PTR(-ENOMEM); ent->in = in; ent->uin_size = uin_size; ent->out = out; ent->uout = uout; ent->uout_size = uout_size; ent->callback = cbk; ent->context = context; ent->cmd = cmd; ent->page_queue = page_queue; return ent; } static u8 alloc_token(struct mlx5_cmd *cmd) { u8 token; spin_lock(&cmd->token_lock); cmd->token++; if (cmd->token == 0) cmd->token++; token = cmd->token; spin_unlock(&cmd->token_lock); return token; } static int alloc_ent(struct mlx5_cmd_work_ent *ent) { unsigned long flags; struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); int ret = cmd->max_reg_cmds; spin_lock_irqsave(&cmd->alloc_lock, flags); if (!ent->page_queue) { ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds); if (ret >= cmd->max_reg_cmds) ret = -1; } if (dev->state != MLX5_DEVICE_STATE_UP) ret = -1; if (ret != -1) { ent->busy = 1; ent->idx = ret; clear_bit(ent->idx, &cmd->bitmask); cmd->ent_mode[ent->idx] = ent->polling ? 
MLX5_CMD_MODE_POLLING : MLX5_CMD_MODE_EVENTS; cmd->ent_arr[ent->idx] = ent; } spin_unlock_irqrestore(&cmd->alloc_lock, flags); return ret; } static void free_ent(struct mlx5_cmd *cmd, int idx) { unsigned long flags; spin_lock_irqsave(&cmd->alloc_lock, flags); cmd->ent_arr[idx] = NULL; /* safety clear */ cmd->ent_mode[idx] = MLX5_CMD_MODE_POLLING; /* reset mode */ set_bit(idx, &cmd->bitmask); spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) { return cmd->cmd_buf + (idx << cmd->log_stride); } static u8 xor8_buf(void *buf, int len) { u8 *ptr = buf; u8 sum = 0; int i; for (i = 0; i < len; i++) sum ^= ptr[i]; return sum; } static int verify_block_sig(struct mlx5_cmd_prot_block *block) { if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff) return -EINVAL; if (xor8_buf(block, sizeof(*block)) != 0xff) return -EINVAL; return 0; } static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token, int csum) { block->token = token; if (csum) { block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2); block->sig = ~xor8_buf(block, sizeof(*block) - 1); } } static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum) { size_t i; for (i = 0; i != (msg->numpages * MLX5_NUM_CMDS_IN_ADAPTER_PAGE); i++) { struct mlx5_cmd_prot_block *block; block = mlx5_fwp_get_virt(msg, i * MLX5_CMD_MBOX_SIZE); /* compute signature */ calc_block_sig(block, token, csum); /* check for last block */ if (block->next == 0) break; } /* make sure data gets written to RAM */ mlx5_fwp_flush(msg); } static void set_signature(struct mlx5_cmd_work_ent *ent, int csum) { ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay)); calc_chain_sig(ent->in, ent->token, csum); calc_chain_sig(ent->out, ent->token, csum); } static void poll_timeout(struct mlx5_cmd_work_ent *ent) { struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); int poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000); u8 own; do { own = ent->lay->status_own; if (!(own & CMD_OWNER_HW) || dev->state != MLX5_DEVICE_STATE_UP) { ent->ret = 0; return; } usleep_range(5000, 10000); } while (time_before(jiffies, poll_end)); ent->ret = -ETIMEDOUT; } static void free_cmd(struct mlx5_cmd_work_ent *ent) { cancel_delayed_work_sync(&ent->cb_timeout_work); kfree(ent); } static int verify_signature(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd_msg *msg = ent->out; size_t i; int err; u8 sig; sig = xor8_buf(ent->lay, sizeof(*ent->lay)); if (sig != 0xff) return -EINVAL; for (i = 0; i != (msg->numpages * MLX5_NUM_CMDS_IN_ADAPTER_PAGE); i++) { struct mlx5_cmd_prot_block *block; block = mlx5_fwp_get_virt(msg, i * MLX5_CMD_MBOX_SIZE); /* compute signature */ err = verify_block_sig(block); if (err != 0) return (err); /* check for last block */ if (block->next == 0) break; } return (0); } static void dump_buf(void *buf, int size, int data_only, int offset) { __be32 *p = buf; int i; for (i = 0; i < size; i += 16) { pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]), be32_to_cpu(p[1]), be32_to_cpu(p[2]), be32_to_cpu(p[3])); p += 4; offset += 16; } if (!data_only) pr_debug("\n"); } enum { MLX5_DRIVER_STATUS_ABORTED = 0xfe, MLX5_DRIVER_SYND = 0xbadd00de, }; static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, u32 *synd, u8 *status) { *synd = 0; *status = 0; switch (op) { case MLX5_CMD_OP_TEARDOWN_HCA: case MLX5_CMD_OP_DISABLE_HCA: case MLX5_CMD_OP_MANAGE_PAGES: case 
MLX5_CMD_OP_DESTROY_MKEY: case MLX5_CMD_OP_DESTROY_EQ: case MLX5_CMD_OP_DESTROY_CQ: case MLX5_CMD_OP_DESTROY_QP: case MLX5_CMD_OP_DESTROY_PSV: case MLX5_CMD_OP_DESTROY_SRQ: case MLX5_CMD_OP_DESTROY_XRC_SRQ: case MLX5_CMD_OP_DESTROY_DCT: case MLX5_CMD_OP_DEALLOC_Q_COUNTER: case MLX5_CMD_OP_DEALLOC_PD: case MLX5_CMD_OP_DEALLOC_UAR: case MLX5_CMD_OP_DETACH_FROM_MCG: case MLX5_CMD_OP_DEALLOC_XRCD: case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: case MLX5_CMD_OP_DESTROY_TIR: case MLX5_CMD_OP_DESTROY_SQ: case MLX5_CMD_OP_DESTROY_RQ: case MLX5_CMD_OP_DESTROY_RMP: case MLX5_CMD_OP_DESTROY_TIS: case MLX5_CMD_OP_DESTROY_RQT: case MLX5_CMD_OP_DESTROY_FLOW_TABLE: case MLX5_CMD_OP_DESTROY_FLOW_GROUP: case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_2ERR_QP: case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_ADAPTER: case MLX5_CMD_OP_INIT_HCA: case MLX5_CMD_OP_ENABLE_HCA: case MLX5_CMD_OP_QUERY_PAGES: case MLX5_CMD_OP_SET_HCA_CAP: case MLX5_CMD_OP_QUERY_ISSI: case MLX5_CMD_OP_SET_ISSI: case MLX5_CMD_OP_CREATE_MKEY: case MLX5_CMD_OP_QUERY_MKEY: case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_CREATE_EQ: case MLX5_CMD_OP_QUERY_EQ: case MLX5_CMD_OP_GEN_EQE: case MLX5_CMD_OP_CREATE_CQ: case MLX5_CMD_OP_QUERY_CQ: case MLX5_CMD_OP_MODIFY_CQ: case MLX5_CMD_OP_CREATE_QP: case MLX5_CMD_OP_RST2INIT_QP: case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: case MLX5_CMD_OP_QUERY_QP: case MLX5_CMD_OP_SQD_RTS_QP: case MLX5_CMD_OP_INIT2INIT_QP: case MLX5_CMD_OP_CREATE_PSV: case MLX5_CMD_OP_CREATE_SRQ: case MLX5_CMD_OP_QUERY_SRQ: case MLX5_CMD_OP_ARM_RQ: case MLX5_CMD_OP_CREATE_XRC_SRQ: case MLX5_CMD_OP_QUERY_XRC_SRQ: case MLX5_CMD_OP_ARM_XRC_SRQ: case MLX5_CMD_OP_CREATE_DCT: case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_QUERY_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_QUERY_VPORT_STATE: case MLX5_CMD_OP_MODIFY_VPORT_STATE: case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: case MLX5_CMD_OP_SET_ROCE_ADDRESS: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_HCA_VPORT_GID: case MLX5_CMD_OP_QUERY_HCA_VPORT_PKEY: case MLX5_CMD_OP_QUERY_VNIC_ENV: case MLX5_CMD_OP_QUERY_VPORT_COUNTER: case MLX5_CMD_OP_ALLOC_Q_COUNTER: case MLX5_CMD_OP_QUERY_Q_COUNTER: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: case MLX5_CMD_OP_CONFIG_INT_MODERATION: case MLX5_CMD_OP_ACCESS_REG: case MLX5_CMD_OP_ATTACH_TO_MCG: case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG: case MLX5_CMD_OP_MAD_IFC: case MLX5_CMD_OP_QUERY_MAD_DEMUX: case MLX5_CMD_OP_SET_MAD_DEMUX: case MLX5_CMD_OP_NOP: case MLX5_CMD_OP_ALLOC_XRCD: case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: case MLX5_CMD_OP_QUERY_CONG_STATUS: case MLX5_CMD_OP_MODIFY_CONG_STATUS: case MLX5_CMD_OP_QUERY_CONG_PARAMS: case MLX5_CMD_OP_MODIFY_CONG_PARAMS: case MLX5_CMD_OP_QUERY_CONG_STATISTICS: case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: case MLX5_CMD_OP_CREATE_TIR: case MLX5_CMD_OP_MODIFY_TIR: case MLX5_CMD_OP_QUERY_TIR: case MLX5_CMD_OP_CREATE_SQ: 
case MLX5_CMD_OP_MODIFY_SQ: case MLX5_CMD_OP_QUERY_SQ: case MLX5_CMD_OP_CREATE_RQ: case MLX5_CMD_OP_MODIFY_RQ: case MLX5_CMD_OP_QUERY_RQ: case MLX5_CMD_OP_CREATE_RMP: case MLX5_CMD_OP_MODIFY_RMP: case MLX5_CMD_OP_QUERY_RMP: case MLX5_CMD_OP_CREATE_TIS: case MLX5_CMD_OP_MODIFY_TIS: case MLX5_CMD_OP_QUERY_TIS: case MLX5_CMD_OP_CREATE_RQT: case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; default: mlx5_core_err(dev, "Unknown FW command (%d)\n", op); return -EINVAL; } } const char *mlx5_command_str(int command) { #define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd switch (command) { MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP); MLX5_COMMAND_STR_CASE(SET_HCA_CAP); MLX5_COMMAND_STR_CASE(QUERY_ADAPTER); MLX5_COMMAND_STR_CASE(INIT_HCA); MLX5_COMMAND_STR_CASE(TEARDOWN_HCA); MLX5_COMMAND_STR_CASE(ENABLE_HCA); MLX5_COMMAND_STR_CASE(DISABLE_HCA); MLX5_COMMAND_STR_CASE(QUERY_PAGES); MLX5_COMMAND_STR_CASE(MANAGE_PAGES); MLX5_COMMAND_STR_CASE(QUERY_ISSI); MLX5_COMMAND_STR_CASE(SET_ISSI); MLX5_COMMAND_STR_CASE(CREATE_MKEY); MLX5_COMMAND_STR_CASE(QUERY_MKEY); MLX5_COMMAND_STR_CASE(DESTROY_MKEY); MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS); MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME); MLX5_COMMAND_STR_CASE(CREATE_EQ); MLX5_COMMAND_STR_CASE(DESTROY_EQ); MLX5_COMMAND_STR_CASE(QUERY_EQ); MLX5_COMMAND_STR_CASE(GEN_EQE); MLX5_COMMAND_STR_CASE(CREATE_CQ); MLX5_COMMAND_STR_CASE(DESTROY_CQ); MLX5_COMMAND_STR_CASE(QUERY_CQ); MLX5_COMMAND_STR_CASE(MODIFY_CQ); MLX5_COMMAND_STR_CASE(CREATE_QP); MLX5_COMMAND_STR_CASE(DESTROY_QP); MLX5_COMMAND_STR_CASE(RST2INIT_QP); MLX5_COMMAND_STR_CASE(INIT2RTR_QP); MLX5_COMMAND_STR_CASE(RTR2RTS_QP); MLX5_COMMAND_STR_CASE(RTS2RTS_QP); MLX5_COMMAND_STR_CASE(SQERR2RTS_QP); MLX5_COMMAND_STR_CASE(2ERR_QP); MLX5_COMMAND_STR_CASE(2RST_QP); MLX5_COMMAND_STR_CASE(QUERY_QP); MLX5_COMMAND_STR_CASE(SQD_RTS_QP); MLX5_COMMAND_STR_CASE(MAD_IFC); MLX5_COMMAND_STR_CASE(INIT2INIT_QP); MLX5_COMMAND_STR_CASE(CREATE_PSV); MLX5_COMMAND_STR_CASE(DESTROY_PSV); MLX5_COMMAND_STR_CASE(CREATE_SRQ); MLX5_COMMAND_STR_CASE(DESTROY_SRQ); MLX5_COMMAND_STR_CASE(QUERY_SRQ); MLX5_COMMAND_STR_CASE(ARM_RQ); MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ); MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ); MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ); MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ); MLX5_COMMAND_STR_CASE(CREATE_DCT); MLX5_COMMAND_STR_CASE(SET_DC_CNAK_TRACE); MLX5_COMMAND_STR_CASE(DESTROY_DCT); MLX5_COMMAND_STR_CASE(DRAIN_DCT); MLX5_COMMAND_STR_CASE(QUERY_DCT); MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION); MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE); MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE); MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS); MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS); MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); MLX5_COMMAND_STR_CASE(QUERY_VNIC_ENV); MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); MLX5_COMMAND_STR_CASE(SET_WOL_ROL); MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); 
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); MLX5_COMMAND_STR_CASE(ALLOC_UAR); MLX5_COMMAND_STR_CASE(DEALLOC_UAR); MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG); MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX); MLX5_COMMAND_STR_CASE(NOP); MLX5_COMMAND_STR_CASE(ALLOC_XRCD); MLX5_COMMAND_STR_CASE(DEALLOC_XRCD); MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN); MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN); MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS); MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS); MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS); MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS); MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS); MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT); MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT); MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(CREATE_RMP); MLX5_COMMAND_STR_CASE(MODIFY_RMP); MLX5_COMMAND_STR_CASE(DESTROY_RMP); MLX5_COMMAND_STR_CASE(QUERY_RMP); MLX5_COMMAND_STR_CASE(CREATE_RQT); MLX5_COMMAND_STR_CASE(MODIFY_RQT); MLX5_COMMAND_STR_CASE(DESTROY_RQT); MLX5_COMMAND_STR_CASE(QUERY_RQT); MLX5_COMMAND_STR_CASE(ACCESS_REG); MLX5_COMMAND_STR_CASE(CREATE_SQ); MLX5_COMMAND_STR_CASE(MODIFY_SQ); MLX5_COMMAND_STR_CASE(DESTROY_SQ); MLX5_COMMAND_STR_CASE(QUERY_SQ); MLX5_COMMAND_STR_CASE(CREATE_RQ); MLX5_COMMAND_STR_CASE(MODIFY_RQ); MLX5_COMMAND_STR_CASE(DESTROY_RQ); MLX5_COMMAND_STR_CASE(QUERY_RQ); MLX5_COMMAND_STR_CASE(CREATE_TIR); MLX5_COMMAND_STR_CASE(MODIFY_TIR); MLX5_COMMAND_STR_CASE(DESTROY_TIR); MLX5_COMMAND_STR_CASE(QUERY_TIR); MLX5_COMMAND_STR_CASE(CREATE_TIS); MLX5_COMMAND_STR_CASE(MODIFY_TIS); MLX5_COMMAND_STR_CASE(DESTROY_TIS); MLX5_COMMAND_STR_CASE(QUERY_TIS); MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE); MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP); MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP); MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP); MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(SET_DIAGNOSTICS); MLX5_COMMAND_STR_CASE(QUERY_DIAGNOSTICS); default: return "unknown command opcode"; } } static const char *cmd_status_str(u8 status) { switch (status) { case MLX5_CMD_STAT_OK: return "OK"; case MLX5_CMD_STAT_INT_ERR: return "internal error"; case MLX5_CMD_STAT_BAD_OP_ERR: return "bad operation"; case MLX5_CMD_STAT_BAD_PARAM_ERR: return "bad parameter"; case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return "bad system state"; case MLX5_CMD_STAT_BAD_RES_ERR: return "bad resource"; case MLX5_CMD_STAT_RES_BUSY: return "resource busy"; case MLX5_CMD_STAT_LIM_ERR: return "limits exceeded"; case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return "bad resource state"; case MLX5_CMD_STAT_IX_ERR: return "bad index"; case MLX5_CMD_STAT_NO_RES_ERR: return "no resources"; case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return "bad input length"; case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return "bad output length"; case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return "bad QP state"; case MLX5_CMD_STAT_BAD_PKT_ERR: return "bad packet (discarded)"; case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return "bad size too many outstanding CQEs"; default: return "unknown status"; } } static int 
cmd_status_to_err_helper(u8 status) { switch (status) { case MLX5_CMD_STAT_OK: return 0; case MLX5_CMD_STAT_INT_ERR: return -EIO; case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; case MLX5_CMD_STAT_IX_ERR: return -EINVAL; case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; default: return -EIO; } } void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) { *status = MLX5_GET(mbox_out, out, status); *syndrome = MLX5_GET(mbox_out, out, syndrome); } static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) { u32 syndrome; u8 status; u16 opcode; u16 op_mod; mlx5_cmd_mbox_status(out, &status, &syndrome); if (!status) return 0; opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); mlx5_core_err(dev, "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", mlx5_command_str(opcode), opcode, op_mod, cmd_status_str(status), status, syndrome); return cmd_status_to_err_helper(status); } static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) { struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode); size_t i; int data_only; int offset = 0; int msg_len = input ? ent->uin_size : ent->uout_size; int dump_len; data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); if (data_only) mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, "dump command data %s(0x%x) %s\n", mlx5_command_str(op), op, input ? "INPUT" : "OUTPUT"); else mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n", mlx5_command_str(op), op, input ? "INPUT" : "OUTPUT"); if (data_only) { if (input) { dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset); offset += sizeof(ent->lay->in); } else { dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset); offset += sizeof(ent->lay->out); } } else { dump_buf(ent->lay, sizeof(*ent->lay), 0, offset); offset += sizeof(*ent->lay); } for (i = 0; i != (msg->numpages * MLX5_NUM_CMDS_IN_ADAPTER_PAGE); i++) { struct mlx5_cmd_prot_block *block; block = mlx5_fwp_get_virt(msg, i * MLX5_CMD_MBOX_SIZE); if (data_only) { if (offset >= msg_len) break; dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg_len - offset); dump_buf(block->data, dump_len, 1, offset); offset += MLX5_CMD_DATA_BLOCK_SIZE; } else { mlx5_core_dbg(dev, "command block:\n"); dump_buf(block, sizeof(*block), 0, offset); offset += sizeof(*block); } /* check for last block */ if (block->next == 0) break; } if (data_only) pr_debug("\n"); } static u16 msg_to_opcode(struct mlx5_cmd_msg *in) { return MLX5_GET(mbox_in, in->first.data, opcode); } static void cb_timeout_handler(struct work_struct *work) { struct delayed_work *dwork = container_of(work, struct delayed_work, work); struct mlx5_cmd_work_ent *ent = container_of(dwork, struct mlx5_cmd_work_ent, cb_timeout_work); struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); ent->ret = -ETIMEDOUT; mlx5_core_warn(dev, "%s(0x%x) timeout. 
Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, MLX5_CMD_MODE_EVENTS); } static void complete_command(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); mlx5_cmd_cbk_t callback; void *context; s64 ds; struct mlx5_cmd_stats *stats; unsigned long flags; int err; struct semaphore *sem; if (ent->page_queue) sem = &cmd->pages_sem; else sem = &cmd->sem; if (dev->state != MLX5_DEVICE_STATE_UP) { u8 status = 0; u32 drv_synd; ent->ret = mlx5_internal_err_ret_value(dev, msg_to_opcode(ent->in), &drv_synd, &status); MLX5_SET(mbox_out, ent->out, status, status); MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); } if (ent->callback) { ds = ent->ts2 - ent->ts1; if (ent->op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[ent->op]; spin_lock_irqsave(&stats->lock, flags); stats->sum += ds; ++stats->n; spin_unlock_irqrestore(&stats->lock, flags); } callback = ent->callback; context = ent->context; err = ent->ret; if (!err) { err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); err = err ? err : mlx5_cmd_check(dev, ent->in->first.data, ent->uout); } mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); err = err ? err : ent->status; free_cmd(ent); callback(err, context); } else { complete(&ent->done); } up(sem); } static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); struct mlx5_cmd_layout *lay; struct semaphore *sem; bool poll_cmd = ent->polling; sem = ent->page_queue ? 
&cmd->pages_sem : &cmd->sem; down(sem); if (alloc_ent(ent) < 0) { complete_command(ent); return; } ent->token = alloc_token(cmd); lay = get_inst(cmd, ent->idx); ent->lay = lay; memset(lay, 0, sizeof(*lay)); memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); ent->op = be32_to_cpu(lay->in[0]) >> 16; if (ent->in->numpages != 0) lay->in_ptr = cpu_to_be64(mlx5_fwp_get_dma(ent->in, 0)); if (ent->out->numpages != 0) lay->out_ptr = cpu_to_be64(mlx5_fwp_get_dma(ent->out, 0)); lay->inlen = cpu_to_be32(ent->uin_size); lay->outlen = cpu_to_be32(ent->uout_size); lay->type = MLX5_PCI_CMD_XPORT; lay->token = ent->token; lay->status_own = CMD_OWNER_HW; set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); ent->busy = 0; if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); /* make sure data is written to RAM */ mlx5_fwp_flush(cmd->cmd_page); iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); mmiowb(); /* if not in polling don't use ent after this point */ if (poll_cmd) { poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ mlx5_cmd_comp_handler(dev, 1U << ent->idx, MLX5_CMD_MODE_POLLING); } } static const char *deliv_status_to_str(u8 status) { switch (status) { case MLX5_CMD_DELIVERY_STAT_OK: return "no errors"; case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR: return "signature error"; case MLX5_CMD_DELIVERY_STAT_TOK_ERR: return "token error"; case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR: return "bad block number"; case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR: return "output pointer not aligned to block size"; case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR: return "input pointer not aligned to block size"; case MLX5_CMD_DELIVERY_STAT_FW_ERR: return "firmware internal error"; case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR: return "command input length error"; case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR: return "command ouput length error"; case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR: return "reserved fields not cleared"; case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR: return "bad command descriptor type"; default: return "unknown status code"; } } static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { int timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); int err; if (ent->polling) { wait_for_completion(&ent->done); } else if (!wait_for_completion_timeout(&ent->done, timeout)) { ent->ret = -ETIMEDOUT; mlx5_cmd_comp_handler(dev, 1UL << ent->idx, MLX5_CMD_MODE_EVENTS); } err = ent->ret; if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); } mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, deliv_status_to_str(ent->status), ent->status); return err; } /* Notes: * 1. Callback functions may not sleep * 2. 
page queue commands do not support asynchrous completion */ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, int uin_size, struct mlx5_cmd_msg *out, void *uout, int uout_size, mlx5_cmd_cbk_t callback, void *context, int page_queue, u8 *status, bool force_polling) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; struct mlx5_cmd_stats *stats; int err = 0; s64 ds; u16 op; if (callback && page_queue) return -EINVAL; ent = alloc_cmd(cmd, in, uin_size, out, uout, uout_size, callback, context, page_queue); if (IS_ERR(ent)) return PTR_ERR(ent); ent->polling = force_polling || (cmd->mode == MLX5_CMD_MODE_POLLING); if (!callback) init_completion(&ent->done); INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); INIT_WORK(&ent->work, cmd_work_handler); if (page_queue) { cmd_work_handler(&ent->work); } else if (!queue_work(dev->priv.health.wq_cmd, &ent->work)) { mlx5_core_warn(dev, "failed to queue work\n"); err = -ENOMEM; goto out_free; } if (callback) goto out; err = wait_func(dev, ent); if (err == -ETIMEDOUT) goto out; ds = ent->ts2 - ent->ts1; op = MLX5_GET(mbox_in, in->first.data, opcode); if (op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[op]; spin_lock_irq(&stats->lock); stats->sum += ds; ++stats->n; spin_unlock_irq(&stats->lock); } mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", mlx5_command_str(op), (long long)ds); *status = ent->status; free_cmd(ent); return err; out_free: free_cmd(ent); out: return err; } static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, size_t size) { size_t delta; size_t i; if (to == NULL || from == NULL) return (-ENOMEM); delta = min_t(size_t, size, sizeof(to->first.data)); memcpy(to->first.data, from, delta); from = (char *)from + delta; size -= delta; for (i = 0; size != 0; i++) { struct mlx5_cmd_prot_block *block; block = mlx5_fwp_get_virt(to, i * MLX5_CMD_MBOX_SIZE); delta = min_t(size_t, size, MLX5_CMD_DATA_BLOCK_SIZE); memcpy(block->data, from, delta); from = (char *)from + delta; size -= delta; } return (0); } static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size) { size_t delta; size_t i; if (to == NULL || from == NULL) return (-ENOMEM); delta = min_t(size_t, size, sizeof(from->first.data)); memcpy(to, from->first.data, delta); to = (char *)to + delta; size -= delta; for (i = 0; size != 0; i++) { struct mlx5_cmd_prot_block *block; block = mlx5_fwp_get_virt(from, i * MLX5_CMD_MBOX_SIZE); delta = min_t(size_t, size, MLX5_CMD_DATA_BLOCK_SIZE); memcpy(to, block->data, delta); to = (char *)to + delta; size -= delta; } return (0); } static struct mlx5_cmd_msg * mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev, gfp_t flags, size_t size) { struct mlx5_cmd_msg *msg; size_t blen; size_t n; size_t i; blen = size - min_t(size_t, sizeof(msg->first.data), size); n = howmany(blen, MLX5_CMD_DATA_BLOCK_SIZE); msg = mlx5_fwp_alloc(dev, flags, howmany(n, MLX5_NUM_CMDS_IN_ADAPTER_PAGE)); if (msg == NULL) return (ERR_PTR(-ENOMEM)); for (i = 0; i != n; i++) { struct mlx5_cmd_prot_block *block; block = mlx5_fwp_get_virt(msg, i * MLX5_CMD_MBOX_SIZE); memset(block, 0, MLX5_CMD_MBOX_SIZE); if (i != (n - 1)) { u64 dma = mlx5_fwp_get_dma(msg, (i + 1) * MLX5_CMD_MBOX_SIZE); block->next = cpu_to_be64(dma); } block->block_num = cpu_to_be32(i); } /* make sure initial data is written to RAM */ mlx5_fwp_flush(msg); return (msg); } static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) { mlx5_fwp_free(msg); } static void clean_debug_files(struct 
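/*
 * Illustrative sketch, not part of the change above: mlx5_alloc_cmd_msg()
 * keeps the first sizeof(msg->first.data) bytes (16, given the __be32
 * data[4] field) inline and spreads the remainder over chained mailbox
 * blocks of MLX5_CMD_DATA_BLOCK_SIZE bytes each.  The hypothetical helper
 * below repeats that arithmetic so the block count is easy to verify;
 * e.g. a 272-byte command needs howmany(272 - 16, 512) = 1 extra block,
 * assuming MLX5_CMD_DATA_BLOCK_SIZE is 512 as in the driver headers.
 */
static size_t
cmd_msg_num_blocks(size_t size)
{
    const size_t inline_sz = sizeof(((struct mlx5_cmd_first *)0)->data);
    size_t blen;

    blen = size - min_t(size_t, inline_sz, size);
    return (howmany(blen, MLX5_CMD_DATA_BLOCK_SIZE));
}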
mlx5_core_dev *dev) { } static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; int i; if (cmd->mode == mode) return; for (i = 0; i < cmd->max_reg_cmds; i++) down(&cmd->sem); down(&cmd->pages_sem); cmd->mode = mode; up(&cmd->pages_sem); for (i = 0; i < cmd->max_reg_cmds; i++) up(&cmd->sem); } void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, MLX5_CMD_MODE_EVENTS); } void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) { mlx5_cmd_change_mod(dev, MLX5_CMD_MODE_POLLING); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) { unsigned long flags; if (msg->cache) { spin_lock_irqsave(&msg->cache->lock, flags); list_add_tail(&msg->list, &msg->cache->head); spin_unlock_irqrestore(&msg->cache->lock, flags); } else { mlx5_free_cmd_msg(dev, msg); } } void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vector_flags, enum mlx5_cmd_mode cmd_mode) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_work_ent *ent; bool triggered = (vector_flags & MLX5_TRIGGERED_CMD_COMP) ? 1 : 0; u32 vector = vector_flags; /* discard flags in the upper dword */ int i; /* make sure data gets read from RAM */ mlx5_fwp_invalidate(cmd->cmd_page); while (vector != 0) { i = ffs(vector) - 1; vector &= ~(1U << i); /* check command mode */ if (cmd->ent_mode[i] != cmd_mode) continue; ent = cmd->ent_arr[i]; /* check if command was already handled */ if (ent == NULL) continue; if (ent->callback) cancel_delayed_work(&ent->cb_timeout_work); ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); /* make sure data gets read from RAM */ mlx5_fwp_invalidate(ent->out); dump_command(dev, ent, 0); if (!ent->ret) { if (!cmd->checksum_disabled) ent->ret = verify_signature(ent); else ent->ret = 0; if (triggered) ent->status = MLX5_DRIVER_STATUS_ABORTED; else ent->status = ent->lay->status_own >> 1; mlx5_core_dbg(dev, "FW command ret 0x%x, status %s(0x%x)\n", ent->ret, deliv_status_to_str(ent->status), ent->status); } free_ent(cmd, ent->idx); complete_command(ent); } } EXPORT_SYMBOL(mlx5_cmd_comp_handler); static int status_to_err(u8 status) { return status ? 
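/*
 * Illustrative sketch, not part of the change above: free_msg() only
 * returns a mailbox to the cache it was taken from (msg->cache is set by
 * create_msg_cache()); everything else is freed outright.  alloc_msg(),
 * shown just below, picks the cache purely by input size.  The
 * hypothetical helper restates that selection; MED_LIST_SIZE and
 * LONG_LIST_SIZE are the driver's two pre-allocated mailbox sizes.
 */
static struct cache_ent *
example_pick_msg_cache(struct mlx5_cmd *cmd, int in_size)
{
    if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
        return (&cmd->cache.large);
    if (in_size > 16 && in_size <= MED_LIST_SIZE)
        return (&cmd->cache.med);
    return (NULL);      /* allocate a fresh message instead */
}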
-EIO : 0; /* TBD more meaningful codes */ } static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, gfp_t gfp) { struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM); struct mlx5_cmd *cmd = &dev->cmd; struct cache_ent *ent = NULL; if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE) ent = &cmd->cache.large; else if (in_size > 16 && in_size <= MED_LIST_SIZE) ent = &cmd->cache.med; if (ent) { spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { msg = list_entry(ent->head.next, struct mlx5_cmd_msg, list); list_del(&msg->list); } spin_unlock_irq(&ent->lock); } if (IS_ERR(msg)) msg = mlx5_alloc_cmd_msg(dev, gfp, in_size); return msg; } static int is_manage_pages(void *in) { return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; } static int cmd_exec_helper(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context, bool force_polling) { struct mlx5_cmd_msg *inb; struct mlx5_cmd_msg *outb; int pages_queue; const gfp_t gfp = GFP_KERNEL; int err; u8 status = 0; u32 drv_synd; if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { u16 opcode = MLX5_GET(mbox_in, in, opcode); err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, syndrome, drv_synd); return err; } pages_queue = is_manage_pages(in); inb = alloc_msg(dev, in_size, gfp); if (IS_ERR(inb)) { err = PTR_ERR(inb); return err; } err = mlx5_copy_to_msg(inb, in, in_size); if (err) { mlx5_core_warn(dev, "err %d\n", err); goto out_in; } outb = mlx5_alloc_cmd_msg(dev, gfp, out_size); if (IS_ERR(outb)) { err = PTR_ERR(outb); goto out_in; } err = mlx5_cmd_invoke(dev, inb, in_size, outb, out, out_size, callback, context, pages_queue, &status, force_polling); if (err) { if (err == -ETIMEDOUT) return err; goto out_out; } mlx5_core_dbg(dev, "err %d, status %d\n", err, status); if (status) { err = status_to_err(status); goto out_out; } if (callback) return err; err = mlx5_copy_from_msg(out, outb, out_size); out_out: mlx5_free_cmd_msg(dev, outb); out_in: free_msg(dev, inb); return err; } int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { int err; err = cmd_exec_helper(dev, in, in_size, out, out_size, NULL, NULL, false); return err ? : mlx5_cmd_check(dev, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); -int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, - void *out, int out_size, mlx5_cmd_cbk_t callback, - void *context) +void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev, + struct mlx5_async_ctx *ctx) { - return cmd_exec_helper(dev, in, in_size, out, out_size, callback, context, false); + ctx->dev = dev; + /* Starts at 1 to avoid doing wake_up if we are not cleaning up */ + atomic_set(&ctx->num_inflight, 1); + init_waitqueue_head(&ctx->wait); +} +EXPORT_SYMBOL(mlx5_cmd_init_async_ctx); + +/** + * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx + * @ctx: The ctx to clean + * + * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The + * caller must ensure that mlx5_cmd_exec_cb() is not called during or after + * the call mlx5_cleanup_async_ctx(). 
+ */ +void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx) +{ + atomic_dec(&ctx->num_inflight); + wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0); +} +EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx); + +static void mlx5_cmd_exec_cb_handler(int status, void *_work) +{ + struct mlx5_async_work *work = _work; + struct mlx5_async_ctx *ctx = work->ctx; + + work->user_callback(status, work); + if (atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); +} + +int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, + void *out, int out_size, mlx5_async_cbk_t callback, + struct mlx5_async_work *work) +{ + int ret; + + work->ctx = ctx; + work->user_callback = callback; + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) + return -EIO; + ret = cmd_exec_helper(ctx->dev, in, in_size, out, out_size, + mlx5_cmd_exec_cb_handler, work, false); + if (ret && atomic_dec_and_test(&ctx->num_inflight)) + wake_up(&ctx->wait); + + return ret; } EXPORT_SYMBOL(mlx5_cmd_exec_cb); int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { int err; err = cmd_exec_helper(dev, in, in_size, out, out_size, NULL, NULL, true); return err ? : mlx5_cmd_check(dev, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec_polling); static void destroy_msg_cache(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_msg *msg; struct mlx5_cmd_msg *n; list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) { list_del(&msg->list); mlx5_free_cmd_msg(dev, msg); } list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) { list_del(&msg->list); mlx5_free_cmd_msg(dev, msg); } } static int create_msg_cache(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; struct mlx5_cmd_msg *msg; int err; int i; spin_lock_init(&cmd->cache.large.lock); INIT_LIST_HEAD(&cmd->cache.large.head); spin_lock_init(&cmd->cache.med.lock); INIT_LIST_HEAD(&cmd->cache.med.head); for (i = 0; i < NUM_LONG_LISTS; i++) { msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; } msg->cache = &cmd->cache.large; list_add_tail(&msg->list, &cmd->cache.large.head); } for (i = 0; i < NUM_MED_LISTS; i++) { msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE); if (IS_ERR(msg)) { err = PTR_ERR(msg); goto ex_err; } msg->cache = &cmd->cache.med; list_add_tail(&msg->list, &cmd->cache.med.head); } return 0; ex_err: destroy_msg_cache(dev); return err; } static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { int err; sx_init(&cmd->dma_sx, "MLX5-DMA-SX"); mtx_init(&cmd->dma_mtx, "MLX5-DMA-MTX", NULL, MTX_DEF); cv_init(&cmd->dma_cv, "MLX5-DMA-CV"); /* * Create global DMA descriptor tag for allocating * 4K firmware pages: */ err = -bus_dma_tag_create( bus_get_dma_tag(dev->pdev->dev.bsddev), MLX5_ADAPTER_PAGE_SIZE, /* alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5_ADAPTER_PAGE_SIZE, /* maxsize */ 1, /* nsegments */ MLX5_ADAPTER_PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &cmd->dma_tag); if (err != 0) goto failure_destroy_sx; cmd->cmd_page = mlx5_fwp_alloc(dev, GFP_KERNEL, 1); if (cmd->cmd_page == NULL) { err = -ENOMEM; goto failure_alloc_page; } cmd->dma = mlx5_fwp_get_dma(cmd->cmd_page, 0); cmd->cmd_buf = mlx5_fwp_get_virt(cmd->cmd_page, 0); return (0); failure_alloc_page: bus_dma_tag_destroy(cmd->dma_tag); failure_destroy_sx: cv_destroy(&cmd->dma_cv); 
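/*
 * Illustrative sketch, not part of the change above: typical lifecycle of
 * the new mlx5_async_ctx API.  A long-lived context is initialized once,
 * any number of commands are issued through mlx5_cmd_exec_cb(), and
 * mlx5_cmd_cleanup_async_ctx() blocks until every callback has run.  The
 * container_of() pattern in the callback mirrors what reg_mr_callback()
 * in mlx5_ib_mr.c does with its embedded cb_work.  The struct, callback
 * and buffer names here are hypothetical, and the in/out mailboxes are
 * assumed to be built elsewhere with the usual MLX5_SET() helpers.
 */
struct example_async_cmd {
    struct mlx5_async_work work;    /* must stay alive until the callback */
    u32 *out;
    int out_size;
};

static void
example_cmd_done(int status, struct mlx5_async_work *work)
{
    struct example_async_cmd *cmd =
        container_of(work, struct example_async_cmd, work);

    /* May not sleep here; runs from the command completion path. */
    if (status != 0)
        pr_warn("example command failed: %d\n", status);
    /* ... consume cmd->out ... */
}

static int
example_issue_async(struct mlx5_async_ctx *ctx, struct example_async_cmd *cmd,
    void *in, int in_size)
{
    return (mlx5_cmd_exec_cb(ctx, in, in_size, cmd->out, cmd->out_size,
        example_cmd_done, &cmd->work));
}

/*
 * Setup and teardown happen once per consumer; mlx5_ib, for instance,
 * initializes its context from mlx5_mr_cache_init():
 *
 *    mlx5_cmd_init_async_ctx(mdev, &ctx);
 *    ... example_issue_async(&ctx, ...) as needed ...
 *    mlx5_cmd_cleanup_async_ctx(&ctx);   // waits for all callbacks
 */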
mtx_destroy(&cmd->dma_mtx); sx_destroy(&cmd->dma_sx); return (err); } static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd) { mlx5_fwp_free(cmd->cmd_page); bus_dma_tag_destroy(cmd->dma_tag); cv_destroy(&cmd->dma_cv); mtx_destroy(&cmd->dma_mtx); sx_destroy(&cmd->dma_sx); } int mlx5_cmd_init(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; u32 cmd_h, cmd_l; u16 cmd_if_rev; int err; int i; memset(cmd, 0, sizeof(*cmd)); cmd_if_rev = cmdif_rev_get(dev); if (cmd_if_rev != CMD_IF_REV) { mlx5_core_err(dev, "Driver cmdif rev(%d) differs from firmware's(%d)\n", CMD_IF_REV, cmd_if_rev); return -EINVAL; } err = alloc_cmd_page(dev, cmd); if (err) goto err_free_pool; cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff; cmd->log_sz = cmd_l >> 4 & 0xf; cmd->log_stride = cmd_l & 0xf; if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) { mlx5_core_err(dev, "firmware reports too many outstanding commands %d\n", 1 << cmd->log_sz); err = -EINVAL; goto err_free_page; } if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) { mlx5_core_err(dev, "command queue size overflow\n"); err = -EINVAL; goto err_free_page; } cmd->checksum_disabled = 1; cmd->max_reg_cmds = (1 << cmd->log_sz) - 1; cmd->bitmask = (1 << cmd->max_reg_cmds) - 1; cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16; if (cmd->cmdif_rev > CMD_IF_REV) { mlx5_core_err(dev, "driver does not support command interface version. driver %d, firmware %d\n", CMD_IF_REV, cmd->cmdif_rev); err = -ENOTSUPP; goto err_free_page; } spin_lock_init(&cmd->alloc_lock); spin_lock_init(&cmd->token_lock); for (i = 0; i < ARRAY_SIZE(cmd->stats); i++) spin_lock_init(&cmd->stats[i].lock); sema_init(&cmd->sem, cmd->max_reg_cmds); sema_init(&cmd->pages_sem, 1); cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); if (cmd_l & 0xfff) { mlx5_core_err(dev, "invalid command queue address\n"); err = -ENOMEM; goto err_free_page; } iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); /* Make sure firmware sees the complete address before we proceed */ wmb(); mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma)); cmd->mode = MLX5_CMD_MODE_POLLING; err = create_msg_cache(dev); if (err) { mlx5_core_err(dev, "failed to create command cache\n"); goto err_free_page; } return 0; err_free_page: free_cmd_page(dev, cmd); err_free_pool: return err; } EXPORT_SYMBOL(mlx5_cmd_init); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; clean_debug_files(dev); flush_workqueue(dev->priv.health.wq_cmd); destroy_msg_cache(dev); free_cmd_page(dev, cmd); } EXPORT_SYMBOL(mlx5_cmd_cleanup); int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, bool reset, void *out, int out_size) { u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; MLX5_SET(query_cong_statistics_in, in, opcode, MLX5_CMD_OP_QUERY_CONG_STATISTICS); MLX5_SET(query_cong_statistics_in, in, clear, reset); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL(mlx5_cmd_query_cong_counter); int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size) { u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { }; MLX5_SET(query_cong_params_in, in, opcode, MLX5_CMD_OP_QUERY_CONG_PARAMS); MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point); return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL(mlx5_cmd_query_cong_params); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, void *in, int in_size) { u32 
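/*
 * Illustrative sketch, not part of the change above: mlx5_cmd_init() reads
 * the command-queue geometry from the low byte of cmdq_addr_l_sz.  The
 * hypothetical helper below shows only that decoding; for example a value
 * of 0x46 means 1 << 4 = 16 command entries with a stride of 1 << 6 = 64
 * bytes, which fits the 4K adapter page checked above
 * (4 + 6 <= MLX5_ADAPTER_PAGE_SHIFT).
 */
static void
example_decode_cmdq_sz(u32 cmdq_addr_l_sz, u8 *log_sz, u8 *log_stride)
{
    u32 cmd_l = cmdq_addr_l_sz & 0xff;

    *log_sz = (cmd_l >> 4) & 0xf;   /* log2 of number of entries */
    *log_stride = cmd_l & 0xf;      /* log2 of entry size in bytes */
}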
out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { }; return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_cmd_modify_cong_params); Index: stable/11/sys/dev/mlx5/mlx5_core/mlx5_mr.c =================================================================== --- stable/11/sys/dev/mlx5/mlx5_core/mlx5_mr.c (revision 368225) +++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_mr.c (revision 368226) @@ -1,219 +1,220 @@ /*- * Copyright (c) 2013-2018, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include "mlx5_core.h" static int mlx5_relaxed_ordering_write; SYSCTL_INT(_hw_mlx5, OID_AUTO, relaxed_ordering_write, CTLFLAG_RWTUN, &mlx5_relaxed_ordering_write, 0, "Set to enable relaxed ordering for PCIe writes"); void mlx5_init_mr_table(struct mlx5_core_dev *dev) { struct mlx5_mr_table *table = &dev->priv.mr_table; memset(table, 0, sizeof(*table)); spin_lock_init(&table->lock); INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); } void mlx5_cleanup_mr_table(struct mlx5_core_dev *dev) { } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey, - u32 *in, int inlen, - u32 *out, int outlen, - mlx5_cmd_cbk_t callback, void *context) + struct mlx5_async_ctx *async_ctx, u32 *in, + int inlen, u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) { struct mlx5_mr_table *table = &dev->priv.mr_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; u32 mkey_index; void *mkc; unsigned long flags; int err; u8 key; spin_lock_irq(&dev->priv.mkey_lock); key = dev->priv.mkey_key++; spin_unlock_irq(&dev->priv.mkey_lock); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); MLX5_SET(mkc, mkc, mkey_7_0, key); if (mlx5_relaxed_ordering_write != 0) { if (MLX5_CAP_GEN(dev, relaxed_ordering_write)) MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); else return (-EPROTONOSUPPORT); } if (callback) - return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, callback, context); err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); if (err) { mlx5_core_dbg(dev, "cmd exec failed %d\n", err); return err; } mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = 
MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); mkey->key = mlx5_idx_to_mkey(mkey_index) | key; mkey->pd = MLX5_GET(mkc, mkc, pd); mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); /* connect to MR tree */ spin_lock_irqsave(&table->lock, flags); err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mkey->key), mkey); spin_unlock_irqrestore(&table->lock, flags); if (err) { mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", mkey->key, err); mlx5_core_destroy_mkey(dev, mkey); } return err; } EXPORT_SYMBOL(mlx5_core_create_mkey_cb); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey, u32 *in, int inlen) { - return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, NULL, 0, NULL, NULL); } EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey) { struct mlx5_mr_table *table = &dev->priv.mr_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct mlx5_core_mr *deleted_mr; unsigned long flags; spin_lock_irqsave(&table->lock, flags); deleted_mr = radix_tree_delete(&table->tree, mlx5_mkey_to_idx(mkey->key)); spin_unlock_irqrestore(&table->lock, flags); if (!deleted_mr) { mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", mkey->key); return -ENOENT; } MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey, u32 *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0}; memset(out, 0, outlen); MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_mkey); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *_mkey, u32 *mkey) { u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0}; u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0}; int err; MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *mkey = MLX5_GET(query_special_contexts_out, out, dump_fill_mkey); return err; } EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); static inline u32 mlx5_get_psv(u32 *out, int psv_index) { switch (psv_index) { case 1: return MLX5_GET(create_psv_out, out, psv1_index); case 2: return MLX5_GET(create_psv_out, out, psv2_index); case 3: return MLX5_GET(create_psv_out, out, psv3_index); default: return MLX5_GET(create_psv_out, out, psv0_index); } } int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index) { u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0}; int i, err; if (npsvs > MLX5_MAX_PSVS) return -EINVAL; MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); MLX5_SET(create_psv_in, in, pd, pdn); MLX5_SET(create_psv_in, in, num_psv, npsvs); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) { mlx5_core_err(dev, "create_psv cmd exec failed %d\n", err); return err; } for (i = 0; i < npsvs; i++) sig_index[i] = mlx5_get_psv(out, i); return err; } EXPORT_SYMBOL(mlx5_core_create_psv); int 
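/*
 * Illustrative sketch, not part of the change above: a minimal synchronous
 * caller of mlx5_core_create_mkey().  Only fields that already appear in
 * this file are set; a real memory key also needs the permission and
 * translation fields from the mlx5_ifc layouts filled in, and an
 * asynchronous caller would go through mlx5_core_create_mkey_cb() with an
 * mlx5_async_ctx instead.  The function name and the pdn argument are
 * hypothetical.
 */
static int
example_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mkey,
    u32 pdn)
{
    int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
    void *mkc;
    u32 *in;
    int err;

    in = kzalloc(inlen, GFP_KERNEL);
    if (in == NULL)
        return (-ENOMEM);

    mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
    MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
    MLX5_SET(mkc, mkc, qpn, 0xffffff);  /* not bound to a QP */
    MLX5_SET(mkc, mkc, pd, pdn);
    /* ... permission bits, length, start address, etc. ... */

    err = mlx5_core_create_mkey(dev, mkey, in, inlen);
    kfree(in);
    return (err);
}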
mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) { u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0}; MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); MLX5_SET(destroy_psv_in, in, psvn, psv_num); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_psv); Index: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib.h =================================================================== --- stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib.h (revision 368225) +++ stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib.h (revision 368226) @@ -1,1060 +1,1063 @@ /*- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef MLX5_IB_H #define MLX5_IB_H #include #include #include #include #include #include #include #include #include #include #include #include #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) #define mlx5_ib_err(dev, format, arg...) \ pr_err("%s: ERR: %s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) #define mlx5_ib_warn(dev, format, arg...) 
\ pr_warn("%s: WARN: %s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ __LINE__, current->pid, ##arg) #define field_avail(type, fld, sz) (offsetof(type, fld) + \ sizeof(((type *)0)->fld) <= (sz)) #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, }; enum mlx5_ib_mmap_cmd { MLX5_IB_MMAP_REGULAR_PAGE = 0, MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, MLX5_IB_MMAP_WC_PAGE = 2, MLX5_IB_MMAP_NC_PAGE = 3, /* 5 is chosen in order to be compatible with old versions of libmlx5 */ MLX5_IB_MMAP_CORE_CLOCK = 5, }; enum { MLX5_RES_SCAT_DATA32_CQE = 0x1, MLX5_RES_SCAT_DATA64_CQE = 0x2, MLX5_REQ_SCAT_DATA32_CQE = 0x11, MLX5_REQ_SCAT_DATA64_CQE = 0x22, }; enum mlx5_ib_latency_class { MLX5_IB_LATENCY_CLASS_LOW, MLX5_IB_LATENCY_CLASS_MEDIUM, MLX5_IB_LATENCY_CLASS_HIGH, MLX5_IB_LATENCY_CLASS_FAST_PATH }; enum mlx5_ib_mad_ifc_flags { MLX5_MAD_IFC_IGNORE_MKEY = 1, MLX5_MAD_IFC_IGNORE_BKEY = 2, MLX5_MAD_IFC_NET_VIEW = 4, }; enum { MLX5_CROSS_CHANNEL_UUAR = 0, }; enum { MLX5_CQE_VERSION_V0, MLX5_CQE_VERSION_V1, }; struct mlx5_ib_vma_private_data { struct list_head list; struct vm_area_struct *vma; }; struct mlx5_ib_ucontext { struct ib_ucontext ibucontext; struct list_head db_page_list; /* protect doorbell record alloc/free */ struct mutex db_page_mutex; struct mlx5_uuar_info uuari; u8 cqe_version; /* Transport Domain number */ u32 tdn; struct list_head vma_private_list; }; static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) { return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext); } struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; }; #define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1) #define MLX5_IB_FLOW_LAST_PRIO (MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1) #if (MLX5_IB_FLOW_LAST_PRIO <= 0) #error "Invalid number of bypass priorities" #endif #define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) #define MLX5_IB_NUM_SNIFFER_FTS 2 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; }; struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_rule *rule; }; struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. * only single add/removal of flow steering rule could be done * simultaneously. */ struct mutex lock; }; /* Use macros here so that don't have to duplicate * enum ib_send_flags and enum ib_qp_type for low-level driver */ #define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1) #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2) #define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3) #define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4) #define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END #define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 /* * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI * creates the actual hardware QP. */ #define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2 #define MLX5_IB_WR_UMR IB_WR_RESERVED1 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags. 
* * These flags are intended for internal use by the mlx5_ib driver, and they * rely on the range reserved for that use in the ib_qp_create_flags enum. */ /* Create a UD QP whose source QP number is 1 */ static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void) { return IB_QP_CREATE_RESERVED_START; } struct wr_list { u16 opcode; u16 next; }; struct mlx5_ib_wq { u64 *wrid; u32 *wr_data; struct wr_list *w_list; unsigned *wqe_head; u16 unsig_count; /* serialize post to the work queue */ spinlock_t lock; int wqe_cnt; int max_post; int max_gs; int offset; int wqe_shift; unsigned head; unsigned tail; u16 cur_post; u16 last_poll; void *qend; }; struct mlx5_ib_rwq { struct ib_wq ibwq; struct mlx5_core_qp core_qp; u32 rq_num_pas; u32 log_rq_stride; u32 log_rq_size; u32 rq_page_offset; u32 log_page_size; struct ib_umem *umem; size_t buf_size; unsigned int page_shift; int create_type; struct mlx5_db db; u32 user_index; u32 wqe_count; u32 wqe_shift; int wq_sig; }; enum { MLX5_QP_USER, MLX5_QP_KERNEL, MLX5_QP_EMPTY }; enum { MLX5_WQ_USER, MLX5_WQ_KERNEL }; struct mlx5_ib_rwq_ind_table { struct ib_rwq_ind_table ib_rwq_ind_tbl; u32 rqtn; }; /* * Connect-IB can trigger up to four concurrent pagefaults * per-QP. */ enum mlx5_ib_pagefault_context { MLX5_IB_PAGEFAULT_RESPONDER_READ, MLX5_IB_PAGEFAULT_REQUESTOR_READ, MLX5_IB_PAGEFAULT_RESPONDER_WRITE, MLX5_IB_PAGEFAULT_REQUESTOR_WRITE, MLX5_IB_PAGEFAULT_CONTEXTS }; static inline enum mlx5_ib_pagefault_context mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault) { return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE); } struct mlx5_ib_pfault { struct work_struct work; struct mlx5_pagefault mpfault; }; struct mlx5_ib_ubuffer { struct ib_umem *umem; int buf_size; u64 buf_addr; }; struct mlx5_ib_qp_base { struct mlx5_ib_qp *container_mibqp; struct mlx5_core_qp mqp; struct mlx5_ib_ubuffer ubuffer; }; struct mlx5_ib_qp_trans { struct mlx5_ib_qp_base base; u16 xrcdn; u8 alt_port; u8 atomic_rd_en; u8 resp_depth; }; struct mlx5_ib_rss_qp { u32 tirn; }; struct mlx5_ib_rq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *rq; struct mlx5_ib_ubuffer ubuffer; struct mlx5_db *doorbell; u32 tirn; u8 state; }; struct mlx5_ib_sq { struct mlx5_ib_qp_base base; struct mlx5_ib_wq *sq; struct mlx5_ib_ubuffer ubuffer; struct mlx5_db *doorbell; u32 tisn; u8 state; }; struct mlx5_ib_raw_packet_qp { struct mlx5_ib_sq sq; struct mlx5_ib_rq rq; }; struct mlx5_ib_qp { struct ib_qp ibqp; union { struct mlx5_ib_qp_trans trans_qp; struct mlx5_ib_raw_packet_qp raw_packet_qp; struct mlx5_ib_rss_qp rss_qp; }; struct mlx5_buf buf; struct mlx5_db db; struct mlx5_ib_wq rq; u8 sq_signal_bits; u8 fm_cache; struct mlx5_ib_wq sq; /* serialize qp state modifications */ struct mutex mutex; u32 flags; u8 port; u8 state; int wq_sig; int scat_cqe; int max_inline_data; struct mlx5_bf *bf; int has_rq; /* only for user space QPs. For kernel * we have it from the bf object */ int uuarn; int create_type; /* Store signature errors */ bool signature_en; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* * A flag that is true for QP's that are in a state that doesn't * allow page faults, and shouldn't schedule any more faults. */ int disable_page_faults; /* * The disable_page_faults_lock protects a QP's disable_page_faults * field, allowing for a thread to atomically check whether the QP * allows page faults, and if so schedule a page fault. 
*/ spinlock_t disable_page_faults_lock; struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS]; #endif struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; }; struct mlx5_ib_cq_buf { struct mlx5_buf buf; struct ib_umem *umem; int cqe_size; int nent; }; enum mlx5_ib_qp_flags { MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL, MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND, MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV, MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5, /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { struct ib_send_wr wr; union { u64 virt_addr; u64 offset; } target; struct ib_pd *pd; unsigned int page_shift; unsigned int npages; u32 length; int access_flags; u32 mkey; }; static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr) { return container_of(wr, struct mlx5_umr_wr, wr); } struct mlx5_shared_mr_info { int mr_id; struct ib_umem *umem; }; struct mlx5_ib_cq { struct ib_cq ibcq; struct mlx5_core_cq mcq; struct mlx5_ib_cq_buf buf; struct mlx5_db db; /* serialize access to the CQ */ spinlock_t lock; /* protect resize cq */ struct mutex resize_mutex; struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; struct list_head list_send_qp; struct list_head list_recv_qp; u32 create_flags; struct list_head wc_list; enum ib_cq_notify_flags notify_flags; struct work_struct notify_work; }; struct mlx5_ib_wc { struct ib_wc wc; struct list_head list; }; struct mlx5_ib_srq { struct ib_srq ibsrq; struct mlx5_core_srq msrq; struct mlx5_buf buf; struct mlx5_db db; u64 *wrid; /* protect SRQ hanlding */ spinlock_t lock; int head; int tail; u16 wqe_ctr; struct ib_umem *umem; /* serialize arming a SRQ */ struct mutex mutex; int wq_sig; }; struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; }; enum mlx5_ib_mtt_access_flags { MLX5_IB_MTT_READ = (1 << 0), MLX5_IB_MTT_WRITE = (1 << 1), }; #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE) struct mlx5_ib_mr { struct ib_mr ibmr; void *descs; dma_addr_t desc_map; int ndescs; int max_descs; int desc_size; int access_mode; struct mlx5_core_mr mmkey; struct ib_umem *umem; struct mlx5_shared_mr_info *smr_info; struct list_head list; int order; int umred; int npages; struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ + struct mlx5_async_work cb_work; }; struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mr mmkey; }; struct mlx5_ib_umr_context { struct ib_cqe cqe; enum ib_wc_status status; struct completion done; }; struct umr_common { struct ib_pd *pd; struct ib_cq *cq; struct ib_qp *qp; /* control access to UMR QP */ struct semaphore sem; }; enum { MLX5_FMR_INVALID, MLX5_FMR_VALID, MLX5_FMR_BUSY, }; struct mlx5_cache_ent { struct list_head head; /* sync access to the cahce entry */ spinlock_t lock; struct dentry *dir; char name[4]; u32 order; u32 size; u32 cur; u32 miss; u32 limit; struct dentry *fsize; struct dentry *fcur; struct dentry *fmiss; struct dentry *flimit; struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; int pending; }; struct mlx5_mr_cache { struct workqueue_struct *wq; struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; int stopped; struct dentry *root; unsigned long last_add; 
}; struct mlx5_ib_gsi_qp; struct mlx5_ib_port_resources { struct mlx5_ib_resources *devr; struct mlx5_ib_gsi_qp *gsi; struct work_struct pkey_change_work; }; struct mlx5_ib_resources { struct ib_cq *c0; struct ib_xrcd *x0; struct ib_xrcd *x1; struct ib_pd *p0; struct ib_srq *s0; struct ib_srq *s1; struct mlx5_ib_port_resources ports[2]; /* Protects changes to the port resources */ struct mutex mutex; }; struct mlx5_ib_port { u16 q_cnt_id; }; struct mlx5_roce { /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL * netdev pointer */ rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; atomic_t next_port; }; #define MLX5_IB_STATS_COUNT(a,b,c,d) a #define MLX5_IB_STATS_VAR(a,b,c,d) b; #define MLX5_IB_STATS_DESC(a,b,c,d) c, d, #define MLX5_IB_CONG_PARAMS(m) \ /* ECN RP */ \ m(+1, u64 rp_clamp_tgt_rate, "rp_clamp_tgt_rate", "If set, whenever a CNP is processed, the target rate is updated to be the current rate") \ m(+1, u64 rp_clamp_tgt_rate_ati, "rp_clamp_tgt_rate_ati", "If set, when receiving a CNP, the target rate should be updated if the transission rate was increased due to the timer, and not only due to the byte counter") \ m(+1, u64 rp_time_reset, "rp_time_reset", "Time in microseconds between rate increases if no CNPs are received") \ m(+1, u64 rp_byte_reset, "rp_byte_reset", "Transmitted data in bytes between rate increases if no CNP's are received. A value of zero means disabled.") \ m(+1, u64 rp_threshold, "rp_threshold", "The number of times rpByteStage or rpTimeStage can count before the RP rate control state machine advances states") \ m(+1, u64 rp_ai_rate, "rp_ai_rate", "The rate, in Mbits per second, used to increase rpTargetRate in the active increase state") \ m(+1, u64 rp_hai_rate, "rp_hai_rate", "The rate, in Mbits per second, used to increase rpTargetRate in the hyper increase state") \ m(+1, u64 rp_min_dec_fac, "rp_min_dec_fac", "The minimum factor by which the current transmit rate can be changed when processing a CNP. Value is given as a percentage, [1 .. 100]") \ m(+1, u64 rp_min_rate, "rp_min_rate", "The minimum value, in Mbps per second, for rate to limit") \ m(+1, u64 rp_rate_to_set_on_first_cnp, "rp_rate_to_set_on_first_cnp", "The rate that is set for the flow when a rate limiter is allocated to it upon first CNP received, in Mbps. 
A value of zero means use full port speed") \ m(+1, u64 rp_dce_tcp_g, "rp_dce_tcp_g", "Used to update the congestion estimator, alpha, once every dce_tcp_rtt once every dce_tcp_rtt microseconds") \ m(+1, u64 rp_dce_tcp_rtt, "rp_dce_tcp_rtt", "The time between updates of the aolpha value, in microseconds") \ m(+1, u64 rp_rate_reduce_monitor_period, "rp_rate_reduce_monitor_period", "The minimum time between two consecutive rate reductions for a single flow") \ m(+1, u64 rp_initial_alpha_value, "rp_initial_alpha_value", "The initial value of alpha to use when receiving the first CNP for a flow") \ m(+1, u64 rp_gd, "rp_gd", "If a CNP is received, the flow rate is reduced at the beginning of the next rate_reduce_monitor_period interval") \ /* ECN NP */ \ m(+1, u64 np_cnp_dscp, "np_cnp_dscp", "The DiffServ Code Point of the generated CNP for this port") \ m(+1, u64 np_cnp_prio_mode, "np_cnp_prio_mode", "The 802.1p priority value of the generated CNP for this port") \ m(+1, u64 np_cnp_prio, "np_cnp_prio", "The 802.1p priority value of the generated CNP for this port") #define MLX5_IB_CONG_PARAMS_NUM (0 MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_COUNT)) #define MLX5_IB_CONG_STATS(m) \ m(+1, u64 syndrome, "syndrome", "Syndrome number") \ m(+1, u64 rp_cur_flows, "rp_cur_flows", "Number of flows limited") \ m(+1, u64 sum_flows, "sum_flows", "Sum of the number of flows limited over time") \ m(+1, u64 rp_cnp_ignored, "rp_cnp_ignored", "Number of CNPs and CNMs ignored") \ m(+1, u64 rp_cnp_handled, "rp_cnp_handled", "Number of CNPs and CNMs successfully handled") \ m(+1, u64 time_stamp, "time_stamp", "Time stamp in microseconds") \ m(+1, u64 accumulators_period, "accumulators_period", "The value of X variable for accumulating counters") \ m(+1, u64 np_ecn_marked_roce_packets, "np_ecn_marked_roce_packets", "Number of ECN marked packets seen") \ m(+1, u64 np_cnp_sent, "np_cnp_sent", "Number of CNPs sent") #define MLX5_IB_CONG_STATS_NUM (0 MLX5_IB_CONG_STATS(MLX5_IB_STATS_COUNT)) struct mlx5_ib_congestion { struct sysctl_ctx_list ctx; struct sx lock; struct delayed_work dwork; union { u64 arg[1]; struct { MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR) MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR) }; }; }; struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; struct mlx5_roce roce; MLX5_DECLARE_DOORBELL_LOCK(uar_lock); int num_ports; /* serialize update of capability mask */ struct mutex cap_mask_mutex; bool ib_active; struct umr_common umrc; /* sync used page count stats */ struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ struct mutex slow_path_mutex; int fill_delay; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. 
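/*
 * Illustrative sketch, not part of the change above: the congestion
 * counters are generated from an X-macro list.  MLX5_IB_STATS_COUNT keeps
 * only the "+1" of each m(...) entry, so "(0 MLX5_IB_CONG_STATS(...))"
 * evaluates to the number of entries; MLX5_IB_STATS_VAR keeps only the
 * field declaration; MLX5_IB_STATS_DESC keeps the name/description string
 * pair for sysctl registration.  The shortened, hypothetical list below
 * shows the same three expansions on two entries.
 */
#define EXAMPLE_STATS(m)                                            \
    m(+1, u64 syndrome, "syndrome", "Syndrome number")              \
    m(+1, u64 np_cnp_sent, "np_cnp_sent", "Number of CNPs sent")

#define EXAMPLE_STATS_NUM (0 EXAMPLE_STATS(MLX5_IB_STATS_COUNT))    /* == 2 */

struct example_stats {
    union {
        u64 arg[EXAMPLE_STATS_NUM];
        struct {
            EXAMPLE_STATS(MLX5_IB_STATS_VAR) /* u64 syndrome; u64 np_cnp_sent; */
        };
    };
};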
*/ struct srcu_struct mr_srcu; #endif struct mlx5_ib_flow_db flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_ib_congestion congestion; + + struct mlx5_async_ctx async_ctx; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) { return container_of(mcq, struct mlx5_ib_cq, mcq); } static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) { return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd); } static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx5_ib_dev, ib_dev); } static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx5_ib_cq, ibcq); } static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) { return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) { return container_of(core_qp, struct mlx5_ib_rwq, core_qp); } static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmkey) { return container_of(mmkey, struct mlx5_ib_mr, mmkey); } static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); } static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq) { return container_of(ibsrq, struct mlx5_ib_srq, ibsrq); } static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx5_ib_qp, ibqp); } static inline struct mlx5_ib_rwq *to_mrwq(struct ib_wq *ibwq) { return container_of(ibwq, struct mlx5_ib_rwq, ibwq); } static inline struct mlx5_ib_rwq_ind_table *to_mrwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) { return container_of(ib_rwq_ind_tbl, struct mlx5_ib_rwq_ind_table, ib_rwq_ind_tbl); } static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) { return container_of(msrq, struct mlx5_ib_srq, msrq); } static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) { return container_of(ibmr, struct mlx5_ib_mr, ibmr); } static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw) { return container_of(ibmw, struct mlx5_ib_mw, ibmw); } struct mlx5_ib_ah { struct ib_ah ibah; struct mlx5_av av; }; static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) { return container_of(ibah, struct mlx5_ib_ah, ibah); } int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db); void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr 
*srq_attr); int mlx5_ib_destroy_srq(struct ib_srq *srq); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_destroy_qp(struct ib_qp *qp); int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_destroy_cq(struct ib_cq *cq); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap); int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *mlx5_ib_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, int access_flags, u64 *virt_addr); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, u16 *out_mad_pkey_index); struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, u16 *max_pkeys); int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev, u32 *vendor_id); int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc); int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid); int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index, 
union ib_gid *gid); int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order); void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, __be64 *pas, int access_flags); void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_wq(struct ib_wq *wq); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, struct ib_rwq_ind_table_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING extern struct workqueue_struct *mlx5_ib_page_fault_wq; void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault); void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp); void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, unsigned long end); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { return; } static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {} static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {} static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, int state); int mlx5_ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, int index); int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num, int index, enum ib_gid_type *gid_type); /* GSI QP helper functions */ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, 
struct ib_qp_init_attr *init_attr); int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask); int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi); int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); static inline void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; mad->class_version = 1; mad->method = IB_MGMT_METHOD_GET; } static inline u8 convert_access(int acc) { return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | MLX5_PERM_LOCAL_READ; } static inline int is_qp1(enum ib_qp_type qp_type) { return qp_type == MLX5_IB_QPT_HW_GSI; } #define MLX5_MAX_UMR_SHIFT 16 #define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT) static inline u32 check_cq_create_flags(u32 flags) { /* * It returns non-zero value for unsupported CQ * create flags, otherwise it returns zero. */ return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN | IB_CQ_FLAGS_TIMESTAMP_COMPLETION)); } static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx, u32 *user_index) { if (cqe_version) { if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) || (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK)) return -EINVAL; *user_index = cmd_uidx; } else { *user_index = MLX5_IB_DEFAULT_UIDX; } return 0; } static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_qp *ucmd, int inlen, u32 *user_index) { u8 cqe_version = ucontext->cqe_version; if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) return 0; if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != !!cqe_version)) return -EINVAL; return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); } static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, struct mlx5_ib_create_srq *ucmd, int inlen, u32 *user_index) { u8 cqe_version = ucontext->cqe_version; if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) return 0; if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != !!cqe_version)) return -EINVAL; return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); } void mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *); int mlx5_ib_init_congestion(struct mlx5_ib_dev *); #endif /* MLX5_IB_H */ Index: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c =================================================================== --- stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c (revision 368225) +++ stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c (revision 368226) @@ -1,1778 +1,1781 @@ /*- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include "mlx5_ib.h" enum { MAX_PENDING_REG_MR = 8, }; #define MLX5_UMR_ALIGN 2048 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static __be64 mlx5_ib_update_mtt_emergency_buffer[ MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)] __aligned(MLX5_UMR_ALIGN); static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex); #endif static int clean_mr(struct mlx5_ib_mr *mr); static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. */ synchronize_srcu(&dev->mr_srcu); #endif return err; } static int order2idx(struct mlx5_ib_dev *dev, int order) { struct mlx5_mr_cache *cache = &dev->cache; if (order < cache->ent[0].order) return 0; else return order - cache->ent[0].order; } static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) { return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING static void update_odp_mr(struct mlx5_ib_mr *mr) { if (mr->umem->odp_data) { /* * This barrier prevents the compiler from moving the * setting of umem->odp_data->private to point to our * MR, before reg_umr finished, to ensure that the MR * initialization have finished before starting to * handle invalidations. */ smp_wmb(); mr->umem->odp_data->private = mr; /* * Make sure we will see the new * umem->odp_data->private value in the invalidation * routines, before we can get page faults on the * MR. Page faults can happen once we put the MR in * the tree, below this line. Without the barrier, * there can be a fault handling and an invalidation * before umem->odp_data->private == mr is visible to * the invalidation handler. */ smp_wmb(); } } #endif -static void reg_mr_callback(int status, void *context) +static void reg_mr_callback(int status, struct mlx5_async_work *context) { - struct mlx5_ib_mr *mr = context; + struct mlx5_ib_mr *mr = + container_of(context, struct mlx5_ib_mr, cb_work); struct mlx5_ib_dev *dev = mr->dev; struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); struct mlx5_cache_ent *ent = &cache->ent[c]; u8 key; unsigned long flags; struct mlx5_mr_table *table = &dev->mdev->priv.mr_table; int err; spin_lock_irqsave(&ent->lock, flags); ent->pending--; spin_unlock_irqrestore(&ent->lock, flags); if (status) { mlx5_ib_warn(dev, "async reg mr failed. 
status %d\n", status); kfree(mr); dev->fill_delay = 1; mod_timer(&dev->delay_timer, jiffies + HZ); return; } spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; cache->last_add = jiffies; spin_lock_irqsave(&ent->lock, flags); list_add_tail(&mr->list, &ent->head); ent->cur++; ent->size++; spin_unlock_irqrestore(&ent->lock, flags); spin_lock_irqsave(&table->lock, flags); err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mr->mmkey.key), &mr->mmkey); if (err) pr_err("Error inserting to mkey tree. 0x%x\n", -err); spin_unlock_irqrestore(&table->lock, flags); } static int add_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; int npages = 1 << ent->order; void *mkc; u32 *in; int err = 0; int i; in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { if (ent->pending >= MAX_PENDING_REG_MR) { err = -EAGAIN; break; } mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { err = -ENOMEM; break; } mr->order = ent->order; mr->umred = 1; mr->dev = dev; MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); MLX5_SET(mkc, mkc, log_page_size, 12); spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, - in, inlen, + &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, mr); + reg_mr_callback, &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; spin_unlock_irq(&ent->lock); mlx5_ib_warn(dev, "create mkey failed %d\n", err); kfree(mr); break; } } kfree(in); return err; } static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *mr; int err; int i; for (i = 0; i < num; i++) { spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); return; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); err = destroy_mkey(dev, mr); if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); else kfree(mr); } } static int someone_adding(struct mlx5_mr_cache *cache) { int i; for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { if (cache->ent[i].cur < cache->ent[i].limit) return 1; } return 0; } static void __cache_work_func(struct mlx5_cache_ent *ent) { struct mlx5_ib_dev *dev = ent->dev; struct mlx5_mr_cache *cache = &dev->cache; int i = order2idx(dev, ent->order); int err; if (cache->stopped) return; ent = &dev->cache.ent[i]; if (ent->cur < 2 * ent->limit && !dev->fill_delay) { err = add_keys(dev, i, 1); if (ent->cur < 2 * ent->limit) { if (err == -EAGAIN) { mlx5_ib_dbg(dev, "returned eagain, order %d\n", i + 2); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(3)); } else if (err) { mlx5_ib_warn(dev, "command failed order %d, err %d\n", i + 2, err); queue_delayed_work(cache->wq, &ent->dwork, msecs_to_jiffies(1000)); } else { queue_work(cache->wq, &ent->work); } } } else if (ent->cur > 2 * ent->limit) { /* * The 
remove_keys() logic is performed as garbage collection * task. Such task is intended to be run when no other active * processes are running. * * The need_resched() will return TRUE if there are user tasks * to be activated in near future. * * In such case, we don't execute remove_keys() and postpone * the garbage collection work to try to run in next cycle, * in order to free CPU resources to other tasks. */ if (!need_resched() && !someone_adding(cache) && time_after(jiffies, cache->last_add + 300 * HZ)) { remove_keys(dev, i, 1); if (ent->cur > ent->limit) queue_work(cache->wq, &ent->work); } else { queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); } } } static void delayed_cache_work_func(struct work_struct *work) { struct mlx5_cache_ent *ent; ent = container_of(work, struct mlx5_cache_ent, dwork.work); __cache_work_func(ent); } static void cache_work_func(struct work_struct *work) { struct mlx5_cache_ent *ent; ent = container_of(work, struct mlx5_cache_ent, work); __cache_work_func(ent); } static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_ib_mr *mr = NULL; struct mlx5_cache_ent *ent; int c; int i; c = order2idx(dev, order); if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); return NULL; } for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { ent = &cache->ent[i]; mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->cur--; spin_unlock_irq(&ent->lock); if (ent->cur < ent->limit) queue_work(cache->wq, &ent->work); break; } spin_unlock_irq(&ent->lock); queue_work(cache->wq, &ent->work); } if (!mr) cache->ent[c].miss++; return mr; } static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; int shrink = 0; int c; c = order2idx(dev, mr->order); if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); return; } ent = &cache->ent[c]; spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->cur++; if (ent->cur > 2 * ent->limit) shrink = 1; spin_unlock_irq(&ent->lock); if (shrink) queue_work(cache->wq, &ent->work); } static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *mr; int err; cancel_delayed_work(&ent->dwork); while (1) { spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { spin_unlock_irq(&ent->lock); return; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->cur--; ent->size--; spin_unlock_irq(&ent->lock); err = destroy_mkey(dev, mr); if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); else kfree(mr); } } static void delay_time_func(unsigned long ctx) { struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx; dev->fill_delay = 0; } int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; int limit; int i; mutex_init(&dev->slow_path_mutex); cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); return -ENOMEM; } + mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) 
{ INIT_LIST_HEAD(&cache->ent[i].head); spin_lock_init(&cache->ent[i].lock); ent = &cache->ent[i]; INIT_LIST_HEAD(&ent->head); spin_lock_init(&ent->lock); ent->order = i + 2; ent->dev = dev; if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); ent->limit = limit; queue_work(cache->wq, &ent->work); } return 0; } int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; dev->cache.stopped = 1; flush_workqueue(dev->cache.wq); + mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); del_timer_sync(&dev->delay_timer); return 0; } struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET64(mkc, mkc, start_addr, 0); err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) goto err_in; kfree(in); mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; return &mr->ibmr; err_in: kfree(in); err_free: kfree(mr); return ERR_PTR(err); } static int get_octo_len(u64 addr, u64 len, int page_size) { u64 offset; int npages; offset = addr & (page_size - 1); npages = ALIGN(len + offset, page_size) >> ilog2(page_size); return (npages + 1) / 2; } static int use_umr(int order) { return order <= MLX5_MAX_UMR_SHIFT; } static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int npages, int page_shift, int *size, __be64 **mr_pas, dma_addr_t *dma) { __be64 *pas; struct device *ddev = dev->ib_dev.dma_device; /* * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. * To avoid copying garbage after the pas array, we allocate * a little more. */ *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT); *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); if (!(*mr_pas)) return -ENOMEM; pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN); mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT); /* Clear padding after the actual pages. 
*/ memset(pas + npages, 0, *size - npages * sizeof(u64)); *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE); if (dma_mapping_error(ddev, *dma)) { kfree(*mr_pas); return -ENOMEM; } return 0; } static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr, struct ib_sge *sg, u64 dma, int n, u32 key, int page_shift) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_umr_wr *umrwr = umr_wr(wr); sg->addr = dma; sg->length = ALIGN(sizeof(u64) * n, 64); sg->lkey = dev->umrc.pd->local_dma_lkey; wr->next = NULL; wr->sg_list = sg; if (n) wr->num_sge = 1; else wr->num_sge = 0; wr->opcode = MLX5_IB_WR_UMR; umrwr->npages = n; umrwr->page_shift = page_shift; umrwr->mkey = key; } static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, struct ib_sge *sg, u64 dma, int n, u32 key, int page_shift, u64 virt_addr, u64 len, int access_flags) { struct mlx5_umr_wr *umrwr = umr_wr(wr); prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift); wr->send_flags = 0; umrwr->target.virt_addr = virt_addr; umrwr->length = len; umrwr->access_flags = access_flags; umrwr->pd = pd; } static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, struct ib_send_wr *wr, u32 key) { struct mlx5_umr_wr *umrwr = umr_wr(wr); wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE; wr->opcode = MLX5_IB_WR_UMR; umrwr->mkey = key; } static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, int access_flags, int *npages, int *page_shift, int *ncont, int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, access_flags, 0); if (IS_ERR(umem)) { mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); return (void *)umem; } mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); ib_umem_release(umem); return ERR_PTR(-EINVAL); } mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); return umem; } static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); context->status = wc->status; complete(&context->done); } static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) { context->cqe.done = mlx5_ib_umr_done; context->status = -1; init_completion(&context->done); } static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, u64 len, int npages, int page_shift, int order, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; int size; __be64 *mr_pas; dma_addr_t dma; int err = 0; int i; for (i = 0; i < 1; i++) { mr = alloc_cached_mr(dev, order); if (mr) break; err = add_keys(dev, order2idx(dev, order), 1); if (err && err != -EAGAIN) { mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); break; } } if (!mr) return ERR_PTR(-EAGAIN); err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas, &dma); if (err) goto free_mr; mlx5_ib_init_umr_context(&umr_context); umrwr.wr.wr_cqe = &umr_context.cqe; prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift, virt_addr, len, access_flags); down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { mlx5_ib_warn(dev, 
"post send failed, err %d\n", err); goto unmap_dma; } else { wait_for_completion(&umr_context.done); if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "reg umr failed\n"); err = -EFAULT; } } mr->mmkey.iova = virt_addr; mr->mmkey.size = len; mr->mmkey.pd = to_mpd(pd)->pdn; mr->live = 1; unmap_dma: up(&umrc->sem); dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); kfree(mr_pas); free_mr: if (err) { free_cached_mr(dev, mr); return ERR_PTR(err); } return mr; } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages, int zap) { struct mlx5_ib_dev *dev = mr->dev; struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; struct ib_umem *umem = mr->umem; int size; __be64 *pas; dma_addr_t dma; struct ib_send_wr *bad; struct mlx5_umr_wr wr; struct ib_sge sg; int err = 0; const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64); const int page_index_mask = page_index_alignment - 1; size_t pages_mapped = 0; size_t pages_to_map = 0; size_t pages_iter = 0; int use_emergency_buf = 0; /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ if (start_page_index & page_index_mask) { npages += start_page_index & page_index_mask; start_page_index &= ~page_index_mask; } pages_to_map = ALIGN(npages, page_index_alignment); if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES) return -EINVAL; size = sizeof(u64) * pages_to_map; size = min_t(int, PAGE_SIZE, size); /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim * code, when we are called from an invalidation. The pas buffer must * be 2k-aligned for Connect-IB. */ pas = (__be64 *)get_zeroed_page(GFP_ATOMIC); if (!pas) { mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n"); pas = mlx5_ib_update_mtt_emergency_buffer; size = MLX5_UMR_MTT_MIN_CHUNK_SIZE; use_emergency_buf = 1; mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex); memset(pas, 0, size); } pages_iter = size / sizeof(u64); dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE); if (dma_mapping_error(ddev, dma)) { mlx5_ib_err(dev, "unable to map DMA during MTT update.\n"); err = -ENOMEM; goto free_pas; } for (pages_mapped = 0; pages_mapped < pages_to_map && !err; pages_mapped += pages_iter, start_page_index += pages_iter) { dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); npages = min_t(size_t, pages_iter, ib_umem_num_pages(umem) - start_page_index); if (!zap) { __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT, start_page_index, npages, pas, MLX5_IB_MTT_PRESENT); /* Clear padding after the pages brought from the * umem. 
*/ memset(pas + npages, 0, size - npages * sizeof(u64)); } dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); mlx5_ib_init_umr_context(&umr_context); memset(&wr, 0, sizeof(wr)); wr.wr.wr_cqe = &umr_context.cqe; sg.addr = dma; sg.length = ALIGN(npages * sizeof(u64), MLX5_UMR_MTT_ALIGNMENT); sg.lkey = dev->umrc.pd->local_dma_lkey; wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | MLX5_IB_SEND_UMR_UPDATE_MTT; wr.wr.sg_list = &sg; wr.wr.num_sge = 1; wr.wr.opcode = MLX5_IB_WR_UMR; wr.npages = sg.length / sizeof(u64); wr.page_shift = PAGE_SHIFT; wr.mkey = mr->mmkey.key; wr.target.offset = start_page_index; down(&umrc->sem); err = ib_post_send(umrc->qp, &wr.wr, &bad); if (err) { mlx5_ib_err(dev, "UMR post send failed, err %d\n", err); } else { wait_for_completion(&umr_context.done); if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_err(dev, "UMR completion failed, code %d\n", umr_context.status); err = -EFAULT; } } up(&umrc->sem); } dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); free_pas: if (!use_emergency_buf) free_page((unsigned long)pas); else mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex); return err; } #endif /* * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. */ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, u64 virt_addr, u64 length, struct ib_umem *umem, int npages, int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; __be64 *pas; void *mkc; int inlen; u32 *in; int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*pas) * ((npages + 1) / 2) * 2; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); /* The pg_access bit allows setting the access flags * in the page list submitted with the command. 
*/ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, get_octo_len(virt_addr, length, 1 << page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(virt_addr, length, 1 << page_shift)); err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; } mr->umem = umem; mr->dev = dev; mr->live = 1; kvfree(in); mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); return mr; err_2: kvfree(in); err_1: if (!ibmr) kfree(mr); return ERR_PTR(err); } static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, int npages, u64 length, int access_flags) { mr->npages = npages; atomic_add(npages, &dev->mdev->priv.reg_pages); mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = length; mr->access_flags = access_flags; } struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; struct ib_umem *umem; int page_shift; int npages; int ncont; int order; int err; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", (long long)start, (long long)virt_addr, (long long)length, access_flags); umem = mr_umem_get(pd, start, length, access_flags, &npages, &page_shift, &ncont, &order); if (IS_ERR(umem)) return (void *)umem; if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); if (PTR_ERR(mr) == -EAGAIN) { mlx5_ib_dbg(dev, "cache empty for order %d", order); mr = NULL; } } else if (access_flags & IB_ACCESS_ON_DEMAND) { err = -EINVAL; pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB"); goto error; } if (!mr) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, page_shift, access_flags); mutex_unlock(&dev->slow_path_mutex); } if (IS_ERR(mr)) { err = PTR_ERR(mr); goto error; } mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; set_mr_fileds(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); #endif return &mr->ibmr; error: ib_umem_release(umem); return ERR_PTR(err); } static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_core_dev *mdev = dev->mdev; struct umr_common *umrc = &dev->umrc; struct mlx5_ib_umr_context umr_context; struct mlx5_umr_wr umrwr = {}; struct ib_send_wr *bad; int err; if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) return 0; mlx5_ib_init_umr_context(&umr_context); umrwr.wr.wr_cqe = &umr_context.cqe; prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key); down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); goto 
error; } else { wait_for_completion(&umr_context.done); up(&umrc->sem); } if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "unreg umr failed\n"); err = -EFAULT; goto error; } return 0; error: return err; } static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr, u64 length, int npages, int page_shift, int order, int access_flags, int flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct device *ddev = dev->ib_dev.dma_device; struct mlx5_ib_umr_context umr_context; struct ib_send_wr *bad; struct mlx5_umr_wr umrwr = {}; struct ib_sge sg; struct umr_common *umrc = &dev->umrc; dma_addr_t dma = 0; __be64 *mr_pas = NULL; int size; int err; mlx5_ib_init_umr_context(&umr_context); umrwr.wr.wr_cqe = &umr_context.cqe; umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; if (flags & IB_MR_REREG_TRANS) { err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size, &mr_pas, &dma); if (err) return err; umrwr.target.virt_addr = virt_addr; umrwr.length = length; umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; } prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key, page_shift); if (flags & IB_MR_REREG_PD) { umrwr.pd = pd; umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD; } if (flags & IB_MR_REREG_ACCESS) { umrwr.access_flags = access_flags; umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS; } /* post send request to UMR QP */ down(&umrc->sem); err = ib_post_send(umrc->qp, &umrwr.wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); } else { wait_for_completion(&umr_context.done); if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "reg umr failed (%u)\n", umr_context.status); err = -EFAULT; } } up(&umrc->sem); if (flags & IB_MR_REREG_TRANS) { dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); kfree(mr_pas); } return err; } int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int new_access_flags, struct ib_pd *new_pd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); struct mlx5_ib_mr *mr = to_mmr(ib_mr); struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; int access_flags = flags & IB_MR_REREG_ACCESS ? new_access_flags : mr->access_flags; u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address; u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length; int page_shift = 0; int npages = 0; int ncont = 0; int order = 0; int err; mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", (long long)start, (long long)virt_addr, (long long)length, access_flags); if (flags != IB_MR_REREG_PD) { /* * Replace umem. This needs to be done whether or not UMR is * used. */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, &page_shift, &ncont, &order); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); mr->umem = NULL; return err; } } if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { /* * UMR can't be used - MKey needs to be replaced. 
*/ if (mr->umred) { err = unreg_umr(dev, mr); if (err) mlx5_ib_warn(dev, "Failed to unregister MR\n"); } else { err = destroy_mkey(dev, mr); if (err) mlx5_ib_warn(dev, "Failed to destroy MKey\n"); } if (err) return err; mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, page_shift, access_flags); if (IS_ERR(mr)) return PTR_ERR(mr); mr->umred = 0; } else { /* * Send a UMR WQE */ err = rereg_umr(pd, mr, addr, len, npages, page_shift, order, access_flags, flags); if (err) { mlx5_ib_warn(dev, "Failed to rereg UMR\n"); return err; } } if (flags & IB_MR_REREG_PD) { ib_mr->pd = pd; mr->mmkey.pd = to_mpd(pd)->pdn; } if (flags & IB_MR_REREG_ACCESS) mr->access_flags = access_flags; if (flags & IB_MR_REREG_TRANS) { atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); set_mr_fileds(dev, mr, npages, len, access_flags); mr->mmkey.iova = addr; mr->mmkey.size = len; } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); #endif return 0; } static int mlx5_alloc_priv_descs(struct ib_device *device, struct mlx5_ib_mr *mr, int ndescs, int desc_size) { int size = ndescs * desc_size; int add_size; int ret; add_size = max_t(int, MLX5_UMR_ALIGN - 1, 0); mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); if (!mr->descs_alloc) return -ENOMEM; mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); mr->desc_map = dma_map_single(device->dma_device, mr->descs, size, DMA_TO_DEVICE); if (dma_mapping_error(device->dma_device, mr->desc_map)) { ret = -ENOMEM; goto err; } return 0; err: kfree(mr->descs_alloc); return ret; } static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { if (mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; dma_unmap_single(device->dma_device, mr->desc_map, size, DMA_TO_DEVICE); kfree(mr->descs_alloc); mr->descs = NULL; } } static int clean_mr(struct mlx5_ib_mr *mr) { struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); int umred = mr->umred; int err; if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", mr->sig->psv_memory.psv_idx); if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); kfree(mr->sig); mr->sig = NULL; } mlx5_free_priv_descs(mr); if (!umred) { err = destroy_mkey(dev, mr); if (err) { mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", mr->mmkey.key, err); return err; } } else { err = unreg_umr(dev, mr); if (err) { mlx5_ib_warn(dev, "failed unregister\n"); return err; } free_cached_mr(dev, mr); } if (!umred) kfree(mr); return 0; } CTASSERT(sizeof(((struct ib_phys_buf *)0)->size) == 8); struct ib_mr * mlx5_ib_reg_phys_mr(struct ib_pd *pd, struct ib_phys_buf *buffer_list, int num_phys_buf, int access_flags, u64 *virt_addr) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr; __be64 *pas; void *mkc; u32 *in; u64 total_size; u32 octo_len; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); unsigned long mask; int shift; int npages; int inlen; int err; int i, j, n; mask = buffer_list[0].addr ^ *virt_addr; total_size = 0; for (i = 0; i < num_phys_buf; ++i) { if (i != 0) mask |= buffer_list[i].addr; if (i != num_phys_buf - 1) mask |= buffer_list[i].addr + buffer_list[i].size; total_size += buffer_list[i].size; } if (mask & ~PAGE_MASK) return ERR_PTR(-EINVAL); shift = __ffs(mask | 1 << 31); buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1); buffer_list[0].addr &= ~0ULL << shift; npages = 0; for (i = 0; i < num_phys_buf; ++i) 
npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift; if (!npages) { mlx5_ib_warn(dev, "avoid zero region\n"); return ERR_PTR(-EINVAL); } mr = kzalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); octo_len = get_octo_len(*virt_addr, total_size, 1ULL << shift); octo_len = ALIGN(octo_len, 4); inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + (octo_len * 16); in = mlx5_vzalloc(inlen); if (!in) { kfree(mr); return ERR_PTR(-ENOMEM); } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); n = 0; for (i = 0; i < num_phys_buf; ++i) { for (j = 0; j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift; ++j) { u64 temp = buffer_list[i].addr + ((u64) j << shift); if (pg_cap) temp |= MLX5_IB_MTT_PRESENT; pas[n++] = cpu_to_be64(temp); } } /* * The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access * flags in the page list submitted with the command: */ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT); MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET64(mkc, mkc, start_addr, *virt_addr); MLX5_SET64(mkc, mkc, len, total_size); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, octo_len); MLX5_SET(mkc, mkc, log_page_size, shift); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octo_len); err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); mr->umem = NULL; mr->dev = dev; mr->live = 1; mr->npages = npages; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->ibmr.length = total_size; mr->access_flags = access_flags; kvfree(in); if (err) { kfree(mr); return ERR_PTR(err); } return &mr->ibmr; } int mlx5_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx5_ib_dev *dev = to_mdev(ibmr->device); struct mlx5_ib_mr *mr = to_mmr(ibmr); int npages = mr->npages; struct ib_umem *umem = mr->umem; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (umem && umem->odp_data) { /* Prevent new page faults from succeeding */ mr->live = 0; /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ mlx5_ib_invalidate_range(umem, ib_umem_start(umem), ib_umem_end(umem)); /* * We kill the umem before the MR for ODP, * so that there will not be any invalidations in * flight, looking at the *mr struct. */ ib_umem_release(umem); atomic_sub(npages, &dev->mdev->priv.reg_pages); /* Avoid double-freeing the umem. 
*/ umem = NULL; } #endif clean_mr(mr); if (umem) { ib_umem_release(umem); atomic_sub(npages, &dev->mdev->priv.reg_pages); } return 0; } struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); int ndescs = ALIGN(max_num_sg, 4); struct mlx5_ib_mr *mr; void *mkc; u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { mr->access_mode = MLX5_ACCESS_MODE_MTT; MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(u64)); if (err) goto err_free_in; mr->desc_size = sizeof(u64); mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SG_GAPS) { mr->access_mode = MLX5_ACCESS_MODE_KLM; err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(struct mlx5_klm)); if (err) goto err_free_in; mr->desc_size = sizeof(struct mlx5_klm); mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; MLX5_SET(mkc, mkc, bsf_en, 1); MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); if (!mr->sig) { err = -ENOMEM; goto err_free_in; } /* create mem & wire PSVs */ err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); if (err) goto err_free_sig; mr->access_mode = MLX5_ACCESS_MODE_KLM; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; mr->sig->sig_status_checked = true; mr->sig->sig_err_exists = false; /* Next UMR, Arm SIGERR */ ++mr->sig->sigerr_count; } else { mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); err = -EINVAL; goto err_free_in; } MLX5_SET(mkc, mkc, access_mode, mr->access_mode); MLX5_SET(mkc, mkc, umr_en, 1); err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; kfree(in); return &mr->ibmr; err_destroy_psv: if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", mr->sig->psv_memory.psv_idx); if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", mr->sig->psv_wire.psv_idx); } mlx5_free_priv_descs(mr); err_free_sig: kfree(mr->sig); err_free_in: kfree(in); err_free: kfree(mr); return ERR_PTR(err); } struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mw *mw = NULL; u32 *in = NULL; void *mkc; int ndescs; int err; struct mlx5_ib_alloc_mw req = {}; struct { __u32 comp_mask; __u32 response_length; } resp = {}; err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) return ERR_PTR(err); if (req.comp_mask || req.reserved1 || req.reserved2) return ERR_PTR(-EOPNOTSUPP); if (udata->inlen > sizeof(req) && !ib_is_udata_cleared(udata, sizeof(req), udata->inlen - sizeof(req))) return ERR_PTR(-EOPNOTSUPP); ndescs = req.num_klms ? 
roundup(req.num_klms, 4) : roundup(1, 4); mw = kzalloc(sizeof(*mw), GFP_KERNEL); in = kzalloc(inlen, GFP_KERNEL); if (!mw || !in) { err = -ENOMEM; goto free; } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_KLM); MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); if (err) goto free; mw->ibmw.rkey = mw->mmkey.key; resp.response_length = min(offsetof(typeof(resp), response_length) + sizeof(resp.response_length), udata->outlen); if (resp.response_length) { err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) { mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); goto free; } } kfree(in); return &mw->ibmw; free: kfree(mw); kfree(in); return ERR_PTR(err); } int mlx5_ib_dealloc_mw(struct ib_mw *mw) { struct mlx5_ib_mw *mmw = to_mmw(mw); int err; err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, &mmw->mmkey); if (!err) kfree(mmw); return err; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status) { struct mlx5_ib_mr *mmr = to_mmr(ibmr); int ret = 0; if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { pr_err("Invalid status check mask\n"); ret = -EINVAL; goto done; } mr_status->fail_status = 0; if (check_mask & IB_MR_CHECK_SIG_STATUS) { if (!mmr->sig) { ret = -EINVAL; pr_err("signature status check requested on a non-signature enabled MR\n"); goto done; } mmr->sig->sig_status_checked = true; if (!mmr->sig->sig_err_exists) goto done; if (ibmr->lkey == mmr->sig->err_item.key) memcpy(&mr_status->sig_err, &mmr->sig->err_item, sizeof(mr_status->sig_err)); else { mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; mr_status->sig_err.sig_err_offset = 0; mr_status->sig_err.key = mmr->sig->err_item.key; } mmr->sig->sig_err_exists = false; mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; } done: return ret; } static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, unsigned short sg_nents, unsigned int *sg_offset_p) { struct scatterlist *sg = sgl; struct mlx5_klm *klms = mr->descs; unsigned int sg_offset = sg_offset_p ? 
*sg_offset_p : 0; u32 lkey = mr->ibmr.pd->local_dma_lkey; int i; mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; mr->ndescs = sg_nents; for_each_sg(sgl, sg, sg_nents, i) { if (unlikely(i > mr->max_descs)) break; klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); klms[i].key = cpu_to_be32(lkey); mr->ibmr.length += sg_dma_len(sg); sg_offset = 0; } if (sg_offset_p) *sg_offset_p = sg_offset; return i; } static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) { struct mlx5_ib_mr *mr = to_mmr(ibmr); __be64 *descs; if (unlikely(mr->ndescs == mr->max_descs)) return -ENOMEM; descs = mr->descs; descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); return 0; } int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct mlx5_ib_mr *mr = to_mmr(ibmr); int n; mr->ndescs = 0; ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); if (mr->access_mode == MLX5_ACCESS_MODE_KLM) n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx5_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); return n; } Index: stable/11 =================================================================== --- stable/11 (revision 368225) +++ stable/11 (revision 368226) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r367719
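
Editor's note: the substantive change in this revision is the switch of the MR cache's asynchronous mkey creation to a shared async context: reg_mr_callback() now takes a struct mlx5_async_work and recovers the owning mlx5_ib_mr with container_of(), mlx5_core_create_mkey_cb() is passed &dev->async_ctx and &mr->cb_work, and the cache init/cleanup paths call mlx5_cmd_init_async_ctx()/mlx5_cmd_cleanup_async_ctx(). The stand-alone sketch below (user-space C; all demo_* names, struct async_work, and struct demo_mr are hypothetical stand-ins, not the driver's types) only illustrates that embedded-work/container_of pattern.

/*
 * Minimal sketch of the callback pattern this revision adopts: the
 * async cookie is an embedded work member, and the callback recovers
 * the enclosing object via container_of(). Hypothetical names only.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct async_work {			/* stands in for struct mlx5_async_work */
	void (*done)(int status, struct async_work *work);
};

struct demo_mr {			/* stands in for struct mlx5_ib_mr */
	int order;
	struct async_work cb_work;	/* embedded, like mr->cb_work */
};

/* The callback receives the embedded work item, not the MR pointer. */
static void demo_reg_mr_callback(int status, struct async_work *work)
{
	struct demo_mr *mr = container_of(work, struct demo_mr, cb_work);

	printf("status %d, order %d\n", status, mr->order);
}

/* Stand-in for the async command layer completing a request. */
static void demo_complete(struct async_work *work, int status)
{
	work->done(status, work);
}

int main(void)
{
	struct demo_mr mr = { .order = 5 };

	mr.cb_work.done = demo_reg_mr_callback;
	demo_complete(&mr.cb_work, 0);	/* prints "status 0, order 5" */
	return 0;
}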
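
Editor's note: get_octo_len() in this file converts a byte range into the translations_octword_size value programmed into the mkey context. A user-space recreation (demo_get_octo_len() is a hypothetical name; ALIGN()/ilog2() are open-coded, assuming a power-of-two page size) may make the rounding easier to follow: the range is rounded up to whole pages, and because each 8-byte MTT entry fills half of a 16-byte octword, the page count is then halved rounding up, matching the (npages + 1) / 2 in the original.

/*
 * User-space recreation of the get_octo_len() arithmetic; a sketch,
 * not the driver function itself.
 */
#include <stdint.h>
#include <stdio.h>

static int demo_get_octo_len(uint64_t addr, uint64_t len, uint64_t page_size)
{
	uint64_t offset = addr & (page_size - 1);
	/* Round the byte range up to whole pages, then count pages. */
	uint64_t npages = ((len + offset + page_size - 1) & ~(page_size - 1))
	    / page_size;

	/* Two 8-byte MTT entries per 16-byte octword, rounded up. */
	return (int)((npages + 1) / 2);
}

int main(void)
{
	/* 3 pages of 4 KiB starting 1 KiB into a page -> 4 pages -> 2 octwords. */
	printf("%d\n", demo_get_octo_len(1024, 3 * 4096, 4096));
	return 0;
}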
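
Editor's note: mlx5_ib_update_mtt() begins by widening the requested range because UMR copies MTTs in MLX5_UMR_MTT_ALIGNMENT-byte units: the first page index is rounded down and the page count is rounded up to that granularity. The sketch below assumes a 64-byte alignment value standing in for MLX5_UMR_MTT_ALIGNMENT; demo_align_mtt_range() and DEMO_UMR_MTT_ALIGNMENT are hypothetical names.

/*
 * Sketch of the index/length alignment at the top of
 * mlx5_ib_update_mtt(); the 64-byte granule is an assumption.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_UMR_MTT_ALIGNMENT	64	/* bytes, assumed */

static void demo_align_mtt_range(size_t *start_page_index, size_t *npages)
{
	const size_t align = DEMO_UMR_MTT_ALIGNMENT / sizeof(uint64_t);
	const size_t mask = align - 1;

	/* Round the start down, absorbing the skipped entries into npages. */
	*npages += *start_page_index & mask;
	*start_page_index &= ~mask;
	/* Round the count up to a whole number of aligned chunks. */
	*npages = (*npages + mask) & ~mask;
}

int main(void)
{
	size_t start = 13, npages = 5;

	demo_align_mtt_range(&start, &npages);
	printf("start %zu, npages %zu\n", start, npages);	/* 8, 16 */
	return 0;
}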
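
Editor's note: mlx5_ib_reg_phys_mr() picks the translation page shift by OR-ing the phys/virt offset with every interior buffer boundary and taking the lowest set bit of the result (the original's "__ffs(mask | 1 << 31)", which also caps the shift at 31). The sketch below (hypothetical demo_page_shift() and struct phys_buf; ffsll() from <strings.h> replacing the kernel's __ffs()) walks that derivation for two 2 MiB buffers whose shared boundary is 4 MiB aligned.

/*
 * Sketch of the page-shift derivation in mlx5_ib_reg_phys_mr(); the
 * driver additionally rejects masks with bits set below PAGE_SHIFT.
 */
#include <stdint.h>
#include <stdio.h>
#include <strings.h>		/* ffsll() */

struct phys_buf {
	uint64_t addr;
	uint64_t size;
};

static int demo_page_shift(const struct phys_buf *buf, int n, uint64_t virt)
{
	uint64_t mask = buf[0].addr ^ virt;
	int i;

	for (i = 0; i < n; i++) {
		if (i != 0)
			mask |= buf[i].addr;		   /* interior start */
		if (i != n - 1)
			mask |= buf[i].addr + buf[i].size; /* interior end */
	}
	/* __ffs() is 0-based; ffsll() is 1-based, hence the -1. */
	return ffsll((long long)(mask | (1ULL << 31))) - 1;
}

int main(void)
{
	struct phys_buf bufs[2] = {
		{ .addr = 0x200000, .size = 0x200000 },	/* 2 MiB @ 2 MiB */
		{ .addr = 0x400000, .size = 0x200000 },	/* 2 MiB @ 4 MiB */
	};

	/* The only interior boundary is 4 MiB aligned -> shift 22. */
	printf("page shift %d\n", demo_page_shift(bufs, 2, 0x200000));
	return 0;
}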