Page MenuHomeFreeBSD

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/sys/dev/mlx4/mlx4_core/mlx4.h b/sys/dev/mlx4/mlx4_core/mlx4.h
index 691cdcef75d3..1d869ca4b6e0 100644
--- a/sys/dev/mlx4/mlx4_core/mlx4.h
+++ b/sys/dev/mlx4/mlx4_core/mlx4.h
@@ -1,1462 +1,1462 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
* Copyright (c) 2004 Voltaire, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_H
#define MLX4_H
#include <linux/mutex.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/device.h>
#include <linux/rwsem.h>
#include <dev/mlx4/device.h>
#include <dev/mlx4/driver.h>
#include <dev/mlx4/doorbell.h>
#include <dev/mlx4/cmd.h>
#include <dev/mlx4/mlx4_core/fw_qos.h>
/*
 * Driver identification strings. PFX is DRV_NAME followed by ": " (adjacent
 * string literals are concatenated); presumably used as a log prefix.
 */
#define DRV_NAME "mlx4_core"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "3.4.1"
-#define DRV_RELDATE "October 2017"
+#define DRV_VERSION "3.5.0"
+#define DRV_RELDATE "November 2018"
/*
 * Flow-steering (MLX4_FS_*) and miscellaneous firmware bit/size constants.
 * NOTE(review): the exact firmware meaning of each value is not visible in
 * this header; confirm against the firmware interface before changing.
 * MLX4_FS_MGM_LOG_ENTRY_SIZE (7) matches MLX4_MIN_MGM_LOG_ENTRY_SIZE below.
 */
#define MLX4_FS_UDP_UC_EN (1 << 1)
#define MLX4_FS_TCP_UC_EN (1 << 2)
#define MLX4_FS_NUM_OF_L2_ADDR 8
#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7
#define MLX4_FS_NUM_MCG (1 << 17)
#define INIT_HCA_TPT_MW_ENABLE (1 << 7)
#define MLX4_QUERY_IF_STAT_RESET BIT(31)
/*
 * Base offsets and sizes of device register windows.
 * NOTE(review): read as byte offsets/sizes based on the _BASE/_SIZE suffixes;
 * the address space they apply to is not shown here.
 */
enum {
MLX4_HCR_BASE = 0x80680,
MLX4_HCR_SIZE = 0x0001c,
MLX4_CLR_INT_SIZE = 0x00008,
MLX4_SLAVE_COMM_BASE = 0x0,
MLX4_COMM_PAGESIZE = 0x1000,
MLX4_CLOCK_SIZE = 0x00008,
MLX4_COMM_CHAN_CAPS = 0x8,
MLX4_COMM_CHAN_FLAGS = 0xc
};
/*
 * MGM entry sizing (log2 values) and derived limits.
 * MLX4_MAX_QP_PER_MGM evaluates to 4 * ((1 << 12) / 16 - 2) = 1016; it sizes
 * the qp[] array of struct mlx4_mgm.
 */
enum {
MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE = 10,
MLX4_MIN_MGM_LOG_ENTRY_SIZE = 7,
MLX4_MAX_MGM_LOG_ENTRY_SIZE = 12,
MLX4_MAX_QP_PER_MGM = 4 * ((1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE) / 16 - 2),
MLX4_MTT_ENTRY_PER_SEG = 8,
};
/* Number of PDs: 1 << 15 = 32768. */
enum {
MLX4_NUM_PDS = 1 << 15
};
/*
 * cMPT object types. MLX4_CMPT_NUM_TYPE is the count of types above it (4).
 */
enum {
MLX4_CMPT_TYPE_QP = 0,
MLX4_CMPT_TYPE_SRQ = 1,
MLX4_CMPT_TYPE_CQ = 2,
MLX4_CMPT_TYPE_EQ = 3,
MLX4_CMPT_NUM_TYPE
};
/*
 * MLX4_NUM_CMPTS is the type count shifted left by MLX4_CMPT_SHIFT,
 * i.e. 4 << 24.
 */
enum {
MLX4_CMPT_SHIFT = 24,
MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
};
/*
 * State of an MPT entry: disabled (0), enabled in HW (1), enabled in SW (2).
 * NOTE(review): HW/SW ownership semantics are inferred from the names.
 */
enum mlx4_mpt_state {
MLX4_MPT_DISABLED = 0,
MLX4_MPT_EN_HW,
MLX4_MPT_EN_SW
};
/*
 * Communication-channel timing constants and the "no operation" opcode.
 * NOTE(review): time units are not stated here (presumably milliseconds).
 */
#define MLX4_COMM_TIME 10000
#define MLX4_COMM_OFFLINE_TIME_OUT 30000
#define MLX4_COMM_CMD_NA_OP 0x0
/*
 * Communication-channel command codes. The first six take consecutive
 * values 0..5; MLX4_COMM_CMD_FLR is pinned to 254.
 */
enum {
MLX4_COMM_CMD_RESET,
MLX4_COMM_CMD_VHCR0,
MLX4_COMM_CMD_VHCR1,
MLX4_COMM_CMD_VHCR2,
MLX4_COMM_CMD_VHCR_EN,
MLX4_COMM_CMD_VHCR_POST,
MLX4_COMM_CMD_FLR = 254
};
/* VF SMI setting: disabled (0) or enabled (1). */
enum {
MLX4_VF_SMI_DISABLED,
MLX4_VF_SMI_ENABLED
};
/* This flag indicates that the slave should delay the RESET command. */
#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
/* Number of retries performed when a reset is attempted in the middle of an FLR. */
#define NUM_OF_RESET_RETRIES 10
/* Evaluates to 2000. NOTE(review): units not stated here (presumably ms). */
#define SLEEP_TIME_IN_RESET (2 * 1000)
/*
 * Resource types handled by the resource tracker.
 * MLX4_NUM_OF_RESOURCE_TYPE is the count of types above it (12) and sizes
 * the per-type arrays in struct slave_list and struct mlx4_resource_tracker.
 * New types must be added before MLX4_NUM_OF_RESOURCE_TYPE.
 */
enum mlx4_resource {
RES_QP,
RES_CQ,
RES_SRQ,
RES_XRCD,
RES_MPT,
RES_MTT,
RES_MAC,
RES_VLAN,
RES_NPORT_ID,
RES_COUNTER,
RES_FS_RULE,
RES_EQ,
MLX4_NUM_OF_RESOURCE_TYPE
};
/*
 * Allocation operation selector: reserve only, reserve and map, or map ICM.
 * NOTE(review): exact semantics live in the callers, not in this header.
 */
enum mlx4_alloc_mode {
RES_OP_RESERVE,
RES_OP_RESERVE_AND_MAP,
RES_OP_MAP_ICM,
};
/*
 * Selects how much mlx4_free_resource_tracker() releases (it takes this
 * enum as its second argument).
 */
enum mlx4_res_tracker_free_type {
RES_TR_FREE_ALL,
RES_TR_FREE_SLAVES_ONLY,
RES_TR_FREE_STRUCTS_ONLY,
};
/*
 * Virtual HCR structures.
 *
 * mlx4_vhcr is the software representation, in machine endianness.
 *
 * mlx4_vhcr_cmd is the formalized structure, the one that is passed
 * to FW through the communication channel.
 * It is big endian, and has the same structure as the physical HCR
 * used by the command interface.
 */
struct mlx4_vhcr {
u64 in_param;
u64 out_param;
u32 in_modifier;
u32 errno;
u16 op;
u16 token;
u8 op_modifier;
u8 e_bit;
};
/*
 * Big-endian (__beNN members) counterpart of struct mlx4_vhcr.
 * Member order defines the layout, so members must not be reordered.
 */
struct mlx4_vhcr_cmd {
__be64 in_param;
__be32 in_modifier;
u32 reserved1;
__be64 out_param;
__be16 token;
u16 reserved;
u8 status;
u8 flags;
__be16 opcode;
};
/*
 * Per-opcode command descriptor: flags describing the command's mailboxes
 * plus two optional callbacks.
 */
struct mlx4_cmd_info {
u16 opcode;
bool has_inbox;
bool has_outbox;
bool out_is_imm;
bool encode_slave_id;
/* Validation callback; receives the slave id, its vhcr and the inbox. */
int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox);
/*
 * Execution callback. The mlx4_*_wrapper() functions declared later in
 * this header all share this signature.
 */
int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
};
/*
 * mlx4_debug_level is a real variable only when CONFIG_MLX4_DEBUG is
 * defined; otherwise it is the constant 0, so the test in mlx4_dbg() is
 * always false.
 */
#ifdef CONFIG_MLX4_DEBUG
extern int mlx4_debug_level;
#else /* CONFIG_MLX4_DEBUG */
#define mlx4_debug_level (0)
#endif /* CONFIG_MLX4_DEBUG */
/*
 * Logging helpers. Each takes an object with a ->persist->pdev->dev chain
 * (mdev), a printf-style format and optional arguments, and logs against
 * that device.
 *
 * mlx4_dbg() emits at KERN_DEBUG and only when mlx4_debug_level is non-zero.
 */
#define mlx4_dbg(mdev, format, ...) \
do { \
if (mlx4_debug_level) \
dev_printk(KERN_DEBUG, \
&(mdev)->persist->pdev->dev, format, \
##__VA_ARGS__); \
} while (0)
/* Unconditional error-, info- and warning-level variants. */
#define mlx4_err(mdev, format, ...) \
dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
#define mlx4_info(mdev, format, ...) \
dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
#define mlx4_warn(mdev, format, ...) \
dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
/*
 * Driver-wide tunables defined in another translation unit.
 * NOTE(review): presumably module parameters; confirm at the definitions.
 */
extern int mlx4_log_num_mgm_entry_size;
extern int log_mtts_per_seg;
extern int mlx4_internal_err_reset;
/*
 * Upper bound on slaves: PFs plus VFs, capped at MLX4_MFUNC_MAX.
 * ALL_SLAVES (0xff) is a distinct value; NOTE(review): presumably a
 * wildcard slave id, confirm at the call sites.
 */
#define MLX4_MAX_NUM_SLAVES (min(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF, \
MLX4_MFUNC_MAX))
#define ALL_SLAVES 0xff
/*
 * Bitmap allocator state used by the mlx4_bitmap_*() functions declared
 * later in this header. 'table' points to the bit array and 'lock' is the
 * spinlock stored alongside it.
 * NOTE(review): the meaning of the counters (last/top/max/...) is defined by
 * the allocator implementation, which is not part of this header.
 */
struct mlx4_bitmap {
u32 last;
u32 top;
u32 max;
u32 reserved_top;
u32 mask;
u32 avail;
u32 effective_len;
spinlock_t lock;
unsigned long *table;
};
/*
 * Buddy allocator state: an array of bitmaps (bits), an array of free
 * counts (num_free), the maximum order and a spinlock.
 * NOTE(review): "one bitmap / one counter per order" is the usual buddy
 * layout but is not shown here; confirm in the implementation.
 */
struct mlx4_buddy {
unsigned long **bits;
unsigned int *num_free;
u32 max_order;
spinlock_t lock;
};
/* Opaque here; only pointers to struct mlx4_icm are used in this header. */
struct mlx4_icm;
/*
 * Describes one ICM-backed table: a virtual base (virt), object count and
 * size, allocation flags, a mutex, and an array of num_icm chunk pointers.
 */
struct mlx4_icm_table {
u64 virt;
int num_icm;
u32 num_obj;
int obj_size;
int lowmem;
int coherent;
struct mutex mutex;
struct mlx4_icm **icm;
};
/* Bit masks for the 'flags' word of an MPT entry. */
#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
#define MLX4_MPT_FLAG_MIO (1 << 17)
#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15)
#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
#define MLX4_MPT_FLAG_REGION (1 << 8)
/* Masks and flags for the 'pd_flags' word: low 17 bits PD, bits 17..23 VF. */
#define MLX4_MPT_PD_MASK (0x1FFFFUL)
#define MLX4_MPT_PD_VF_MASK (0xFE0000UL)
#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7)
#define MLX4_MPT_STATUS_SW 0xF0
#define MLX4_MPT_STATUS_HW 0x00
#define MLX4_CQE_SIZE_MASK_STRIDE 0x3
#define MLX4_EQE_SIZE_MASK_STRIDE 0x30
/*
 * EQ <-> CQ vector translation. MLX4_EQ_ASYNC is 0, so for any vector whose
 * value is >= 0 (after the int cast) the EQ-to-CQ form subtracts 1 and the
 * CQ-to-EQ form adds 1; negative values pass through unchanged.
 * Note that 'vector' is evaluated twice, so avoid arguments with side effects.
 */
#define MLX4_EQ_ASYNC 0
#define MLX4_EQ_TO_CQ_VECTOR(vector) ((vector) - \
!!((int)(vector) >= MLX4_EQ_ASYNC))
#define MLX4_CQ_TO_EQ_VECTOR(vector) ((vector) + \
!!((int)(vector) >= MLX4_EQ_ASYNC))
/*
 * MPT entry layout.
 *
 * Must be packed because mtt_addr is 64 bits but only aligned to 32 bits:
 * with the usual fixed-width sizes it sits at byte offset 44. (An earlier
 * version of this comment referred to the member as "mtt_seg".)
 * Packed size is 64 bytes; member order defines the layout.
 */
struct mlx4_mpt_entry {
__be32 flags;
__be32 qpn;
__be32 key;
__be32 pd_flags;
__be64 start;
__be64 length;
__be32 lkey;
__be32 win_cnt;
u8 reserved1[3];
u8 mtt_rep;
__be64 mtt_addr;
__be32 mtt_sz;
__be32 entity_size;
__be32 first_byte_offset;
} __packed;
/*
 * EQ context layout.
 *
 * NOTE(review): the comment previously here read "Must be packed because
 * start is 64 bits but only aligned to 32 bits", but this struct has no
 * member named start, has no 64-bit member, and is not declared __packed.
 * With the usual fixed-width sizes every member is naturally aligned and
 * the struct is 64 bytes, so no packing is needed. The old text looks like
 * a stale copy of the mlx4_mpt_entry comment.
 */
struct mlx4_eq_context {
__be32 flags;
u16 reserved1[3];
__be16 page_offset;
u8 log_eq_size;
u8 reserved2[4];
u8 eq_period;
u8 reserved3;
u8 eq_max_count;
u8 reserved4[3];
u8 intr;
u8 log_page_size;
u8 reserved5[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
u32 reserved6[2];
__be32 consumer_index;
__be32 producer_index;
u32 reserved7[4];
};
/*
 * CQ context layout (big-endian members). With the usual fixed-width sizes
 * the struct is 64 bytes and db_rec_addr lands at offset 56, so it is
 * naturally aligned without packing. Member order defines the layout.
 */
struct mlx4_cq_context {
__be32 flags;
u16 reserved1[3];
__be16 page_offset;
__be32 logsize_usrpage;
__be16 cq_period;
__be16 cq_max_count;
u8 reserved2[3];
u8 comp_eqn;
u8 log_page_size;
u8 reserved3[2];
/* MTT base address is split: high byte here, low 32 bits below. */
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
__be32 last_notified_index;
__be32 solicit_producer_index;
__be32 consumer_index;
__be32 producer_index;
u32 reserved4[2];
__be64 db_rec_addr;
};
/*
 * SRQ context layout (big-endian members). With the usual fixed-width
 * sizes the struct is 48 bytes and db_rec_addr lands at offset 40.
 * Member order defines the layout.
 */
struct mlx4_srq_context {
__be32 state_logsize_srqn;
u8 logstride;
u8 reserved1;
__be16 xrcd;
__be32 pg_offset_cqn;
u32 reserved2;
u8 log_page_size;
u8 reserved3[2];
/* MTT base address is split: high byte here, low 32 bits below. */
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
__be32 pd;
__be16 limit_watermark;
__be16 wqe_cnt;
u16 reserved4;
__be16 wqe_counter;
u32 reserved5;
__be64 db_rec_addr;
};
/*
 * Driver-side state for one event queue: owning device, doorbell mapping,
 * EQ number, consumer index, IRQ information, entry count, backing pages
 * and MTT, plus reference/affinity bookkeeping.
 */
struct mlx4_eq {
struct mlx4_dev *dev;
void __iomem *doorbell;
int eqn;
u32 cons_index;
u16 irq;
u16 have_irq;
int nent;
struct mlx4_buf_list *page_list;
struct mlx4_mtt mtt;
u32 ncqs;
struct mlx4_active_ports actv_ports;
u32 ref_count;
int affinity_cpu_id;
};
/* Compact event record queued per slave (see mlx4_slave_state.eq[]). */
struct mlx4_slave_eqe {
u8 type;
u8 port;
u32 param;
};
/* EQ number and token pair; one per event type in mlx4_slave_state. */
struct mlx4_slave_event_eq_info {
int eqn;
u16 token;
};
/*
 * Requested resource counts; passed to mlx4_make_profile() as 'request'.
 * Note num_mtt is unsigned while the other counts are int.
 */
struct mlx4_profile {
int num_qp;
int rdmarc_per_qp;
int num_srq;
int num_cq;
int num_mcg;
int num_mpt;
unsigned num_mtt;
};
/*
 * Firmware-reported locations: for each of clr_int, catas, comm and clock
 * there is a 64-bit base/offset and a matching *_bar index, plus the
 * firmware and auxiliary ICM handles and size fields.
 */
struct mlx4_fw {
u64 clr_int_base;
u64 catas_offset;
u64 comm_base;
u64 clock_offset;
struct mlx4_icm *fw_icm;
struct mlx4_icm *aux_icm;
u32 catas_size;
u16 fw_pages;
u8 clr_int_bar;
u8 catas_bar;
u8 comm_bar;
u8 clock_bar;
};
/*
 * Communication-channel register pair. struct mlx4_mfunc holds a pointer
 * to this type tagged __iomem.
 */
struct mlx4_comm {
u32 slave_write;
u32 slave_read;
};
/* Multicast mode selector values: config (0), disable (1), enable (2). */
enum {
MLX4_MCAST_CONFIG = 0,
MLX4_MCAST_DISABLE = 1,
MLX4_MCAST_ENABLE = 2,
};
/*
 * VLAN filter: 128 big-endian 32-bit words (128 * 32 = 4096 bits).
 * NOTE(review): presumably one bit per VLAN id; confirm at the users.
 */
#define VLAN_FLTR_SIZE 128
struct mlx4_vlan_fltr {
__be32 entry[VLAN_FLTR_SIZE];
};
/* List node carrying a 64-bit multicast address. */
struct mlx4_mcast_entry {
struct list_head list;
u64 addr;
};
/* List node carrying a QP number. */
struct mlx4_promisc_qp {
struct list_head list;
u32 qpn;
};
/* List node carrying a steering index plus its own list of duplicates. */
struct mlx4_steer_index {
struct list_head list;
unsigned int index;
struct list_head duplicates;
};
/* Number of entries in mlx4_slave_state.event_eq[]. */
#define MLX4_EVENT_TYPES_NUM 64
/*
 * Per-slave state kept by the master (mlx4_mfunc_master_ctx.slave_state).
 * The per-port arrays are sized MLX4_MAX_PORTS + 1.
 * NOTE(review): the extra slot suggests 1-based port indexing; confirm.
 */
struct mlx4_slave_state {
u8 comm_toggle;
u8 last_cmd;
u8 init_port_mask;
bool active;
bool old_vlan_api;
bool vst_qinq_supported;
u8 function;
dma_addr_t vhcr_dma;
u16 mtu[MLX4_MAX_PORTS + 1];
__be32 ib_cap_mask[MLX4_MAX_PORTS + 1];
struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES];
struct list_head mcast_filters[MLX4_MAX_PORTS + 1];
struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1];
/* event type to eq number lookup */
struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM];
u16 eq_pi;
u16 eq_ci;
spinlock_t lock;
/* initialized (to zero) via kzalloc */
u8 is_slave_going_down;
u32 cookie;
enum slave_port_state port_state[MLX4_MAX_PORTS + 1];
};
/*
 * MLX4_VGT (4095) and NO_INDX (-1) are sentinel values.
 * NOTE(review): presumably a special VLAN id and "no table index"
 * respectively; confirm at the users.
 */
#define MLX4_VGT 4095
#define NO_INDX (-1)
/* Per-vport settings: MAC, default VLAN/QoS, VLAN protocol, rate, GUID. */
struct mlx4_vport_state {
u64 mac;
u16 default_vlan;
u8 default_qos;
__be16 vlan_proto;
u32 tx_rate;
bool spoofchk;
u8 qos_vport;
__be64 guid;
};
/* Administratively requested per-port VF state. */
struct mlx4_vf_admin_state {
struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
u8 enable_smi[MLX4_MAX_PORTS + 1];
};
/* A vport state plus the MAC and VLAN indices associated with it. */
struct mlx4_vport_oper_state {
struct mlx4_vport_state state;
int mac_idx;
int vlan_idx;
};
/* Operational per-port VF state; mirrors the shape of the admin state. */
struct mlx4_vf_oper_state {
struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
u8 smi_enabled[MLX4_MAX_PORTS + 1];
};
/* One list head per resource type, guarded by a mutex. */
struct slave_list {
struct mutex mutex;
struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
};
/*
 * Quota accounting for one resource type. Each anonymous union overlays a
 * single counter with a per-port array, so res_reserved aliases
 * res_port_rsvd[0] and res_free aliases res_port_free[0].
 */
struct resource_allocator {
spinlock_t alloc_lock; /* protect quotas */
union {
int res_reserved;
int res_port_rsvd[MLX4_MAX_PORTS];
};
union {
int res_free;
int res_port_free[MLX4_MAX_PORTS];
};
int *quota;
int *allocated;
int *guaranteed;
};
/*
 * Resource tracker: one rb-tree and one allocator per resource type, plus
 * per-slave resource lists.
 */
struct mlx4_resource_tracker {
spinlock_t lock;
/* tree for each resources */
struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
/* num_of_slave's lists, one per slave */
struct slave_list *slave_list;
struct resource_allocator res_alloc[MLX4_NUM_OF_RESOURCE_TYPE];
};
/*
 * Software event queue of SLAVE_EVENT_EQ_SIZE (128) entries with consumer
 * (cons) and producer (prod) counters and its own spinlock.
 */
#define SLAVE_EVENT_EQ_SIZE 128
struct mlx4_slave_event_eq {
u32 eqn;
u32 cons;
u32 prod;
spinlock_t event_lock;
struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
};
/* QoS bookkeeping: a VF count and a bitmap of MLX4_NUM_UP priority bits. */
struct mlx4_qos_manager {
int num_of_qos_vfs;
DECLARE_BITMAP(priority_bm, MLX4_NUM_UP);
};
/* Three activity indicators kept per port by the master for QP0. */
struct mlx4_master_qp0_state {
int proxy_qp0_active;
int qp0_active;
int port_active;
};
/*
 * Master-side multi-function context: per-slave and per-VF state arrays,
 * per-port reference counters, the resource tracker, the workqueue and
 * work items for the communication channel, and the slave event queue.
 */
struct mlx4_mfunc_master_ctx {
struct mlx4_slave_state *slave_state;
struct mlx4_vf_admin_state *vf_admin;
struct mlx4_vf_oper_state *vf_oper;
struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1];
int init_port_ref[MLX4_MAX_PORTS + 1];
u16 max_mtu[MLX4_MAX_PORTS + 1];
u8 pptx;
u8 pprx;
int disable_mcast_ref[MLX4_MAX_PORTS + 1];
struct mlx4_resource_tracker res_tracker;
struct workqueue_struct *comm_wq;
struct work_struct comm_work;
struct work_struct slave_event_work;
struct work_struct slave_flr_event_work;
spinlock_t slave_state_lock;
/* 4 x 32 = 128 bits */
__be32 comm_arm_bit_vector[4];
struct mlx4_eqe cmd_eqe;
struct mlx4_slave_event_eq slave_eq;
struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX];
struct mlx4_qos_manager qos_ctl[MLX4_MAX_PORTS + 1];
};
/*
 * Multi-function state: the memory-mapped comm channel, the vhcr buffer
 * with its DMA address, and the embedded master context.
 */
struct mlx4_mfunc {
struct mlx4_comm __iomem *comm;
struct mlx4_vhcr_cmd *vhcr;
dma_addr_t vhcr_dma;
struct mlx4_mfunc_master_ctx master;
};
/* Low 24 bits of a qp[] word; bit 30 is the MGM_BLCK_LB_BIT position. */
#define MGM_QPN_MASK 0x00FFFFFF
#define MGM_BLCK_LB_BIT 30
/*
 * MGM entry layout. The header is 32 bytes (4 + 4 + 8 + 16) and qp[] holds
 * MLX4_MAX_QP_PER_MGM (1016) 4-byte words, for 4096 bytes in total, which
 * equals 1 << MLX4_MAX_MGM_LOG_ENTRY_SIZE.
 */
struct mlx4_mgm {
__be32 next_gid_index;
__be32 members_count;
u32 reserved[2];
u8 gid[16];
__be32 qp[MLX4_MAX_QP_PER_MGM];
};
/*
 * Command-interface state: mailbox pool, mapped HCR, the synchronization
 * objects used around command submission, the context array with its
 * free-list head, and mode/toggle flags.
 */
struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
struct rw_semaphore switch_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
struct mlx4_cmd_context *context;
u16 token_mask;
u8 use_events;
u8 toggle;
u8 comm_toggle;
u8 initialized;
};
/* Bit flags stored in mlx4_vf_immed_vlan_work.flags. */
enum {
MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0,
MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1,
MLX4_VF_IMMED_VLAN_FLAG_LINK_DISABLE = 1 << 2,
};
/*
 * Deferred-work item for a VF VLAN change: embeds the work_struct and
 * carries the new and original VLAN id/index together with port and QoS.
 */
struct mlx4_vf_immed_vlan_work {
struct work_struct work;
struct mlx4_priv *priv;
int flags;
int slave;
int vlan_ix;
int orig_vlan_ix;
u8 port;
u8 qos;
u8 qos_vport;
u16 vlan_id;
u16 orig_vlan_id;
__be16 vlan_proto;
};
/* UAR allocation state: a single bitmap. */
struct mlx4_uar_table {
struct mlx4_bitmap bitmap;
};
/*
 * Memory-region tables: a bitmap for MPTs, a buddy allocator for MTTs,
 * their base addresses, and the ICM tables backing each.
 */
struct mlx4_mr_table {
struct mlx4_bitmap mpt_bitmap;
struct mlx4_buddy mtt_buddy;
u64 mtt_base;
u64 mpt_base;
struct mlx4_icm_table mtt_table;
struct mlx4_icm_table dmpt_table;
};
/*
 * CQ table: number bitmap, spinlock, radix tree, and the two backing ICM
 * tables (context and cMPT). mlx4_srq_table below has the same shape.
 */
struct mlx4_cq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
};
/*
 * EQ table: number bitmap, IRQ name storage, interrupt-clear and UAR
 * mappings, the EQ array and its backing ICM tables.
 */
struct mlx4_eq_table {
struct mlx4_bitmap bitmap;
char *irq_names;
void __iomem *clr_int;
void __iomem **uar_map;
u32 clr_mask;
struct mlx4_eq *eq;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
int have_irq;
u8 inta_pin;
};
/* SRQ table; same shape as mlx4_cq_table. */
struct mlx4_srq_table {
struct mlx4_bitmap bitmap;
spinlock_t lock;
struct radix_tree_root tree;
struct mlx4_icm_table table;
struct mlx4_icm_table cmpt_table;
};
/*
 * QP number zones. MLX4_QP_TABLE_ZONE_NUM is the zone count (3) and sizes
 * mlx4_qp_table.zones_uids[].
 */
enum mlx4_qp_table_zones {
MLX4_QP_TABLE_ZONE_GENERAL,
MLX4_QP_TABLE_ZONE_RSS,
MLX4_QP_TABLE_ZONE_RAW_ETH,
MLX4_QP_TABLE_ZONE_NUM
};
/*
 * QP table: zone allocator handles, RDMARC base/shift, a spinlock, and the
 * five ICM tables backing QP state.
 */
struct mlx4_qp_table {
struct mlx4_bitmap *bitmap_gen;
struct mlx4_zone_allocator *zones;
u32 zones_uids[MLX4_QP_TABLE_ZONE_NUM];
u32 rdmarc_base;
int rdmarc_shift;
spinlock_t lock;
struct mlx4_icm_table qp_table;
struct mlx4_icm_table auxc_table;
struct mlx4_icm_table altc_table;
struct mlx4_icm_table rdmarc_table;
struct mlx4_icm_table cmpt_table;
};
/* MCG table: mutex, number bitmap and the backing ICM table. */
struct mlx4_mcg_table {
struct mutex mutex;
struct mlx4_bitmap bitmap;
struct mlx4_icm_table table;
};
/*
 * Catastrophic-error state: a memory-mapped 32-bit word buffer, a timer
 * and a list node. See mlx4_start_catas_poll()/mlx4_stop_catas_poll().
 */
struct mlx4_catas_err {
u32 __iomem *map;
struct timer_list timer;
struct list_head list;
};
/*
 * MAC table: 128 entries. MLX4_MAC_TABLE_SIZE is 128 << 3 = 1024, i.e. the
 * byte size of entries[] (128 x 8-byte __be64).
 */
#define MLX4_MAX_MAC_NUM 128
#define MLX4_MAC_TABLE_SIZE (MLX4_MAX_MAC_NUM << 3)
struct mlx4_mac_table {
__be64 entries[MLX4_MAX_MAC_NUM];
int refs[MLX4_MAX_MAC_NUM];
/* Note: bool here, whereas mlx4_vlan_table.is_dup[] is int. */
bool is_dup[MLX4_MAX_MAC_NUM];
struct mutex mutex;
int total;
int max;
};
/* One RoCE GID: 16 raw bytes. */
#define MLX4_ROCE_GID_ENTRY_SIZE 16
struct mlx4_roce_gid_entry {
u8 raw[MLX4_ROCE_GID_ENTRY_SIZE];
};
/* Table of MLX4_ROCE_MAX_GIDS GID entries with its own mutex. */
struct mlx4_roce_gid_table {
struct mlx4_roce_gid_entry roce_gids[MLX4_ROCE_MAX_GIDS];
struct mutex mutex;
};
/*
 * VLAN table: 128 entries. MLX4_VLAN_TABLE_SIZE is 128 << 2 = 512, i.e.
 * the byte size of entries[] (128 x 4-byte __be32).
 */
#define MLX4_MAX_VLAN_NUM 128
#define MLX4_VLAN_TABLE_SIZE (MLX4_MAX_VLAN_NUM << 2)
struct mlx4_vlan_table {
__be32 entries[MLX4_MAX_VLAN_NUM];
int refs[MLX4_MAX_VLAN_NUM];
int is_dup[MLX4_MAX_VLAN_NUM];
struct mutex mutex;
int total;
int max;
};
/*
 * SET_PORT constants: a 3-bit validity mask (0x7) and two shift positions
 * (bits 31 and 30). NOTE(review): presumably applied to the promisc/mcast
 * words of mlx4_set_port_rqp_calc_context; confirm at the users.
 */
#define SET_PORT_GEN_ALL_VALID 0x7
#define SET_PORT_PROMISC_SHIFT 31
#define SET_PORT_MC_PROMISC_SHIFT 30
/* Multicast mode values 0..2. */
enum {
MCAST_DIRECT_ONLY = 0,
MCAST_DIRECT = 1,
MCAST_DEFAULT = 2
};
/*
 * SET_PORT "general" mailbox layout. With the usual fixed-width sizes the
 * struct is 24 bytes; member order defines the layout.
 */
struct mlx4_set_port_general_context {
u16 reserved1;
u8 v_ignore_fcs;
u8 flags;
/* The same byte is viewed as either ignore_fcs or roce_mode. */
union {
u8 ignore_fcs;
u8 roce_mode;
};
u8 reserved2;
__be16 mtu;
u8 pptx;
u8 pfctx;
u16 reserved3;
u8 pprx;
u8 pfcrx;
u16 reserved4;
u32 reserved5;
u8 phv_en;
u8 reserved6[3];
};
/*
 * SET_PORT "RQP calc" mailbox layout. With the usual fixed-width sizes the
 * struct is 28 bytes; member order defines the layout.
 */
struct mlx4_set_port_rqp_calc_context {
__be32 base_qpn;
/* NOTE(review): "rererved" is a misspelling of "reserved"; kept because
 * the member name is part of the struct's interface. */
u8 rererved;
u8 n_mac;
u8 n_vlan;
u8 n_prio;
u8 reserved2[3];
u8 mac_miss;
u8 intra_no_vlan;
u8 no_vlan;
u8 intra_vlan_miss;
u8 vlan_miss;
u8 reserved3[3];
u8 no_vlan_prio;
__be32 promisc;
__be32 mcast;
};
/*
 * Per-port state: owning device and port number, two named device
 * attributes (port type and MTU) with 16-byte name buffers, and the
 * port's MAC, VLAN and RoCE GID tables.
 */
struct mlx4_port_info {
struct mlx4_dev *dev;
int port;
char dev_name[16];
struct device_attribute port_attr;
enum mlx4_port_type tmp_type;
char dev_mtu_name[16];
struct device_attribute port_mtu_attr;
struct mlx4_mac_table mac_table;
struct mlx4_vlan_table vlan_table;
struct mlx4_roce_gid_table gid_table;
int base_qpn;
};
/*
 * Port-sensing state: per-port flags and the delayed work that polls.
 * See mlx4_start_sense()/mlx4_stop_sense()/mlx4_sense_init().
 */
struct mlx4_sense {
struct mlx4_dev *dev;
u8 do_sense_port[MLX4_MAX_PORTS + 1];
u8 sense_allowed[MLX4_MAX_PORTS + 1];
struct delayed_work sense_poll;
int gone;
};
/* MSI-X vector pool: a bitmap of MAX_MSIX bits and its mutex. */
struct mlx4_msix_ctl {
DECLARE_BITMAP(pool_bm, MAX_MSIX);
struct mutex pool_lock;
};
/* Steering lists, one promisc-QP list and one entry list per steer type. */
struct mlx4_steer {
struct list_head promisc_qps[MLX4_NUM_STEERS];
struct list_head steer_entries[MLX4_NUM_STEERS];
};
/*
 * PCI device flag bits.
 * NOTE(review): presumably stored in mlx4_priv.pci_dev_data; confirm.
 */
enum {
MLX4_PCI_DEV_IS_VF = 1 << 0,
MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
};
/*
 * Values for a use_rr argument, such as the one taken by
 * mlx4_bitmap_free() and mlx4_bitmap_free_range().
 */
enum {
MLX4_NO_RR = 0,
MLX4_USE_RR = 1,
};
/*
 * Driver-private device state. It embeds the public struct mlx4_dev as
 * the member named 'dev'; the mlx4_priv() accessor in this header recovers
 * the enclosing structure from a pointer to that member via container_of().
 */
struct mlx4_priv {
struct mlx4_dev dev;
struct list_head dev_list;
struct list_head ctx_list;
spinlock_t ctx_lock;
int pci_dev_data;
int removed;
struct list_head pgdir_list;
struct mutex pgdir_mutex;
struct mlx4_fw fw;
struct mlx4_cmd cmd;
struct mlx4_mfunc mfunc;
/* Allocators and tables for each hardware object type. */
struct mlx4_bitmap pd_bitmap;
struct mlx4_bitmap xrcd_bitmap;
struct mlx4_uar_table uar_table;
struct mlx4_mr_table mr_table;
struct mlx4_cq_table cq_table;
struct mlx4_eq_table eq_table;
struct mlx4_srq_table srq_table;
struct mlx4_qp_table qp_table;
struct mlx4_mcg_table mcg_table;
struct mlx4_bitmap counters_bitmap;
/* Sized MLX4_MAX_PORTS, unlike port[] below which has one extra slot. */
int def_counter[MLX4_MAX_PORTS];
struct mlx4_catas_err catas_err;
void __iomem *clr_base;
struct mlx4_uar driver_uar;
void __iomem *kar;
struct mlx4_port_info port[MLX4_MAX_PORTS + 1];
struct mlx4_sense sense;
struct mutex port_mutex;
struct mlx4_msix_ctl msix_ctl;
struct mlx4_steer *steer;
struct list_head bf_list;
struct mutex bf_mutex;
struct io_mapping *bf_mapping;
void __iomem *clock_mapping;
int reserved_mtts;
int fs_hash_mode;
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
struct mlx4_port_map v2p; /* cached port mapping configuration */
struct mutex bond_mutex; /* for bond mode */
__be64 slave_node_guids[MLX4_MFUNC_MAX];
atomic_t opreq_count;
struct work_struct opreq_task;
};
/*
 * Map a public mlx4_dev pointer back to the struct mlx4_priv that embeds it.
 *
 * 'dev' must point at the 'dev' member of a struct mlx4_priv; the result is
 * computed purely by pointer arithmetic (container_of), with no checking.
 */
static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv;

	priv = container_of(dev, struct mlx4_priv, dev);
	return priv;
}
/*
 * Three times HZ. NOTE(review): assuming HZ is ticks per second, this is a
 * three-second interval expressed in ticks.
 */
#define MLX4_SENSE_RANGE (HZ * 3)
/* Driver workqueue, defined in another translation unit. */
extern struct workqueue_struct *mlx4_wq;
/*
 * Bitmap allocator API operating on struct mlx4_bitmap.
 * The alloc functions return a u32 object number; the range variants take
 * a count (and, for allocation, an alignment and a skip mask). The free
 * functions take a use_rr flag (see MLX4_NO_RR / MLX4_USE_RR).
 * NOTE(review): failure return values are not visible in this header.
 */
u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap);
void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr);
u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
int align, u32 skip_mask);
void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt,
int use_rr);
u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap);
/*
 * Initialise a struct mlx4_bitmap.
 *
 * bitmap:       allocator state to set up
 * num:          number of objects the bitmap covers
 * mask:         mask value for the allocator (cf. mlx4_bitmap.mask)
 * reserved_bot: count reserved at the bottom of the range
 * reserved_top: count reserved at the top (cf. mlx4_bitmap.reserved_top)
 *
 * Returns an int status. NOTE(review): the parameter descriptions are
 * inferred from the names and the struct members; confirm against the
 * implementation.
 */
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 reserved_top);
/* Counterpart of mlx4_bitmap_init(). */
void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap);
int mlx4_reset(struct mlx4_dev *dev);
int mlx4_alloc_eq_table(struct mlx4_dev *dev);
void mlx4_free_eq_table(struct mlx4_dev *dev);
/*
 * Per-table init/cleanup pairs. Each init returns an int status and each
 * has a matching void cleanup taking the same device pointer.
 */
int mlx4_init_pd_table(struct mlx4_dev *dev);
int mlx4_init_xrcd_table(struct mlx4_dev *dev);
int mlx4_init_uar_table(struct mlx4_dev *dev);
int mlx4_init_mr_table(struct mlx4_dev *dev);
int mlx4_init_eq_table(struct mlx4_dev *dev);
int mlx4_init_cq_table(struct mlx4_dev *dev);
int mlx4_init_qp_table(struct mlx4_dev *dev);
int mlx4_init_srq_table(struct mlx4_dev *dev);
int mlx4_init_mcg_table(struct mlx4_dev *dev);
void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
/*
 * Double-underscore helpers for ICM/MPT/MTT allocation and release.
 * The CQ and SRQ allocators return the new number through an int pointer;
 * the QP and MPT allocators take the number/index plus a gfp_t.
 * NOTE(review): presumably the native (non-wrapped) implementations used
 * by the master; confirm at the callers.
 */
int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
int __mlx4_mpt_reserve(struct mlx4_dev *dev);
void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
/*
 * Command wrappers for MTT/MPT/EQ/config/DMA commands.
 * Every mlx4_*_wrapper() has the signature of mlx4_cmd_info.wrapper:
 * (dev, slave, vhcr, inbox, outbox, cmd) returning an int status.
 */
int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/*
 * Double-underscore helpers for QP ranges, MAC registration, MTT writes,
 * counters and XRC domains. Allocators return an int status and hand the
 * allocated value back through a pointer (base, idx, xrcdn).
 */
int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
int *base, u8 flags);
void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list);
int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
int mlx4_calc_vf_counters(struct mlx4_dev *dev, int slave, int port,
struct mlx4_counter *data);
int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
/* Catastrophic-error polling and device restart. */
void mlx4_start_catas_poll(struct mlx4_dev *dev);
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
int mlx4_catas_init(struct mlx4_dev *dev);
void mlx4_catas_end(struct mlx4_dev *dev);
int mlx4_restart_one(struct pci_dev *pdev);
/* Device registration and event dispatch. */
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
unsigned long param);
/* Forward declarations: only pointers to these are needed below. */
struct mlx4_dev_cap;
struct mlx4_init_hca_param;
/* Takes a requested profile and device caps; returns a u64. */
u64 mlx4_make_profile(struct mlx4_dev *dev,
struct mlx4_profile *request,
struct mlx4_dev_cap *dev_cap,
struct mlx4_init_hca_param *init_hca);
/*
 * Work handlers (work_struct callbacks); cf. the comm_work,
 * slave_event_work and slave_flr_event_work members of
 * struct mlx4_mfunc_master_ctx.
 */
void mlx4_master_comm_channel(struct work_struct *work);
void mlx4_gen_slave_eqe(struct work_struct *work);
void mlx4_master_handle_slave_flr(struct work_struct *work);
/*
 * Command wrappers for resource, EQ, CQ, SRQ and QP commands.
 * Every mlx4_*_wrapper() has the signature of mlx4_cmd_info.wrapper:
 * (dev, slave, vhcr, inbox, outbox, cmd) returning an int status.
 */
int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/* CQ commands. */
int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/* SRQ commands. */
int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/* QP commands, one wrapper per state transition named in the function. */
int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/* Takes a device, a slave id and an EQE; returns an int status. */
int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
/*
 * Bit flags for the cleanup_mask argument of mlx4_cmd_cleanup().
 * MLX4_CMD_CLEANUP_ALL evaluates to (8 << 1) - 1 = 0xf, the OR of the four
 * individual flags. A new flag must be added after VHCR and the ALL
 * expression updated to use it.
 */
enum {
MLX4_CMD_CLEANUP_STRUCT = 1UL << 0,
MLX4_CMD_CLEANUP_POOL = 1UL << 1,
MLX4_CMD_CLEANUP_HCR = 1UL << 2,
MLX4_CMD_CLEANUP_VHCR = 1UL << 3,
MLX4_CMD_CLEANUP_ALL = (MLX4_CMD_CLEANUP_VHCR << 1) - 1
};
/*
 * Command interface set-up and teardown. cleanup_mask is a combination of
 * the MLX4_CMD_CLEANUP_* flags.
 */
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
int mlx4_multi_func_init(struct mlx4_dev *dev);
int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev);
void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
/* Switch the command interface between event-driven and polling modes. */
int mlx4_cmd_use_events(struct mlx4_dev *dev);
void mlx4_cmd_use_polling(struct mlx4_dev *dev);
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
u16 op, unsigned long timeout);
/* Completion and asynchronous-event entry points, keyed by object number. */
void mlx4_cq_tasklet_cb(unsigned long data);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
/* Port-type sensing (state lives in struct mlx4_sense). */
int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
enum mlx4_port_type *type);
void mlx4_do_sense_ports(struct mlx4_dev *dev,
enum mlx4_port_type *stype,
enum mlx4_port_type *defaults);
void mlx4_start_sense(struct mlx4_dev *dev);
void mlx4_stop_sense(struct mlx4_dev *dev);
void mlx4_sense_init(struct mlx4_dev *dev);
int mlx4_check_port_params(struct mlx4_dev *dev,
enum mlx4_port_type *port_type);
int mlx4_change_port_types(struct mlx4_dev *dev,
enum mlx4_port_type *port_types);
/* Per-port MAC / VLAN / RoCE GID table helpers. */
void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
void mlx4_init_roce_gid_table(struct mlx4_dev *dev,
struct mlx4_roce_gid_table *table);
void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
int mlx4_bond_vlan_table(struct mlx4_dev *dev);
int mlx4_unbond_vlan_table(struct mlx4_dev *dev);
int mlx4_bond_mac_table(struct mlx4_dev *dev);
int mlx4_unbond_mac_table(struct mlx4_dev *dev);
int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
/* Resource tracker functions. */
int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
enum mlx4_resource resource_type,
u64 resource_id, int *slave);
void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave);
int mlx4_init_resource_tracker(struct mlx4_dev *dev);
void mlx4_free_resource_tracker(struct mlx4_dev *dev,
enum mlx4_res_tracker_free_type type);
/*
 * Command wrappers for firmware, port and device-capability queries.
 * Every mlx4_*_wrapper() has the signature of mlx4_cmd_info.wrapper:
 * (dev, slave, vhcr, inbox, outbox, cmd) returning an int status.
 */
int mlx4_QUERY_FW_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/* Port capability / table-length queries; results come back via pointers. */
int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
int mlx4_get_slave_pkey_gid_tbl_len(struct mlx4_dev *dev, u8 port,
int *gid_tbl_len, int *pkey_tbl_len);
/* Command wrappers for QP attach/update and promiscuous mode. */
int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
/*
 * Attach/detach a QP for a 16-byte GID under a given protocol and steer
 * type. As a parameter, 'u8 gid[16]' is a plain u8 pointer: the length is
 * documentation only and is not enforced by the compiler.
 */
int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
enum mlx4_protocol prot, enum mlx4_steer_type steer);
int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
int block_mcast_loopback, enum mlx4_protocol prot,
enum mlx4_steer_type steer);
int mlx4_trans_to_dmfs_attach(struct mlx4_dev *dev, struct mlx4_qp *qp,
u8 gid[16], u8 port,
int block_mcast_loopback,
enum mlx4_protocol prot, u64 *reg_id);
int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
int port, void *buf);
int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
/* Overwrite the low 32 bits of *arg with <val>; the high 32 bits are kept. */
static inline void set_param_l(u64 *arg, u32 val)
{
	u64 high_half = *arg & 0xffffffff00000000ULL;

	*arg = high_half | (u64) val;
}
/* Overwrite the high 32 bits of *arg with <val>; the low 32 bits are kept. */
static inline void set_param_h(u64 *arg, u32 val)
{
	u64 low_half = *arg & 0xffffffff;
	u64 new_high = (u64) val << 32;

	*arg = low_half | new_high;
}
/* Return the low 32 bits of *arg. */
static inline u32 get_param_l(u64 *arg)
{
	u64 low_half = *arg & 0xffffffff;

	return (u32) low_half;
}
/* Return the high 32 bits of *arg. */
static inline u32 get_param_h(u64 *arg)
{
	u64 high_half = *arg >> 32;

	return (u32) high_half;
}
/* Return the address of the resource tracker spinlock kept in the master state of <dev>. */
static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev)
{
	spinlock_t *tracker_lock;

	tracker_lock = &mlx4_priv(dev)->mfunc.master.res_tracker.lock;
	return tracker_lock;
}
#define NOT_MASKED_PD_BITS 17
void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work);
void mlx4_init_quotas(struct mlx4_dev *dev);
/* for VFs, replace zero MACs with randomly-generated MACs at driver start */
void mlx4_replace_zero_macs(struct mlx4_dev *dev);
int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
/* Returns the VF index of slave */
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
int mlx4_config_mad_demux(struct mlx4_dev *dev);
int mlx4_do_bond(struct mlx4_dev *dev, bool enable);
int mlx4_bond_fs_rules(struct mlx4_dev *dev);
int mlx4_unbond_fs_rules(struct mlx4_dev *dev);
/*
 * Per-zone policy bits, passed in the <flags> argument of
 * mlx4_zone_add_one(). MLX4_ZONE_USE_RR carries the use_rr setting of the
 * attached bitmap. The other bits presumably choose which other zones may be
 * tried, by relative priority, when an allocation from this zone fails -
 * TODO confirm against the allocator implementation.
 */
enum mlx4_zone_flags {
MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0,
MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO = 1UL << 1,
MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO = 1UL << 2,
MLX4_ZONE_USE_RR = 1UL << 3,
};
/* Allocator-wide flags, passed to mlx4_zone_allocator_create(). */
enum mlx4_zone_alloc_flags {
/* No two objects could overlap between zones. UID
 * could be left unused. If this flag is given and
 * two overlapped zones are used, an object will be freed
 * from the smallest possible matching zone.
 */
MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP = 1UL << 0,
};
struct mlx4_zone_allocator;
/* Create a new zone allocator */
struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags);
/* Attach a mlx4_bitmap <bitmap> of priority <priority> to the zone allocator
* <zone_alloc>. Allocating an object from this zone adds an offset <offset>.
* Similarly, when searching for an object to free, this offset is taken into
* account. The use_rr mlx4_ib parameter for allocating objects from this <bitmap>
* is given through the MLX4_ZONE_USE_RR flag in <flags>.
* When an allocation fails, <zone_alloc> tries to allocate from other zones
* according to the policy set by <flags>. The unique identifier assigned to
* this zone is returned in <puid>.
*/
int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc,
struct mlx4_bitmap *bitmap,
u32 flags,
int priority,
int offset,
u32 *puid);
/* Remove bitmap indicated by <uid> from <zone_alloc> */
int mlx4_zone_remove_one(struct mlx4_zone_allocator *zone_alloc, u32 uid);
/* Delete the zone allocator <zone_alloc>. This function doesn't destroy
* the attached bitmaps.
*/
void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc);
/* Allocate <count> objects with align <align> and skip_mask <skip_mask>
* from the mlx4_bitmap whose uid is <uid>. The bitmap which we actually
* allocated from is returned in <puid>. If the allocation fails, a negative
* number is returned. Otherwise, the offset of the first object is returned.
*/
u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count,
int align, u32 skip_mask, u32 *puid);
/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator
* <zones>.
*/
u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones,
u32 uid, u32 obj, u32 count);
/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of
* specifying the uid when freeing an object, the zone allocator can figure it
* out by itself. Other parameters are similar to mlx4_zone_free_entries.
*/
u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count);
/* Returns a pointer to mlx4_bitmap that was attached to <zones> with <uid> */
struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid);
#endif /* MLX4_H */
diff --git a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
index d33750fdb7be..d08e6733b3b3 100644
--- a/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
+++ b/sys/dev/mlx4/mlx4_ib/mlx4_ib_main.c
@@ -1,3339 +1,3339 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#define LINUXKPI_PARAM_PREFIX mlx4_
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/notifier.h>
#include <linux/delay.h>
#include <net/ipv6.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <dev/mlx4/driver.h>
#include <dev/mlx4/cmd.h>
#include <dev/mlx4/qp.h>
#include <linux/sched.h>
#include <linux/page.h>
#include <linux/printk.h>
#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>
#include "wc.h"
#define DRV_NAME MLX4_IB_DRV_NAME
#ifndef DRV_VERSION
-#define DRV_VERSION "3.4.1"
+#define DRV_VERSION "3.5.0"
#endif
-#define DRV_RELDATE "February 2018"
+#define DRV_RELDATE "November 2018"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
#define MLX4_IB_CARD_REV_A0 0xA0
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
int mlx4_ib_sm_guid_assign = 0;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static struct workqueue_struct *wq;
/*
 * Fill in the header fields that every query in this file shares: a
 * version-1, LID-routed subnet management class "Get". Callers set the
 * attribute id and modifier afterwards.
 */
static void init_query_mad(struct ib_smp *mad)
{
	mad->method = IB_MGMT_METHOD_GET;
	mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
	mad->class_version = 1;
	mad->base_version = 1;
}
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
int ib_num_ports = 0;
int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
if (dmfs) {
int i;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
eth_num_ports++;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_num_ports++;
dmfs &= (!ib_num_ports ||
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
(!eth_num_ports ||
(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
if (ib_num_ports && mlx4_is_mfunc(dev)) {
pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
dmfs = 0;
}
}
return dmfs;
}
static int num_ib_ports(struct mlx4_dev *dev)
{
int ib_ports = 0;
int i;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_ports++;
return ib_ports;
}
/*
 * Look up the Ethernet net_device registered for <port_num> of <device>.
 *
 * The lookup and the dev_hold() both happen inside one RCU read-side
 * section. Returns whatever mlx4_get_protocol_dev() produced; when that is
 * non-NULL a reference has been taken with dev_hold() (presumably the caller
 * drops it with dev_put() - confirm against callers).
 */
static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
{
struct mlx4_ib_dev *ibdev = to_mdev(device);
struct net_device *dev;
rcu_read_lock();
dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
/*
 * Compiled out: when bonded, this would replace <dev> with the bond's
 * currently active slave.
 */
#if 0
if (dev) {
if (mlx4_is_bonded(ibdev->dev)) {
struct net_device *upper = NULL;
upper = netdev_master_upper_dev_get_rcu(dev);
if (upper) {
struct net_device *active;
active = bond_option_active_slave_get_rcu(netdev_priv(upper));
if (active)
dev = active;
}
}
}
#endif
/* Take the reference before leaving the RCU section. */
if (dev)
dev_hold(dev);
rcu_read_unlock();
return dev;
}
/*
 * Write a GID table snapshot to the device using the plain layout
 * (one union ib_gid per entry).
 *
 * @gids:     snapshot holding MLX4_MAX_PORT_GIDS entries
 * @ibdev:    device to program
 * @port_num: port whose table is written
 *
 * When the device is bonded the same mailbox is also written to port 2.
 * Both commands are always issued. The return value is 0 on success,
 * otherwise the first failure seen (never the sum of two error codes).
 */
static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
				  struct mlx4_ib_dev *ibdev,
				  u8 port_num)
{
	struct mlx4_cmd_mailbox *mailbox;
	struct mlx4_dev *dev = ibdev->dev;
	union ib_gid *gid_tbl;
	int err;
	int i;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	gid_tbl = mailbox->buf;

	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
		memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));

	err = mlx4_cmd(dev, mailbox->dma,
		       MLX4_SET_PORT_GID_TABLE << 8 | port_num,
		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
		       MLX4_CMD_WRAPPED);
	if (mlx4_is_bonded(dev)) {
		int bond_err;

		bond_err = mlx4_cmd(dev, mailbox->dma,
				    MLX4_SET_PORT_GID_TABLE << 8 | 2,
				    1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
				    MLX4_CMD_WRAPPED);
		/* Adding two errno values would yield a meaningless code. */
		if (!err)
			err = bond_err;
	}

	mlx4_free_cmd_mailbox(dev, mailbox);
	return err;
}
/*
 * Write a GID table snapshot to the device using the extended layout that
 * carries a per-entry version and type next to the GID.
 *
 * @gids:     snapshot holding MLX4_MAX_PORT_GIDS entries (gid and gid_type
 *            are read from each)
 * @ibdev:    device to program
 * @port_num: port whose table is written
 *
 * Entries of type IB_GID_TYPE_ROCE_UDP_ENCAP get version 2; for those, a
 * non-v4-mapped address gets type 1, while a v4-mapped address has its first
 * 12 GID bytes cleared. Other entries only have their GID copied.
 *
 * When the device is bonded the same mailbox is also written to port 2.
 * Both commands are always issued. The return value is 0 on success,
 * otherwise the first failure seen (never the sum of two error codes).
 */
static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
				     struct mlx4_ib_dev *ibdev,
				     u8 port_num)
{
	struct mlx4_cmd_mailbox *mailbox;
	struct mlx4_dev *dev = ibdev->dev;
	int err;
	int i;
	struct {
		union ib_gid	gid;
		__be32		rsrvd1[2];
		__be16		rsrvd2;
		u8		type;
		u8		version;
		__be32		rsrvd3;
	} *gid_tbl;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	gid_tbl = mailbox->buf;
	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
		memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
		if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			gid_tbl[i].version = 2;
			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
				gid_tbl[i].type = 1;
			else
				memset(&gid_tbl[i].gid, 0, 12);
		}
	}

	err = mlx4_cmd(dev, mailbox->dma,
		       MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
		       MLX4_CMD_WRAPPED);
	if (mlx4_is_bonded(dev)) {
		int bond_err;

		bond_err = mlx4_cmd(dev, mailbox->dma,
				    MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
				    1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
				    MLX4_CMD_WRAPPED);
		/* Adding two errno values would yield a meaningless code. */
		if (!err)
			err = bond_err;
	}

	mlx4_free_cmd_mailbox(dev, mailbox);
	return err;
}
/*
 * Program the GID table snapshot <gids> for <port_num>, choosing the table
 * layout from the MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 capability bit.
 * Returns the result of the selected writer.
 */
static int mlx4_ib_update_gids(struct gid_entry *gids,
			       struct mlx4_ib_dev *ibdev,
			       u8 port_num)
{
	if (!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
		return mlx4_ib_update_gids_v1(gids, ibdev, port_num);

	return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
}
static int mlx4_ib_add_gid(struct ib_device *device,
u8 port_num,
unsigned int index,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
void **context)
{
struct mlx4_ib_dev *ibdev = to_mdev(device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int free = -1, found = -1;
int ret = 0;
int hw_update = 0;
int i;
struct gid_entry *gids = NULL;
if (!rdma_cap_roce_gid_table(device, port_num))
return -EINVAL;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
if (!context)
return -EINVAL;
port_gid_table = &iboe->gids[port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
(port_gid_table->gids[i].gid_type == attr->gid_type)) {
found = i;
break;
}
if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
free = i; /* HW has space */
}
if (found < 0) {
if (free < 0) {
ret = -ENOSPC;
} else {
port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
if (!port_gid_table->gids[free].ctx) {
ret = -ENOMEM;
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
}
}
} else {
struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
*context = ctx;
ctx->refcount++;
}
if (!ret && hw_update) {
gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
if (!gids) {
ret = -ENOMEM;
} else {
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
gids[i].gid_type = port_gid_table->gids[i].gid_type;
}
}
}
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
ret = mlx4_ib_update_gids(gids, ibdev, port_num);
kfree(gids);
}
return ret;
}
/*
 * Drop one reference on the GID entry identified by *context. When the last
 * reference goes away the slot is reset to zgid, its context is freed and
 * the table is written to hardware.
 *
 * @device:   IB device
 * @port_num: 1-based port number
 * @index:    not used by this function
 * @context:  points at the context handed out by mlx4_ib_add_gid(); a NULL
 *            *context makes the call a no-op
 *
 * Returns 0 on success, -EINVAL for a bad port or NULL <context>, -ENOMEM if
 * the table snapshot cannot be allocated, or the error of the hardware
 * update.
 */
static int mlx4_ib_del_gid(struct ib_device *device,
			   u8 port_num,
			   unsigned int index,
			   void **context)
{
	struct gid_cache_context *ctx;
	struct mlx4_ib_dev *ibdev = to_mdev(device);
	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
	struct mlx4_port_gid_table *port_gid_table;
	int ret = 0;
	int hw_update = 0;
	struct gid_entry *gids = NULL;

	/* Validate the port before it is used as an index anywhere. */
	if (port_num < 1 || port_num > MLX4_MAX_PORTS)
		return -EINVAL;

	if (!rdma_cap_roce_gid_table(device, port_num))
		return -EINVAL;

	if (!context)
		return -EINVAL;

	ctx = *context;
	port_gid_table = &iboe->gids[port_num - 1];
	spin_lock_bh(&iboe->lock);
	if (ctx) {
		ctx->refcount--;
		if (!ctx->refcount) {
			unsigned int real_index = ctx->real_index;

			memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
			kfree(port_gid_table->gids[real_index].ctx);
			port_gid_table->gids[real_index].ctx = NULL;
			hw_update = 1;
		}
	}

	if (!ret && hw_update) {
		int i;

		/* Snapshot the table so the command can run without the lock. */
		gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
		if (!gids) {
			ret = -ENOMEM;
		} else {
			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
				/*
				 * The v1/v2 table writer reads gid_type; without
				 * this copy it would see uninitialised memory.
				 */
				gids[i].gid_type = port_gid_table->gids[i].gid_type;
			}
		}
	}
	spin_unlock_bh(&iboe->lock);

	if (!ret && hw_update) {
		ret = mlx4_ib_update_gids(gids, ibdev, port_num);
		kfree(gids);
	}
	return ret;
}
/*
 * Translate a GID cache index into the slot index stored in the driver's
 * own GID table.
 *
 * On a bonded device port 1 is always used. When the port has no RoCE GID
 * table capability, <index> is returned unchanged. Otherwise the cached
 * GID and type at <index> are looked up in the per-port table and the
 * real_index recorded in the first matching entry's context is returned.
 *
 * Returns -EINVAL for an out-of-range port, a zero cached GID, or when no
 * matching entry with a context exists; errors from ib_get_cached_gid() are
 * passed through.
 */
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
				    u8 port_num, int index)
{
	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
	struct mlx4_port_gid_table *table;
	struct ib_gid_attr cached_attr;
	union ib_gid cached_gid;
	unsigned long irq_flags;
	int hw_index = -EINVAL;
	int slot;
	int rc;

	if (port_num > MLX4_MAX_PORTS)
		return -EINVAL;

	if (mlx4_is_bonded(ibdev->dev))
		port_num = 1;

	if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
		return index;

	rc = ib_get_cached_gid(&ibdev->ib_dev, port_num, index,
			       &cached_gid, &cached_attr);
	if (rc)
		return rc;

	/* Only the GID and its type are needed; drop the netdev reference. */
	if (cached_attr.ndev)
		dev_put(cached_attr.ndev);

	if (memcmp(&cached_gid, &zgid, sizeof(cached_gid)) == 0)
		return -EINVAL;

	spin_lock_irqsave(&iboe->lock, irq_flags);
	table = &iboe->gids[port_num - 1];

	for (slot = 0; slot < MLX4_MAX_PORT_GIDS; ++slot) {
		if (memcmp(&table->gids[slot].gid, &cached_gid,
			   sizeof(cached_gid)) != 0)
			continue;
		if (cached_attr.gid_type != table->gids[slot].gid_type)
			continue;

		/* The first match decides, even if it carries no context. */
		if (table->gids[slot].ctx)
			hw_index = table->gids[slot].ctx->real_index;
		break;
	}

	spin_unlock_irqrestore(&iboe->lock, irq_flags);
	return hw_index;
}
/*
 * Fill <props> with the device attributes and, when the caller supplied a
 * user buffer in <uhw>, return the driver-specific extended response.
 *
 * @ibdev: device being queried
 * @props: output; zeroed and then populated from a NodeInfo MAD and from
 *         the cached device capabilities / quotas
 * @uhw:   optional user command (inlen) and response buffer (outlen)
 *
 * Returns 0 on success, -EINVAL for a malformed user command, -ENOMEM when
 * the MAD buffers cannot be allocated, or an error from the MAD query, the
 * clock-parameter query or the copy to user space.
 */
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
int have_ib_ports;
struct mlx4_uverbs_ex_query_device cmd;
struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
struct mlx4_clock_params clock_params;
/* An input command is optional; if present it must be full-sized and empty. */
if (uhw->inlen) {
if (uhw->inlen < sizeof(cmd))
return -EINVAL;
err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
if (err)
return err;
if (cmd.comp_mask)
return -EINVAL;
if (cmd.reserved)
return -EINVAL;
}
/* Minimal response: everything up to and including response_length. */
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
/* err may have been cleared by the copy above, so this path can return 0 - NOTE(review) */
if (!in_mad || !out_mad)
goto out;
/* NodeInfo supplies vendor id, hardware revision and system image GUID below. */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memset(props, 0, sizeof *props);
have_ib_ports = num_ib_ports(dev->dev);
props->fw_ver = dev->dev->caps.fw_ver;
/* Capabilities reported unconditionally; optional ones are OR-ed in below. */
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
/* Automatic path migration is only advertised when an IB port exists. */
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
/* UD TSO is withheld on revision A0 cards. */
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
/* Memory management extensions need all three BMME features. */
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
}
if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
/* Fields taken from the NodeInfo reply; the vendor id keeps the low 24 bits. */
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->persist->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
/* Limits come from the cached capabilities and the per-function quotas. */
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->masked_atomic_cap = props->atomic_cap;
/* The P_Key table length of port 1 is reported for the whole device. */
props->max_pkeys = dev->dev->caps.pkey_table_len[1];
props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
/*
 * Clock parameters are only queried when not running as a slave.
 * NOTE(review): if this query fails and uhw->outlen is 0, its error is
 * what the function returns even though <props> is fully populated -
 * confirm that this is intended.
 */
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
/* Extend the response with the clock offset only if the user buffer has room. */
if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
resp.response_length += sizeof(resp.hca_core_clock_offset);
if (!err && !mlx4_is_slave(dev->dev)) {
resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
}
}
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
goto out;
}
out:
/* Either pointer may still be NULL here. */
kfree(in_mad);
kfree(out_mad);
return err;
}
/*
 * Report the link layer of <port_num>: InfiniBand when the port's entry in
 * caps.port_mask equals MLX4_PORT_TYPE_IB, Ethernet for anything else.
 */
static enum rdma_link_layer
mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx4_dev *mdev = to_mdev(device)->dev;

	if (mdev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB)
		return IB_LINK_LAYER_INFINIBAND;

	return IB_LINK_LAYER_ETHERNET;
}
/*
 * Query the attributes of an InfiniBand port through a PortInfo MAD.
 *
 * @ibdev:     device owning the port
 * @port:      port number
 * @props:     output attributes (the caller is expected to have cleared it)
 * @netw_view: non-zero requests the network view; on a multifunction
 *             device this adds MLX4_MAD_IFC_NET_VIEW, and the GID table
 *             length is then taken from the MAD instead of the cached caps
 *
 * A second, extended-port-info MAD is issued only when the reported speed
 * is QDR, to detect FDR-10. Returns 0 on success, -ENOMEM if the MAD
 * buffers cannot be allocated, or the error of a MAD query.
 */
static int ib_link_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props, int netw_view)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct ib_smp *in_mad;
	struct ib_smp *out_mad;
	int ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	int ext_speed;
	int err = -ENOMEM;

	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
	if (!in_mad || !out_mad)
		goto out;

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
	in_mad->attr_mod = cpu_to_be32(port);

	if (mlx4_is_mfunc(mdev->dev) && netw_view)
		ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(mdev, ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

	/* Multi-byte, big-endian fields of the reply. */
	props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
	props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
	props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
	props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
	props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));

	/* Single-byte and nibble fields of the reply. */
	props->active_width = out_mad->data[31] & 0xf;
	props->state = out_mad->data[32] & 0xf;
	props->phys_state = out_mad->data[33] >> 4;
	props->lmc = out_mad->data[34] & 0x7;
	props->active_speed = out_mad->data[35] >> 4;
	props->sm_sl = out_mad->data[36] & 0xf;
	props->active_mtu = out_mad->data[36] >> 4;
	props->max_vl_num = out_mad->data[37] >> 4;
	props->max_mtu = out_mad->data[41] & 0xf;
	props->init_type_reply = out_mad->data[41] >> 4;
	props->subnet_timeout = out_mad->data[51] & 0x1f;

	/* Values taken from the cached device capabilities. */
	props->max_msg_sz = mdev->dev->caps.max_msg_sz;
	props->pkey_tbl_len = mdev->dev->caps.pkey_table_len[port];
	if (netw_view)
		props->gid_tbl_len = out_mad->data[50];
	else
		props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];

	/* Check if extended speeds (EDR/FDR/...) are supported */
	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
		ext_speed = out_mad->data[62] >> 4;

		if (ext_speed == 1)
			props->active_speed = IB_SPEED_FDR;
		else if (ext_speed == 2)
			props->active_speed = IB_SPEED_EDR;
	}

	/* If reported active speed is QDR, check if is FDR-10 */
	if (props->active_speed == IB_SPEED_QDR) {
		init_query_mad(in_mad);
		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
		in_mad->attr_mod = cpu_to_be32(port);

		err = mlx4_MAD_IFC(mdev, ifc_flags, port,
				   NULL, NULL, in_mad, out_mad);
		if (err)
			goto out;

		/* Checking LinkSpeedActive for FDR-10 */
		if (out_mad->data[15] & 0x1)
			props->active_speed = IB_SPEED_FDR10;
	}

	/* Avoid wrong speed value returned by FW if the IB link is down. */
	if (props->state == IB_PORT_DOWN)
		props->active_speed = IB_SPEED_SDR;

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
/*
 * Derive a physical port state from a logical one: 5 for an active port,
 * 3 for every other state (presumably the PortInfo PhysicalState codes for
 * LinkUp and Disabled - confirm against the IB specification).
 */
static u8 state_to_phys_state(enum ib_port_state state)
{
	if (state == IB_PORT_ACTIVE)
		return 5;

	return 3;
}
/*
 * Query the attributes of an Ethernet (RoCE) port.
 *
 * @ibdev:     device owning the port
 * @port:      1-based port number
 * @props:     output attributes
 * @netw_view: not used by this function
 *
 * Static values and the link width come from a QUERY_PORT firmware command;
 * the MTU and the up/down state come from the net_device recorded for the
 * port, read under iboe->lock. Without a net_device the port is reported as
 * down with a 256-byte active MTU.
 *
 * Returns 0 on success, or the error from the mailbox allocation or the
 * firmware command.
 */
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
struct mlx4_ib_iboe *iboe = &mdev->iboe;
struct net_device *ndev;
enum ib_mtu tmp;
struct mlx4_cmd_mailbox *mailbox;
int err = 0;
int is_bonded = mlx4_is_bonded(mdev->dev);
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
goto out;
/* Byte 5 of the reply selects the width: 0x40 is reported as 4X, anything else as 1X. */
props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = IB_SPEED_QDR;
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
props->max_mtu = IB_MTU_4096;
props->max_vl_num = 2;
/* Defaults used when no net_device is attached to the port. */
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
/* With the body compiled out this branch does nothing; is_bonded is otherwise unused. */
if (ndev && is_bonded) {
#if 0
rcu_read_lock(); /* required to get upper dev */
ndev = netdev_master_upper_dev_get_rcu(ndev);
rcu_read_unlock();
#endif
}
if (!ndev)
goto out_unlock;
/* Clamp the interface MTU to max_mtu; fall back to 256 when iboe_get_mtu() yields 0. */
tmp = iboe_get_mtu(ndev->if_mtu);
props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock_bh(&iboe->lock);
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
}
/*
 * Clear <props> and fill it through the query routine that matches the
 * port's link layer (InfiniBand or Ethernet). <netw_view> is handed to that
 * routine unchanged. Returns the routine's result.
 */
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			 struct ib_port_attr *props, int netw_view)
{
	memset(props, 0, sizeof(*props));

	if (mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
		return ib_link_query_port(ibdev, port, props, netw_view);

	return eth_link_query_port(ibdev, port, props, netw_view);
}
/* Port query entry point: always asks for the host view (netw_view == 0). */
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	const int host_view = 0;

	return __mlx4_ib_query_port(ibdev, port, props, host_view);
}
/*
 * Read GID <index> of an InfiniBand port using MADs.
 *
 * The first 8 bytes of <gid> come from a PortInfo query, the last 8 bytes
 * from the GUIDInfo block that contains <index>. On a multifunction device
 * in host view (netw_view == 0), any non-zero index yields zeroes in the
 * last 8 bytes without a second query.
 *
 * Returns 0 on success, -ENOMEM if the MAD buffers cannot be allocated, or
 * the error of a MAD query.
 */
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			union ib_gid *gid, int netw_view)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct ib_smp *in_mad;
	struct ib_smp *out_mad;
	int ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	int err = -ENOMEM;

	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
	if (!in_mad || !out_mad)
		goto out;

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
	in_mad->attr_mod = cpu_to_be32(port);

	if (mlx4_is_mfunc(dev->dev) && netw_view)
		ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(dev, ifc_flags, port, NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

	memcpy(gid->raw, out_mad->data + 8, 8);

	if (mlx4_is_mfunc(dev->dev) && !netw_view && index) {
		/* For any index > 0, return the null guid */
		memset(gid->raw + 8, 0, 8);
		goto out;	/* err is 0 here */
	}

	/* Each GUIDInfo block holds 8 entries of 8 bytes. */
	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
	in_mad->attr_mod = cpu_to_be32(index / 8);

	err = mlx4_MAD_IFC(dev, ifc_flags, port,
			   NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
/*
 * GID query entry point.
 *
 * InfiniBand ports are answered by MAD queries in host view. RoCE ports
 * that have a GID table capability are answered from the GID cache, where
 * -EAGAIN from the cache is turned into a zero GID and a return of 0.
 * Any other port yields -ENODEV.
 */
static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	int rc;

	if (rdma_protocol_ib(ibdev, port))
		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);

	if (!rdma_protocol_roce(ibdev, port) ||
	    !rdma_cap_roce_gid_table(ibdev, port))
		return -ENODEV;

	rc = ib_get_cached_gid(ibdev, port, index, gid, NULL);
	if (rc != -EAGAIN)
		return rc;

	memcpy(gid, &zgid, sizeof(*gid));
	return 0;
}
/*
 * Read the SL-to-VL table of <port> and return it packed into one u64.
 *
 * A slave function gets an all-zero table without issuing a query.
 * Otherwise the first 8 data bytes of the MAD reply are copied, byte by
 * byte, into the union that backs *sl2vl_tbl.
 *
 * Returns 0 on success, -ENOMEM if the MAD buffers cannot be allocated, or
 * the error of the MAD query (in which case *sl2vl_tbl is not written).
 */
static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	union sl2vl_tbl_to_u64 packed;
	struct ib_smp *in_mad;
	struct ib_smp *out_mad;
	int ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	int err = -ENOMEM;
	int k;

	if (mlx4_is_slave(mdev->dev)) {
		*sl2vl_tbl = 0;
		return 0;
	}

	in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
	if (!in_mad || !out_mad)
		goto out;

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
	in_mad->attr_mod = 0;

	if (mlx4_is_mfunc(mdev->dev))
		ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(mdev, ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

	for (k = 0; k < 8; k++)
		packed.sl8[k] = out_mad->data[k];
	*sl2vl_tbl = packed.sl64;

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
/*
 * Seed the cached SL-to-VL mapping of every non-Ethernet port. A port whose
 * query fails is logged and cached as all zeroes.
 */
static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
{
	int port;

	for (port = 1; port <= mdev->dev->caps.num_ports; port++) {
		u64 mapping;
		int rc;

		if (mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
			continue;

		rc = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &mapping);
		if (rc) {
			pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n",
			       port, rc);
			mapping = 0;
		}

		/* The cache array is 0-based, ports are 1-based. */
		atomic64_set(&mdev->sl2vl[port - 1], mapping);
	}
}
/*
 * Fetch the P_Key at @index of the P_Key table of @port into *@pkey.
 *
 * The table is addressed in blocks of 32 entries: the block number
 * (@index / 32) is placed in the attribute modifier and entry @index % 32 of
 * the reply is converted to CPU byte order. MLX4_MAD_IFC_NET_VIEW is added
 * to the MAD flags only when @netw_view is non-zero and the device is
 * multi-function.
 *
 * Returns 0 on success, -ENOMEM when a MAD buffer cannot be allocated, or
 * the error reported by mlx4_MAD_IFC(). *@pkey is written only on success.
 */
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			 u16 *pkey, int netw_view)
{
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	struct ib_smp *out_mad = NULL;
	struct ib_smp *in_mad = NULL;
	__be16 *pkey_block;
	int err;

	in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
	if (!in_mad || !out_mad) {
		err = -ENOMEM;
		goto out;
	}

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
	in_mad->attr_mod = cpu_to_be32(index / 32);

	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
			   in_mad, out_mad);
	if (err)
		goto out;

	pkey_block = (__be16 *) out_mad->data;
	*pkey = be16_to_cpu(pkey_block[index % 32]);

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
/*
 * Verbs entry point for a P_Key lookup: forwards to __mlx4_ib_query_pkey()
 * with netw_view = 0 and returns its result unchanged.
 */
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
}
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx4_cmd_mailbox *mailbox;
unsigned long flags;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
if (mlx4_is_slave(to_mdev(ibdev)->dev))
return -EOPNOTSUPP;
spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
/*
* If possible, pass node desc to FW, so it can generate
* a 144 trap. If cmd fails, just ignore.
*/
mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
if (IS_ERR(mailbox))
return 0;
memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
return 0;
}
/*
 * Issue the IB flavour of the SET_PORT firmware command for @port.
 *
 * The mailbox carries a "reset qkey violations" flag and the capability
 * mask; their position depends on MLX4_FLAG_OLD_PORT_CMDS:
 *   old layout: flag in bit 6 of byte 0, mask in 32-bit word 2
 *   new layout: flag in byte 3,          mask in 32-bit word 1
 *
 * Returns the mailbox allocation error, or the result of mlx4_cmd().
 */
static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
			    u32 cap_mask)
{
	struct mlx4_cmd_mailbox *mailbox;
	__be32 *words;
	u8 *bytes;
	int err;

	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	bytes = mailbox->buf;
	words = mailbox->buf;

	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
		bytes[0] = !!reset_qkey_viols << 6;
		words[2] = cpu_to_be32(cap_mask);
	} else {
		bytes[3] = !!reset_qkey_viols;
		words[1] = cpu_to_be32(cap_mask);
	}

	err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
		       MLX4_CMD_WRAPPED);

	mlx4_free_cmd_mailbox(dev->dev, mailbox);
	return err;
}
/*
 * Update the capability mask of @port and optionally reset its qkey
 * violation counter (IB_PORT_RESET_QKEY_CNTR in @mask).
 *
 * The new mask is (current flags | set_port_cap_mask) & ~clr_port_cap_mask,
 * where the current flags come from mlx4_ib_query_port(). The query and the
 * SET_PORT command run under cap_mask_mutex.
 *
 * Returns 0 immediately for Ethernet ports, otherwise the first error from
 * the port query or from mlx4_ib_SET_PORT().
 */
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 cap_mask;
	int err;

	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
	 * of whether port link layer is ETH or IB. For ETH ports, qkey
	 * violations and port capabilities are not meaningful.
	 */
	if (mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
		return 0;

	mutex_lock(&mdev->cap_mask_mutex);

	err = mlx4_ib_query_port(ibdev, port, &attr);
	if (!err) {
		cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
			   ~props->clr_port_cap_mask;
		err = mlx4_ib_SET_PORT(mdev, port,
				       !!(mask & IB_PORT_RESET_QKEY_CNTR),
				       cap_mask);
	}

	mutex_unlock(&mdev->cap_mask_mutex);
	return err;
}
/*
 * Create a user context: allocate the context object and a UAR for it, and
 * copy the device parameters userspace needs back through @udata.
 *
 * Two response layouts exist. When ibdev->uverbs_abi_ver equals
 * MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION the v3 response (without dev_caps
 * and cqe_size) is used; otherwise the full response is used.
 *
 * Returns the new context, or ERR_PTR(-EAGAIN) while the device is not
 * active, ERR_PTR(-ENOMEM) on allocation failure, the mlx4_uar_alloc()
 * error, or ERR_PTR(-EFAULT) when the copy to userspace fails.
 */
static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
						  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
	struct mlx4_ib_alloc_ucontext_resp resp;
	struct mlx4_ib_ucontext *context;
	int legacy_abi;
	int err;

	if (!dev->ib_active)
		return ERR_PTR(-EAGAIN);

	legacy_abi = (ibdev->uverbs_abi_ver ==
		      MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION);

	if (legacy_abi) {
		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
	} else {
		resp.dev_caps	      = dev->dev->caps.userspace_caps;
		resp.qp_tab_size      = dev->dev->caps.num_qps;
		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
		resp.cqe_size	      = dev->dev->caps.cqe_size;
	}

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	err = mlx4_uar_alloc(dev->dev, &context->uar);
	if (err) {
		kfree(context);
		return ERR_PTR(err);
	}

	INIT_LIST_HEAD(&context->db_page_list);
	mutex_init(&context->db_page_mutex);

	err = legacy_abi ?
		ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)) :
		ib_copy_to_udata(udata, &resp, sizeof(resp));
	if (!err)
		return &context->ibucontext;

	/* Userspace never saw the context: release everything. */
	mlx4_uar_free(dev->dev, &context->uar);
	kfree(context);
	return ERR_PTR(-EFAULT);
}
static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
kfree(context);
return 0;
}
/*
 * vm_ops .open callback: detach the newly created VMA from this driver by
 * clearing its vm_ops pointer.
 */
static void mlx4_ib_vma_open(struct vm_area_struct *area)
{
/* vma_open is called when a new VMA is created on top of our VMA.
* This is done through either mremap flow or split_vma (usually due
* to mlock, madvise, munmap, etc.). We do not support a clone of the
* vma, as this VMA is strongly hardware related. Therefore we set the
* vm_ops of the newly created/cloned VMA to NULL, to prevent it from
* calling us again and trying to do incorrect actions. We assume that
* the original vma size is exactly a single page and that no
* "splitting" operations will be performed on it.
*/
area->vm_ops = NULL;
}
/*
 * vm_ops .close callback: forget the VMA recorded in the driver's private
 * per-mapping data (area->vm_private_data).
 */
static void mlx4_ib_vma_close(struct vm_area_struct *area)
{
	/* It's guaranteed that all VMAs opened on a FD are closed before the
	 * file itself is closed, therefore no sync is needed with the regular
	 * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
	 * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
	 * The close operation is usually called under mm->mmap_sem except when
	 * process is exiting. The exiting case is handled explicitly as part
	 * of mlx4_ib_disassociate_ucontext.
	 */
	struct mlx4_ib_vma_private_data *priv = area->vm_private_data;

	/* Clear the vma pointer kept in the mlx4_ib driver's private data to
	 * protect against a race condition in
	 * mlx4_ib_disassociate_ucontext().
	 */
	priv->vma = NULL;
}
/*
 * VMA callbacks installed on driver mappings by mlx4_ib_set_vma_data():
 * .open detaches cloned VMAs, .close clears the recorded VMA pointer.
 */
static const struct vm_operations_struct mlx4_ib_vm_ops = {
.open = mlx4_ib_vma_open,
.close = mlx4_ib_vma_close
};
/*
 * Link @vma and @vma_private_data to each other and install mlx4_ib_vm_ops
 * on the VMA, so that the open/close callbacks can find and update the
 * private data.
 */
static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
struct mlx4_ib_vma_private_data *vma_private_data)
{
vma_private_data->vma = vma;
vma->vm_private_data = vma_private_data;
vma->vm_ops = &mlx4_ib_vm_ops;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
if (vma->vm_pgoff == 0) {
/* We prevent double mmaping on same context */
if (mucontext->hw_bar_info[HW_BAR_DB].vma)
return -EINVAL;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
/* We prevent double mmaping on same context */
if (mucontext->hw_bar_info[HW_BAR_BF].vma)
return -EINVAL;
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
} else if (vma->vm_pgoff == 3) {
struct mlx4_clock_params params;
int ret;
/* We prevent double mmaping on same context */
if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
return -EINVAL;
ret = mlx4_get_internal_clock_params(dev->dev, &params);
if (ret)
return ret;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (io_remap_pfn_range(vma, vma->vm_start,
(pci_resource_start(dev->dev->persist->pdev,
params.bar) +
params.offset)
>> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
mlx4_ib_set_vma_data(vma,
&mucontext->hw_bar_info[HW_BAR_CLOCK]);
} else {
return -EINVAL;
}
return 0;
}
/*
 * Allocate a protection domain.
 *
 * When @context is non-NULL the PD number is also copied to userspace
 * through @udata; a failed copy releases the PD again.
 *
 * Returns the new PD, or ERR_PTR(-ENOMEM), the mlx4_pd_alloc() error, or
 * ERR_PTR(-EFAULT) when the copy to userspace fails.
 */
static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
				      struct ib_ucontext *context,
				      struct ib_udata *udata)
{
	struct mlx4_ib_pd *pd;
	int err;

	pd = kmalloc(sizeof *pd, GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
	if (err) {
		kfree(pd);
		return ERR_PTR(err);
	}

	if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
		mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
		kfree(pd);
		return ERR_PTR(-EFAULT);
	}

	return &pd->ibpd;
}
static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
kfree(pd);
return 0;
}
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_xrcd *xrcd;
struct ib_cq_init_attr cq_attr = {};
int err;
if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
return ERR_PTR(-ENOSYS);
xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
if (!xrcd)
return ERR_PTR(-ENOMEM);
err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
if (err)
goto err1;
xrcd->pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(xrcd->pd)) {
err = PTR_ERR(xrcd->pd);
goto err2;
}
cq_attr.cqe = 1;
xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
}
return &xrcd->ibxrcd;
err3:
ib_dealloc_pd(xrcd->pd);
err2:
mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
err1:
kfree(xrcd);
return ERR_PTR(err);
}
static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
kfree(xrcd);
return 0;
}
/*
 * Record @gid in the QP's list of attached multicast GIDs.
 *
 * When mlx4_ib_add_mc() reports success the entry also remembers the QP's
 * port and is flagged as "added". The entry is appended to mqp->gid_list
 * under mqp->mutex.
 *
 * Returns 0, or -ENOMEM when the entry cannot be allocated.
 */
static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
	struct mlx4_ib_gid_entry *entry;

	entry = kzalloc(sizeof *entry, GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->gid = *gid;
	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
		entry->port = mqp->port;
		entry->added = 1;
	}

	mutex_lock(&mqp->mutex);
	list_add_tail(&entry->list, &mqp->gid_list);
	mutex_unlock(&mqp->mutex);

	return 0;
}
/*
 * Empty @ctr_table: every entry is unlinked and freed, and entries marked
 * as allocated first give their counter index back to the device. The
 * whole walk runs under ctr_table->mutex.
 */
static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
					  struct mlx4_ib_counters *ctr_table)
{
	struct counter_index *entry;
	struct counter_index *next;

	mutex_lock(&ctr_table->mutex);

	list_for_each_entry_safe(entry, next, &ctr_table->counters_list,
				 list) {
		if (entry->allocated)
			mlx4_counter_free(ibdev->dev, entry->index);

		list_del(&entry->list);
		kfree(entry);
	}

	mutex_unlock(&ctr_table->mutex);
}
/*
 * Tell whether a net device is currently registered for the QP's port.
 *
 * Returns 0 when the QP has no port assigned or no net device is recorded
 * in mdev->iboe.netdevs[] for that port, and 1 otherwise. The net device is
 * looked up under iboe.lock; a reference is taken while the lock is held
 * and dropped again before returning. @gid is not used.
 */
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
		   union ib_gid *gid)
{
	struct net_device *ndev;

	if (!mqp->port)
		return 0;

	spin_lock_bh(&mdev->iboe.lock);
	ndev = mdev->iboe.netdevs[mqp->port - 1];
	if (ndev)
		dev_hold(ndev);
	spin_unlock_bh(&mdev->iboe.lock);

	if (!ndev)
		return 0;

	dev_put(ndev);
	return 1;
}
/*
 * One multicast steering rule attached to a QP. Entries are linked on the
 * QP's steering_rules list by mlx4_ib_mcg_attach() and looked up by GID in
 * mlx4_ib_mcg_detach().
 */
struct mlx4_ib_steering {
struct list_head list; /* link in mqp->steering_rules */
struct mlx4_flow_reg_id reg_id; /* registration ids (.id and .mirror) from the attach */
union ib_gid gid; /* multicast GID the rule was attached for */
};
/*
 * Name of the last filter member, per spec type, that parse_flow_attr()
 * accepts in a mask. Anything placed after it must be zero.
 */
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD dst_ip
#define LAST_TCP_UDP_FIELD src_port
/* Field is the last supported field */
/*
 * Scans the bytes of `filter` that follow `field` with memchr_inv(..., 0, ...);
 * the result is non-NULL when any of those trailing bytes is non-zero, i.e.
 * when the caller set something this driver does not support.
 */
#define FIELDS_NOT_SUPPORTED(filter, field)\
memchr_inv((void *)&filter.field +\
sizeof(filter.field), 0,\
sizeof(filter) -\
offsetof(typeof(filter), field) -\
sizeof(filter.field))
/*
 * Translate one verbs flow specification into a hardware rule.
 *
 * @dev:       device used to map the rule id and to look up the rule size
 * @qp_num:    QP number written into IB rules (not used by other rule types)
 * @ib_spec:   specification to translate
 * @mlx4_spec: output location for the hardware rule
 *
 * Returns the value of mlx4_hw_rule_sz() for the rule type on success
 * (callers advance their output cursor by it), -ENOTSUPP when the spec's
 * mask has non-zero bytes after the last supported field, or -EINVAL for an
 * unknown spec type or when the id/size lookups report an error.
 */
static int parse_flow_attr(struct mlx4_dev *dev,
u32 qp_num,
union ib_flow_spec *ib_spec,
struct _rule_hw *mlx4_spec)
{
enum mlx4_net_trans_rule_id type;
switch (ib_spec->type) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
return -ENOTSUPP;
type = MLX4_NET_TRANS_RULE_ID_ETH;
memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
ETH_ALEN);
memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
ETH_ALEN);
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
break;
case IB_FLOW_SPEC_IB:
if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
return -ENOTSUPP;
type = MLX4_NET_TRANS_RULE_ID_IB;
/* Only the QP number is programmed; the spec's val/mask are not copied. */
mlx4_spec->ib.l3_qpn =
cpu_to_be32(qp_num);
mlx4_spec->ib.qpn_mask =
cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
return -ENOTSUPP;
type = MLX4_NET_TRANS_RULE_ID_IPV4;
mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
type = ib_spec->type == IB_FLOW_SPEC_TCP ?
MLX4_NET_TRANS_RULE_ID_TCP :
MLX4_NET_TRANS_RULE_ID_UDP;
mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
break;
default:
return -EINVAL;
}
/* Both lookups must succeed before the rule header is filled in. */
if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
mlx4_hw_rule_sz(dev, type) < 0)
return -EINVAL;
mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
/* Stored size is the rule size divided by 4 (presumably 32-bit words). */
mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
return mlx4_hw_rule_sz(dev, type);
}
/*
 * Description of one "default rule" set: when a flow matches the pattern
 * given by the mandatory fields, the rules in rules_create_list are placed
 * in front of the user's own specs (see __mlx4_ib_create_flow()).
 */
struct default_rules {
__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; /* spec types compared against the flow's specs, one slot per layer */
__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; /* spec types whose presence rejects the match */
__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS]; /* spec types of the rules to create; 0 means "no rule" */
__u8 link_layer; /* link layer this entry applies to */
};
/*
 * Table of default rule sets. The single entry applies to InfiniBand link
 * layer ports: it names IPv4 as the mandatory spec, rejects flows carrying
 * an ETH spec, and requests creation of an IB rule.
 */
static const struct default_rules default_table[] = {
{
.mandatory_fields = {IB_FLOW_SPEC_IPV4},
.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
.rules_create_list = {IB_FLOW_SPEC_IB},
.link_layer = IB_LINK_LAYER_INFINIBAND
}
};
/*
 * Find the default_table entry, if any, that applies to @flow_attr.
 *
 * Only entries whose link layer equals that of the flow's port are
 * examined. For such an entry the flow's specs (laid out back to back right
 * after the ib_flow_attr header, each advancing by its own ->size) are
 * checked twice: against mandatory_fields and against mandatory_not_fields.
 *
 * Returns the index of the matching entry, or -1. Note that a failed check
 * jumps straight to "out", so later table entries are not tried once an
 * entry with the right link layer has been rejected.
 */
static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
struct ib_flow_attr *flow_attr)
{
int i, j, k;
void *ib_flow;
const struct default_rules *pdefault_rules = default_table;
u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
/* NOTE(review): field_types is cleared but never read in this function. */
__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
memset(&field_types, 0, sizeof(field_types));
if (link_layer != pdefault_rules->link_layer)
continue;
/* First spec starts immediately after the attribute header. */
ib_flow = flow_attr + 1;
/* we assume the specs are sorted */
/* k walks the mandatory_fields slots; j counts specs consumed so far. */
for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
j < flow_attr->num_of_specs; k++) {
union ib_flow_spec *current_flow =
(union ib_flow_spec *)ib_flow;
/* same layer but different type */
if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
(pdefault_rules->mandatory_fields[k] &
IB_FLOW_SPEC_LAYER_MASK)) &&
(current_flow->type !=
pdefault_rules->mandatory_fields[k]))
goto out;
/* same layer, try match next one */
if (current_flow->type ==
pdefault_rules->mandatory_fields[k]) {
j++;
ib_flow +=
((union ib_flow_spec *)ib_flow)->size;
}
}
/* Second pass: no spec may have a type listed in mandatory_not_fields. */
ib_flow = flow_attr + 1;
for (j = 0; j < flow_attr->num_of_specs;
j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
/* same layer and same type */
if (((union ib_flow_spec *)ib_flow)->type ==
pdefault_rules->mandatory_not_fields[k])
goto out;
return i;
}
out:
return -1;
}
static int __mlx4_ib_create_default_rules(
struct mlx4_ib_dev *mdev,
struct ib_qp *qp,
const struct default_rules *pdefault_rules,
struct _rule_hw *mlx4_spec) {
int size = 0;
int i;
for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
int ret;
union ib_flow_spec ib_spec;
switch (pdefault_rules->rules_create_list[i]) {
case 0:
/* no rule */
continue;
case IB_FLOW_SPEC_IB:
ib_spec.type = IB_FLOW_SPEC_IB;
ib_spec.size = sizeof(struct ib_flow_spec_ib);
break;
default:
/* invalid rule */
return -EINVAL;
}
/* We must put empty rule, qpn is being ignored */
ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
mlx4_spec);
if (ret < 0) {
pr_info("invalid parsing\n");
return -EINVAL;
}
mlx4_spec = (void *)mlx4_spec + ret;
size += ret;
}
return size;
}
/*
 * Build one flow-steering rule in a command mailbox and attach it.
 *
 * @qp:        QP that receives the matching traffic
 * @flow_attr: verbs flow attributes; the specs follow the header in memory
 * @domain:    verbs flow domain, mapped to an mlx4 domain via __mlx4_domain[]
 * @flow_type: mlx4 steering mode for this rule
 * @reg_id:    passed to mlx4_cmd_imm() as the command's immediate output
 *             (the registration id of the attached rule)
 *
 * Mailbox layout: control header, then any default rules selected by
 * __mlx4_ib_default_rules_match(), then one hardware rule per user spec.
 *
 * Returns 0 on success; -EINVAL for an out-of-range priority or domain, an
 * unsupported steering mode, or a spec that cannot be translated; the
 * mailbox allocation error; or the error of the attach command.
 */
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
int domain,
enum mlx4_net_trans_promisc_mode flow_type,
u64 *reg_id)
{
int ret, i;
int size = 0;
void *ib_flow;
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
int default_flow;
static const u16 __mlx4_domain[] = {
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
[IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
[IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
[IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
};
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
pr_err("Invalid priority value %d\n", flow_attr->priority);
return -EINVAL;
}
/* NOTE(review): only the upper bound is checked; a negative domain is not rejected here. */
if (domain >= IB_FLOW_DOMAIN_NUM) {
pr_err("Invalid domain value %d\n", domain);
return -EINVAL;
}
if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
return -EINVAL;
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
/* The control header sits at the start of the mailbox buffer. */
ctrl = mailbox->buf;
ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
flow_attr->priority);
ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
ctrl->port = flow_attr->port;
ctrl->qpn = cpu_to_be32(qp->qp_num);
/* First user spec starts right after the attribute header. */
ib_flow = flow_attr + 1;
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
/* Add default flows */
default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
if (default_flow >= 0) {
ret = __mlx4_ib_create_default_rules(
mdev, qp, default_table + default_flow,
mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
}
size += ret;
}
/*
 * Translate each user spec and append it to the mailbox.
 * NOTE(review): nothing visible here bounds `size` against the mailbox
 * buffer length.
 */
for (i = 0; i < flow_attr->num_of_specs; i++) {
ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
}
ib_flow += ((union ib_flow_spec *) ib_flow)->size;
size += ret;
}
/* The command's input modifier is the accumulated size divided by 4. */
ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
if (ret == -ENOMEM)
pr_err("mcg table is full. Fail to register network rule.\n");
else if (ret == -ENXIO)
pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
else if (ret)
pr_err("Invalid argument. Fail to register network rule.\n");
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return ret;
}
/*
 * Detach the flow-steering rule registered under @reg_id.
 *
 * Returns the result of the MLX4_QP_FLOW_STEERING_DETACH command; a failure
 * is logged together with the registration id.
 */
static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
{
	int rc = mlx4_cmd(dev, reg_id, 0, 0,
			  MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
			  MLX4_CMD_WRAPPED);

	if (!rc)
		return 0;

	pr_err("Fail to detach network rule. registration id = 0x%llx\n",
	       (long long)reg_id);
	return rc;
}
/*
 * Add a tunnel steering rule for a flow, when applicable.
 *
 * Nothing is done (and 0 is returned, leaving *@reg_id untouched) unless
 * the device runs in VXLAN tunnel offload mode, is not in DMFS A0 static
 * mode, and the flow consists of exactly one spec of type ETH. Otherwise
 * the destination MAC of that spec is handed to mlx4_tunnel_steer_add() and
 * its result is returned.
 */
static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
				    u64 *reg_id)
{
	struct mlx4_dev *dev = to_mdev(qp->device)->dev;
	union ib_flow_spec *first_spec;

	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
		return 0; /* do nothing */

	/* The first spec is laid out right after the attribute header. */
	first_spec = (union ib_flow_spec *)(flow_attr + 1);
	if (first_spec->type != IB_FLOW_SPEC_ETH ||
	    flow_attr->num_of_specs != 1)
		return 0; /* do nothing */

	return mlx4_tunnel_steer_add(dev, first_spec->eth.val.dst_mac,
				     flow_attr->port, qp->qp_num,
				     MLX4_DOMAIN_UVERBS |
				     (flow_attr->priority & 0xff),
				     reg_id);
}
/*
 * Choose the sniffer steering mode(s) for a "don't trap" flow.
 *
 * @type has (at least) two slots; slot 0 is always set on success and
 * slot 1 only when both multicast and unicast sniffing are needed.
 *
 * Returns -EOPNOTSUPP when the device lacks the UC/MC sniffer capability,
 * runs in DMFS A0 static mode, or the flow has more than one spec or a
 * non-zero priority. Returns -EINVAL when the single spec is not ETH, or
 * when its destination MAC mask is neither all-zero nor exactly the
 * multicast bit. Returns 0 otherwise.
 */
static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
				      struct ib_flow_attr *flow_attr,
				      enum mlx4_net_trans_promisc_mode *type)
{
	union ib_flow_spec *ib_spec;
	u8 mac[ETH_ALEN];
	int i;

	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
	    (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
	    (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0))
		return -EOPNOTSUPP;

	if (flow_attr->num_of_specs == 0) {
		type[0] = MLX4_FS_MC_SNIFFER;
		type[1] = MLX4_FS_UC_SNIFFER;
		return 0;
	}

	ib_spec = (union ib_flow_spec *)(flow_attr + 1);
	if (ib_spec->type != IB_FLOW_SPEC_ETH)
		return -EINVAL;

	/* if all is zero then MC and UC */
	if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
		type[0] = MLX4_FS_MC_SNIFFER;
		type[1] = MLX4_FS_UC_SNIFFER;
		return 0;
	}

	/* Copy the mask and flip its multicast bit. A non-empty mask is
	 * valid only if that bit is set and the rest are zero, in which
	 * case the flipped copy is all-zero.
	 */
	for (i = 0; i < ETH_ALEN; i++)
		mac[i] = ib_spec->eth.mask.dst_mac[i];
	mac[0] ^= 0x01;

	if (!is_zero_ether_addr(&mac[0]))
		return -EINVAL;

	if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
		type[0] = MLX4_FS_MC_SNIFFER;
	else
		type[0] = MLX4_FS_UC_SNIFFER;

	return 0;
}
/*
 * Create a flow-steering object for @qp.
 *
 * The verbs flow type is translated into up to two mlx4 steering modes
 * (type[]); one hardware rule is attached per mode and its registration id
 * stored in mflow->reg_id[n].id. On a bonded device every rule is mirrored
 * on port 2 and the mirror's id stored in mflow->reg_id[n].mirror. For
 * IB_FLOW_ATTR_NORMAL flows a tunnel steering rule is added in the next
 * free slot, if one is left.
 *
 * Returns the new flow, ERR_PTR(-EINVAL) for a bad port or flow type,
 * ERR_PTR(-EOPNOTSUPP) when "don't trap" is combined with a non-normal
 * flow, ERR_PTR(-ENOMEM), or the error of the step that failed. On failure
 * every rule attached so far is detached again.
 */
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
					   struct ib_flow_attr *flow_attr,
					   int domain)
{
	/*
	 * i and j count the primary (.id) and mirror (.mirror) rules that
	 * have been created successfully: reg_id[0..i-1].id and
	 * reg_id[0..j-1].mirror are the ones the error path must undo.
	 */
	int err = 0, i = 0, j = 0;
	struct mlx4_ib_flow *mflow;
	enum mlx4_net_trans_promisc_mode type[2];
	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
	int is_bonded = mlx4_is_bonded(dev);

	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
		return ERR_PTR(-EINVAL);

	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
		return ERR_PTR(-EOPNOTSUPP);

	memset(type, 0, sizeof(type));

	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
	if (!mflow) {
		err = -ENOMEM;
		goto err_free;
	}

	switch (flow_attr->type) {
	case IB_FLOW_ATTR_NORMAL:
		/* If dont trap flag (continue match) is set, under specific
		 * condition traffic be replicated to given qp,
		 * without stealing it
		 */
		if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
			err = mlx4_ib_add_dont_trap_rule(dev,
							 flow_attr,
							 type);
			if (err)
				goto err_free;
		} else {
			type[0] = MLX4_FS_REGULAR;
		}
		break;

	case IB_FLOW_ATTR_ALL_DEFAULT:
		type[0] = MLX4_FS_ALL_DEFAULT;
		break;

	case IB_FLOW_ATTR_MC_DEFAULT:
		type[0] = MLX4_FS_MC_DEFAULT;
		break;

	case IB_FLOW_ATTR_SNIFFER:
		type[0] = MLX4_FS_MIRROR_RX_PORT;
		type[1] = MLX4_FS_MIRROR_SX_PORT;
		break;

	default:
		err = -EINVAL;
		goto err_free;
	}

	while (i < ARRAY_SIZE(type) && type[i]) {
		err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
					    &mflow->reg_id[i].id);
		if (err)
			goto err_create_flow;
		/*
		 * Count the rule right away so that it is detached even if
		 * creating its mirror fails below.
		 */
		i++;
		if (is_bonded) {
			/* Application always sees one port so the mirror rule
			 * must be on port #2
			 */
			flow_attr->port = 2;
			err = __mlx4_ib_create_flow(qp, flow_attr,
						    domain, type[j],
						    &mflow->reg_id[j].mirror);
			flow_attr->port = 1;
			if (err)
				goto err_create_flow;
			j++;
		}
	}

	if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
		err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
					       &mflow->reg_id[i].id);
		if (err)
			goto err_create_flow;
		i++;
		if (is_bonded) {
			flow_attr->port = 2;
			err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
						       &mflow->reg_id[j].mirror);
			flow_attr->port = 1;
			if (err)
				goto err_create_flow;
			j++;
		}
	}

	return &mflow->ibflow;

err_create_flow:
	/*
	 * Undo in reverse order. Pre-decrement so that slot 0 is covered and
	 * the slot whose creation failed is not touched. A zero id means no
	 * rule was registered in that slot (mflow is zero-allocated and the
	 * tunnel step may legitimately do nothing), matching the convention
	 * used by mlx4_ib_destroy_flow().
	 */
	while (i) {
		i--;
		if (mflow->reg_id[i].id)
			(void)__mlx4_ib_destroy_flow(dev,
						     mflow->reg_id[i].id);
	}

	while (j) {
		j--;
		if (mflow->reg_id[j].mirror)
			(void)__mlx4_ib_destroy_flow(dev,
						     mflow->reg_id[j].mirror);
	}
err_free:
	kfree(mflow);
	return ERR_PTR(err);
}
/*
 * Destroy a flow created by mlx4_ib_create_flow().
 *
 * Walks mflow->reg_id[] until the first slot whose .id is zero, detaching
 * each rule and, when present, its mirror. The flow object is freed in all
 * cases. Returns 0, or the error of the last detach that failed.
 */
static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
{
	struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
	struct mlx4_ib_flow *mflow = to_mflow(flow_id);
	int last_err = 0;
	int rc;
	int i;

	for (i = 0; i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id;
	     i++) {
		rc = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
		if (rc)
			last_err = rc;

		if (!mflow->reg_id[i].mirror)
			continue;

		rc = __mlx4_ib_destroy_flow(mdev->dev,
					    mflow->reg_id[i].mirror);
		if (rc)
			last_err = rc;
	}

	kfree(mflow);
	return last_err;
}
/*
 * Attach @ibqp to the multicast group identified by @gid.
 *
 * The attach is issued on the QP's port and, on a bonded device, mirrored
 * on the other port. The GID is then recorded on the QP's gid_list and, in
 * device-managed steering mode, the registration ids are remembered on the
 * QP's steering_rules list so that mlx4_ib_mcg_detach() can find them.
 * @lid is not used.
 *
 * Returns 0 on success, -ENOMEM, or the error of the step that failed;
 * attaches made before the failure are undone.
 */
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_dev *dev = mdev->dev;
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
struct mlx4_ib_steering *ib_steering = NULL;
enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
struct mlx4_flow_reg_id reg_id;
/* Allocate the bookkeeping entry up front so that a later failure to allocate cannot leave a rule attached. */
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
if (!ib_steering)
return -ENOMEM;
}
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
prot, &reg_id.id);
if (err) {
pr_err("multicast attach op failed, err %d\n", err);
goto err_malloc;
}
/* Zero means "no mirror"; the error path below tests it. */
reg_id.mirror = 0;
if (mlx4_is_bonded(dev)) {
/* Mirror the attach on the other port (1 <-> 2). */
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
(mqp->port == 1) ? 2 : 1,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
prot, &reg_id.mirror);
if (err)
goto err_add;
}
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
if (ib_steering) {
memcpy(ib_steering->gid.raw, gid->raw, 16);
ib_steering->reg_id = reg_id;
mutex_lock(&mqp->mutex);
list_add(&ib_steering->list, &mqp->steering_rules);
mutex_unlock(&mqp->mutex);
}
return 0;
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
prot, reg_id.id);
if (reg_id.mirror)
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
prot, reg_id.mirror);
err_malloc:
/* ib_steering is NULL outside device-managed mode; kfree() is still called on it. */
kfree(ib_steering);
return err;
}
/*
 * Look up the entry on qp->gid_list whose GID equals the 16 bytes at @raw.
 * Returns the first matching entry, or NULL when there is none. The list is
 * not modified.
 */
static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
{
	struct mlx4_ib_gid_entry *cur;
	struct mlx4_ib_gid_entry *next;

	list_for_each_entry_safe(cur, next, &qp->gid_list, list) {
		if (memcmp(raw, cur->gid.raw, 16) == 0)
			return cur;
	}

	return NULL;
}
/*
 * Detach @ibqp from the multicast group identified by @gid.
 *
 * In device-managed steering mode the registration ids are taken from the
 * matching entry on the QP's steering_rules list, which is unlinked and
 * freed; without such an entry -EINVAL is returned and the rule stays
 * attached. The detach is then issued for the primary id and, on a bonded
 * device, for the mirror id. Finally the GID's entry on the QP's gid_list
 * is removed (a missing entry is only warned about). @lid is not used.
 *
 * Returns 0 on success, -EINVAL as described, or a detach error.
 */
static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
	struct mlx4_dev *dev = mdev->dev;
	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
	struct mlx4_flow_reg_id reg_id = {0, 0};
	struct mlx4_ib_gid_entry *ge;
	struct net_device *ndev;
	int err;

	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		struct mlx4_ib_steering *match = NULL;
		struct mlx4_ib_steering *rule;

		mutex_lock(&mqp->mutex);
		list_for_each_entry(rule, &mqp->steering_rules, list) {
			if (memcmp(rule->gid.raw, gid->raw, 16) != 0)
				continue;

			list_del(&rule->list);
			match = rule;
			break;
		}
		mutex_unlock(&mqp->mutex);

		if (!match) {
			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
			return -EINVAL;
		}

		reg_id = match->reg_id;
		kfree(match);
	}

	err = mlx4_multicast_detach(dev, &mqp->mqp, gid->raw,
				    prot, reg_id.id);
	if (err)
		return err;

	if (mlx4_is_bonded(dev)) {
		err = mlx4_multicast_detach(dev, &mqp->mqp, gid->raw,
					    prot, reg_id.mirror);
		if (err)
			return err;
	}

	mutex_lock(&mqp->mutex);
	ge = find_gid_entry(mqp, gid->raw);
	if (!ge) {
		pr_warn("could not find mgid entry\n");
	} else {
		/* Look up the port's net device under iboe.lock, holding a
		 * reference only for the duration of the lookup.
		 */
		spin_lock_bh(&mdev->iboe.lock);
		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
		if (ndev)
			dev_hold(ndev);
		spin_unlock_bh(&mdev->iboe.lock);
		if (ndev)
			dev_put(ndev);

		list_del(&ge->list);
		kfree(ge);
	}
	mutex_unlock(&mqp->mutex);

	return 0;
}
/*
 * Populate the node description, node GUID and hardware revision of @dev
 * from two MAD queries issued on port 1:
 *   IB_SMP_ATTR_NODE_DESC -> ib_dev.node_desc
 *   IB_SMP_ATTR_NODE_INFO -> dev->rev_id (32-bit big-endian value at data
 *                            offset 32) and ib_dev.node_guid (8 bytes at
 *                            data offset 12)
 * MLX4_MAD_IFC_NET_VIEW is added to the MAD flags on a master function.
 *
 * Returns 0 on success, -ENOMEM when a MAD buffer cannot be allocated, or
 * the error reported by mlx4_MAD_IFC().
 */
static int init_node_data(struct mlx4_ib_dev *dev)
{
	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
	struct ib_smp *out_mad = NULL;
	struct ib_smp *in_mad = NULL;
	int err;

	in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
	if (!in_mad || !out_mad) {
		err = -ENOMEM;
		goto out;
	}

	init_query_mad(in_mad);
	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
	if (mlx4_is_master(dev->dev))
		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;

	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

	memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);

	/* Reuse the same request buffer for the second query. */
	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;

	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
	if (err)
		goto out;

	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);

out:
	kfree(in_mad);
	kfree(out_mad);
	return err;
}
/*
 * Device attribute "hca_type": prints "MT" followed by the decimal PCI
 * device id and a newline. Returns the number of characters written.
 */
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
/*
 * Device attribute "hw_rev": prints the device's rev_id in hexadecimal
 * followed by a newline. Returns the number of characters written.
 */
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->dev->rev_id);
}
/*
 * Device attribute "board_id": prints at most MLX4_BOARD_ID_LEN characters
 * of the device's board_id followed by a newline. Returns the number of
 * characters written.
 */
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
/* The precision bounds the print, so board_id need not be NUL-terminated within that length. */
return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
dev->dev->board_id);
}
/* Read-only (S_IRUGO) device attributes backed by the show_*() helpers above. */
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
/* The three attributes collected in one array. */
static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type,
&dev_attr_board_id
};
/* Descriptor of one diagnostic counter: its reported name and its offset. */
struct diag_counter {
const char *name; /* counter name (the stringified first DIAG_COUNTER argument) */
u32 offset; /* offset handed to mlx4_query_diag_counters() via the diag tables */
};
/* Builds a struct diag_counter initializer; _name is stringified. */
#define DIAG_COUNTER(_name, _offset) \
{ .name = #_name, .offset = _offset }
/* Counters that are always included in both the device and the per-port sets. */
static const struct diag_counter diag_basic[] = {
DIAG_COUNTER(rq_num_lle, 0x00),
DIAG_COUNTER(sq_num_lle, 0x04),
DIAG_COUNTER(rq_num_lqpoe, 0x08),
DIAG_COUNTER(sq_num_lqpoe, 0x0C),
DIAG_COUNTER(rq_num_lpe, 0x18),
DIAG_COUNTER(sq_num_lpe, 0x1C),
DIAG_COUNTER(rq_num_wrfe, 0x20),
DIAG_COUNTER(sq_num_wrfe, 0x24),
DIAG_COUNTER(sq_num_mwbe, 0x2C),
DIAG_COUNTER(sq_num_bre, 0x34),
DIAG_COUNTER(sq_num_rire, 0x44),
DIAG_COUNTER(rq_num_rire, 0x48),
DIAG_COUNTER(sq_num_rae, 0x4C),
DIAG_COUNTER(rq_num_rae, 0x50),
DIAG_COUNTER(sq_num_roe, 0x54),
DIAG_COUNTER(sq_num_tree, 0x5C),
DIAG_COUNTER(sq_num_rree, 0x64),
DIAG_COUNTER(rq_num_rnr, 0x68),
DIAG_COUNTER(sq_num_rnr, 0x6C),
DIAG_COUNTER(rq_num_oos, 0x100),
DIAG_COUNTER(sq_num_oos, 0x104),
};
/* Counters added only when the device reports MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT. */
static const struct diag_counter diag_ext[] = {
DIAG_COUNTER(rq_num_dup, 0x130),
DIAG_COUNTER(sq_num_to, 0x134),
};
/* Counters added only to the device-wide set (never to the per-port set). */
static const struct diag_counter diag_device_only[] = {
DIAG_COUNTER(num_cqovf, 0x1A0),
DIAG_COUNTER(rq_num_udsdprd, 0x118),
};
/*
 * Allocate the hw-stats container for the device (@port_num == 0) or for a
 * port (@port_num != 0). The counter set used is diag_counters[0] or
 * diag_counters[1] respectively. Returns NULL when that set has no names
 * allocated, otherwise the result of rdma_alloc_hw_stats_struct().
 */
static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
						    u8 port_num)
{
	struct mlx4_ib_diag_counters *set =
		&to_mdev(ibdev)->diag_counters[!!port_num];

	if (!set->name)
		return NULL;

	return rdma_alloc_hw_stats_struct(set->name,
					  set->num_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
/*
 * Read the diagnostic counters for the device (@port == 0) or for @port
 * into stats->value[].
 *
 * The counter set is diag_counters[0] for the device and diag_counters[1]
 * for a port. Values are fetched as 32-bit quantities into a local buffer
 * large enough for every known counter and then copied out in order.
 * @index is not used.
 *
 * Returns the number of counters written, or the error reported by
 * mlx4_query_diag_counters().
 */
static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u8 port, int index)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_diag_counters *set = &dev->diag_counters[!!port];
	u32 hw_value[ARRAY_SIZE(diag_device_only) +
		ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
	int rc;
	int n;

	rc = mlx4_query_diag_counters(dev->dev,
				      MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
				      set->offset, hw_value,
				      set->num_counters, port);
	if (rc)
		return rc;

	for (n = 0; n < set->num_counters; n++)
		stats->value[n] = hw_value[n];

	return set->num_counters;
}
/*
 * Allocate the name and offset arrays for one diagnostic counter set.
 *
 * @ibdev:  device; its capabilities decide whether diag_ext is included
 * @name:   receives the zero-filled array of counter names
 * @offset: receives the zero-filled array of counter offsets
 * @num:    receives the number of counters in the set
 * @port:   true for the per-port set, false for the device-wide set (which
 *          additionally includes diag_device_only)
 *
 * Returns 0 on success or -ENOMEM. On failure nothing stays allocated and
 * *@name is left NULL rather than pointing at freed memory, so that a later
 * unconditional kfree() of the caller's slot (as done by
 * mlx4_ib_diag_cleanup()) is harmless. *@num is written only on success.
 */
static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
					 const char ***name,
					 u32 **offset,
					 u32 *num,
					 bool port)
{
	u32 num_counters;

	num_counters = ARRAY_SIZE(diag_basic);

	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
		num_counters += ARRAY_SIZE(diag_ext);

	if (!port)
		num_counters += ARRAY_SIZE(diag_device_only);

	*name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
	if (!*name)
		return -ENOMEM;

	*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
	if (!*offset)
		goto err_name;

	*num = num_counters;

	return 0;

err_name:
	kfree(*name);
	/* Do not leave a dangling pointer in caller-owned storage. */
	*name = NULL;
	return -ENOMEM;
}
/*
 * Fill the @name and @offset arrays of one counter set.
 *
 * Entries are written in this order: all of diag_basic, then diag_ext when
 * the device reports MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT, then
 * diag_device_only when @port is false. The arrays must have room for every
 * entry selected this way (the same selection __mlx4_ib_alloc_diag_counters()
 * uses for sizing).
 */
static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
				       const char **name,
				       u32 *offset,
				       bool port)
{
	int out = 0;	/* next free slot in name[] / offset[] */
	int i;

	for (i = 0; i < ARRAY_SIZE(diag_basic); i++, out++) {
		name[out] = diag_basic[i].name;
		offset[out] = diag_basic[i].offset;
	}

	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
		for (i = 0; i < ARRAY_SIZE(diag_ext); i++, out++) {
			name[out] = diag_ext[i].name;
			offset[out] = diag_ext[i].offset;
		}
	}

	if (port)
		return;

	for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, out++) {
		name[out] = diag_device_only[i].name;
		offset[out] = diag_device_only[i].offset;
	}
}
/*
 * Build the diagnostic counter sets of the device and install the
 * hw-stats callbacks.  Slaves get no counters and no callbacks.
 *
 * Set 0 is always built; set 1 (the port counters) only when the
 * device has MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT.
 *
 * Returns 0 on success or the error of the failed allocation.
 */
static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
{
	struct mlx4_ib_diag_counters *sets = ibdev->diag_counters;
	bool has_port_set;
	int type;
	int rc;

	if (mlx4_is_slave(ibdev->dev))
		return 0;

	has_port_set = !!(ibdev->dev->caps.flags2 &
			  MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);

	for (type = 0; type < MLX4_DIAG_COUNTERS_TYPES; type++) {
		/* type == 1 means we are building port counters */
		if (type != 0 && !has_port_set)
			continue;

		rc = __mlx4_ib_alloc_diag_counters(ibdev, &sets[type].name,
						   &sets[type].offset,
						   &sets[type].num_counters,
						   type);
		if (rc) {
			/* release the set that was built before this one */
			if (type != 0) {
				kfree(sets[type - 1].name);
				kfree(sets[type - 1].offset);
			}
			return rc;
		}

		mlx4_ib_fill_diag_counters(ibdev, sets[type].name,
					   sets[type].offset, type);
	}

	ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
	ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
	return 0;
}
static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
{
int i;
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
kfree(ibdev->diag_counters[i].offset);
kfree(ibdev->diag_counters[i].name);
}
}
/* Sentinel for release_mac below: no MAC has to be unregistered. */
#define MLX4_IB_INVALID_MAC ((u64)-1)
/*
 * Record the link-level address of 'dev' as the current MAC of 'port'
 * (1-based) and, on multi-function devices, switch the QP stored in
 * qp1_proxy[port - 1] over to that MAC.
 *
 * Nothing beyond the iboe.mac[] update happens when the device is not
 * in multi-function mode or when no proxy QP is stored for the port.
 */
static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
int port)
{
u64 new_smac = 0;
/* MAC handed to mlx4_unregister_mac() at 'unlock', if any */
u64 release_mac = MLX4_IB_INVALID_MAC;
struct mlx4_ib_qp *qp;
new_smac = mlx4_mac_to_u64(IF_LLADDR(dev));
atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
/* no need for update QP1 and mac registration in non-SRIOV */
if (!mlx4_is_mfunc(ibdev->dev))
return;
mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
qp = ibdev->qp1_proxy[port - 1];
if (qp) {
int new_smac_index;
u64 old_smac;
struct mlx4_update_qp_params update_params;
mutex_lock(&qp->mutex);
old_smac = qp->pri.smac;
/* address unchanged: nothing to register or update */
if (new_smac == old_smac)
goto unlock;
new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
if (new_smac_index < 0)
goto unlock;
update_params.smac_index = new_smac_index;
if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
&update_params)) {
/* QP update failed: drop the registration made just above */
release_mac = new_smac;
goto unlock;
}
/* if old port was zero, no mac was yet registered for this QP */
if (qp->pri.smac_port)
release_mac = old_smac;
qp->pri.smac = new_smac;
qp->pri.smac_port = port;
qp->pri.smac_index = new_smac_index;
}
unlock:
if (release_mac != MLX4_IB_INVALID_MAC)
mlx4_unregister_mac(ibdev->dev, port, release_mac);
/* qp->mutex is held on every path that gets here with qp != NULL */
if (qp)
mutex_unlock(&qp->mutex);
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
struct net_device *dev,
unsigned long event)
{
struct mlx4_ib_iboe *iboe;
int update_qps_port = -1;
int port;
iboe = &ibdev->iboe;
spin_lock_bh(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
if (dev == iboe->netdevs[port - 1] &&
(event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
event == NETDEV_UP || event == NETDEV_CHANGE))
update_qps_port = port;
}
spin_unlock_bh(&iboe->lock);
if (update_qps_port > 0)
mlx4_ib_update_qps(ibdev, dev, update_qps_port);
}
/*
 * Netdevice notifier callback.  Events for devices outside init_net
 * are ignored; all others trigger a rescan of the port netdevs.
 * Always returns NOTIFY_DONE.
 */
static int mlx4_ib_netdev_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);

	if (net_eq(dev_net(ndev), &init_net)) {
		/* the notifier block is embedded in the device's iboe state */
		struct mlx4_ib_dev *owner =
		    container_of(this, struct mlx4_ib_dev, iboe.nb);

		mlx4_ib_scan_netdevs(owner, ndev, event);
	}

	return NOTIFY_DONE;
}
/*
 * On the master function, set up the virtual-to-physical pkey mapping
 * of every function and port, push each entry to the core driver, and
 * initialize the physical pkey cache.  Does nothing on other functions.
 */
static void init_pkeys(struct mlx4_ib_dev *ibdev)
{
	int func;
	int port;
	int idx;

	if (!mlx4_is_master(ibdev->dev))
		return;

	for (func = 0; func <= ibdev->dev->persist->num_vfs; ++func) {
		for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
			for (idx = 0;
			     idx < ibdev->dev->phys_caps.pkey_phys_table_len[port];
			     ++idx) {
				/*
				 * master has the identity virt2phys pkey
				 * mapping; for the others only index 0 is
				 * identity, the rest point at the last
				 * physical entry
				 */
				if (func == mlx4_master_func_num(ibdev->dev) ||
				    idx == 0)
					ibdev->pkeys.virt2phys_pkey[func][port - 1][idx] =
					    idx;
				else
					ibdev->pkeys.virt2phys_pkey[func][port - 1][idx] =
					    ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;

				mlx4_sync_pkey_table(ibdev->dev, func, port, idx,
				    ibdev->pkeys.virt2phys_pkey[func][port - 1][idx]);
			}
		}
	}

	/* initialize pkey cache */
	for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
		for (idx = 0;
		     idx < ibdev->dev->phys_caps.pkey_phys_table_len[port];
		     ++idx) {
			if (idx == 0)
				ibdev->pkeys.phys_pkey_cache[port - 1][idx] = 0xFFFF;
			else
				ibdev->pkeys.phys_pkey_cache[port - 1][idx] = 0;
		}
	}
}
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
int i, j, eq = 0, total_eqs = 0;
ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
sizeof(ibdev->eq_table[0]), GFP_KERNEL);
if (!ibdev->eq_table)
return;
for (i = 1; i <= dev->caps.num_ports; i++) {
for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
j++, total_eqs++) {
if (i > 1 && mlx4_is_eq_shared(dev, total_eqs))
continue;
ibdev->eq_table[eq] = total_eqs;
if (!mlx4_assign_eq(dev, i,
&ibdev->eq_table[eq]))
eq++;
else
ibdev->eq_table[eq] = -1;
}
}
for (i = eq; i < dev->caps.num_comp_vectors;
ibdev->eq_table[i++] = -1)
;
/* Advertise the new number of EQs to clients */
ibdev->ib_dev.num_comp_vectors = eq;
}
/*
 * Release every EQ recorded in ibdev->eq_table and free the table.
 * A NULL table (nothing was allocated) is accepted.
 */
static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
	int count;
	int k;

	/* no eqs were allocated */
	if (ibdev->eq_table == NULL)
		return;

	count = ibdev->ib_dev.num_comp_vectors;

	/* Reset the advertised EQ number */
	ibdev->ib_dev.num_comp_vectors = 0;

	for (k = 0; k < count; k++)
		mlx4_release_eq(dev, ibdev->eq_table[k]);

	kfree(ibdev->eq_table);
	ibdev->eq_table = NULL;
}
/*
 * Fill in the immutable attributes of port 'port_num': table lengths
 * taken from a port query, the core capability flags derived from the
 * link layer and the device capabilities, and the maximum MAD size.
 *
 * Returns 0 on success or the error of mlx4_ib_query_port().
 */
static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct ib_port_attr pattr;
	int rc;

	rc = mlx4_ib_query_port(ibdev, port_num, &pattr);
	if (rc)
		return rc;

	immutable->pkey_tbl_len = pattr.pkey_tbl_len;
	immutable->gid_tbl_len = pattr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	if (mlx4_ib_port_link_layer(ibdev, port_num) ==
	    IB_LINK_LAYER_INFINIBAND) {
		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
		return 0;
	}

	/* the RoCE v1+v2 capability takes precedence over plain IBoE */
	if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
		    RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
	else if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;

	return 0;
}
/*
 * Format the firmware version of 'device' into 'str' (at most
 * 'str_len' bytes) as three dot-separated decimal fields taken from
 * bits 32 and up, bits 16-31 and bits 0-15 of caps.fw_ver.
 */
static void get_fw_ver_str(struct ib_device *device, char *str,
			   size_t str_len)
{
	struct mlx4_ib_dev *mdev =
	    container_of(device, struct mlx4_ib_dev, ib_dev);
	struct mlx4_dev *core = mdev->dev;

	snprintf(str, str_len, "%d.%d.%d",
		 (int) (core->caps.fw_ver >> 32),
		 (int) (core->caps.fw_ver >> 16) & 0xffff,
		 (int) core->caps.fw_ver & 0xffff);
}
/*
 * Create and register the IB device that sits on top of the mlx4 core
 * device 'dev' (this is the .add hook of mlx4_ib_interface).
 *
 * Returns the new struct mlx4_ib_dev on success.  Returns NULL when
 * the device has no IB-transport ports or when any setup step fails;
 * on failure everything acquired so far is released again through the
 * error labels at the end, including the completion EQs taken by
 * mlx4_ib_alloc_eqs().
 */
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
	struct mlx4_ib_dev *ibdev;
	int num_ports;
	int i, j;
	int err;
	struct mlx4_ib_iboe *iboe;
	int ib_num_ports = 0;
	int num_req_counters;
	int allocated;
	u32 counter_index;
	struct counter_index *new_counter_index = NULL;

	pr_info_once("%s", mlx4_ib_version);

	num_ports = 0;
	mlx4_foreach_ib_transport_port(i, dev)
		num_ports++;

	/* No point in registering a device with no ports... */
	if (num_ports == 0)
		return NULL;

	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
	if (!ibdev) {
		dev_err(&dev->persist->pdev->dev,
			"Device struct alloc failed\n");
		return NULL;
	}

	iboe = &ibdev->iboe;

	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
		goto err_dealloc;

	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
		goto err_pd;

	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
				 PAGE_SIZE);
	if (!ibdev->uar_map)
		goto err_uar;
	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);

	ibdev->dev = dev;
	ibdev->bond_next_port = 0;

	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
	ibdev->ib_dev.owner = THIS_MODULE;
	ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
	ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
	ibdev->num_ports = num_ports;
	/* a bonded device is exposed as a single port */
	ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
						1 : ibdev->num_ports;
	ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
	ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
	ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
	ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
	ibdev->ib_dev.del_gid = mlx4_ib_del_gid;

	if (dev->caps.userspace_caps)
		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
	else
		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;

	ibdev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_REREG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);

	ibdev->ib_dev.query_device = mlx4_ib_query_device;
	ibdev->ib_dev.query_port = mlx4_ib_query_port;
	ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
	ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
	ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
	ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
	ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
	ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
	ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
	ibdev->ib_dev.mmap = mlx4_ib_mmap;
	ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
	ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
	ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
	ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
	ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
	ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
	ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
	ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
	ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
	ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
	ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
	ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
	ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
	ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
	ibdev->ib_dev.post_send = mlx4_ib_post_send;
	ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
	ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
	ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
	ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
	ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
	ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
	ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
	ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
	ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
	ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
	ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
	ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
	ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
	ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
	ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
	ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
	ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
	ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;

	/* FMR verbs are not offered on slave functions */
	if (!mlx4_is_slave(ibdev->dev)) {
		ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
		ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
		ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
		ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
	}

	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
	    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
		ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
		ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;

		ibdev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
	}

	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
		ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
		ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
		ibdev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	if (check_flow_steering_support(dev)) {
		ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
		ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
		ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;

		ibdev->ib_dev.uverbs_ex_cmd_mask |=
			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
	}

	ibdev->ib_dev.uverbs_ex_cmd_mask |=
		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);

	/*
	 * From here on the completion EQs may be held by this device, so
	 * every later failure has to pass through err_map, which gives
	 * them back.
	 */
	mlx4_ib_alloc_eqs(dev, ibdev);

	spin_lock_init(&iboe->lock);

	if (init_node_data(ibdev))
		goto err_map;
	mlx4_init_sl2vl_tbl(ibdev);

	for (i = 0; i < ibdev->num_ports; ++i) {
		mutex_init(&ibdev->counters_table[i].mutex);
		INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
	}

	num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
	for (i = 0; i < num_req_counters; ++i) {
		mutex_init(&ibdev->qp1_proxy_lock[i]);
		allocated = 0;
		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
						IB_LINK_LAYER_ETHERNET) {
			err = mlx4_counter_alloc(ibdev->dev, &counter_index);
			/* if failed to allocate a new counter, use default */
			if (err)
				counter_index =
					mlx4_get_default_counter_index(dev,
								       i + 1);
			else
				allocated = 1;
		} else { /* IB_LINK_LAYER_INFINIBAND use the default counter */
			counter_index = mlx4_get_default_counter_index(dev,
								       i + 1);
		}
		new_counter_index = kmalloc(sizeof(*new_counter_index),
					    GFP_KERNEL);
		if (!new_counter_index) {
			/* not on any list yet, so free the counter here */
			if (allocated)
				mlx4_counter_free(ibdev->dev, counter_index);
			goto err_counter;
		}
		new_counter_index->index = counter_index;
		new_counter_index->allocated = allocated;
		list_add_tail(&new_counter_index->list,
			      &ibdev->counters_table[i].counters_list);
		ibdev->counters_table[i].default_counter = counter_index;
		pr_info("counter index %d for port %d allocated %d\n",
			counter_index, i + 1, allocated);
	}
	/* when bonded, the remaining ports share the counter of port 1 */
	if (mlx4_is_bonded(dev))
		for (i = 1; i < ibdev->num_ports ; ++i) {
			new_counter_index =
					kmalloc(sizeof(struct counter_index),
						GFP_KERNEL);
			if (!new_counter_index)
				goto err_counter;
			new_counter_index->index = counter_index;
			new_counter_index->allocated = 0;
			list_add_tail(&new_counter_index->list,
				      &ibdev->counters_table[i].counters_list);
			ibdev->counters_table[i].default_counter =
								counter_index;
		}

	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
		ib_num_ports++;

	spin_lock_init(&ibdev->sm_lock);
	mutex_init(&ibdev->cap_mask_mutex);
	INIT_LIST_HEAD(&ibdev->qp_list);
	spin_lock_init(&ibdev->reset_flow_resource_lock);

	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
	    ib_num_ports) {
		ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
		err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
					    MLX4_IB_UC_STEER_QPN_ALIGN,
					    &ibdev->steer_qpn_base, 0);
		if (err)
			goto err_counter;

		ibdev->ib_uc_qpns_bitmap =
			kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
				sizeof(long),
				GFP_KERNEL);
		if (!ibdev->ib_uc_qpns_bitmap) {
			dev_err(&dev->persist->pdev->dev,
				"bit map alloc failed\n");
			goto err_steer_qp_release;
		}

		bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);

		err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
				dev, ibdev->steer_qpn_base,
				ibdev->steer_qpn_base +
				ibdev->steer_qpn_count - 1);
		if (err)
			goto err_steer_free_bitmap;
	}

	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
		atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);

	if (mlx4_ib_alloc_diag_counters(ibdev))
		goto err_steer_free_bitmap;

	if (ib_register_device(&ibdev->ib_dev, NULL))
		goto err_diag_counters;

	if (mlx4_ib_mad_init(ibdev))
		goto err_reg;

	if (mlx4_ib_init_sriov(ibdev))
		goto err_mad;

	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
		if (!iboe->nb.notifier_call) {
			iboe->nb.notifier_call = mlx4_ib_netdev_event;
			err = register_netdevice_notifier(&iboe->nb);
			if (err) {
				/* keep err_notif from unregistering it */
				iboe->nb.notifier_call = NULL;
				goto err_notif;
			}
		}
		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
			err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
			if (err) {
				goto err_notif;
			}
		}
	}

	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
		if (device_create_file(&ibdev->ib_dev.dev,
				       mlx4_class_attributes[j]))
			goto err_notif;
	}

	ibdev->ib_active = true;

	if (mlx4_is_mfunc(ibdev->dev))
		init_pkeys(ibdev);

	/* create paravirt contexts for any VFs which are active */
	if (mlx4_is_master(ibdev->dev)) {
		for (j = 0; j < MLX4_MFUNC_MAX; j++) {
			if (j == mlx4_master_func_num(ibdev->dev))
				continue;
			if (mlx4_is_slave_active(ibdev->dev, j))
				do_slave_init(ibdev, j, 1);
		}
	}
	return ibdev;

	/* Error unwinding: each label undoes one step and falls through. */
err_notif:
	if (ibdev->iboe.nb.notifier_call) {
		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
			pr_warn("failure unregistering notifier\n");
		ibdev->iboe.nb.notifier_call = NULL;
	}
	flush_workqueue(wq);

	mlx4_ib_close_sriov(ibdev);

err_mad:
	mlx4_ib_mad_cleanup(ibdev);

err_reg:
	ib_unregister_device(&ibdev->ib_dev);

err_diag_counters:
	mlx4_ib_diag_cleanup(ibdev);

err_steer_free_bitmap:
	kfree(ibdev->ib_uc_qpns_bitmap);

err_steer_qp_release:
	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
		mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
				      ibdev->steer_qpn_count);
err_counter:
	for (i = 0; i < ibdev->num_ports; ++i)
		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);

err_map:
	/*
	 * Give back the EQs and the table taken by mlx4_ib_alloc_eqs();
	 * without this they were leaked on every failure after that call.
	 * mlx4_ib_free_eqs() copes with a table that was never allocated.
	 */
	mlx4_ib_free_eqs(dev, ibdev);
	iounmap(ibdev->uar_map);

err_uar:
	mlx4_uar_free(dev, &ibdev->priv_uar);

err_pd:
	mlx4_pd_free(dev, ibdev->priv_pdn);

err_dealloc:
	ib_dealloc_device(&ibdev->ib_dev);

	return NULL;
}
/*
 * Reserve a power-of-two sized region of steering QP numbers covering
 * at least 'count' QPs and store its first QPN in *qpn.
 *
 * Returns 0 on success or the negative value returned by
 * bitmap_find_free_region() when no region is available.
 */
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
{
	int region;

	WARN_ON(!dev->ib_uc_qpns_bitmap);

	region = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
					 dev->steer_qpn_count,
					 get_count_order(count));
	if (region >= 0) {
		*qpn = dev->steer_qpn_base + region;
		return 0;
	}

	return region;
}
/*
 * Return a region of steering QP numbers starting at 'qpn' to the
 * bitmap.  Does nothing for qpn == 0 or when steering is not device
 * managed.
 */
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
{
	if (qpn &&
	    dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
		BUG_ON(qpn < dev->steer_qpn_base);
		bitmap_release_region(dev->ib_uc_qpns_bitmap,
				      qpn - dev->steer_qpn_base,
				      get_count_order(count));
	}
}
/*
 * Attach (is_attach != 0) or detach (is_attach == 0) the steering rule
 * of 'mqp'.  Attaching creates a flow with a single, all-zero-mask IB
 * spec and stores the rule id in mqp->reg_id; detaching destroys the
 * rule identified by mqp->reg_id.
 *
 * Returns the result of the create/destroy call, or -ENOMEM when the
 * flow description cannot be allocated.
 */
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
			 int is_attach)
{
	const size_t attr_len = sizeof(struct ib_flow_attr) +
				sizeof(struct ib_flow_spec_ib);
	struct ib_flow_spec_ib *spec;
	struct ib_flow_attr *attr;
	int rc;

	if (!is_attach)
		return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);

	attr = kzalloc(attr_len, GFP_KERNEL);
	if (attr == NULL)
		return -ENOMEM;

	attr->port = mqp->port;
	attr->num_of_specs = 1;
	attr->size = attr_len;

	/* the spec is laid out directly behind the attribute header */
	spec = (struct ib_flow_spec_ib *)(attr + 1);
	spec->type = IB_FLOW_SPEC_IB;
	spec->size = sizeof(struct ib_flow_spec_ib);
	/* Add an empty rule for IB L2 */
	memset(&spec->mask, 0, sizeof(spec->mask));

	rc = __mlx4_ib_create_flow(&mqp->ibqp, attr,
				   IB_FLOW_DOMAIN_NIC,
				   MLX4_FS_REGULAR,
				   &mqp->reg_id);
	kfree(attr);
	return rc;
}
/*
 * Tear down the IB device that belongs to 'dev' (the .remove hook of
 * mlx4_ib_interface).  'ibdev_ptr' is used as the struct mlx4_ib_dev
 * to destroy.
 */
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
/* mark the device inactive, then drain the driver workqueue */
ibdev->ib_active = false;
flush_workqueue(wq);
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
mlx4_ib_diag_cleanup(ibdev);
/* notifier_call is only non-NULL while the notifier is registered */
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
/* give back the steering QP range and its allocation bitmap */
if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
}
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
mlx4_CLOSE_PORT(dev, p);
mlx4_ib_free_eqs(dev, ibdev);
mlx4_uar_free(dev, &ibdev->priv_uar);
mlx4_pd_free(dev, ibdev->priv_pdn);
ib_dealloc_device(&ibdev->ib_dev);
}
/*
 * Queue one mlx4_ib_tunnels_update_work item per active port of
 * 'slave'; 'do_init' is stored in each item unchanged.
 *
 * Only acts on the master function.  All allocations use GFP_ATOMIC
 * and allocation failures are logged and otherwise ignored.  If the
 * SR-IOV state is already going down the work items are freed instead
 * of being queued.
 */
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
{
struct mlx4_ib_demux_work **dm = NULL;
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
struct mlx4_active_ports actv_ports;
unsigned int ports;
unsigned int first_port;
if (!mlx4_is_master(dev))
return;
actv_ports = mlx4_get_active_ports(dev, slave);
ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
/* dm[] is only a temporary holder for the work item pointers */
dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
if (!dm) {
pr_err("failed to allocate memory for tunneling qp update\n");
return;
}
for (i = 0; i < ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
pr_err("failed to allocate memory for tunneling qp update work struct\n");
/* undo the items allocated in earlier iterations */
while (--i >= 0)
kfree(dm[i]);
goto out;
}
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
/* port numbers are 1-based, first_port is a 0-based bit index */
dm[i]->port = first_port + i + 1;
dm[i]->slave = slave;
dm[i]->do_init = do_init;
dm[i]->dev = ibdev;
}
/* initialize or tear down tunnel QPs for the slave */
spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
if (!ibdev->sriov.is_going_down) {
/* NOTE(review): queued items are presumably freed by the work handler - confirm */
for (i = 0; i < ports; i++)
queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
} else {
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
for (i = 0; i < ports; i++)
kfree(dm[i]);
}
out:
kfree(dm);
return;
}
/*
 * Catastrophic-error handling: walk every QP on ibdev->qp_list and,
 * for each send or receive queue that still has outstanding entries
 * (tail != head), collect its CQ.  Afterwards the completion callback
 * of every collected CQ is invoked once.
 *
 * The whole walk and the callbacks run with reset_flow_resource_lock
 * held and interrupts saved/disabled.
 */
static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
{
struct mlx4_ib_qp *mqp;
unsigned long flags_qp;
unsigned long flags_cq;
struct mlx4_ib_cq *send_mcq, *recv_mcq;
/* CQs whose completion callback has to be invoked */
struct list_head cq_notify_list;
struct mlx4_cq *mcq;
unsigned long flags;
pr_warn("mlx4_ib_handle_catas_error was started\n");
INIT_LIST_HEAD(&cq_notify_list);
/* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
spin_lock_irqsave(&mqp->sq.lock, flags_qp);
if (mqp->sq.tail != mqp->sq.head) {
send_mcq = to_mcq(mqp->ibqp.send_cq);
spin_lock_irqsave(&send_mcq->lock, flags_cq);
/* only CQs that have both a low-level and a verbs handler */
if (send_mcq->mcq.comp &&
mqp->ibqp.send_cq->comp_handler) {
/* reset_notify_added keeps a CQ from being listed twice */
if (!send_mcq->mcq.reset_notify_added) {
send_mcq->mcq.reset_notify_added = 1;
list_add_tail(&send_mcq->mcq.reset_notify,
&cq_notify_list);
}
}
spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
}
spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
/* Now, handle the QP's receive queue */
spin_lock_irqsave(&mqp->rq.lock, flags_qp);
/* no handling is needed for SRQ */
if (!mqp->ibqp.srq) {
if (mqp->rq.tail != mqp->rq.head) {
recv_mcq = to_mcq(mqp->ibqp.recv_cq);
spin_lock_irqsave(&recv_mcq->lock, flags_cq);
if (recv_mcq->mcq.comp &&
mqp->ibqp.recv_cq->comp_handler) {
if (!recv_mcq->mcq.reset_notify_added) {
recv_mcq->mcq.reset_notify_added = 1;
list_add_tail(&recv_mcq->mcq.reset_notify,
&cq_notify_list);
}
}
spin_unlock_irqrestore(&recv_mcq->lock,
flags_cq);
}
}
spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
}
/* invoke the completion callback of every collected CQ */
list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
mcq->comp(mcq);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
pr_warn("mlx4_ib_handle_catas_error ended\n");
}
static void handle_bonded_port_state_event(struct work_struct *work)
{
struct ib_event_work *ew =
container_of(work, struct ib_event_work, work);
struct mlx4_ib_dev *ibdev = ew->ib_dev;
enum ib_port_state bonded_port_state = IB_PORT_NOP;
int i;
struct ib_event ibev;
kfree(ew);
spin_lock_bh(&ibdev->iboe.lock);
for (i = 0; i < MLX4_MAX_PORTS; ++i) {
struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
enum ib_port_state curr_port_state;
if (!curr_netdev)
continue;
curr_port_state =
(netif_running(curr_netdev) &&
netif_carrier_ok(curr_netdev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
curr_port_state : IB_PORT_ACTIVE;
}
spin_unlock_bh(&ibdev->iboe.lock);
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = 1;
ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
ib_dispatch_event(&ibev);
}
/*
 * Query the SL-to-VL mapping of 'port' (1-based) and store it in
 * mdev->sl2vl[port - 1].  If the query fails an error is logged and
 * an all-zero mapping is stored instead.
 */
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
{
	u64 mapping;
	int rc = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &mapping);

	if (rc) {
		pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n",
		       port, rc);
		mapping = 0;
	}

	atomic64_set(&mdev->sl2vl[port - 1], mapping);
}
/*
 * Work handler: refresh the SL-to-VL mapping for the device and port
 * recorded in the work item, then free the work item.
 */
static void ib_sl2vl_update_work(struct work_struct *work)
{
	struct ib_event_work *item =
	    container_of(work, struct ib_event_work, work);

	mlx4_ib_sl2vl_update(item->ib_dev, item->port);
	kfree(item);
}
void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
int port)
{
struct ib_event_work *ew;
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
if (ew) {
INIT_WORK(&ew->work, ib_sl2vl_update_work);
ew->port = port;
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
} else {
pr_err("failed to allocate memory for sl2vl update work\n");
}
}
/*
 * Event hook of mlx4_ib_interface: translate an mlx4 core event into
 * IB-level handling.
 *
 * 'param' is a pointer to the EQE for MLX4_DEV_EVENT_PORT_MGMT_CHANGE
 * and an integer (port number or slave id, depending on the event) for
 * everything else.  Cases that 'break' out of the switch end with an
 * ib_event being dispatched; cases that 'return' handle the event
 * completely on their own.
 */
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, unsigned long param)
{
struct ib_event ibev;
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int p = 0;
/* bonded devices compute one combined port state in a work item */
if (mlx4_is_bonded(dev) &&
((event == MLX4_DEV_EVENT_PORT_UP) ||
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
if (!ew)
return;
INIT_WORK(&ew->work, handle_bonded_port_state_event);
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
return;
}
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = (struct mlx4_eqe *)param;
else
p = (int) param;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
/* ignore ports this IB device does not own */
if (p > ibdev->num_ports)
return;
if (!mlx4_is_slave(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
if (mlx4_is_master(dev))
mlx4_ib_invalidate_all_guid_record(ibdev, p);
/* refresh sl2vl here only if no dedicated change event exists */
if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
mlx4_sched_ib_sl2vl_update_work(ibdev, p);
}
ibev.event = IB_EVENT_PORT_ACTIVE;
break;
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
return;
ibev.event = IB_EVENT_PORT_ERR;
break;
case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
ibdev->ib_active = false;
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx4_ib_handle_catas_error(ibdev);
break;
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew) {
pr_err("failed to allocate memory for events work\n");
break;
}
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
/* the EQE is copied so the handler does not depend on 'param' */
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
ew->ib_dev = ibdev;
/* need to queue only for port owner, which uses GEN_EQE */
if (mlx4_is_master(dev))
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
return;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
do_slave_init(ibdev, p, 1);
if (mlx4_is_master(dev)) {
int i;
for (i = 1; i <= ibdev->num_ports; i++) {
if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
== IB_LINK_LAYER_INFINIBAND)
mlx4_ib_slave_alias_guid_event(ibdev,
p, i,
1);
}
}
return;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
/* mirror image of SLAVE_INIT: alias GUID events first, then the tunnels */
if (mlx4_is_master(dev)) {
int i;
for (i = 1; i <= ibdev->num_ports; i++) {
if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
== IB_LINK_LAYER_INFINIBAND)
mlx4_ib_slave_alias_guid_event(ibdev,
p, i,
0);
}
}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
return;
default:
return;
}
/* only reached by the cases above that 'break' */
ibev.device = ibdev_ptr;
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
}
/*
 * Interface description registered with the mlx4 core by
 * mlx4_ib_init(): per-device add/remove hooks, the event hook, the
 * protocol identifier and the bonding flag.
 */
static struct mlx4_interface mlx4_ib_interface = {
.add = mlx4_ib_add,
.remove = mlx4_ib_remove,
.event = mlx4_ib_event,
.protocol = MLX4_PROT_IB_IPV6,
.flags = MLX4_INTFF_BONDING
};
/*
 * Module initialisation: create the ordered "mlx4_ib" workqueue, run
 * mlx4_ib_mcg_init() and register the interface with the mlx4 core.
 *
 * Returns 0 on success.  On failure the steps already done are undone
 * in reverse order and the error code is returned (-ENOMEM if the
 * workqueue cannot be created).
 */
static int __init mlx4_ib_init(void)
{
	int rc;

	wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
	if (wq == NULL)
		return -ENOMEM;

	rc = mlx4_ib_mcg_init();
	if (rc == 0) {
		rc = mlx4_register_interface(&mlx4_ib_interface);
		if (rc == 0)
			return 0;

		/* interface registration failed */
		mlx4_ib_mcg_destroy();
	}

	destroy_workqueue(wq);
	return rc;
}
/*
 * Module exit: undo mlx4_ib_init() in reverse order - unregister the
 * interface, call mlx4_ib_mcg_destroy(), then destroy the workqueue.
 */
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
}
/* Register the init routine (ordered with SI_ORDER_THIRD) and the exit routine. */
module_init_order(mlx4_ib_init, SI_ORDER_THIRD);
module_exit(mlx4_ib_cleanup);
/* Module event handler: ignores its arguments and always returns 0. */
static int
mlx4ib_evhand(module_t mod, int event, void *arg)
{
return (0);
}
/* Module description: the module name and its event handler. */
static moduledata_t mlx4ib_mod = {
.name = "mlx4ib",
.evhand = mlx4ib_evhand,
};
/*
 * Declare the mlx4ib module and its dependencies on the mlx4, ibcore
 * and linuxkpi modules (all three version arguments are 1).
 */
DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
MODULE_DEPEND(mlx4ib, linuxkpi, 1, 1, 1);
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_core.h b/sys/dev/mlx5/mlx5_core/mlx5_core.h
index d24e42dbff00..61419ca43612 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_core.h
+++ b/sys/dev/mlx5/mlx5_core/mlx5_core.h
@@ -1,111 +1,111 @@
/*-
* Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __MLX5_CORE_H__
#define __MLX5_CORE_H__
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#define DRIVER_NAME "mlx5_core"
#ifndef DRIVER_VERSION
-#define DRIVER_VERSION "3.4.2"
+#define DRIVER_VERSION "3.5.0"
#endif
-#define DRIVER_RELDATE "July 2018"
+#define DRIVER_RELDATE "November 2018"
extern int mlx5_core_debug_mask;
/*
 * Debug print through pr_debug(), prefixed with the device's priv.name,
 * the calling function, the line number and the pid of the current
 * thread's process.
 */
#define mlx5_core_dbg(dev, format, ...) \
pr_debug("%s:%s:%d:(pid %d): " format, \
(dev)->priv.name, __func__, __LINE__, curthread->td_proc->p_pid, \
##__VA_ARGS__)
/* Like mlx5_core_dbg(), but only when 'mask' has a bit in common with mlx5_core_debug_mask. */
#define mlx5_core_dbg_mask(dev, mask, format, ...) \
do { \
if ((mask) & mlx5_core_debug_mask) \
mlx5_core_dbg(dev, format, ##__VA_ARGS__); \
} while (0)
/*
 * Error print through device_printf() on the device's bsddev, tagged
 * "ERR:" and prefixed with function, line and current process id.
 */
#define mlx5_core_err(_dev, format, ...) \
device_printf((&(_dev)->pdev->dev)->bsddev, "ERR: ""%s:%d:(pid %d): " format, \
__func__, __LINE__, curthread->td_proc->p_pid, \
##__VA_ARGS__)
/*
 * Warning print through device_printf() on the device's bsddev, tagged
 * "WARN:" and prefixed with function, line and current process id.
 */
#define mlx5_core_warn(_dev, format, ...) \
device_printf((&(_dev)->pdev->dev)->bsddev, "WARN: ""%s:%d:(pid %d): " format, \
__func__, __LINE__, curthread->td_proc->p_pid, \
##__VA_ARGS__)
/*
 * Selectors for what gets printed about a command.
 * NOTE(review): presumably bit positions tested against
 * mlx5_core_debug_mask - confirm at the use sites.
 */
enum {
MLX5_CMD_DATA, /* print command payload only */
MLX5_CMD_TIME, /* print command execution time */
};
/* Addresses within the semaphore space; only the SW-reset entry is defined here. */
enum mlx5_semaphore_space_address {
MLX5_SEMAPHORE_SW_RESET = 0x20,
};
/* Forward declaration; the prototypes below only need pointers to it. */
struct mlx5_core_dev;
/* Capability, board and register queries. */
int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
int mlx5_query_board_id(struct mlx5_core_dev *dev);
int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam,
u8 feature_group, u8 access_reg_group);
/* HCA init and teardown commands. */
int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
/* Device event delivery and error-state handling. */
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_disable_device(struct mlx5_core_dev *dev);
void mlx5_recover_device(struct mlx5_core_dev *dev);
/* Device registration. */
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
/* Ethernet driver (mlx5e) entry points. */
void mlx5e_init(void);
void mlx5e_cleanup(void);
int mlx5_rename_eq(struct mlx5_core_dev *dev, int eq_ix, char *name);
/* Firmware dump interface. */
int mlx5_fwdump_init(void);
void mlx5_fwdump_fini(void);
void mlx5_fwdump_prep(struct mlx5_core_dev *mdev);
void mlx5_fwdump(struct mlx5_core_dev *mdev);
void mlx5_fwdump_clean(struct mlx5_core_dev *mdev);
/*
 * Pairs an address with a count.
 * NOTE(review): presumably one CR-space range used by the firmware
 * dump code (start address plus number of registers) -- confirm
 * against the fwdump implementation.
 */
struct mlx5_crspace_regmap {
uint32_t addr;
unsigned cnt;
};
extern struct pci_driver mlx5_core_driver;
/* Declares the hw.mlx5 sysctl node so other files can attach children to it. */
SYSCTL_DECL(_hw_mlx5);
#endif /* __MLX5_CORE_H__ */
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index b07df80f05d8..4fcd95421c31 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1,4051 +1,4052 @@
/*-
* Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "en.h"
#include <sys/sockio.h>
#include <machine/atomic.h>
#ifndef ETH_DRIVER_VERSION
-#define ETH_DRIVER_VERSION "3.4.2"
+#define ETH_DRIVER_VERSION "3.5.0"
#endif
+#define DRIVER_RELDATE "November 2018"
/* Driver banner assembled from the version and release-date macros. */
static const char mlx5e_version[] = "mlx5en: Mellanox Ethernet driver "
ETH_DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
/* Forward declaration; mlx5e_create_rq() calls this. */
static int mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs);
/* Groups the RQ, SQ and RX/TX CQ parameter sets in one structure. */
struct mlx5e_channel_param {
struct mlx5e_rq_param rq;
struct mlx5e_sq_param sq;
struct mlx5e_cq_param rx_cq;
struct mlx5e_cq_param tx_cq;
};
/*
 * Link mode table, indexed by the MLX5E_* link mode number. Each entry
 * gives the ifmedia subtype and the baudrate reported for that mode.
 * Indices without an initializer keep a zero baudrate; the lookups in
 * this file skip such entries.
 */
static const struct {
	u32	subtype;
	u64	baudrate;
} mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
	[MLX5E_1000BASE_CX_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000ULL) },
	[MLX5E_1000BASE_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000ULL) },
	[MLX5E_10GBASE_CX4] = { .subtype = IFM_10G_CX4, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_10GBASE_KX4] = { .subtype = IFM_10G_KX4, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_10GBASE_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_20GBASE_KR2] = { .subtype = IFM_20G_KR2, .baudrate = IF_Gbps(20ULL) },
	[MLX5E_40GBASE_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL) },
	[MLX5E_40GBASE_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL) },
	[MLX5E_56GBASE_R4] = { .subtype = IFM_56G_R4, .baudrate = IF_Gbps(56ULL) },
	[MLX5E_10GBASE_CR] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_10GBASE_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_10GBASE_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_40GBASE_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL) },
	[MLX5E_40GBASE_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL) },
	[MLX5E_100GBASE_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL) },
	[MLX5E_100GBASE_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL) },
	[MLX5E_100GBASE_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL) },
	[MLX5E_100GBASE_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL) },
	[MLX5E_100BASE_TX] = { .subtype = IFM_100_TX, .baudrate = IF_Mbps(100ULL) },
	[MLX5E_1000BASE_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000ULL) },
	[MLX5E_10GBASE_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL) },
	[MLX5E_25GBASE_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL) },
	[MLX5E_25GBASE_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL) },
	[MLX5E_25GBASE_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL) },
	[MLX5E_50GBASE_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL) },
	[MLX5E_50GBASE_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL) },
};
/* Malloc type used for this file's malloc()/free() calls. */
MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
/* Makes the hw.mlx5 sysctl node visible so children can be added below. */
SYSCTL_DECL(_hw_mlx5);
/*
 * Refresh the cached media status/active words, the interface baudrate
 * and the link state from the current vport state and the operational
 * protocol mask reported by the PTYS register.
 *
 * When the vport is not up the link is reported down. When the PTYS
 * query fails the function returns without changing the link state.
 */
static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
	u32 eth_proto_oper;
	int error;
	u8 port_state;
	u8 is_er_type;
	u8 i;

	port_state = mlx5_query_vport_state(mdev,
	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);

	/* link is down: report it and stop here */
	if (port_state != VPORT_STATE_UP) {
		priv->media_status_last &= ~IFM_ACTIVE;
		priv->media_active_last = IFM_ETHER;
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		return;
	}
	priv->media_status_last |= IFM_ACTIVE;

	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
	if (error) {
		priv->media_active_last = IFM_ETHER;
		priv->ifp->if_baudrate = 1;
		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
		    __func__, error);
		return;
	}
	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

	/* the first populated table entry matching the operational mask wins */
	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
		u32 subtype;

		if (mlx5e_mode_table[i].baudrate == 0 ||
		    (MLX5E_PROT_MASK(i) & eth_proto_oper) == 0)
			continue;

		subtype = mlx5e_mode_table[i].subtype;
		priv->ifp->if_baudrate = mlx5e_mode_table[i].baudrate;

		if (subtype == IFM_10G_ER) {
			/* reported as LR unless the PDDR query says ER */
			error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
			if (error != 0) {
				if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
				    __func__, error);
			}
			if (error != 0 || is_er_type == 0)
				subtype = IFM_10G_LR;
		} else if (subtype == IFM_40G_LR4) {
			/* reported as ER4 only when the PDDR query says ER */
			error = mlx5_query_pddr_range_info(mdev, 1, &is_er_type);
			if (error != 0) {
				if_printf(priv->ifp, "%s: query port pddr failed: %d\n",
				    __func__, error);
			}
			if (error == 0 && is_er_type != 0)
				subtype = IFM_40G_ER4;
		}
		priv->media_active_last = subtype | IFM_ETHER | IFM_FDX;
		break;
	}
	if_link_state_change(priv->ifp, LINK_STATE_UP);
}
/*
 * ifmedia status callback: report the cached status word and the cached
 * active media word, with the RX/TX pause flags added according to the
 * current pauseframe parameters.
 */
static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
	struct mlx5e_priv *priv;

	priv = dev->if_softc;

	ifmr->ifm_status = priv->media_status_last;
	ifmr->ifm_active =
	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0) |
	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
	    priv->media_active_last;
}
/*
 * Translate an ifmedia subtype into a mask of link mode bits.
 *
 * IFM_10G_LR and IFM_40G_ER4 have no table entry of their own; they are
 * looked up as IFM_10G_ER and IFM_40G_LR4 respectively. Returns zero
 * when no populated table entry carries the subtype.
 */
static u32
mlx5e_find_link_mode(u32 subtype)
{
	u32 mask = 0;
	u32 idx;

	if (subtype == IFM_10G_LR)
		subtype = IFM_10G_ER;
	else if (subtype == IFM_40G_ER4)
		subtype = IFM_40G_LR4;

	for (idx = 0; idx != MLX5E_LINK_MODES_NUMBER; idx++) {
		/* entries with a zero baudrate are unpopulated */
		if (mlx5e_mode_table[idx].baudrate != 0 &&
		    mlx5e_mode_table[idx].subtype == subtype)
			mask |= MLX5E_PROT_MASK(idx);
	}
	return (mask);
}
static int
mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
{
return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
priv->params.rx_pauseframe_control,
priv->params.tx_pauseframe_control,
priv->params.rx_priority_flow_control,
priv->params.tx_priority_flow_control));
}
/*
 * Apply the PFC settings, refusing with -EINVAL while global
 * pauseframes are enabled in either direction.
 */
static int
mlx5e_set_port_pfc(struct mlx5e_priv *priv)
{
	/* PFC and global pause are mutually exclusive */
	if (priv->params.rx_pauseframe_control ||
	    priv->params.tx_pauseframe_control) {
		if_printf(priv->ifp,
		    "Global pauseframes must be disabled before enabling PFC.\n");
		return (-EINVAL);
	}
	return (mlx5e_set_port_pause_and_pfc(priv));
}
/*
 * ifmedia change callback.
 *
 * Validates the requested media word against the port capabilities,
 * updates the pauseframe parameters and reprograms the port. The priv
 * lock is taken here only when it is not already held. Returns zero or
 * a positive errno value.
 */
static int
mlx5e_media_change(struct ifnet *dev)
{
	struct mlx5e_priv *priv = dev->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 eth_proto_cap;
	u32 link_mode;
	int is_auto;
	int was_opened;
	int locked;
	int error;

	locked = PRIV_LOCKED(priv);
	if (!locked)
		PRIV_LOCK(priv);

	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
		error = EINVAL;
		goto out;
	}
	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

	/* query supported capabilities */
	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (error != 0) {
		if_printf(dev, "Query port media capability failed\n");
		goto out;
	}

	/* autoselect advertises everything; otherwise restrict to the request */
	is_auto = (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO);
	if (is_auto)
		link_mode = eth_proto_cap;
	else
		link_mode = link_mode & eth_proto_cap;

	if (link_mode == 0) {
		if (is_auto)
			if_printf(dev, "Port media capability is zero\n");
		else
			if_printf(dev, "Not supported link mode requested\n");
		error = EINVAL;
		goto out;
	}

	/* global pauseframes cannot be combined with PFC */
	if ((priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) &&
	    (priv->params.rx_priority_flow_control ||
	    priv->params.tx_priority_flow_control)) {
		if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
		error = EINVAL;
		goto out;
	}

	/* update pauseframe control bits */
	priv->params.rx_pauseframe_control =
	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) != 0;
	priv->params.tx_pauseframe_control =
	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) != 0;

	/* remember whether the port has to be brought back up */
	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);

	/* reconfigure the hardware */
	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
	error = -mlx5e_set_port_pause_and_pfc(priv);
	if (was_opened)
		mlx5_set_port_status(mdev, MLX5_PORT_UP);

out:
	if (!locked)
		PRIV_UNLOCK(priv);
	return (error);
}
/*
 * Work handler: refresh the carrier state under the priv lock, but only
 * while the interface is marked opened.
 */
static void
mlx5e_update_carrier_work(struct work_struct *work)
{
	struct mlx5e_priv *priv;

	priv = container_of(work, struct mlx5e_priv, update_carrier_work);

	PRIV_LOCK(priv);
	if (test_bit(MLX5E_STATE_OPENED, &priv->state)) {
		mlx5e_update_carrier(priv);
	}
	PRIV_UNLOCK(priv);
}
/*
* This function reads the physical port counters from the firmware
* using a pre-defined layout defined by various MLX5E_PPORT_XXX()
* macros. The output is converted from big-endian 64-bit values into
* host endian ones and stored in the "priv->stats.pport" structure.
* Debug counters are stored in "priv->stats.port_stats_debug".
*
* The return values of the register accesses are not checked, so the
* counters are copied from the output buffer regardless of whether
* the access succeeded. If either request buffer cannot be allocated,
* the function returns without updating anything.
*
* The "x" index walks the firmware counter set, while "y" walks the
* destination array and is deliberately carried over from one loop to
* the next; do not reorder the loops.
*/
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_pport_stats *s = &priv->stats.pport;
struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
u32 *in;
u32 *out;
const u64 *ptr;
unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
unsigned x;
unsigned y;
unsigned z;
/* allocate firmware request structures */
in = mlx5_vzalloc(sz);
out = mlx5_vzalloc(sz);
if (in == NULL || out == NULL)
goto free_out;
/*
* Get pointer to the 64-bit counter set which is located at a
* fixed offset in the output firmware request structure:
*/
ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
MLX5_SET(ppcnt_reg, in, local_port, 1);
/* read IEEE802_3 counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
/* destination starts after the per-priority slots, which are filled last */
for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
s->arg[y] = be64toh(ptr[x]);
/* read RFC2819 counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
/* "y" continues where the IEEE802_3 loop stopped */
for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
s->arg[y] = be64toh(ptr[x]);
/* the remainder of this group goes to the debug array; "x" continues, "y" restarts */
for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read RFC2863 counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
/* appended to the debug array; "y" is carried over */
for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read physical layer stats counter group using predefined counter layout */
MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
/* appended to the debug array; "y" is carried over */
for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
s_debug->arg[y] = be64toh(ptr[x]);
/* read per-priority counters */
MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
/* iterate all the priorities */
for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
MLX5_SET(ppcnt_reg, in, prio_tc, z);
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
/* read per priority stats counter group using predefined counter layout */
for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
s->arg[y] = be64toh(ptr[x]);
}
free_out:
/* free firmware request structures */
kvfree(in);
kvfree(out);
}
/*
* This function is called regularly to collect all statistics
* counters from the firmware. The values can be viewed through the
* sysctl interface. Execution is serialized using the priv's global
* configuration lock.
*
* Counter collection is skipped when the output buffer cannot be
* allocated, when the interface is not opened, or when one of the
* firmware queries fails. The diagnostics update after the "free_out"
* label runs in all of these cases.
*/
static void
mlx5e_update_stats_work(struct work_struct *work)
{
struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
update_stats_work);
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_vport_stats *s = &priv->stats.vport;
struct mlx5e_sq_stats *sq_stats;
struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
struct ifnet *ifp = priv->ifp;
#endif
u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
u32 *out;
int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
/* local accumulators for the per-channel software counters */
u64 tso_packets = 0;
u64 tso_bytes = 0;
u64 tx_queue_dropped = 0;
u64 tx_defragged = 0;
u64 tx_offload_none = 0;
u64 lro_packets = 0;
u64 lro_bytes = 0;
u64 sw_lro_queued = 0;
u64 sw_lro_flushed = 0;
u64 rx_csum_none = 0;
u64 rx_wqe_err = 0;
u32 rx_out_of_buffer = 0;
int i;
int j;
PRIV_LOCK(priv);
out = mlx5_vzalloc(outlen);
if (out == NULL)
goto free_out;
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
goto free_out;
/* Collect first the SW counters and then HW for consistency */
for (i = 0; i < priv->params.num_channels; i++) {
struct mlx5e_channel *pch = priv->channel + i;
struct mlx5e_rq *rq = &pch->rq;
struct mlx5e_rq_stats *rq_stats = &pch->rq.stats;
/* collect stats from LRO */
rq_stats->sw_lro_queued = rq->lro.lro_queued;
rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
sw_lro_queued += rq_stats->sw_lro_queued;
sw_lro_flushed += rq_stats->sw_lro_flushed;
lro_packets += rq_stats->lro_packets;
lro_bytes += rq_stats->lro_bytes;
rx_csum_none += rq_stats->csum_none;
rx_wqe_err += rq_stats->wqe_err;
/* sum the send queue counters of every traffic class */
for (j = 0; j < priv->num_tc; j++) {
sq_stats = &pch->sq[j].stats;
sq_br = pch->sq[j].br;
tso_packets += sq_stats->tso_packets;
tso_bytes += sq_stats->tso_bytes;
tx_queue_dropped += sq_stats->dropped;
/* drops recorded by the buf_ring, when one exists, count too */
if (sq_br != NULL)
tx_queue_dropped += sq_br->br_drops;
tx_defragged += sq_stats->defragged;
tx_offload_none += sq_stats->csum_offload_none;
}
}
/*
* The port debug counters summed here are refreshed by
* mlx5e_update_pport_counters() further down, so this sum is based
* on the values from the previous run.
*/
s->tx_jumbo_packets =
priv->stats.port_stats_debug.p1519to2047octets +
priv->stats.port_stats_debug.p2048to4095octets +
priv->stats.port_stats_debug.p4096to8191octets +
priv->stats.port_stats_debug.p8192to10239octets;
/* update counters */
s->tso_packets = tso_packets;
s->tso_bytes = tso_bytes;
s->tx_queue_dropped = tx_queue_dropped;
s->tx_defragged = tx_defragged;
s->lro_packets = lro_packets;
s->lro_bytes = lro_bytes;
s->sw_lro_queued = sw_lro_queued;
s->sw_lro_flushed = sw_lro_flushed;
s->rx_csum_none = rx_csum_none;
s->rx_wqe_err = rx_wqe_err;
/* HW counters */
memset(in, 0, sizeof(in));
MLX5_SET(query_vport_counter_in, in, opcode,
MLX5_CMD_OP_QUERY_VPORT_COUNTER);
MLX5_SET(query_vport_counter_in, in, op_mod, 0);
MLX5_SET(query_vport_counter_in, in, other_vport, 0);
memset(out, 0, outlen);
/* get number of out-of-buffer drops first */
if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
&rx_out_of_buffer))
goto free_out;
/* accumulate difference into a 64-bit counter */
s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
s->rx_out_of_buffer_prev = rx_out_of_buffer;
/* get port statistics */
if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
goto free_out;
/* shorthand for extracting one 64-bit field of the vport counter output */
#define MLX5_GET_CTR(out, x) \
MLX5_GET64(query_vport_counter_out, out, x)
s->rx_error_packets =
MLX5_GET_CTR(out, received_errors.packets);
s->rx_error_bytes =
MLX5_GET_CTR(out, received_errors.octets);
s->tx_error_packets =
MLX5_GET_CTR(out, transmit_errors.packets);
s->tx_error_bytes =
MLX5_GET_CTR(out, transmit_errors.octets);
s->rx_unicast_packets =
MLX5_GET_CTR(out, received_eth_unicast.packets);
s->rx_unicast_bytes =
MLX5_GET_CTR(out, received_eth_unicast.octets);
s->tx_unicast_packets =
MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
s->tx_unicast_bytes =
MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
s->rx_multicast_packets =
MLX5_GET_CTR(out, received_eth_multicast.packets);
s->rx_multicast_bytes =
MLX5_GET_CTR(out, received_eth_multicast.octets);
s->tx_multicast_packets =
MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
s->tx_multicast_bytes =
MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
s->rx_broadcast_packets =
MLX5_GET_CTR(out, received_eth_broadcast.packets);
s->rx_broadcast_bytes =
MLX5_GET_CTR(out, received_eth_broadcast.octets);
s->tx_broadcast_packets =
MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
s->tx_broadcast_bytes =
MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
/* totals are derived from the unicast/multicast/broadcast counters */
s->rx_packets =
s->rx_unicast_packets +
s->rx_multicast_packets +
s->rx_broadcast_packets -
s->rx_out_of_buffer;
s->rx_bytes =
s->rx_unicast_bytes +
s->rx_multicast_bytes +
s->rx_broadcast_bytes;
s->tx_packets =
s->tx_unicast_packets +
s->tx_multicast_packets +
s->tx_broadcast_packets;
s->tx_bytes =
s->tx_unicast_bytes +
s->tx_multicast_bytes +
s->tx_broadcast_bytes;
/* Update calculated offload counters */
s->tx_csum_offload = s->tx_packets - tx_offload_none;
s->rx_csum_good = s->rx_packets - s->rx_csum_none;
/* Get physical port counters */
mlx5e_update_pport_counters(priv);
#if (__FreeBSD_version < 1100000)
/* no get_counters interface in fbsd 10 */
ifp->if_ipackets = s->rx_packets;
ifp->if_ierrors = s->rx_error_packets +
priv->stats.pport.alignment_err +
priv->stats.pport.check_seq_err +
priv->stats.pport.crc_align_errors +
priv->stats.pport.in_range_len_errors +
priv->stats.pport.jabbers +
priv->stats.pport.out_of_range_len +
priv->stats.pport.oversize_pkts +
priv->stats.pport.symbol_err +
priv->stats.pport.too_long_errors +
priv->stats.pport.undersize_pkts +
priv->stats.pport.unsupported_op_rx;
ifp->if_iqdrops = s->rx_out_of_buffer +
priv->stats.pport.drop_events;
ifp->if_opackets = s->tx_packets;
ifp->if_oerrors = s->tx_error_packets;
ifp->if_snd.ifq_drops = s->tx_queue_dropped;
ifp->if_ibytes = s->rx_bytes;
ifp->if_obytes = s->tx_bytes;
ifp->if_collisions =
priv->stats.pport.collisions;
#endif
free_out:
kvfree(out);
/* Update diagnostics, if any */
if (priv->params_ethtool.diag_pci_enable ||
priv->params_ethtool.diag_general_enable) {
/* a NULL destination is passed for a diagnostics class that is disabled */
int error = mlx5_core_get_diagnostics_full(mdev,
priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
if (error != 0)
if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
}
PRIV_UNLOCK(priv);
}
/*
 * Watchdog callout handler: queue the statistics work item and rearm
 * the callout to fire again in one second (hz ticks).
 */
static void
mlx5e_update_stats(void *arg)
{
	struct mlx5e_priv *priv;

	priv = (struct mlx5e_priv *)arg;

	queue_work(priv->wq, &priv->update_stats_work);
	callout_reset(&priv->watchdog, hz, mlx5e_update_stats, priv);
}
/*
 * Handle one asynchronous device event. Port up/down events schedule a
 * carrier update; every other event is ignored.
 */
static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
	if (event == MLX5_DEV_EVENT_PORT_UP ||
	    event == MLX5_DEV_EVENT_PORT_DOWN)
		queue_work(priv->wq, &priv->update_carrier_work);
}
/*
 * Asynchronous event entry point. Events are forwarded only while the
 * MLX5E_STATE_ASYNC_EVENTS_ENABLE bit is set; the check and the
 * dispatch both happen under "async_events_mtx". The "mdev" and "param"
 * arguments are not used.
 */
static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5e_priv *priv;

	priv = vpriv;

	mtx_lock(&priv->async_events_mtx);
	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) {
		mlx5e_async_event_sub(priv, event);
	}
	mtx_unlock(&priv->async_events_mtx);
}
/*
 * Start accepting asynchronous events by setting the enable bit in the
 * priv state. The bit is set without taking "async_events_mtx".
 */
static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{

	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}
/*
 * Stop accepting asynchronous events. The enable bit is cleared under
 * "async_events_mtx", the same mutex mlx5e_async_event() holds while it
 * tests the bit and dispatches.
 */
static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{

	mtx_lock(&priv->async_events_mtx);
	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
	mtx_unlock(&priv->async_events_mtx);
}
/* Forward declaration; mlx5e_reset_calibration_callout() refers to it. */
static void mlx5e_calibration_callout(void *arg);
/*
 * Calibration tunables, exported below under hw.mlx5.calibr.
 * "duration" is compared against the per-interface count of completed
 * calibrations; the two intervals are multiplied by hz when the
 * callout is armed, i.e. they are expressed in seconds.
 */
static int mlx5e_calibration_duration = 20;
static int mlx5e_fast_calibration = 1;
static int mlx5e_normal_calibration = 30;
/* NOTE(review): "parameteres" in the node description looks like a typo for "parameters". */
static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
"MLX5 timestamp calibration parameteres");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
&mlx5e_calibration_duration, 0,
"Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
&mlx5e_fast_calibration, 0,
"Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
&mlx5e_normal_calibration, 0,
"Recalibration interval during normal operations");
/*
 * Start or rearm the calibration process.
 *
 * When no calibration has completed yet the callout handler is invoked
 * directly. Otherwise the callout is rearmed, using the fast interval
 * until "clbr_done" reaches the configured duration and the normal
 * interval afterwards.
 */
static void
mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
{
	int interval;

	if (priv->clbr_done == 0) {
		mlx5e_calibration_callout(priv);
		return;
	}

	if (priv->clbr_done < mlx5e_calibration_duration)
		interval = mlx5e_fast_calibration;
	else
		interval = mlx5e_normal_calibration;

	callout_reset_curcpu(&priv->tstmp_clbr, interval * hz,
	    mlx5e_calibration_callout, priv);
}
/*
 * Flatten a timespec into a single 64-bit count.
 *
 * Despite the "usec" in the name, the seconds are scaled by 10^9 and
 * the nanosecond field is added unscaled, so the result is expressed
 * in nanoseconds.
 */
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{
	const uint64_t nsec_per_sec = 1000000000;
	uint64_t total;

	total = (uint64_t)ts->tv_sec * nsec_per_sec;
	total += ts->tv_nsec;

	return (total);
}
/*
 * Read the 64-bit adapter timer from the initialization segment.
 *
 * The timer is exposed as two 32-bit registers. The high word is read
 * before and after the low word, and the read is repeated until both
 * high reads agree, so that a carry between the halves cannot produce
 * a torn value.
 */
static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
	struct mlx5_init_seg *iseg = priv->mdev->iseg;
	uint32_t hi_before;
	uint32_t hi_after;
	uint32_t lo;

	for (;;) {
		hi_before = ioread32be(&iseg->internal_timer_h);
		lo = ioread32be(&iseg->internal_timer_l);
		hi_after = ioread32be(&iseg->internal_timer_h);
		if (hi_after == hi_before)
			break;
	}
	return (((uint64_t)hi_before << 32) | lo);
}
/*
* The calibration callout, it runs either in the context of the
* thread which enables calibration, or in callout. It takes the
* snapshot of system and adapter clocks, then advances the pointers to
* the calibration point to allow rx path to read the consistent data
* lockless.
*
* The statement order at the end of the function (zero the old
* generation, release fence, switch the current index, publish the
* new generation with a release store) is what makes the lockless
* read possible; do not reorder it.
*/
static void
mlx5e_calibration_callout(void *arg)
{
struct mlx5e_priv *priv;
struct mlx5e_clbr_point *next, *curr;
struct timespec ts;
int clbr_curr_next;
priv = arg;
curr = &priv->clbr_points[priv->clbr_curr];
/* pick the slot after the current one, wrapping around the array */
clbr_curr_next = priv->clbr_curr + 1;
if (clbr_curr_next >= nitems(priv->clbr_points))
clbr_curr_next = 0;
next = &priv->clbr_points[clbr_curr_next];
/* the previous "current" samples become the "prev" samples of the new point */
next->base_prev = curr->base_curr;
next->clbr_hw_prev = curr->clbr_hw_curr;
next->clbr_hw_curr = mlx5e_hw_clock(priv);
/*
* Treat the adapter clock as frozen when it advanced by less than
* 2^MLX5E_TSTMP_PREC. In that case calibration is marked as not done
* and the function returns without rearming the callout.
* NOTE(review): the delta is taken against curr->clbr_hw_prev, not
* curr->clbr_hw_curr (the sample just copied into next->clbr_hw_prev)
* -- confirm which one is intended.
* NOTE(review): "%#jx" expects uintmax_t and the arguments are passed
* without a cast; the adjacent string literals also concatenate to
* ",disabling" with no space -- confirm and fix if needed.
*/
if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
0) {
if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx,"
"disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev);
priv->clbr_done = 0;
return;
}
nanouptime(&ts);
/* despite its name, mlx5e_timespec2usec() yields nanoseconds */
next->base_curr = mlx5e_timespec2usec(&ts);
/* zero the old point's generation before switching to the new one */
curr->clbr_gen = 0;
atomic_thread_fence_rel();
priv->clbr_curr = clbr_curr_next;
/* publish the new point by storing the incremented generation */
atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
/* count completed calibrations, up to the configured duration */
if (priv->clbr_done < mlx5e_calibration_duration)
priv->clbr_done++;
mlx5e_reset_calibration_callout(priv);
}
/*
 * String table generated by expanding MLX5E_RQ_STATS() with the
 * MLX5E_STATS_DESC generator; passed to mlx5e_create_stats() when the
 * receive queue statistics sysctls are created.
 */
static const char *mlx5e_rq_stats_desc[] = {
MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};
/*
 * Allocate the software resources of a receive queue: the DMA tag, the
 * linked-list work queue, the LRO context, the per-WQE mbuf slots with
 * their DMA maps, and the statistics sysctl nodes.
 *
 * Returns zero on success. On failure the resources acquired so far
 * are released in reverse order and a non-zero error is returned.
 * Return values of the bus_dma and tcp_lro calls are negated before
 * being stored in "err" (presumably to match the sign convention of
 * the mlx5 core calls -- confirm).
 */
static int
mlx5e_create_rq(struct mlx5e_channel *c,
struct mlx5e_rq_param *param,
struct mlx5e_rq *rq)
{
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
char buffer[16];
void *rqc = param->rqc;
void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
int wq_sz;
int err;
int i;
u32 nsegs, wqe_sz;
/* the segment count is needed to size the DMA tag ("wqe_sz" is not used further) */
err = mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
if (err != 0)
goto done;
/* Create DMA descriptor TAG */
if ((err = -bus_dma_tag_create(
bus_get_dma_tag(mdev->pdev->dev.bsddev),
1, /* any alignment */
0, /* no boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
nsegs * MLX5E_MAX_RX_BYTES, /* maxsize */
nsegs, /* nsegments */
nsegs * MLX5E_MAX_RX_BYTES, /* maxsegsize */
0, /* flags */
NULL, NULL, /* lockfunc, lockfuncarg */
&rq->dma_tag)))
goto done;
err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
&rq->wq_ctrl);
if (err)
goto err_free_dma_tag;
/* point the doorbell at the receive doorbell record */
rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
/* second query, this time storing the results in the RQ itself */
err = mlx5e_get_wqe_sz(priv, &rq->wqe_sz, &rq->nsegs);
if (err != 0)
goto err_rq_wq_destroy;
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
err = -tcp_lro_init_args(&rq->lro, c->tag.m_snd_tag.ifp, TCP_LRO_ENTRIES, wq_sz);
if (err)
goto err_rq_wq_destroy;
/* one zeroed mbuf slot per work queue entry; the result is not checked (M_WAITOK) */
rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
for (i = 0; i != wq_sz; i++) {
struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
#if (MLX5E_MAX_RX_SEGS == 1)
uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
#else
int j;
#endif
err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
if (err != 0) {
/* destroy the maps created by the earlier iterations */
while (i--)
bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
goto err_rq_mbuf_free;
}
/* set value for constant fields */
#if (MLX5E_MAX_RX_SEGS == 1)
wqe->data[0].lkey = c->mkey_be;
wqe->data[0].byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
#else
for (j = 0; j < rq->nsegs; j++)
wqe->data[j].lkey = c->mkey_be;
#endif
}
rq->ifp = c->tag.m_snd_tag.ifp;
rq->channel = c;
rq->ix = c->ix;
/* statistics node is named after the channel index */
snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
rq->stats.arg);
return (0);
/* error unwinding, in reverse order of acquisition */
err_rq_mbuf_free:
free(rq->mbuf, M_MLX5EN);
tcp_lro_free(&rq->lro);
err_rq_wq_destroy:
mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
bus_dma_tag_destroy(rq->dma_tag);
done:
return (err);
}
/*
 * Release everything mlx5e_create_rq() set up for a receive queue:
 * the statistics sysctl nodes, the LRO context, any mbufs still
 * loaded, the per-WQE DMA maps, the mbuf slot array, the work queue
 * and the DMA tag.
 *
 * The DMA tag is destroyed last, after every map created from it has
 * been destroyed. Previously the tag was only released on the error
 * path of mlx5e_create_rq(), so it leaked on every normal teardown.
 */
static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
	int wq_sz;
	int i;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&rq->stats.ctx);

	/* free leftover LRO packets, if any */
	tcp_lro_free(&rq->lro);

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	for (i = 0; i != wq_sz; i++) {
		/* unload and free any mbuf still attached to this slot */
		if (rq->mbuf[i].mbuf != NULL) {
			bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
			m_freem(rq->mbuf[i].mbuf);
		}
		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
	}
	free(rq->mbuf, M_MLX5EN);
	mlx5_wq_destroy(&rq->wq_ctrl);

	/* all maps are gone, so the tag created in mlx5e_create_rq() can go too */
	bus_dma_tag_destroy(rq->dma_tag);
}
static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
struct mlx5e_channel *c = rq->channel;
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
void *in;
void *rqc;
void *wq;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
sizeof(u64) * rq->wq_ctrl.buf.npages;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
memcpy(rqc, param->rqc, sizeof(param->rqc));
MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
if (priv->counter_set_id >= 0)
MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
mlx5_fill_page_array(&rq->wq_ctrl.buf,
(__be64 *) MLX5_ADDR_OF(wq, wq, pas));
err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
kvfree(in);
return (err);
}
static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
struct mlx5e_channel *c = rq->channel;
struct mlx5e_priv *priv = c->priv;
struct mlx5_core_dev *mdev = priv->mdev;
void *in;
void *rqc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
MLX5_SET(modify_rq_in, in, rq_state, curr_state);
MLX5_SET(rqc, rqc, state, next_state);
err = mlx5_core_modify_rq(mdev, in, inlen);
kvfree(in);
return (err);
}
/*
 * Destroy the firmware receive queue object identified by "rq->rqn".
 */
static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

	mlx5_core_destroy_rq(mdev, rq->rqn);
}
/*
 * Poll until the receive work queue holds at least
 * "params.min_rx_wqes" entries.
 *
 * The fill level is checked up to 1000 times with msleep(4) between
 * checks. Returns zero once the threshold is reached and -ETIMEDOUT
 * otherwise.
 */
static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5e_priv *priv = rq->channel->priv;
	int remaining = 1000;

	while (remaining-- > 0) {
		if (rq->wq.cur_sz >= priv->params.min_rx_wqes)
			return (0);
		msleep(4);
	}
	return (-ETIMEDOUT);
}
/*
 * Bring up a receive queue: allocate its software state, create the
 * firmware object, move it from reset to ready and mark the channel's
 * RQ enabled. On failure the steps already completed are undone and
 * the error is returned.
 */
static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	int err;

	err = mlx5e_create_rq(c, param, rq);
	if (err)
		return (err);

	err = mlx5e_enable_rq(rq, param);
	if (err == 0) {
		err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST,
		    MLX5_RQC_STATE_RDY);
		if (err == 0) {
			c->rq.enabled = 1;
			return (0);
		}
		/* the firmware object exists but could not be made ready */
		mlx5e_disable_rq(rq);
	}
	mlx5e_destroy_rq(rq);
	return (err);
}
/*
 * First stage of closing a receive queue: mark it disabled and stop
 * its watchdog under the RQ mutex, drain the watchdog callout after
 * dropping the mutex, then move the queue from ready to the error
 * state. The result of the state change is not checked.
 */
static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{

	mtx_lock(&rq->mtx);
	rq->enabled = 0;
	callout_stop(&rq->watchdog);
	mtx_unlock(&rq->mtx);

	/* drained only after the mutex has been dropped */
	callout_drain(&rq->watchdog);

	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}
/*
 * Second stage of closing a receive queue: keep invoking the
 * completion handler until the work queue is empty (or the device has
 * entered the internal error state), then destroy the firmware object
 * and release the software state.
 */
static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;

	/* wait till RQ is empty */
	for (;;) {
		if (mlx5_wq_ll_is_empty(&rq->wq))
			break;
		if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
			break;
		msleep(4);
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	mlx5e_disable_rq(rq);
	mlx5e_destroy_rq(rq);
}
/*
 * Destroy the DMA map of every send queue entry and free the mbuf
 * slot array allocated by mlx5e_alloc_sq_db().
 */
void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
	const int nent = mlx5_wq_cyc_get_size(&sq->wq);
	int idx;

	for (idx = 0; idx != nent; idx++) {
		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[idx].dma_map);
	}
	free(sq->mbuf, M_MLX5EN);
}
int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
int err;
int x;
sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
/* Create DMA descriptor MAPs */
for (x = 0; x != wq_sz; x++) {
err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
if (err != 0) {
while (x--)
bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
free(sq->mbuf, M_MLX5EN);
return (err);
}
}
return (0);
}
/*
 * String table generated by expanding MLX5E_SQ_STATS() with the
 * MLX5E_STATS_DESC generator; passed to mlx5e_create_stats() when the
 * send queue statistics sysctls are created.
 */
static const char *mlx5e_sq_stats_desc[] = {
MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};
void
mlx5e_update_sq_inline(struct mlx5e_sq *sq)
{
sq->max_inline = sq->priv->params.tx_max_inline;
sq->min_inline_mode = sq->priv->params.tx_min_inline_mode;
/*
* Check if trust state is DSCP or if inline mode is NONE which
* indicates CX-5 or newer hardware.
*/
if (sq->priv->params_ethtool.trust_state != MLX5_QPTS_TRUST_PCP ||
sq->min_inline_mode == MLX5_INLINE_MODE_NONE) {
if (MLX5_CAP_ETH(sq->priv->mdev, wqe_vlan_insert))
sq->min_insert_caps = MLX5E_INSERT_VLAN | MLX5E_INSERT_NON_VLAN;
else
sq->min_insert_caps = MLX5E_INSERT_NON_VLAN;
} else {
sq->min_insert_caps = 0;
}
}
static void
mlx5e_refresh_sq_inline_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
int i;
for (i = 0; i != c->num_tc; i++) {
mtx_lock(&c->sq[i].lock);
mlx5e_update_sq_inline(&c->sq[i]);
mtx_unlock(&c->sq[i].lock);
}
}
/*
 * Refresh the inline settings of all send queues of the interface.
 * Nothing is done while the channels are closed.
 */
void
mlx5e_refresh_sq_inline(struct mlx5e_priv *priv)
{
	int ch;

	/* only touch the channels while the interface is opened */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) {
		for (ch = 0; ch < priv->params.num_channels; ch++)
			mlx5e_refresh_sq_inline_sub(priv, &priv->channel[ch]);
	}
}
/*
 * Allocate the software resources of a send queue: the DMA tag, the
 * UAR mapping, the cyclic work queue, the per-WQE mbuf slots, and the
 * statistics sysctl nodes.
 *
 * Returns zero on success. On failure the resources acquired so far
 * are released in reverse order and the error is returned.
 */
static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char node_name[16];
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
#ifdef RSS
	cpuset_t cpu_mask;
	int cpu_id;
#endif
	int err;

	/* Create DMA descriptor TAG */
	err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag);
	if (err != 0)
		return (err);

	err = mlx5_alloc_map_uar(mdev, &sq->uar);
	if (err)
		goto fail_dma_tag;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto fail_uar;

	/* point the doorbell at the send doorbell record */
	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto fail_wq;

	sq->mkey_be = c->mkey_be;
	sq->ifp = priv->ifp;
	sq->priv = priv;
	sq->tc = tc;

	mlx5e_update_sq_inline(sq);

	/* statistics node is named after the channel index and traffic class */
	snprintf(node_name, sizeof(node_name), "txstat%dtc%d", c->ix, tc);
	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    node_name, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
	    sq->stats.arg);

	return (0);

fail_wq:
	mlx5_wq_destroy(&sq->wq_ctrl);
fail_uar:
	mlx5_unmap_free_uar(mdev, &sq->uar);
fail_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
	return (err);
}
/*
 * Release the software resources of a send queue in the reverse order of
 * their creation: sysctl statistics, SQ database, work queue, UAR mapping
 * and finally the DMA tag.
 */
static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
	/* destroy all sysctl nodes */
	sysctl_ctx_free(&sq->stats.ctx);

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);

	/*
	 * The DMA tag is created by mlx5e_create_sq() and must be given
	 * back here, else it is leaked on every close of the queue.
	 * NOTE(review): assumes mlx5e_free_sq_db() only releases the DMA
	 * maps and not the tag itself - confirm against its definition.
	 */
	bus_dma_tag_destroy(sq->dma_tag);
}
int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
int tis_num)
{
void *in;
void *sqc;
void *wq;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
sizeof(u64) * sq->wq_ctrl.buf.npages;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
wq = MLX5_ADDR_OF(sqc, sqc, wq);
memcpy(sqc, param->sqc, sizeof(param->sqc));
MLX5_SET(sqc, sqc, tis_num_0, tis_num);
MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
MLX5_SET(wq, wq, uar_page, sq->uar.index);
MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
PAGE_SHIFT);
MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
mlx5_fill_page_array(&sq->wq_ctrl.buf,
(__be64 *) MLX5_ADDR_OF(wq, wq, pas));
err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
kvfree(in);
return (err);
}
int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
void *in;
void *sqc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
MLX5_SET(modify_sq_in, in, sq_state, curr_state);
MLX5_SET(sqc, sqc, state, next_state);
err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
kvfree(in);
return (err);
}
/*
 * Destroy the SQ object identified by "sq->sqn" through
 * mlx5_core_destroy_sq(). This is the counterpart of mlx5e_enable_sq().
 */
void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{
mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
}
/*
 * Bring up one send queue: create its software state, create the SQ
 * object, move it from the RST to the RDY state and finally mark the
 * queue as running. Any step that fails undoes the previous ones and
 * its error code is returned.
 */
static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	int rc;

	rc = mlx5e_create_sq(c, tc, param, sq);
	if (rc != 0)
		return (rc);

	rc = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
	if (rc == 0) {
		rc = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
		    MLX5_SQC_STATE_RDY);
		if (rc == 0) {
			/* the queue may now accept packets */
			WRITE_ONCE(sq->running, 1);
			return (0);
		}
		/* the SQ object exists, but could not be made ready */
		mlx5e_disable_sq(sq);
	}
	mlx5e_destroy_sq(sq);

	return (rc);
}
/*
 * Send single NOPs until "sq->cev_counter" reads zero, then write the
 * doorbell if one is pending. The caller must hold "sq->lock".
 *
 * When the ring has no room and "can_sleep" is non-zero the lock is
 * dropped around a short sleep before retrying. When "can_sleep" is
 * zero the function stops sending NOPs as soon as the ring is full.
 */
static void
mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
{
	while (sq->cev_counter != 0) {
		/* wait for, or give up on, a free slot in the ring */
		while (!mlx5e_sq_has_room_for(sq, 1)) {
			if (can_sleep == 0)
				goto flush;

			mtx_unlock(&sq->lock);
			msleep(4);
			mtx_lock(&sq->lock);
		}

		/* queue exactly one NOP */
		mlx5e_send_nop(sq, 1);
		atomic_thread_fence_rel();
	}
flush:
	/* ring the doorbell, if anything was queued */
	if (likely(sq->doorbell.d64 != 0)) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
		sq->doorbell.d64 = 0;
	}
}
/*
 * Callout handler of the completion event timer. It asserts that
 * "sq->lock" is owned on entry.
 *
 * In the SEND_NOPS state the ring is filled with NOPs without sleeping.
 * Once "cev_counter" reaches zero the state goes back to INITIAL and
 * the timer is left stopped. In any other state the next timeout is
 * set up to send NOPs. Unless the work completed, the callout is
 * rearmed to fire again after "hz" ticks.
 */
void
mlx5e_sq_cev_timeout(void *arg)
{
	struct mlx5e_sq *sq = arg;

	mtx_assert(&sq->lock, MA_OWNED);

	if (sq->cev_next_state != MLX5E_CEV_STATE_SEND_NOPS) {
		/* send NOPs on next timeout */
		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
	} else {
		/* fill TX ring with NOPs, if any */
		mlx5e_sq_send_nops_locked(sq, 0);

		/* all done, no need to restart the timer */
		if (sq->cev_counter == 0) {
			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
			return;
		}
	}

	/* restart timer */
	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
}
/*
 * Stop a running send queue and wait until it is empty.
 *
 * The function clears the "running" flag, stops the completion event
 * callout, flushes the ring with NOPs and then polls the completion
 * handler until the consumer counter catches up with the producer
 * counter. The first wait is abandoned when the link is not active or
 * the device is in the internal error state. Afterwards the SQ is moved
 * from RDY to ERR and a second wait, abandoned only on internal device
 * error, lets the remaining requests complete.
 *
 * Returns immediately when the queue is not running.
 */
void
mlx5e_drain_sq(struct mlx5e_sq *sq)
{
int error;
struct mlx5_core_dev *mdev= sq->priv->mdev;
/*
* Check if already stopped.
*
* NOTE: Serialization of this function is managed by the
* caller ensuring the priv's state lock is locked or in case
* of rate limit support, a single thread manages drain and
* resume of SQs. The "running" variable can therefore safely
* be read without any locks.
*/
if (READ_ONCE(sq->running) == 0)
return;
/* don't put more packets into the SQ */
WRITE_ONCE(sq->running, 0);
/* serialize access to DMA rings */
mtx_lock(&sq->lock);
/* teardown event factor timer, if any */
sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
callout_stop(&sq->cev_callout);
/* send dummy NOPs in order to flush the transmit ring */
mlx5e_sq_send_nops_locked(sq, 1);
mtx_unlock(&sq->lock);
/* make sure it is safe to free the callout */
callout_drain(&sq->cev_callout);
/* wait till SQ is empty or link is down */
mtx_lock(&sq->lock);
while (sq->cc != sq->pc &&
(sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
/* drop the lock while sleeping and while running the CQ handler */
mtx_unlock(&sq->lock);
msleep(1);
sq->cq.mcq.comp(&sq->cq.mcq);
mtx_lock(&sq->lock);
}
mtx_unlock(&sq->lock);
/* error out remaining requests */
error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
if (error != 0) {
/* the failure is only logged; the final wait below still runs */
if_printf(sq->ifp,
"mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
}
/* wait till SQ is empty */
mtx_lock(&sq->lock);
while (sq->cc != sq->pc &&
mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
/* drop the lock while sleeping and while running the CQ handler */
mtx_unlock(&sq->lock);
msleep(1);
sq->cq.mcq.comp(&sq->cq.mcq);
mtx_lock(&sq->lock);
}
mtx_unlock(&sq->lock);
}
/*
 * Fully close one send queue: drain it, destroy the SQ object and then
 * release its software resources, in that order.
 */
static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{
mlx5e_drain_sq(sq);
mlx5e_disable_sq(sq);
mlx5e_destroy_sq(sq);
}
/*
 * Allocate the work queue of a completion queue and initialize the core
 * CQ structure: doorbell records, completion and event callbacks,
 * interrupt vector and UAR. Returns zero on success or the error from
 * mlx5_cqwq_create().
 */
static int
mlx5e_create_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	struct mlx5_cqe64 *cqe;
	int unused_eqn;
	int irqn;
	int rc;
	u32 n;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;

	rc = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (rc != 0)
		return (rc);

	/* only the IRQ number is of interest here */
	mlx5_vector2eqn(mdev, eq_ix, &unused_eqn, &irqn);

	/* consumer index and arm doorbell records, both cleared */
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;

	mcq->cqe_sz = 64;
	mcq->vector = eq_ix;
	mcq->irqn = irqn;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->uar = &priv->cq_uar;

	/* stamp every CQE with the initial "op_own" value */
	n = 0;
	while (n < mlx5_cqwq_get_size(&cq->wq)) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, n);
		cqe->op_own = 0xf1;
		n++;
	}

	cq->priv = priv;

	return (0);
}
/*
 * Release the work queue that mlx5e_create_cq() allocated for the
 * completion queue.
 */
static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
mlx5_wq_destroy(&cq->wq_ctrl);
}
static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
{
struct mlx5_core_cq *mcq = &cq->mcq;
void *in;
void *cqc;
int inlen;
int irqn_not_used;
int eqn;
int err;
inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
sizeof(u64) * cq->wq_ctrl.buf.npages;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
memcpy(cqc, param->cqc, sizeof(param->cqc));
mlx5_fill_page_array(&cq->wq_ctrl.buf,
(__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
MLX5_SET(cqc, cqc, c_eqn, eqn);
MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
PAGE_SHIFT);
MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
kvfree(in);
if (err)
return (err);
mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
return (0);
}
/*
 * Destroy the core CQ object through mlx5_core_destroy_cq(). This is
 * the counterpart of mlx5e_enable_cq().
 */
static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{
mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
}
/*
 * Create and enable a completion queue using "comp" as its completion
 * callback and "eq_ix" as its event queue index. Returns zero on success.
 * When enabling fails the queue created so far is destroyed again and
 * the error is returned.
 */
int
mlx5e_open_cq(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    int eq_ix)
{
	int rc;

	rc = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
	if (rc != 0)
		return (rc);

	rc = mlx5e_enable_cq(cq, param, eq_ix);
	if (rc != 0) {
		/* undo the create step */
		mlx5e_destroy_cq(cq);
		return (rc);
	}
	return (0);
}
/*
 * Close a completion queue opened by mlx5e_open_cq(): destroy the CQ
 * object first, then release its work queue.
 */
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
mlx5e_disable_cq(cq);
mlx5e_destroy_cq(cq);
}
/*
 * Open one transmit completion queue per traffic class of the channel.
 * On failure the completion queues opened so far are closed again, last
 * one first, and the error is returned.
 */
static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int rc;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		rc = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, c->ix);
		if (rc == 0)
			continue;

		/* unwind everything below the failing index */
		while (tc-- > 0)
			mlx5e_close_cq(&c->sq[tc].cq);

		return (rc);
	}
	return (0);
}
/*
 * Close the transmit completion queue of every traffic class of the
 * channel, in ascending traffic class order.
 */
static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc = 0;

	while (tc < c->num_tc) {
		mlx5e_close_cq(&c->sq[tc].cq);
		tc++;
	}
}
/*
 * Open one send queue per traffic class of the channel. On failure the
 * send queues opened so far are drained and closed again, last one
 * first, and the error is returned.
 */
static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int rc;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		rc = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (rc == 0)
			continue;

		/* unwind everything below the failing index */
		while (tc-- > 0)
			mlx5e_close_sq_wait(&c->sq[tc]);

		return (rc);
	}
	return (0);
}
/*
 * Drain and close the send queue of every traffic class of the channel,
 * in ascending traffic class order.
 */
static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc = 0;

	while (tc < c->num_tc) {
		mlx5e_close_sq_wait(&c->sq[tc]);
		tc++;
	}
}
/*
 * Initialize the mutexes and callouts of a channel: the receive queue
 * mutex with its watchdog callout, and per send queue the transmit
 * lock, the completion lock and the completion event callout. The
 * completion event factor of each send queue is loaded from the ethtool
 * parameters here as well.
 */
static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	struct mlx5e_sq *sq;
	int tc;

	/* receive side */
	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);

	/* transmit side, one send queue per traffic class */
	for (tc = 0; tc < c->num_tc; tc++) {
		sq = &c->sq[tc];

		mtx_init(&sq->lock, "mlx5tx",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);
		mtx_init(&sq->comp_lock, "mlx5comp",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);

		/* the callout runs with the transmit lock held */
		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;

		/* a completion event factor of zero is bumped to one */
		if (sq->cev_factor == 0)
			sq->cev_factor = 1;
	}
}
static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
int tc;
mtx_destroy(&c->rq.mtx);
for (tc = 0; tc < c->num_tc; tc++) {
mtx_destroy(&c->sq[tc].lock);
mtx_destroy(&c->sq[tc].comp_lock);
}
}
/*
 * Open channel number "ix": clear the channel structure, set up its
 * mutexes and then open the transmit completion queues, the receive
 * completion queue, the send queues and the receive queue. The receive
 * completion handler is invoked once at the end. On failure everything
 * opened so far is closed in reverse order and the error is returned.
 */
static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *c)
{
	int rc;

	/* start from a clean channel structure */
	memset(c, 0, sizeof(*c));

	c->priv = priv;
	c->ix = ix;
	c->num_tc = priv->num_tc;
	c->mkey_be = cpu_to_be32(priv->mr.key);

	/* setup send tag */
	c->tag.m_snd_tag.ifp = priv->ifp;
	c->tag.type = IF_SND_TAG_TYPE_UNLIMITED;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	rc = mlx5e_open_tx_cqs(c, cparam);
	if (rc != 0)
		goto fail_mtx;

	/* open receive completion queue */
	rc = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, c->ix);
	if (rc != 0)
		goto fail_tx_cqs;

	rc = mlx5e_open_sqs(c, cparam);
	if (rc != 0)
		goto fail_rx_cq;

	rc = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (rc != 0)
		goto fail_sqs;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

fail_sqs:
	mlx5e_close_sqs_wait(c);

fail_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

fail_tx_cqs:
	mlx5e_close_tx_cqs(c);

fail_mtx:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);

	return (rc);
}
/*
 * First stage of closing a channel: only the receive queue is closed
 * here. The remaining teardown is done by mlx5e_close_channel_wait().
 */
static void
mlx5e_close_channel(struct mlx5e_channel *c)
{
mlx5e_close_rq(&c->rq);
}
/*
 * Second stage of closing a channel: finish closing the receive queue,
 * drain and close all send queues, close the receive and transmit
 * completion queues and finally destroy the channel mutexes.
 */
static void
mlx5e_close_channel_wait(struct mlx5e_channel *c)
{
mlx5e_close_rq_wait(&c->rq);
mlx5e_close_sqs_wait(c);
mlx5e_close_cq(&c->rq.cq);
mlx5e_close_tx_cqs(c);
/* destroy mutexes */
mlx5e_chan_mtx_destroy(c);
}
/*
 * Compute the receive WQE buffer size and the number of data segments.
 *
 * The wanted size is the LRO WQE size when hardware LRO is enabled, else
 * it is derived from the interface MTU. It is then rounded up to one of
 * MCLBYTES, MJUMPAGESIZE, MJUM9BYTES or MJUM16BYTES. Returns zero and
 * stores the results through "wqe_sz" and "nsegs" on success. Returns
 * -ENOMEM, leaving both outputs untouched, when the wanted size exceeds
 * MJUM16BYTES.
 */
static int
mlx5e_get_wqe_sz(struct mlx5e_priv *priv, u32 *wqe_sz, u32 *nsegs)
{
	u32 size;
	u32 segs;

	if (priv->params.hw_lro_en)
		size = priv->params.lro_wqe_sz;
	else
		size = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);

	/* larger than the biggest supported buffer size */
	if (size > MJUM16BYTES)
		return (-ENOMEM);

	/* round up to the next supported buffer size */
	if (size > MJUM9BYTES) {
		size = MJUM16BYTES;
	} else if (size > MJUMPAGESIZE) {
		size = MJUM9BYTES;
	} else if (size > MCLBYTES) {
		size = MJUMPAGESIZE;
	} else {
		size = MCLBYTES;
	}

	/*
	 * segs + 1 must be a power of two, because stride size must be.
	 * Stride size is 16 * (segs + 1), as the first segment is
	 * control.
	 */
	segs = howmany(size, MLX5E_MAX_RX_BYTES);
	while (!powerof2(segs + 1))
		segs++;

	*wqe_sz = size;
	*nsegs = segs;

	return (0);
}
static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_rq_param *param)
{
void *rqc = param->rqc;
void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
u32 wqe_sz, nsegs;
mlx5e_get_wqe_sz(priv, &wqe_sz, &nsegs);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe) +
nsegs * sizeof(struct mlx5_wqe_data_seg)));
MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
MLX5_SET(wq, wq, pd, priv->pdn);
param->wq.buf_numa_node = 0;
param->wq.db_numa_node = 0;
param->wq.linear = 1;
}
static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
struct mlx5e_sq_param *param)
{
void *sqc = param->sqc;
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, pd, priv->pdn);
param->wq.buf_numa_node = 0;
param->wq.db_numa_node = 0;
param->wq.linear = 1;
}
/*
 * Fill in the completion queue settings shared by the receive and
 * transmit side: the UAR page is set to the index of the CQ UAR of the
 * interface.
 */
static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
struct mlx5e_cq_param *param)
{
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}
/*
 * Fill in the receive completion queue parameters: optional CQE
 * compression, the queue size, the moderation period and packet count
 * and the moderation period mode.
 */
static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	/*
	 * TODO The sysctl to control on/off is a bool value for now, which
	 * means we only support CSUM, once HASH is implemented we'll need
	 * to address that.
	 */
	if (priv->params.cqe_zipping_en) {
		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
	}

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);

	/*
	 * Mode zero counts the period from the EQE. Any other mode counts
	 * from the CQE when the device has that capability, and falls back
	 * to counting from the EQE otherwise.
	 */
	if (priv->params.rx_cq_moderation_mode != 0 &&
	    MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) {
		MLX5_SET(cqc, cqc, cq_period_mode,
		    MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
	} else {
		MLX5_SET(cqc, cqc, cq_period_mode,
		    MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
	}

	mlx5e_build_common_cq_param(priv, param);
}
/*
 * Fill in the transmit completion queue parameters: the queue size, the
 * moderation period and packet count and the moderation period mode.
 */
static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);

	/*
	 * Mode zero counts the period from the EQE. Any other mode counts
	 * from the CQE when the device has that capability, and falls back
	 * to counting from the EQE otherwise.
	 */
	if (priv->params.tx_cq_moderation_mode != 0 &&
	    MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) {
		MLX5_SET(cqc, cqc, cq_period_mode,
		    MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
	} else {
		MLX5_SET(cqc, cqc, cq_period_mode,
		    MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
	}

	mlx5e_build_common_cq_param(priv, param);
}
/*
 * Build the complete parameter set used to open a channel. The
 * structure is cleared first, then the receive queue, send queue,
 * receive CQ and transmit CQ parameters are filled in.
 */
static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
struct mlx5e_channel_param *cparam)
{
memset(cparam, 0, sizeof(*cparam));
mlx5e_build_rq_param(priv, &cparam->rq);
mlx5e_build_sq_param(priv, &cparam->sq);
mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}
/*
 * Open all channels of the interface and then wait for every receive
 * queue to reach its minimum number of WQEs. On failure all channels
 * opened so far are closed again, last one first, and the error is
 * returned.
 */
static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	int opened;
	int ch;
	int rc;

	mlx5e_build_channel_param(priv, &cparam);

	/* "opened" counts the channels which need closing on failure */
	for (opened = 0; opened < priv->params.num_channels; opened++) {
		rc = mlx5e_open_channel(priv, opened, &cparam,
		    &priv->channel[opened]);
		if (rc != 0)
			goto unwind;
	}

	for (ch = 0; ch < priv->params.num_channels; ch++) {
		rc = mlx5e_wait_for_min_rx_wqes(&priv->channel[ch].rq);
		if (rc != 0)
			goto unwind;
	}
	return (0);

unwind:
	for (opened--; opened >= 0; opened--) {
		mlx5e_close_channel(&priv->channel[opened]);
		mlx5e_close_channel_wait(&priv->channel[opened]);
	}
	return (rc);
}
/*
 * Close all channels of the interface in two passes: the first pass
 * runs mlx5e_close_channel() on every channel, the second pass runs
 * mlx5e_close_channel_wait() on every channel.
 */
static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
	int ch;

	/* first pass */
	ch = 0;
	while (ch < priv->params.num_channels) {
		mlx5e_close_channel(&priv->channel[ch]);
		ch++;
	}

	/* second pass */
	ch = 0;
	while (ch < priv->params.num_channels) {
		mlx5e_close_channel_wait(&priv->channel[ch]);
		ch++;
	}
}
/*
 * Apply the current transmit moderation settings to the completion
 * queue of a send queue. When the device is able to modify the period
 * mode, the mode is updated along with the period and packet count.
 * Returns the result of the modify command.
 */
static int
mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
{
	uint8_t cq_mode;

	/* without the capability only period and count can be changed */
	if (!MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
		return (mlx5_core_modify_cq_moderation(priv->mdev,
		    &sq->cq.mcq,
		    priv->params.tx_cq_moderation_usec,
		    priv->params.tx_cq_moderation_pkts));
	}

	if (priv->params.tx_cq_moderation_mode == 0)
		cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
	else
		cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;

	return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
	    priv->params.tx_cq_moderation_usec,
	    priv->params.tx_cq_moderation_pkts,
	    cq_mode));
}
/*
 * Apply the current receive moderation settings to the completion queue
 * of a receive queue. When the device is able to modify the period
 * mode, the mode is updated along with the period and packet count.
 * Returns the result of the modify command.
 */
static int
mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
{
	uint8_t cq_mode;

	/* without the capability only period and count can be changed */
	if (!MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
		return (mlx5_core_modify_cq_moderation(priv->mdev,
		    &rq->cq.mcq,
		    priv->params.rx_cq_moderation_usec,
		    priv->params.rx_cq_moderation_pkts));
	}

	if (priv->params.rx_cq_moderation_mode == 0)
		cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
	else
		cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;

	return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
	    priv->params.rx_cq_moderation_usec,
	    priv->params.rx_cq_moderation_pkts,
	    cq_mode));
}
/*
 * Refresh the moderation settings of one channel: first the receive
 * queue, then every send queue. Stops at and returns the first error.
 */
static int
mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
	int rc;
	int tc;

	rc = mlx5e_refresh_rq_params(priv, &c->rq);

	/* the send queues are only visited while no error was seen */
	for (tc = 0; rc == 0 && tc != c->num_tc; tc++)
		rc = mlx5e_refresh_sq_params(priv, &c->sq[tc]);

	return (rc);
}
/*
 * Refresh the moderation settings of all channels. Returns EINVAL when
 * the interface is not in the opened state, the first error reported
 * for a channel, or zero when all channels were updated.
 */
int
mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
{
	int ch;
	int rc;

	/* the channels only exist while the interface is open */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		return (EINVAL);

	for (ch = 0; ch < priv->params.num_channels; ch++) {
		rc = mlx5e_refresh_channel_params_sub(priv,
		    &priv->channel[ch]);
		if (rc != 0)
			return (rc);
	}
	return (0);
}
static int
mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
{
struct mlx5_core_dev *mdev = priv->mdev;
u32 in[MLX5_ST_SZ_DW(create_tis_in)];
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
memset(in, 0, sizeof(in));
MLX5_SET(tisc, tisc, prio, tc);
MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
}
/*
 * Destroy the TIS of traffic class "tc", identified by "priv->tisn[tc]".
 */
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}
/*
 * Create one TIS per traffic class. On failure the TISes created so far
 * are destroyed again, last one first, and the error is returned.
 */
static int
mlx5e_open_tises(struct mlx5e_priv *priv)
{
	const int num_tc = priv->num_tc;
	int rc;
	int tc;

	for (tc = 0; tc < num_tc; tc++) {
		rc = mlx5e_open_tis(priv, tc);
		if (rc == 0)
			continue;

		/* unwind everything below the failing index */
		while (tc-- > 0)
			mlx5e_close_tis(priv, tc);

		return (rc);
	}
	return (0);
}
/*
 * Destroy the TIS of every traffic class, in ascending traffic class
 * order.
 */
static void
mlx5e_close_tises(struct mlx5e_priv *priv)
{
	const int num_tc = priv->num_tc;
	int tc = 0;

	while (tc < num_tc) {
		mlx5e_close_tis(priv, tc);
		tc++;
	}
}
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u32 *in;
u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
void *rqtc;
int inlen;
int err;
int sz;
int i;
sz = 1 << priv->params.rx_hash_log_tbl_sz;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = mlx5_vzalloc(inlen);
if (in == NULL)
return (-ENOMEM);
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
for (i = 0; i < sz; i++) {
int ix = i;
#ifdef RSS
ix = rss_get_indirection_to_bucket(ix);
#endif
/* ensure we don't overflow */
ix %= priv->params.num_channels;
/* apply receive side scaling stride, if any */
ix -= ix % (int)priv->params.channels_rsss;
MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix].rq.rqn);
}
MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (!err)
priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
kvfree(in);
return (err);
}
/*
 * Destroy the RQ table identified by "priv->rqtn". The result of the
 * destroy command is not checked.
 */
static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
}
/*
 * Fill in the TIR context "tirc" for traffic type "tt".
 *
 * The function sets the transport domain, the hardware LRO settings
 * when LRO is enabled, the dispatch type (direct to the receive queue
 * of channel zero for MLX5E_TT_ANY, indirect through the RQ table with
 * a Toeplitz hash for all other types) and finally the protocol types
 * and header fields the receive hash is computed over.
 */
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
__be32 *hkey;
MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
/* subtracted from the LRO WQE size to get the maximum LRO message size */
#define ROUGH_MAX_L2_L3_HDR_SZ 256
/* hash over the source and destination IP address only */
#define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP)
/* hash over the IP addresses and the L4 ports */
#define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_L4_SPORT |\
MLX5_HASH_FIELD_SEL_L4_DPORT)
/* hash over the IP addresses and the IPsec SPI */
#define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
MLX5_HASH_FIELD_SEL_DST_IP |\
MLX5_HASH_FIELD_SEL_IPSEC_SPI)
if (priv->params.hw_lro_en) {
MLX5_SET(tirc, tirc, lro_enable_mask,
MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
/* the maximum message size field is written in units of 256 bytes */
MLX5_SET(tirc, tirc, lro_max_msg_sz,
(priv->params.lro_wqe_sz -
ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
/* TODO: add the option to choose timer value dynamically */
MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
MLX5_CAP_ETH(priv->mdev,
lro_timer_supported_periods[2]));
}
/* setup parameters for hashing TIR type, if any */
switch (tt) {
case MLX5E_TT_ANY:
/* no hashing: deliver directly to the receive queue of channel zero */
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_DIRECT);
MLX5_SET(tirc, tirc, inline_rqn,
priv->channel[0].rq.rqn);
break;
default:
/* pick the receive queue through the RQ table using a Toeplitz hash */
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, indirect_table,
priv->rqtn);
MLX5_SET(tirc, tirc, rx_hash_fn,
MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
#ifdef RSS
/*
* The FreeBSD RSS implementation does currently not
* support symmetric Toeplitz hashes:
*/
MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
rss_getkey((uint8_t *)hkey);
#else
/* without RSS a symmetric hash with a fixed 40 byte key is used */
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
hkey[0] = cpu_to_be32(0xD181C62C);
hkey[1] = cpu_to_be32(0xF7F4DB5B);
hkey[2] = cpu_to_be32(0x1983A2FC);
hkey[3] = cpu_to_be32(0x943E1ADB);
hkey[4] = cpu_to_be32(0xD9389E6B);
hkey[5] = cpu_to_be32(0xD1039C2C);
hkey[6] = cpu_to_be32(0xA74499AD);
hkey[7] = cpu_to_be32(0x593D56D9);
hkey[8] = cpu_to_be32(0xF3253C06);
hkey[9] = cpu_to_be32(0x2ADC1FFC);
#endif
break;
}
/*
* Select the protocol types and the header fields to hash over.
* For TCP and UDP with RSS compiled in, the L4 ports are only
* included when the matching hash type is enabled in the RSS
* hash configuration.
*/
switch (tt) {
case MLX5E_TT_IPV4_TCP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV6_TCP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV4_UDP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV6_UDP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
} else
#endif
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_ALL);
break;
case MLX5E_TT_IPV4_IPSEC_AH:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV6_IPSEC_AH:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV4_IPSEC_ESP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV6_IPSEC_ESP:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP_IPSEC_SPI);
break;
case MLX5E_TT_IPV4:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
break;
case MLX5E_TT_IPV6:
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
MLX5_SET(rx_hash_field_select, hfso, selected_fields,
MLX5_HASH_IP);
break;
default:
/* no hash field selection for any other traffic type */
break;
}
}
/*
 * Create the TIR for traffic type "tt" and store its number in
 * "priv->tirn[tt]". Returns zero on success, -ENOMEM when the command
 * buffer cannot be allocated, or the error from mlx5_core_create_tir().
 */
static int
mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
{
	const int cmd_len = MLX5_ST_SZ_BYTES(create_tir_in);
	u32 *cmd;
	int rc;

	cmd = mlx5_vzalloc(cmd_len);
	if (cmd == NULL)
		return (-ENOMEM);

	/* fill in the TIR context embedded in the command */
	mlx5e_build_tir_ctx(priv,
	    MLX5_ADDR_OF(create_tir_in, cmd, tir_context), tt);

	rc = mlx5_core_create_tir(priv->mdev, cmd, cmd_len, &priv->tirn[tt]);

	kvfree(cmd);

	return (rc);
}
/*
 * Destroy the TIR of traffic type "tt", identified by "priv->tirn[tt]".
 */
static void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}
/*
 * Create one TIR for each of the MLX5E_NUM_TT traffic types. On failure
 * the TIRs created so far are destroyed again, last one first, and the
 * error is returned.
 */
static int
mlx5e_open_tirs(struct mlx5e_priv *priv)
{
	int rc;
	int tt;

	for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
		rc = mlx5e_open_tir(priv, tt);
		if (rc == 0)
			continue;

		/* unwind everything below the failing index */
		while (tt-- > 0)
			mlx5e_close_tir(priv, tt);

		return (rc);
	}
	return (0);
}
/*
 * Destroy the TIR of every traffic type, in ascending traffic type
 * order.
 */
static void
mlx5e_close_tirs(struct mlx5e_priv *priv)
{
	int tt = 0;

	while (tt < MLX5E_NUM_TT) {
		mlx5e_close_tir(priv, tt);
		tt++;
	}
}
/*
 * Program a new MTU into the port and the vport context and update the
 * MTU of the network interface.
 *
 * SW MTU does not include headers,
 * HW MTU includes all headers and checksums.
 *
 *   ifp    - network interface to update
 *   sw_mtu - requested software MTU
 *
 * Returns zero when the MTU read back from the hardware matches the
 * requested one. Returns the error of the failing command when setting
 * the port MTU or reading back the MTU fails. Returns -E2BIG when the
 * hardware reports a smaller and -EINVAL when it reports a bigger MTU
 * than requested. A failure to update the vport context is only logged.
 */
static int
mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	int hw_mtu;
	int err;

	hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);

	err = mlx5_set_port_mtu(mdev, hw_mtu);
	if (err) {
		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
		    __func__, sw_mtu, err);
		return (err);
	}

	/* Update vport context MTU */
	err = mlx5_set_vport_mtu(mdev, hw_mtu);
	if (err) {
		if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
		    __func__, err);
	}

	ifp->if_mtu = sw_mtu;

	/* read back the MTU the hardware actually uses */
	err = mlx5_query_vport_mtu(mdev, &hw_mtu);
	if (err || !hw_mtu) {
		/* fallback to port oper mtu */
		err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
	}
	if (err) {
		if_printf(ifp, "Query port MTU, after setting new "
		    "MTU value, failed\n");
		return (err);
	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
		err = -E2BIG;
		if_printf(ifp, "Port MTU %d is smaller than "
		    "ifp mtu %d\n", hw_mtu, sw_mtu);
	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
		err = -EINVAL;
		if_printf(ifp, "Port MTU %d is bigger than "
		    "ifp mtu %d\n", hw_mtu, sw_mtu);
	}
	/* remember the hardware MTU, even when it does not match */
	priv->params_ethtool.hw_mtu = hw_mtu;

	return (err);
}
/*
 * Bring the interface into the opened state. The steps are: open the
 * TISes, allocate the queue counter, open the channels, the RQ table,
 * the TIRs and the flow table, and add all VLAN rules. After that the
 * MLX5E_STATE_OPENED bit is set and the carrier and receive mode are
 * updated.
 *
 * Returns zero on success or when the interface is already open. On
 * failure all steps done so far are undone in reverse order and the
 * error is returned.
 */
int
mlx5e_open_locked(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	u16 set_id;
	int err;

	/* nothing to do when already opened */
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
		return (0);

#ifdef RSS
	if (rss_getnumbuckets() > priv->params.num_channels) {
		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
		    "channels(%u) available\n", rss_getnumbuckets(),
		    priv->params.num_channels);
	}
#endif

	err = mlx5e_open_tises(priv);
	if (err != 0) {
		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
		    __func__, err);
		return (err);
	}

	err = mlx5_vport_alloc_q_counter(priv->mdev,
	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
	if (err != 0) {
		if_printf(priv->ifp,
		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
		    __func__, err);
		goto undo_tises;
	}
	/* store counter set ID */
	priv->counter_set_id = set_id;

	err = mlx5e_open_channels(priv);
	if (err != 0) {
		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
		    __func__, err);
		goto undo_q_counter;
	}

	err = mlx5e_open_rqt(priv);
	if (err != 0) {
		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
		    __func__, err);
		goto undo_channels;
	}

	err = mlx5e_open_tirs(priv);
	if (err != 0) {
		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
		    __func__, err);
		goto undo_rqt;
	}

	err = mlx5e_open_flow_table(priv);
	if (err != 0) {
		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
		    __func__, err);
		goto undo_tirs;
	}

	err = mlx5e_add_all_vlan_rules(priv);
	if (err != 0) {
		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
		    __func__, err);
		goto undo_flow_table;
	}

	/* everything is in place, publish the opened state */
	set_bit(MLX5E_STATE_OPENED, &priv->state);

	mlx5e_update_carrier(priv);
	mlx5e_set_rx_mode_core(priv);

	return (0);

undo_flow_table:
	mlx5e_close_flow_table(priv);

undo_tirs:
	mlx5e_close_tirs(priv);

undo_rqt:
	mlx5e_close_rqt(priv);

undo_channels:
	mlx5e_close_channels(priv);

undo_q_counter:
	mlx5_vport_dealloc_q_counter(priv->mdev,
	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);

undo_tises:
	mlx5e_close_tises(priv);

	return (err);
}
/*
 * Open the interface given as "arg" with the private lock held: set the
 * port status to up, open the interface and mark it as running. A
 * failure to set the port status is only logged and the result of
 * mlx5e_open_locked() is not checked.
 */
static void
mlx5e_open(void *arg)
{
	struct mlx5e_priv *priv = arg;

	PRIV_LOCK(priv);

	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP) != 0) {
		if_printf(priv->ifp,
		    "%s: Setting port status to up failed\n",
		    __func__);
	}

	mlx5e_open_locked(priv->ifp);
	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;

	PRIV_UNLOCK(priv);
}
/*
 * Take the interface out of the opened state. The MLX5E_STATE_OPENED
 * bit is cleared first, then the receive mode is updated, the VLAN
 * rules are removed and the link is reported as down. After that the
 * resources set up by mlx5e_open_locked() are released in reverse
 * order. Always returns zero, also when the interface is already
 * closed.
 */
int
mlx5e_close_locked(struct ifnet *ifp)
{
struct mlx5e_priv *priv = ifp->if_softc;
/* check if already closed */
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
return (0);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
mlx5e_set_rx_mode_core(priv);
mlx5e_del_all_vlan_rules(priv);
if_link_state_change(priv->ifp, LINK_STATE_DOWN);
/* release everything in the reverse order of mlx5e_open_locked() */
mlx5e_close_flow_table(priv);
mlx5e_close_tirs(priv);
mlx5e_close_rqt(priv);
mlx5e_close_channels(priv);
mlx5_vport_dealloc_q_counter(priv->mdev,
MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
mlx5e_close_tises(priv);
return (0);
}
#if (__FreeBSD_version >= 1100000)
/*
 * Return the value of interface counter "cnt" from the cached vport and
 * physical port statistics. Counters not handled here are passed on to
 * if_get_counter_default().
 *
 * NOTE: The statistics are read without PRIV_LOCK(), because taking
 * that lock is not allowed here.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	u64 value;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (priv->stats.vport.rx_packets);
	case IFCOUNTER_IERRORS:
		/* sum up all receive error counters */
		value = priv->stats.vport.rx_error_packets;
		value += priv->stats.pport.alignment_err;
		value += priv->stats.pport.check_seq_err;
		value += priv->stats.pport.crc_align_errors;
		value += priv->stats.pport.in_range_len_errors;
		value += priv->stats.pport.jabbers;
		value += priv->stats.pport.out_of_range_len;
		value += priv->stats.pport.oversize_pkts;
		value += priv->stats.pport.symbol_err;
		value += priv->stats.pport.too_long_errors;
		value += priv->stats.pport.undersize_pkts;
		value += priv->stats.pport.unsupported_op_rx;
		return (value);
	case IFCOUNTER_IQDROPS:
		value = priv->stats.vport.rx_out_of_buffer;
		value += priv->stats.pport.drop_events;
		return (value);
	case IFCOUNTER_OPACKETS:
		return (priv->stats.vport.tx_packets);
	case IFCOUNTER_OERRORS:
		return (priv->stats.vport.tx_error_packets);
	case IFCOUNTER_IBYTES:
		return (priv->stats.vport.rx_bytes);
	case IFCOUNTER_OBYTES:
		return (priv->stats.vport.tx_bytes);
	case IFCOUNTER_IMCASTS:
		return (priv->stats.vport.rx_multicast_packets);
	case IFCOUNTER_OMCASTS:
		return (priv->stats.vport.tx_multicast_packets);
	case IFCOUNTER_OQDROPS:
		return (priv->stats.vport.tx_queue_dropped);
	case IFCOUNTER_COLLISIONS:
		return (priv->stats.pport.collisions);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif
/*
 * Request an update of the receive mode. The update itself is not done
 * here; the "set_rx_mode_work" item is queued on the work queue of the
 * interface instead.
 */
static void
mlx5e_set_rx_mode(struct ifnet *ifp)
{
struct mlx5e_priv *priv = ifp->if_softc;
queue_work(priv->wq, &priv->set_rx_mode_work);
}
/*
 * Interface ioctl handler (installed as ifp->if_ioctl).
 *
 * Handles MTU changes, interface flag changes, multicast list updates,
 * media requests, capability toggles and module EEPROM reads
 * (SIOCGI2C). Every other command is forwarded to ether_ioctl().
 *
 * Returns 0 on success or an errno value. ENXIO is returned when the
 * softc pointer is NULL or the softc is marked as gone.
 */
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
struct mlx5e_priv *priv;
struct ifreq *ifr;
struct ifi2creq i2c;
int error = 0;
int mask = 0;
int size_read = 0;
int module_status;
int module_num;
int max_mtu;
uint8_t read_addr;
priv = ifp->if_softc;
/* check if detaching */
if (priv == NULL || priv->gone != 0)
return (ENXIO);
switch (command) {
case SIOCSIFMTU:
ifr = (struct ifreq *)data;
PRIV_LOCK(priv);
/* NOTE(review): the return value of this query is not checked */
mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
int was_opened;
/* an opened port is closed around the MTU update and reopened after */
was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
if (was_opened)
mlx5e_close_locked(ifp);
/* set new MTU */
mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
if (was_opened)
mlx5e_open_locked(ifp);
} else {
error = EINVAL;
if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
}
PRIV_UNLOCK(priv);
break;
case SIOCSIFFLAGS:
/* already up and running: only schedule an RX mode update, no lock taken */
if ((ifp->if_flags & IFF_UP) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
mlx5e_set_rx_mode(ifp);
break;
}
PRIV_LOCK(priv);
if (ifp->if_flags & IFF_UP) {
/* bring the interface up: open if needed, mark running, set port up */
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
mlx5e_open_locked(ifp);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
}
} else {
/* bring the interface down: set port down, close if opened, clear running */
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
mlx5_set_port_status(priv->mdev,
MLX5_PORT_DOWN);
if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
mlx5e_close_locked(ifp);
mlx5e_update_carrier(priv);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
}
}
PRIV_UNLOCK(priv);
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
mlx5e_set_rx_mode(ifp);
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
case SIOCGIFXMEDIA:
ifr = (struct ifreq *)data;
error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
break;
case SIOCSIFCAP:
ifr = (struct ifreq *)data;
PRIV_LOCK(priv);
/* bits that differ between the requested and the enabled capabilities */
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
/* TSO4 is switched off together with TX checksum offload */
if (IFCAP_TSO4 & ifp->if_capenable &&
!(IFCAP_TXCSUM & ifp->if_capenable)) {
ifp->if_capenable &= ~IFCAP_TSO4;
ifp->if_hwassist &= ~CSUM_IP_TSO;
if_printf(ifp,
"tso4 disabled due to -txcsum.\n");
}
}
if (mask & IFCAP_TXCSUM_IPV6) {
ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
/* TSO6 is switched off together with IPv6 TX checksum offload */
if (IFCAP_TSO6 & ifp->if_capenable &&
!(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
ifp->if_capenable &= ~IFCAP_TSO6;
ifp->if_hwassist &= ~CSUM_IP6_TSO;
if_printf(ifp,
"tso6 disabled due to -txcsum6.\n");
}
}
if (mask & IFCAP_RXCSUM)
ifp->if_capenable ^= IFCAP_RXCSUM;
if (mask & IFCAP_RXCSUM_IPV6)
ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
/*
 * NOTE(review): the "goto out" paths below leave the switch case
 * early, so VLAN_CAPABILITIES() and all later toggles are skipped
 * while the toggles above have already been applied.
 */
if (mask & IFCAP_TSO4) {
if (!(IFCAP_TSO4 & ifp->if_capenable) &&
!(IFCAP_TXCSUM & ifp->if_capenable)) {
if_printf(ifp, "enable txcsum first.\n");
error = EAGAIN;
goto out;
}
ifp->if_capenable ^= IFCAP_TSO4;
ifp->if_hwassist ^= CSUM_IP_TSO;
}
if (mask & IFCAP_TSO6) {
if (!(IFCAP_TSO6 & ifp->if_capenable) &&
!(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
if_printf(ifp, "enable txcsum6 first.\n");
error = EAGAIN;
goto out;
}
ifp->if_capenable ^= IFCAP_TSO6;
ifp->if_hwassist ^= CSUM_IP6_TSO;
}
if (mask & IFCAP_VLAN_HWFILTER) {
if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
mlx5e_disable_vlan_filter(priv);
else
mlx5e_enable_vlan_filter(priv);
ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
}
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (mask & IFCAP_WOL_MAGIC)
ifp->if_capenable ^= IFCAP_WOL_MAGIC;
VLAN_CAPABILITIES(ifp);
/* turning off LRO also turns off HW LRO, if it is on */
if (mask & IFCAP_LRO) {
int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
bool need_restart = false;
ifp->if_capenable ^= IFCAP_LRO;
/* figure out if updating HW LRO is needed */
if (!(ifp->if_capenable & IFCAP_LRO)) {
if (priv->params.hw_lro_en) {
priv->params.hw_lro_en = false;
need_restart = true;
}
} else {
if (priv->params.hw_lro_en == false &&
priv->params_ethtool.hw_lro != 0) {
priv->params.hw_lro_en = true;
need_restart = true;
}
}
/* a changed HW LRO setting is applied by closing and reopening */
if (was_opened && need_restart) {
mlx5e_close_locked(ifp);
mlx5e_open_locked(ifp);
}
}
if (mask & IFCAP_HWRXTSTMP) {
ifp->if_capenable ^= IFCAP_HWRXTSTMP;
if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
if (priv->clbr_done == 0)
mlx5e_reset_calibration_callout(priv);
} else {
callout_drain(&priv->tstmp_clbr);
priv->clbr_done = 0;
}
}
out:
PRIV_UNLOCK(priv);
break;
case SIOCGI2C:
ifr = (struct ifreq *)data;
/*
* Copy from the user-space address ifr_data to the
* kernel-space address i2c
*/
error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
if (error)
break;
/* reject requests that do not fit into the i2c.data buffer */
if (i2c.len > sizeof(i2c.data)) {
error = EINVAL;
break;
}
PRIV_LOCK(priv);
/* Get module_num which is required for the query_eeprom */
error = mlx5_query_module_num(priv->mdev, &module_num);
if (error) {
if_printf(ifp, "Query module num failed, eeprom "
"reading is not supported\n");
error = EINVAL;
goto err_i2c;
}
/* Check if module is present before doing an access */
module_status = mlx5_query_module_status(priv->mdev, module_num);
if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
error = EINVAL;
goto err_i2c;
}
/*
* Currently 0XA0 and 0xA2 are the only addresses permitted.
* The internal conversion is as follows:
*/
if (i2c.dev_addr == 0xA0)
read_addr = MLX5E_I2C_ADDR_LOW;
else if (i2c.dev_addr == 0xA2)
read_addr = MLX5E_I2C_ADDR_HIGH;
else {
if_printf(ifp, "Query eeprom failed, "
"Invalid Address: %X\n", i2c.dev_addr);
error = EINVAL;
goto err_i2c;
}
error = mlx5_query_eeprom(priv->mdev,
read_addr, MLX5E_EEPROM_LOW_PAGE,
(uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
(uint32_t *)i2c.data, &size_read);
if (error) {
if_printf(ifp, "Query eeprom failed, eeprom "
"reading is not supported\n");
error = EINVAL;
goto err_i2c;
}
/*
 * Requests longer than MLX5_EEPROM_MAX_BYTES get a second query for
 * the bytes following the "size_read" bytes of the first one.
 * NOTE(review): presumably a single query returns at most
 * MLX5_EEPROM_MAX_BYTES - confirm against mlx5_query_eeprom().
 */
if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
error = mlx5_query_eeprom(priv->mdev,
read_addr, MLX5E_EEPROM_LOW_PAGE,
(uint32_t)(i2c.offset + size_read),
(uint32_t)(i2c.len - size_read), module_num,
(uint32_t *)(i2c.data + size_read), &size_read);
}
if (error) {
if_printf(ifp, "Query eeprom failed, eeprom "
"reading is not supported\n");
error = EINVAL;
goto err_i2c;
}
/* hand the whole request structure, including the data read, back to the caller */
error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
err_i2c:
PRIV_UNLOCK(priv);
break;
default:
error = ether_ioctl(ifp, command, data);
break;
}
return (error);
}
/*
 * Check that the device offers what this driver requires.
 *
 * Returns 0 when the general port type capability is Ethernet and
 * -ENODEV otherwise.
 */
static int
mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
	/*
	 * TODO: uncomment once FW really sets all these bits:
	 *
	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
	 *	return -ENOTSUPP;
	 */

	/* TODO: add more must-have features */
	return (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH ?
	    0 : -ENODEV);
}
/*
 * Compute the maximum number of bytes that may be inlined into a
 * transmit WQE: half of the blue flame register size minus the size
 * of the TX WQE (less two bytes), capped at MLX5E_MAX_TX_INLINE.
 */
static u16
mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
{
	uint32_t inline_max;

	inline_max = (1U << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2U;
	inline_max -= sizeof(struct mlx5e_tx_wqe) - 2;

	/* verify against driver hardware limit */
	return (inline_max > MLX5E_MAX_TX_INLINE ?
	    MLX5E_MAX_TX_INLINE : inline_max);
}
/*
 * Fill in the default parameters of a private structure and
 * initialise its work items.
 *
 * Returns 0 on success or the error reported by
 * mlx5_query_min_inline().
 */
static int
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
    struct mlx5e_priv *priv,
    int num_comp_vectors)
{
	int rc;

	/*
	 * TODO: Consider link speed for setting "log_sq_size",
	 * "log_rq_size" and "cq_moderation_xxx":
	 */
	priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
	priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;

	/* RX moderation period and mode depend on a device capability */
	if (MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) {
		priv->params.rx_cq_moderation_usec =
		    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
		priv->params.rx_cq_moderation_mode = 1;
	} else {
		priv->params.rx_cq_moderation_usec =
		    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
		priv->params.rx_cq_moderation_mode = 0;
	}
	priv->params.rx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
	priv->params.tx_cq_moderation_usec =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
	priv->params.tx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
	priv->params.min_rx_wqes = MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;

	/* the hash table is never smaller than the default size */
	if (order_base_2(num_comp_vectors) >
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) {
		priv->params.rx_hash_log_tbl_sz =
		    order_base_2(num_comp_vectors);
	} else {
		priv->params.rx_hash_log_tbl_sz =
		    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
	}
	priv->params.num_tc = 1;
	priv->params.default_vlan_prio = 0;
	priv->counter_set_id = -1;
	priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev);

	rc = mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode);
	if (rc != 0)
		return (rc);

	/*
	 * HW LRO currently defaults to off. Once that changes, the HW
	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" is to be considered.
	 */
	priv->params.hw_lro_en = false;
	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);

	priv->mdev = mdev;
	priv->params.num_channels = num_comp_vectors;
	priv->params.channels_rsss = 1;
	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
	priv->queue_mapping_channel_mask =
	    roundup_pow_of_two(num_comp_vectors) - 1;
	priv->num_tc = priv->params.num_tc;
	priv->default_vlan_prio = priv->params.default_vlan_prio;

	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);

	return (0);
}
/*
 * Create a memory key in physical address access mode with local
 * read and write enabled for the given protection domain.
 *
 * Returns 0 on success, -ENOMEM when the command inbox cannot be
 * allocated, or the error from mlx5_core_create_mkey().
 */
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
    struct mlx5_core_mr *mkey)
{
	const int inbox_len = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkey_ctx;
	u32 *inbox;
	int rc;

	inbox = mlx5_vzalloc(inbox_len);
	if (inbox == NULL) {
		if_printf(priv->ifp, "%s: failed to allocate inbox\n", __func__);
		return (-ENOMEM);
	}

	/* fill in the memory key context embedded in the command */
	mkey_ctx = MLX5_ADDR_OF(create_mkey_in, inbox, memory_key_mkey_entry);
	MLX5_SET(mkc, mkey_ctx, access_mode, MLX5_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkey_ctx, lw, 1);
	MLX5_SET(mkc, mkey_ctx, lr, 1);
	MLX5_SET(mkc, mkey_ctx, pd, pdn);
	MLX5_SET(mkc, mkey_ctx, length64, 1);
	MLX5_SET(mkc, mkey_ctx, qpn, 0xffffff);

	rc = mlx5_core_create_mkey(priv->mdev, mkey, inbox, inbox_len);
	if (rc != 0) {
		if_printf(priv->ifp, "%s: mlx5_core_create_mkey failed, %d\n",
		    __func__, rc);
	}

	/* the inbox is released on success and on failure alike */
	kvfree(inbox);
	return (rc);
}
/*
 * String table for the vport statistics, produced by expanding
 * MLX5E_VPORT_STATS() with the MLX5E_STATS_DESC generator.
 */
static const char *mlx5e_vport_stats_desc[] = {
MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};
/*
 * String table for the pport statistics, produced by expanding
 * MLX5E_PPORT_STATS() with the MLX5E_STATS_DESC generator.
 */
static const char *mlx5e_pport_stats_desc[] = {
MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};
/*
 * Initialise the locks of a private structure: the async events
 * mutex, the state sx lock, the watchdog callout and the doorbell
 * lock.
 */
static void
mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
{
mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
sx_init(&priv->state_lock, "mlx5state");
/* the watchdog callout is associated with the async events mutex */
callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
}
/*
 * Destroy the async events mutex and the state sx lock.
 *
 * NOTE(review): the doorbell lock set up by MLX5_INIT_DOORBELL_LOCK()
 * in mlx5e_priv_mtx_init() has no counterpart here - confirm that it
 * needs no teardown.
 */
static void
mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
{
mtx_destroy(&priv->async_events_mtx);
sx_destroy(&priv->state_lock);
}
/*
 * Sysctl handler exporting the firmware revision of the device as a
 * "major.minor.sub" string.
 */
static int
sysctl_firmware(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_priv *priv = arg1;
	/*
	 * The string format is "%d.%d.%d". Each of
	 * fw_rev_{maj,min,sub} returns a u16, which needs at most 5
	 * characters. Together with the two "." separators and the
	 * terminating NUL this gives (5*3 + 3) = 18 characters at most.
	 */
	char fw_str[18];

	snprintf(fw_str, sizeof(fw_str), "%d.%d.%d",
	    fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
	    fw_rev_sub(priv->mdev));

	return (sysctl_handle_string(oidp, fw_str, sizeof(fw_str), req));
}
/*
 * Drain the send queue of every traffic class of a channel.
 */
static void
mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
{
	int tc = 0;

	while (tc < ch->num_tc) {
		mlx5e_drain_sq(&ch->sq[tc]);
		tc++;
	}
}
/*
 * Build a doorbell carrying a NOP opcode for the send queue, hand it
 * to mlx5e_tx_notify_hw() and clear the cached doorbell afterwards.
 */
static void
mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
{
sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
/* clear both 32-bit words again through the 64-bit view */
sq->doorbell.d64 = 0;
}
/*
 * Bring a stopped send queue back into operation.
 *
 * The queue is moved from ERR to RST, its consumer and producer
 * counters are zeroed, the doorbell record is reset and the queue is
 * moved from RST to RDY. Failures of the state changes are only
 * logged; the queue is marked running in any case.
 */
void
mlx5e_resume_sq(struct mlx5e_sq *sq)
{
int err;
/* check if already enabled */
if (READ_ONCE(sq->running) != 0)
return;
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
MLX5_SQC_STATE_RST);
if (err != 0) {
if_printf(sq->ifp,
"mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
}
sq->cc = 0;
sq->pc = 0;
/* reset doorbell prior to moving from RST to RDY */
mlx5e_reset_sq_doorbell_record(sq);
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
MLX5_SQC_STATE_RDY);
if (err != 0) {
if_printf(sq->ifp,
"mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
}
sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
/* publish the running state last, after the queue has been set up */
WRITE_ONCE(sq->running, 1);
}
/*
 * Resume the send queue of every traffic class of a channel.
 */
static void
mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
{
	int tc = 0;

	while (tc < ch->num_tc) {
		mlx5e_resume_sq(&ch->sq[tc]);
		tc++;
	}
}
/*
 * Stop reception on a channel.
 *
 * The receive queue is marked disabled and its watchdog callout is
 * stopped and drained. The queue is then moved from RDY to ERR, the
 * completion handler is polled until the work queue is empty, and
 * finally the queue is moved from ERR to RST. Failures of the state
 * changes are only logged.
 */
static void
mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
{
	struct mlx5e_rq *rq = &ch->rq;
	int err;

	mtx_lock(&rq->mtx);
	rq->enabled = 0;
	callout_stop(&rq->watchdog);
	mtx_unlock(&rq->mtx);

	/* drain outside of the RQ mutex */
	callout_drain(&rq->watchdog);

	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
	if (err != 0) {
		/* the message names the transition that was attempted */
		if_printf(rq->ifp,
		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
	}

	/* run the completion handler until all receive WQEs are gone */
	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
		msleep(1);
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	/*
	 * Transitioning into RST state will allow the FW to track less
	 * ERR state queues, thus reducing the recv queue flushing time
	 */
	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
	if (err != 0) {
		if_printf(rq->ifp,
		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
	}
}
/*
 * Restart reception on a channel.
 *
 * The WQE counter is zeroed and the doorbell record updated before the
 * receive queue is moved from RST to RDY. A failure of the state
 * change is only logged; the queue is marked enabled and its
 * completion handler is invoked once in any case.
 */
static void
mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
{
struct mlx5e_rq *rq = &ch->rq;
int err;
rq->wq.wqe_ctr = 0;
mlx5_wq_ll_update_db_record(&rq->wq);
err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err != 0) {
if_printf(rq->ifp,
"mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
}
rq->enabled = 1;
/* call the completion handler of the RQ's completion queue directly */
rq->cq.mcq.comp(&rq->cq.mcq);
}
/*
 * Disable (value != 0) or enable (value == 0) TX DMA on all channels.
 * Nothing is done unless the interface is in the opened state.
 */
void
mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
{
	void (*apply)(struct mlx5e_channel *);
	int ch;

	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		return;

	/* a non-zero value selects "disable" */
	apply = (value != 0) ? mlx5e_disable_tx_dma : mlx5e_enable_tx_dma;

	for (ch = 0; ch < priv->params.num_channels; ch++)
		apply(&priv->channel[ch]);
}
/*
 * Disable (value != 0) or enable (value == 0) RX DMA on all channels.
 * Nothing is done unless the interface is in the opened state.
 */
void
mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
{
	void (*apply)(struct mlx5e_channel *);
	int ch;

	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		return;

	/* a non-zero value selects "disable" */
	apply = (value != 0) ? mlx5e_disable_rx_dma : mlx5e_enable_rx_dma;

	for (ch = 0; ch < priv->params.num_channels; ch++)
		apply(&priv->channel[ch]);
}
/*
 * Register the read-only "fw_version" and "board_id" sysctl nodes
 * below the "hw" sysctl node of the device.
 */
static void
mlx5e_add_hw_stats(struct mlx5e_priv *priv)
{
/* the firmware version string is produced on demand by sysctl_firmware() */
SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
sysctl_firmware, "A", "HCA firmware version");
SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
"Board ID");
}
/*
 * Sysctl handler for one bit of the TX priority flow control mask.
 * "arg1" is the private structure and "arg2" the bit (priority)
 * number.
 *
 * A changed mask is written to the port through mlx5e_set_port_pfc();
 * if that fails the previous mask is restored.
 */
static int
mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_priv *priv = arg1;
	uint32_t old_mask;
	uint32_t bit_val;
	int err;

	PRIV_LOCK(priv);

	old_mask = priv->params.tx_priority_flow_control;

	/* present the current state of the bit and fetch the new one */
	bit_val = (old_mask >> arg2) & 1;
	err = sysctl_handle_32(oidp, &bit_val, 0, req);

	/* any non-zero value sets the bit */
	if (bit_val == 0)
		priv->params.tx_priority_flow_control &= ~(1 << arg2);
	else
		priv->params.tx_priority_flow_control |= (1 << arg2);

	/* check if update is required */
	if (err == 0 && priv->gone == 0 &&
	    old_mask != priv->params.tx_priority_flow_control) {
		err = -mlx5e_set_port_pfc(priv);
		/* restore previous value */
		if (err != 0)
			priv->params.tx_priority_flow_control = old_mask;
	}

	PRIV_UNLOCK(priv);
	return (err);
}
/*
 * Sysctl handler for one bit of the RX priority flow control mask.
 * "arg1" is the private structure and "arg2" the bit (priority)
 * number.
 *
 * A changed mask is written to the port through mlx5e_set_port_pfc();
 * if that fails the previous mask is restored.
 */
static int
mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_priv *priv = arg1;
	uint32_t old_mask;
	uint32_t bit_val;
	int err;

	PRIV_LOCK(priv);

	old_mask = priv->params.rx_priority_flow_control;

	/* present the current state of the bit and fetch the new one */
	bit_val = (old_mask >> arg2) & 1;
	err = sysctl_handle_32(oidp, &bit_val, 0, req);

	/* any non-zero value sets the bit */
	if (bit_val == 0)
		priv->params.rx_priority_flow_control &= ~(1 << arg2);
	else
		priv->params.rx_priority_flow_control |= (1 << arg2);

	/* check if update is required */
	if (err == 0 && priv->gone == 0 &&
	    old_mask != priv->params.rx_priority_flow_control) {
		err = -mlx5e_set_port_pfc(priv);
		/* restore previous value */
		if (err != 0)
			priv->params.rx_priority_flow_control = old_mask;
	}

	PRIV_UNLOCK(priv);
	return (err);
}
/*
 * Set up the default pause frame and priority flow control (PFC)
 * configuration, register the matching sysctl nodes and push the
 * configuration to the firmware.
 *
 * Global pause frames default to enabled and PFC defaults to disabled
 * for all eight priorities. When built for FreeBSD versions before
 * 1100000 the "dev.mce.<unit>.*" tunables are fetched by hand. If the
 * firmware update fails with -EINVAL, both PFC masks are cleared and
 * the update is repeated once.
 */
static void
mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
{
	unsigned int x;
	char path[96];
	int error;
#if (__FreeBSD_version < 1100000)
	int value;
#endif

	/* enable pauseframes by default */
	priv->params.tx_pauseframe_control = 1;
	priv->params.rx_pauseframe_control = 1;

	/* disable ports flow control, PFC, by default */
	priv->params.tx_priority_flow_control = 0;
	priv->params.rx_priority_flow_control = 0;

#if (__FreeBSD_version < 1100000)
	/* compute path for sysctl */
	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
	    device_get_unit(priv->mdev->pdev->dev.bsddev));

	/* try to fetch tunable, if any */
	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);

	/* compute path for sysctl */
	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
	    device_get_unit(priv->mdev->pdev->dev.bsddev));

	/* try to fetch tunable, if any */
	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);

	for (x = 0; x != 8; x++) {
		/* compute path for sysctl */
		snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);

		/*
		 * Try to fetch the tunable, if any. "value" starts at
		 * zero so that a tunable which is not set, and hence
		 * leaves "value" unmodified, never enables the priority.
		 */
		value = 0;
		TUNABLE_INT_FETCH(path, &value);
		if (value != 0)
			priv->params.tx_priority_flow_control |= 1 << x;

		/* compute path for sysctl */
		snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);

		/* try to fetch tunable, if any; see above */
		value = 0;
		TUNABLE_INT_FETCH(path, &value);
		if (value != 0)
			priv->params.rx_priority_flow_control |= 1 << x;
	}
#endif

	/* register pauseframe SYSCTLs */
	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
	    &priv->params.tx_pauseframe_control, 0,
	    "Set to enable TX pause frames. Clear to disable.");

	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
	    &priv->params.rx_pauseframe_control, 0,
	    "Set to enable RX pause frames. Clear to disable.");

	/* register priority_flow control, PFC, SYSCTLs */
	for (x = 0; x != 8; x++) {
		snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);

		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
		    "Set to enable TX ports flow control frames for given priority. Clear to disable.");

		snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);

		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
		    "Set to enable RX ports flow control frames for given priority. Clear to disable.");
	}

	PRIV_LOCK(priv);

	/* range check: normalise both pause frame settings to 0 or 1 */
	priv->params.tx_pauseframe_control =
	    priv->params.tx_pauseframe_control ? 1 : 0;
	priv->params.rx_pauseframe_control =
	    priv->params.rx_pauseframe_control ? 1 : 0;

	/* update firmware */
	error = mlx5e_set_port_pause_and_pfc(priv);
	if (error == -EINVAL) {
		if_printf(priv->ifp,
		    "Global pauseframes must be disabled before enabling PFC.\n");
		priv->params.rx_priority_flow_control = 0;
		priv->params.tx_priority_flow_control = 0;

		/* update firmware again, now without PFC */
		(void) mlx5e_set_port_pause_and_pfc(priv);
	}
	PRIV_UNLOCK(priv);
}
/*
 * Allocate an unlimited send tag by selecting a channel from the flow
 * ID and returning the tag embedded in that channel.
 *
 * Returns 0 on success, EOPNOTSUPP when the interface is gone or the
 * flow has no hash type, and ENXIO when the first send queue of the
 * selected channel is not running.
 */
static int
mlx5e_ul_snd_tag_alloc(struct ifnet *ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	struct mlx5e_channel *pch;
	u32 ch;
#ifdef RSS
	u32 temp;
#endif

	if (unlikely(priv->gone || params->hdr.flowtype == M_HASHTYPE_NONE))
		return (EOPNOTSUPP);

	/* keep this code synced with mlx5e_select_queue() */
	ch = priv->params.num_channels;
#ifdef RSS
	if (rss_hash2bucket(params->hdr.flowid,
	    params->hdr.flowtype, &temp) == 0)
		ch = temp % ch;
	else
#endif
		ch = (params->hdr.flowid % 128) % ch;

	/*
	 * NOTE: The channels array is only freed at detach and it is
	 * safe to return a pointer to the send tag inside the channels
	 * structure as long as we reference the priv.
	 */
	pch = &priv->channel[ch];

	/* check if send queue is not running */
	if (unlikely(pch->sq[0].running == 0))
		return (ENXIO);

	mlx5e_ref_channel(priv);
	*ppmt = &pch->tag.m_snd_tag;
	return (0);
}
/*
 * Query an unlimited send tag: the maximum rate is reported as all
 * ones and the queue level is taken from the first send queue of the
 * channel embedding the tag. Always returns 0.
 */
static int
mlx5e_ul_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
	struct mlx5e_channel *pch;

	pch = container_of(pmt, struct mlx5e_channel, tag.m_snd_tag);

	params->unlimited.max_rate = ~0ULL;
	params->unlimited.queue_level = mlx5e_sq_queue_level(&pch->sq[0]);

	return (0);
}
/*
 * Release an unlimited send tag by dropping the channel reference
 * held on the private structure owning the tag's channel.
 */
static void
mlx5e_ul_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_channel *pch;

	pch = container_of(pmt, struct mlx5e_channel, tag.m_snd_tag);
	mlx5e_unref_channel(pch->priv);
}
/*
 * Send tag allocation entry point: dispatch on the requested tag type.
 * Returns EOPNOTSUPP for types that are not handled.
 */
static int
mlx5e_snd_tag_alloc(struct ifnet *ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
#ifdef RATELIMIT
	if (params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT)
		return (mlx5e_rl_snd_tag_alloc(ifp, params, ppmt));
#endif
	if (params->hdr.type == IF_SND_TAG_TYPE_UNLIMITED)
		return (mlx5e_ul_snd_tag_alloc(ifp, params, ppmt));

	return (EOPNOTSUPP);
}
/*
 * Send tag modify entry point. Only rate limit tags can be modified,
 * and only when RATELIMIT is compiled in; every other tag type yields
 * EOPNOTSUPP.
 */
static int
mlx5e_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
#ifdef RATELIMIT
	struct mlx5e_snd_tag *tag =
	    container_of(pmt, struct mlx5e_snd_tag, m_snd_tag);

	if (tag->type == IF_SND_TAG_TYPE_RATE_LIMIT)
		return (mlx5e_rl_snd_tag_modify(pmt, params));
#endif
	return (EOPNOTSUPP);
}
/*
 * Send tag query entry point: dispatch on the type stored in the tag.
 * Returns EOPNOTSUPP for types that are not handled.
 */
static int
mlx5e_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
	struct mlx5e_snd_tag *tag =
	    container_of(pmt, struct mlx5e_snd_tag, m_snd_tag);

#ifdef RATELIMIT
	if (tag->type == IF_SND_TAG_TYPE_RATE_LIMIT)
		return (mlx5e_rl_snd_tag_query(pmt, params));
#endif
	if (tag->type == IF_SND_TAG_TYPE_UNLIMITED)
		return (mlx5e_ul_snd_tag_query(pmt, params));

	return (EOPNOTSUPP);
}
/*
 * Send tag free entry point: dispatch on the type stored in the tag.
 * Tags of any other type are ignored.
 */
static void
mlx5e_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_snd_tag *tag =
	    container_of(pmt, struct mlx5e_snd_tag, m_snd_tag);

#ifdef RATELIMIT
	if (tag->type == IF_SND_TAG_TYPE_RATE_LIMIT) {
		mlx5e_rl_snd_tag_free(pmt);
		return;
	}
#endif
	if (tag->type == IF_SND_TAG_TYPE_UNLIMITED)
		mlx5e_ul_snd_tag_free(pmt);
}
/*
 * Create and attach the network interface for a core device. This is
 * the ".add" callback of mlx5e_interface.
 *
 * The private structure is allocated with room for one channel per
 * completion vector. The function then sets up the ifnet, the sysctl
 * trees, the workqueue and the UAR, PD, transport domain and memory
 * key resources, configures pause frames and media, attaches the
 * interface and creates the statistics nodes.
 *
 * Returns the private structure on success and NULL on failure. On
 * failure the error labels at the end release what was set up so far
 * in reverse order.
 */
static void *
mlx5e_create_ifp(struct mlx5_core_dev *mdev)
{
struct ifnet *ifp;
struct mlx5e_priv *priv;
u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
struct sysctl_oid_list *child;
int ncv = mdev->priv.eq_table.num_comp_vectors;
char unit[16];
int err;
int i;
u32 eth_proto_cap;
if (mlx5e_check_required_hca_cap(mdev)) {
mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
return (NULL);
}
/*
* Try to allocate the priv and make room for worst-case
* number of channel structures:
*/
priv = malloc(sizeof(*priv) +
(sizeof(priv->channel[0]) * mdev->priv.eq_table.num_comp_vectors),
M_MLX5EN, M_WAITOK | M_ZERO);
mlx5e_priv_mtx_init(priv);
ifp = priv->ifp = if_alloc(IFT_ETHER);
if (ifp == NULL) {
mlx5_core_err(mdev, "if_alloc() failed\n");
goto err_free_priv;
}
ifp->if_softc = priv;
if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
ifp->if_mtu = ETHERMTU;
ifp->if_init = mlx5e_open;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = mlx5e_ioctl;
ifp->if_transmit = mlx5e_xmit;
ifp->if_qflush = if_qflush;
#if (__FreeBSD_version >= 1100000)
ifp->if_get_counter = mlx5e_get_counter;
#endif
ifp->if_snd.ifq_maxlen = ifqmaxlen;
/*
* Set driver features
*/
ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
ifp->if_capabilities |= IFCAP_LRO;
ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
ifp->if_capabilities |= IFCAP_TXRTLMT;
ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc;
ifp->if_snd_tag_free = mlx5e_snd_tag_free;
ifp->if_snd_tag_modify = mlx5e_snd_tag_modify;
ifp->if_snd_tag_query = mlx5e_snd_tag_query;
/* set TSO limits so that we don't have to drop TX packets */
ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
/* all capabilities start out enabled; derive if_hwassist from them */
ifp->if_capenable = ifp->if_capabilities;
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TSO)
ifp->if_hwassist |= CSUM_TSO;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
/* ifnet sysctl tree */
sysctl_ctx_init(&priv->sysctl_ctx);
priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
if (priv->sysctl_ifnet == NULL) {
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
/* second level: a node named after the unit number, below the name node */
snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
if (priv->sysctl_ifnet == NULL) {
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
/* HW sysctl tree */
child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
if (priv->sysctl_hw == NULL) {
mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
goto err_free_sysctl;
}
err = mlx5e_build_ifp_priv(mdev, priv, ncv);
if (err) {
mlx5_core_err(mdev, "mlx5e_build_ifp_priv() failed (%d)\n", err);
goto err_free_sysctl;
}
/* the "unit" buffer is reused for the workqueue name */
snprintf(unit, sizeof(unit), "mce%u_wq",
device_get_unit(mdev->pdev->dev.bsddev));
priv->wq = alloc_workqueue(unit, 0, 1);
if (priv->wq == NULL) {
if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
goto err_free_sysctl;
}
err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
if (err) {
if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
__func__, err);
goto err_free_wq;
}
err = mlx5_core_alloc_pd(mdev, &priv->pdn);
if (err) {
if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
__func__, err);
goto err_unmap_free_uar;
}
err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
if (err) {
if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
__func__, err);
goto err_dealloc_pd;
}
err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
if (err) {
if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
__func__, err);
goto err_dealloc_transport_domain;
}
mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
/* check if we should generate a random MAC address */
if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
is_zero_ether_addr(dev_addr)) {
random_ether_addr(dev_addr);
if_printf(ifp, "Assigned random MAC address\n");
}
#ifdef RATELIMIT
err = mlx5e_rl_init(priv);
if (err) {
if_printf(ifp, "%s: mlx5e_rl_init failed, %d\n",
__func__, err);
goto err_create_mkey;
}
#endif
/* set default MTU */
mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
/* Set default media status */
priv->media_status_last = IFM_AVALID;
priv->media_active_last = IFM_ETHER | IFM_AUTO |
IFM_ETH_RXPAUSE | IFM_FDX;
/* setup default pauseframes configuration */
mlx5e_setup_pauseframes(priv);
/* a failed capability query is not fatal: no link mode is added from the table */
err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
if (err) {
eth_proto_cap = 0;
if_printf(ifp, "%s: Query port media capability failed, %d\n",
__func__, err);
}
/* Setup supported medias */
ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
mlx5e_media_change, mlx5e_media_status);
/* add two media entries, plain and full-duplex with pause, per supported link mode */
for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
if (mlx5e_mode_table[i].baudrate == 0)
continue;
if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
ifmedia_add(&priv->media,
mlx5e_mode_table[i].subtype |
IFM_ETHER, 0, NULL);
ifmedia_add(&priv->media,
mlx5e_mode_table[i].subtype |
IFM_ETHER | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
}
}
/* Additional supported medias */
ifmedia_add(&priv->media, IFM_10G_LR | IFM_ETHER, 0, NULL);
ifmedia_add(&priv->media, IFM_10G_LR |
IFM_ETHER | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
ifmedia_add(&priv->media, IFM_40G_ER4 | IFM_ETHER, 0, NULL);
ifmedia_add(&priv->media, IFM_40G_ER4 |
IFM_ETHER | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
/* Set autoselect by default */
ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
ether_ifattach(ifp, dev_addr);
/* Register for VLAN events */
priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
/* Link is down by default */
if_link_state_change(ifp, LINK_STATE_DOWN);
mlx5e_enable_async_events(priv);
mlx5e_add_hw_stats(priv);
mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
"vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
priv->stats.vport.arg);
mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
"pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
priv->stats.pport.arg);
mlx5e_create_ethtool(priv);
/* take an initial statistics snapshot under the async events mutex */
mtx_lock(&priv->async_events_mtx);
mlx5e_update_stats(priv);
mtx_unlock(&priv->async_events_mtx);
SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
&priv->clbr_done, 0,
"RX timestamps calibration state");
callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
mlx5e_reset_calibration_callout(priv);
return (priv);
/* error unwinding: each label releases one resource and falls through */
#ifdef RATELIMIT
err_create_mkey:
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
#endif
err_dealloc_transport_domain:
mlx5_dealloc_transport_domain(mdev, priv->tdn);
err_dealloc_pd:
mlx5_core_dealloc_pd(mdev, priv->pdn);
err_unmap_free_uar:
mlx5_unmap_free_uar(mdev, &priv->cq_uar);
err_free_wq:
destroy_workqueue(priv->wq);
err_free_sysctl:
sysctl_ctx_free(&priv->sysctl_ctx);
if (priv->sysctl_debug)
sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
if_free(ifp);
err_free_priv:
mlx5e_priv_mtx_destroy(priv);
free(priv, M_MLX5EN);
return (NULL);
}
/*
 * Detach and destroy the network interface of a core device. This is
 * the ".remove" callback of mlx5e_interface; "vpriv" is the private
 * structure returned by mlx5e_create_ifp().
 *
 * The softc is marked as gone first so that new ioctls are refused.
 * The function then waits for outstanding send tag references, closes
 * the device, detaches the interface and releases the sysctl contexts
 * and hardware resources before freeing the private structure.
 */
static void
mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
struct ifnet *ifp = priv->ifp;
/* don't allow more IOCTLs */
priv->gone = 1;
/* XXX wait a bit to allow IOCTL handlers to complete */
pause("W", hz);
#ifdef RATELIMIT
/*
* The kernel can have reference(s) via the m_snd_tag's into
* the ratelimit channels, and these must go away before
* detaching:
*/
while (READ_ONCE(priv->rl.stats.tx_active_connections) != 0) {
if_printf(priv->ifp, "Waiting for all ratelimit connections "
"to terminate\n");
pause("W", hz);
}
#endif
/* stop watchdog timer */
callout_drain(&priv->watchdog);
callout_drain(&priv->tstmp_clbr);
if (priv->vlan_attach != NULL)
EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
if (priv->vlan_detach != NULL)
EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
/* make sure device gets closed */
PRIV_LOCK(priv);
mlx5e_close_locked(ifp);
PRIV_UNLOCK(priv);
/* wait for all unlimited send tags to go away */
/* NOTE(review): channel_refs is polled here without READ_ONCE(), unlike the ratelimit counter above */
while (priv->channel_refs != 0) {
if_printf(priv->ifp, "Waiting for all unlimited connections "
"to terminate\n");
pause("W", hz);
}
/* unregister device */
ifmedia_removeall(&priv->media);
ether_ifdetach(ifp);
if_free(ifp);
#ifdef RATELIMIT
mlx5e_rl_cleanup(priv);
#endif
/* destroy all remaining sysctl nodes */
sysctl_ctx_free(&priv->stats.vport.ctx);
sysctl_ctx_free(&priv->stats.pport.ctx);
sysctl_ctx_free(&priv->sysctl_ctx);
if (priv->sysctl_debug)
sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
/* release the hardware resources in reverse order of their creation */
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
mlx5e_disable_async_events(priv);
destroy_workqueue(priv->wq);
mlx5e_priv_mtx_destroy(priv);
free(priv, M_MLX5EN);
}
/*
 * Return the ifnet pointer stored in the given private structure.
 * This is the ".get_dev" callback of mlx5e_interface.
 */
static void *
mlx5e_get_ifp(void *vpriv)
{
	return (((struct mlx5e_priv *)vpriv)->ifp);
}
/*
 * Callback table for the Ethernet protocol; it is registered with the
 * mlx5 core in mlx5e_init() and unregistered in mlx5e_cleanup().
 */
static struct mlx5_interface mlx5e_interface = {
.add = mlx5e_create_ifp,
.remove = mlx5e_destroy_ifp,
.event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
.get_dev = mlx5e_get_ifp,
};
/*
 * Module initialisation: register the Ethernet interface callbacks
 * with the mlx5 core.
 */
void
mlx5e_init(void)
{
mlx5_register_interface(&mlx5e_interface);
}
/*
 * Module teardown: unregister the Ethernet interface callbacks from
 * the mlx5 core.
 */
void
mlx5e_cleanup(void)
{
mlx5_unregister_interface(&mlx5e_interface);
}
/*
 * SYSINIT hook printing the driver version string. The argument is
 * unused.
 */
static void
mlx5e_show_version(void __unused *arg)
{
/* print through "%s" so the version text is never used as a format string */
printf("%s", mlx5e_version);
}
/*
 * Module glue: print the version from a SYSINIT hook, register the
 * init and exit functions, and declare the module dependencies and
 * version. The linuxkpi dependency is only declared for FreeBSD
 * versions 1100000 and later.
 */
SYSINIT(mlx5e_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5e_show_version, NULL);
module_init_order(mlx5e_init, SI_ORDER_THIRD);
module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
#if (__FreeBSD_version >= 1100000)
MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
#endif
MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
MODULE_VERSION(mlx5en, 1);
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
index 2dc33a4e83c8..37e937ba485c 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
@@ -1,3271 +1,3271 @@
/*-
* Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/fs.h>
#undef inode
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <dev/mlx5/port.h>
#include <dev/mlx5/vport.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <dev/mlx5/fs.h>
#include "mlx5_ib.h"
#define DRIVER_NAME "mlx5ib"
#ifndef DRIVER_VERSION
-#define DRIVER_VERSION "3.4.2"
+#define DRIVER_VERSION "3.5.0"
#endif
-#define DRIVER_RELDATE "July 2018"
+#define DRIVER_RELDATE "November 2018"
/* Module metadata and dependencies of the mlx5ib module. */
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);
/*
 * "prof_sel" is still exposed as a read-only (0444) parameter; per its
 * description the setting is deprecated here and has moved to mlx5_core.
 */
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
/* Version banner assembled from DRIVER_NAME, DRIVER_VERSION and DRIVER_RELDATE. */
static const char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver "
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
/*
 * Bit tested against the atomic_size_qp capability in get_atomic_caps()
 * to detect support for 8-byte atomic operations.
 */
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};
/*
 * Map the firmware port type capability onto the RDMA link layer
 * enumeration.  Values other than IB and ETH yield
 * IB_LINK_LAYER_UNSPECIFIED.
 */
static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
	if (port_type_cap == MLX5_CAP_PORT_TYPE_IB)
		return IB_LINK_LAYER_INFINIBAND;
	if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH)
		return IB_LINK_LAYER_ETHERNET;

	return IB_LINK_LAYER_UNSPECIFIED;
}
/*
 * Report the link layer of the device.  The answer is derived from the
 * device-wide "port_type" capability; port_num is not consulted.
 */
static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx5_ib_dev *ibdev = to_mdev(device);

	return mlx5_port_type_cap_to_rdma_ll(
	    MLX5_CAP_GEN(ibdev->mdev, port_type));
}
static bool mlx5_netdev_match(struct net_device *ndev,
struct mlx5_core_dev *mdev,
const char *dname)
{
return ndev->if_type == IFT_ETHER &&
ndev->if_dname != NULL &&
strcmp(ndev->if_dname, dname) == 0 &&
ndev->if_softc != NULL &&
*(struct mlx5_core_dev **)ndev->if_softc == mdev;
}
/*
 * Network device notifier for RoCE.
 *
 * REGISTER/UNREGISTER: under the netdev write lock, remember (or forget)
 * the interface if it is the "mce" interface of our core device.
 * UP/DOWN: if the event concerns the remembered interface and the IB
 * device is active, dispatch a port ACTIVE/ERR event for port 1.
 *
 * Always returns NOTIFY_DONE.
 */
static int mlx5_netdev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
						 roce.nb);

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
		write_lock(&ibdev->roce.netdev_lock);
		/* check if network interface belongs to mlx5en */
		if (mlx5_netdev_match(ndev, ibdev->mdev, "mce")) {
			if (event == NETDEV_UNREGISTER)
				ibdev->roce.netdev = NULL;
			else
				ibdev->roce.netdev = ndev;
		}
		write_unlock(&ibdev->roce.netdev_lock);
		break;

	case NETDEV_UP:
	case NETDEV_DOWN: {
		/* No upper device is looked up here; it is always NULL. */
		struct net_device *upper = NULL;
		struct ib_event ibev = {0};

		/* Ignore events that are not about our interface. */
		if (upper != ndev &&
		    (upper || ndev != ibdev->roce.netdev))
			break;
		if (!ibdev->ib_active)
			break;

		ibev.device = &ibdev->ib_dev;
		if (event == NETDEV_UP)
			ibev.event = IB_EVENT_PORT_ACTIVE;
		else
			ibev.event = IB_EVENT_PORT_ERR;
		ibev.element.port_num = 1;
		ib_dispatch_event(&ibev);
		break;
	}

	default:
		break;
	}

	return NOTIFY_DONE;
}
/*
 * Return the network interface currently recorded for RoCE, or NULL if
 * none is recorded.  A non-NULL result has had dev_hold() called on it,
 * so the caller is expected to balance it with dev_put().
 */
static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
					     u8 port_num)
{
	struct mlx5_ib_dev *ibdev = to_mdev(device);
	struct net_device *held;

	/* Hold the read lock so the pointer stays valid until dev_hold(). */
	read_lock(&ibdev->roce.netdev_lock);
	held = ibdev->roce.netdev;
	if (held != NULL)
		dev_hold(held);
	read_unlock(&ibdev->roce.netdev_lock);

	return held;
}
/*
 * Translate an operational Ethernet protocol mask into an IB
 * (speed, width) pair.
 *
 * For an unrecognised mask the outputs are still written, with 4X/QDR,
 * and -EINVAL is returned; otherwise 0 is returned.
 */
static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
				    u8 *active_width)
{
	u8 width;
	u8 speed;
	int ret = 0;

	switch (eth_proto_oper) {
	case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
	case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
	case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
	case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
		width = IB_WIDTH_1X;
		speed = IB_SPEED_SDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
	case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
		width = IB_WIDTH_1X;
		speed = IB_SPEED_QDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
	case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
	case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
		width = IB_WIDTH_1X;
		speed = IB_SPEED_EDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
	case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
		width = IB_WIDTH_4X;
		speed = IB_SPEED_QDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
	case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
	case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
		width = IB_WIDTH_1X;
		speed = IB_SPEED_HDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
		width = IB_WIDTH_4X;
		speed = IB_SPEED_FDR;
		break;
	case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
	case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
		width = IB_WIDTH_4X;
		speed = IB_SPEED_EDR;
		break;
	default:
		/* Unknown mask: report a fallback pair and flag the error. */
		width = IB_WIDTH_4X;
		speed = IB_SPEED_QDR;
		ret = -EINVAL;
		break;
	}

	*active_width = width;
	*active_speed = speed;

	return ret;
}
/*
 * Fill "props" with port attributes for an Ethernet (RoCE) port.
 *
 * props is zeroed first, so it stays zeroed if the protocol query fails.
 * The port is reported DOWN unless a network interface is recorded for
 * the device and that interface is running with carrier.
 *
 * Returns 0 on success or the error from
 * mlx5_query_port_eth_proto_oper().
 */
static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
				struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct net_device *ndev;
	enum ib_mtu ndev_ib_mtu;
	/*
	 * Start at zero: the return value of the counter query below is
	 * not checked, so without this a failing query that leaves the
	 * variable unwritten would leak stack garbage into props.
	 */
	u16 qkey_viol_cntr = 0;
	u32 eth_prot_oper;
	int err;

	memset(props, 0, sizeof(*props));

	/* Possible bad flows are checked before filling out props so in case
	 * of an error it will still be zeroed out.
	 */
	err = mlx5_query_port_eth_proto_oper(dev->mdev, &eth_prot_oper, port_num);
	if (err)
		return err;

	/*
	 * The return value is not checked; translate_eth_proto_oper()
	 * stores a fallback speed/width for masks it does not recognise.
	 */
	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
				 &props->active_width);

	props->port_cap_flags |= IB_PORT_CM_SUP;
	props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;

	props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
					   roce_address_table_size);
	props->max_mtu = IB_MTU_4096;
	props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
	props->pkey_tbl_len = 1;
	props->state = IB_PORT_DOWN;
	props->phys_state = 3;

	mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
	props->qkey_viol_cntr = qkey_viol_cntr;

	ndev = mlx5_ib_get_netdev(device, port_num);
	if (!ndev)
		return 0;

	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
		props->state = IB_PORT_ACTIVE;
		props->phys_state = 5;
	}

	ndev_ib_mtu = iboe_get_mtu(ndev->if_mtu);

	/* Drop the reference taken by mlx5_ib_get_netdev(). */
	dev_put(ndev);

	props->active_mtu = min(props->max_mtu, ndev_ib_mtu);

	return 0;
}
static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
const struct ib_gid_attr *attr,
void *mlx5_addr)
{
#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
source_l3_address);
void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
source_mac_47_32);
u16 vlan_id;
if (!gid)
return;
ether_addr_copy(mlx5_addr_mac, IF_LLADDR(attr->ndev));
vlan_id = rdma_vlan_dev_vlan_id(attr->ndev);
if (vlan_id != 0xffff) {
MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
MLX5_SET_RA(mlx5_addr, vlan_id, vlan_id);
}
switch (attr->gid_type) {
case IB_GID_TYPE_IB:
MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
break;
default:
WARN_ON(true);
}
if (attr->gid_type != IB_GID_TYPE_IB) {
if (ipv6_addr_v4mapped((void *)gid))
MLX5_SET_RA(mlx5_addr, roce_l3_type,
MLX5_ROCE_L3_TYPE_IPV4);
else
MLX5_SET_RA(mlx5_addr, roce_l3_type,
MLX5_ROCE_L3_TYPE_IPV6);
}
if ((attr->gid_type == IB_GID_TYPE_IB) ||
!ipv6_addr_v4mapped((void *)gid))
memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
else
memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
}
/*
 * Issue SET_ROCE_ADDRESS for table entry "index".
 *
 * With a NULL gid the address portion of the command stays zeroed.
 * Returns -EINVAL when the link layer is not Ethernet, otherwise the
 * result of mlx5_cmd_exec().
 */
static int set_roce_addr(struct ib_device *device, u8 port_num,
			 unsigned int index,
			 const union ib_gid *gid,
			 const struct ib_gid_attr *attr)
{
	struct mlx5_ib_dev *ibdev = to_mdev(device);
	u32 cmd_in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
	u32 cmd_out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
	void *roce_addr = MLX5_ADDR_OF(set_roce_address_in, cmd_in,
				       roce_address);

	if (mlx5_ib_port_link_layer(device, port_num) !=
	    IB_LINK_LAYER_ETHERNET)
		return -EINVAL;

	ib_gid_to_mlx5_roce_addr(gid, attr, roce_addr);

	MLX5_SET(set_roce_address_in, cmd_in, roce_address_index, index);
	MLX5_SET(set_roce_address_in, cmd_in, opcode,
		 MLX5_CMD_OP_SET_ROCE_ADDRESS);

	return mlx5_cmd_exec(ibdev->mdev, cmd_in, sizeof(cmd_in),
			     cmd_out, sizeof(cmd_out));
}
/*
 * Add a GID: program the given GID and attributes into RoCE address
 * table entry "index".  The context argument is unused.
 */
static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
			   unsigned int index, const union ib_gid *gid,
			   const struct ib_gid_attr *attr,
			   __always_unused void **context)
{
	int ret;

	ret = set_roce_addr(device, port_num, index, gid, attr);
	return ret;
}
/*
 * Delete a GID: reprogram RoCE address table entry "index" with no GID
 * and no attributes.  The context argument is unused.
 */
static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
			   unsigned int index, __always_unused void **context)
{
	int ret;

	ret = set_roce_addr(device, port_num, index, NULL, NULL);
	return ret;
}
/*
 * Return the UDP source port (big endian) to use for the cached GID at
 * "index".  Returns 0 when the GID cannot be fetched, has no network
 * device attached, or is not of the RoCE UDP-encapsulated type.
 */
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
			       int index)
{
	struct ib_gid_attr gid_attr;
	union ib_gid cached_gid;

	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &cached_gid,
			      &gid_attr) != 0)
		return 0;
	if (gid_attr.ndev == NULL)
		return 0;

	/* Only the GID type is needed; release the device reference now. */
	dev_put(gid_attr.ndev);

	if (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
		return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev,
						 r_roce_min_src_udp_port));

	return 0;
}
/*
 * Look up the type of the cached GID at "index" and store it in
 * *gid_type.
 *
 * Returns 0 on success, the error from ib_get_cached_gid(), or -ENODEV
 * when the cached entry has no network device attached.
 */
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
			   int index, enum ib_gid_type *gid_type)
{
	struct ib_gid_attr gid_attr;
	union ib_gid cached_gid;
	int rc;

	rc = ib_get_cached_gid(&dev->ib_dev, port_num, index, &cached_gid,
			       &gid_attr);
	if (rc != 0)
		return rc;
	if (gid_attr.ndev == NULL)
		return -ENODEV;

	/* Only the GID type is needed; release the device reference now. */
	dev_put(gid_attr.ndev);
	*gid_type = gid_attr.gid_type;

	return 0;
}
/*
 * Return non-zero when the MAD interface should be used: the port type
 * is IB and the "ib_virt" capability is not set.
 */
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
		return 0;

	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
}
/*
 * Ways of querying vport attributes, as selected by
 * mlx5_get_vport_access_method().
 */
enum {
MLX5_VPORT_ACCESS_METHOD_MAD,
MLX5_VPORT_ACCESS_METHOD_HCA,
MLX5_VPORT_ACCESS_METHOD_NIC,
};
/*
 * Choose how vport attributes are queried: MAD when the MAD interface
 * is in use, NIC when port 1 has an Ethernet link layer, HCA otherwise.
 */
static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
	int method;

	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
		method = MLX5_VPORT_ACCESS_METHOD_MAD;
	else if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET)
		method = MLX5_VPORT_ACCESS_METHOD_NIC;
	else
		method = MLX5_VPORT_ACCESS_METHOD_HCA;

	return method;
}
static void get_atomic_caps(struct mlx5_ib_dev *dev,
struct ib_device_attr *props)
{
u8 tmp;
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
u8 atomic_req_8B_endianness_mode =
MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
/* Check if HW supports 8 bytes standard atomic operations and capable
* of host endianness respond
*/
tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
if (((atomic_operations & tmp) == tmp) &&
(atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
(atomic_req_8B_endianness_mode)) {
props->atomic_cap = IB_ATOMIC_HCA;
} else {
props->atomic_cap = IB_ATOMIC_NONE;
}
}
/*
 * Query the system image GUID using the access method appropriate for
 * the device, and store it big endian in *sys_image_guid.
 *
 * Returns 0 on success, -EINVAL for an unknown access method, or the
 * error from the underlying query.  The output is written only on
 * success of the HCA/NIC paths; the MAD path is delegated entirely.
 */
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
					__be64 *sys_image_guid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int method = mlx5_get_vport_access_method(ibdev);
	u64 guid;
	int rc;

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_system_image_guid(ibdev,
							    sys_image_guid);

	if (method == MLX5_VPORT_ACCESS_METHOD_HCA)
		rc = mlx5_query_hca_vport_system_image_guid(mdev, &guid);
	else if (method == MLX5_VPORT_ACCESS_METHOD_NIC)
		rc = mlx5_query_nic_vport_system_image_guid(mdev, &guid);
	else
		return -EINVAL;

	if (rc == 0)
		*sys_image_guid = cpu_to_be64(guid);

	return rc;
}
/*
 * Report the maximum number of P_Keys in *max_pkeys.
 *
 * The MAD method is delegated to mlx5_query_mad_ifc_max_pkeys(); the
 * HCA and NIC methods derive the value from the pkey_table_size
 * capability.  Unknown methods return -EINVAL.
 */
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
				u16 *max_pkeys)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int method = mlx5_get_vport_access_method(ibdev);

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);

	if (method != MLX5_VPORT_ACCESS_METHOD_HCA &&
	    method != MLX5_VPORT_ACCESS_METHOD_NIC)
		return -EINVAL;

	*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
						     pkey_table_size));
	return 0;
}
/*
 * Query the vendor ID into *vendor_id, via the MAD interface or the
 * core driver depending on the vport access method.  Unknown methods
 * return -EINVAL.
 */
static int mlx5_query_vendor_id(struct ib_device *ibdev,
				u32 *vendor_id)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int method = mlx5_get_vport_access_method(ibdev);

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);

	if (method == MLX5_VPORT_ACCESS_METHOD_HCA ||
	    method == MLX5_VPORT_ACCESS_METHOD_NIC)
		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);

	return -EINVAL;
}
/*
 * Query the node GUID using the access method appropriate for the
 * device, and store it big endian in *node_guid.
 *
 * Returns 0 on success, -EINVAL for an unknown access method, or the
 * error from the underlying query.  The output is written only on
 * success of the HCA/NIC paths; the MAD path is delegated entirely.
 */
static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
				__be64 *node_guid)
{
	int method = mlx5_get_vport_access_method(&dev->ib_dev);
	u64 guid;
	int rc;

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_node_guid(dev, node_guid);

	if (method == MLX5_VPORT_ACCESS_METHOD_HCA)
		rc = mlx5_query_hca_vport_node_guid(dev->mdev, &guid);
	else if (method == MLX5_VPORT_ACCESS_METHOD_NIC)
		rc = mlx5_query_nic_vport_node_guid(dev->mdev, &guid);
	else
		return -EINVAL;

	if (rc == 0)
		*node_guid = cpu_to_be64(guid);

	return rc;
}
/*
 * Buffer layout used with the MLX5_REG_NODE_DESC register access:
 * IB_DEVICE_NODE_DESC_MAX bytes of node description.
 */
struct mlx5_reg_node_desc {
u8 desc[IB_DEVICE_NODE_DESC_MAX];
};
static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
struct mlx5_reg_node_desc in;
if (mlx5_use_mad_ifc(dev))
return mlx5_query_mad_ifc_node_desc(dev, node_desc);
memset(&in, 0, sizeof(in));
return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
sizeof(struct mlx5_reg_node_desc),
MLX5_REG_NODE_DESC, 0, 0);
}
/*
 * Fill "props" with device attributes and, when the caller supplied an
 * output buffer (uhw->outlen != 0), copy the driver-specific response
 * structure to it.
 *
 * Most attributes are derived from firmware capability fields read via
 * MLX5_CAP_GEN()/MLX5_CAP_ETH(); the system image GUID, maximum P_Key
 * count and vendor ID come from dedicated query helpers.
 *
 * Returns 0 on success; -EINVAL when the output buffer is non-empty but
 * too small for comp_mask + response_length, or when the input buffer
 * is non-empty and not cleared; otherwise the error from a query helper
 * or from ib_copy_to_udata().
 */
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
/* This initial value is overwritten before err is first read. */
int err = -ENOMEM;
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
struct mlx5_ib_query_device_resp resp = {};
size_t resp_len;
u64 max_tso;
/* Minimum response: the comp_mask and response_length fields. */
resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
if (uhw->outlen && uhw->outlen < resp_len)
return -EINVAL;
else
resp.response_length = resp_len;
/* Any input bytes supplied by the caller must be cleared. */
if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
err = mlx5_query_system_image_guid(ibdev,
&props->sys_image_guid);
if (err)
return err;
err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
if (err)
return err;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
return err;
/* Pack firmware revision as major:32 | minor:16 | subminor. */
props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
((u32)fw_rev_min(dev->mdev) << 16) |
fw_rev_sub(dev->mdev);
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN;
if (MLX5_CAP_GEN(mdev, pkv))
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (MLX5_CAP_GEN(mdev, qkv))
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (MLX5_CAP_GEN(mdev, apm))
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (MLX5_CAP_GEN(mdev, xrc))
props->device_cap_flags |= IB_DEVICE_XRC;
if (MLX5_CAP_GEN(mdev, imaicl)) {
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
}
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
IB_PROT_T10DIF_TYPE_3;
props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
/*
 * Optional response fields (tso_caps, rss_caps) are reported only if
 * they fit in the caller's buffer; response_length grows accordingly.
 */
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
if (MLX5_CAP_ETH(mdev, csum_cap))
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
/*
 * NOTE(review): when max_tso is 0, response_length is not advanced
 * past tso_caps, yet the rss_caps size may still be added below --
 * confirm this matches what user space expects.
 */
if (max_tso) {
resp.tso_caps.max_tso = 1 << max_tso;
resp.tso_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET;
resp.response_length += sizeof(resp.tso_caps);
}
}
if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
resp.rss_caps.rx_hash_function =
MLX5_RX_HASH_FUNC_TOEPLITZ;
resp.rss_caps.rx_hash_fields_mask =
MLX5_RX_HASH_SRC_IPV4 |
MLX5_RX_HASH_DST_IPV4 |
MLX5_RX_HASH_SRC_IPV6 |
MLX5_RX_HASH_DST_IPV6 |
MLX5_RX_HASH_SRC_PORT_TCP |
MLX5_RX_HASH_DST_PORT_TCP |
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
/* No Ethernet offloads: the fields stay zeroed but are still counted. */
if (field_avail(typeof(resp), tso_caps, uhw->outlen))
resp.response_length += sizeof(resp.tso_caps);
if (field_avail(typeof(resp), rss_caps, uhw->outlen))
resp.response_length += sizeof(resp.rss_caps);
}
if (MLX5_CAP_GEN(mdev, ipoib_ipoib_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs))
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
props->max_mr_size = ~0ull;
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
/*
 * Scatter/gather limits: WQE size divided by the data segment size;
 * the send side first subtracts the control segment.
 */
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
sizeof(struct mlx5_wqe_ctrl_seg)) /
sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
#endif
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
if (!mlx5_core_is_pf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
/* RSS / receive work queue limits are reported only on Ethernet. */
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET) {
props->rss_caps.max_rwq_indirection_tables =
1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
props->rss_caps.max_rwq_indirection_table_size =
1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
props->max_wq_type_rq =
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
/* Copy out only as many bytes as were accounted in response_length. */
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
return err;
}
return 0;
}
/*
 * Link width bits tested by translate_active_width() when converting
 * the operational width to the IB_WIDTH_* representation.
 */
enum mlx5_ib_width {
MLX5_IB_WIDTH_1X = 1 << 0,
MLX5_IB_WIDTH_2X = 1 << 1,
MLX5_IB_WIDTH_4X = 1 << 2,
MLX5_IB_WIDTH_8X = 1 << 3,
MLX5_IB_WIDTH_12X = 1 << 4
};
/*
 * Convert a width bit mask into an IB_WIDTH_* value stored in
 * *ib_width.  Bits are tested from narrowest to widest and the first
 * one set decides the result.
 *
 * Returns 0 on success.  Returns -EINVAL, leaving *ib_width untouched,
 * when the deciding bit is 2X (logged as not supported by the IB spec)
 * or when no known bit is set.
 */
static int translate_active_width(struct ib_device *ibdev, u8 active_width,
				  u8 *ib_width)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);

	if (active_width & MLX5_IB_WIDTH_1X) {
		*ib_width = IB_WIDTH_1X;
		return 0;
	}
	if (active_width & MLX5_IB_WIDTH_2X) {
		mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
			    (int)active_width);
		return -EINVAL;
	}
	if (active_width & MLX5_IB_WIDTH_4X) {
		*ib_width = IB_WIDTH_4X;
		return 0;
	}
	if (active_width & MLX5_IB_WIDTH_8X) {
		*ib_width = IB_WIDTH_8X;
		return 0;
	}
	if (active_width & MLX5_IB_WIDTH_12X) {
		*ib_width = IB_WIDTH_12X;
		return 0;
	}

	mlx5_ib_dbg(dev, "Invalid active_width %d\n",
		    (int)active_width);
	return -EINVAL;
}
/*
 * Output codes written to max_vl_num by translate_max_vl_num(); each
 * name spells out a virtual lane range (e.g. 0_3 for VL0..VL3).
 */
enum ib_max_vl_num {
__IB_MAX_VL_0 = 1,
__IB_MAX_VL_0_1 = 2,
__IB_MAX_VL_0_3 = 3,
__IB_MAX_VL_0_7 = 4,
__IB_MAX_VL_0_14 = 5,
};
/*
 * Virtual lane capability values matched against the vl_hw_cap input of
 * translate_max_vl_num().  That function accepts only the 0, 0_1, 0_3,
 * 0_7 and 0_14 values; the others make it return -EINVAL.
 */
enum mlx5_vl_hw_cap {
MLX5_VL_HW_0 = 1,
MLX5_VL_HW_0_1 = 2,
MLX5_VL_HW_0_2 = 3,
MLX5_VL_HW_0_3 = 4,
MLX5_VL_HW_0_4 = 5,
MLX5_VL_HW_0_5 = 6,
MLX5_VL_HW_0_6 = 7,
MLX5_VL_HW_0_7 = 8,
MLX5_VL_HW_0_14 = 15
};
/*
 * Map a virtual lane capability value onto the corresponding
 * __IB_MAX_VL_* code, stored in *max_vl_num.
 *
 * Returns 0 on success, or -EINVAL (output untouched) for capability
 * values with no mapping.  The ibdev argument is unused.
 */
static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
				u8 *max_vl_num)
{
	u8 vl_code;

	switch (vl_hw_cap) {
	case MLX5_VL_HW_0:
		vl_code = __IB_MAX_VL_0;
		break;
	case MLX5_VL_HW_0_1:
		vl_code = __IB_MAX_VL_0_1;
		break;
	case MLX5_VL_HW_0_3:
		vl_code = __IB_MAX_VL_0_3;
		break;
	case MLX5_VL_HW_0_7:
		vl_code = __IB_MAX_VL_0_7;
		break;
	case MLX5_VL_HW_0_14:
		vl_code = __IB_MAX_VL_0_14;
		break;
	default:
		return -EINVAL;
	}

	*max_vl_num = vl_code;
	return 0;
}
/*
 * Fill "props" with port attributes using the HCA vport context plus
 * the PTYS, PMTU and PVLC register accessors.
 *
 * Three scratch buffers are allocated up front and released on every
 * exit path through the "out" label.  props is zeroed once the
 * allocations succeed, so on a later failure it may be partially
 * filled.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error
 * from the failing query/translation step.
 */
static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
u32 *rep;
int replen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
struct mlx5_ptys_reg *ptys;
struct mlx5_pmtu_reg *pmtu;
struct mlx5_pvlc_reg pvlc;
void *ctx;
int err;
/* Allocate everything first; a single check covers all three. */
rep = mlx5_vzalloc(replen);
ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
if (!rep || !ptys || !pmtu) {
err = -ENOMEM;
goto out;
}
memset(props, 0, sizeof(*props));
/* Step 1: attributes taken from the HCA vport context. */
err = mlx5_query_hca_vport_context(mdev, port, 0, rep, replen);
if (err)
goto out;
ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
props->lid = MLX5_GET(hca_vport_context, ctx, lid);
props->lmc = MLX5_GET(hca_vport_context, ctx, lmc);
props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid);
props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl);
props->state = MLX5_GET(hca_vport_context, ctx, vport_state);
props->phys_state = MLX5_GET(hca_vport_context, ctx,
port_physical_state);
props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1);
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx,
pkey_violation_counter);
props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx,
qkey_violation_counter);
props->subnet_timeout = MLX5_GET(hca_vport_context, ctx,
subnet_timeout);
props->init_type_reply = MLX5_GET(hca_vport_context, ctx,
init_type_reply);
props->grh_required = MLX5_GET(hca_vport_context, ctx, grh_required);
/* Step 2: active width and speed from the PTYS register (IB protocol). */
ptys->proto_mask |= MLX5_PTYS_IB;
ptys->local_port = port;
err = mlx5_core_access_ptys(mdev, ptys, 0);
if (err)
goto out;
err = translate_active_width(ibdev, ptys->ib_link_width_oper,
&props->active_width);
if (err)
goto out;
props->active_speed = (u8)ptys->ib_proto_oper;
/* Step 3: maximum and operational MTU from the PMTU register. */
pmtu->local_port = port;
err = mlx5_core_access_pmtu(mdev, pmtu, 0);
if (err)
goto out;
props->max_mtu = pmtu->max_mtu;
props->active_mtu = pmtu->oper_mtu;
/* Step 4: virtual lane capability from the PVLC register. */
memset(&pvlc, 0, sizeof(pvlc));
pvlc.local_port = port;
err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
if (err)
goto out;
err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
&props->max_vl_num);
out:
/* Reached on success and failure alike; pointers may be NULL here. */
kvfree(rep);
kfree(ptys);
kfree(pmtu);
return err;
}
/*
 * Query port attributes, dispatching on the vport access method: MAD
 * interface, HCA vport, or RoCE (NIC).  Unknown methods return -EINVAL.
 */
int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
		       struct ib_port_attr *props)
{
	int method = mlx5_get_vport_access_method(ibdev);

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_port(ibdev, port, props);
	if (method == MLX5_VPORT_ACCESS_METHOD_HCA)
		return mlx5_query_hca_port(ibdev, port, props);
	if (method == MLX5_VPORT_ACCESS_METHOD_NIC)
		return mlx5_query_port_roce(ibdev, port, props);

	return -EINVAL;
}
/*
 * Read the GID at "index" of "port" into *gid.  Only the MAD and HCA
 * access methods are handled; any other method (including NIC) returns
 * -EINVAL.
 */
static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
			     union ib_gid *gid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int method = mlx5_get_vport_access_method(ibdev);

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
	if (method == MLX5_VPORT_ACCESS_METHOD_HCA)
		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);

	return -EINVAL;
}
/*
 * Read the P_Key at "index" of "port" into *pkey.  The MAD method is
 * delegated to the MAD interface; the HCA and NIC methods both use the
 * HCA vport P_Key query.  Unknown methods return -EINVAL.
 */
static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			      u16 *pkey)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int method = mlx5_get_vport_access_method(ibdev);

	if (method == MLX5_VPORT_ACCESS_METHOD_MAD)
		return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);

	if (method == MLX5_VPORT_ACCESS_METHOD_HCA ||
	    method == MLX5_VPORT_ACCESS_METHOD_NIC)
		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
						 pkey);

	return -EINVAL;
}
static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_reg_node_desc in;
struct mlx5_reg_node_desc out;
int err;
if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
return -EOPNOTSUPP;
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
/*
* If possible, pass node desc to FW, so it can generate
* a 144 trap. If cmd fails, just ignore.
*/
memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
if (err)
return err;
memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
return err;
}
/*
 * Update the port capability mask: read the current flags, add
 * props->set_port_cap_mask, remove props->clr_port_cap_mask, and write
 * the result back.  The read-modify-write runs under cap_mask_mutex.
 * The "mask" argument is not consulted.
 *
 * Returns 0 on success or the error from the query / set step.
 */
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct ib_port_attr cur_attr;
	u32 new_caps;
	int rc;

	mutex_lock(&dev->cap_mask_mutex);

	rc = mlx5_ib_query_port(ibdev, port, &cur_attr);
	if (rc == 0) {
		new_caps = (cur_attr.port_cap_flags |
			    props->set_port_cap_mask) &
			   ~props->clr_port_cap_mask;
		rc = mlx5_set_port_caps(dev->mdev, port, new_caps);
	}

	mutex_unlock(&dev->cap_mask_mutex);

	return rc;
}
/*
 * Allocate a user context for the device.
 *
 * The request is read from "udata"; two request layouts are accepted
 * (recorded as version 0 and version 2).  After validation the function
 * allocates the context, the UAR array, the uuar bitmap and counters,
 * one firmware UAR per page and, if supported, a transport domain, then
 * copies a response describing device limits back to the caller.
 *
 * Returns the embedded ib_ucontext on success or an ERR_PTR():
 *   -EAGAIN      the IB device is not active
 *   -EINVAL      malformed request (too short, flags set, zero or
 *                inconsistent uuar counts)
 *   -ENOMEM      too many uuars requested, or allocation failure
 *   -EOPNOTSUPP  reserved/unknown request fields are non-zero
 *   other        error from copy-in/copy-out or firmware commands
 *
 * On failure everything acquired so far is released, including any
 * firmware UARs that were already allocated.
 */
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
						  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_alloc_ucontext_req_v2 req = {};
	struct mlx5_ib_alloc_ucontext_resp resp = {};
	struct mlx5_ib_ucontext *context;
	struct mlx5_uuar_info *uuari;
	struct mlx5_uar *uars;
	int gross_uuars;
	int num_uars;
	int ver;
	int uuarn;
	int err;
	int i;
	size_t reqlen;
	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
				     max_cqe_version);

	if (!dev->ib_active)
		return ERR_PTR(-EAGAIN);

	if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
		return ERR_PTR(-EINVAL);

	/* The request length selects the layout version. */
	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
		ver = 0;
	else if (reqlen >= min_req_v2)
		ver = 2;
	else
		return ERR_PTR(-EINVAL);

	err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.flags)
		return ERR_PTR(-EINVAL);

	if (req.total_num_uuars > MLX5_MAX_UUARS)
		return ERR_PTR(-ENOMEM);

	if (req.total_num_uuars == 0)
		return ERR_PTR(-EINVAL);

	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	/* Bytes beyond the request structure we know about must be zero. */
	if (reqlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 reqlen - sizeof(req)))
		return ERR_PTR(-EOPNOTSUPP);

	req.total_num_uuars = ALIGN(req.total_num_uuars,
				    MLX5_NON_FP_BF_REGS_PER_PAGE);
	if (req.num_low_latency_uuars > req.total_num_uuars - 1)
		return ERR_PTR(-EINVAL);

	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
	resp.cache_line_size = cache_line_size();
	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
	resp.cqe_version = min_t(__u8,
				 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
				 req.max_cqe_version);
	/* Base response: everything up to and including response_length. */
	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);

	context = kzalloc(sizeof(*context), GFP_KERNEL);
	if (!context)
		return ERR_PTR(-ENOMEM);

	uuari = &context->uuari;
	mutex_init(&uuari->lock);
	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
	if (!uars) {
		err = -ENOMEM;
		goto out_ctx;
	}

	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
				sizeof(*uuari->bitmap),
				GFP_KERNEL);
	if (!uuari->bitmap) {
		err = -ENOMEM;
		goto out_uar_ctx;
	}
	/*
	 * Mark slots 2 and 3 of every group of four as taken in the
	 * bitmap (the original comment calls these the fast path uuars).
	 */
	for (i = 0; i < gross_uuars; i++) {
		uuarn = i & 3;
		if (uuarn == 2 || uuarn == 3)
			set_bit(i, uuari->bitmap);
	}

	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
	if (!uuari->count) {
		err = -ENOMEM;
		goto out_bitmap;
	}

	for (i = 0; i < num_uars; i++) {
		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
		/*
		 * Unwind through out_uars so that the UARs allocated in
		 * earlier iterations (indices 0..i-1) are freed rather
		 * than leaked.
		 */
		if (err)
			goto out_uars;
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif

	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
		err = mlx5_alloc_transport_domain(dev->mdev,
						  &context->tdn);
		if (err)
			goto out_uars;
	}

	INIT_LIST_HEAD(&context->vma_private_list);
	INIT_LIST_HEAD(&context->db_page_list);
	mutex_init(&context->db_page_mutex);

	resp.tot_uuars = req.total_num_uuars;
	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);

	/* Optional response fields are added only if the caller has room. */
	if (field_avail(typeof(resp), cqe_version, udata->outlen))
		resp.response_length += sizeof(resp.cqe_version);

	if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
		resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
				      MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
		resp.response_length += sizeof(resp.cmds_supp_uhw);
	}

	/*
	 * We don't want to expose information from the PCI bar that is located
	 * after 4096 bytes, so if the arch only supports larger pages, let's
	 * pretend we don't support reading the HCA's core clock. This is also
	 * forced by mmap function.
	 */
	if (PAGE_SIZE <= 4096 &&
	    field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
		resp.comp_mask |=
			MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
		resp.hca_core_clock_offset =
			offsetof(struct mlx5_init_seg, internal_timer_h) %
			PAGE_SIZE;
		resp.response_length += sizeof(resp.hca_core_clock_offset) +
					sizeof(resp.reserved2);
	}

	err = ib_copy_to_udata(udata, &resp, resp.response_length);
	if (err)
		goto out_td;

	uuari->ver = ver;
	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
	uuari->uars = uars;
	uuari->num_uars = num_uars;
	context->cqe_version = resp.cqe_version;

	return &context->ibucontext;

out_td:
	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);

out_uars:
	/* i is one past the last successfully allocated UAR. */
	for (i--; i >= 0; i--)
		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
out_count:
	kfree(uuari->count);

out_bitmap:
	kfree(uuari->bitmap);

out_uar_ctx:
	kfree(uars);

out_ctx:
	kfree(context);
	return ERR_PTR(err);
}
/*
 * Tear down a user context: release its transport domain (only when the
 * device reports transport-domain support), free every UAR recorded in the
 * context's uuar info, then free the bookkeeping arrays and the context.
 *
 * A UAR that fails to free is only logged.  Always returns 0.
 */
static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	struct mlx5_uuar_info *uuari = &context->uuari;
	int n = 0;

	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);

	while (n < uuari->num_uars) {
		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[n].index) != 0)
			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[n].index);
		n++;
	}

	kfree(uuari->count);
	kfree(uuari->bitmap);
	kfree(uuari->uars);
	kfree(context);

	return 0;
}
/*
 * Translate a UAR index into a page frame number: the start of PCI
 * resource 0 expressed in pages, offset by the index.
 */
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
{
	return index + (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT);
}
/*
 * Extract the mmap command from a page offset: the bits above
 * MLX5_IB_MMAP_CMD_SHIFT, limited by MLX5_IB_MMAP_CMD_MASK.
 */
static int get_command(unsigned long offset)
{
	unsigned long shifted = offset >> MLX5_IB_MMAP_CMD_SHIFT;

	return shifted & MLX5_IB_MMAP_CMD_MASK;
}
/*
 * Extract the mmap argument from a page offset: the low
 * MLX5_IB_MMAP_CMD_SHIFT bits, i.e. everything below the command field.
 */
static int get_arg(unsigned long offset)
{
	const int arg_mask = (1 << MLX5_IB_MMAP_CMD_SHIFT) - 1;

	return offset & arg_mask;
}
/* The index carried by an mmap page offset is simply its argument field. */
static int get_index(unsigned long offset)
{
	int idx = get_arg(offset);

	return idx;
}
/*
 * VMA open callback.
 *
 * This is invoked when a new VMA is created on top of ours, via mremap or
 * split_vma (usually triggered by mlock, madvise, munmap, ...).  Cloning is
 * not supported because the VMA is tightly bound to the hardware, so the
 * new VMA gets its vm_ops cleared; that keeps it from calling back into
 * this driver and acting on state it does not own.  The original VMA is
 * assumed to be exactly one page, so it should never be split itself.
 */
static void mlx5_ib_vma_open(struct vm_area_struct *area)
{
	area->vm_ops = NULL;
}
/*
 * VMA close callback: detach and free the driver's per-VMA private data.
 *
 * All VMAs opened on a file descriptor are guaranteed to be closed before
 * the file itself, so no synchronization with the normal teardown path
 * (mlx5_ib_dealloc_ucontext) is required.  Synchronization is still needed
 * against mlx5_ib_disassociate_ucontext, which walks the VMAs.  Close
 * normally runs under mm->mmap_sem; the exception is process exit, which
 * mlx5_ib_disassociate_ucontext handles explicitly.
 */
static void mlx5_ib_vma_close(struct vm_area_struct *area)
{
	struct mlx5_ib_vma_private_data *priv = area->vm_private_data;

	/*
	 * Clear the back-pointer first so that a racing
	 * mlx5_ib_disassociate_ucontext() does not see a stale VMA.
	 */
	priv->vma = NULL;
	list_del(&priv->list);
	kfree(priv);
}
/*
 * VMA callbacks installed by mlx5_ib_set_vma_data(): open clears vm_ops on
 * the new VMA, close releases the driver's per-VMA private data.
 */
static const struct vm_operations_struct mlx5_ib_vm_ops = {
.open = mlx5_ib_vma_open,
.close = mlx5_ib_vma_close
};
static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
struct mlx5_ib_ucontext *ctx)
{
struct mlx5_ib_vma_private_data *vma_prv;
struct list_head *vma_head = &ctx->vma_private_list;
vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
if (!vma_prv)
return -ENOMEM;
vma_prv->vma = vma;
vma->vm_private_data = vma_prv;
vma->vm_ops = &mlx5_ib_vm_ops;
list_add(&vma_prv->list, vma_head);
return 0;
}
/*
 * Human-readable name of a UAR mmap command, for log messages.
 * Returns NULL for any command that is not a UAR page mapping type.
 */
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
{
	if (cmd == MLX5_IB_MMAP_WC_PAGE)
		return "WC";
	if (cmd == MLX5_IB_MMAP_REGULAR_PAGE)
		return "best effort WC";
	if (cmd == MLX5_IB_MMAP_NC_PAGE)
		return "NC";

	return NULL;
}
/*
 * Map a single UAR page of @context into @vma.
 *
 * @cmd selects the page protection: write-combining (WC), best-effort
 * write-combining (REGULAR) or non-cached (NC).  The UAR is chosen by the
 * index encoded in vma->vm_pgoff and must be below uuari->num_uars; the VMA
 * must span exactly one page.
 *
 * Returns 0 on success, -EPERM when WC is requested but unavailable,
 * -EINVAL for an unknown command, wrong VMA size or out-of-range index,
 * -EAGAIN when io_remap_pfn_range() fails, or the error from
 * mlx5_ib_set_vma_data().
 */
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
struct mlx5_uuar_info *uuari = &context->uuari;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
pgprot_t prot;
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
/* Some architectures don't support WC memory */
#if defined(CONFIG_X86)
/* On x86 an explicit WC request is refused when PAT is disabled. */
if (!pat_enabled())
return -EPERM;
#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
/* Any architecture not listed above refuses explicit WC outright. */
return -EPERM;
#endif
/* fall through */
case MLX5_IB_MMAP_REGULAR_PAGE:
/* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
prot = pgprot_writecombine(vma->vm_page_prot);
break;
case MLX5_IB_MMAP_NC_PAGE:
prot = pgprot_noncached(vma->vm_page_prot);
break;
default:
return -EINVAL;
}
/* Only single-page mappings are accepted. */
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
/* The UAR slot is taken from the argument bits of the page offset. */
idx = get_index(vma->vm_pgoff);
if (idx >= uuari->num_uars)
return -EINVAL;
pfn = uar_index2pfn(dev, uuari->uars[idx].index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
vma->vm_page_prot = prot;
err = io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot);
if (err) {
/* The underlying error is logged; callers always see -EAGAIN. */
mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%llx, pfn=%pa, mmap_cmd=%s\n",
err, (unsigned long long)vma->vm_start, &pfn, mmap_cmd2str(cmd));
return -EAGAIN;
}
/* pa is computed only for the debug message below. */
pa = pfn << PAGE_SHIFT;
mlx5_ib_dbg(dev, "mapped %s at 0x%llx, PA %pa\n", mmap_cmd2str(cmd),
(unsigned long long)vma->vm_start, &pa);
/* Track the VMA in the context so it can be found again later. */
return mlx5_ib_set_vma_data(vma, context);
}
/*
 * mmap entry point for a user context.
 *
 * The command encoded in vma->vm_pgoff selects what is mapped:
 *  - WC / NC / REGULAR page: a UAR page, handled by uar_mmap();
 *  - GET_CONTIGUOUS_PAGES:   not implemented, -ENOSYS;
 *  - CORE_CLOCK:             a read-only, non-cached, single-page mapping
 *                            of the page holding the HCA internal timer;
 *  - anything else:          -EINVAL.
 *
 * Returns 0 on success or a negative errno.
 */
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
	unsigned long command = get_command(vma->vm_pgoff);
	phys_addr_t pfn;

	switch (command) {
	case MLX5_IB_MMAP_WC_PAGE:
	case MLX5_IB_MMAP_NC_PAGE:
	case MLX5_IB_MMAP_REGULAR_PAGE:
		return uar_mmap(dev, command, vma, context);
	case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
		return -ENOSYS;
	case MLX5_IB_MMAP_CORE_CLOCK:
		/* Handled below the switch. */
		break;
	default:
		return -EINVAL;
	}

	/* Core clock mapping: exactly one page, never writable. */
	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if (vma->vm_flags & VM_WRITE)
		return -EPERM;

	/* Don't expose to user-space information it shouldn't have */
	if (PAGE_SIZE > 4096)
		return -EOPNOTSUPP;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	pfn = (dev->mdev->iseg_base +
	       offsetof(struct mlx5_init_seg, internal_timer_h)) >>
	      PAGE_SHIFT;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
			       PAGE_SIZE, vma->vm_page_prot))
		return -EAGAIN;

	mlx5_ib_dbg(dev, "mapped internal timer at 0x%llx, PA 0x%llx\n",
		    (unsigned long long)vma->vm_start,
		    (unsigned long long)pfn << PAGE_SHIFT);

	return 0;
}
static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx5_ib_alloc_pd_resp resp;
struct mlx5_ib_pd *pd;
int err;
pd = kmalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
if (err) {
kfree(pd);
return ERR_PTR(err);
}
if (context) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
kfree(pd);
return ERR_PTR(-EFAULT);
}
}
return &pd->ibpd;
}
/*
 * Release a protection domain: return its PD number to the core device and
 * free the wrapper object.  Always returns 0.
 */
static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
{
	struct mlx5_ib_pd *mpd = to_mpd(pd);
	struct mlx5_ib_dev *ibdev = to_mdev(pd->device);

	mlx5_core_dealloc_pd(ibdev->mdev, mpd->pdn);
	kfree(mpd);

	return 0;
}
/*
 * Bit positions used by get_match_criteria_enable() when building the
 * match_criteria_enable mask: one bit per header section of the match
 * parameters (outer headers, misc parameters, inner headers).
 */
enum {
MATCH_CRITERIA_ENABLE_OUTER_BIT,
MATCH_CRITERIA_ENABLE_MISC_BIT,
MATCH_CRITERIA_ENABLE_INNER_BIT
};
/*
 * Evaluates to true when memchr_inv() reports no byte different from 0 in
 * the `headers` section of the fte_match_param blob at `match_criteria`.
 *
 * The last line intentionally has no line continuation: a trailing
 * backslash would splice the next source line into the macro body.  The
 * expansion is fully parenthesized so it composes safely in expressions.
 */
#define HEADER_IS_ZERO(match_criteria, headers)				      \
	(!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
		      0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))))
/*
 * Build the match_criteria_enable mask for a match-criteria blob: one bit
 * is set for each header section (outer, misc, inner) that contains at
 * least one non-zero byte.
 */
static u8 get_match_criteria_enable(u32 *match_criteria)
{
	u8 enable_bits = 0;

	if (!HEADER_IS_ZERO(match_criteria, outer_headers))
		enable_bits |= 1 << MATCH_CRITERIA_ENABLE_OUTER_BIT;
	if (!HEADER_IS_ZERO(match_criteria, misc_parameters))
		enable_bits |= 1 << MATCH_CRITERIA_ENABLE_MISC_BIT;
	if (!HEADER_IS_ZERO(match_criteria, inner_headers))
		enable_bits |= 1 << MATCH_CRITERIA_ENABLE_INNER_BIT;

	return enable_bits;
}
/*
 * Program the ip_protocol field: @val goes into the match value (@outer_v)
 * and @mask into the match criteria (@outer_c).
 */
static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
{
	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
}
/*
 * Program the TOS byte into the match criteria (@outer_c, from @mask) and
 * the match value (@outer_v, from @val).  The byte is written unshifted to
 * ip_ecn and shifted right by two to ip_dscp.
 */
static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
{
	/* Criteria (mask) side. */
	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);

	/* Value side. */
	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
}
/*
 * Name of the last filter member this driver handles for each flow spec
 * type.  They are passed as the `field` argument of FIELDS_NOT_SUPPORTED().
 */
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
/*
 * `field` is the last supported member of `filter`.  The macro runs
 * memchr_inv() against 0 over every byte of `filter` that lies after
 * `field`; parse_flow_attr() treats a non-NULL result as "the caller set
 * a field we do not support" and returns -ENOTSUPP.
 */
#define FIELDS_NOT_SUPPORTED(filter, field)\
memchr_inv((void *)&filter.field +\
sizeof(filter.field), 0,\
sizeof(filter) -\
offsetof(typeof(filter), field) -\
sizeof(filter.field))
/*
 * Translate one ib_flow_spec into the device match parameters.
 *
 * @match_c receives the match criteria (masks) and @match_v the match
 * values; both are fte_match_param blobs that accumulate the effect of
 * successive specs.
 *
 * Returns 0 on success, -ENOTSUPP when the spec's mask sets any byte past
 * the last field this driver supports for that spec type, and -EINVAL for
 * an unknown spec type.
 */
static int parse_flow_attr(u32 *match_c, u32 *match_v,
			   const union ib_flow_spec *ib_spec)
{
	void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
					     outer_headers);
	void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
					     outer_headers);
	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
					   misc_parameters);
	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
					   misc_parameters);

	switch (ib_spec->type) {
	case IB_FLOW_SPEC_ETH:
		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
			return -ENOTSUPP;

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
					     dmac_47_16),
				ib_spec->eth.mask.dst_mac);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
					     dmac_47_16),
				ib_spec->eth.val.dst_mac);

		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
					     smac_47_16),
				ib_spec->eth.mask.src_mac);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
					     smac_47_16),
				ib_spec->eth.val.src_mac);

		/* VLAN fields are only programmed when the mask asks for them. */
		if (ib_spec->eth.mask.vlan_tag) {
			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
				 cvlan_tag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
				 cvlan_tag, 1);

			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));

			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
				 first_cfi,
				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
				 first_cfi,
				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);

			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
				 first_prio,
				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
				 first_prio,
				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
		}
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
			 ethertype, ntohs(ib_spec->eth.val.ether_type));
		break;
	case IB_FLOW_SPEC_IPV4:
		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
			return -ENOTSUPP;

		/* An IPv4 spec implies an exact match on the IPv4 ethertype. */
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
			 ethertype, 0xffff);
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
			 ethertype, ETH_P_IP);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.mask.src_ip,
		       sizeof(ib_spec->ipv4.mask.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.val.src_ip,
		       sizeof(ib_spec->ipv4.val.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.mask.dst_ip,
		       sizeof(ib_spec->ipv4.mask.dst_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &ib_spec->ipv4.val.dst_ip,
		       sizeof(ib_spec->ipv4.val.dst_ip));

		set_tos(outer_headers_c, outer_headers_v,
			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);

		set_proto(outer_headers_c, outer_headers_v,
			  ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
		break;
	case IB_FLOW_SPEC_IPV6:
		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
			return -ENOTSUPP;

		/*
		 * An IPv6 spec implies an exact match on the IPv6 ethertype.
		 * The value must be an ethertype (ETH_P_IPV6), not the IP
		 * protocol number IPPROTO_IPV6, otherwise the fully-masked
		 * comparison can never match an IPv6 frame.
		 */
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
			 ethertype, 0xffff);
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
			 ethertype, ETH_P_IPV6);

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.mask.src_ip,
		       sizeof(ib_spec->ipv6.mask.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.val.src_ip,
		       sizeof(ib_spec->ipv6.val.src_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.mask.dst_ip,
		       sizeof(ib_spec->ipv6.mask.dst_ip));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &ib_spec->ipv6.val.dst_ip,
		       sizeof(ib_spec->ipv6.val.dst_ip));

		set_tos(outer_headers_c, outer_headers_v,
			ib_spec->ipv6.mask.traffic_class,
			ib_spec->ipv6.val.traffic_class);

		set_proto(outer_headers_c, outer_headers_v,
			  ib_spec->ipv6.mask.next_hdr,
			  ib_spec->ipv6.val.next_hdr);

		/* The flow label lives in the misc parameters section. */
		MLX5_SET(fte_match_set_misc, misc_params_c,
			 outer_ipv6_flow_label,
			 ntohl(ib_spec->ipv6.mask.flow_label));
		MLX5_SET(fte_match_set_misc, misc_params_v,
			 outer_ipv6_flow_label,
			 ntohl(ib_spec->ipv6.val.flow_label));
		break;
	case IB_FLOW_SPEC_TCP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
					 LAST_TCP_UDP_FIELD))
			return -ENOTSUPP;

		/* A TCP spec implies an exact match on the IP protocol. */
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
			 0xff);
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
			 IPPROTO_TCP);

		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
			 ntohs(ib_spec->tcp_udp.mask.src_port));
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
			 ntohs(ib_spec->tcp_udp.val.src_port));

		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
			 ntohs(ib_spec->tcp_udp.mask.dst_port));
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
			 ntohs(ib_spec->tcp_udp.val.dst_port));
		break;
	case IB_FLOW_SPEC_UDP:
		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
					 LAST_TCP_UDP_FIELD))
			return -ENOTSUPP;

		/* A UDP spec implies an exact match on the IP protocol. */
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
			 0xff);
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
			 IPPROTO_UDP);

		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
			 ntohs(ib_spec->tcp_udp.mask.src_port));
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
			 ntohs(ib_spec->tcp_udp.val.src_port));

		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
			 ntohs(ib_spec->tcp_udp.mask.dst_port));
		MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
			 ntohs(ib_spec->tcp_udp.val.dst_port));
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
/*
 * Decide whether a flow can only ever match multicast packets.
 *
 * A flow that could catch both multicast and unicast packets must not be
 * placed in the multicast flow steering table, because such a rule could
 * steal other multicast packets.
 *
 * Returns true only for a normal-type attribute whose first spec is an
 * Ethernet spec of the expected size with both the destination MAC mask
 * and value being multicast addresses.
 */
static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
{
	struct ib_flow_spec_eth *eth;

	if (ib_attr->type != IB_FLOW_ATTR_NORMAL)
		return false;
	if (ib_attr->size < sizeof(struct ib_flow_attr) +
			    sizeof(struct ib_flow_spec_eth))
		return false;
	if (ib_attr->num_of_specs < 1)
		return false;

	/* The first spec immediately follows the attribute header. */
	eth = (struct ib_flow_spec_eth *)(ib_attr + 1);
	if (eth->type != IB_FLOW_SPEC_ETH)
		return false;
	if (eth->size != sizeof(*eth))
		return false;

	if (!is_multicast_ether_addr(eth->mask.dst_mac))
		return false;
	return is_multicast_ether_addr(eth->val.dst_mac);
}
/*
 * Check that a flow attribute's Ethernet and IPv4 specs are consistent.
 *
 * Walks every spec following @flow_attr.  An Ethernet spec with a non-zero
 * ethertype mask must match ETH_P_IP exactly (mask 0xffff); otherwise the
 * ethertype is considered "not IPv4".  The attribute is invalid only when
 * it contains an IPv4 spec together with such a non-IPv4 ethertype.
 */
static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
{
	union ib_flow_spec *spec = (union ib_flow_spec *)(flow_attr + 1);
	bool saw_ipv4 = false;
	bool ethertype_ok = true;
	unsigned int n;

	for (n = 0; n < flow_attr->num_of_specs; n++) {
		if (spec->type == IB_FLOW_SPEC_ETH &&
		    spec->eth.mask.ether_type) {
			if (spec->eth.mask.ether_type != htons(0xffff) ||
			    spec->eth.val.ether_type != htons(ETH_P_IP))
				ethertype_ok = false;
		} else if (spec->type == IB_FLOW_SPEC_IPV4) {
			saw_ipv4 = true;
		}
		/* Specs are variable-sized; step by the spec's own size. */
		spec = (void *)spec + spec->size;
	}

	if (!saw_ipv4)
		return true;
	return ethertype_ok;
}
/*
 * Drop a reference on a flow-table priority slot.
 *
 * The refcount is decremented only when @ft_added is true.  When the count
 * is zero afterwards, the flow table is destroyed and the slot cleared.
 */
static void put_flow_table(struct mlx5_ib_dev *dev,
			   struct mlx5_ib_flow_prio *prio, bool ft_added)
{
	if (ft_added)
		prio->refcount--;

	if (prio->refcount != 0)
		return;

	mlx5_destroy_flow_table(prio->flow_table);
	prio->flow_table = NULL;
}
/*
 * Destroy a flow and every handler chained to it.
 *
 * Under the flow_db lock, each handler linked on the primary handler's
 * list has its rule deleted, its flow-table reference dropped and is
 * freed; the primary handler's rule and reference follow.  The primary
 * handler itself is freed after the lock is released.  Always returns 0.
 */
static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
	struct mlx5_ib_flow_handler *head = container_of(flow_id,
							 struct mlx5_ib_flow_handler,
							 ibflow);
	struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
	struct mlx5_ib_flow_handler *cur, *next;

	mutex_lock(&dev->flow_db.lock);

	list_for_each_entry_safe(cur, next, &head->list, list) {
		mlx5_del_flow_rule(cur->rule);
		put_flow_table(dev, cur->prio, true);
		list_del(&cur->list);
		kfree(cur);
	}

	mlx5_del_flow_rule(head->rule);
	put_flow_table(dev, head->prio, true);

	mutex_unlock(&dev->flow_db.lock);

	kfree(head);

	return 0;
}
/*
 * Map an IB flow priority to a core priority.  Each IB priority owns two
 * consecutive core priorities: the even one is used for don't-trap rules,
 * the odd one for everything else.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	unsigned int core_prio = priority * 2;

	if (dont_trap)
		return core_prio;

	return core_prio + 1;
}
/*
 * Direction of a flow table.  get_flow_table() uses it to pick the sniffer
 * RX or TX namespace and to index dev->flow_db.sniffer[].
 */
enum flow_table_type {
MLX5_IB_FT_RX,
MLX5_IB_FT_TX
};
/*
 * Sizing used by get_flow_table() for normal-type flows: passed as the
 * number of groups (MAX_TYPES) and number of entries (MAX_ENTRIES) when the
 * auto-grouped flow table is created.
 */
#define MLX5_FS_MAX_TYPES 10
#define MLX5_FS_MAX_ENTRIES 32000UL
/*
 * Find, and create if necessary, the flow table that will hold a rule for
 * @flow_attr.
 *
 * The attribute type selects the flow namespace, the priority slot in
 * dev->flow_db and the table sizing:
 *  - NORMAL:                 bypass namespace, slot prios[priority];
 *  - ALL_DEFAULT/MC_DEFAULT: leftovers namespace, slot
 *                            prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
 *  - SNIFFER:                sniffer RX/TX namespace chosen by @ft_type,
 *                            slot sniffer[ft_type].
 *
 * Returns the priority slot (with a flow table attached), or an ERR_PTR:
 * -ENOTSUPP when no namespace is available or sniffer sharing is not
 * supported by the device, or the error from flow table creation.
 * The slot's refcount is not incremented here.
 */
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
struct ib_flow_attr *flow_attr,
enum flow_table_type ft_type)
{
bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_table *ft;
int num_entries;
int num_groups;
int priority;
int err = 0;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
/* Multicast-only trapping flows get the dedicated mcast priority. */
if (flow_is_multicast_only(flow_attr) &&
!dont_trap)
priority = MLX5_IB_FLOW_MCAST_PRIO;
else
priority = ib_prio_to_core_prio(flow_attr->priority,
dont_trap);
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_BYPASS);
num_entries = MLX5_FS_MAX_ENTRIES;
num_groups = MLX5_FS_MAX_TYPES;
prio = &dev->flow_db.prios[priority];
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
ns = mlx5_get_flow_namespace(dev->mdev,
MLX5_FLOW_NAMESPACE_LEFTOVERS);
/* priority, num_entries and num_groups are filled in by the helper. */
build_leftovers_ft_param("bypass", &priority,
&num_entries,
&num_groups);
prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
if (!MLX5_CAP_FLOWTABLE(dev->mdev,
allow_sniffer_and_nic_rx_shared_tir))
return ERR_PTR(-ENOTSUPP);
ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
MLX5_FLOW_NAMESPACE_SNIFFER_RX :
MLX5_FLOW_NAMESPACE_SNIFFER_TX);
prio = &dev->flow_db.sniffer[ft_type];
priority = 0;
num_entries = 1;
num_groups = 1;
}
/*
 * ns stays NULL for an unrecognized attribute type, in which case prio,
 * priority, num_entries and num_groups were never assigned; this return
 * keeps them from being read.
 */
if (!ns)
return ERR_PTR(-ENOTSUPP);
ft = prio->flow_table;
if (!ft) {
/* First user of this slot: create the table and start counting at 0. */
ft = mlx5_create_auto_grouped_flow_table(ns, priority, "bypass",
num_entries,
num_groups);
if (!IS_ERR(ft)) {
prio->refcount = 0;
prio->flow_table = ft;
} else {
err = PTR_ERR(ft);
}
}
return err ? ERR_PTR(err) : prio;
}
/*
 * Build a single steering rule from @flow_attr and add it to the flow
 * table of @ft_prio.
 *
 * Every spec following the attribute header is parsed into a temporary
 * match specification.  With a non-NULL @dst the rule forwards to that
 * destination; with a NULL @dst it forwards to the next priority.
 *
 * On success the slot's refcount is incremented and a newly allocated
 * handler is returned.  On failure an ERR_PTR is returned: -EINVAL for an
 * inconsistent attribute, -ENOMEM on allocation failure, or the error from
 * spec parsing / rule insertion.
 */
static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_spec *spec;
/* The specs start right after the attribute header. */
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
u32 action;
int err = 0;
if (!is_valid_attr(flow_attr))
return ERR_PTR(-EINVAL);
/* Both allocations are attempted, then checked together below. */
spec = mlx5_vzalloc(sizeof(*spec));
handler = kzalloc(sizeof(*handler), GFP_KERNEL);
if (!handler || !spec) {
err = -ENOMEM;
goto free;
}
INIT_LIST_HEAD(&handler->list);
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(spec->match_criteria,
spec->match_value, ib_flow);
if (err < 0)
goto free;
/* Specs are variable-sized; advance by the size each one declares. */
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
handler->rule = mlx5_add_flow_rule(ft, spec->match_criteria_enable,
spec->match_criteria,
spec->match_value,
action,
MLX5_FS_DEFAULT_FLOW_TAG,
dst);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
goto free;
}
ft_prio->refcount++;
handler->prio = ft_prio;
ft_prio->flow_table = ft;
/*
 * Shared exit path: the temporary spec is always released; the handler is
 * released only when an error occurred.
 */
free:
if (err)
kfree(handler);
kvfree(spec);
return err ? ERR_PTR(err) : handler;
}
/*
 * Create the pair of rules used for a don't-trap flow: one without a
 * destination (created first) and one that forwards to @dst.  The second
 * handler is linked onto the first one's list.
 *
 * Returns the first handler on success.  If the first rule fails its
 * ERR_PTR is returned; if the second fails, the first rule is removed, its
 * reference dropped, and the second rule's ERR_PTR is returned.
 */
static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
							  struct mlx5_ib_flow_prio *ft_prio,
							  struct ib_flow_attr *flow_attr,
							  struct mlx5_flow_destination *dst)
{
	struct mlx5_ib_flow_handler *fwd_next;
	struct mlx5_ib_flow_handler *fwd_dst;

	fwd_next = create_flow_rule(dev, ft_prio, flow_attr, NULL);
	if (IS_ERR(fwd_next))
		return fwd_next;

	fwd_dst = create_flow_rule(dev, ft_prio, flow_attr, dst);
	if (IS_ERR(fwd_dst)) {
		/* Undo the first rule before reporting the failure. */
		mlx5_del_flow_rule(fwd_next->rule);
		ft_prio->refcount--;
		kfree(fwd_next);
		return fwd_dst;
	}

	list_add(&fwd_dst->list, &fwd_next->list);

	return fwd_next;
}
/* Indexes into the leftovers_specs[] table in create_leftovers_rule(). */
enum {
LEFTOVERS_MC,
LEFTOVERS_UC,
};
/*
 * Create the "leftovers" rules for default flow attributes.
 *
 * The LEFTOVERS_MC rule is always created.  When @flow_attr is of type
 * IB_FLOW_ATTR_ALL_DEFAULT the LEFTOVERS_UC rule is created as well and
 * linked onto the first handler's list.
 *
 * Returns the first handler on success, or an ERR_PTR.  If the second rule
 * fails, the first is removed, its reference dropped, and the second
 * rule's ERR_PTR is returned.
 */
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
struct mlx5_ib_flow_handler *handler_ucast = NULL;
struct mlx5_ib_flow_handler *handler = NULL;
/*
 * Canned attributes, each an attribute header followed by one Ethernet
 * spec.  Both mask only the lowest bit of the first destination MAC byte;
 * the MC entry requires it set, the UC entry requires it clear.
 */
static struct {
struct ib_flow_attr flow_attr;
struct ib_flow_spec_eth eth_flow;
} leftovers_specs[] = {
[LEFTOVERS_MC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {0x1} }
}
},
[LEFTOVERS_UC] = {
.flow_attr = {
.num_of_specs = 1,
.size = sizeof(leftovers_specs[0])
},
.eth_flow = {
.type = IB_FLOW_SPEC_ETH,
.size = sizeof(struct ib_flow_spec_eth),
.mask = {.dst_mac = {0x1} },
.val = {.dst_mac = {} }
}
}
};
handler = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_MC].flow_attr,
dst);
if (!IS_ERR(handler) &&
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
handler_ucast = create_flow_rule(dev, ft_prio,
&leftovers_specs[LEFTOVERS_UC].flow_attr,
dst);
if (IS_ERR(handler_ucast)) {
/* Roll back the MC rule and report the UC failure instead. */
mlx5_del_flow_rule(handler->rule);
ft_prio->refcount--;
kfree(handler);
handler = handler_ucast;
} else {
list_add(&handler_ucast->list, &handler->list);
}
}
return handler;
}
/*
 * Create the RX and TX rules of a sniffer flow.  Both use an attribute
 * with no specs, and both forward to @dst.  The TX handler is linked onto
 * the RX handler's list.
 *
 * Returns the RX handler on success, or an ERR_PTR carrying the error of
 * whichever rule failed.  If the TX rule fails, the RX rule is removed and
 * its reference dropped first.
 */
static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
							struct mlx5_ib_flow_prio *ft_rx,
							struct mlx5_ib_flow_prio *ft_tx,
							struct mlx5_flow_destination *dst)
{
	static const struct ib_flow_attr flow_attr = {
		.num_of_specs = 0,
		.size = sizeof(flow_attr)
	};
	struct mlx5_ib_flow_handler *rx;
	struct mlx5_ib_flow_handler *tx;
	int err;

	rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
	if (IS_ERR(rx)) {
		err = PTR_ERR(rx);
		return ERR_PTR(err);
	}

	tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
	if (IS_ERR(tx)) {
		err = PTR_ERR(tx);
		/* Undo the RX rule before reporting the failure. */
		mlx5_del_flow_rule(rx->rule);
		ft_rx->refcount--;
		kfree(rx);
		return ERR_PTR(err);
	}

	list_add(&tx->list, &rx->list);

	return rx;
}
/*
 * Create a steering flow that directs matching traffic to @qp.
 *
 * Validates the request, looks up (or creates) the flow table(s) for the
 * attribute type, builds a TIR destination from the QP, and creates the
 * rule(s) appropriate for the type: normal (optionally don't-trap),
 * leftovers, or sniffer.  All flow-table work happens under
 * dev->flow_db.lock.
 *
 * Returns the new ib_flow, or an ERR_PTR: -ENOSPC when the priority exceeds
 * MLX5_IB_FLOW_LAST_PRIO, -EINVAL for a non-user domain, a port above the
 * device's port count, unknown flags or an unknown attribute type, -ENOMEM
 * on allocation failure, or the error from table lookup / rule creation.
 */
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
struct mlx5_ib_flow_handler *handler = NULL;
struct mlx5_flow_destination *dst = NULL;
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
int err;
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOSPC);
if (domain != IB_FLOW_DOMAIN_USER ||
flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
(flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
return ERR_PTR(-EINVAL);
/* dst is only needed while the rules are created; it is freed on every exit. */
dst = kzalloc(sizeof(*dst), GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
mutex_lock(&dev->flow_db.lock);
ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
}
/* Sniffer flows additionally need a TX-side table. */
if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
if (IS_ERR(ft_prio_tx)) {
err = PTR_ERR(ft_prio_tx);
/* Reset so the unwind path does not touch an error pointer. */
ft_prio_tx = NULL;
goto destroy_ft;
}
}
/* The destination TIR comes from the RSS QP or the raw packet RQ. */
dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
if (mqp->flags & MLX5_IB_QP_RSS)
dst->tir_num = mqp->rss_qp.tirn;
else
dst->tir_num = mqp->raw_packet_qp.rq.tirn;
if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
handler = create_dont_trap_rule(dev, ft_prio,
flow_attr, dst);
} else {
handler = create_flow_rule(dev, ft_prio, flow_attr,
dst);
}
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
handler = create_leftovers_rule(dev, ft_prio, flow_attr,
dst);
} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
} else {
err = -EINVAL;
goto destroy_ft;
}
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
/* Cleared so the kfree() on the error path is a no-op. */
handler = NULL;
goto destroy_ft;
}
mutex_unlock(&dev->flow_db.lock);
kfree(dst);
return &handler->ibflow;
/*
 * Error unwind.  put_flow_table() is called with ft_added == false, so no
 * reference is dropped; the table is destroyed only if its refcount is 0.
 */
destroy_ft:
put_flow_table(dev, ft_prio, false);
if (ft_prio_tx)
put_flow_table(dev, ft_prio_tx, false);
unlock:
mutex_unlock(&dev->flow_db.lock);
kfree(dst);
kfree(handler);
return ERR_PTR(err);
}
/*
 * Attach a QP to the multicast group identified by @gid.  A failure is
 * logged and its error code returned; @lid is not used.
 */
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	int err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);

	if (err == 0)
		return 0;

	mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
		     ibqp->qp_num, gid->raw);
	return err;
}
/*
 * Detach a QP from the multicast group identified by @gid.  A failure is
 * logged and its error code returned; @lid is not used.
 */
static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
	int err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);

	if (err == 0)
		return 0;

	mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
		     ibqp->qp_num, gid->raw);
	return err;
}
/*
 * Fill in the IB device's node description and node GUID by querying the
 * device.  The GUID is queried only if the description query succeeded.
 * Returns 0 on success or the first failing query's error.
 */
static int init_node_data(struct mlx5_ib_dev *dev)
{
	int err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);

	if (err == 0)
		err = mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);

	return err;
}
/* Device attribute "fw_pages": prints the core device's priv.fw_pages. */
static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
			     char *buf)
{
	struct mlx5_ib_dev *dev =
		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
	long long fw_pages = (long long)dev->mdev->priv.fw_pages;

	return sprintf(buf, "%lld\n", fw_pages);
}
static ssize_t show_reg_pages(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%x\n", dev->mdev->pdev->revision);
}
static ssize_t show_board(struct device *device, struct device_attribute *attr,
char *buf)
{
struct mlx5_ib_dev *dev =
container_of(device, struct mlx5_ib_dev, ib_dev.dev);
return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
dev->mdev->board_id);
}
/*
 * Read-only device attributes (S_IRUGO, no store callback), each backed by
 * the show_*() function named in its definition.
 */
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
/* Table collecting all of the attributes defined above. */
static struct device_attribute *mlx5_class_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type,
&dev_attr_board_id,
&dev_attr_fw_pages,
&dev_attr_reg_pages,
};
/*
 * Work handler for a P_Key change on a port: notifies the port's GSI via
 * mlx5_ib_gsi_pkey_change() while holding the device-resources mutex.
 */
static void pkey_change_handler(struct work_struct *work)
{
	struct mlx5_ib_port_resources *port_res;

	port_res = container_of(work, struct mlx5_ib_port_resources,
				pkey_change_work);

	mutex_lock(&port_res->devr->mutex);
	mlx5_ib_gsi_pkey_change(port_res->gsi);
	mutex_unlock(&port_res->devr->mutex);
}
/*
 * React to a device internal error by invoking the completion callback of
 * every CQ that still has outstanding work on one of this device's QPs.
 *
 * Under reset_flow_resource_lock, each QP on ibdev->qp_list is inspected:
 * if its send queue (or, for QPs without an SRQ, its receive queue) has
 * head != tail, the associated CQ is put on a local list, provided it has
 * both a core completion callback and a consumer comp_handler and is not
 * on the list already.  Afterwards each collected CQ's comp() callback is
 * called, still under reset_flow_resource_lock.
 */
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
struct mlx5_ib_qp *mqp;
struct mlx5_ib_cq *send_mcq, *recv_mcq;
struct mlx5_core_cq *mcq;
struct list_head cq_armed_list;
unsigned long flags_qp;
unsigned long flags_cq;
unsigned long flags;
INIT_LIST_HEAD(&cq_armed_list);
/* Walk the QPs of this ibdev; the lock synchronizes with QP create/destroy. */
spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
spin_lock_irqsave(&mqp->sq.lock, flags_qp);
/* Send queue has outstanding work. */
if (mqp->sq.tail != mqp->sq.head) {
send_mcq = to_mcq(mqp->ibqp.send_cq);
spin_lock_irqsave(&send_mcq->lock, flags_cq);
if (send_mcq->mcq.comp &&
mqp->ibqp.send_cq->comp_handler) {
/* reset_notify_added keeps a CQ from being queued twice. */
if (!send_mcq->mcq.reset_notify_added) {
send_mcq->mcq.reset_notify_added = 1;
list_add_tail(&send_mcq->mcq.reset_notify,
&cq_armed_list);
}
}
spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
}
spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
spin_lock_irqsave(&mqp->rq.lock, flags_qp);
/* no handling is needed for SRQ */
if (!mqp->ibqp.srq) {
/* Receive queue has outstanding work. */
if (mqp->rq.tail != mqp->rq.head) {
recv_mcq = to_mcq(mqp->ibqp.recv_cq);
spin_lock_irqsave(&recv_mcq->lock, flags_cq);
if (recv_mcq->mcq.comp &&
mqp->ibqp.recv_cq->comp_handler) {
if (!recv_mcq->mcq.reset_notify_added) {
recv_mcq->mcq.reset_notify_added = 1;
list_add_tail(&recv_mcq->mcq.reset_notify,
&cq_armed_list);
}
}
spin_unlock_irqrestore(&recv_mcq->lock,
flags_cq);
}
}
spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
}
/*
 * Every queue lock above has been taken and released, so any post that
 * was in flight has been queued by now.  Notify all of the CQs that were
 * collected.
 */
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
mcq->comp(mcq);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
struct ib_event ibev;
bool fatal = false;
u8 port = (u8)param;
switch (event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
fatal = true;
break;
case MLX5_DEV_EVENT_PORT_UP:
case MLX5_DEV_EVENT_PORT_DOWN:
case MLX5_DEV_EVENT_PORT_INITIALIZED:
/* In RoCE, port up/down events are handled in
* mlx5_netdev_event().
*/
if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET)
return;
ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
break;
case MLX5_DEV_EVENT_LID_CHANGE:
ibev.event = IB_EVENT_LID_CHANGE;
break;
case MLX5_DEV_EVENT_PKEY_CHANGE:
ibev.event = IB_EVENT_PKEY_CHANGE;
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
case MLX5_DEV_EVENT_GUID_CHANGE:
ibev.event = IB_EVENT_GID_CHANGE;
break;
case MLX5_DEV_EVENT_CLIENT_REREG:
ibev.event = IB_EVENT_CLIENT_REREGISTER;
break;
default:
/* unsupported event */
return;
}
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
mlx5_ib_warn(ibdev, "warning: event(%d) on port %d\n", event, port);
return;
}
if (ibdev->ib_active)
ib_dispatch_event(&ibev);
if (fatal)
ibdev->ib_active = false;
}
/*
 * Call mlx5_query_ext_port_caps() once for every port, numbered from 1 up
 * to the num_ports general capability of the core device.
 */
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
	int port_num = 1;

	while (port_num <= MLX5_CAP_GEN(dev->mdev, num_ports)) {
		mlx5_query_ext_port_caps(dev, port_num);
		port_num++;
	}
}
/*
 * Fill the per-port P_Key and GID table lengths in dev->mdev->port_caps[].
 *
 * The P_Key table length is taken from the device attributes (max_pkeys),
 * the GID table length from the attributes of each port.
 *
 * Returns 0 on success, -ENOMEM when a scratch buffer cannot be allocated,
 * or the error returned by the failing device/port query.
 */
static int get_port_caps(struct mlx5_ib_dev *dev)
{
	struct ib_udata uhw = {.inlen = 0, .outlen = 0};
	struct ib_device_attr *dev_attr = NULL;
	struct ib_port_attr *port_attr;
	int rc = -ENOMEM;
	int port;

	port_attr = kmalloc(sizeof(*port_attr), GFP_KERNEL);
	if (port_attr == NULL)
		goto done;

	dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
	if (dev_attr == NULL)
		goto done;

	rc = mlx5_ib_query_device(&dev->ib_dev, dev_attr, &uhw);
	if (rc != 0) {
		mlx5_ib_warn(dev, "query_device failed %d\n", rc);
		goto done;
	}

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
		rc = mlx5_ib_query_port(&dev->ib_dev, port, port_attr);
		if (rc != 0) {
			mlx5_ib_warn(dev, "query_port %d failed %d\n",
				     port, rc);
			goto done;
		}

		/* port numbers are 1-based, port_caps[] is 0-based */
		dev->mdev->port_caps[port - 1].pkey_table_len =
		    dev_attr->max_pkeys;
		dev->mdev->port_caps[port - 1].gid_table_len =
		    port_attr->gid_tbl_len;
		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
			    dev_attr->max_pkeys, port_attr->gid_tbl_len);
	}

done:
	/* kfree(NULL) is a no-op, so both buffers can be released here */
	kfree(port_attr);
	kfree(dev_attr);
	return rc;
}
/*
 * Tear down the UMR resources held in dev->umrc: clean up the MR cache
 * (a failure is only logged), then destroy the QP, free the CQ and
 * deallocate the PD.
 */
static void destroy_umrc_res(struct mlx5_ib_dev *dev)
{
	if (mlx5_mr_cache_cleanup(dev))
		mlx5_ib_warn(dev, "mr cache cleanup failed\n");

	mlx5_ib_destroy_qp(dev->umrc.qp);
	ib_free_cq(dev->umrc.cq);
	ib_dealloc_pd(dev->umrc.pd);
}
/*
 * Used by create_umr_res() as the send queue depth of the UMR QP and as the
 * initial count of the dev->umrc.sem semaphore.
 */
enum {
MAX_UMR_WR = 128,
};
/*
 * Create the resources used for UMR work requests: a PD, a CQ and a QP of
 * type MLX5_IB_QPT_REG_UMR that is moved INIT -> RTR -> RTS.  On success
 * the objects are stored in dev->umrc, the UMR semaphore is initialised
 * and the MR cache is set up.
 *
 * Returns 0 on success or a negative errno.  On failure every object
 * created by this function is released again.
 */
static int create_umr_res(struct mlx5_ib_dev *dev)
{
	struct ib_qp_init_attr *qp_init;
	struct ib_qp_attr *qp_attr;
	struct ib_pd *pd;
	struct ib_cq *cq;
	struct ib_qp *qp;
	int err;

	/* Allocate both attribute buffers, then check them together. */
	qp_attr = kzalloc(sizeof(*qp_attr), GFP_KERNEL);
	qp_init = kzalloc(sizeof(*qp_init), GFP_KERNEL);
	if (qp_attr == NULL || qp_init == NULL) {
		err = -ENOMEM;
		goto out_free;
	}

	pd = ib_alloc_pd(&dev->ib_dev, 0);
	if (IS_ERR(pd)) {
		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
		err = PTR_ERR(pd);
		goto out_free;
	}

	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
	if (IS_ERR(cq)) {
		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
		err = PTR_ERR(cq);
		goto out_pd;
	}

	/* One CQ serves both directions of the UMR QP. */
	qp_init->send_cq = cq;
	qp_init->recv_cq = cq;
	qp_init->sq_sig_type = IB_SIGNAL_ALL_WR;
	qp_init->cap.max_send_wr = MAX_UMR_WR;
	qp_init->cap.max_send_sge = 1;
	qp_init->qp_type = MLX5_IB_QPT_REG_UMR;
	qp_init->port_num = 1;

	qp = mlx5_ib_create_qp(pd, qp_init, NULL);
	if (IS_ERR(qp)) {
		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
		err = PTR_ERR(qp);
		goto out_cq;
	}

	/*
	 * The QP was created through the driver entry point directly, so
	 * the generic ib_qp fields are filled in here.
	 */
	qp->device = &dev->ib_dev;
	qp->real_qp = qp;
	qp->uobject = NULL;
	qp->qp_type = MLX5_IB_QPT_REG_UMR;

	/* RESET -> INIT */
	qp_attr->qp_state = IB_QPS_INIT;
	qp_attr->port_num = 1;
	err = mlx5_ib_modify_qp(qp, qp_attr,
				IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT,
				NULL);
	if (err) {
		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
		goto out_qp;
	}

	/* INIT -> RTR */
	memset(qp_attr, 0, sizeof(*qp_attr));
	qp_attr->qp_state = IB_QPS_RTR;
	qp_attr->path_mtu = IB_MTU_256;
	err = mlx5_ib_modify_qp(qp, qp_attr, IB_QP_STATE, NULL);
	if (err) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
		goto out_qp;
	}

	/* RTR -> RTS */
	memset(qp_attr, 0, sizeof(*qp_attr));
	qp_attr->qp_state = IB_QPS_RTS;
	err = mlx5_ib_modify_qp(qp, qp_attr, IB_QP_STATE, NULL);
	if (err) {
		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
		goto out_qp;
	}

	dev->umrc.qp = qp;
	dev->umrc.cq = cq;
	dev->umrc.pd = pd;

	sema_init(&dev->umrc.sem, MAX_UMR_WR);

	err = mlx5_mr_cache_init(dev);
	if (err) {
		mlx5_ib_warn(dev, "mr cache init failed %d\n", err);
		goto out_qp;
	}

	kfree(qp_attr);
	kfree(qp_init);
	return 0;

out_qp:
	mlx5_ib_destroy_qp(qp);
out_cq:
	ib_free_cq(cq);
out_pd:
	ib_dealloc_pd(pd);
out_free:
	kfree(qp_attr);
	kfree(qp_init);
	return err;
}
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
struct ib_srq_init_attr attr;
struct mlx5_ib_dev *dev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
int port;
int ret = 0;
dev = container_of(devr, struct mlx5_ib_dev, devr);
mutex_init(&devr->mutex);
devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->p0)) {
ret = PTR_ERR(devr->p0);
goto error0;
}
devr->p0->device = &dev->ib_dev;
devr->p0->uobject = NULL;
atomic_set(&devr->p0->usecnt, 0);
devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
if (IS_ERR(devr->c0)) {
ret = PTR_ERR(devr->c0);
goto error1;
}
devr->c0->device = &dev->ib_dev;
devr->c0->uobject = NULL;
devr->c0->comp_handler = NULL;
devr->c0->event_handler = NULL;
devr->c0->cq_context = NULL;
atomic_set(&devr->c0->usecnt, 0);
devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->x0)) {
ret = PTR_ERR(devr->x0);
goto error2;
}
devr->x0->device = &dev->ib_dev;
devr->x0->inode = NULL;
atomic_set(&devr->x0->usecnt, 0);
mutex_init(&devr->x0->tgt_qp_mutex);
INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
if (IS_ERR(devr->x1)) {
ret = PTR_ERR(devr->x1);
goto error3;
}
devr->x1->device = &dev->ib_dev;
devr->x1->inode = NULL;
atomic_set(&devr->x1->usecnt, 0);
mutex_init(&devr->x1->tgt_qp_mutex);
INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
attr.ext.xrc.cq = devr->c0;
attr.ext.xrc.xrcd = devr->x0;
devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
if (IS_ERR(devr->s0)) {
ret = PTR_ERR(devr->s0);
goto error4;
}
devr->s0->device = &dev->ib_dev;
devr->s0->pd = devr->p0;
devr->s0->uobject = NULL;
devr->s0->event_handler = NULL;
devr->s0->srq_context = NULL;
devr->s0->srq_type = IB_SRQT_XRC;
devr->s0->ext.xrc.xrcd = devr->x0;
devr->s0->ext.xrc.cq = devr->c0;
atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
if (IS_ERR(devr->s1)) {
ret = PTR_ERR(devr->s1);
goto error5;
}
devr->s1->device = &dev->ib_dev;
devr->s1->pd = devr->p0;
devr->s1->uobject = NULL;
devr->s1->event_handler = NULL;
devr->s1->srq_context = NULL;
devr->s1->srq_type = IB_SRQT_BASIC;
devr->s1->ext.xrc.cq = devr->c0;
atomic_inc(&devr->p0->usecnt);
atomic_set(&devr->s0->usecnt, 0);
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
devr->ports[port].devr = devr;
}
return 0;
error5:
mlx5_ib_destroy_srq(devr->s0);
error4:
mlx5_ib_dealloc_xrcd(devr->x1);
error3:
mlx5_ib_dealloc_xrcd(devr->x0);
error2:
mlx5_ib_destroy_cq(devr->c0);
error1:
mlx5_ib_dealloc_pd(devr->p0);
error0:
return ret;
}
/*
 * Release the driver-internal objects held in devr (both SRQs, both XRC
 * domains, the CQ and the PD), then wait for any pending P_Key change work
 * of the device's ports.
 */
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	struct mlx5_ib_dev *dev;
	int idx;

	dev = container_of(devr, struct mlx5_ib_dev, devr);

	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);

	/* Make sure no change P_Key work items are still executing */
	idx = 0;
	while (idx < dev->num_ports) {
		cancel_work_sync(&devr->ports[idx].pkey_change_work);
		idx++;
	}
}
/*
 * Compute the RDMA core capability flags of the device.
 *
 * When port 1 reports the InfiniBand link layer the result is
 * RDMA_CORE_PORT_IBA_IB.  Otherwise both the IPv4 and the IPv6 L3 type
 * capabilities are required (else 0 is returned), and one flag is set per
 * supported RoCE version.
 */
static u32 get_core_cap_flags(struct ib_device *ibdev)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
	u32 flags;

	if (ll == IB_LINK_LAYER_INFINIBAND)
		return RDMA_CORE_PORT_IBA_IB;

	/* both L3 types must be supported */
	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP) ||
	    !(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
		return 0;

	flags = 0;
	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
		flags |= RDMA_CORE_PORT_IBA_ROCE;
	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
		flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	return flags;
}
/*
 * Fill the immutable port data for port_num from the queried port
 * attributes and the device's core capability flags.
 *
 * Returns 0 on success or the error returned by mlx5_ib_query_port(), in
 * which case *immutable is left untouched.
 */
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
			       struct ib_port_immutable *immutable)
{
	struct ib_port_attr port_attr;
	int rc;

	rc = mlx5_ib_query_port(ibdev, port_num, &port_attr);
	if (rc == 0) {
		immutable->pkey_tbl_len = port_attr.pkey_tbl_len;
		immutable->gid_tbl_len = port_attr.gid_tbl_len;
		immutable->core_cap_flags = get_core_cap_flags(ibdev);
		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	}

	return rc;
}
/*
 * Format the firmware revision of the device as "major.minor.sub" (the
 * sub-minor part zero-padded to four digits) into str, writing at most
 * str_len bytes.
 */
static void get_dev_fw_str(struct ib_device *ibdev, char *str,
			   size_t str_len)
{
	struct mlx5_ib_dev *dev;
	struct mlx5_core_dev *mdev;

	dev = container_of(ibdev, struct mlx5_ib_dev, ib_dev);
	mdev = dev->mdev;

	snprintf(str, str_len, "%d.%d.%04d",
		 fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev));
}
/*
 * RoCE LAG initialisation hook called from mlx5_enable_roce().
 * This implementation does nothing and always returns 0.
 */
static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
{
return 0;
}
/*
 * RoCE LAG cleanup hook called from mlx5_disable_roce().
 * This implementation is an empty stub.
 */
static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
{
}
/*
 * Unregister the RoCE netdevice notifier if it is registered.
 *
 * A non-NULL notifier_call marks the notifier as registered; it is reset to
 * NULL afterwards, which makes repeated calls harmless.
 */
static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
{
	if (dev->roce.nb.notifier_call == NULL)
		return;

	unregister_netdevice_notifier(&dev->roce.nb);
	dev->roce.nb.notifier_call = NULL;
}
/*
 * Enable RoCE on the device.
 *
 * First every network interface of every vnet is scanned for one that
 * mlx5_netdev_match() accepts for this core device with the "mce" prefix;
 * a matching interface is recorded in dev->roce.netdev.  Then the netdevice
 * notifier is registered, RoCE is enabled on the NIC vport and the LAG hook
 * is run.
 *
 * Returns 0 on success or the error of the failing step; steps that already
 * succeeded are undone.
 */
static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct net_device *ifp;
	int rc;

	/* Check if mlx5en net device already exists */
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		IFNET_RLOCK();
		CURVNET_SET_QUIET(vnet_iter);
		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			/* only interfaces belonging to mlx5en are recorded */
			if (mlx5_netdev_match(ifp, dev->mdev, "mce")) {
				write_lock(&dev->roce.netdev_lock);
				dev->roce.netdev = ifp;
				write_unlock(&dev->roce.netdev_lock);
			}
		}
		CURVNET_RESTORE();
		IFNET_RUNLOCK();
	}
	VNET_LIST_RUNLOCK();

	/* notifier_call doubles as the "notifier is registered" marker */
	dev->roce.nb.notifier_call = mlx5_netdev_event;
	rc = register_netdevice_notifier(&dev->roce.nb);
	if (rc != 0) {
		dev->roce.nb.notifier_call = NULL;
		return rc;
	}

	rc = mlx5_nic_vport_enable_roce(dev->mdev);
	if (rc != 0)
		goto out_notifier;

	rc = mlx5_roce_lag_init(dev);
	if (rc != 0)
		goto out_roce;

	return 0;

out_roce:
	mlx5_nic_vport_disable_roce(dev->mdev);
out_notifier:
	mlx5_remove_roce_notifier(dev);
	return rc;
}
/*
 * Undo the RoCE enabling steps of mlx5_enable_roce() in reverse order:
 * run the LAG cleanup hook, then disable RoCE on the NIC vport.
 * The netdevice notifier is not touched here; callers remove it separately
 * through mlx5_remove_roce_notifier().
 */
static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
{
mlx5_roce_lag_cleanup(dev);
mlx5_nic_vport_disable_roce(dev->mdev);
}
/*
 * Release the queue counter of one port and reset the stored counter id
 * to 0.
 *
 * Note that port_num is used directly as an index into dev->port[]; the
 * callers in this file pass a zero-based index, not a 1-based port number.
 */
static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
{
mlx5_vport_dealloc_q_counter(dev->mdev,
MLX5_INTERFACE_PROTOCOL_IB,
dev->port[port_num].q_cnt_id);
dev->port[port_num].q_cnt_id = 0;
}
/*
 * Release the queue counters of all dev->num_ports ports.
 */
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
	unsigned int idx = 0;

	while (idx < dev->num_ports) {
		mlx5_ib_dealloc_q_port_counter(dev, idx);
		idx++;
	}
}
/*
 * Allocate one queue counter per port and store its id in
 * dev->port[].q_cnt_id.
 *
 * Returns 0 on success.  If an allocation fails, the counters allocated for
 * the preceding ports are released again and the error is returned.
 */
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
	int idx;
	int rc;

	for (idx = 0; idx < dev->num_ports; idx++) {
		rc = mlx5_vport_alloc_q_counter(dev->mdev,
						MLX5_INTERFACE_PROTOCOL_IB,
						&dev->port[idx].q_cnt_id);
		if (rc == 0)
			continue;

		mlx5_ib_warn(dev,
			     "couldn't allocate queue counter for port %d, err %d\n",
			     idx + 1, rc);

		/* roll back the ports that were already set up */
		for (idx--; idx >= 0; idx--)
			mlx5_ib_dealloc_q_port_counter(dev, idx);

		return rc;
	}

	return 0;
}
/*
 * Names of the per-port hardware counters exposed through the hw_stats
 * interface.  Entry i corresponds to stats_offsets[i]; the two tables must
 * have the same number of entries (checked at build time in
 * mlx5_ib_alloc_hw_stats()).
 */
static const char * const names[] = {
"rx_write_requests",
"rx_read_requests",
"rx_atomic_requests",
"out_of_buffer",
"out_of_sequence",
"duplicate_request",
"rnr_nak_retry_err",
"packet_seq_err",
"implied_nak_seq_err",
"local_ack_timeout_err",
};
/*
 * Byte offset of each counter inside the query_q_counter_out layout, in the
 * same order as names[].  mlx5_ib_get_hw_stats() reads a big-endian 32-bit
 * value at each of these offsets.
 */
static const size_t stats_offsets[] = {
MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
};
/*
 * Allocate the hw_stats structure describing the counters in names[].
 *
 * Only per-port statistics are supported: for port_num 0 NULL is returned.
 */
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
						    u8 port_num)
{
	/* every counter name needs a matching offset */
	BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));

	/* We support only per port stats */
	return (port_num == 0) ? NULL :
	    rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
				       RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
/*
 * Read the queue counter of a port and copy its values into stats->value[].
 *
 * ibdev: IB device.
 * stats: destination; value[i] receives the counter named names[i].
 * port:  1-based port number; 0 is rejected.
 * index: not used, all counters are always updated.
 *
 * Returns the number of counters updated (ARRAY_SIZE(names)) on success,
 * -ENOSYS when port is 0 or stats is NULL, -ENOMEM when the output buffer
 * cannot be allocated, or the error returned by the counter query.  On any
 * error no counter is updated.
 */
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
				struct rdma_hw_stats *stats,
				u8 port, int index)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	void *out;
	__be32 val;
	int ret;
	int i;

	if (!port || !stats)
		return -ENOSYS;

	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	ret = mlx5_vport_query_q_counter(dev->mdev,
					 dev->port[port - 1].q_cnt_id, 0,
					 out, outlen);
	if (ret)
		goto free;	/* report the failure instead of a counter count */

	for (i = 0; i < ARRAY_SIZE(names); i++) {
		/* counters are stored as big-endian 32-bit values */
		val = *(__be32 *)((char *)out + stats_offsets[i]);
		stats->value[i] = (u64)be32_to_cpu(val);
	}
	ret = ARRAY_SIZE(names);

free:
	kvfree(out);
	return ret;
}
/*
 * "add" callback of the mlx5 IB interface: create and register the IB
 * device for a core device.
 *
 * Returns the new struct mlx5_ib_dev on success.  Returns NULL when the
 * port type maps to Ethernet but the device lacks the roce capability, or
 * when any initialisation step fails; in the failure case everything set up
 * so far is torn down through the error labels at the end of the function.
 */
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
enum rdma_link_layer ll;
int port_type_cap;
int err;
int i;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
/* An Ethernet port is only usable here if the device supports RoCE. */
if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
return NULL;
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
if (!dev)
return NULL;
dev->mdev = mdev;
/* One zero-initialised per-port entry for each port of the device. */
dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port)
goto err_dealloc;
rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_free_port;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
/* The device is named after the unit number of the underlying bus device. */
snprintf(dev->ib_dev.name, IB_DEVICE_NAME_MAX, "mlx5_%d", device_get_unit(mdev->pdev->dev.bsddev));
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
dev->ib_dev.dma_device = &mdev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
/* Baseline user verbs commands; optional ones are added further down. */
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
dev->ib_dev.uverbs_ex_cmd_mask =
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
/* Verbs entry points that are always provided. */
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
if (ll == IB_LINK_LAYER_ETHERNET)
dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
dev->ib_dev.add_gid = mlx5_ib_add_gid;
dev->ib_dev.del_gid = mlx5_ib_del_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
dev->ib_dev.mmap = mlx5_ib_mmap;
dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
dev->ib_dev.create_ah = mlx5_ib_create_ah;
dev->ib_dev.query_ah = mlx5_ib_query_ah;
dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
dev->ib_dev.create_srq = mlx5_ib_create_srq;
dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
dev->ib_dev.query_srq = mlx5_ib_query_srq;
dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
dev->ib_dev.create_qp = mlx5_ib_create_qp;
dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
dev->ib_dev.query_qp = mlx5_ib_query_qp;
dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
dev->ib_dev.post_send = mlx5_ib_post_send;
dev->ib_dev.post_recv = mlx5_ib_post_recv;
dev->ib_dev.create_cq = mlx5_ib_create_cq;
dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
dev->ib_dev.get_port_immutable = mlx5_port_immutable;
dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
/* Virtual function management is only offered on a physical function. */
if (mlx5_core_is_pf(mdev)) {
dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
}
mlx5_ib_internal_fill_odp_caps(dev);
/* Memory windows depend on the imaicl capability. */
if (MLX5_CAP_GEN(mdev, imaicl)) {
dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
/* Hardware statistics need both counter capabilities. */
if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
if (MLX5_CAP_GEN(mdev, xrc)) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
/* Flow steering, WQ and RWQ indirection tables: Ethernet link layer only. */
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
}
err = init_node_data(dev);
if (err)
goto err_free_port;
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
/*
 * From here on each step has a matching error label below; a failing step
 * jumps to the label that undoes the steps completed before it.
 */
if (ll == IB_LINK_LAYER_ETHERNET) {
err = mlx5_enable_roce(dev);
if (err)
goto err_free_port;
}
err = create_dev_resources(&dev->devr);
if (err)
goto err_disable_roce;
err = mlx5_ib_odp_init_one(dev);
if (err)
goto err_rsrc;
err = mlx5_ib_alloc_q_counters(dev);
if (err)
goto err_odp;
err = ib_register_device(&dev->ib_dev, NULL);
if (err)
goto err_q_cnt;
err = create_umr_res(dev);
if (err)
goto err_dev;
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
goto err_umrc;
}
err = mlx5_ib_init_congestion(dev);
if (err)
goto err_umrc;
/* Event dispatching in mlx5_ib_event() is gated on this flag. */
dev->ib_active = true;
return dev;
/* Error unwinding, in reverse order of the setup steps above. */
err_umrc:
destroy_umrc_res(dev);
err_dev:
ib_unregister_device(&dev->ib_dev);
err_q_cnt:
mlx5_ib_dealloc_q_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
err_rsrc:
destroy_dev_resources(&dev->devr);
err_disable_roce:
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_roce(dev);
mlx5_remove_roce_notifier(dev);
}
err_free_port:
kfree(dev->port);
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
return NULL;
}
/*
 * "remove" callback of the mlx5 IB interface: tear down the IB device that
 * mlx5_ib_add() returned as context.
 *
 * mdev:    core device being removed (not used here).
 * context: the struct mlx5_ib_dev created by mlx5_ib_add().
 */
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
/* The link layer is sampled up front, before anything is torn down. */
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
mlx5_ib_cleanup_congestion(dev);
mlx5_remove_roce_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
/* RoCE was only enabled for the Ethernet link layer. */
if (ll == IB_LINK_LAYER_ETHERNET)
mlx5_disable_roce(dev);
kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}
/*
 * Interface descriptor registered with the mlx5 core in mlx5_ib_init():
 * device add/remove callbacks and the event handler for the IB protocol.
 */
static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
/*
 * Module initialisation: warn about the deprecated prof_sel parameter when
 * it differs from 2, initialise ODP support and register the IB interface
 * with the mlx5 core.
 *
 * Returns 0 on success or the error of the failing step.  If registering
 * the interface fails, the ODP initialisation is undone.
 */
static int __init mlx5_ib_init(void)
{
	int rc;

	if (deprecated_prof_sel != 2)
		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	rc = mlx5_ib_odp_init();
	if (rc != 0)
		return rc;

	rc = mlx5_register_interface(&mlx5_ib_interface);
	if (rc != 0)
		mlx5_ib_odp_cleanup();

	return rc;
}
/*
 * Module exit: undo mlx5_ib_init() in reverse order - unregister the IB
 * interface from the mlx5 core, then clean up ODP support.
 */
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
mlx5_ib_odp_cleanup();
}
/*
 * SYSINIT hook that prints the mlx5_version string.  The argument is
 * unused.
 */
static void
mlx5_ib_show_version(void __unused *arg)
{
printf("%s", mlx5_version);
}
/*
 * Print the version string during SI_SUB_DRIVERS, and register the module
 * init and exit routines with order SI_ORDER_THIRD.
 */
SYSINIT(mlx5_ib_show_version, SI_SUB_DRIVERS, SI_ORDER_ANY, mlx5_ib_show_version, NULL);
module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);

File Metadata

Mime Type
text/x-diff
Expires
Thu, Mar 13, 10:19 AM (1 d, 6 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17096504
Default Alt Text
(334 KB)

Event Timeline