Index: head/sys/dev/mlx5/mlx5_core/mlx5_eq.c
===================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_eq.c	(revision 291937)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_eq.c	(revision 291938)
@@ -1,592 +1,613 @@
 /*-
  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <dev/mlx5/driver.h>
 #include <dev/mlx5/mlx5_ifc.h>
 #include "mlx5_core.h"
 
+#include "opt_rss.h"
+
+#ifdef  RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#endif
+
 enum {
 	MLX5_EQE_SIZE		= sizeof(struct mlx5_eqe),
 	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
 };
 
 enum {
 	MLX5_NUM_SPARE_EQE	= 0x80,
 	MLX5_NUM_ASYNC_EQE	= 0x100,
 	MLX5_NUM_CMD_EQE	= 32,
 };
 
 enum {
 	MLX5_EQ_DOORBEL_OFFSET	= 0x40,
 };
 
 #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG)	    | \
 			       (1ull << MLX5_EVENT_TYPE_COMM_EST)	    | \
 			       (1ull << MLX5_EVENT_TYPE_SQ_DRAINED)	    | \
 			       (1ull << MLX5_EVENT_TYPE_CQ_ERROR)	    | \
 			       (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
 			       (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED)    | \
 			       (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
 			       (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
 			       (1ull << MLX5_EVENT_TYPE_PORT_CHANGE)	    | \
 			       (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE)   | \
 			       (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
 			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE)	    | \
 			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
 
 struct map_eq_in {
 	u64	mask;
 	u32	reserved;
 	u32	unmap_eqn;
 };
 
 struct cre_des_eq {
 	u8	reserved[15];
 	u8	eqn;
 };
 
 /* Function prototype */
 static void mlx5_port_module_event(struct mlx5_core_dev *dev,
 				   struct mlx5_eqe *eqe);
 
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)];
 	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)];
 
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
 	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
 
 	memset(out, 0, sizeof(out));
 	return mlx5_cmd_exec_check_status(dev, in,  sizeof(in),
 					       out, sizeof(out));
 }
 
 static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
 {
 	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
 }
 
 static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
 {
 	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
 
 	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
 }
 
 static const char *eqe_type_str(u8 type)
 {
 	switch (type) {
 	case MLX5_EVENT_TYPE_COMP:
 		return "MLX5_EVENT_TYPE_COMP";
 	case MLX5_EVENT_TYPE_PATH_MIG:
 		return "MLX5_EVENT_TYPE_PATH_MIG";
 	case MLX5_EVENT_TYPE_COMM_EST:
 		return "MLX5_EVENT_TYPE_COMM_EST";
 	case MLX5_EVENT_TYPE_SQ_DRAINED:
 		return "MLX5_EVENT_TYPE_SQ_DRAINED";
 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
 		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
 		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
 	case MLX5_EVENT_TYPE_CQ_ERROR:
 		return "MLX5_EVENT_TYPE_CQ_ERROR";
 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
 		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
 		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
 		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
 		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
 		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
 	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
 		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
 	case MLX5_EVENT_TYPE_PORT_CHANGE:
 		return "MLX5_EVENT_TYPE_PORT_CHANGE";
 	case MLX5_EVENT_TYPE_GPIO_EVENT:
 		return "MLX5_EVENT_TYPE_GPIO_EVENT";
 	case MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT:
 		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
 	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
 		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
 	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
 		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
 	case MLX5_EVENT_TYPE_STALL_EVENT:
 		return "MLX5_EVENT_TYPE_STALL_EVENT";
 	case MLX5_EVENT_TYPE_CMD:
 		return "MLX5_EVENT_TYPE_CMD";
 	case MLX5_EVENT_TYPE_PAGE_REQUEST:
 		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
 	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
 		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
 	default:
 		return "Unrecognized event";
 	}
 }
 
 static enum mlx5_dev_event port_subtype_event(u8 subtype)
 {
 	switch (subtype) {
 	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
 		return MLX5_DEV_EVENT_PORT_DOWN;
 	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
 		return MLX5_DEV_EVENT_PORT_UP;
 	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
 		return MLX5_DEV_EVENT_PORT_INITIALIZED;
 	case MLX5_PORT_CHANGE_SUBTYPE_LID:
 		return MLX5_DEV_EVENT_LID_CHANGE;
 	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
 		return MLX5_DEV_EVENT_PKEY_CHANGE;
 	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
 		return MLX5_DEV_EVENT_GUID_CHANGE;
 	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
 		return MLX5_DEV_EVENT_CLIENT_REREG;
 	}
 	return -1;
 }
 
 static void eq_update_ci(struct mlx5_eq *eq, int arm)
 {
 	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
 	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
 	__raw_writel((__force u32) cpu_to_be32(val), addr);
 	/* We still want ordering, just not swabbing, so add a barrier */
 	mb();
 }
 
 static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
 	struct mlx5_eqe *eqe;
 	int eqes_found = 0;
 	int set_ci = 0;
 	u32 cqn;
 	u32 rsn;
 	u8 port;
 
 	while ((eqe = next_eqe_sw(eq))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
 		 */
 		rmb();
 
 		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
 			      eq->eqn, eqe_type_str(eqe->type));
 		switch (eqe->type) {
 		case MLX5_EVENT_TYPE_COMP:
 			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
 			mlx5_cq_completion(dev, cqn);
 			break;
 
 		case MLX5_EVENT_TYPE_PATH_MIG:
 		case MLX5_EVENT_TYPE_COMM_EST:
 		case MLX5_EVENT_TYPE_SQ_DRAINED:
 		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
 		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
 		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
 		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
 		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
 			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
 			mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
 				      eqe_type_str(eqe->type), eqe->type, rsn);
 			mlx5_rsc_event(dev, rsn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
 		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
 			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
 			mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
 				      eqe_type_str(eqe->type), eqe->type, rsn);
 			mlx5_srq_event(dev, rsn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_CMD:
 			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
 			break;
 
 		case MLX5_EVENT_TYPE_PORT_CHANGE:
 			port = (eqe->data.port.port >> 4) & 0xf;
 			switch (eqe->sub_type) {
 			case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
 			case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
 			case MLX5_PORT_CHANGE_SUBTYPE_LID:
 			case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
 			case MLX5_PORT_CHANGE_SUBTYPE_GUID:
 			case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
 			case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
 				if (dev->event)
 					dev->event(dev, port_subtype_event(eqe->sub_type),
 						   (unsigned long)port);
 				break;
 			default:
 				mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
 					       port, eqe->sub_type);
 			}
 			break;
 		case MLX5_EVENT_TYPE_CQ_ERROR:
 			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
 			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
 				       cqn, eqe->data.cq_err.syndrome);
 			mlx5_cq_event(dev, cqn, eqe->type);
 			break;
 
 		case MLX5_EVENT_TYPE_PAGE_REQUEST:
 			{
 				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
 				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
 
 				mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
 					      func_id, npages);
 				mlx5_core_req_pages_handler(dev, func_id, npages);
 			}
 			break;
 
 		case MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT:
 			mlx5_port_module_event(dev, eqe);
 			break;
 
 		case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
 			{
 				struct mlx5_eqe_vport_change *vc_eqe =
 						&eqe->data.vport_change;
 				u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
 
 				if (dev->event)
 					dev->event(dev,
 					     MLX5_DEV_EVENT_VPORT_CHANGE,
 					     (unsigned long)vport_num);
 			}
 			break;
 
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
 			break;
 		}
 
 		++eq->cons_index;
 		eqes_found = 1;
 		++set_ci;
 
 		/* The HCA will think the queue has overflowed if we
 		 * don't tell it we've been processing events.  We
 		 * create our EQs with MLX5_NUM_SPARE_EQE extra
 		 * entries, so we must update our consumer index at
 		 * least that often.
 		 */
 		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
 			eq_update_ci(eq, 0);
 			set_ci = 0;
 		}
 	}
 
 	eq_update_ci(eq, 1);
 
 	return eqes_found;
 }
 
 static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr)
 {
 	struct mlx5_eq *eq = eq_ptr;
 	struct mlx5_core_dev *dev = eq->dev;
 
 	mlx5_eq_int(dev, eq);
 
 	/* MSI-X vectors always belong to us */
 	return IRQ_HANDLED;
 }
 
 static void init_eq_buf(struct mlx5_eq *eq)
 {
 	struct mlx5_eqe *eqe;
 	int i;
 
 	for (i = 0; i < eq->nent; i++) {
 		eqe = get_eqe(eq, i);
 		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
 	}
 }
 
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name, struct mlx5_uar *uar)
 {
 	struct mlx5_priv *priv = &dev->priv;
 	struct mlx5_create_eq_mbox_in *in;
 	struct mlx5_create_eq_mbox_out out;
 	int err;
 	int inlen;
 
 	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
 	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE,
 			     &eq->buf);
 	if (err)
 		return err;
 
 	init_eq_buf(eq);
 
 	inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
 	in = mlx5_vzalloc(inlen);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_buf;
 	}
 	memset(&out, 0, sizeof(out));
 
 	mlx5_fill_page_array(&eq->buf, in->pas);
 
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
 	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
 	in->ctx.intr = vecidx;
 	in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	in->events_mask = cpu_to_be64(mask);
 
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 	if (err)
 		goto err_in;
 
 	if (out.hdr.status) {
 		err = mlx5_cmd_status_to_err(&out.hdr);
 		goto err_in;
 	}
 
 	eq->eqn = out.eq_number;
 	eq->irqn = vecidx;
 	eq->dev = dev;
 	eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
 	snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
 		 name, pci_name(dev->pdev));
 	err = request_irq(priv->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
 			  priv->irq_info[vecidx].name, eq);
 	if (err)
 		goto err_eq;
+#ifdef RSS
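+	/* Bind each completion vector's MSI-X interrupt to the CPU serving its RSS bucket. */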
+	if (vecidx >= MLX5_EQ_VEC_COMP_BASE) {
+		u8 bucket = vecidx - MLX5_EQ_VEC_COMP_BASE;
+		err = bind_irq_to_cpu(priv->msix_arr[vecidx].vector,
+				      rss_getcpu(bucket % rss_getnumbuckets()));
+		if (err)
+			goto err_irq;
+	}
+#else
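+	/* Reference the error label when RSS is disabled to avoid an unused-label warning. */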
+	if (0)
+		goto err_irq;
+#endif
 
 
 	/* EQs are created in ARMED state
 	 */
 	eq_update_ci(eq, 1);
 
 	kvfree(in);
 	return 0;
 
+err_irq:
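+	/* Release the IRQ requested above before tearing down the EQ. */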
+	free_irq(priv->msix_arr[vecidx].vector, eq);
 
 err_eq:
 	mlx5_cmd_destroy_eq(dev, eq->eqn);
 
 err_in:
 	kvfree(in);
 
 err_buf:
 	mlx5_buf_free(dev, &eq->buf);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
 
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
 	int err;
 
 	free_irq(dev->priv.msix_arr[eq->irqn].vector, eq);
 	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
 	if (err)
 		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
 			       eq->eqn);
 	mlx5_buf_free(dev, &eq->buf);
 
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
 
 int mlx5_eq_init(struct mlx5_core_dev *dev)
 {
 	int err;
 
 	spin_lock_init(&dev->priv.eq_table.lock);
 
 	err = 0;
 
 	return err;
 }
 
 
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
 {
 }
 
 int mlx5_start_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
 	u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
 	int err;
 
 	if (MLX5_CAP_GEN(dev, port_module_event))
 		async_event_mask |= (1ull <<
 				     MLX5_EVENT_TYPE_CODING_PORT_MODULE_EVENT);
 
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
 		return err;
 	}
 
 	mlx5_cmd_use_events(dev);
 
 	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
 				 MLX5_NUM_ASYNC_EQE, async_event_mask,
 				 "mlx5_async_eq", &dev->priv.uuari.uars[0]);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
 		goto err1;
 	}
 
 	err = mlx5_create_map_eq(dev, &table->pages_eq,
 				 MLX5_EQ_VEC_PAGES,
 				 /* TODO: sriov max_vf + */ 1,
 				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
 				 &dev->priv.uuari.uars[0]);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
 		goto err2;
 	}
 
 	return err;
 
 err2:
 	mlx5_destroy_unmap_eq(dev, &table->async_eq);
 
 err1:
 	mlx5_cmd_use_polling(dev);
 	mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
 	return err;
 }
 
 int mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
 	int err;
 
 	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
 	if (err)
 		return err;
 
 	mlx5_destroy_unmap_eq(dev, &table->async_eq);
 	mlx5_cmd_use_polling(dev);
 
 	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
 	if (err)
 		mlx5_cmd_use_events(dev);
 
 	return err;
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 		       struct mlx5_query_eq_mbox_out *out, int outlen)
 {
 	struct mlx5_query_eq_mbox_in in;
 	int err;
 
 	memset(&in, 0, sizeof(in));
 	memset(out, 0, outlen);
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ);
 	in.eqn = eq->eqn;
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
 	if (err)
 		return err;
 
 	if (out->hdr.status)
 		err = mlx5_cmd_status_to_err(&out->hdr);
 
 	return err;
 }
 
 EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
 
 static const char *mlx5_port_module_event_error_type_to_string(u8 error_type)
 {
 	switch (error_type) {
 	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
 		return "Power Budget Exceeded";
 	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE:
 		return "Long Range for non MLNX cable/module";
 	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
 		return "Bus stuck(I2C or data shorted)";
 	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
 		return "No EEPROM/retry timeout";
 	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
 		return "Enforce part number list";
 	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
 		return "Unknown identifier";
 	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
 		return "High Temperature";
 
 	default:
 		return "Unknown error type";
 	}
 }
 
 static void mlx5_port_module_event(struct mlx5_core_dev *dev,
 				   struct mlx5_eqe *eqe)
 {
 	unsigned int module_num;
 	unsigned int module_status;
 	unsigned int error_type;
 	struct mlx5_eqe_port_module_event *module_event_eqe;
 	struct pci_dev *pdev = dev->pdev;
 
 	module_event_eqe = &eqe->data.port_module_event;
 
 	module_num = (unsigned int)module_event_eqe->module;
 	module_status = (unsigned int)module_event_eqe->module_status &
 			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
 	error_type = (unsigned int)module_event_eqe->error_type &
 		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
 
 	switch (module_status) {
 	case MLX5_MODULE_STATUS_PLUGGED:
 		device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged", module_num);
 		break;
 
 	case MLX5_MODULE_STATUS_UNPLUGGED:
 		device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: unplugged", module_num);
 		break;
 
 	case MLX5_MODULE_STATUS_ERROR:
 		device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: error, %s", module_num, mlx5_port_module_event_error_type_to_string(error_type));
 		break;
 
 	default:
 		device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, unknown status", module_num);
 	}
 }
 
Index: head/sys/dev/mlx5/mlx5_en/en.h
===================================================================
--- head/sys/dev/mlx5/mlx5_en/en.h	(revision 291937)
+++ head/sys/dev/mlx5/mlx5_en/en.h	(revision 291938)
@@ -1,782 +1,789 @@
 /*-
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MLX5_EN_H_
 #define	_MLX5_EN_H_
 
 #include <linux/kmod.h>
 #include <linux/page.h>
 #include <linux/slab.h>
 #include <linux/if_vlan.h>
 #include <linux/if_ether.h>
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_lro.h>
 #include <netinet/udp.h>
 #include <net/ethernet.h>
 #include <sys/buf_ring.h>
 
+#include "opt_rss.h"
+
+#ifdef	RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#endif
+
 #include <machine/bus.h>
 
 #ifdef HAVE_TURBO_LRO
 #include "tcp_tlro.h"
 #endif
 
 #include <dev/mlx5/driver.h>
 #include <dev/mlx5/qp.h>
 #include <dev/mlx5/cq.h>
 #include <dev/mlx5/vport.h>
 
 #include <dev/mlx5/mlx5_core/wq.h>
 #include <dev/mlx5/mlx5_core/transobj.h>
 #include <dev/mlx5/mlx5_core/mlx5_core.h>
 
 #define	MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE                0x7
 #define	MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE                0xa
 #define	MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE                0xd
 
 #define	MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE                0x7
 #define	MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE                0xa
 #define	MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE                0xd
 
 /* FreeBSD HW LRO is limited to 16KB - the size of the largest mbuf */
 #define	MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 MJUM16BYTES
 #define	MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC      0x10
 #define	MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE	0x3
 #define	MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS      0x20
 #define	MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC      0x10
 #define	MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS      0x20
 #define	MLX5E_PARAMS_DEFAULT_MIN_RX_WQES                0x80
 #define	MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ         0x7
 #define	MLX5E_CACHELINE_SIZE CACHE_LINE_SIZE
 #define	MLX5E_HW2SW_MTU(hwmtu) \
     ((hwmtu) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
 #define	MLX5E_SW2HW_MTU(swmtu) \
     ((swmtu) + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN))
 #define	MLX5E_SW2MB_MTU(swmtu) \
     (MLX5E_SW2HW_MTU(swmtu) + MLX5E_NET_IP_ALIGN)
 #define	MLX5E_MTU_MIN		72	/* Min MTU allowed by the kernel */
 #define	MLX5E_MTU_MAX		MIN(ETHERMTU_JUMBO, MJUM16BYTES)	/* Max MTU of Ethernet
 									 * jumbo frames */
 
 #define	MLX5E_BUDGET_MAX	8192	/* RX and TX */
 #define	MLX5E_RX_BUDGET_MAX	256
 #define	MLX5E_SQ_BF_BUDGET	16
 #define	MLX5E_SQ_TX_QUEUE_SIZE	4096	/* SQ drbr queue size */
 
 #define	MLX5E_MAX_TX_NUM_TC	8	/* units */
 #define	MLX5E_MAX_TX_HEADER	128	/* bytes */
 #define	MLX5E_MAX_TX_PAYLOAD_SIZE	65536	/* bytes */
 #define	MLX5E_MAX_TX_MBUF_SIZE	65536	/* bytes */
 #define	MLX5E_MAX_TX_MBUF_FRAGS	\
     ((MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS) - \
     (MLX5E_MAX_TX_HEADER / MLX5_SEND_WQE_DS))	/* units */
 #define	MLX5E_MAX_TX_INLINE \
   (MLX5E_MAX_TX_HEADER - sizeof(struct mlx5e_tx_wqe) + \
   sizeof(((struct mlx5e_tx_wqe *)0)->eth.inline_hdr_start))	/* bytes */
 
 MALLOC_DECLARE(M_MLX5EN);
 
 struct mlx5_core_dev;
 struct mlx5e_cq;
 
 typedef void (mlx5e_cq_comp_t)(struct mlx5_core_cq *);
 
 #define	MLX5E_STATS_COUNT(a,b,c,d) a
 #define	MLX5E_STATS_VAR(a,b,c,d) b;
 #define	MLX5E_STATS_DESC(a,b,c,d) c, d,
 
 #define	MLX5E_VPORT_STATS(m)						\
   /* HW counters */							\
   m(+1, u64 rx_packets, "rx_packets", "Received packets")		\
   m(+1, u64 rx_bytes, "rx_bytes", "Received bytes")			\
   m(+1, u64 tx_packets, "tx_packets", "Transmitted packets")		\
   m(+1, u64 tx_bytes, "tx_bytes", "Transmitted bytes")			\
   m(+1, u64 rx_error_packets, "rx_error_packets", "Received error packets") \
   m(+1, u64 rx_error_bytes, "rx_error_bytes", "Received error bytes")	\
   m(+1, u64 tx_error_packets, "tx_error_packets", "Transmitted error packets") \
   m(+1, u64 tx_error_bytes, "tx_error_bytes", "Transmitted error bytes") \
   m(+1, u64 rx_unicast_packets, "rx_unicast_packets", "Received unicast packets") \
   m(+1, u64 rx_unicast_bytes, "rx_unicast_bytes", "Received unicast bytes") \
   m(+1, u64 tx_unicast_packets, "tx_unicast_packets", "Transmitted unicast packets") \
   m(+1, u64 tx_unicast_bytes, "tx_unicast_bytes", "Transmitted unicast bytes") \
   m(+1, u64 rx_multicast_packets, "rx_multicast_packets", "Received multicast packets") \
   m(+1, u64 rx_multicast_bytes, "rx_multicast_bytes", "Received multicast bytes") \
   m(+1, u64 tx_multicast_packets, "tx_multicast_packets", "Transmitted multicast packets") \
   m(+1, u64 tx_multicast_bytes, "tx_multicast_bytes", "Transmitted multicast bytes") \
   m(+1, u64 rx_broadcast_packets, "rx_broadcast_packets", "Received broadcast packets") \
   m(+1, u64 rx_broadcast_bytes, "rx_broadcast_bytes", "Received broadcast bytes") \
   m(+1, u64 tx_broadcast_packets, "tx_broadcast_packets", "Transmitted broadcast packets") \
   m(+1, u64 tx_broadcast_bytes, "tx_broadcast_bytes", "Transmitted broadcast bytes") \
   m(+1, u64 rx_out_of_buffer, "rx_out_of_buffer", "Receive out of buffer, no recv wqes events") \
   /* SW counters */							\
   m(+1, u64 tso_packets, "tso_packets", "Transmitted TSO packets")	\
   m(+1, u64 tso_bytes, "tso_bytes", "Transmitted TSO bytes")		\
   m(+1, u64 lro_packets, "lro_packets", "Received LRO packets")		\
   m(+1, u64 lro_bytes, "lro_bytes", "Received LRO bytes")		\
   m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO")	\
   m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO")	\
   m(+1, u64 rx_csum_good, "rx_csum_good", "Received checksum valid packets") \
   m(+1, u64 rx_csum_none, "rx_csum_none", "Received no checksum packets") \
   m(+1, u64 tx_csum_offload, "tx_csum_offload", "Transmit checksum offload packets") \
   m(+1, u64 tx_queue_dropped, "tx_queue_dropped", "Transmit queue dropped") \
   m(+1, u64 tx_defragged, "tx_defragged", "Transmit queue defragged") \
   m(+1, u64 rx_wqe_err, "rx_wqe_err", "Receive WQE errors")
 
 #define	MLX5E_VPORT_STATS_NUM (0 MLX5E_VPORT_STATS(MLX5E_STATS_COUNT))
 
 struct mlx5e_vport_stats {
 	struct	sysctl_ctx_list ctx;
 	u64	arg [0];
 	MLX5E_VPORT_STATS(MLX5E_STATS_VAR)
 	u32	rx_out_of_buffer_prev;
 };
 
 #define	MLX5E_PPORT_IEEE802_3_STATS(m)					\
   m(+1, u64 frames_tx, "frames_tx", "Frames transmitted")		\
   m(+1, u64 frames_rx, "frames_rx", "Frames received")			\
   m(+1, u64 check_seq_err, "check_seq_err", "Sequence errors")		\
   m(+1, u64 alignment_err, "alignment_err", "Alignment errors")	\
   m(+1, u64 octets_tx, "octets_tx", "Bytes transmitted")		\
   m(+1, u64 octets_received, "octets_received", "Bytes received")	\
   m(+1, u64 multicast_xmitted, "multicast_xmitted", "Multicast transmitted") \
   m(+1, u64 broadcast_xmitted, "broadcast_xmitted", "Broadcast transmitted") \
   m(+1, u64 multicast_rx, "multicast_rx", "Multicast received")	\
   m(+1, u64 broadcast_rx, "broadcast_rx", "Broadcast received")	\
   m(+1, u64 in_range_len_errors, "in_range_len_errors", "In range length errors") \
   m(+1, u64 out_of_range_len, "out_of_range_len", "Out of range length errors") \
   m(+1, u64 too_long_errors, "too_long_errors", "Too long errors")	\
   m(+1, u64 symbol_err, "symbol_err", "Symbol errors")			\
   m(+1, u64 mac_control_tx, "mac_control_tx", "MAC control transmitted") \
   m(+1, u64 mac_control_rx, "mac_control_rx", "MAC control received")	\
   m(+1, u64 unsupported_op_rx, "unsupported_op_rx", "Unsupported operation received") \
   m(+1, u64 pause_ctrl_rx, "pause_ctrl_rx", "Pause control received")	\
   m(+1, u64 pause_ctrl_tx, "pause_ctrl_tx", "Pause control transmitted")
 
 #define	MLX5E_PPORT_RFC2819_STATS(m)					\
   m(+1, u64 drop_events, "drop_events", "Dropped events")		\
   m(+1, u64 octets, "octets", "Octets")					\
   m(+1, u64 pkts, "pkts", "Packets")					\
   m(+1, u64 broadcast_pkts, "broadcast_pkts", "Broadcast packets")	\
   m(+1, u64 multicast_pkts, "multicast_pkts", "Multicast packets")	\
   m(+1, u64 crc_align_errors, "crc_align_errors", "CRC alignment errors") \
   m(+1, u64 undersize_pkts, "undersize_pkts", "Undersized packets")	\
   m(+1, u64 oversize_pkts, "oversize_pkts", "Oversized packets")	\
   m(+1, u64 fragments, "fragments", "Fragments")			\
   m(+1, u64 jabbers, "jabbers", "Jabbers")				\
   m(+1, u64 collisions, "collisions", "Collisions")
 
 #define	MLX5E_PPORT_RFC2819_STATS_DEBUG(m)				\
   m(+1, u64 p64octets, "p64octets", "Bytes")				\
   m(+1, u64 p65to127octets, "p65to127octets", "Bytes")			\
   m(+1, u64 p128to255octets, "p128to255octets", "Bytes")		\
   m(+1, u64 p256to511octets, "p256to511octets", "Bytes")		\
   m(+1, u64 p512to1023octets, "p512to1023octets", "Bytes")		\
   m(+1, u64 p1024to1518octets, "p1024to1518octets", "Bytes")		\
   m(+1, u64 p1519to2047octets, "p1519to2047octets", "Bytes")		\
   m(+1, u64 p2048to4095octets, "p2048to4095octets", "Bytes")		\
   m(+1, u64 p4096to8191octets, "p4096to8191octets", "Bytes")		\
   m(+1, u64 p8192to10239octets, "p8192to10239octets", "Bytes")
 
 #define	MLX5E_PPORT_RFC2863_STATS_DEBUG(m)				\
   m(+1, u64 in_octets, "in_octets", "In octets")			\
   m(+1, u64 in_ucast_pkts, "in_ucast_pkts", "In unicast packets")	\
   m(+1, u64 in_discards, "in_discards", "In discards")			\
   m(+1, u64 in_errors, "in_errors", "In errors")			\
   m(+1, u64 in_unknown_protos, "in_unknown_protos", "In unknown protocols") \
   m(+1, u64 out_octets, "out_octets", "Out octets")			\
   m(+1, u64 out_ucast_pkts, "out_ucast_pkts", "Out unicast packets")	\
   m(+1, u64 out_discards, "out_discards", "Out discards")		\
   m(+1, u64 out_errors, "out_errors", "Out errors")			\
   m(+1, u64 in_multicast_pkts, "in_multicast_pkts", "In multicast packets") \
   m(+1, u64 in_broadcast_pkts, "in_broadcast_pkts", "In broadcast packets") \
   m(+1, u64 out_multicast_pkts, "out_multicast_pkts", "Out multicast packets") \
   m(+1, u64 out_broadcast_pkts, "out_broadcast_pkts", "Out broadcast packets")
 
 #define	MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m)                                    		\
   m(+1, u64 time_since_last_clear, "time_since_last_clear",				\
 			"Time since the last counters clear event (msec)")		\
   m(+1, u64 symbol_errors, "symbol_errors", "Symbol errors")				\
   m(+1, u64 sync_headers_errors, "sync_headers_errors", "Sync header error counter")	\
   m(+1, u64 bip_errors_lane0, "edpl_bip_errors_lane0",					\
 			"Indicates the number of PRBS errors on lane 0")		\
   m(+1, u64 bip_errors_lane1, "edpl_bip_errors_lane1",					\
 			"Indicates the number of PRBS errors on lane 1")		\
   m(+1, u64 bip_errors_lane2, "edpl_bip_errors_lane2",					\
 			"Indicates the number of PRBS errors on lane 2")		\
   m(+1, u64 bip_errors_lane3, "edpl_bip_errors_lane3",					\
 			"Indicates the number of PRBS errors on lane 3")		\
   m(+1, u64 fc_corrected_blocks_lane0, "fc_corrected_blocks_lane0",			\
 			"FEC correctable block counter lane 0")				\
   m(+1, u64 fc_corrected_blocks_lane1, "fc_corrected_blocks_lane1",			\
 			"FEC correctable block counter lane 1")				\
   m(+1, u64 fc_corrected_blocks_lane2, "fc_corrected_blocks_lane2",			\
 			"FEC correctable block counter lane 2")				\
   m(+1, u64 fc_corrected_blocks_lane3, "fc_corrected_blocks_lane3",			\
 			"FEC correctable block counter lane 3")				\
   m(+1, u64 rs_corrected_blocks, "rs_corrected_blocks",					\
 			"FEC correcable block counter")					\
   m(+1, u64 rs_uncorrectable_blocks, "rs_uncorrectable_blocks",				\
 			"FEC uncorrecable block counter")				\
   m(+1, u64 rs_no_errors_blocks, "rs_no_errors_blocks",					\
 			"The number of RS-FEC blocks received that had no errors")	\
   m(+1, u64 rs_single_error_blocks, "rs_single_error_blocks",				\
 			"The number of corrected RS-FEC blocks received that had"	\
 			"exactly 1 error symbol")					\
   m(+1, u64 rs_corrected_symbols_total, "rs_corrected_symbols_total",			\
 			"Port FEC corrected symbol counter")				\
   m(+1, u64 rs_corrected_symbols_lane0, "rs_corrected_symbols_lane0",			\
 			"FEC corrected symbol counter lane 0")				\
   m(+1, u64 rs_corrected_symbols_lane1, "rs_corrected_symbols_lane1",			\
 			"FEC corrected symbol counter lane 1")				\
   m(+1, u64 rs_corrected_symbols_lane2, "rs_corrected_symbols_lane2",			\
 			"FEC corrected symbol counter lane 2")				\
   m(+1, u64 rs_corrected_symbols_lane3, "rs_corrected_symbols_lane3",			\
 			"FEC corrected symbol counter lane 3")				\
 
 /*
  * Make sure to update mlx5e_update_pport_counters()
  * when adding a new MLX5E_PPORT_STATS block
  */
 #define	MLX5E_PPORT_STATS(m)			\
   MLX5E_PPORT_IEEE802_3_STATS(m)		\
   MLX5E_PPORT_RFC2819_STATS(m)
 
 #define	MLX5E_PORT_STATS_DEBUG(m)		\
   MLX5E_PPORT_RFC2819_STATS_DEBUG(m)		\
   MLX5E_PPORT_RFC2863_STATS_DEBUG(m)		\
   MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(m)
 
 #define	MLX5E_PPORT_IEEE802_3_STATS_NUM \
   (0 MLX5E_PPORT_IEEE802_3_STATS(MLX5E_STATS_COUNT))
 #define	MLX5E_PPORT_RFC2819_STATS_NUM \
   (0 MLX5E_PPORT_RFC2819_STATS(MLX5E_STATS_COUNT))
 #define	MLX5E_PPORT_STATS_NUM \
   (0 MLX5E_PPORT_STATS(MLX5E_STATS_COUNT))
 
 #define	MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM \
   (0 MLX5E_PPORT_RFC2819_STATS_DEBUG(MLX5E_STATS_COUNT))
 #define	MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM \
   (0 MLX5E_PPORT_RFC2863_STATS_DEBUG(MLX5E_STATS_COUNT))
 #define	MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM \
   (0 MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG(MLX5E_STATS_COUNT))
 #define	MLX5E_PORT_STATS_DEBUG_NUM \
   (0 MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_COUNT))
 
 struct mlx5e_pport_stats {
 	struct	sysctl_ctx_list ctx;
 	u64	arg [0];
 	MLX5E_PPORT_STATS(MLX5E_STATS_VAR)
 };
 
 struct mlx5e_port_stats_debug {
 	struct	sysctl_ctx_list ctx;
 	u64	arg [0];
 	MLX5E_PORT_STATS_DEBUG(MLX5E_STATS_VAR)
 };
 
 #define	MLX5E_RQ_STATS(m)					\
   m(+1, u64 packets, "packets", "Received packets")		\
   m(+1, u64 csum_none, "csum_none", "Received packets")		\
   m(+1, u64 lro_packets, "lro_packets", "Received packets")	\
   m(+1, u64 lro_bytes, "lro_bytes", "Received packets")		\
   m(+1, u64 sw_lro_queued, "sw_lro_queued", "Packets queued for SW LRO")	\
   m(+1, u64 sw_lro_flushed, "sw_lro_flushed", "Packets flushed from SW LRO")	\
   m(+1, u64 wqe_err, "wqe_err", "Received packets")
 
 #define	MLX5E_RQ_STATS_NUM (0 MLX5E_RQ_STATS(MLX5E_STATS_COUNT))
 
 struct mlx5e_rq_stats {
 	struct	sysctl_ctx_list ctx;
 	u64	arg [0];
 	MLX5E_RQ_STATS(MLX5E_STATS_VAR)
 };
 
 #define	MLX5E_SQ_STATS(m)						\
   m(+1, u64 packets, "packets", "Transmitted packets")			\
   m(+1, u64 tso_packets, "tso_packets", "Transmitted packets")		\
   m(+1, u64 tso_bytes, "tso_bytes", "Transmitted bytes")		\
   m(+1, u64 csum_offload_none, "csum_offload_none", "Transmitted packets")	\
   m(+1, u64 defragged, "defragged", "Transmitted packets")		\
   m(+1, u64 dropped, "dropped", "Transmitted packets")			\
   m(+1, u64 nop, "nop", "Transmitted packets")
 
 #define	MLX5E_SQ_STATS_NUM (0 MLX5E_SQ_STATS(MLX5E_STATS_COUNT))
 
 struct mlx5e_sq_stats {
 	struct	sysctl_ctx_list ctx;
 	u64	arg [0];
 	MLX5E_SQ_STATS(MLX5E_STATS_VAR)
 };
 
 struct mlx5e_stats {
 	struct mlx5e_vport_stats vport;
 	struct mlx5e_pport_stats pport;
 	struct mlx5e_port_stats_debug port_stats_debug;
 };
 
 struct mlx5e_params {
 	u8	log_sq_size;
 	u8	log_rq_size;
 	u16	num_channels;
 	u8	default_vlan_prio;
 	u8	num_tc;
 	u8	rx_cq_moderation_mode;
 	u8	tx_cq_moderation_mode;
 	u16	rx_cq_moderation_usec;
 	u16	rx_cq_moderation_pkts;
 	u16	tx_cq_moderation_usec;
 	u16	tx_cq_moderation_pkts;
 	u16	min_rx_wqes;
 	bool	hw_lro_en;
 	u32	lro_wqe_sz;
 	u16	rx_hash_log_tbl_sz;
 };
 
 #define	MLX5E_PARAMS(m)							\
   m(+1, u64 tx_pauseframe_control, "tx_pauseframe_control", "Set to enable TX pause frames. Clear to disable.") \
   m(+1, u64 rx_pauseframe_control, "rx_pauseframe_control", "Set to enable RX pause frames. Clear to disable.") \
   m(+1, u64 tx_queue_size_max, "tx_queue_size_max", "Max send queue size") \
   m(+1, u64 rx_queue_size_max, "rx_queue_size_max", "Max receive queue size") \
   m(+1, u64 tx_queue_size, "tx_queue_size", "Default send queue size")	\
   m(+1, u64 rx_queue_size, "rx_queue_size", "Default receive queue size") \
   m(+1, u64 channels, "channels", "Default number of channels")		\
   m(+1, u64 coalesce_usecs_max, "coalesce_usecs_max", "Maximum usecs for joining packets") \
   m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \
   m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \
   m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \
   m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \
   m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \
   m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \
   m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
   m(+1, u64 hw_lro, "hw_lro", "set to enable hw_lro")
 
 #define	MLX5E_PARAMS_NUM (0 MLX5E_PARAMS(MLX5E_STATS_COUNT))
 
 struct mlx5e_params_ethtool {
 	u64	arg [0];
 	MLX5E_PARAMS(MLX5E_STATS_VAR)
 };
 
 /* EEPROM Standards for plug in modules */
 #ifndef MLX5E_ETH_MODULE_SFF_8472
 #define	MLX5E_ETH_MODULE_SFF_8472	0x1
 #define	MLX5E_ETH_MODULE_SFF_8472_LEN	128
 #endif
 
 #ifndef MLX5E_ETH_MODULE_SFF_8636
 #define	MLX5E_ETH_MODULE_SFF_8636	0x2
 #define	MLX5E_ETH_MODULE_SFF_8636_LEN	256
 #endif
 
 #ifndef MLX5E_ETH_MODULE_SFF_8436
 #define	MLX5E_ETH_MODULE_SFF_8436	0x3
 #define	MLX5E_ETH_MODULE_SFF_8436_LEN	256
 #endif
 
 /* EEPROM I2C Addresses */
 #define	MLX5E_I2C_ADDR_LOW		0x50
 #define	MLX5E_I2C_ADDR_HIGH		0x51
 
 #define	MLX5E_EEPROM_LOW_PAGE		0x0
 #define	MLX5E_EEPROM_HIGH_PAGE		0x3
 
 #define	MLX5E_EEPROM_HIGH_PAGE_OFFSET	128
 #define	MLX5E_EEPROM_PAGE_LENGTH	256
 
 #define	MLX5E_EEPROM_INFO_BYTES		0x3
 
 struct mlx5e_cq {
 	/* data path - accessed per cqe */
 	struct mlx5_cqwq wq;
 
 	/* data path - accessed per HW polling */
 	struct mlx5_core_cq mcq;
 	struct mlx5e_channel *channel;
 
 	/* control */
 	struct mlx5_wq_ctrl wq_ctrl;
 } __aligned(MLX5E_CACHELINE_SIZE);
 
 struct mlx5e_rq_mbuf {
 	bus_dmamap_t	dma_map;
 	caddr_t		data;
 	struct mbuf	*mbuf;
 };
 
 struct mlx5e_rq {
 	/* data path */
 	struct mlx5_wq_ll wq;
 	struct mtx mtx;
 	bus_dma_tag_t dma_tag;
 	u32	wqe_sz;
 	struct mlx5e_rq_mbuf *mbuf;
 	struct device *pdev;
 	struct ifnet *ifp;
 	struct mlx5e_rq_stats stats;
 	struct mlx5e_cq cq;
 #ifdef HAVE_TURBO_LRO
 	struct tlro_ctrl lro;
 #else
 	struct lro_ctrl lro;
 #endif
 	volatile int enabled;
 	int	ix;
 
 	/* control */
 	struct mlx5_wq_ctrl wq_ctrl;
 	u32	rqn;
 	struct mlx5e_channel *channel;
 } __aligned(MLX5E_CACHELINE_SIZE);
 
 struct mlx5e_sq_mbuf {
 	bus_dmamap_t dma_map;
 	struct mbuf *mbuf;
 	u32	num_bytes;
 	u32	num_wqebbs;
 };
 
 enum {
 	MLX5E_SQ_READY,
 	MLX5E_SQ_FULL
 };
 
 struct mlx5e_sq {
 	/* data path */
 	struct	mtx lock;
 	bus_dma_tag_t dma_tag;
 	struct	mtx comp_lock;
 
 	/* dirtied @completion */
 	u16	cc;
 
 	/* dirtied @xmit */
 	u16	pc __aligned(MLX5E_CACHELINE_SIZE);
 	u16	bf_offset;
 	struct	mlx5e_sq_stats stats;
 
 	struct	mlx5e_cq cq;
 	struct	task sq_task;
 	struct	taskqueue *sq_tq;
 
 	/* pointers to per packet info: write@xmit, read@completion */
 	struct	mlx5e_sq_mbuf *mbuf;
 	struct	buf_ring *br;
 
 	/* read only */
 	struct	mlx5_wq_cyc wq;
 	void	__iomem *uar_map;
 	void	__iomem *uar_bf_map;
 	u32	sqn;
 	u32	bf_buf_size;
 	struct  device *pdev;
 	u32	mkey_be;
 
 	/* control path */
 	struct	mlx5_wq_ctrl wq_ctrl;
 	struct	mlx5_uar uar;
 	struct	mlx5e_channel *channel;
 	int	tc;
 	unsigned int queue_state;
 } __aligned(MLX5E_CACHELINE_SIZE);
 
 static inline bool
 mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
 {
 	return ((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n ||
 	    sq->cc == sq->pc);
 }
 
 struct mlx5e_channel {
 	/* data path */
 	struct mlx5e_rq rq;
 	struct mlx5e_sq sq[MLX5E_MAX_TX_NUM_TC];
 	struct device *pdev;
 	struct ifnet *ifp;
 	u32	mkey_be;
 	u8	num_tc;
 
 	/* control */
 	struct mlx5e_priv *priv;
 	int	ix;
 	int	cpu;
 } __aligned(MLX5E_CACHELINE_SIZE);
 
 enum mlx5e_traffic_types {
 	MLX5E_TT_IPV4_TCP,
 	MLX5E_TT_IPV6_TCP,
 	MLX5E_TT_IPV4_UDP,
 	MLX5E_TT_IPV6_UDP,
 	MLX5E_TT_IPV4_IPSEC_AH,
 	MLX5E_TT_IPV6_IPSEC_AH,
 	MLX5E_TT_IPV4_IPSEC_ESP,
 	MLX5E_TT_IPV6_IPSEC_ESP,
 	MLX5E_TT_IPV4,
 	MLX5E_TT_IPV6,
 	MLX5E_TT_ANY,
 	MLX5E_NUM_TT,
 };
 
 enum {
 	MLX5E_RQT_SPREADING = 0,
 	MLX5E_RQT_DEFAULT_RQ = 1,
 	MLX5E_NUM_RQT = 2,
 };
 
 struct mlx5e_eth_addr_info {
 	u8	addr [ETH_ALEN + 2];
 	u32	tt_vec;
 	u32	ft_ix[MLX5E_NUM_TT];	/* flow table index per traffic type */
 };
 
 #define	MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE)
 
 struct mlx5e_eth_addr_hash_node;
 
 struct mlx5e_eth_addr_hash_head {
 	struct mlx5e_eth_addr_hash_node *lh_first;
 };
 
 struct mlx5e_eth_addr_db {
 	struct mlx5e_eth_addr_hash_head if_uc[MLX5E_ETH_ADDR_HASH_SIZE];
 	struct mlx5e_eth_addr_hash_head if_mc[MLX5E_ETH_ADDR_HASH_SIZE];
 	struct mlx5e_eth_addr_info broadcast;
 	struct mlx5e_eth_addr_info allmulti;
 	struct mlx5e_eth_addr_info promisc;
 	bool	broadcast_enabled;
 	bool	allmulti_enabled;
 	bool	promisc_enabled;
 };
 
 enum {
 	MLX5E_STATE_ASYNC_EVENTS_ENABLE,
 	MLX5E_STATE_OPENED,
 };
 
 struct mlx5e_vlan_db {
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	u32	active_vlans_ft_ix[VLAN_N_VID];
 	u32	untagged_rule_ft_ix;
 	u32	any_vlan_rule_ft_ix;
 	bool	filter_disabled;
 };
 
 struct mlx5e_flow_table {
 	void   *vlan;
 	void   *main;
 };
 
 struct mlx5e_priv {
 	/* priv data path fields - start */
 	int	order_base_2_num_channels;
 	int	queue_mapping_channel_mask;
 	int	num_tc;
 	int	default_vlan_prio;
 	/* priv data path fields - end */
 
 	unsigned long state;
 	int	gone;
 #define	PRIV_LOCK(priv) sx_xlock(&(priv)->state_lock)
 #define	PRIV_UNLOCK(priv) sx_xunlock(&(priv)->state_lock)
 #define	PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock)
 	struct sx state_lock;		/* Protects Interface state */
 	struct mlx5_uar cq_uar;
 	u32	pdn;
 	u32	tdn;
 	struct mlx5_core_mr mr;
 
 	struct mlx5e_channel *volatile *channel;
 	u32	tisn[MLX5E_MAX_TX_NUM_TC];
 	u32	rqtn;
 	u32	tirn[MLX5E_NUM_TT];
 
 	struct mlx5e_flow_table ft;
 	struct mlx5e_eth_addr_db eth_addr;
 	struct mlx5e_vlan_db vlan;
 
 	struct mlx5e_params params;
 	struct mlx5e_params_ethtool params_ethtool;
 	struct mtx async_events_mtx;	/* sync hw events */
 	struct work_struct update_stats_work;
 	struct work_struct update_carrier_work;
 	struct work_struct set_rx_mode_work;
 
 	struct mlx5_core_dev *mdev;
 	struct ifnet *ifp;
 	struct sysctl_ctx_list sysctl_ctx;
 	struct sysctl_oid *sysctl_ifnet;
 	struct sysctl_oid *sysctl_hw;
 	int	sysctl_debug;
 	struct mlx5e_stats stats;
 	int	counter_set_id;
 
 	eventhandler_tag vlan_detach;
 	eventhandler_tag vlan_attach;
 	struct ifmedia media;
 	int	media_status_last;
 	int	media_active_last;
 
 	struct callout watchdog;
 };
 
 #define	MLX5E_NET_IP_ALIGN 2
 
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg eth;
 };
 
 struct mlx5e_rx_wqe {
 	struct mlx5_wqe_srq_next_seg next;
 	struct mlx5_wqe_data_seg data;
 };
 
 struct mlx5e_eeprom {
 	int	lock_bit;
 	int	i2c_addr;
 	int	page_num;
 	int	device_addr;
 	int	module_num;
 	int	len;
 	int	type;
 	int	page_valid;
 	u32	*data;
 };
 
 enum mlx5e_link_mode {
 	MLX5E_1000BASE_CX_SGMII = 0,
 	MLX5E_1000BASE_KX = 1,
 	MLX5E_10GBASE_CX4 = 2,
 	MLX5E_10GBASE_KX4 = 3,
 	MLX5E_10GBASE_KR = 4,
 	MLX5E_20GBASE_KR2 = 5,
 	MLX5E_40GBASE_CR4 = 6,
 	MLX5E_40GBASE_KR4 = 7,
 	MLX5E_56GBASE_R4 = 8,
 	MLX5E_10GBASE_CR = 12,
 	MLX5E_10GBASE_SR = 13,
 	MLX5E_10GBASE_ER = 14,
 	MLX5E_40GBASE_SR4 = 15,
 	MLX5E_40GBASE_LR4 = 16,
 	MLX5E_100GBASE_CR4 = 20,
 	MLX5E_100GBASE_SR4 = 21,
 	MLX5E_100GBASE_KR4 = 22,
 	MLX5E_100GBASE_LR4 = 23,
 	MLX5E_100BASE_TX = 24,
 	MLX5E_100BASE_T = 25,
 	MLX5E_10GBASE_T = 26,
 	MLX5E_25GBASE_CR = 27,
 	MLX5E_25GBASE_KR = 28,
 	MLX5E_25GBASE_SR = 29,
 	MLX5E_50GBASE_CR2 = 30,
 	MLX5E_50GBASE_KR2 = 31,
 	MLX5E_LINK_MODES_NUMBER,
 };
 
 #define	MLX5E_PROT_MASK(link_mode) (1 << (link_mode))
 #define	MLX5E_FLD_MAX(typ, fld) ((1ULL << __mlx5_bit_sz(typ, fld)) - 1ULL)
 
 int	mlx5e_xmit(struct ifnet *, struct mbuf *);
 
 int	mlx5e_open_locked(struct ifnet *);
 int	mlx5e_close_locked(struct ifnet *);
 
 void	mlx5e_cq_error_event(struct mlx5_core_cq *mcq, int event);
 void	mlx5e_rx_cq_comp(struct mlx5_core_cq *);
 void	mlx5e_tx_cq_comp(struct mlx5_core_cq *);
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
 void	mlx5e_tx_que(void *context, int pending);
 
 int	mlx5e_open_flow_table(struct mlx5e_priv *priv);
 void	mlx5e_close_flow_table(struct mlx5e_priv *priv);
 void	mlx5e_set_rx_mode_core(struct mlx5e_priv *priv);
 void	mlx5e_set_rx_mode_work(struct work_struct *work);
 
 void	mlx5e_vlan_rx_add_vid(void *, struct ifnet *, u16);
 void	mlx5e_vlan_rx_kill_vid(void *, struct ifnet *, u16);
 void	mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
 void	mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
 int	mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv);
 void	mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv);
 
 static inline void
 mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
     struct mlx5e_tx_wqe *wqe, int bf_sz)
 {
 	u16 ofst = MLX5_BF_OFFSET + sq->bf_offset;
 
 	/* ensure wqe is visible to device before updating doorbell record */
 	wmb();
 
 	*sq->wq.db = cpu_to_be32(sq->pc);
 
 	/*
 	 * Ensure the doorbell record is visible to device before ringing
 	 * the doorbell:
 	 */
 	wmb();
 
 	if (bf_sz) {
 		__iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz);
 
 		/* flush the write-combining mapped buffer */
 		wmb();
 
 	} else {
 		mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL);
 	}
 
 	sq->bf_offset ^= sq->bf_buf_size;
 }
 
 static inline void
 mlx5e_cq_arm(struct mlx5e_cq *cq)
 {
 	struct mlx5_core_cq *mcq;
 
 	mcq = &cq->mcq;
 	mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc);
 }
 
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 void	mlx5e_create_ethtool(struct mlx5e_priv *);
 void	mlx5e_create_stats(struct sysctl_ctx_list *,
     struct sysctl_oid_list *, const char *,
     const char **, unsigned, u64 *);
 void	mlx5e_send_nop(struct mlx5e_sq *, u32, bool);
 
 #endif					/* _MLX5_EN_H_ */
Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
===================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	(revision 291937)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	(revision 291938)
@@ -1,2901 +1,2959 @@
 /*-
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "en.h"
 
 #include <sys/sockio.h>
 #include <machine/atomic.h>
 
 #define	ETH_DRIVER_VERSION	"3.1.0-dev"
 char mlx5e_version[] = "Mellanox Ethernet driver"
     " (" ETH_DRIVER_VERSION ")";
 
 struct mlx5e_rq_param {
 	u32	rqc [MLX5_ST_SZ_DW(rqc)];
 	struct mlx5_wq_param wq;
 };
 
 struct mlx5e_sq_param {
 	u32	sqc [MLX5_ST_SZ_DW(sqc)];
 	struct mlx5_wq_param wq;
 };
 
 struct mlx5e_cq_param {
 	u32	cqc [MLX5_ST_SZ_DW(cqc)];
 	struct mlx5_wq_param wq;
 	u16	eq_ix;
 };
 
 struct mlx5e_channel_param {
 	struct mlx5e_rq_param rq;
 	struct mlx5e_sq_param sq;
 	struct mlx5e_cq_param rx_cq;
 	struct mlx5e_cq_param tx_cq;
 };
 
 static const struct {
 	u32	subtype;
 	u64	baudrate;
 }	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
 
 	[MLX5E_1000BASE_CX_SGMII] = {
 		.subtype = IFM_1000_CX_SGMII,
 		.baudrate = IF_Mbps(1000ULL),
 	},
 	[MLX5E_1000BASE_KX] = {
 		.subtype = IFM_1000_KX,
 		.baudrate = IF_Mbps(1000ULL),
 	},
 	[MLX5E_10GBASE_CX4] = {
 		.subtype = IFM_10G_CX4,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_10GBASE_KX4] = {
 		.subtype = IFM_10G_KX4,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_10GBASE_KR] = {
 		.subtype = IFM_10G_KR,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_20GBASE_KR2] = {
 		.subtype = IFM_20G_KR2,
 		.baudrate = IF_Gbps(20ULL),
 	},
 	[MLX5E_40GBASE_CR4] = {
 		.subtype = IFM_40G_CR4,
 		.baudrate = IF_Gbps(40ULL),
 	},
 	[MLX5E_40GBASE_KR4] = {
 		.subtype = IFM_40G_KR4,
 		.baudrate = IF_Gbps(40ULL),
 	},
 	[MLX5E_56GBASE_R4] = {
 		.subtype = IFM_56G_R4,
 		.baudrate = IF_Gbps(56ULL),
 	},
 	[MLX5E_10GBASE_CR] = {
 		.subtype = IFM_10G_CR1,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_10GBASE_SR] = {
 		.subtype = IFM_10G_SR,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_10GBASE_ER] = {
 		.subtype = IFM_10G_ER,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_40GBASE_SR4] = {
 		.subtype = IFM_40G_SR4,
 		.baudrate = IF_Gbps(40ULL),
 	},
 	[MLX5E_40GBASE_LR4] = {
 		.subtype = IFM_40G_LR4,
 		.baudrate = IF_Gbps(40ULL),
 	},
 	[MLX5E_100GBASE_CR4] = {
 		.subtype = IFM_100G_CR4,
 		.baudrate = IF_Gbps(100ULL),
 	},
 	[MLX5E_100GBASE_SR4] = {
 		.subtype = IFM_100G_SR4,
 		.baudrate = IF_Gbps(100ULL),
 	},
 	[MLX5E_100GBASE_KR4] = {
 		.subtype = IFM_100G_KR4,
 		.baudrate = IF_Gbps(100ULL),
 	},
 	[MLX5E_100GBASE_LR4] = {
 		.subtype = IFM_100G_LR4,
 		.baudrate = IF_Gbps(100ULL),
 	},
 	[MLX5E_100BASE_TX] = {
 		.subtype = IFM_100_TX,
 		.baudrate = IF_Mbps(100ULL),
 	},
 	[MLX5E_100BASE_T] = {
 		.subtype = IFM_100_T,
 		.baudrate = IF_Mbps(100ULL),
 	},
 	[MLX5E_10GBASE_T] = {
 		.subtype = IFM_10G_T,
 		.baudrate = IF_Gbps(10ULL),
 	},
 	[MLX5E_25GBASE_CR] = {
 		.subtype = IFM_25G_CR,
 		.baudrate = IF_Gbps(25ULL),
 	},
 	[MLX5E_25GBASE_KR] = {
 		.subtype = IFM_25G_KR,
 		.baudrate = IF_Gbps(25ULL),
 	},
 	[MLX5E_25GBASE_SR] = {
 		.subtype = IFM_25G_SR,
 		.baudrate = IF_Gbps(25ULL),
 	},
 	[MLX5E_50GBASE_CR2] = {
 		.subtype = IFM_50G_CR2,
 		.baudrate = IF_Gbps(50ULL),
 	},
 	[MLX5E_50GBASE_KR2] = {
 		.subtype = IFM_50G_KR2,
 		.baudrate = IF_Gbps(50ULL),
 	},
 };
 
 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
 
 static void
 mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
 	u32 eth_proto_oper;
 	int error;
 	u8 port_state;
 	u8 i;
 
 	port_state = mlx5_query_vport_state(mdev,
 	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
 
 	if (port_state == VPORT_STATE_UP) {
 		priv->media_status_last |= IFM_ACTIVE;
 	} else {
 		priv->media_status_last &= ~IFM_ACTIVE;
 		priv->media_active_last = IFM_ETHER;
 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
 		return;
 	}
 
 	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
 	if (error) {
 		priv->media_active_last = IFM_ETHER;
 		priv->ifp->if_baudrate = 1;
 		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
 		    __func__, error);
 		return;
 	}
 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
 
 	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
 		if (mlx5e_mode_table[i].baudrate == 0)
 			continue;
 		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
 			priv->ifp->if_baudrate =
 			    mlx5e_mode_table[i].baudrate;
 			priv->media_active_last =
 			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
 		}
 	}
 	if_link_state_change(priv->ifp, LINK_STATE_UP);
 }
 
 static void
 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
 {
 	struct mlx5e_priv *priv = dev->if_softc;
 
 	ifmr->ifm_status = priv->media_status_last;
 	ifmr->ifm_active = priv->media_active_last |
 	    (priv->params_ethtool.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
 	    (priv->params_ethtool.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
 
 }
 
 static u32
 mlx5e_find_link_mode(u32 subtype)
 {
 	u32 i;
 	u32 link_mode = 0;
 
 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
 		if (mlx5e_mode_table[i].baudrate == 0)
 			continue;
 		if (mlx5e_mode_table[i].subtype == subtype)
 			link_mode |= MLX5E_PROT_MASK(i);
 	}
 
 	return (link_mode);
 }
 
 static int
 mlx5e_media_change(struct ifnet *dev)
 {
 	struct mlx5e_priv *priv = dev->if_softc;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 eth_proto_cap;
 	u32 link_mode;
 	int locked;
 	int error;
 
 	locked = PRIV_LOCKED(priv);
 	if (!locked)
 		PRIV_LOCK(priv);
 
 	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
 		error = EINVAL;
 		goto done;
 	}
 	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
 
 	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
 	if (error) {
 		if_printf(dev, "Query port media capability failed\n");
 		goto done;
 	}
 	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO)
 		link_mode = eth_proto_cap;
 	else
 		link_mode = link_mode & eth_proto_cap;
 
 	if (!link_mode) {
 		if_printf(dev, "Not supported link mode requested\n");
 		error = EINVAL;
 		goto done;
 	}
 	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
 	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
 	mlx5_set_port_status(mdev, MLX5_PORT_UP);
 
 done:
 	if (!locked)
 		PRIV_UNLOCK(priv);
 	return (error);
 }
 
 static void
 mlx5e_update_carrier_work(struct work_struct *work)
 {
 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 	    update_carrier_work);
 
 	PRIV_LOCK(priv);
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 		mlx5e_update_carrier(priv);
 	PRIV_UNLOCK(priv);
 }
 
 static void
 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_pport_stats *s = &priv->stats.pport;
 	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
 	u32 *in;
 	u32 *out;
 	u64 *ptr;
 	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
 	unsigned x;
 	unsigned y;
 
 	in = mlx5_vzalloc(sz);
 	out = mlx5_vzalloc(sz);
 	if (in == NULL || out == NULL)
 		goto free_out;
 
 	ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
 
 	MLX5_SET(ppcnt_reg, in, local_port, 1);
 
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
 		s->arg[y] = be64toh(ptr[x]);
 
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
 		s->arg[y] = be64toh(ptr[x]);
 	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
 	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
 		s_debug->arg[y] = be64toh(ptr[x]);
 
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
 		s_debug->arg[y] = be64toh(ptr[x]);
 
 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
 		s_debug->arg[y] = be64toh(ptr[x]);
 free_out:
 	kvfree(in);
 	kvfree(out);
 }
 
 static void
 mlx5e_update_stats_work(struct work_struct *work)
 {
 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 	    update_stats_work);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_vport_stats *s = &priv->stats.vport;
 	struct mlx5e_rq_stats *rq_stats;
 	struct mlx5e_sq_stats *sq_stats;
 	struct buf_ring *sq_br;
 #if (__FreeBSD_version < 1100000)
 	struct ifnet *ifp = priv->ifp;
 #endif
 
 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
 	u64 tso_packets = 0;
 	u64 tso_bytes = 0;
 	u64 tx_queue_dropped = 0;
 	u64 tx_defragged = 0;
 	u64 tx_offload_none = 0;
 	u64 lro_packets = 0;
 	u64 lro_bytes = 0;
 	u64 sw_lro_queued = 0;
 	u64 sw_lro_flushed = 0;
 	u64 rx_csum_none = 0;
 	u64 rx_wqe_err = 0;
 	u32 rx_out_of_buffer = 0;
 	int i;
 	int j;
 
 	PRIV_LOCK(priv);
 	out = mlx5_vzalloc(outlen);
 	if (out == NULL)
 		goto free_out;
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 		goto free_out;
 
 	/* Collect the SW counters first and then the HW counters, for consistency */
 	for (i = 0; i < priv->params.num_channels; i++) {
 		struct mlx5e_rq *rq = &priv->channel[i]->rq;
 
 		rq_stats = &priv->channel[i]->rq.stats;
 
 		/* collect stats from LRO */
 		rq_stats->sw_lro_queued = rq->lro.lro_queued;
 		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
 		sw_lro_queued += rq_stats->sw_lro_queued;
 		sw_lro_flushed += rq_stats->sw_lro_flushed;
 		lro_packets += rq_stats->lro_packets;
 		lro_bytes += rq_stats->lro_bytes;
 		rx_csum_none += rq_stats->csum_none;
 		rx_wqe_err += rq_stats->wqe_err;
 
 		for (j = 0; j < priv->num_tc; j++) {
 			sq_stats = &priv->channel[i]->sq[j].stats;
 			sq_br = priv->channel[i]->sq[j].br;
 
 			tso_packets += sq_stats->tso_packets;
 			tso_bytes += sq_stats->tso_bytes;
 			tx_queue_dropped += sq_stats->dropped;
 			tx_queue_dropped += sq_br->br_drops;
 			tx_defragged += sq_stats->defragged;
 			tx_offload_none += sq_stats->csum_offload_none;
 		}
 	}
 
 	/* update counters */
 	s->tso_packets = tso_packets;
 	s->tso_bytes = tso_bytes;
 	s->tx_queue_dropped = tx_queue_dropped;
 	s->tx_defragged = tx_defragged;
 	s->lro_packets = lro_packets;
 	s->lro_bytes = lro_bytes;
 	s->sw_lro_queued = sw_lro_queued;
 	s->sw_lro_flushed = sw_lro_flushed;
 	s->rx_csum_none = rx_csum_none;
 	s->rx_wqe_err = rx_wqe_err;
 
 	/* HW counters */
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(query_vport_counter_in, in, opcode,
 	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
 
 	memset(out, 0, outlen);
 
 	/* get number of out-of-buffer drops first */
 	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
 	    &rx_out_of_buffer))
 		goto free_out;
 
 	/* accumulate difference into a 64-bit counter */
 	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
 	s->rx_out_of_buffer_prev = rx_out_of_buffer;
 
 	/* get port statistics */
 	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
 		goto free_out;
 
 #define	MLX5_GET_CTR(out, x) \
 	MLX5_GET64(query_vport_counter_out, out, x)
 
 	s->rx_error_packets =
 	    MLX5_GET_CTR(out, received_errors.packets);
 	s->rx_error_bytes =
 	    MLX5_GET_CTR(out, received_errors.octets);
 	s->tx_error_packets =
 	    MLX5_GET_CTR(out, transmit_errors.packets);
 	s->tx_error_bytes =
 	    MLX5_GET_CTR(out, transmit_errors.octets);
 
 	s->rx_unicast_packets =
 	    MLX5_GET_CTR(out, received_eth_unicast.packets);
 	s->rx_unicast_bytes =
 	    MLX5_GET_CTR(out, received_eth_unicast.octets);
 	s->tx_unicast_packets =
 	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
 	s->tx_unicast_bytes =
 	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
 
 	s->rx_multicast_packets =
 	    MLX5_GET_CTR(out, received_eth_multicast.packets);
 	s->rx_multicast_bytes =
 	    MLX5_GET_CTR(out, received_eth_multicast.octets);
 	s->tx_multicast_packets =
 	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
 	s->tx_multicast_bytes =
 	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
 
 	s->rx_broadcast_packets =
 	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
 	s->rx_broadcast_bytes =
 	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
 	s->tx_broadcast_packets =
 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
 	s->tx_broadcast_bytes =
 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
 
 	s->rx_packets =
 	    s->rx_unicast_packets +
 	    s->rx_multicast_packets +
 	    s->rx_broadcast_packets -
 	    s->rx_out_of_buffer;
 	s->rx_bytes =
 	    s->rx_unicast_bytes +
 	    s->rx_multicast_bytes +
 	    s->rx_broadcast_bytes;
 	s->tx_packets =
 	    s->tx_unicast_packets +
 	    s->tx_multicast_packets +
 	    s->tx_broadcast_packets;
 	s->tx_bytes =
 	    s->tx_unicast_bytes +
 	    s->tx_multicast_bytes +
 	    s->tx_broadcast_bytes;
 
 	/* Update calculated offload counters */
 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
 	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
 
 	/* Update per port counters */
 	mlx5e_update_pport_counters(priv);
 
 #if (__FreeBSD_version < 1100000)
 	/* no get_counters interface in fbsd 10 */
 	ifp->if_ipackets = s->rx_packets;
 	ifp->if_ierrors = s->rx_error_packets;
 	ifp->if_iqdrops = s->rx_out_of_buffer;
 	ifp->if_opackets = s->tx_packets;
 	ifp->if_oerrors = s->tx_error_packets;
 	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
 	ifp->if_ibytes = s->rx_bytes;
 	ifp->if_obytes = s->tx_bytes;
 #endif
 
 free_out:
 	kvfree(out);
 	PRIV_UNLOCK(priv);
 }
 
 static void
 mlx5e_update_stats(void *arg)
 {
 	struct mlx5e_priv *priv = arg;
 
 	schedule_work(&priv->update_stats_work);
 
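 	/* re-arm the watchdog so statistics are updated again in one second */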
 	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
 }
 
 static void
 mlx5e_async_event_sub(struct mlx5e_priv *priv,
     enum mlx5_dev_event event)
 {
 	switch (event) {
 	case MLX5_DEV_EVENT_PORT_UP:
 	case MLX5_DEV_EVENT_PORT_DOWN:
 		schedule_work(&priv->update_carrier_work);
 		break;
 
 	default:
 		break;
 	}
 }
 
 static void
 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
     enum mlx5_dev_event event, unsigned long param)
 {
 	struct mlx5e_priv *priv = vpriv;
 
 	mtx_lock(&priv->async_events_mtx);
 	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
 		mlx5e_async_event_sub(priv, event);
 	mtx_unlock(&priv->async_events_mtx);
 }
 
 static void
 mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
 	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
 }
 
 static void
 mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
 	mtx_lock(&priv->async_events_mtx);
 	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
 	mtx_unlock(&priv->async_events_mtx);
 }
 
 static const char *mlx5e_rq_stats_desc[] = {
 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
 };
 
 static int
 mlx5e_create_rq(struct mlx5e_channel *c,
     struct mlx5e_rq_param *param,
     struct mlx5e_rq *rq)
 {
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	char buffer[16];
 	void *rqc = param->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
 	int wq_sz;
 	int err;
 	int i;
 
 	/* Create DMA descriptor TAG */
 	if ((err = -bus_dma_tag_create(
 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
 	    1,				/* any alignment */
 	    0,				/* no boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    MJUM16BYTES,		/* maxsize */
 	    1,				/* nsegments */
 	    MJUM16BYTES,		/* maxsegsize */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockfuncarg */
 	    &rq->dma_tag)))
 		goto done;
 
 	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
 	    &rq->wq_ctrl);
 	if (err)
 		goto err_free_dma_tag;
 
 	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
 
 	if (priv->params.hw_lro_en) {
 		rq->wqe_sz = priv->params.lro_wqe_sz;
 	} else {
 		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
 	}
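 	/* select the smallest supported mbuf cluster size that fits the WQE size */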
 	if (rq->wqe_sz > MJUM16BYTES) {
 		err = -ENOMEM;
 		goto err_rq_wq_destroy;
 	} else if (rq->wqe_sz > MJUM9BYTES) {
 		rq->wqe_sz = MJUM16BYTES;
 	} else if (rq->wqe_sz > MJUMPAGESIZE) {
 		rq->wqe_sz = MJUM9BYTES;
 	} else if (rq->wqe_sz > MCLBYTES) {
 		rq->wqe_sz = MJUMPAGESIZE;
 	} else {
 		rq->wqe_sz = MCLBYTES;
 	}
 
 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
 	if (rq->mbuf == NULL) {
 		err = -ENOMEM;
 		goto err_rq_wq_destroy;
 	}
 	for (i = 0; i != wq_sz; i++) {
 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
 		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
 
 		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
 		if (err != 0) {
 			while (i--)
 				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
 			goto err_rq_mbuf_free;
 		}
 		wqe->data.lkey = c->mkey_be;
 		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
 	}
 
 	rq->pdev = c->pdev;
 	rq->ifp = c->ifp;
 	rq->channel = c;
 	rq->ix = c->ix;
 
 	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
 	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
 	    rq->stats.arg);
 
 #ifdef HAVE_TURBO_LRO
 	if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
 		rq->lro.mbuf = NULL;
 #else
 	if (tcp_lro_init(&rq->lro))
 		rq->lro.lro_cnt = 0;
 	else
 		rq->lro.ifp = c->ifp;
 #endif
 	return (0);
 
 err_rq_mbuf_free:
 	free(rq->mbuf, M_MLX5EN);
 err_rq_wq_destroy:
 	mlx5_wq_destroy(&rq->wq_ctrl);
 err_free_dma_tag:
 	bus_dma_tag_destroy(rq->dma_tag);
 done:
 	return (err);
 }
 
 static void
 mlx5e_destroy_rq(struct mlx5e_rq *rq)
 {
 	int wq_sz;
 	int i;
 
 	/* destroy all sysctl nodes */
 	sysctl_ctx_free(&rq->stats.ctx);
 
 	/* free leftover LRO packets, if any */
 #ifdef HAVE_TURBO_LRO
 	tcp_tlro_free(&rq->lro);
 #else
 	tcp_lro_free(&rq->lro);
 #endif
 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 	for (i = 0; i != wq_sz; i++) {
 		if (rq->mbuf[i].mbuf != NULL) {
 			bus_dmamap_unload(rq->dma_tag,
 			    rq->mbuf[i].dma_map);
 			m_freem(rq->mbuf[i].mbuf);
 		}
 		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
 	}
 	free(rq->mbuf, M_MLX5EN);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
 static int
 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
 {
 	struct mlx5e_channel *c = rq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	void *in;
 	void *rqc;
 	void *wq;
 	int inlen;
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
 	    sizeof(u64) * rq->wq_ctrl.buf.npages;
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 
 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
 	memcpy(rqc, param->rqc, sizeof(param->rqc));
 
 	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
 	if (priv->counter_set_id >= 0)
 		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
 	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
 	    PAGE_SHIFT);
 	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
 
 	mlx5_fill_page_array(&rq->wq_ctrl.buf,
 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
 
 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static int
 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
 {
 	struct mlx5e_channel *c = rq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	void *in;
 	void *rqc;
 	int inlen;
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 
 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 
 	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
 	MLX5_SET(rqc, rqc, state, next_state);
 
 	err = mlx5_core_modify_rq(mdev, in, inlen);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static void
 mlx5e_disable_rq(struct mlx5e_rq *rq)
 {
 	struct mlx5e_channel *c = rq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	mlx5_core_destroy_rq(mdev, rq->rqn);
 }
 
 static int
 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
 {
 	struct mlx5e_channel *c = rq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_wq_ll *wq = &rq->wq;
 	int i;
 
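 	/* poll up to ~4 seconds for the minimum number of RX WQEs to be posted */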
 	for (i = 0; i < 1000; i++) {
 		if (wq->cur_sz >= priv->params.min_rx_wqes)
 			return (0);
 
 		msleep(4);
 	}
 	return (-ETIMEDOUT);
 }
 
 static int
 mlx5e_open_rq(struct mlx5e_channel *c,
     struct mlx5e_rq_param *param,
     struct mlx5e_rq *rq)
 {
 	int err;
 	int i;
 
 	err = mlx5e_create_rq(c, param, rq);
 	if (err)
 		return (err);
 
 	err = mlx5e_enable_rq(rq, param);
 	if (err)
 		goto err_destroy_rq;
 
 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 	if (err)
 		goto err_disable_rq;
 
 	c->rq.enabled = 1;
 
 	/*
 	 * Test send queues, which will trigger
 	 * "mlx5e_post_rx_wqes()":
 	 */
 	for (i = 0; i != c->num_tc; i++)
 		mlx5e_send_nop(&c->sq[i], 1, true);
 	return (0);
 
 err_disable_rq:
 	mlx5e_disable_rq(rq);
 err_destroy_rq:
 	mlx5e_destroy_rq(rq);
 
 	return (err);
 }
 
 static void
 mlx5e_close_rq(struct mlx5e_rq *rq)
 {
 	rq->enabled = 0;
 	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
 }
 
 static void
 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
 {
 	/* wait till RQ is empty */
 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
 		msleep(4);
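 		/* manually invoke the completion handler to reap completions */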
 		rq->cq.mcq.comp(&rq->cq.mcq);
 	}
 
 	mlx5e_disable_rq(rq);
 	mlx5e_destroy_rq(rq);
 }
 
 static void
 mlx5e_free_sq_db(struct mlx5e_sq *sq)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 	int x;
 
 	for (x = 0; x != wq_sz; x++)
 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
 	free(sq->mbuf, M_MLX5EN);
 }
 
 static int
 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 	int err;
 	int x;
 
 	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
 	if (sq->mbuf == NULL)
 		return (-ENOMEM);
 
 	/* Create DMA descriptor MAPs */
 	for (x = 0; x != wq_sz; x++) {
 		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
 		if (err != 0) {
 			while (x--)
 				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
 			free(sq->mbuf, M_MLX5EN);
 			return (err);
 		}
 	}
 	return (0);
 }
 
 static const char *mlx5e_sq_stats_desc[] = {
 	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
 };
 
 static int
 mlx5e_create_sq(struct mlx5e_channel *c,
     int tc,
     struct mlx5e_sq_param *param,
     struct mlx5e_sq *sq)
 {
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	char buffer[16];
 
 	void *sqc = param->sqc;
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
+#ifdef RSS
+	cpuset_t cpu_mask;
+	int cpu_id;
+#endif
 	int err;
 
 	/* Create DMA descriptor TAG */
 	if ((err = -bus_dma_tag_create(
 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
 	    1,				/* any alignment */
 	    0,				/* no boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockfuncarg */
 	    &sq->dma_tag)))
 		goto done;
 
 	err = mlx5_alloc_map_uar(mdev, &sq->uar);
 	if (err)
 		goto err_free_dma_tag;
 
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
 	    &sq->wq_ctrl);
 	if (err)
 		goto err_unmap_free_uar;
 
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 	sq->uar_map = sq->uar.map;
 	sq->uar_bf_map = sq->uar.bf_map;
 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
 
 	err = mlx5e_alloc_sq_db(sq);
 	if (err)
 		goto err_sq_wq_destroy;
 
 	sq->pdev = c->pdev;
 	sq->mkey_be = c->mkey_be;
 	sq->channel = c;
 	sq->tc = tc;
 
 	sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
 	    M_WAITOK, &sq->lock);
 	if (sq->br == NULL) {
 		if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
 		    __func__);
 		err = -ENOMEM;
 		goto err_free_sq_db;
 	}
 
 	sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
 	    taskqueue_thread_enqueue, &sq->sq_tq);
 	if (sq->sq_tq == NULL) {
 		if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
 		    __func__);
 		err = -ENOMEM;
 		goto err_free_drbr;
 	}
 
 	TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
-	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, "%s tx sq",
-	    c->ifp->if_xname);
-
+#ifdef RSS
+	cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
+	CPU_SETOF(cpu_id, &cpu_mask);
+	taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
+	    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
+#else
+	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
+	    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
+#endif
 	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
 	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
 	    sq->stats.arg);
 
 	return (0);
 
 err_free_drbr:
 	buf_ring_free(sq->br, M_MLX5EN);
 err_free_sq_db:
 	mlx5e_free_sq_db(sq);
 err_sq_wq_destroy:
 	mlx5_wq_destroy(&sq->wq_ctrl);
 
 err_unmap_free_uar:
 	mlx5_unmap_free_uar(mdev, &sq->uar);
 
 err_free_dma_tag:
 	bus_dma_tag_destroy(sq->dma_tag);
 done:
 	return (err);
 }
 
 static void
 mlx5e_destroy_sq(struct mlx5e_sq *sq)
 {
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5e_priv *priv = c->priv;
 
 	/* destroy all sysctl nodes */
 	sysctl_ctx_free(&sq->stats.ctx);
 
 	mlx5e_free_sq_db(sq);
 	mlx5_wq_destroy(&sq->wq_ctrl);
 	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
 	taskqueue_drain(sq->sq_tq, &sq->sq_task);
 	taskqueue_free(sq->sq_tq);
 	buf_ring_free(sq->br, M_MLX5EN);
 }
 
 static int
 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
 {
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	void *in;
 	void *sqc;
 	void *wq;
 	int inlen;
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
 	    sizeof(u64) * sq->wq_ctrl.buf.npages;
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 
 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	memcpy(sqc, param->sqc, sizeof(param->sqc));
 
 	MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
 	MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
 
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
 	MLX5_SET(wq, wq, uar_page, sq->uar.index);
 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
 	    PAGE_SHIFT);
 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
 
 	mlx5_fill_page_array(&sq->wq_ctrl.buf,
 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
 
 	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static int
 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
 {
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	void *in;
 	void *sqc;
 	int inlen;
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 
 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
 
 	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
 	MLX5_SET(sqc, sqc, state, next_state);
 
 	err = mlx5_core_modify_sq(mdev, in, inlen);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static void
 mlx5e_disable_sq(struct mlx5e_sq *sq)
 {
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	mlx5_core_destroy_sq(mdev, sq->sqn);
 }
 
 static int
 mlx5e_open_sq(struct mlx5e_channel *c,
     int tc,
     struct mlx5e_sq_param *param,
     struct mlx5e_sq *sq)
 {
 	int err;
 
 	err = mlx5e_create_sq(c, tc, param, sq);
 	if (err)
 		return (err);
 
 	err = mlx5e_enable_sq(sq, param);
 	if (err)
 		goto err_destroy_sq;
 
 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
 	if (err)
 		goto err_disable_sq;
 
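 	/* mark the SQ as ready so the transmit path may start using it */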
 	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
 
 	return (0);
 
 err_disable_sq:
 	mlx5e_disable_sq(sq);
 err_destroy_sq:
 	mlx5e_destroy_sq(sq);
 
 	return (err);
 }
 
 static void
 mlx5e_close_sq(struct mlx5e_sq *sq)
 {
 
 	/* ensure hw is notified of all pending wqes */
 	if (mlx5e_sq_has_room_for(sq, 1))
 		mlx5e_send_nop(sq, 1, true);
 
 	mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
 }
 
 static void
 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
 {
 	/* wait till SQ is empty */
 	while (sq->cc != sq->pc) {
 		msleep(4);
 		sq->cq.mcq.comp(&sq->cq.mcq);
 	}
 
 	mlx5e_disable_sq(sq);
 	mlx5e_destroy_sq(sq);
 }
 
 static int
 mlx5e_create_cq(struct mlx5e_channel *c,
     struct mlx5e_cq_param *param,
     struct mlx5e_cq *cq,
     mlx5e_cq_comp_t *comp)
 {
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_core_cq *mcq = &cq->mcq;
 	int eqn_not_used;
 	int irqn;
 	int err;
 	u32 i;
 
 	param->wq.buf_numa_node = 0;
 	param->wq.db_numa_node = 0;
 	param->eq_ix = c->ix;
 
 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
 	    &cq->wq_ctrl);
 	if (err)
 		return (err);
 
 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
 
 	mcq->cqe_sz = 64;
 	mcq->set_ci_db = cq->wq_ctrl.db.db;
 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
 	*mcq->set_ci_db = 0;
 	*mcq->arm_db = 0;
 	mcq->vector = param->eq_ix;
 	mcq->comp = comp;
 	mcq->event = mlx5e_cq_error_event;
 	mcq->irqn = irqn;
 	mcq->uar = &priv->cq_uar;
 
 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
 
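 		/* mark the CQE as invalid until hardware writes it */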
 		cqe->op_own = 0xf1;
 	}
 
 	cq->channel = c;
 
 	return (0);
 }
 
 static void
 mlx5e_destroy_cq(struct mlx5e_cq *cq)
 {
 	mlx5_wq_destroy(&cq->wq_ctrl);
 }
 
 static int
 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
     u8 moderation_mode)
 {
 	struct mlx5e_channel *c = cq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_core_cq *mcq = &cq->mcq;
 	void *in;
 	void *cqc;
 	int inlen;
 	int irqn_not_used;
 	int eqn;
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 	    sizeof(u64) * cq->wq_ctrl.buf.npages;
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 
 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
 
 	memcpy(cqc, param->cqc, sizeof(param->cqc));
 
 	mlx5_fill_page_array(&cq->wq_ctrl.buf,
 	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
 
 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
 
 	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
 	MLX5_SET(cqc, cqc, c_eqn, eqn);
 	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 	    PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
 
 	err = mlx5_core_create_cq(mdev, mcq, in, inlen);
 
 	kvfree(in);
 
 	if (err)
 		return (err);
 
 	mlx5e_cq_arm(cq);
 
 	return (0);
 }
 
 static void
 mlx5e_disable_cq(struct mlx5e_cq *cq)
 {
 	struct mlx5e_channel *c = cq->channel;
 	struct mlx5e_priv *priv = c->priv;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	mlx5_core_destroy_cq(mdev, &cq->mcq);
 }
 
 static int
 mlx5e_open_cq(struct mlx5e_channel *c,
     struct mlx5e_cq_param *param,
     struct mlx5e_cq *cq,
     mlx5e_cq_comp_t *comp,
     u8 moderation_mode)
 {
 	int err;
 
 	err = mlx5e_create_cq(c, param, cq, comp);
 	if (err)
 		return (err);
 
 	err = mlx5e_enable_cq(cq, param, moderation_mode);
 	if (err)
 		goto err_destroy_cq;
 
 	return (0);
 
 err_destroy_cq:
 	mlx5e_destroy_cq(cq);
 
 	return (err);
 }
 
 static void
 mlx5e_close_cq(struct mlx5e_cq *cq)
 {
 	mlx5e_disable_cq(cq);
 	mlx5e_destroy_cq(cq);
 }
 
 static int
 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
     struct mlx5e_channel_param *cparam)
 {
 	u8 tx_moderation_mode;
 	int err;
 	int tc;
 
 	switch (c->priv->params.tx_cq_moderation_mode) {
 	case 0:
 		tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 		break;
 	default:
 		if (MLX5_CAP_GEN(c->priv->mdev, cq_period_start_from_cqe))
 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
 		else
 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 		break;
 	}
 	for (tc = 0; tc < c->num_tc; tc++) {
 		/* open completion queue */
 		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
 		    &mlx5e_tx_cq_comp, tx_moderation_mode);
 		if (err)
 			goto err_close_tx_cqs;
 	}
 	return (0);
 
 err_close_tx_cqs:
 	for (tc--; tc >= 0; tc--)
 		mlx5e_close_cq(&c->sq[tc].cq);
 
 	return (err);
 }
 
 static void
 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
 {
 	int tc;
 
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_close_cq(&c->sq[tc].cq);
 }
 
 static int
 mlx5e_open_sqs(struct mlx5e_channel *c,
     struct mlx5e_channel_param *cparam)
 {
 	int err;
 	int tc;
 
 	for (tc = 0; tc < c->num_tc; tc++) {
 		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
 		if (err)
 			goto err_close_sqs;
 	}
 
 	return (0);
 
 err_close_sqs:
 	for (tc--; tc >= 0; tc--) {
 		mlx5e_close_sq(&c->sq[tc]);
 		mlx5e_close_sq_wait(&c->sq[tc]);
 	}
 
 	return (err);
 }
 
 static void
 mlx5e_close_sqs(struct mlx5e_channel *c)
 {
 	int tc;
 
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_close_sq(&c->sq[tc]);
 }
 
 static void
 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
 {
 	int tc;
 
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_close_sq_wait(&c->sq[tc]);
 }
 
 static void
 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
 {
 	int tc;
 
 	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
 
 	for (tc = 0; tc < c->num_tc; tc++) {
 		mtx_init(&c->sq[tc].lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
 		mtx_init(&c->sq[tc].comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
 		    MTX_DEF);
 	}
 }
 
 static void
 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
 {
 	int tc;
 
 	mtx_destroy(&c->rq.mtx);
 
 	for (tc = 0; tc < c->num_tc; tc++) {
 		mtx_destroy(&c->sq[tc].lock);
 		mtx_destroy(&c->sq[tc].comp_lock);
 	}
 }
 
 static int
 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
     struct mlx5e_channel_param *cparam,
     struct mlx5e_channel *volatile *cp)
 {
 	struct mlx5e_channel *c;
 	u8 rx_moderation_mode;
 	int err;
 
 	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
 	if (c == NULL)
 		return (-ENOMEM);
 
 	c->priv = priv;
 	c->ix = ix;
 	c->cpu = 0;
 	c->pdev = &priv->mdev->pdev->dev;
 	c->ifp = priv->ifp;
 	c->mkey_be = cpu_to_be32(priv->mr.key);
 	c->num_tc = priv->num_tc;
 
 	/* init mutexes */
 	mlx5e_chan_mtx_init(c);
 
 	/* open transmit completion queue */
 	err = mlx5e_open_tx_cqs(c, cparam);
 	if (err)
 		goto err_free;
 
 	switch (priv->params.rx_cq_moderation_mode) {
 	case 0:
 		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 		break;
 	default:
 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
 		else
 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 		break;
 	}
 
 	/* open receive completion queue */
 	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
 	    &mlx5e_rx_cq_comp, rx_moderation_mode);
 	if (err)
 		goto err_close_tx_cqs;
 
 	err = mlx5e_open_sqs(c, cparam);
 	if (err)
 		goto err_close_rx_cq;
 
 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
 	if (err)
 		goto err_close_sqs;
 
 	/* store channel pointer */
 	*cp = c;
 
 	/* poll receive queue initially */
 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
 
 	return (0);
 
 err_close_sqs:
 	mlx5e_close_sqs(c);
 	mlx5e_close_sqs_wait(c);
 
 err_close_rx_cq:
 	mlx5e_close_cq(&c->rq.cq);
 
 err_close_tx_cqs:
 	mlx5e_close_tx_cqs(c);
 
 err_free:
 	/* destroy mutexes */
 	mlx5e_chan_mtx_destroy(c);
 	free(c, M_MLX5EN);
 	return (err);
 }
 
 static void
 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
 {
 	struct mlx5e_channel *c = *pp;
 
 	/* check if channel is already closed */
 	if (c == NULL)
 		return;
 	mlx5e_close_rq(&c->rq);
 	mlx5e_close_sqs(c);
 }
 
 static void
 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
 {
 	struct mlx5e_channel *c = *pp;
 
 	/* check if channel is already closed */
 	if (c == NULL)
 		return;
 	/* ensure channel pointer is no longer used */
 	*pp = NULL;
 
 	mlx5e_close_rq_wait(&c->rq);
 	mlx5e_close_sqs_wait(c);
 	mlx5e_close_cq(&c->rq.cq);
 	mlx5e_close_tx_cqs(c);
 	/* destroy mutexes */
 	mlx5e_chan_mtx_destroy(c);
 	free(c, M_MLX5EN);
 }
 
 static void
 mlx5e_build_rq_param(struct mlx5e_priv *priv,
     struct mlx5e_rq_param *param)
 {
 	void *rqc = param->rqc;
 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
 	MLX5_SET(wq, wq, pd, priv->pdn);
 
 	param->wq.buf_numa_node = 0;
 	param->wq.db_numa_node = 0;
 	param->wq.linear = 1;
 }
 
 static void
 mlx5e_build_sq_param(struct mlx5e_priv *priv,
     struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 
 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
 	MLX5_SET(wq, wq, pd, priv->pdn);
 
 	param->wq.buf_numa_node = 0;
 	param->wq.db_numa_node = 0;
 	param->wq.linear = 1;
 }
 
 static void
 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
     struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 
 	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
 }
 
 static void
 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
     struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 
 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
 	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 
 	mlx5e_build_common_cq_param(priv, param);
 }
 
 static void
 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
     struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 
 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
 	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
 
 	mlx5e_build_common_cq_param(priv, param);
 }
 
 static void
 mlx5e_build_channel_param(struct mlx5e_priv *priv,
     struct mlx5e_channel_param *cparam)
 {
 	memset(cparam, 0, sizeof(*cparam));
 
 	mlx5e_build_rq_param(priv, &cparam->rq);
 	mlx5e_build_sq_param(priv, &cparam->sq);
 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
 }
 
 static int
 mlx5e_open_channels(struct mlx5e_priv *priv)
 {
 	struct mlx5e_channel_param cparam;
 	void *ptr;
 	int err;
 	int i;
 	int j;
 
 	priv->channel = malloc(priv->params.num_channels *
 	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
 	if (priv->channel == NULL)
 		return (-ENOMEM);
 
 	mlx5e_build_channel_param(priv, &cparam);
 	for (i = 0; i < priv->params.num_channels; i++) {
 		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
 		if (err)
 			goto err_close_channels;
 	}
 
 	for (j = 0; j < priv->params.num_channels; j++) {
 		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
 		if (err)
 			goto err_close_channels;
 	}
 
 	return (0);
 
 err_close_channels:
 	for (i--; i >= 0; i--) {
 		mlx5e_close_channel(&priv->channel[i]);
 		mlx5e_close_channel_wait(&priv->channel[i]);
 	}
 
 	/* remove "volatile" attribute from "channel" pointer */
 	ptr = __DECONST(void *, priv->channel);
 	priv->channel = NULL;
 
 	free(ptr, M_MLX5EN);
 
 	return (err);
 }
 
 static void
 mlx5e_close_channels(struct mlx5e_priv *priv)
 {
 	void *ptr;
 	int i;
 
 	if (priv->channel == NULL)
 		return;
 
 	for (i = 0; i < priv->params.num_channels; i++)
 		mlx5e_close_channel(&priv->channel[i]);
 	for (i = 0; i < priv->params.num_channels; i++)
 		mlx5e_close_channel_wait(&priv->channel[i]);
 
 	/* remove "volatile" attribute from "channel" pointer */
 	ptr = __DECONST(void *, priv->channel);
 	priv->channel = NULL;
 
 	free(ptr, M_MLX5EN);
 }
 
 static int
 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(tisc, tisc, prio, tc);
 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
 
 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
 }
 
 static void
 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
 {
 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
 }
 
 static int
 mlx5e_open_tises(struct mlx5e_priv *priv)
 {
 	int num_tc = priv->num_tc;
 	int err;
 	int tc;
 
 	for (tc = 0; tc < num_tc; tc++) {
 		err = mlx5e_open_tis(priv, tc);
 		if (err)
 			goto err_close_tises;
 	}
 
 	return (0);
 
 err_close_tises:
 	for (tc--; tc >= 0; tc--)
 		mlx5e_close_tis(priv, tc);
 
 	return (err);
 }
 
 static void
 mlx5e_close_tises(struct mlx5e_priv *priv)
 {
 	int num_tc = priv->num_tc;
 	int tc;
 
 	for (tc = 0; tc < num_tc; tc++)
 		mlx5e_close_tis(priv, tc);
 }
 
 static int
 mlx5e_open_rqt(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 *in;
 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
 	void *rqtc;
 	int inlen;
 	int err;
 	int sz;
 	int i;
 
 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
 
 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
 
 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
 
 	for (i = 0; i < sz; i++) {
-		int ix = i % priv->params.num_channels;
-
+		int ix;
+#ifdef RSS
+		ix = rss_get_indirection_to_bucket(i);
+#else
+		ix = i;
+#endif
+		/* ensure we don't overflow */
+		ix %= priv->params.num_channels;
 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
 	}
 
 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
 
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
 	if (!err)
 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static void
 mlx5e_close_rqt(struct mlx5e_priv *priv)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
 
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
 
 	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
 	    sizeof(out));
 }
 
 static void
 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
 {
 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
 	__be32 *hkey;
 
 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
 
 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
 
 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
 			  MLX5_HASH_FIELD_SEL_DST_IP)
 
 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
 
 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
 
 	if (priv->params.hw_lro_en) {
 		MLX5_SET(tirc, tirc, lro_enable_mask,
 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
 		    (priv->params.lro_wqe_sz -
 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
 		/* TODO: add the option to choose timer value dynamically */
 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
 		    MLX5_CAP_ETH(priv->mdev,
 		    lro_timer_supported_periods[2]));
 	}
+
+	/* set up hash parameters for the TIR type, if applicable */
 	switch (tt) {
 	case MLX5E_TT_ANY:
 		MLX5_SET(tirc, tirc, disp_type,
 		    MLX5_TIRC_DISP_TYPE_DIRECT);
 		MLX5_SET(tirc, tirc, inline_rqn,
 		    priv->channel[0]->rq.rqn);
 		break;
 	default:
 		MLX5_SET(tirc, tirc, disp_type,
 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
 		MLX5_SET(tirc, tirc, indirect_table,
 		    priv->rqtn);
 		MLX5_SET(tirc, tirc, rx_hash_fn,
 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
-		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
+#ifdef RSS
+		/*
+		 * The FreeBSD RSS implementation does not currently
+		 * support symmetric Toeplitz hashes:
+		 */
+		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
+		rss_getkey((uint8_t *)hkey);
+#else
+		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
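 		/* use a built-in static Toeplitz hash key */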
 		hkey[0] = cpu_to_be32(0xD181C62C);
 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
 		hkey[2] = cpu_to_be32(0x1983A2FC);
 		hkey[3] = cpu_to_be32(0x943E1ADB);
 		hkey[4] = cpu_to_be32(0xD9389E6B);
 		hkey[5] = cpu_to_be32(0xD1039C2C);
 		hkey[6] = cpu_to_be32(0xA74499AD);
 		hkey[7] = cpu_to_be32(0x593D56D9);
 		hkey[8] = cpu_to_be32(0xF3253C06);
 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
+#endif
 		break;
 	}
 
 	switch (tt) {
 	case MLX5E_TT_IPV4_TCP:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV4);
 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
 		    MLX5_L4_PROT_TYPE_TCP);
+#ifdef RSS
+		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
+			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			    MLX5_HASH_IP);
+		} else
+#endif
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_ALL);
 		break;
 
 	case MLX5E_TT_IPV6_TCP:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV6);
 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
 		    MLX5_L4_PROT_TYPE_TCP);
+#ifdef RSS
+		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
+			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			    MLX5_HASH_IP);
+		} else
+#endif
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_ALL);
 		break;
 
 	case MLX5E_TT_IPV4_UDP:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV4);
 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
 		    MLX5_L4_PROT_TYPE_UDP);
+#ifdef RSS
+		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
+			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			    MLX5_HASH_IP);
+		} else
+#endif
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_ALL);
 		break;
 
 	case MLX5E_TT_IPV6_UDP:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV6);
 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
 		    MLX5_L4_PROT_TYPE_UDP);
+#ifdef RSS
+		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
+			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
+			    MLX5_HASH_IP);
+		} else
+#endif
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_ALL);
 		break;
 
 	case MLX5E_TT_IPV4_IPSEC_AH:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV4);
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_IP_IPSEC_SPI);
 		break;
 
 	case MLX5E_TT_IPV6_IPSEC_AH:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV6);
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_IP_IPSEC_SPI);
 		break;
 
 	case MLX5E_TT_IPV4_IPSEC_ESP:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV4);
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_IP_IPSEC_SPI);
 		break;
 
 	case MLX5E_TT_IPV6_IPSEC_ESP:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV6);
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_IP_IPSEC_SPI);
 		break;
 
 	case MLX5E_TT_IPV4:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV4);
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_IP);
 		break;
 
 	case MLX5E_TT_IPV6:
 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
 		    MLX5_L3_PROT_TYPE_IPV6);
 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
 		    MLX5_HASH_IP);
 		break;
 
 	default:
 		break;
 	}
 }
 
 static int
 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 *in;
 	void *tirc;
 	int inlen;
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
 	in = mlx5_vzalloc(inlen);
 	if (in == NULL)
 		return (-ENOMEM);
 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
 
 	mlx5e_build_tir_ctx(priv, tirc, tt);
 
 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static void
 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
 {
 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
 }
 
 static int
 mlx5e_open_tirs(struct mlx5e_priv *priv)
 {
 	int err;
 	int i;
 
 	for (i = 0; i < MLX5E_NUM_TT; i++) {
 		err = mlx5e_open_tir(priv, i);
 		if (err)
 			goto err_close_tirs;
 	}
 
 	return (0);
 
 err_close_tirs:
 	for (i--; i >= 0; i--)
 		mlx5e_close_tir(priv, i);
 
 	return (err);
 }
 
 static void
 mlx5e_close_tirs(struct mlx5e_priv *priv)
 {
 	int i;
 
 	for (i = 0; i < MLX5E_NUM_TT; i++)
 		mlx5e_close_tir(priv, i);
 }
 
 /*
  * SW MTU does not include headers,
  * HW MTU includes all headers and checksums.
  */
 static int
 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int hw_mtu;
 	int err;
 
 
 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
 	if (err) {
 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
 		    __func__, sw_mtu, err);
 		return (err);
 	}
 	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
 	if (!err) {
 		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);
 
 		if (ifp->if_mtu != sw_mtu) {
 			if_printf(ifp, "Port MTU %d is different than "
 			    "ifp mtu %d\n", sw_mtu, (int)ifp->if_mtu);
 		}
 	} else {
 		if_printf(ifp, "Query port MTU, after setting new "
 		    "MTU value, failed\n");
 		ifp->if_mtu = sw_mtu;
 	}
 	return (0);
 }
 
 int
 mlx5e_open_locked(struct ifnet *ifp)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 	int err;
 
 	/* check if already opened */
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
 		return (0);
 
+#ifdef RSS
+	if (rss_getnumbuckets() > priv->params.num_channels) {
+		if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
+		    "channels (%u) available\n", rss_getnumbuckets(),
+		    priv->params.num_channels);
+	}
+#endif
 	err = mlx5e_open_tises(priv);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
 		    __func__, err);
 		return (err);
 	}
 	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
 	if (err) {
 		if_printf(priv->ifp,
 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
 		    __func__, err);
 		goto err_close_tises;
 	}
 	err = mlx5e_open_channels(priv);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
 		    __func__, err);
 		goto err_dalloc_q_counter;
 	}
 	err = mlx5e_open_rqt(priv);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
 		    __func__, err);
 		goto err_close_channels;
 	}
 	err = mlx5e_open_tirs(priv);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
 		    __func__, err);
 		goto err_close_rqls;
 	}
 	err = mlx5e_open_flow_table(priv);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
 		    __func__, err);
 		goto err_close_tirs;
 	}
 	err = mlx5e_add_all_vlan_rules(priv);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
 		    __func__, err);
 		goto err_close_flow_table;
 	}
 	set_bit(MLX5E_STATE_OPENED, &priv->state);
 
 	mlx5e_update_carrier(priv);
 	mlx5e_set_rx_mode_core(priv);
 
 	return (0);
 
 err_close_flow_table:
 	mlx5e_close_flow_table(priv);
 
 err_close_tirs:
 	mlx5e_close_tirs(priv);
 
 err_close_rqls:
 	mlx5e_close_rqt(priv);
 
 err_close_channels:
 	mlx5e_close_channels(priv);
 
 err_dalloc_q_counter:
 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
 
 err_close_tises:
 	mlx5e_close_tises(priv);
 
 	return (err);
 }
 
 static void
 mlx5e_open(void *arg)
 {
 	struct mlx5e_priv *priv = arg;
 
 	PRIV_LOCK(priv);
 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
 		if_printf(priv->ifp,
 		    "%s: Setting port status to up failed\n",
 		    __func__);
 
 	mlx5e_open_locked(priv->ifp);
 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	PRIV_UNLOCK(priv);
 }
 
 int
 mlx5e_close_locked(struct ifnet *ifp)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 
 	/* check if already closed */
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 		return (0);
 
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
 	mlx5e_set_rx_mode_core(priv);
 	mlx5e_del_all_vlan_rules(priv);
 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
 	mlx5e_close_flow_table(priv);
 	mlx5e_close_tirs(priv);
 	mlx5e_close_rqt(priv);
 	mlx5e_close_channels(priv);
 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
 	mlx5e_close_tises(priv);
 
 	return (0);
 }
 
 #if (__FreeBSD_version >= 1100000)
 static uint64_t
 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 	u64 retval;
 
 	/* PRIV_LOCK(priv); XXX not allowed */
 	switch (cnt) {
 	case IFCOUNTER_IPACKETS:
 		retval = priv->stats.vport.rx_packets;
 		break;
 	case IFCOUNTER_IERRORS:
 		retval = priv->stats.vport.rx_error_packets;
 		break;
 	case IFCOUNTER_IQDROPS:
 		retval = priv->stats.vport.rx_out_of_buffer;
 		break;
 	case IFCOUNTER_OPACKETS:
 		retval = priv->stats.vport.tx_packets;
 		break;
 	case IFCOUNTER_OERRORS:
 		retval = priv->stats.vport.tx_error_packets;
 		break;
 	case IFCOUNTER_IBYTES:
 		retval = priv->stats.vport.rx_bytes;
 		break;
 	case IFCOUNTER_OBYTES:
 		retval = priv->stats.vport.tx_bytes;
 		break;
 	case IFCOUNTER_IMCASTS:
 		retval = priv->stats.vport.rx_multicast_packets;
 		break;
 	case IFCOUNTER_OMCASTS:
 		retval = priv->stats.vport.tx_multicast_packets;
 		break;
 	case IFCOUNTER_OQDROPS:
 		retval = priv->stats.vport.tx_queue_dropped;
 		break;
 	default:
 		retval = if_get_counter_default(ifp, cnt);
 		break;
 	}
 	/* PRIV_UNLOCK(priv); XXX not allowed */
 	return (retval);
 }
 #endif
 
 static void
 mlx5e_set_rx_mode(struct ifnet *ifp)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 
 	schedule_work(&priv->set_rx_mode_work);
 }
 
 static int
 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct mlx5e_priv *priv;
 	struct ifreq *ifr;
 	struct ifi2creq i2c;
 	int error = 0;
 	int mask = 0;
 	int size_read = 0;
 	int module_num;
 	int max_mtu;
 
 	priv = ifp->if_softc;
 
 	/* check if detaching */
 	if (priv == NULL || priv->gone != 0)
 		return (ENXIO);
 
 	switch (command) {
 	case SIOCSIFMTU:
 		ifr = (struct ifreq *)data;
 
 		PRIV_LOCK(priv);
 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
 
 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
 			int was_opened;
 
 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 			if (was_opened)
 				mlx5e_close_locked(ifp);
 
 			/* set new MTU */
 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
 
 			if (was_opened)
 				mlx5e_open_locked(ifp);
 		} else {
 			error = EINVAL;
 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
 		}
 		PRIV_UNLOCK(priv);
 		break;
 	case SIOCSIFFLAGS:
 		if ((ifp->if_flags & IFF_UP) &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 			mlx5e_set_rx_mode(ifp);
 			break;
 		}
 		PRIV_LOCK(priv);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
 					mlx5e_open_locked(ifp);
 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
 			}
 		} else {
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 				mlx5_set_port_status(priv->mdev,
 				    MLX5_PORT_DOWN);
 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
 					mlx5e_close_locked(ifp);
 				mlx5e_update_carrier(priv);
 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			}
 		}
 		PRIV_UNLOCK(priv);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		mlx5e_set_rx_mode(ifp);
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 	case SIOCGIFXMEDIA:
 		ifr = (struct ifreq *)data;
 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
 		break;
 	case SIOCSIFCAP:
 		ifr = (struct ifreq *)data;
 		PRIV_LOCK(priv);
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 
 		if (mask & IFCAP_TXCSUM) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 
 			if (IFCAP_TSO4 & ifp->if_capenable &&
 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
 				ifp->if_capenable &= ~IFCAP_TSO4;
 				ifp->if_hwassist &= ~CSUM_IP_TSO;
 				if_printf(ifp,
 				    "tso4 disabled due to -txcsum.\n");
 			}
 		}
 		if (mask & IFCAP_TXCSUM_IPV6) {
 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
 
 			if (IFCAP_TSO6 & ifp->if_capenable &&
 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
 				ifp->if_capenable &= ~IFCAP_TSO6;
 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
 				if_printf(ifp,
 				    "tso6 disabled due to -txcsum6.\n");
 			}
 		}
 		if (mask & IFCAP_RXCSUM)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		if (mask & IFCAP_RXCSUM_IPV6)
 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
 		if (mask & IFCAP_TSO4) {
 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
 				if_printf(ifp, "enable txcsum first.\n");
 				error = EAGAIN;
 				goto out;
 			}
 			ifp->if_capenable ^= IFCAP_TSO4;
 			ifp->if_hwassist ^= CSUM_IP_TSO;
 		}
 		if (mask & IFCAP_TSO6) {
 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
 				if_printf(ifp, "enable txcsum6 first.\n");
 				error = EAGAIN;
 				goto out;
 			}
 			ifp->if_capenable ^= IFCAP_TSO6;
 			ifp->if_hwassist ^= CSUM_IP6_TSO;
 		}
 		if (mask & IFCAP_VLAN_HWFILTER) {
 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
 				mlx5e_disable_vlan_filter(priv);
 			else
 				mlx5e_enable_vlan_filter(priv);
 
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 		}
 		if (mask & IFCAP_VLAN_HWTAGGING)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 		if (mask & IFCAP_WOL_MAGIC)
 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
 
 		VLAN_CAPABILITIES(ifp);
 		/* turning off LRO also turns off HW LRO, if it is on */
 		if (mask & IFCAP_LRO) {
 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 			bool need_restart = false;
 
 			ifp->if_capenable ^= IFCAP_LRO;
 			if (!(ifp->if_capenable & IFCAP_LRO)) {
 				if (priv->params.hw_lro_en) {
 					priv->params.hw_lro_en = false;
 					need_restart = true;
 					/* Not sure this is the correct way */
 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
 				}
 			}
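 			/* restart the interface so the new HW LRO setting takes effect */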
 			if (was_opened && need_restart) {
 				mlx5e_close_locked(ifp);
 				mlx5e_open_locked(ifp);
 			}
 		}
 out:
 		PRIV_UNLOCK(priv);
 		break;
 
 	case SIOCGI2C:
 		ifr = (struct ifreq *)data;
 
 		/*
 		 * Copy from the user-space address ifr_data to the
 		 * kernel-space address i2c
 		 */
 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
 		if (error)
 			break;
 
 		if (i2c.len > sizeof(i2c.data)) {
 			error = EINVAL;
 			break;
 		}
 
 		PRIV_LOCK(priv);
 		/* Get module_num which is required for the query_eeprom */
 		error = mlx5_query_module_num(priv->mdev, &module_num);
 		if (error) {
 			if_printf(ifp, "Query module num failed, eeprom "
 			    "reading is not supported\n");
 			goto err_i2c;
 		}
 
 		/*
 		 * Note that we ignore i2c.addr here. The driver hardcodes
 		 * the address to 0x50, while the standard expects it to be 0xA0.
 		 */
 		error = mlx5_query_eeprom(priv->mdev,
 		    MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
 		    (uint32_t *)i2c.data, &size_read);
 		if (error) {
 			if_printf(ifp, "Query eeprom failed, eeprom "
 			    "reading is not supported\n");
 			goto err_i2c;
 		}
 
 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
 			error = mlx5_query_eeprom(priv->mdev,
 			    MLX5E_I2C_ADDR_LOW, MLX5E_EEPROM_LOW_PAGE,
 			    (uint32_t)(i2c.offset + size_read),
 			    (uint32_t)(i2c.len - size_read), module_num,
 			    (uint32_t *)(i2c.data + size_read), &size_read);
 		}
 		if (error) {
 			if_printf(ifp, "Query eeprom failed, eeprom "
 			    "reading is not supported\n");
 			goto err_i2c;
 		}
 
 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
 err_i2c:
 		PRIV_UNLOCK(priv);
 		break;
 
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 	return (error);
 }
 
 static int
 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 {
 	/*
 	 * TODO: uncomment once FW really sets all these bits if
 	 * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
 	 * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
 	 * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
 	 * -ENOTSUPP;
 	 */
 
 	/* TODO: add more must-to-have features */
 
 	return (0);
 }
 
 static void
 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
     struct mlx5e_priv *priv,
     int num_comp_vectors)
 {
 	/*
 	 * TODO: Consider link speed for setting "log_sq_size",
 	 * "log_rq_size" and "cq_moderation_xxx":
 	 */
 	priv->params.log_sq_size =
 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 	priv->params.log_rq_size =
 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 	priv->params.rx_cq_moderation_usec =
 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
 	priv->params.rx_cq_moderation_mode =
 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
 	priv->params.rx_cq_moderation_pkts =
 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
 	priv->params.tx_cq_moderation_usec =
 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
 	priv->params.tx_cq_moderation_pkts =
 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
 	priv->params.min_rx_wqes =
 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
 	priv->params.rx_hash_log_tbl_sz =
 	    (order_base_2(num_comp_vectors) >
 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
 	    order_base_2(num_comp_vectors) :
 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
 	priv->params.num_tc = 1;
 	priv->params.default_vlan_prio = 0;
 	priv->counter_set_id = -1;
 
 	/*
 	 * hw lro is currently defaulted to off. when it won't anymore we
 	 * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
 	 */
 	priv->params.hw_lro_en = false;
 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
 
 	priv->mdev = mdev;
 	priv->params.num_channels = num_comp_vectors;
 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
 	priv->queue_mapping_channel_mask =
 	    roundup_pow_of_two(num_comp_vectors) - 1;
 	priv->num_tc = priv->params.num_tc;
 	priv->default_vlan_prio = priv->params.default_vlan_prio;
 
 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
 }
 
 static int
 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
     struct mlx5_core_mr *mr)
 {
 	struct ifnet *ifp = priv->ifp;
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5_create_mkey_mbox_in *in;
 	int err;
 
 	in = mlx5_vzalloc(sizeof(*in));
 	if (in == NULL) {
 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
 		return (-ENOMEM);
 	}
 	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
 	    MLX5_PERM_LOCAL_READ |
 	    MLX5_ACCESS_MODE_PA;
 	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 
 	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
 	    NULL);
 	if (err)
 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
 		    __func__, err);
 
 	kvfree(in);
 
 	return (err);
 }
 
 static const char *mlx5e_vport_stats_desc[] = {
 	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
 };
 
 static const char *mlx5e_pport_stats_desc[] = {
 	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
 };
 
 static void
 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
 {
 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
 	sx_init(&priv->state_lock, "mlx5state");
 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
 }
 
 static void
 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
 {
 	mtx_destroy(&priv->async_events_mtx);
 	sx_destroy(&priv->state_lock);
 }
 
 static int
 sysctl_firmware(SYSCTL_HANDLER_ARGS)
 {
 	/*
 	 * %d.%d.%d is the string format.
 	 * fw_rev_{maj,min,sub} return u16, and 2^16 = 65536,
 	 * so we need at most 5 chars to store each value.
 	 * The string also has two "." and a terminating NUL,
 	 * which means we need at most 18 (5*3 + 3) chars.
 	 */
 	char fw[18];
 	struct mlx5e_priv *priv = arg1;
 	int error;
 
 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
 	    fw_rev_sub(priv->mdev));
 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
 	return (error);
 }
 
 static void
 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
 {
 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
 	    sysctl_firmware, "A", "HCA firmware version");
 
 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
 	    "Board ID");
 }
 
 static void *
 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 {
 	static volatile int mlx5_en_unit;
 	struct ifnet *ifp;
 	struct mlx5e_priv *priv;
 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
 	struct sysctl_oid_list *child;
 	int ncv = mdev->priv.eq_table.num_comp_vectors;
 	char unit[16];
 	int err;
 	int i;
 	u32 eth_proto_cap;
 
 	if (mlx5e_check_required_hca_cap(mdev)) {
 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
 		return (NULL);
 	}
 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
 	if (priv == NULL) {
 		mlx5_core_err(mdev, "malloc() failed\n");
 		return (NULL);
 	}
 	mlx5e_priv_mtx_init(priv);
 
 	ifp = priv->ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		mlx5_core_err(mdev, "if_alloc() failed\n");
 		goto err_free_priv;
 	}
 	ifp->if_softc = priv;
 	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_init = mlx5e_open;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = mlx5e_ioctl;
 	ifp->if_transmit = mlx5e_xmit;
 	ifp->if_qflush = if_qflush;
 #if (__FreeBSD_version >= 1100000)
 	ifp->if_get_counter = mlx5e_get_counter;
 #endif
 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
 	/*
 	 * Set driver features
 	 */
 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
 	ifp->if_capabilities |= IFCAP_LRO;
 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
 
 	/* set TSO limits so that we don't have to drop TX packets */
 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
 
 	ifp->if_capenable = ifp->if_capabilities;
 	ifp->if_hwassist = 0;
 	if (ifp->if_capenable & IFCAP_TSO)
 		ifp->if_hwassist |= CSUM_TSO;
 	if (ifp->if_capenable & IFCAP_TXCSUM)
 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
 
 	/* ifnet sysctl tree */
 	sysctl_ctx_init(&priv->sysctl_ctx);
 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
 	if (priv->sysctl_ifnet == NULL) {
 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
 		goto err_free_sysctl;
 	}
 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
 	if (priv->sysctl_ifnet == NULL) {
 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
 		goto err_free_sysctl;
 	}
 
 	/* HW sysctl tree */
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
 	if (priv->sysctl_hw == NULL) {
 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
 		goto err_free_sysctl;
 	}
 	mlx5e_build_ifp_priv(mdev, priv, ncv);
 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
 	if (err) {
 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
 		    __func__, err);
 		goto err_free_sysctl;
 	}
 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
 	if (err) {
 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
 		    __func__, err);
 		goto err_unmap_free_uar;
 	}
 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
 	if (err) {
 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
 		    __func__, err);
 		goto err_dealloc_pd;
 	}
 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
 	if (err) {
 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
 		    __func__, err);
 		goto err_dealloc_transport_domain;
 	}
 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
 
 	/* set default MTU */
 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
 
 	/* Set desc */
 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
 
 	/* Set default media status */
 	priv->media_status_last = IFM_AVALID;
 	priv->media_active_last = IFM_ETHER | IFM_AUTO;
 
 	/* Pauseframes are enabled by default */
 	priv->params_ethtool.tx_pauseframe_control = 1;
 	priv->params_ethtool.rx_pauseframe_control = 1;
 
 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
 	if (err) {
 		eth_proto_cap = 0;
 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
 		    __func__, err);
 	}
 
 	/* Setup supported medias */
 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
 	    mlx5e_media_change, mlx5e_media_status);
 
 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
 		if (mlx5e_mode_table[i].baudrate == 0)
 			continue;
 		if (MLX5E_PROT_MASK(i) & eth_proto_cap)
 			ifmedia_add(&priv->media,
 			    IFM_ETHER | mlx5e_mode_table[i].subtype |
 			    IFM_FDX, 0, NULL);
 	}
 
 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
 	ether_ifattach(ifp, dev_addr);
 
 	/* Register for VLAN events */
 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
 
 	/* Link is down by default */
 	if_link_state_change(ifp, LINK_STATE_DOWN);
 
 	mlx5e_enable_async_events(priv);
 
 	mlx5e_add_hw_stats(priv);
 
 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
 	    priv->stats.vport.arg);
 
 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
 	    priv->stats.pport.arg);
 
 	mlx5e_create_ethtool(priv);
 
 	mtx_lock(&priv->async_events_mtx);
 	mlx5e_update_stats(priv);
 	mtx_unlock(&priv->async_events_mtx);
 
 	return (priv);
 
 err_dealloc_transport_domain:
 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
 
 err_dealloc_pd:
 	mlx5_core_dealloc_pd(mdev, priv->pdn);
 
 err_unmap_free_uar:
 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
 
 err_free_sysctl:
 	sysctl_ctx_free(&priv->sysctl_ctx);
 
 	if_free(ifp);
 
 err_free_priv:
 	mlx5e_priv_mtx_destroy(priv);
 	free(priv, M_MLX5EN);
 	return (NULL);
 }
 
 static void
 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
 {
 	struct mlx5e_priv *priv = vpriv;
 	struct ifnet *ifp = priv->ifp;
 
 	/* don't allow more IOCTLs */
 	priv->gone = 1;
 
 	/* XXX wait a bit to allow IOCTL handlers to complete */
 	pause("W", hz);
 
 	/* stop watchdog timer */
 	callout_drain(&priv->watchdog);
 
 	if (priv->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
 	if (priv->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
 
 	/* make sure device gets closed */
 	PRIV_LOCK(priv);
 	mlx5e_close_locked(ifp);
 	PRIV_UNLOCK(priv);
 
 	/* unregister device */
 	ifmedia_removeall(&priv->media);
 	ether_ifdetach(ifp);
 	if_free(ifp);
 
 	/* destroy all remaining sysctl nodes */
 	if (priv->sysctl_debug)
 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
 	sysctl_ctx_free(&priv->stats.vport.ctx);
 	sysctl_ctx_free(&priv->stats.pport.ctx);
 	sysctl_ctx_free(&priv->sysctl_ctx);
 
 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
 	mlx5e_disable_async_events(priv);
 	flush_scheduled_work();
 	mlx5e_priv_mtx_destroy(priv);
 	free(priv, M_MLX5EN);
 }
 
 static void *
 mlx5e_get_ifp(void *vpriv)
 {
 	struct mlx5e_priv *priv = vpriv;
 
 	return (priv->ifp);
 }
 
 static struct mlx5_interface mlx5e_interface = {
 	.add = mlx5e_create_ifp,
 	.remove = mlx5e_destroy_ifp,
 	.event = mlx5e_async_event,
 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
 	.get_dev = mlx5e_get_ifp,
 };
 
 void
 mlx5e_init(void)
 {
 	mlx5_register_interface(&mlx5e_interface);
 }
 
 void
 mlx5e_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5e_interface);
 }
 
 module_init_order(mlx5e_init, SI_ORDER_THIRD);
 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
 
 #if (__FreeBSD_version >= 1100000)
 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
 #endif
 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
 MODULE_VERSION(mlx5en, 1);
Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
===================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c	(revision 291937)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c	(revision 291938)
@@ -1,343 +1,374 @@
 /*-
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "en.h"
 #include <machine/in_cksum.h>
 
 static inline int
 mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
     struct mlx5e_rx_wqe *wqe, u16 ix)
 {
 	bus_dma_segment_t segs[1];
 	struct mbuf *mb;
 	int nsegs;
 	int err;
 
 	if (rq->mbuf[ix].mbuf != NULL)
 		return (0);
 
 	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
 	if (unlikely(!mb))
 		return (-ENOMEM);
 
 	/* set initial mbuf length */
 	mb->m_pkthdr.len = mb->m_len = rq->wqe_sz;
 
 	/* get IP header aligned */
 	m_adj(mb, MLX5E_NET_IP_ALIGN);
 
 	err = -bus_dmamap_load_mbuf_sg(rq->dma_tag, rq->mbuf[ix].dma_map,
 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
 	if (err != 0)
 		goto err_free_mbuf;
 	if (unlikely(nsegs != 1)) {
 		bus_dmamap_unload(rq->dma_tag, rq->mbuf[ix].dma_map);
 		err = -ENOMEM;
 		goto err_free_mbuf;
 	}
 	wqe->data.addr = cpu_to_be64(segs[0].ds_addr);
 
 	rq->mbuf[ix].mbuf = mb;
 	rq->mbuf[ix].data = mb->m_data;
 
 	bus_dmamap_sync(rq->dma_tag, rq->mbuf[ix].dma_map,
 	    BUS_DMASYNC_PREREAD);
 	return (0);
 
 err_free_mbuf:
 	m_freem(mb);
 	return (err);
 }
 
 static void
 mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
 	if (unlikely(rq->enabled == 0))
 		return;
 
 	while (!mlx5_wq_ll_is_full(&rq->wq)) {
 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head);
 
 		if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head)))
 			break;
 
 		mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index));
 	}
 
 	/* ensure wqes are visible to device before updating doorbell record */
 	wmb();
 
 	mlx5_wq_ll_update_db_record(&rq->wq);
 }
 
 static void
 mlx5e_lro_update_hdr(struct mbuf *mb, struct mlx5_cqe64 *cqe)
 {
 	/* TODO: consider vlans, ip options, ... */
 	struct ether_header *eh;
 	uint16_t eh_type;
 	struct ip6_hdr *ip6 = NULL;
 	struct ip *ip4 = NULL;
 	struct tcphdr *th;
 	uint32_t *ts_ptr;
 
 	eh = mtod(mb, struct ether_header *);
 	eh_type = ntohs(eh->ether_type);
 
 	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
 	int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) ||
 	    (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
 
 	/* TODO: consider vlan */
 	u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETHER_HDR_LEN;
 
 	switch (eh_type) {
 	case ETHERTYPE_IP:
 		ip4 = (struct ip *)(eh + 1);
 		th = (struct tcphdr *)(ip4 + 1);
 		break;
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(eh + 1);
 		th = (struct tcphdr *)(ip6 + 1);
 		break;
 	default:
 		return;
 	}
 
 	ts_ptr = (uint32_t *)(th + 1);
 
 	if (get_cqe_lro_tcppsh(cqe))
 		th->th_flags |= TH_PUSH;
 
 	if (tcp_ack) {
 		th->th_flags |= TH_ACK;
 		th->th_ack = cqe->lro_ack_seq_num;
 		th->th_win = cqe->lro_tcp_win;
 
 		/*
 		 * FreeBSD only handles a 32-bit aligned timestamp option
 		 * placed right after the TCP header:
 		 * +--------+--------+--------+--------+
 		 * |   NOP  |  NOP   |  TSopt |   10   |
 		 * +--------+--------+--------+--------+
 		 * |          TSval   timestamp        |
 		 * +--------+--------+--------+--------+
 		 * |          TSecr   timestamp        |
 		 * +--------+--------+--------+--------+
 		 */
 		if (get_cqe_lro_timestamp_valid(cqe) &&
 		    __predict_true(*ts_ptr == ntohl(TCPOPT_NOP << 24 |
 		    TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 |
 		    TCPOLEN_TIMESTAMP))) {
 			/*
 			 * cqe->timestamp is 64bit long.
 			 * [0-31] - timestamp.
 			 * [32-63] - timestamp echo reply.
 			 */
 			ts_ptr[1] = *(uint32_t *)&cqe->timestamp;
 			ts_ptr[2] = *((uint32_t *)&cqe->timestamp + 1);
 		}
 	}
 	if (ip4) {
 		ip4->ip_ttl = cqe->lro_min_ttl;
 		ip4->ip_len = cpu_to_be16(tot_len);
 		ip4->ip_sum = 0;
 		ip4->ip_sum = in_cksum(mb, ip4->ip_hl << 2);
 	} else {
 		ip6->ip6_hlim = cqe->lro_min_ttl;
 		ip6->ip6_plen = cpu_to_be16(tot_len -
 		    sizeof(struct ip6_hdr));
 	}
 	/* TODO: handle tcp checksum */
 }
 
 static inline void
 mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
     struct mlx5e_rq *rq, struct mbuf *mb,
     u32 cqe_bcnt)
 {
 	struct ifnet *ifp = rq->ifp;
 	int lro_num_seg;	/* HW LRO session aggregated packets counter */
 
 	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
 	if (lro_num_seg > 1) {
 		mlx5e_lro_update_hdr(mb, cqe);
 		rq->stats.lro_packets++;
 		rq->stats.lro_bytes += cqe_bcnt;
 	}
 
 	mb->m_pkthdr.len = mb->m_len = cqe_bcnt;
 	/* check if a Toeplitz hash was computed */
-	if (cqe->rss_hash_type != 0)
+	if (cqe->rss_hash_type != 0) {
 		mb->m_pkthdr.flowid = be32_to_cpu(cqe->rss_hash_result);
-	else
+#ifdef RSS
+		/* decode the RSS hash type */
+		switch (cqe->rss_hash_type &
+		    (CQE_RSS_DST_HTYPE_L4 | CQE_RSS_DST_HTYPE_IP)) {
+		/* IPv4 */
+		case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV4):
+			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV4);
+			break;
+		case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV4):
+			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV4);
+			break;
+		case CQE_RSS_DST_HTYPE_IPV4:
+			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV4);
+			break;
+		/* IPv6 */
+		case (CQE_RSS_DST_HTYPE_TCP | CQE_RSS_DST_HTYPE_IPV6):
+			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_TCP_IPV6);
+			break;
+		case (CQE_RSS_DST_HTYPE_UDP | CQE_RSS_DST_HTYPE_IPV6):
+			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_UDP_IPV6);
+			break;
+		case CQE_RSS_DST_HTYPE_IPV6:
+			M_HASHTYPE_SET(mb, M_HASHTYPE_RSS_IPV6);
+			break;
+		default:	/* Other */
+			M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
+			break;
+		}
+#else
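+		/* kernel RSS support is not compiled in; hash type unknown */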
+		M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
+#endif
+	} else {
 		mb->m_pkthdr.flowid = rq->ix;
-
-	M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
+		M_HASHTYPE_SET(mb, M_HASHTYPE_OPAQUE);
+	}
 	mb->m_pkthdr.rcvif = ifp;
 
 	if (likely(ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) &&
 	    ((cqe->hds_ip_ext & (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK)) ==
 	    (CQE_L2_OK | CQE_L3_OK | CQE_L4_OK))) {
 		mb->m_pkthdr.csum_flags =
 		    CSUM_IP_CHECKED | CSUM_IP_VALID |
 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		mb->m_pkthdr.csum_data = htons(0xffff);
 	} else {
 		rq->stats.csum_none++;
 	}
 
 	if (cqe_has_vlan(cqe)) {
 		mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info);
 		mb->m_flags |= M_VLANTAG;
 	}
 }
 
 static int
 mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
 {
 #ifndef HAVE_TURBO_LRO
 	struct lro_entry *queued;
 #endif
 	int i;
 
 	for (i = 0; i < budget; i++) {
 		struct mlx5e_rx_wqe *wqe;
 		struct mlx5_cqe64 *cqe;
 		struct mbuf *mb;
 		__be16 wqe_counter_be;
 		u16 wqe_counter;
 		u32 byte_cnt;
 
 		cqe = mlx5e_get_cqe(&rq->cq);
 		if (!cqe)
 			break;
 
 		wqe_counter_be = cqe->wqe_counter;
 		wqe_counter = be16_to_cpu(wqe_counter_be);
 		wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
 		byte_cnt = be32_to_cpu(cqe->byte_cnt);
 
 		bus_dmamap_sync(rq->dma_tag,
 		    rq->mbuf[wqe_counter].dma_map,
 		    BUS_DMASYNC_POSTREAD);
 
 		if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
 			rq->stats.wqe_err++;
 			goto wq_ll_pop;
 		}
 
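 		/*
 		 * Copy small frames into a new header mbuf so the original
 		 * receive mbuf stays mapped and is reused for the next WQE.
 		 */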
 		if (MHLEN >= byte_cnt &&
 		    (mb = m_gethdr(M_NOWAIT, MT_DATA)) != NULL) {
 			bcopy(rq->mbuf[wqe_counter].data, mtod(mb, caddr_t),
 			    byte_cnt);
 		} else {
 			mb = rq->mbuf[wqe_counter].mbuf;
 			rq->mbuf[wqe_counter].mbuf = NULL;	/* safety clear */
 
 			bus_dmamap_unload(rq->dma_tag,
 			    rq->mbuf[wqe_counter].dma_map);
 		}
 
 		mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt);
 		rq->stats.packets++;
 #ifdef HAVE_TURBO_LRO
 		if (mb->m_pkthdr.csum_flags == 0 ||
 		    (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
 		    rq->lro.mbuf == NULL) {
 			/* normal input */
 			rq->ifp->if_input(rq->ifp, mb);
 		} else {
 			tcp_tlro_rx(&rq->lro, mb);
 		}
 #else
 		if (mb->m_pkthdr.csum_flags == 0 ||
 		    (rq->ifp->if_capenable & IFCAP_LRO) == 0 ||
 		    rq->lro.lro_cnt == 0 ||
 		    tcp_lro_rx(&rq->lro, mb, 0) != 0) {
 			rq->ifp->if_input(rq->ifp, mb);
 		}
 #endif
 wq_ll_pop:
 		mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
 		    &wqe->next.next_wqe_index);
 	}
 
 	mlx5_cqwq_update_db_record(&rq->cq.wq);
 
 	/* ensure cq space is freed before enabling more cqes */
 	wmb();
 #ifndef HAVE_TURBO_LRO
 	while ((queued = SLIST_FIRST(&rq->lro.lro_active)) != NULL) {
 		SLIST_REMOVE_HEAD(&rq->lro.lro_active, next);
 		tcp_lro_flush(&rq->lro, queued);
 	}
 #endif
 	return (i);
 }
 
 void
 mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
 {
 	struct mlx5e_rq *rq = container_of(mcq, struct mlx5e_rq, cq.mcq);
 	int i = 0;
 
 #ifdef HAVE_PER_CQ_EVENT_PACKET
 	struct mbuf *mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rq->wqe_sz);
 
 	if (mb != NULL) {
 		/* this code is used for debugging purpose only */
 		mb->m_pkthdr.len = mb->m_len = 15;
 		memset(mb->m_data, 255, 14);
 		mb->m_data[14] = rq->ix;
 		mb->m_pkthdr.rcvif = rq->ifp;
 		rq->ifp->if_input(rq->ifp, mb);
 	}
 #endif
 
 	mtx_lock(&rq->mtx);
 
 	/*
 	 * Polling the entire CQ without posting new WQEs results in
 	 * lack of receive WQEs during heavy traffic scenarios.
 	 */
 	while (1) {
 		if (mlx5e_poll_rx_cq(rq, MLX5E_RX_BUDGET_MAX) !=
 		    MLX5E_RX_BUDGET_MAX)
 			break;
 		i += MLX5E_RX_BUDGET_MAX;
 		if (i >= MLX5E_BUDGET_MAX)
 			break;
 		mlx5e_post_rx_wqes(rq);
 	}
 	mlx5e_post_rx_wqes(rq);
 	mlx5e_cq_arm(&rq->cq);
 #ifdef HAVE_TURBO_LRO
 	tcp_tlro_flush(&rq->lro, 1);
 #endif
 	mtx_unlock(&rq->mtx);
 }
Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
===================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c	(revision 291937)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c	(revision 291938)
@@ -1,492 +1,500 @@
 /*-
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "en.h"
 #include <machine/atomic.h>
 
 void
 mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt, bool notify_hw)
 {
 	u16 pi = sq->pc & sq->wq.sz_m1;
 	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
 
 	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
 
 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
 
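 	/* account for the NOP so the TX completion handler can recognize it */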
 	sq->mbuf[pi].mbuf = NULL;
 	sq->mbuf[pi].num_bytes = 0;
 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 	sq->pc += sq->mbuf[pi].num_wqebbs;
 	if (notify_hw)
 		mlx5e_tx_notify_hw(sq, wqe, 0);
 }
 
 #if (__FreeBSD_version >= 1100000)
 static uint32_t mlx5e_hash_value;
 
 static void
 mlx5e_hash_init(void *arg)
 {
 	mlx5e_hash_value = m_ether_tcpip_hash_init();
 }
 
 /* Make kernel call mlx5e_hash_init after the random stack finished initializing */
 SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
 #endif
 
 static struct mlx5e_sq *
 mlx5e_select_queue(struct ifnet *ifp, struct mbuf *mb)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 	u32 ch;
 	u32 tc;
 
 	/* check if channels are successfully opened */
 	if (unlikely(priv->channel == NULL))
 		return (NULL);
 
 	/* obtain VLAN information if present */
 	if (mb->m_flags & M_VLANTAG) {
 		tc = (mb->m_pkthdr.ether_vtag >> 13);
 		if (tc >= priv->num_tc)
 			tc = priv->default_vlan_prio;
 	} else {
 		tc = priv->default_vlan_prio;
 	}
 
 	ch = priv->params.num_channels;
 
 	/* check if flowid is set */
 	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
-		ch = (mb->m_pkthdr.flowid % 128) % ch;
+#ifdef RSS
+		u32 temp;
+
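+		/*
+		 * Let the kernel RSS code map the flow ID to an RSS bucket;
+		 * fall back to a plain modulo when the hash type is not
+		 * handled.
+		 */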
+		if (rss_hash2bucket(mb->m_pkthdr.flowid,
+		    M_HASHTYPE_GET(mb), &temp) == 0)
+			ch = temp % ch;
+		else
+#endif
+			ch = (mb->m_pkthdr.flowid % 128) % ch;
 	} else {
 #if (__FreeBSD_version >= 1100000)
 		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
 		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
 #else
 		/*
 		 * m_ether_tcpip_hash not present in stable, so just
 		 * throw unhashed mbufs on queue 0
 		 */
 		ch = 0;
 #endif
 	}
 
 	/* check if channel is allocated */
 	if (unlikely(priv->channel[ch] == NULL))
 		return (NULL);
 
 	return (&priv->channel[ch]->sq[tc]);
 }
 
 static inline u16
 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, struct mbuf *mb)
 {
 	return (MIN(MLX5E_MAX_TX_INLINE, mb->m_len));
 }
 
 static int
 mlx5e_get_header_size(struct mbuf *mb)
 {
 	struct ether_vlan_header *eh;
 	struct tcphdr *th;
 	struct ip *ip;
 	int ip_hlen, tcp_hlen;
 	struct ip6_hdr *ip6;
 	uint16_t eth_type;
 	int eth_hdr_len;
 
 	eh = mtod(mb, struct ether_vlan_header *);
 	if (mb->m_len < ETHER_HDR_LEN)
 		return (0);
 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
 		eth_type = ntohs(eh->evl_proto);
 		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
 	} else {
 		eth_type = ntohs(eh->evl_encap_proto);
 		eth_hdr_len = ETHER_HDR_LEN;
 	}
 	if (mb->m_len < eth_hdr_len)
 		return (0);
 	switch (eth_type) {
 	case ETHERTYPE_IP:
 		ip = (struct ip *)(mb->m_data + eth_hdr_len);
 		if (mb->m_len < eth_hdr_len + sizeof(*ip))
 			return (0);
 		if (ip->ip_p != IPPROTO_TCP)
 			return (0);
 		ip_hlen = ip->ip_hl << 2;
 		eth_hdr_len += ip_hlen;
 		break;
 	case ETHERTYPE_IPV6:
 		ip6 = (struct ip6_hdr *)(mb->m_data + eth_hdr_len);
 		if (mb->m_len < eth_hdr_len + sizeof(*ip6))
 			return (0);
 		if (ip6->ip6_nxt != IPPROTO_TCP)
 			return (0);
 		eth_hdr_len += sizeof(*ip6);
 		break;
 	default:
 		return (0);
 	}
 	if (mb->m_len < eth_hdr_len + sizeof(*th))
 		return (0);
 	th = (struct tcphdr *)(mb->m_data + eth_hdr_len);
 	tcp_hlen = th->th_off << 2;
 	eth_hdr_len += tcp_hlen;
 	if (mb->m_len < eth_hdr_len)
 		return (0);
 	return (eth_hdr_len);
 }
 
 /*
  * The return value is not going back to the stack because of
  * the drbr
  */
 static int
 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
 {
 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
 	struct mlx5_wqe_data_seg *dseg;
 	struct mlx5e_tx_wqe *wqe;
 	struct ifnet *ifp;
 	int nsegs;
 	int err;
 	int x;
 	struct mbuf *mb = *mbp;
 	u16 ds_cnt;
 	u16 ihs;
 	u16 pi;
 	u8 opcode;
 
 	/*
 	 * Return ENOBUFS if the queue is full, this may trigger reinsertion
 	 * of the mbuf into the drbr (see mlx5e_xmit_locked)
 	 */
 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
 		return (ENOBUFS);
 	}
 
 	/* Align SQ edge with NOPs to avoid WQE wrap around */
 	pi = ((~sq->pc) & sq->wq.sz_m1);
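 	/* "pi" is the number of free WQEBBs before the wrap, minus one */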
 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
 		/* Send one multi NOP message instead of many */
 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS, false);
 		pi = ((~sq->pc) & sq->wq.sz_m1);
 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
 			m_freem(mb);
 			return (ENOMEM);
 		}
 	}
 
 	/* Setup local variables */
 	pi = sq->pc & sq->wq.sz_m1;
 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
 	ifp = sq->channel->ifp;
 
 	memset(wqe, 0, sizeof(*wqe));
 
 	/* Send a copy of the frame to the BPF listener, if any */
 	if (ifp != NULL && ifp->if_bpf != NULL)
 		ETHER_BPF_MTAP(ifp, mb);
 
 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
 	}
 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
 	}
 	if (wqe->eth.cs_flags == 0) {
 		sq->stats.csum_offload_none++;
 	}
 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
 		u32 payload_len;
 		u32 mss = mb->m_pkthdr.tso_segsz;
 		u32 num_pkts;
 
 		wqe->eth.mss = cpu_to_be16(mss);
 		opcode = MLX5_OPCODE_LSO;
 		ihs = mlx5e_get_header_size(mb);
 		payload_len = mb->m_pkthdr.len - ihs;
 		if (payload_len == 0)
 			num_pkts = 1;
 		else
 			num_pkts = DIV_ROUND_UP(payload_len, mss);
 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * ihs);
 
 		sq->stats.tso_packets++;
 		sq->stats.tso_bytes += payload_len;
 	} else {
 		opcode = MLX5_OPCODE_SEND;
 		ihs = mlx5e_get_inline_hdr_size(sq, mb);
 		sq->mbuf[pi].num_bytes = max_t(unsigned int,
 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
 	}
 	if (mb->m_flags & M_VLANTAG) {
 		struct ether_vlan_header *eh =
 		    (struct ether_vlan_header *)wqe->eth.inline_hdr_start;
 
 		/* Range checks */
 		if (ihs > (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN))
 			ihs = (MLX5E_MAX_TX_INLINE - ETHER_VLAN_ENCAP_LEN);
 		else if (ihs < ETHER_HDR_LEN) {
 			err = EINVAL;
 			goto tx_drop;
 		}
 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
 		m_adj(mb, ETHER_HDR_LEN);
 		/* Insert 4 bytes VLAN tag into data stream */
 		eh->evl_proto = eh->evl_encap_proto;
 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
 		/* Copy rest of header data, if any */
 		m_copydata(mb, 0, ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
 		m_adj(mb, ihs - ETHER_HDR_LEN);
 		/* Extend header by 4 bytes */
 		ihs += ETHER_VLAN_ENCAP_LEN;
 	} else {
 		m_copydata(mb, 0, ihs, wqe->eth.inline_hdr_start);
 		m_adj(mb, ihs);
 	}
 
 	wqe->eth.inline_hdr_sz = cpu_to_be16(ihs);
 
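 	/*
 	 * Count the data segments consumed by the control and ethernet
 	 * segments, including any inline header overflow.
 	 */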
 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
 	if (likely(ihs > sizeof(wqe->eth.inline_hdr_start))) {
 		ds_cnt += DIV_ROUND_UP(ihs - sizeof(wqe->eth.inline_hdr_start),
 		    MLX5_SEND_WQE_DS);
 	}
 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
 
 	/* Trim off empty mbufs */
 	while (mb->m_len == 0) {
 		mb = m_free(mb);
 		/* Check if all data has been inlined */
 		if (mb == NULL)
 			goto skip_dma;
 	}
 
 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
 	if (err == EFBIG) {
 		/*
 		 * Update *mbp before defrag in case it was trimmed in the
 		 * loop above
 		 */
 		*mbp = mb;
 		/* Update statistics */
 		sq->stats.defragged++;
 		/* Too many mbuf fragments */
 		mb = m_defrag(*mbp, M_NOWAIT);
 		if (mb == NULL) {
 			mb = *mbp;
 			goto tx_drop;
 		}
 		/* Try again */
 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
 	}
 	/* Catch errors */
 	if (err != 0) {
 		goto tx_drop;
 	}
 	*mbp = mb;
 
 	for (x = 0; x != nsegs; x++) {
 		if (segs[x].ds_len == 0)
 			continue;
 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
 		dseg->lkey = sq->mkey_be;
 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
 		dseg++;
 	}
 skip_dma:
 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
 
 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
 
 	/* Store pointer to mbuf */
 	sq->mbuf[pi].mbuf = mb;
 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 	sq->pc += sq->mbuf[pi].num_wqebbs;
 
 	/* Make sure all mbuf data is written to RAM */
 	if (mb != NULL)
 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map, BUS_DMASYNC_PREWRITE);
 
 	mlx5e_tx_notify_hw(sq, wqe, 0);
 
 	sq->stats.packets++;
 	return (0);
 
 tx_drop:
 	sq->stats.dropped++;
 	*mbp = NULL;
 	m_freem(mb);
 	return (err);
 }
 
 static void
 mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
 {
 	u16 sqcc;
 
 	/*
 	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
 	 * otherwise a cq overrun may occur
 	 */
 	sqcc = sq->cc;
 
 	while (budget--) {
 		struct mlx5_cqe64 *cqe;
 		struct mbuf *mb;
 		u16 ci;
 
 		cqe = mlx5e_get_cqe(&sq->cq);
 		if (!cqe)
 			break;
 
 		ci = sqcc & sq->wq.sz_m1;
 		mb = sq->mbuf[ci].mbuf;
 		sq->mbuf[ci].mbuf = NULL;	/* Safety clear */
 
 		if (mb == NULL) {
 			if (sq->mbuf[ci].num_bytes == 0) {
 				/* NOP */
 				sq->stats.nop++;
 			}
 		} else {
 			bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);
 
 			/* Free transmitted mbuf */
 			m_freem(mb);
 		}
 		sqcc += sq->mbuf[ci].num_wqebbs;
 	}
 
 	mlx5_cqwq_update_db_record(&sq->cq.wq);
 
 	/* Ensure cq space is freed before enabling more cqes */
 	wmb();
 
 	sq->cc = sqcc;
 
 	if (atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY))
 		taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
 }
 
 static int
 mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
 {
 	struct mbuf *next;
 	int err = 0;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		if (mb)
 			err = drbr_enqueue(ifp, sq->br, mb);
 		return (err);
 	}
 
 	if (mb != NULL)
 		/*
 		 * If we can't insert mbuf into drbr, try to xmit anyway.
 		 * We keep the error we got so we could return that after xmit.
 		 */
 		err = drbr_enqueue(ifp, sq->br, mb);
 
 	/* Process the queue */
 	while ((next = drbr_peek(ifp, sq->br)) != NULL) {
 		if (mlx5e_sq_xmit(sq, &next) != 0) {
 			if (next == NULL) {
 				drbr_advance(ifp, sq->br);
 			} else {
 				drbr_putback(ifp, sq->br, next);
 				atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_FULL);
 			}
 			break;
 		}
 		drbr_advance(ifp, sq->br);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 			break;
 	}
 	return (err);
 }
 
 int
 mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
 {
 	struct mlx5e_sq *sq;
 	int ret;
 
 	sq = mlx5e_select_queue(ifp, mb);
 	if (unlikely(sq == NULL)) {
 		/* Invalid send queue */
 		m_freem(mb);
 		return (ENXIO);
 	}
 	if (mtx_trylock(&sq->lock)) {
 		ret = mlx5e_xmit_locked(ifp, sq, mb);
 		mtx_unlock(&sq->lock);
 	} else {
 		ret = drbr_enqueue(ifp, sq->br, mb);
 		taskqueue_enqueue(sq->sq_tq, &sq->sq_task);
 	}
 
 	return (ret);
 }
 
 void
 mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq)
 {
 	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);
 
 	mtx_lock(&sq->comp_lock);
 	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
 	mlx5e_cq_arm(&sq->cq);
 	mtx_unlock(&sq->comp_lock);
 }
 
 void
 mlx5e_tx_que(void *context, int pending)
 {
 	struct mlx5e_sq *sq = context;
 	struct ifnet *ifp = sq->channel->ifp;
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		mtx_lock(&sq->lock);
 		if (!drbr_empty(ifp, sq->br))
 			mlx5e_xmit_locked(ifp, sq, NULL);
 		mtx_unlock(&sq->lock);
 	}
 }
Index: head/sys/modules/mlx5/Makefile
===================================================================
--- head/sys/modules/mlx5/Makefile	(revision 291937)
+++ head/sys/modules/mlx5/Makefile	(revision 291938)
@@ -1,34 +1,34 @@
 # $FreeBSD$
 .PATH:	${.CURDIR}/../../dev/mlx5/mlx5_core
 
 KMOD=mlx5
 SRCS= \
 mlx5_alloc.c \
 mlx5_cmd.c \
 mlx5_cq.c \
 mlx5_eq.c \
 mlx5_flow_table.c \
 mlx5_fw.c \
 mlx5_health.c \
 mlx5_mad.c \
 mlx5_main.c \
 mlx5_mcg.c \
 mlx5_mr.c \
 mlx5_pagealloc.c \
 mlx5_pd.c \
 mlx5_port.c \
 mlx5_qp.c \
 mlx5_srq.c \
 mlx5_transobj.c \
 mlx5_uar.c \
 mlx5_vport.c \
 mlx5_wq.c \
 device_if.h bus_if.h vnode_if.h pci_if.h \
-        opt_inet.h opt_inet6.h opt_random.h
+        opt_inet.h opt_inet6.h opt_random.h opt_rss.h
 
 CFLAGS+= -I${.CURDIR}/../../ofed/include
 CFLAGS+= -I${.CURDIR}/../../compat/linuxkpi/common/include
 
 .include <bsd.kmod.mk>
 
 CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}
Index: head/sys/modules/mlx5en/Makefile
===================================================================
--- head/sys/modules/mlx5en/Makefile	(revision 291937)
+++ head/sys/modules/mlx5en/Makefile	(revision 291938)
@@ -1,29 +1,29 @@
 # $FreeBSD$
 .PATH:	${.CURDIR}/../../dev/mlx5/mlx5_en
 
 KMOD=mlx5en
 SRCS= \
 mlx5_en_ethtool.c \
 mlx5_en_main.c \
 mlx5_en_tx.c \
 mlx5_en_flow_table.c \
 mlx5_en_rx.c \
 mlx5_en_txrx.c \
 device_if.h bus_if.h vnode_if.h pci_if.h \
-        opt_inet.h opt_inet6.h
+        opt_inet.h opt_inet6.h opt_rss.h
 
 .if defined(HAVE_TURBO_LRO)
 CFLAGS+= -DHAVE_TURBO_LRO
 SRCS+= tcp_tlro.c
 .endif
 
 .if defined(HAVE_PER_CQ_EVENT_PACKET)
 CFLAGS+= -DHAVE_PER_CQ_EVENT_PACKET
 .endif
 
 CFLAGS+= -I${.CURDIR}/../../ofed/include
 CFLAGS+= -I${.CURDIR}/../../compat/linuxkpi/common/include
 
 .include <bsd.kmod.mk>
 
 CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}