Index: head/sys/dev/mlx5/mlx5_core/mlx5_pagealloc.c
===================================================================
--- head/sys/dev/mlx5/mlx5_core/mlx5_pagealloc.c	(revision 341554)
+++ head/sys/dev/mlx5/mlx5_core/mlx5_pagealloc.c	(revision 341555)
@@ -1,609 +1,609 @@
 /*-
  * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <dev/mlx5/driver.h>
 #include "mlx5_core.h"
 
 /*
  * Build-time sanity check: PAGE_MASK, viewed as an unsigned pointer-sized
  * integer, must be numerically larger than PAGE_SIZE.
  * NOTE(review): presumably this guards against a PAGE_MASK defined as
  * (PAGE_SIZE - 1) rather than ~(PAGE_SIZE - 1) -- confirm.
  */
 CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);
 
 /*
  * Deferred page request. One instance is allocated per request in
  * mlx5_core_req_pages_handler() and released by pages_work_handler().
  */
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;	/* device the request belongs to */
 	u16	func_id;		/* function ID passed on to give/reclaim */
 	s32	npages;			/* > 0: give pages, < 0: reclaim pages */
 	struct work_struct work;	/* work item queued on dev->priv.pg_wq */
 };
 
 
 enum {
 	/*
 	 * Time budget, in milliseconds, used by the reclaim loops in this
 	 * file; it is converted to ticks with msecs_to_jiffies().
 	 */
 	MAX_RECLAIM_TIME_MSECS	= 5000,
 };
 
 /*
  * bus_dmamap_load() completion callback for a single firmware page.
  *
  * "arg" is the struct mlx5_fw_page being loaded. On success the bus
  * address of the first segment is recorded and the page is marked as
  * loaded; on error the page is marked as failed. In both cases
  * MLX5_DMA_DONE() is invoked on the owning device. The DMA lock is
  * taken here only when MLX5_DMA_OWNED() reports it is not already held.
  */
 static void
 mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	struct mlx5_fw_page *page = (struct mlx5_fw_page *)arg;
 	uint8_t already_locked;
 
 	already_locked = MLX5_DMA_OWNED(page->dev);
 	if (already_locked == 0)
 		MLX5_DMA_LOCK(page->dev);
 
 	if (error != 0) {
 		page->load_done = MLX5_LOAD_ST_FAILURE;
 	} else {
 		KASSERT(nseg == 1, ("Number of segments is different from 1"));
 		page->dma_addr = segs[0].ds_addr;
 		page->load_done = MLX5_LOAD_ST_SUCCESS;
 	}
 	MLX5_DMA_DONE(page->dev);
 
 	if (already_locked == 0)
 		MLX5_DMA_UNLOCK(page->dev);
 }
 
 /*
  * Issue a BUS_DMASYNC_PREWRITE sync on every page of the array that
  * starts at "fwp". The page count is taken from fwp->numpages and the
  * pages are visited from the last one down to the first.
  */
 void
 mlx5_fwp_flush(struct mlx5_fw_page *fwp)
 {
 	unsigned idx;
 
 	for (idx = fwp->numpages; idx != 0; idx--) {
 		struct mlx5_fw_page *page = &fwp[idx - 1];
 
 		bus_dmamap_sync(page->dev->cmd.dma_tag, page->dma_map,
 		    BUS_DMASYNC_PREWRITE);
 	}
 }
 
 /*
  * For every page of the array that starts at "fwp", issue a
  * BUS_DMASYNC_POSTREAD sync followed by a BUS_DMASYNC_PREREAD sync.
  * The page count is taken from fwp->numpages and the pages are
  * visited from the last one down to the first.
  */
 void
 mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
 {
 	unsigned idx;
 
 	for (idx = fwp->numpages; idx != 0; idx--) {
 		struct mlx5_fw_page *page = &fwp[idx - 1];
 
 		bus_dmamap_sync(page->dev->cmd.dma_tag, page->dma_map,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_sync(page->dev->cmd.dma_tag, page->dma_map,
 		    BUS_DMASYNC_PREREAD);
 	}
 }
 
 /*
  * Allocate an array of "num" firmware pages and load each one for DMA.
  *
  * When "num" is zero only a single zeroed bookkeeping structure is
  * allocated (with its "dev" field set) and no DMA memory is set up.
  * Otherwise the function needs a sleeping context: requests carrying
  * M_NOWAIT are refused with NULL.
  *
  * Each element records the owning device and the number of pages left
  * in the array counting from itself, so element zero holds "num".
  *
  * Returns the first element of the array, or NULL on failure. On
  * failure every page that was already set up is unloaded and freed,
  * and the array itself is released as well.
  */
 struct mlx5_fw_page *
 mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
 {
 	struct mlx5_fw_page *fwp;
 	unsigned x;
 	int err;
 
 	/* check for special case */
 	if (num == 0) {
 		fwp = kzalloc(sizeof(*fwp), flags);
 		if (fwp != NULL)
 			fwp->dev = dev;
 		return (fwp);
 	}
 
 	/* we need sleeping context for this function */
 	if (flags & M_NOWAIT)
 		return (NULL);
 
 	fwp = kzalloc(sizeof(*fwp) * num, flags);
 
 	/* serialize loading the DMA map(s) */
 	sx_xlock(&dev->cmd.dma_sx);
 
 	for (x = 0; x != num; x++) {
 		/* store pointer to MLX5 core device */
 		fwp[x].dev = dev;
 		/* store number of pages left from the array */
 		fwp[x].numpages = num - x;
 
 		/* allocate memory */
 		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
 		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
 		if (err != 0)
 			goto failure;
 
 		/* load memory into DMA */
 		MLX5_DMA_LOCK(dev);
-		err = bus_dmamap_load(
+		(void) bus_dmamap_load(
 		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
 		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
 		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);
 
 		/*
 		 * The outcome of the load is reported through the
 		 * callback, which updates load_done; wait for it.
 		 */
 		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
 			MLX5_DMA_WAIT(dev);
 		MLX5_DMA_UNLOCK(dev);
 
 		/* check for error */
 		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
 			/* page "x" was never loaded, so only free its memory */
 			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
 			    fwp[x].dma_map);
 			goto failure;
 		}
 	}
 	sx_xunlock(&dev->cmd.dma_sx);
 	return (fwp);
 
 failure:
 	/* tear down the pages [0, x) that were fully set up */
 	while (x--) {
 		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
 		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr, fwp[x].dma_map);
 	}
 	sx_xunlock(&dev->cmd.dma_sx);
 
 	/* release the page array itself, which would otherwise be leaked */
 	kfree(fwp);
 	return (NULL);
 }
 
 /*
  * Release a page array obtained from mlx5_fwp_alloc().
  *
  * A NULL pointer is accepted and ignored. When fwp->numpages is zero
  * only the bookkeeping structure is freed; otherwise every page is
  * unloaded and its DMA memory freed, last page first, before the
  * array itself is freed.
  */
 void
 mlx5_fwp_free(struct mlx5_fw_page *fwp)
 {
 	struct mlx5_core_dev *mdev;
 	unsigned left;
 
 	if (fwp == NULL)
 		return;
 
 	left = fwp->numpages;
 	if (left != 0) {
 		mdev = fwp->dev;
 		do {
 			left--;
 			bus_dmamap_unload(mdev->cmd.dma_tag, fwp[left].dma_map);
 			bus_dmamem_free(mdev->cmd.dma_tag, fwp[left].virt_addr,
 			    fwp[left].dma_map);
 		} while (left != 0);
 	}
 
 	kfree(fwp);
 }
 
 /*
  * Translate a byte offset into the page array "fwp" to a DMA address:
  * the offset selects a page in units of MLX5_ADAPTER_PAGE_SIZE and the
  * remainder is added to that page's dma_addr.
  */
 u64
 mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
 {
 	const size_t page_no = offset / MLX5_ADAPTER_PAGE_SIZE;
 	const size_t page_off = offset % MLX5_ADAPTER_PAGE_SIZE;
 
 	KASSERT(page_no < fwp->numpages, ("Invalid offset: %lld", (long long)offset));
 
 	return (fwp[page_no].dma_addr + page_off);
 }
 
 /*
  * Translate a byte offset into the page array "fwp" to a kernel
  * virtual address: the offset selects a page in units of
  * MLX5_ADAPTER_PAGE_SIZE and the remainder is added to that page's
  * virt_addr.
  */
 void *
 mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
 {
 	const size_t page_no = offset / MLX5_ADAPTER_PAGE_SIZE;
 	const size_t page_off = offset % MLX5_ADAPTER_PAGE_SIZE;
 
 	KASSERT(page_no < fwp->numpages, ("Invalid offset: %lld", (long long)offset));
 
 	return ((char *)fwp[page_no].virt_addr + page_off);
 }
 
 /*
  * Insert "nfp" into the device's red-black tree of firmware pages,
  * keyed by DMA address. Nodes with a smaller key than the new page
  * send the search to the left, nodes with a larger key to the right.
  *
  * Returns 0 on success or -EEXIST when a page with the same DMA
  * address is already present.
  * NOTE(review): the "_locked" suffix suggests the caller holds the DMA
  * lock, as alloc_4k() does -- confirm for other callers.
  */
 static int
 mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
 {
 	struct rb_root *tree = &dev->priv.page_root;
 	struct rb_node **link = &tree->rb_node;
 	struct rb_node *up = NULL;
 
 	while (*link != NULL) {
 		struct mlx5_fw_page *cur;
 
 		up = *link;
 		cur = rb_entry(up, struct mlx5_fw_page, rb_node);
 
 		if (cur->dma_addr == nfp->dma_addr)
 			return (-EEXIST);
 
 		if (cur->dma_addr < nfp->dma_addr)
 			link = &up->rb_left;
 		else
 			link = &up->rb_right;
 	}
 
 	rb_link_node(&nfp->rb_node, up, link);
 	rb_insert_color(&nfp->rb_node, tree);
 	return (0);
 }
 
 /*
  * Look up the firmware page whose DMA address equals "addr" and, when
  * found, unlink it from the device's red-black tree. The search uses
  * the same ordering as mlx5_insert_fw_page_locked().
  *
  * Returns the unlinked page, or NULL when no page matches.
  */
 static struct mlx5_fw_page *
 mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
 {
 	struct rb_node *cur = dev->priv.page_root.rb_node;
 
 	while (cur != NULL) {
 		struct mlx5_fw_page *page;
 
 		page = rb_entry(cur, struct mlx5_fw_page, rb_node);
 		if (page->dma_addr == addr) {
 			rb_erase(&page->rb_node, &dev->priv.page_root);
 			return (page);
 		}
 
 		if (page->dma_addr < addr)
 			cur = cur->rb_left;
 		else
 			cur = cur->rb_right;
 	}
 	return (NULL);
 }
 
 /*
  * Allocate one firmware page for "func_id", register it in the
  * device's page tree and hand back its DMA address through "addr".
  *
  * Returns 0 on success, -ENOMEM when the page cannot be allocated, or
  * the error from the tree insertion, in which case the page is freed
  * and "addr" is left untouched.
  */
 static int
 alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
 {
 	struct mlx5_fw_page *page;
 	int ret;
 
 	page = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
 	if (page == NULL)
 		return (-ENOMEM);
 
 	page->func_id = func_id;
 
 	MLX5_DMA_LOCK(dev);
 	ret = mlx5_insert_fw_page_locked(dev, page);
 	MLX5_DMA_UNLOCK(dev);
 
 	if (ret != 0) {
 		mlx5_fwp_free(page);
 		return (ret);
 	}
 
 	/* make sure cached data is cleaned */
 	mlx5_fwp_invalidate(page);
 
 	/* report the DMA address to the caller */
 	*addr = page->dma_addr;
 	return (ret);
 }
 
 /*
  * Remove the firmware page with DMA address "addr" from the device's
  * page tree and free it. A warning is logged when no such page is
  * registered.
  */
 static void
 free_4k(struct mlx5_core_dev *dev, u64 addr)
 {
 	struct mlx5_fw_page *page;
 
 	MLX5_DMA_LOCK(dev);
 	page = mlx5_remove_fw_page_locked(dev, addr);
 	MLX5_DMA_UNLOCK(dev);
 
 	if (page != NULL) {
 		mlx5_fwp_free(page);
 		return;
 	}
 	mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n", (long long)addr);
 }
 
 /*
  * Execute the QUERY_PAGES command. "boot" selects the boot-pages
  * op_mod; otherwise the init-pages op_mod is used.
  *
  * On success the num_pages and function_id fields of the reply are
  * stored through "npages" and "func_id" and 0 is returned. On failure
  * the error from mlx5_cmd_exec() is returned and the outputs are not
  * written.
  */
 static int
 mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
     s32 *npages, int boot)
 {
 	u32 query[MLX5_ST_SZ_DW(query_pages_in)] = {0};
 	u32 reply[MLX5_ST_SZ_DW(query_pages_out)] = {0};
 	int ret;
 
 	MLX5_SET(query_pages_in, query, opcode, MLX5_CMD_OP_QUERY_PAGES);
 	if (boot) {
 		MLX5_SET(query_pages_in, query, op_mod,
 		    MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES);
 	} else {
 		MLX5_SET(query_pages_in, query, op_mod,
 		    MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
 	}
 
 	ret = mlx5_cmd_exec(dev, query, sizeof(query), reply, sizeof(reply));
 	if (ret == 0) {
 		*npages = MLX5_GET(query_pages_out, reply, num_pages);
 		*func_id = MLX5_GET(query_pages_out, reply, function_id);
 	}
 	return (ret);
 }
 
 /*
  * Allocate "npages" firmware pages for "func_id" and hand them to the
  * firmware with a MANAGE_PAGES / MLX5_PAGES_GIVE command.
  *
  * When "notify_fail" is non-zero and anything goes wrong, a
  * MANAGE_PAGES / MLX5_PAGES_CANT_GIVE command is sent so the failure is
  * reported to the firmware. Every page allocated by this call is freed
  * again on failure.
  *
  * Returns 0 on success, otherwise the error from the failing step.
  */
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 		      int notify_fail)
 {
 	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
 	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
 	u64 addr;
 	int err;
 	u32 *in, *nin;
 	/* number of pages allocated so far; drives the cleanup at out_4k */
 	int i = 0;
 
 	/* the command carries one "pas" entry per page */
 	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
 	in = mlx5_vzalloc(inlen);
 	if (!in) {
 		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
 		err = -ENOMEM;
 		goto out_alloc;
 	}
 
 	/* allocate the pages and record their DMA addresses in the command */
 	for (i = 0; i < npages; i++) {
 		err = alloc_4k(dev, &addr, func_id);
 		if (err)
 			goto out_alloc;
 		MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
 	}
 
 	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
 	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
 
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err) {
 		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
 			       func_id, npages, err);
 		goto out_alloc;
 	}
 	/* account for the pages now owned by the firmware */
 	dev->priv.fw_pages += npages;
 	dev->priv.pages_per_func[func_id] += npages;
 
 	mlx5_core_dbg(dev, "err %d\n", err);
 
 	goto out_free;
 
 out_alloc:
 	/* failure path: optionally report the failure via CANT_GIVE */
 	if (notify_fail) {
 		nin = mlx5_vzalloc(inlen);
 		if (!nin)
 			goto out_4k;
 
 		memset(&out, 0, sizeof(out));
 		MLX5_SET(manage_pages_in, nin, opcode, MLX5_CMD_OP_MANAGE_PAGES);
 		MLX5_SET(manage_pages_in, nin, op_mod, MLX5_PAGES_CANT_GIVE);
 		MLX5_SET(manage_pages_in, nin, function_id, func_id);
 		if (mlx5_cmd_exec(dev, nin, inlen, out, sizeof(out)))
 			mlx5_core_warn(dev, "page notify failed\n");
 		kvfree(nin);
 	}
 
 out_4k:
 	/*
 	 * Free the pages [0, i) allocated above. When "in" could not be
 	 * allocated "i" is still zero, so the loop body never runs and
 	 * "in" is not dereferenced.
 	 */
 	for (i--; i >= 0; i--)
 		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
 out_free:
 	kvfree(in);
 	return err;
 }
 
 /*
  * Run a MANAGE_PAGES reclaim command.
  *
  * Unless the device is in MLX5_DEVICE_STATE_INTERNAL_ERROR the command
  * is simply passed to mlx5_cmd_exec(). In the internal error state the
  * reply is synthesized instead: the page tree is walked and up to
  * input_num_entries pages belonging to the requested function_id are
  * written into the "pas" array of "out", with output_num_entries set
  * to the number of entries written. That path always returns 0.
  */
 static int
 reclaim_pages_cmd(struct mlx5_core_dev *dev,
     u32 *in, int in_size, u32 *out, int out_size)
 {
 	struct mlx5_fw_page *page;
 	struct rb_node *node;
 	u32 owner;
 	u32 wanted;
 	u32 count = 0;
 
 	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
 		return mlx5_cmd_exec(dev, in, in_size, out, out_size);
 
 	/* No hard feelings, we want our pages back! */
 	wanted = MLX5_GET(manage_pages_in, in, input_num_entries);
 	owner = MLX5_GET(manage_pages_in, in, function_id);
 
 	for (node = rb_first(&dev->priv.page_root);
 	    node != NULL && count < wanted; node = rb_next(node)) {
 		page = rb_entry(node, struct mlx5_fw_page, rb_node);
 		if (page->func_id == owner) {
 			MLX5_ARRAY_SET64(manage_pages_out, out, pas, count,
 			    page->dma_addr);
 			count++;
 		}
 	}
 
 	MLX5_SET(manage_pages_out, out, output_num_entries, count);
 	return 0;
 }
 
 /*
  * Ask for up to "npages" pages of "func_id" back via MANAGE_PAGES /
  * MLX5_PAGES_TAKE and free every page named in the reply.
  *
  * When "nclaimed" is not NULL it is first reset to zero and, on
  * success, set to the number of pages reported in the reply. The
  * per-device and per-function page counters are decreased by that
  * number.
  *
  * Returns 0 on success, -ENOMEM when the reply buffer cannot be
  * allocated, or the error from reclaim_pages_cmd().
  */
 static int
 reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
     int *nclaimed)
 {
 	u32 cmd[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
 	int reply_len = MLX5_ST_SZ_BYTES(manage_pages_out);
 	u32 *reply;
 	int returned;
 	int ret;
 	int idx;
 
 	if (nclaimed != NULL)
 		*nclaimed = 0;
 
 	/* the reply carries one "pas" entry per requested page */
 	reply_len += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
 	reply = mlx5_vzalloc(reply_len);
 	if (reply == NULL)
 		return -ENOMEM;
 
 	MLX5_SET(manage_pages_in, cmd, opcode, MLX5_CMD_OP_MANAGE_PAGES);
 	MLX5_SET(manage_pages_in, cmd, op_mod, MLX5_PAGES_TAKE);
 	MLX5_SET(manage_pages_in, cmd, function_id, func_id);
 	MLX5_SET(manage_pages_in, cmd, input_num_entries, npages);
 
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, reply_len);
 	ret = reclaim_pages_cmd(dev, cmd, sizeof(cmd), reply, reply_len);
 	if (ret != 0) {
 		mlx5_core_err(dev, "failed reclaiming pages\n");
 		kvfree(reply);
 		return ret;
 	}
 
 	returned = MLX5_GET(manage_pages_out, reply, output_num_entries);
 	if (nclaimed != NULL)
 		*nclaimed = returned;
 
 	dev->priv.fw_pages -= returned;
 	dev->priv.pages_per_func[func_id] -= returned;
 
 	idx = 0;
 	while (idx < returned) {
 		free_4k(dev, MLX5_GET64(manage_pages_out, reply, pas[idx]));
 		idx++;
 	}
 
 	kvfree(reply);
 	return ret;
 }
 
 /*
  * Work queue callback for a struct mlx5_pages_req.
  *
  * A negative page count reclaims that many pages, a positive count
  * gives that many pages (with failure notification enabled) and zero
  * does nothing. A warning is logged on failure and the request is
  * always freed.
  */
 static void
 pages_work_handler(struct work_struct *work)
 {
 	struct mlx5_pages_req *request =
 	    container_of(work, struct mlx5_pages_req, work);
 	struct mlx5_core_dev *dev = request->dev;
 	const int reclaiming = (request->npages < 0);
 	int ret = 0;
 
 	if (reclaiming) {
 		ret = reclaim_pages(dev, request->func_id,
 		    -1 * request->npages, NULL);
 	} else if (request->npages > 0) {
 		ret = give_pages(dev, request->func_id, request->npages, 1);
 	}
 
 	if (ret != 0) {
 		mlx5_core_warn(dev, "%s fail %d\n",
 		    reclaiming ? "reclaim" : "give", ret);
 	}
 
 	kfree(request);
 }
 
 /*
  * Queue a page request for "func_id" on the device's page work queue.
  *
  * The request is allocated with GFP_ATOMIC and processed later by
  * pages_work_handler(). Allocation and queueing failures are only
  * logged.
  */
 void
 mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
     s32 npages)
 {
 	struct mlx5_pages_req *request;
 
 	request = kzalloc(sizeof(*request), GFP_ATOMIC);
 	if (request == NULL) {
 		mlx5_core_warn(dev, "failed to allocate pages request\n");
 		return;
 	}
 
 	request->npages = npages;
 	request->func_id = func_id;
 	request->dev = dev;
 	INIT_WORK(&request->work, pages_work_handler);
 
 	if (queue_work(dev->priv.pg_wq, &request->work))
 		return;
 	mlx5_core_warn(dev, "failed to queue pages handler work\n");
 }
 
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
 {
 	u16 uninitialized_var(func_id);
 	s32 uninitialized_var(npages);
 	int err;
 
 	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
 	if (err)
 		return err;
 
 	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
 		      npages, boot ? "boot" : "init", func_id);
 
 	return give_pages(dev, func_id, npages, 0);
 }
 
 enum {
 	/*
 	 * Number of command data blocks that optimal_reclaimed_pages()
 	 * budgets for the reply of a single reclaim command.
 	 */
 	MLX5_BLKS_FOR_RECLAIM_PAGES = 12
 };
 
 /*
  * Poll, sleeping 1 ms between checks, until the number of firmware
  * pages not accounted to function 0 drops to zero or the deadline
  * passes. The deadline starts at MAX_RECLAIM_TIME_MSECS from now and
  * is pushed out by 100 ms whenever the count differs from the
  * previous sample.
  *
  * Returns the negated number of pages still outstanding, i.e. zero
  * when everything came back.
  */
 s64
 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev)
 {
 	int deadline = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
 	s64 last_seen = 0;
 	s64 outstanding = 0;
 
 	for (;;) {
 		if (time_after(jiffies, deadline))
 			break;
 
 		/* exclude own function, VFs only */
 		outstanding = dev->priv.fw_pages - dev->priv.pages_per_func[0];
 		if (outstanding == 0)
 			break;
 
 		if (outstanding != last_seen)
 			deadline = deadline + msecs_to_jiffies(100);
 
 		last_seen = outstanding;
 		msleep(1);
 	}
 
 	if (outstanding != 0)
 		mlx5_core_warn(dev, "FW did not return all VFs pages, will cause to memory leak\n");
 
 	return -outstanding;
 }
 
 /*
  * Compute how many page addresses fit into one reclaim reply: the
  * inline output area of the command layout plus
  * MLX5_BLKS_FOR_RECLAIM_PAGES data blocks, minus the fixed part of
  * manage_pages_out, divided by the size of one "pas" entry.
  */
 static int
 optimal_reclaimed_pages(void)
 {
 	/* only used as sizeof() operands, never dereferenced */
 	struct mlx5_cmd_prot_block *block;
 	struct mlx5_cmd_layout *lay;
 
 	return ((sizeof(lay->out) +
 	    MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
 	    MLX5_ST_SZ_BYTES(manage_pages_out)) /
 	    MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]));
 }
 
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 {
 	int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
 	struct mlx5_fw_page *fwp;
 	struct rb_node *p;
 	int nclaimed = 0;
 	int err;
 
 	do {
 		p = rb_first(&dev->priv.page_root);
 		if (p) {
 			fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
 			err = reclaim_pages(dev, fwp->func_id,
 					    optimal_reclaimed_pages(),
 					    &nclaimed);
 			if (err) {
 				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
 					       err);
 				return err;
 			}
 
 			if (nclaimed)
 				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
 		}
 		if (time_after(jiffies, end)) {
 			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
 			break;
 		}
 	} while (p);
 
 	return 0;
 }
 
 /* Reset the device's tree of firmware pages to the empty tree. */
 void
 mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
 	dev->priv.page_root = RB_ROOT;
 }
 
 /* Counterpart of mlx5_pagealloc_init(); currently there is nothing to undo. */
 void
 mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
 {
 	/* nothing */
 }
 
 /*
  * Create the single-threaded work queue on which page requests are
  * processed. Returns 0 on success or -ENOMEM when the work queue
  * cannot be created.
  */
 int
 mlx5_pagealloc_start(struct mlx5_core_dev *dev)
 {
 	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
 
 	return (dev->priv.pg_wq ? 0 : -ENOMEM);
 }
 
 /* Destroy the page request work queue created by mlx5_pagealloc_start(). */
 void
 mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
 {
 	destroy_workqueue(dev->priv.pg_wq);
 }
Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
===================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c	(revision 341554)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c	(revision 341555)
@@ -1,1542 +1,1541 @@
 /*-
  * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "en.h"
 
 #ifdef RATELIMIT
 
 static int mlx5e_rl_open_workers(struct mlx5e_priv *);
 static void mlx5e_rl_close_workers(struct mlx5e_priv *);
 static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS);
 static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x,
     struct sysctl_oid *, const char *name, const char *desc);
 static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
       struct sysctl_oid *node, const char *name, const char *desc);
 static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value);
 static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value);
 
 static void
 mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl,
     struct mlx5e_sq_param *param)
 {
 	void *sqc = param->sqc;
 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);
 
 	MLX5_SET(wq, wq, log_wq_sz, log_sq_size);
 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
 	MLX5_SET(wq, wq, pd, rl->priv->pdn);
 
 	param->wq.buf_numa_node = 0;
 	param->wq.db_numa_node = 0;
 	param->wq.linear = 1;
 }
 
 static void
 mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl,
     struct mlx5e_cq_param *param)
 {
 	void *cqc = param->cqc;
 	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);
 
 	MLX5_SET(cqc, cqc, log_cq_size, log_sq_size);
 	MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs);
 	MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts);
 
 	switch (rl->param.tx_coalesce_mode) {
 	case 0:
 		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 		break;
 	default:
 		if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe))
 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 		else
 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 		break;
 	}
 }
 
 /*
  * Zero "cparam" and fill in its completion queue and send queue
  * parameters from the current rate limit settings.
  */
 static void
 mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl,
     struct mlx5e_rl_channel_param *cparam)
 {
 	memset(cparam, 0, sizeof(*cparam));
 
 	mlx5e_rl_build_cq_param(rl, &cparam->cq);
 	mlx5e_rl_build_sq_param(rl, &cparam->sq);
 }
 
 static int
 mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
     struct mlx5e_sq_param *param, int ix)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	void *sqc = param->sqc;
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	int err;
 
 	/* Create DMA descriptor TAG */
 	if ((err = -bus_dma_tag_create(
 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
 	    1,				/* any alignment */
 	    0,				/* no boundary */
 	    BUS_SPACE_MAXADDR,		/* lowaddr */
 	    BUS_SPACE_MAXADDR,		/* highaddr */
 	    NULL, NULL,			/* filter, filterarg */
 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
 	    0,				/* flags */
 	    NULL, NULL,			/* lockfunc, lockfuncarg */
 	    &sq->dma_tag)))
 		goto done;
 
 	/* use shared UAR */
 	sq->uar = priv->rl.sq_uar;
 
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
 	    &sq->wq_ctrl);
 	if (err)
 		goto err_free_dma_tag;
 
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 	/*
 	 * The sq->bf_buf_size variable is intentionally left zero so
 	 * that the doorbell writes will occur at the same memory
 	 * location.
 	 */
 
 	err = mlx5e_alloc_sq_db(sq);
 	if (err)
 		goto err_sq_wq_destroy;
 
 	sq->mkey_be = cpu_to_be32(priv->mr.key);
 	sq->ifp = priv->ifp;
 	sq->priv = priv;
 	sq->max_inline = priv->params.tx_max_inline;
 	sq->min_inline_mode = priv->params.tx_min_inline_mode;
 	sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert);
 
 	return (0);
 
 err_sq_wq_destroy:
 	mlx5_wq_destroy(&sq->wq_ctrl);
 err_free_dma_tag:
 	bus_dma_tag_destroy(sq->dma_tag);
 done:
 	return (err);
 }
 
 /*
  * Release the per-entry database and the work queue of a rate limit
  * send queue.
  * NOTE(review): the DMA tag created by mlx5e_rl_create_sq() is not
  * destroyed here -- confirm that mlx5e_free_sq_db() or the caller
  * releases it, otherwise the tag is leaked.
  */
 static void
 mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
 {
 	mlx5e_free_sq_db(sq);
 	mlx5_wq_destroy(&sq->wq_ctrl);
 }
 
 /*
  * Create a rate limit send queue, enable it on the rate limit TIS and
  * move it from the reset state to the ready state.
  *
  * Returns 0 on success. When a later step fails the earlier steps are
  * undone and the error of the failing step is returned.
  */
 static int
 mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
     struct mlx5e_sq_param *param, int ix)
 {
 	int ret;
 
 	ret = mlx5e_rl_create_sq(priv, sq, param, ix);
 	if (ret != 0)
 		return (ret);
 
 	ret = mlx5e_enable_sq(sq, param, priv->rl.tisn);
 	if (ret == 0) {
 		ret = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
 		    MLX5_SQC_STATE_RDY);
 		if (ret == 0)
 			return (0);
 
 		/* the state change failed, take the queue down again */
 		mlx5e_disable_sq(sq);
 	}
 	mlx5e_rl_destroy_sq(sq);
 
 	return (ret);
 }
 
 /*
  * Initialize the two mutexes of a rate limit send queue, bind the
  * completion event callout to the send queue lock and load the TX
  * completion event factor, which is forced to at least one.
  */
 static void
 mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
 {
 	mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF);
 	mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF);
 
 	callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
 
 	/* a factor of zero is not allowed, fall back to one */
 	sq->cev_factor = priv->rl.param.tx_completion_fact;
 	if (sq->cev_factor == 0)
 		sq->cev_factor = 1;
 }
 
 /*
  * Allocate a send queue for a rate limit channel, open its completion
  * queue and the send queue itself, publish the pointer through "ppsq"
  * and invoke the completion handler once.
  *
  * Returns 0 on success. On failure everything set up here is torn
  * down, the tx_allocate_resource_failure counter is incremented and
  * the error is returned; "*ppsq" is not written in that case.
  */
 static int
 mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix,
     struct mlx5e_rl_channel_param *cparam,
     struct mlx5e_sq *volatile *ppsq)
 {
 	struct mlx5e_priv *priv = rlw->priv;
 	struct mlx5e_sq *txq;
 	int ret;
 
 	txq = malloc(sizeof(*txq), M_MLX5EN, M_WAITOK | M_ZERO);
 
 	/* init mutexes */
 	mlx5e_rl_chan_mtx_init(priv, txq);
 
 	/* open TX completion queue */
 	ret = mlx5e_open_cq(priv, &cparam->cq, &txq->cq,
 	    &mlx5e_tx_cq_comp, eq_ix);
 	if (ret == 0) {
 		ret = mlx5e_rl_open_sq(priv, txq, &cparam->sq, eq_ix);
 		if (ret == 0) {
 			/* store TX channel pointer */
 			*ppsq = txq;
 
 			/* poll TX queue initially */
 			txq->cq.mcq.comp(&txq->cq.mcq);
 
 			return (0);
 		}
 		mlx5e_close_cq(&txq->cq);
 	}
 
 	/* destroy mutexes */
 	mtx_destroy(&txq->lock);
 	mtx_destroy(&txq->comp_lock);
 	free(txq, M_MLX5EN);
 	atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL);
 	return (ret);
 }
 
 /*
  * Close the send queue referenced by "*ppsq", if any.
  *
  * The pointer is cleared before the queue is drained, disabled and
  * destroyed; afterwards the completion queue is closed, the mutexes
  * are destroyed and the memory is freed. Nothing happens when the
  * pointer is already NULL.
  */
 static void
 mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq)
 {
 	struct mlx5e_sq *txq = *ppsq;
 
 	if (txq != NULL) {
 		/* ensure channel pointer is no longer used */
 		*ppsq = NULL;
 
 		/* teardown and destroy SQ */
 		mlx5e_drain_sq(txq);
 		mlx5e_disable_sq(txq);
 		mlx5e_rl_destroy_sq(txq);
 
 		/* close CQ */
 		mlx5e_close_cq(&txq->cq);
 
 		/* destroy mutexes */
 		mtx_destroy(&txq->lock);
 		mtx_destroy(&txq->comp_lock);
 
 		free(txq, M_MLX5EN);
 	}
 }
 
 /*
  * Recompute the upper bound of the TX completion factor from the
  * current TX queue size and clamp the configured factor to it.
  *
  * The bound is tx_queue_size / (2 * MLX5_SEND_WQE_MAX_WQEBBS), i.e. the
  * distance between completion events is limited to half the queue
  * when every packet consumes the maximum number of queue entries. The
  * bound itself is kept within [1, 65535] so it fits in 16 bits.
  */
 static void
 mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl)
 {
 	uint64_t limit;
 
 	limit = rl->param.tx_queue_size / (2 * MLX5_SEND_WQE_MAX_WQEBBS);
 
 	/* keep the bound within 1..65535 */
 	if (limit == 0)
 		limit = 1;
 	else if (limit > 65535)
 		limit = 65535;
 	rl->param.tx_completion_fact_max = limit;
 
 	/* bring the configured factor back into range, if needed */
 	if (rl->param.tx_completion_fact < 1)
 		rl->param.tx_completion_fact = 1;
 	else if (rl->param.tx_completion_fact > limit)
 		rl->param.tx_completion_fact = limit;
 }
 
 /*
  * Issue a MODIFY_SQ command that keeps the send queue in the ready
  * state and sets its packet pacing rate limit index to "rl_index".
  *
  * Returns -ENOMEM when the command buffer cannot be allocated,
  * otherwise the result of mlx5_core_modify_sq().
  */
 static int
 mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index)
 {
 	struct mlx5_core_dev *mdev = sq->priv->mdev;
 	int cmd_len = MLX5_ST_SZ_BYTES(modify_sq_in);
 	void *cmd;
 	void *sq_ctx;
 	int ret;
 
 	cmd = mlx5_vzalloc(cmd_len);
 	if (cmd == NULL)
 		return (-ENOMEM);
 
 	MLX5_SET(modify_sq_in, cmd, sqn, sq->sqn);
 	MLX5_SET(modify_sq_in, cmd, sq_state, MLX5_SQC_STATE_RDY);
 	MLX5_SET64(modify_sq_in, cmd, modify_bitmask, 1);
 
 	sq_ctx = MLX5_ADDR_OF(modify_sq_in, cmd, ctx);
 	MLX5_SET(sqc, sq_ctx, state, MLX5_SQC_STATE_RDY);
 	MLX5_SET(sqc, sq_ctx, packet_pacing_rate_limit_index, rl_index);
 
 	ret = mlx5_core_modify_sq(mdev, cmd, cmd_len);
 
 	kvfree(cmd);
 
 	return (ret);
 }
 
 /*
  * Search the configured rate limit table for the entry closest to
  * "user_rate". Snapping to table entries avoids that a single socket
  * based application can allocate all the available hardware rates.
  *
  * Table entries equal to zero are skipped. If the closest entry
  * deviates too much from the requested rate, zero (unlimited) is
  * returned instead. The allowed deviation is
  * user_rate * tx_allowed_deviation / 1000, rounded up, with user_rate
  * first capped at tx_limit_max.
  */
 static uint64_t
 mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate)
 {
 	uint64_t best_dist = -1ULL;
 	uint64_t best_rate = 0;		/* unlimited */
 	uint64_t idx;
 
 	/* search for closest rate */
 	for (idx = 0; idx != rl->param.tx_rates_def; idx++) {
 		const uint64_t cand = rl->rate_limit_table[idx];
 		uint64_t dist;
 
 		if (cand != 0) {
 			dist = (cand > user_rate) ?
 			    (cand - user_rate) : (user_rate - cand);
 
 			/* remember the candidate when it is strictly closer */
 			if (dist < best_dist) {
 				best_dist = dist;
 				best_rate = cand;
 			}
 		}
 	}
 
 	/* range check for multiplication below */
 	if (user_rate > rl->param.tx_limit_max)
 		user_rate = rl->param.tx_limit_max;
 
 	/* fallback to unlimited, if rate deviates too much */
 	if (best_dist > howmany(user_rate *
 	    rl->param.tx_allowed_deviation, 1000ULL))
 		best_rate = 0;
 
 	return (best_rate);
 }
 
 /*
  * This function sets the requested rate for a rate limit channel, in
  * bits per second. The requested rate will be filtered through the
  * find best rate function above.
  *
  * A rate of zero selects rate index zero, which the code below treats
  * as "unlimited". The reference held on the channel's previous rate,
  * if any, is dropped.
  *
  * The function is entered and left with the worker lock held, but the
  * lock is dropped and re-acquired while the rate is looked up, added
  * and applied.
  *
  * The return value is the negated result of mlx5e_rl_modify_sq(), or
  * zero when the channel has no send queue. A failure to find or add
  * the rate is not returned; it falls back to unlimited and is only
  * recorded in the statistics counters.
  */
 static int
 mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
     struct mlx5e_rl_channel *channel, uint64_t rate)
 {
 	struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
 	struct mlx5e_sq *sq;
 	uint64_t temp;
 	uint16_t index;
 	uint16_t burst;
 	int error;
 
 	if (rate != 0) {
 		/* the worker lock is not held while the rate is resolved */
 		MLX5E_RL_WORKER_UNLOCK(rlw);
 
 		MLX5E_RL_RLOCK(rl);
 
 		/* get current burst size in bytes */
 		temp = rl->param.tx_burst_size *
 		    MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);
 
 		/* limit burst size to 64K currently */
 		if (temp > 65535)
 			temp = 65535;
 		burst = temp;
 
 		/* find best rate */
 		rate = mlx5e_rl_find_best_rate_locked(rl, rate);
 
 		MLX5E_RL_RUNLOCK(rl);
 
 		if (rate == 0) {
 			/* rate doesn't exist, fallback to unlimited */
-			error = EINVAL;
 			index = 0;
 			rate = 0;
 			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
 		} else {
 			/* get a reference on the new rate */
 			error = -mlx5_rl_add_rate(rlw->priv->mdev,
 			    howmany(rate, 1000), burst, &index);
 
 			if (error != 0) {
 				/* adding rate failed, fallback to unlimited */
 				index = 0;
 				rate = 0;
 				atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
 			}
 		}
 		MLX5E_RL_WORKER_LOCK(rlw);
 	} else {
 		index = 0;
 		burst = 0;	/* default */
 	}
 
 	/*
 	 * From here on "rate" and "burst" hold the channel's PREVIOUS
 	 * values, while the channel records the new ones.
 	 */
 
 	/* atomically swap rates */
 	temp = channel->last_rate;
 	channel->last_rate = rate;
 	rate = temp;
 
 	/* atomically swap burst size */
 	temp = channel->last_burst;
 	channel->last_burst = burst;
 	burst = temp;
 
 	MLX5E_RL_WORKER_UNLOCK(rlw);
 	/* put reference on the old rate, if any */
 	if (rate != 0) {
 		mlx5_rl_remove_rate(rlw->priv->mdev,
 		    howmany(rate, 1000), burst);
 	}
 
 	/* set new rate */
 	sq = channel->sq;
 	if (sq != NULL) {
 		error = mlx5e_rl_modify_sq(sq, index);
 		if (error != 0)
 			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
 	} else
 		error = 0;
 	/* re-acquire the worker lock before returning to the caller */
 	MLX5E_RL_WORKER_LOCK(rlw);
 
 	return (-error);
 }
 
 /*
  * Main function of a rate limit worker thread; "arg" is the
  * struct mlx5e_rl_worker served by this thread.
  *
  * The thread goes through three phases:
  *  1. open the send queues of the worker's channels and apply each
  *     channel's initial rate,
  *  2. process the channels queued on rlw->process_head until
  *     rlw->worker_done becomes non-zero,
  *  3. close all send queues, clear worker_done, wake up waiters on
  *     rlw->cv and exit.
  *
  * The worker lock is held except around calls that open, resume,
  * drain or close a send queue.
  */
 static void
 mlx5e_rl_worker(void *arg)
 {
 	struct thread *td;
 	struct mlx5e_rl_worker *rlw = arg;
 	struct mlx5e_rl_channel *channel;
 	struct mlx5e_priv *priv;
 	unsigned ix;
 	uint64_t x;
 	int error;
 
 	/* set thread priority */
 	td = curthread;
 
 	thread_lock(td);
 	sched_prio(td, PI_SWI(SWI_NET));
 	thread_unlock(td);
 
 	priv = rlw->priv;
 
 	/* compute completion vector */
 	ix = (rlw - priv->rl.workers) %
 	    priv->mdev->priv.eq_table.num_comp_vectors;
 
 	/* TODO bind to CPU */
 
 	/* open all the SQs */
 	MLX5E_RL_WORKER_LOCK(rlw);
 	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
 		/* note: shadows the function-scope "channel" variable */
 		struct mlx5e_rl_channel *channel = rlw->channels + x;
 
 #if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
 		/* without pre-allocation, free channels get no SQ here */
 		if (channel->state == MLX5E_RL_ST_FREE)
 			continue;
 #endif
 		MLX5E_RL_WORKER_UNLOCK(rlw);
 
 		MLX5E_RL_RLOCK(&priv->rl);
 		error = mlx5e_rl_open_channel(rlw, ix,
 		    &priv->rl.chan_param, &channel->sq);
 		MLX5E_RL_RUNLOCK(&priv->rl);
 
 		MLX5E_RL_WORKER_LOCK(rlw);
 		if (error != 0) {
 			if_printf(priv->ifp,
 			    "mlx5e_rl_open_channel failed: %d\n", error);
 			/* give up on the remaining channels */
 			break;
 		}
 		/* the result of applying the initial rate is not checked */
 		mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
 	}
 	while (1) {
 		if (STAILQ_FIRST(&rlw->process_head) == NULL) {
 			/* check if we are tearing down */
 			if (rlw->worker_done != 0)
 				break;
 			/* nothing queued: sleep until signalled on rlw->cv */
 			cv_wait(&rlw->cv, &rlw->mtx);
 		}
 		/* check if we are tearing down */
 		if (rlw->worker_done != 0)
 			break;
 		channel = STAILQ_FIRST(&rlw->process_head);
 		if (channel != NULL) {
 			STAILQ_REMOVE_HEAD(&rlw->process_head, entry);
 
 			switch (channel->state) {
 			case MLX5E_RL_ST_MODIFY:
 				/* a rate change was requested for this channel */
 				channel->state = MLX5E_RL_ST_USED;
 				MLX5E_RL_WORKER_UNLOCK(rlw);
 
 				/* create channel by demand */
 				if (channel->sq == NULL) {
 					MLX5E_RL_RLOCK(&priv->rl);
 					error = mlx5e_rl_open_channel(rlw, ix,
 					    &priv->rl.chan_param, &channel->sq);
 					MLX5E_RL_RUNLOCK(&priv->rl);
 
 					if (error != 0) {
 						if_printf(priv->ifp,
 						    "mlx5e_rl_open_channel failed: %d\n", error);
 					} else {
 						atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
 					}
 				} else {
 					mlx5e_resume_sq(channel->sq);
 				}
 
 				MLX5E_RL_WORKER_LOCK(rlw);
 				/* convert from bytes/s to bits/s and set new rate */
 				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
 				    channel->new_rate * 8ULL);
 				if (error != 0) {
 					if_printf(priv->ifp,
 					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
 					    error);
 				}
 				break;
 
 			case MLX5E_RL_ST_DESTROY:
 				/* drop the rate, then recycle the channel */
 				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
 				if (error != 0) {
 					if_printf(priv->ifp,
 					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
 					    error);
 				}
 				if (channel->sq != NULL) {
 					/*
 					 * Make sure all packets are
 					 * transmitted before SQ is
 					 * returned to free list:
 					 */
 					MLX5E_RL_WORKER_UNLOCK(rlw);
 					mlx5e_drain_sq(channel->sq);
 					MLX5E_RL_WORKER_LOCK(rlw);
 				}
 				/* put the channel back into the free list */
 				STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
 				channel->state = MLX5E_RL_ST_FREE;
 				atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
 				break;
 			default:
 				/* NOP */
 				break;
 			}
 		}
 	}
 
 	/* close all the SQs */
 	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
 		/* note: shadows the function-scope "channel" variable */
 		struct mlx5e_rl_channel *channel = rlw->channels + x;
 
 		/* update the initial rate */
 		channel->init_rate = channel->last_rate;
 
 		/* make sure we free up the rate resource */
 		mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
 
 		if (channel->sq != NULL) {
 			MLX5E_RL_WORKER_UNLOCK(rlw);
 			mlx5e_rl_close_channel(&channel->sq);
 			atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
 			MLX5E_RL_WORKER_LOCK(rlw);
 		}
 	}
 
 	/*
 	 * Clear the teardown flag and wake up anyone sleeping on rlw->cv.
 	 * NOTE(review): presumably the thread that requested the teardown
 	 * waits for worker_done to return to zero -- confirm against
 	 * mlx5e_rl_close_workers().
 	 */
 	rlw->worker_done = 0;
 	cv_broadcast(&rlw->cv);
 	MLX5E_RL_WORKER_UNLOCK(rlw);
 
 	kthread_exit();
 }
 
 static int
 mlx5e_rl_open_tis(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
 	memset(in, 0, sizeof(in));
 
 	MLX5_SET(tisc, tisc, prio, 0);
 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
 
 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
 }
 
 /*
  * Destroy the TIS identified by "priv->rl.tisn", which is the field
  * mlx5e_rl_open_tis() passes to mlx5_core_create_tis().
  */
 static void
 mlx5e_rl_close_tis(struct mlx5e_priv *priv)
 {
 	mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
 }
 
 static void
 mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
     struct mlx5_core_dev *mdev)
 {
 	/* ratelimit workers */
 	param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
 	param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;
 
 	/* range check */
 	if (param->tx_worker_threads_def == 0 ||
 	    param->tx_worker_threads_def > param->tx_worker_threads_max)
 		param->tx_worker_threads_def = param->tx_worker_threads_max;
 
 	/* ratelimit channels */
 	param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
 	    param->tx_worker_threads_def;
 	param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;
 
 	/* range check */
 	if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
 		param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;
 
 	/* set default burst size */
 	param->tx_burst_size = 4;	/* MTUs */
 
 	/*
 	 * Set maximum burst size
 	 *
 	 * The burst size is multiplied by the MTU and clamped to the
 	 * range 0 ... 65535 bytes inclusivly before fed into the
 	 * firmware.
 	 *
 	 * NOTE: If the burst size or MTU is changed only ratelimit
 	 * connections made after the change will use the new burst
 	 * size.
 	 */
 	param->tx_burst_size_max = 255;
 
 	/* get firmware rate limits in 1000bit/s and convert them to bit/s */
 	param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
 	param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;
 
 	/* ratelimit table size */
 	param->tx_rates_max = mdev->priv.rl_table.max_size;
 
 	/* range check */
 	if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
 		param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;
 
 	/* set default number of rates */
 	param->tx_rates_def = param->tx_rates_max;
 
 	/* set maximum allowed rate deviation */
 	if (param->tx_limit_max != 0) {
 		/*
 		 * Make sure the deviation multiplication doesn't
 		 * overflow unsigned 64-bit:
 		 */
 		param->tx_allowed_deviation_max = -1ULL /
 		    param->tx_limit_max;
 	}
 	/* set default rate deviation */
 	param->tx_allowed_deviation = 50;	/* 5.0% */
 
 	/* channel parameters */
 	param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
 	param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
 	param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
 	param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
 	param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
 }
 
 /*
  * String tables generated by expanding the MLX5E_RL_*() lists with
  * MLX5E_STATS_DESC. mlx5e_rl_init() reads them pairwise: for item "i",
  * entry "2 * i" is passed as the sysctl name and entry "2 * i + 1" as
  * the sysctl description.
  */
 
 /* names and descriptions of the rate limit parameters */
 static const char *mlx5e_rl_params_desc[] = {
 	MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
 };
 
 /* names and descriptions of the rate table parameters */
 static const char *mlx5e_rl_table_params_desc[] = {
 	MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
 };
 
 /* names and descriptions of the rate limit statistics */
 static const char *mlx5e_rl_stats_desc[] = {
 	MLX5E_RL_STATS(MLX5E_STATS_DESC)
 };
 
 /*
  * Set up rate limiting for the network interface given by "priv".
  *
  * Nothing is done and zero is returned when the device does not report
  * both the "qos" and the "packet_pacing" capabilities. Otherwise the
  * shared UAR and the TIS are allocated, the parameters get their
  * default values, the sysctl tree is created, the worker and channel
  * arrays are allocated and finally the worker threads are started.
  *
  * Returns zero on success. If allocating the UAR or opening the TIS
  * fails, what has been set up so far is undone and that error is
  * returned. A failure to start the worker threads is only printed and
  * does not change the return value.
  */
 int
 mlx5e_rl_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rl_priv_data *rl = &priv->rl;
 	struct sysctl_oid *node;
 	struct sysctl_oid *stats;
 	char buf[64];
 	uint64_t i;
 	uint64_t j;
 	int error;
 
 	/* check if there is support for packet pacing */
 	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
 		return (0);
 
 	rl->priv = priv;
 
 	sysctl_ctx_init(&rl->ctx);
 
 	sx_init(&rl->rl_sxlock, "ratelimit-sxlock");
 
 	/* allocate shared UAR for SQs */
 	error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar);
 	if (error)
 		goto done;
 
 	/* open own TIS domain for ratelimit SQs */
 	error = mlx5e_rl_open_tis(priv);
 	if (error)
 		goto err_uar;
 
 	/* setup default value for parameters */
 	mlx5e_rl_set_default_params(&rl->param, priv->mdev);
 
 	/* update the completion factor */
 	mlx5e_rl_sync_tx_completion_fact(rl);
 
 	/* create root node */
 	node = SYSCTL_ADD_NODE(&rl->ctx,
 	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
 	    "rate_limit", CTLFLAG_RW, NULL, "Rate limiting support");
 
 	/*
 	 * NOTE(review): the parameter sysctls are created before the
 	 * arrays below are allocated. Presumably this lets tunables
 	 * loaded while adding the sysctls adjust "tx_worker_threads_def"
 	 * and "tx_rates_def" before they are used as array sizes -
 	 * confirm before reordering.
 	 */
 	if (node != NULL) {
 		/* create SYSCTLs */
 		for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
 			mlx5e_rl_sysctl_add_u64_oid(rl,
 			    MLX5E_RL_PARAMS_INDEX(arg[i]),
 			    node, mlx5e_rl_params_desc[2 * i],
 			    mlx5e_rl_params_desc[2 * i + 1]);
 		}
 
 		stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
 		    OID_AUTO, "stats", CTLFLAG_RD, NULL,
 		    "Rate limiting statistics");
 		if (stats != NULL) {
 			/* create SYSCTLs */
 			for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
 				mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
 				    stats, mlx5e_rl_stats_desc[2 * i],
 				    mlx5e_rl_stats_desc[2 * i + 1]);
 			}
 		}
 	}
 
 	/* allocate workers array */
 	rl->workers = malloc(sizeof(rl->workers[0]) *
 	    rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);
 
 	/* allocate rate limit array */
 	rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
 	    rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);
 
 	if (node != NULL) {
 		/* create more SYSCTls */
 		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 		    "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
 		    CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
 		    "A", "Show table of all configured TX rates");
 
 		/* try to fetch rate table from kernel environment */
 		for (i = 0; i != rl->param.tx_rates_def; i++) {
 			/* compute path for tunable */
 			snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
 			    device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
 			/* the result of adding the rate is not checked */
 			if (TUNABLE_QUAD_FETCH(buf, &j))
 				mlx5e_rl_tx_limit_add(rl, j);
 		}
 
 		/* setup rate table sysctls */
 		for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
 			mlx5e_rl_sysctl_add_u64_oid(rl,
 			    MLX5E_RL_PARAMS_INDEX(table_arg[i]),
 			    node, mlx5e_rl_table_params_desc[2 * i],
 			    mlx5e_rl_table_params_desc[2 * i + 1]);
 		}
 	}
 
 	/* initialize every worker and put all its channels on the free list */
 	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
 		struct mlx5e_rl_worker *rlw = rl->workers + j;
 
 		rlw->priv = priv;
 
 		cv_init(&rlw->cv, "mlx5-worker-cv");
 		mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
 		STAILQ_INIT(&rlw->index_list_head);
 		STAILQ_INIT(&rlw->process_head);
 
 		rlw->channels = malloc(sizeof(rlw->channels[0]) *
 		    rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);
 
 		MLX5E_RL_WORKER_LOCK(rlw);
 		for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
 			struct mlx5e_rl_channel *channel = rlw->channels + i;
 			channel->worker = rlw;
 			channel->m_snd_tag.ifp = priv->ifp;
 			STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
 		}
 		MLX5E_RL_WORKER_UNLOCK(rlw);
 	}
 
 	/* start the worker threads */
 	PRIV_LOCK(priv);
 	error = mlx5e_rl_open_workers(priv);
 	PRIV_UNLOCK(priv);
 
 	/* a failure here is reported, but does not fail the initialization */
 	if (error != 0) {
 		if_printf(priv->ifp,
 		    "mlx5e_rl_open_workers failed: %d\n", error);
 	}
 
 	return (0);
 
 	/* error unwinding, in reverse order of setup */
 err_uar:
 	mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
 done:
 	sysctl_ctx_free(&rl->ctx);
 	sx_destroy(&rl->rl_sxlock);
 	return (error);
 }
 
 static int
 mlx5e_rl_open_workers(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rl_priv_data *rl = &priv->rl;
 	struct thread *rl_thread = NULL;
 	struct proc *rl_proc = NULL;
 	uint64_t j;
 	int error;
 
 	if (priv->gone || rl->opened)
 		return (-EINVAL);
 
 	MLX5E_RL_WLOCK(rl);
 	/* compute channel parameters once */
 	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
 	MLX5E_RL_WUNLOCK(rl);
 
 	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
 		struct mlx5e_rl_worker *rlw = rl->workers + j;
 
 		/* start worker thread */
 		error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
 		    RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
 		if (error != 0) {
 			if_printf(rl->priv->ifp,
 			    "kproc_kthread_add failed: %d\n", error);
 			rlw->worker_done = 1;
 		}
 	}
 
 	rl->opened = 1;
 
 	return (0);
 }
 
 /*
  * Ask all the worker threads to terminate and wait until they did.
  *
  * Does nothing unless the workers are opened. The request is posted to
  * all the workers before waiting for any of them, so that the threads
  * tear down in parallel.
  */
 static void
 mlx5e_rl_close_workers(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rl_priv_data *rl = &priv->rl;
 	struct mlx5e_rl_worker *rlw;
 	uint64_t n;
 
 	if (rl->opened == 0)
 		return;
 
 	/* first pass: request tear down of all the workers */
 	for (n = 0; n < rl->param.tx_worker_threads_def; n++) {
 		rlw = &rl->workers[n];
 
 		MLX5E_RL_WORKER_LOCK(rlw);
 		if (rlw->worker_done != 0) {
 			/* XXX thread not started, nothing to wait for */
 			rlw->worker_done = 0;
 		} else {
 			/* tell the thread to exit and wake it up */
 			rlw->worker_done = 1;
 			cv_broadcast(&rlw->cv);
 		}
 		MLX5E_RL_WORKER_UNLOCK(rlw);
 	}
 
 	/* second pass: wait until "worker_done" reads zero again */
 	for (n = 0; n < rl->param.tx_worker_threads_def; n++) {
 		rlw = &rl->workers[n];
 
 		MLX5E_RL_WORKER_LOCK(rlw);
 		while (rlw->worker_done != 0)
 			cv_wait(&rlw->cv, &rlw->mtx);
 		MLX5E_RL_WORKER_UNLOCK(rlw);
 	}
 
 	rl->opened = 0;
 }
 
 /*
  * Clear the first "tx_rates_def" entries of the rate limit table
  * while holding the rate limit write lock.
  */
 static void
 mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
 {
 	unsigned entry;
 
 	MLX5E_RL_WLOCK(rl);
 	for (entry = 0; entry != rl->param.tx_rates_def; entry++) {
 		/* zero marks the entry as unused */
 		rl->rate_limit_table[entry] = 0;
 	}
 	MLX5E_RL_WUNLOCK(rl);
 }
 
 /*
  * Release everything mlx5e_rl_init() has set up for "priv".
  *
  * Nothing is done when the device does not report both the "qos" and
  * the "packet_pacing" capabilities, which is the same condition under
  * which mlx5e_rl_init() returns without setting anything up.
  */
 void
 mlx5e_rl_cleanup(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rl_priv_data *rl = &priv->rl;
 	struct mlx5e_rl_worker *rlw;
 	uint64_t n;
 
 	/* nothing to undo without support for packet pacing */
 	if (!(MLX5_CAP_GEN(priv->mdev, qos) && MLX5_CAP_QOS(priv->mdev, packet_pacing)))
 		return;
 
 	/* remove the sysctl nodes first */
 	sysctl_ctx_free(&rl->ctx);
 
 	/* stop the worker threads */
 	PRIV_LOCK(priv);
 	mlx5e_rl_close_workers(priv);
 	PRIV_UNLOCK(priv);
 
 	/* forget all the configured rates */
 	mlx5e_rl_reset_rates(rl);
 
 	/* release the UAR shared by the SQs */
 	mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
 
 	/* release the TIS domain */
 	mlx5e_rl_close_tis(priv);
 
 	/* release the per worker resources */
 	for (n = 0; n < rl->param.tx_worker_threads_def; n++) {
 		rlw = &rl->workers[n];
 
 		cv_destroy(&rlw->cv);
 		mtx_destroy(&rlw->mtx);
 		free(rlw->channels, M_MLX5EN);
 	}
 
 	/* release the arrays and the lock */
 	free(rl->rate_limit_table, M_MLX5EN);
 	free(rl->workers, M_MLX5EN);
 	sx_destroy(&rl->rl_sxlock);
 }
 
 /*
  * Append "channel" to the tail of the worker's list of channels
  * waiting to be processed and wake up all threads sleeping on the
  * worker's condition variable.
  *
  * The callers in this file hold the worker lock around this call, as
  * the "_locked" suffix suggests.
  */
 static void
 mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
     struct mlx5e_rl_channel *channel)
 {
 	STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
 	cv_broadcast(&rlw->cv);
 }
 
 /*
  * Request destruction of the given rate limit channel.
  *
  * A NULL "channel" is ignored. A channel in the "USED" state is marked
  * for destruction and queued for the worker. A channel in the "MODIFY"
  * state is only marked for destruction, without being queued again.
  * Channels in any other state are left alone.
  */
 static void
 mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
 {
 	if (channel == NULL)
 		return;
 
 	MLX5E_RL_WORKER_LOCK(rlw);
 	if (channel->state == MLX5E_RL_ST_MODIFY) {
 		/* only the state is changed in this case */
 		channel->state = MLX5E_RL_ST_DESTROY;
 	} else if (channel->state == MLX5E_RL_ST_USED) {
 		/* hand the channel over to the worker */
 		channel->state = MLX5E_RL_ST_DESTROY;
 		mlx5e_rlw_queue_channel_locked(rlw, channel);
 	}
 	MLX5E_RL_WORKER_UNLOCK(rlw);
 }
 
 /*
  * Record "rate" as the new rate of the given channel.
  *
  * The new rate is always stored. Only a channel in the "USED" state is
  * switched to the "MODIFY" state and queued for the worker.
  *
  * Always returns zero.
  */
 static int
 mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
 {
 
 	MLX5E_RL_WORKER_LOCK(rlw);
 	channel->new_rate = rate;
 	if (channel->state == MLX5E_RL_ST_USED) {
 		/* let the worker apply the new rate */
 		channel->state = MLX5E_RL_ST_MODIFY;
 		mlx5e_rlw_queue_channel_locked(rlw, channel);
 	}
 	MLX5E_RL_WORKER_UNLOCK(rlw);
 
 	return (0);
 }
 
 /*
  * Read back the last rate recorded for the given channel.
  *
  * Returns zero and stores "channel->last_rate" in "*prate" when the
  * channel is in the "USED" state, EBUSY when it is in the "MODIFY"
  * state and EINVAL for any other state. "*prate" is only written
  * when zero is returned.
  */
 static int
 mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t *prate)
 {
 	int error;
 
 	MLX5E_RL_WORKER_LOCK(rlw);
 	if (channel->state == MLX5E_RL_ST_USED) {
 		*prate = channel->last_rate;
 		error = 0;
 	} else if (channel->state == MLX5E_RL_ST_MODIFY) {
 		/* a rate change is still pending */
 		error = EBUSY;
 	} else {
 		error = EINVAL;
 	}
 	MLX5E_RL_WORKER_UNLOCK(rlw);
 
 	return (error);
 }
 
 /*
  * Take a channel off the free list of the given worker.
  *
  * On success the channel is put in the "USED" state, the
  * "tx_active_connections" statistic is incremented and zero is
  * returned. If the free list is empty, the
  * "tx_available_resource_failure" statistic is incremented and ENOMEM
  * is returned.
  *
  * "*pchannel" is written in both cases and is NULL on failure.
  */
 static int
 mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
     struct mlx5e_rl_channel **pchannel)
 {
 	struct mlx5e_rl_channel *channel;
 	int error;
 
 	MLX5E_RL_WORKER_LOCK(rlw);
 	channel = STAILQ_FIRST(&rlw->index_list_head);
 	if (channel == NULL) {
 		/* no more free channels */
 		atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
 		error = ENOMEM;
 	} else {
 		/* claim the channel at the head of the free list */
 		STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
 		channel->state = MLX5E_RL_ST_USED;
 		atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
 		error = 0;
 	}
 	MLX5E_RL_WORKER_UNLOCK(rlw);
 
 	*pchannel = channel;
 #ifdef RATELIMIT_DEBUG
 	if_printf(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel);
 #endif
 	return (error);
 }
 
 /*
  * Allocate a rate limit send tag for the given network interface.
  *
  * Returns EOPNOTSUPP if the device lacks packet pacing support, is
  * going away, or if a tag type other than rate limit is requested.
  * Otherwise a free channel is taken from the worker selected by the
  * flow ID and the requested maximum rate is recorded for it. On
  * success zero is returned and "*ppmt" points to the send tag of the
  * channel. On failure the error is returned and "*ppmt" is left
  * untouched.
  */
 int
 mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
     union if_snd_tag_alloc_params *params,
     struct m_snd_tag **ppmt)
 {
 	struct mlx5e_priv *priv = ifp->if_softc;
 	struct mlx5e_rl_channel *channel;
 	struct mlx5e_rl_worker *rlw;
 	int error;
 
 	/* check if there is support for packet pacing or if device is going away */
 	if (!MLX5_CAP_GEN(priv->mdev, qos) ||
 	    !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
 	    params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
 		return (EOPNOTSUPP);
 
 	/* select the worker thread from the flow ID of the connection */
 	rlw = &priv->rl.workers[(params->rate_limit.hdr.flowid % 128) %
 	    priv->rl.param.tx_worker_threads_def];
 
 	/* get a free channel */
 	error = mlx5e_find_available_tx_ring_index(rlw, &channel);
 	if (error != 0)
 		return (error);
 
 	/* set the initial rate, giving the channel back on failure */
 	error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
 	if (error != 0) {
 		mlx5e_rl_free(rlw, channel);
 		return (error);
 	}
 
 	/* hand out the send tag embedded in the channel */
 	*ppmt = &channel->m_snd_tag;
 
 	return (0);
 }
 
 
 /*
  * Change the maximum rate of the channel owning the send tag "pmt".
  *
  * Returns the value returned by mlx5e_rl_modify().
  */
 int
 mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
 {
 	struct mlx5e_rl_channel *channel;
 
 	/* the send tag is embedded in the channel structure */
 	channel = container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
 
 	return (mlx5e_rl_modify(channel->worker, channel,
 	    params->rate_limit.max_rate));
 }
 
 /*
  * Query the rate of the channel owning the send tag "pmt". The
  * result is stored in "params->rate_limit.max_rate".
  *
  * Returns the value returned by mlx5e_rl_query().
  */
 int
 mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
 {
 	struct mlx5e_rl_channel *channel;
 
 	/* the send tag is embedded in the channel structure */
 	channel = container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
 
 	return (mlx5e_rl_query(channel->worker, channel,
 	    &params->rate_limit.max_rate));
 }
 
 /*
  * Release the channel owning the send tag "pmt" by passing it to
  * mlx5e_rl_free().
  */
 void
 mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
 {
 	struct mlx5e_rl_channel *channel;
 
 	/* the send tag is embedded in the channel structure */
 	channel = container_of(pmt, struct mlx5e_rl_channel, m_snd_tag);
 
 	mlx5e_rl_free(channel->worker, channel);
 }
 
 static int
 mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
 {
 	struct mlx5e_rl_priv_data *rl = arg1;
 	struct mlx5e_priv *priv = rl->priv;
 	struct sbuf sbuf;
 	unsigned x;
 	int error;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 
 	PRIV_LOCK(priv);
 
 	sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);
 
 	sbuf_printf(&sbuf,
 	    "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
 	    "\t" "--------------------------------------------\n");
 
 	MLX5E_RL_RLOCK(rl);
 	for (x = 0; x != rl->param.tx_rates_def; x++) {
 		if (rl->rate_limit_table[x] == 0)
 			continue;
 
 		sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
 		    x, (unsigned)rl->param.tx_burst_size,
 		    (long long)rl->rate_limit_table[x]);
 	}
 	MLX5E_RL_RUNLOCK(rl);
 
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
 
 	PRIV_UNLOCK(priv);
 
 	return (error);
 }
 
 /*
  * Apply the current completion event moderation parameters to all
  * channels which have a send queue.
  *
  * The channel parameters are rebuilt first. Then, for every channel
  * having a non-NULL "sq", the moderation of its completion queue is
  * modified. The coalescing mode is only passed along when the device
  * reports the "cq_period_mode_modify" capability.
  *
  * Always returns zero. The return values of the modify calls are not
  * checked.
  */
 static int
 mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
 {
 	uint64_t w;
 	uint64_t c;
 
 	/* compute the channel parameters once, for all channels */
 	MLX5E_RL_WLOCK(rl);
 	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
 	MLX5E_RL_WUNLOCK(rl);
 
 	for (w = 0; w != rl->param.tx_worker_threads_def; w++) {
 		struct mlx5e_rl_worker *rlw = &rl->workers[w];
 
 		for (c = 0; c != rl->param.tx_channels_per_worker_def; c++) {
 			struct mlx5e_sq *sq = rlw->channels[c].sq;
 
 			/* skip channels without a send queue */
 			if (sq == NULL)
 				continue;
 
 			if (!MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
 				mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
 				    rl->param.tx_coalesce_usecs,
 				    rl->param.tx_coalesce_pkts);
 			} else {
 				mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
 				    rl->param.tx_coalesce_usecs,
 				    rl->param.tx_coalesce_pkts,
 				    rl->param.tx_coalesce_mode);
 			}
 		}
 	}
 	return (0);
 }
 
 /*
  * Add the rate "value" to the rate limit table.
  *
  * Returns EINVAL if the rate is below 1000 or rejected by
  * mlx5_rl_is_in_range(), EEXIST if the rate is already in the table,
  * ENOMEM if the table has no unused entry, and zero when the rate was
  * stored in the first unused entry.
  */
 static int
 mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
 {
 	unsigned slot;
 	int error;
 
 	if (value < 1000 ||
 	    mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0)
 		return (EINVAL);
 
 	MLX5E_RL_WLOCK(rl);
 
 	/* look for an entry already holding this rate */
 	for (slot = 0; slot != rl->param.tx_rates_def; slot++) {
 		if (rl->rate_limit_table[slot] == value)
 			break;
 	}
 
 	if (slot != rl->param.tx_rates_def) {
 		/* duplicates are refused */
 		error = EEXIST;
 	} else {
 		/* store the rate in the first unused entry, if any */
 		error = ENOMEM;
 		for (slot = 0; slot != rl->param.tx_rates_def; slot++) {
 			if (rl->rate_limit_table[slot] == 0) {
 				rl->rate_limit_table[slot] = value;
 				error = 0;
 				break;
 			}
 		}
 	}
 	MLX5E_RL_WUNLOCK(rl);
 
 	return (error);
 }
 
 /*
  * Remove the rate "value" from the rate limit table.
  *
  * Returns EINVAL if "value" is zero, ENOENT if no entry holds the
  * given rate, and zero when the first matching entry was cleared.
  */
 static int
 mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value)
 {
 	unsigned slot;
 	int error;
 
 	/* zero marks unused entries and can not be removed */
 	if (value == 0)
 		return (EINVAL);
 
 	MLX5E_RL_WLOCK(rl);
 
 	/* assume the rate is not in the table until it is found */
 	error = ENOENT;
 	for (slot = 0; slot != rl->param.tx_rates_def; slot++) {
 		if (rl->rate_limit_table[slot] == value) {
 			/* free up the entry */
 			rl->rate_limit_table[slot] = 0;
 			error = 0;
 			break;
 		}
 	}
 	MLX5E_RL_WUNLOCK(rl);
 
 	return (error);
 }
 
 static int
 mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct mlx5e_rl_priv_data *rl = arg1;
 	struct mlx5e_priv *priv = rl->priv;
 	unsigned mode_modify;
 	unsigned was_opened;
 	uint64_t value;
 	uint64_t old;
 	int error;
 
 	PRIV_LOCK(priv);
 
 	MLX5E_RL_RLOCK(rl);
 	value = rl->param.arg[arg2];
 	MLX5E_RL_RUNLOCK(rl);
 
 	if (req != NULL) {
 		old = value;
 		error = sysctl_handle_64(oidp, &value, 0, req);
 		if (error || req->newptr == NULL ||
 		    value == rl->param.arg[arg2])
 			goto done;
 	} else {
 		old = 0;
 		error = 0;
 	}
 
 	/* check if device is gone */
 	if (priv->gone) {
 		error = ENXIO;
 		goto done;
 	}
 	was_opened = rl->opened;
 	mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);
 
 	switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
 	case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
 		if (value > rl->param.tx_worker_threads_max)
 			value = rl->param.tx_worker_threads_max;
 		else if (value < 1)
 			value = 1;
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
 		if (value > rl->param.tx_channels_per_worker_max)
 			value = rl->param.tx_channels_per_worker_max;
 		else if (value < 1)
 			value = 1;
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
 		if (value > rl->param.tx_rates_max)
 			value = rl->param.tx_rates_max;
 		else if (value < 1)
 			value = 1;
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
 		/* range check */
 		if (value < 1)
 			value = 0;
 		else if (value > MLX5E_FLD_MAX(cqc, cq_period))
 			value = MLX5E_FLD_MAX(cqc, cq_period);
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 
 		/* check to avoid down and up the network interface */
 		if (was_opened)
 			error = mlx5e_rl_refresh_channel_params(rl);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
 		/* import TX coal pkts */
 		if (value < 1)
 			value = 0;
 		else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
 			value = MLX5E_FLD_MAX(cqc, cq_max_count);
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 
 		/* check to avoid down and up the network interface */
 		if (was_opened)
 			error = mlx5e_rl_refresh_channel_params(rl);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
 		/* network interface must be down */
 		if (was_opened != 0 && mode_modify == 0)
 			mlx5e_rl_close_workers(priv);
 
 		/* import TX coalesce mode */
 		if (value != 0)
 			value = 1;
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 
 		/* restart network interface, if any */
 		if (was_opened != 0) {
 			if (mode_modify == 0)
 				mlx5e_rl_open_workers(priv);
 			else
 				error = mlx5e_rl_refresh_channel_params(rl);
 		}
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
 		/* network interface must be down */
 		if (was_opened)
 			mlx5e_rl_close_workers(priv);
 
 		/* import TX queue size */
 		if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
 			value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
 		else if (value > priv->params_ethtool.tx_queue_size_max)
 			value = priv->params_ethtool.tx_queue_size_max;
 
 		/* store actual TX queue size */
 		value = 1ULL << order_base_2(value);
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 
 		/* verify TX completion factor */
 		mlx5e_rl_sync_tx_completion_fact(rl);
 
 		/* restart network interface, if any */
 		if (was_opened)
 			mlx5e_rl_open_workers(priv);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
 		/* network interface must be down */
 		if (was_opened)
 			mlx5e_rl_close_workers(priv);
 
 		/* store new value */
 		rl->param.arg[arg2] = value;
 
 		/* verify parameter */
 		mlx5e_rl_sync_tx_completion_fact(rl);
 
 		/* restart network interface, if any */
 		if (was_opened)
 			mlx5e_rl_open_workers(priv);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
 		error = mlx5e_rl_tx_limit_add(rl, value);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
 		error = mlx5e_rl_tx_limit_clr(rl, value);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
 		/* range check */
 		if (value > rl->param.tx_allowed_deviation_max)
 			value = rl->param.tx_allowed_deviation_max;
 		else if (value < rl->param.tx_allowed_deviation_min)
 			value = rl->param.tx_allowed_deviation_min;
 
 		MLX5E_RL_WLOCK(rl);
 		rl->param.arg[arg2] = value;
 		MLX5E_RL_WUNLOCK(rl);
 		break;
 
 	case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
 		/* range check */
 		if (value > rl->param.tx_burst_size_max)
 			value = rl->param.tx_burst_size_max;
 		else if (value < rl->param.tx_burst_size_min)
 			value = rl->param.tx_burst_size_min;
 
 		MLX5E_RL_WLOCK(rl);
 		rl->param.arg[arg2] = value;
 		MLX5E_RL_WUNLOCK(rl);
 		break;
 
 	default:
 		break;
 	}
 done:
 	PRIV_UNLOCK(priv);
 	return (error);
 }
 
 /*
  * Register the 64-bit parameter with index "x" below "node", using
  * mlx5e_rl_sysctl_handler() as handler.
  *
  * The access mode is derived from the parameter name:
  *  - names containing "_max" or "_min" are read-only
  *  - other names containing "_def" are read-only tunables, which are
  *    only registered when RATELIMIT_DEBUG is defined
  *  - all remaining names are read-write tunables
  */
 static void
 mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
     struct sysctl_oid *node, const char *name, const char *desc)
 {
 	/*
 	 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
 	 * take care of loading default sysctl value from the kernel
 	 * environment, if any:
 	 */
 	if (strstr(name, "_max") != NULL || strstr(name, "_min") != NULL) {
 		/* limits are read-only */
 		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 		    name, CTLTYPE_U64 | CTLFLAG_RD |
 		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
 	} else if (strstr(name, "_def") != NULL) {
 #ifdef RATELIMIT_DEBUG
 		/* advanced parameters are tunable, but read-only */
 		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 		    name, CTLTYPE_U64 | CTLFLAG_RDTUN |
 		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
 #endif
 	} else {
 		/* everything else is tunable and read-write */
 		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
 		    name, CTLTYPE_U64 | CTLFLAG_RWTUN |
 		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
 	}
 }
 
 /*
  * Register the statistics counter "rl->stats.arg[x]" as a read-only
  * 64-bit sysctl called "name", described by "desc", below "node".
  */
 static void
 mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
     struct sysctl_oid *node, const char *name, const char *desc)
 {
 	/* read-only SYSCTLs */
 	SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
 	    CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
 }
 
 #endif