Index: sys/compat/linuxkpi/common/include/linux/radix-tree.h =================================================================== --- sys/compat/linuxkpi/common/include/linux/radix-tree.h +++ sys/compat/linuxkpi/common/include/linux/radix-tree.h @@ -78,6 +78,7 @@ void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void *radix_tree_delete(struct radix_tree_root *, unsigned long); int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); +int radix_tree_store(struct radix_tree_root *, unsigned long, void **); bool radix_tree_iter_find(struct radix_tree_root *, struct radix_tree_iter *, void ***); void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *, void **); Index: sys/compat/linuxkpi/common/include/linux/xarray.h =================================================================== --- sys/compat/linuxkpi/common/include/linux/xarray.h +++ sys/compat/linuxkpi/common/include/linux/xarray.h @@ -0,0 +1,115 @@ +/*- + * Copyright (c) 2020 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _LINUX_XARRAY_H_ +#define _LINUX_XARRAY_H_ + +#include + +#include +#include +#include +#include +#include + +#define XA_LIMIT(min, max) \ + ({ CTASSERT((min) == 0); (uint32_t)(max); }) + +#define XA_FLAGS_ALLOC (1U << 0) +#define XA_FLAGS_LOCK_IRQ (1U << 1) + +#define XA_ERROR(x) \ + ERR_PTR(x) + +#define xa_limit_32b XA_LIMIT(0, -1U) + +struct xarray { + struct radix_tree_root root; + spinlock_t spinlock; + atomic_t use_lock; +}; + +/* + * Extensible arrays API implemented as a wrapper + * around the radix tree implementation. 
+ */ +void xa_lock(struct xarray *); +void xa_unlock(struct xarray *); +void *xa_erase(struct xarray *, uint32_t); +void *xa_load(struct xarray *, uint32_t); +int xa_alloc(struct xarray *, uint32_t *, void *, uint32_t, gfp_t); +int xa_alloc_cyclic(struct xarray *, uint32_t *, void *, uint32_t, uint32_t *, gfp_t); +int xa_insert(struct xarray *, uint32_t, void *, gfp_t); +void *xa_store(struct xarray *, uint32_t, void *, gfp_t); +void xa_init_flags(struct xarray *, uint32_t); +bool xa_empty(struct xarray *); +void xa_destroy(struct xarray *); + +static inline void * +__xa_for_each(struct xarray *xa, unsigned long *pindex, bool not_first) +{ + struct radix_tree_iter iter = { .index = *pindex }; + void **ppslot; + + if (not_first) { + /* advance to next index, if any */ + iter.index++; + if (iter.index == 0) + return (NULL); + } + + if (radix_tree_iter_find(&xa->root, &iter, &ppslot)) { + *pindex = iter.index; + return (*ppslot); + } else { + return (NULL); + } +} + +#define xa_for_each(xa, index, entry) \ + for ((entry) = NULL, (index) = 0; \ + ((entry) = __xa_for_each((xa), &(index), (entry) != NULL)) != NULL; ) + +/* + * Unlocked versions of the functions above. + * The current implementation detects this automagically. + */ +#define __xa_erase(...) xa_erase(__VA_ARGS__) +#define __xa_load(...) xa_load(__VA_ARGS__) +#define __xa_alloc(...) xa_alloc(__VA_ARGS__) +#define __xa_alloc_cyclic(...) xa_alloc_cyclic(__VA_ARGS__) +#define __xa_insert(...) xa_insert(__VA_ARGS__) +#define __xa_store(...) xa_store(__VA_ARGS__) +#define __xa_empty(...) xa_empty(__VA_ARGS__) + +static inline int +xa_err(void *ptr) +{ + return (PTR_ERR_OR_ZERO(ptr)); +} + +#endif /* _LINUX_XARRAY_H_ */ Index: sys/compat/linuxkpi/common/src/linux_radix.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_radix.c +++ sys/compat/linuxkpi/common/src/linux_radix.c @@ -2,7 +2,7 @@ * Copyright (c) 2010 Isilon Systems, Inc. 
* Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013-2018 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2020 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,18 @@ return (id >> (RADIX_TREE_MAP_SHIFT * height)) & RADIX_TREE_MAP_MASK; } +static void +radix_tree_clean_root_node(struct radix_tree_root *root) +{ + + /* Free the root node and reset the height if it holds no entries */ + if (root->rnode->count == 0) { + free(root->rnode, M_RADIX); + root->rnode = NULL; + root->height = 0; + } +} + void * radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { @@ -197,8 +209,10 @@ while (radix_max(root) < index) { /* check if the radix tree is getting too big */ - if (root->height == RADIX_TREE_MAX_HEIGHT) + if (root->height == RADIX_TREE_MAX_HEIGHT) { + radix_tree_clean_root_node(root); return (-E2BIG); + } /* * If the root radix level is not empty, we need to @@ -206,8 +220,16 @@ */ if (node->count != 0) { node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); - if (node == NULL) + if (node == NULL) { + /* + * Freeing the already allocated radix + * levels, if any, will be handled by + * the radix_tree_delete() function. + * This code path can only happen when + * the tree is not empty! + */ return (-ENOMEM); + } node->slots[0] = root->rnode; node->count++; root->rnode = node; @@ -231,14 +253,9 @@ temp[idx] = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); if (temp[idx] == NULL) { - while(idx--) + while (idx--) free(temp[idx], M_RADIX); - /* Check if we should free the root node as well. 
*/ - if (root->rnode->count == 0) { - free(root->rnode, M_RADIX); - root->rnode = NULL; - root->height = 0; - } + radix_tree_clean_root_node(root); return (-ENOMEM); } } @@ -262,3 +279,107 @@ return (0); } + +int +radix_tree_store(struct radix_tree_root *root, unsigned long index, void **ppitem) +{ + struct radix_tree_node *node; + struct radix_tree_node *temp[RADIX_TREE_MAX_HEIGHT - 1]; + void *pitem; + int height; + int idx; + + /* NULL item: delete the entry and return the old item via ppitem */ + if (*ppitem == NULL) { + *ppitem = radix_tree_delete(root, index); + return (0); + } + + /* get root node, if any */ + node = root->rnode; + + /* allocate root node, if missing */ + if (node == NULL) { + node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); + if (node == NULL) + return (-ENOMEM); + root->rnode = node; + root->height++; + } + + /* expand radix tree as needed */ + while (radix_max(root) < index) { + + /* check if the radix tree is getting too big */ + if (root->height == RADIX_TREE_MAX_HEIGHT) { + radix_tree_clean_root_node(root); + return (-E2BIG); + } + + /* + * If the root radix level is not empty, we need to + * allocate a new radix level: + */ + if (node->count != 0) { + node = malloc(sizeof(*node), M_RADIX, root->gfp_mask | M_ZERO); + if (node == NULL) { + /* + * Freeing the already allocated radix + * levels, if any, will be handled by + * the radix_tree_delete() function. + * This code path can only happen when + * the tree is not empty! 
+ */ + return (-ENOMEM); + } + node->slots[0] = root->rnode; + node->count++; + root->rnode = node; + } + root->height++; + } + + /* get radix tree height index */ + height = root->height - 1; + + /* walk down the tree until the first missing node, if any */ + for ( ; height != 0; height--) { + idx = radix_pos(index, height); + if (node->slots[idx] == NULL) + break; + node = node->slots[idx]; + } + + /* allocate the missing radix levels, if any */ + for (idx = 0; idx != height; idx++) { + temp[idx] = malloc(sizeof(*node), M_RADIX, + root->gfp_mask | M_ZERO); + if (temp[idx] == NULL) { + while (idx--) + free(temp[idx], M_RADIX); + radix_tree_clean_root_node(root); + return (-ENOMEM); + } + } + + /* setup new radix levels, if any */ + for ( ; height != 0; height--) { + idx = radix_pos(index, height); + node->slots[idx] = temp[height - 1]; + node->count++; + node = node->slots[idx]; + } + + /* + * Insert and adjust count if the item does not already exist. + */ + idx = radix_pos(index, 0); + /* swap: store the new item, hand the old one back via ppitem */ + pitem = node->slots[idx]; + node->slots[idx] = *ppitem; + *ppitem = pitem; + + if (pitem == NULL) + node->count++; + return (0); +} Index: sys/compat/linuxkpi/common/src/linux_xarray.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_xarray.c +++ sys/compat/linuxkpi/common/src/linux_xarray.c @@ -0,0 +1,383 @@ +/*- + * Copyright (c) 2020 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include + +/* + * This function acquires the read lock for the given xarray + * structure. This function is useful when looping over its elements. + * While the xarray is read locked no elements may be added or + * removed. This function may not be used recursively on the same + * xarray structure. + */ +void +xa_lock(struct xarray *xa) +{ + + MPASS(mtx_owned(&xa->spinlock.m) == false); + + rcu_read_lock(); + if (atomic_read(&xa->use_lock) != 0) + spin_lock(&xa->spinlock); +} + +/* + * Drop read lock of the given xarray structure. + */ +void +xa_unlock(struct xarray *xa) +{ + + if (mtx_owned(&xa->spinlock.m)) + spin_unlock(&xa->spinlock); + rcu_read_unlock(); + + MPASS(mtx_owned(&xa->spinlock.m) == false); +} + +/* + * This is an internal function to acquire the write lock of the given + * xarray structure. It works by incrementing the "use_lock" counter + * and waiting for the reader, if any, to observe the "use_lock" + * counter being non-zero and start using the spinlock for read + * access. This way all reads and writes get serialized during the + * update period. It is assumed that acquiring the write lock is rare + * compared to acquiring the read lock. 
This function may not be used + * recursively on the same xarray structure. + */ +static void +xa_write_lock(struct xarray *xa) +{ + + MPASS(atomic_read(&xa->use_lock) != INT_MAX); + + atomic_inc(&xa->use_lock); + synchronize_rcu(); + spin_lock(&xa->spinlock); +} + +/* + * Drop write lock of the given xarray structure. + */ +static void +xa_write_unlock(struct xarray *xa) +{ + + spin_unlock(&xa->spinlock); + atomic_dec(&xa->use_lock); + + MPASS(atomic_read(&xa->use_lock) >= 0); +} + +/* + * This function removes the element at the given index and returns + * the pointer to the removed element, if any. + */ +void * +xa_erase(struct xarray *xa, uint32_t index) +{ + bool is_locked; + void *retval; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_write_lock(xa); + retval = radix_tree_delete(&xa->root, index); + if (likely(is_locked == false)) + xa_write_unlock(xa); + + return (retval); +} + +/* + * This function returns the element pointer at the given index. A + * value of NULL is returned if the element does not exist. + */ +void * +xa_load(struct xarray *xa, uint32_t index) +{ + bool is_locked; + void *retval; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_lock(xa); + retval = radix_tree_lookup(&xa->root, index); + if (likely(is_locked == false)) + xa_unlock(xa); + + return (retval); +} + +/* + * This is an internal function used to sleep until more memory + * becomes available. + */ +static void +xa_vm_wait(struct xarray *xa) +{ + + xa_write_unlock(xa); + vm_wait(NULL); + xa_write_lock(xa); +} + +/* + * This function iterates the xarray until it finds a free slot where + * it can insert the element pointed to by "ptr". The search always + * starts at index zero and leaves the last index tried in "pindex". The + * "mask" argument defines the maximum index allowed, inclusively, and + * must be a power of two minus one value. 
The "gfp" argument + * basically tells if we can wait for more memory to become available + * or not. This function returns zero upon success or a negative error + * code on failure. A typical error code is -ENOMEM which means either + * the xarray is full, or there was not enough internal memory + * available to complete the radix tree insertion. + */ +int +xa_alloc(struct xarray *xa, uint32_t *pindex, void *ptr, uint32_t mask, gfp_t gfp) +{ + bool is_locked; + int retval; + + *pindex = 0; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_write_lock(xa); + else + MPASS((gfp & M_WAITOK) == 0); /* cannot sleep in read section */ +retry: + retval = radix_tree_insert(&xa->root, *pindex, ptr); + + switch (retval) { + case -EEXIST: + if (likely(*pindex != mask)) { + (*pindex)++; + goto retry; + } + retval = -ENOMEM; + break; + case -ENOMEM: + if (likely(gfp & M_WAITOK)) { + xa_vm_wait(xa); + goto retry; + } + break; + default: + break; + } + if (likely(is_locked == false)) + xa_write_unlock(xa); + + return (retval); +} + +/* + * This function works the same way as the "xa_alloc" function, except + * it wraps the next index value to zero when there are no entries + * left at the end of the xarray searching for a free slot from the + * beginning of the array. If the xarray is full -ENOMEM is returned. 
+ */ +int +xa_alloc_cyclic(struct xarray *xa, uint32_t *pindex, void *ptr, uint32_t mask, + uint32_t *pnext_index, gfp_t gfp) +{ + bool is_locked; + int retval; + int timeout = 1; + + *pnext_index = 0; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_write_lock(xa); + else + MPASS((gfp & M_WAITOK) == 0); /* cannot sleep in read section */ +retry: + retval = radix_tree_insert(&xa->root, *pnext_index, ptr); + + switch (retval) { + case -EEXIST: + if (unlikely(*pnext_index == mask) && !timeout--) { + retval = -ENOMEM; + break; + } + (*pnext_index)++; + (*pnext_index) &= mask; + goto retry; + case -ENOMEM: + if (likely(gfp & M_WAITOK)) { + xa_vm_wait(xa); + goto retry; + } + break; + default: + break; + } + *pindex = *pnext_index; + + if (likely(is_locked == false)) + xa_write_unlock(xa); + + return (retval); +} + +/* + * This function tries to insert an element at the given index. The + * "gfp" argument basically decides if this function can sleep or not + * trying to allocate internal memory for its radix tree. The + * function returns an error code upon failure. Typical error codes + * are element exists (-EEXIST) or out of memory (-ENOMEM). + */ +int +xa_insert(struct xarray *xa, uint32_t index, void *ptr, gfp_t gfp) +{ + bool is_locked; + int retval; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_write_lock(xa); + else + MPASS((gfp & M_WAITOK) == 0); /* cannot sleep in read section */ +retry: + retval = radix_tree_insert(&xa->root, index, ptr); + + switch (retval) { + case -ENOMEM: + if (likely(gfp & M_WAITOK)) { + xa_vm_wait(xa); + goto retry; + } + break; + default: + break; + } + + if (likely(is_locked == false)) + xa_write_unlock(xa); + + return (retval); +} + +/* + * This function updates the element at the given index and returns a + * pointer to the old element. 
The "gfp" argument basically decides if + * this function can sleep or not trying to allocate internal memory + * for its radix tree. The function returns an XA_ERROR() pointer code + * upon failure. Code using this function must always check if the + * return value is an XA_ERROR() code before using the returned value. + */ +void * +xa_store(struct xarray *xa, uint32_t index, void *ptr, gfp_t gfp) +{ + bool is_locked; + int retval; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_write_lock(xa); + else + MPASS((gfp & M_WAITOK) == 0); /* cannot sleep in read section */ +retry: + retval = radix_tree_store(&xa->root, index, &ptr); + + switch (retval) { + case 0: + break; + case -ENOMEM: + if (likely(gfp & M_WAITOK)) { + xa_vm_wait(xa); + goto retry; + } + ptr = XA_ERROR(retval); + break; + default: + ptr = XA_ERROR(retval); + break; + } + + if (likely(is_locked == false)) + xa_write_unlock(xa); + + return (ptr); +} + +/* + * This function initializes an xarray structure. "flags" is currently unused. + */ +void +xa_init_flags(struct xarray *xa, uint32_t flags) +{ + + memset(xa, 0, sizeof(*xa)); + + spin_lock_init(&xa->spinlock); + xa->root.gfp_mask = GFP_NOWAIT; +} + +/* + * This function destroys an xarray structure and all its internal + * memory and locks. + */ +void +xa_destroy(struct xarray *xa) +{ + struct radix_tree_iter iter; + void **ppslot; + + radix_tree_for_each_slot(ppslot, &xa->root, &iter, 0) + radix_tree_iter_delete(&xa->root, &iter, ppslot); + spin_lock_destroy(&xa->spinlock); +} + +/* + * This function checks if an xarray is empty or not. + * It returns true if empty, else false. 
+ */ +bool +xa_empty(struct xarray *xa) +{ + bool is_locked; + struct radix_tree_iter iter = {}; + void **temp; + bool found; + + is_locked = mtx_owned(&xa->spinlock.m); + if (likely(is_locked == false)) + xa_lock(xa); + found = radix_tree_iter_find(&xa->root, &iter, &temp); + if (likely(is_locked == false)) + xa_unlock(xa); + + return (!found); +} Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4485,6 +4485,8 @@ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_work.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" +compat/linuxkpi/common/src/linux_xarray.c optional compat_linuxkpi \ + compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_seq_file.c optional compat_linuxkpi | lindebugfs \ compile-with "${LINUXKPI_C}" Index: sys/modules/linuxkpi/Makefile =================================================================== --- sys/modules/linuxkpi/Makefile +++ sys/modules/linuxkpi/Makefile @@ -19,7 +19,8 @@ linux_slab.c \ linux_tasklet.c \ linux_usb.c \ - linux_work.c + linux_work.c \ + linux_xarray.c SRCS+= ${LINUXKPI_GENSRCS}