diff --git a/sys/compat/linuxkpi/common/include/linux/irq_work.h b/sys/compat/linuxkpi/common/include/linux/irq_work.h
index b44e78230b0d..eb1798a4e450 100644
--- a/sys/compat/linuxkpi/common/include/linux/irq_work.h
+++ b/sys/compat/linuxkpi/common/include/linux/irq_work.h
@@ -1,52 +1,67 @@
 /*-
  * Copyright (c) 2020 The FreeBSD Foundation
  *
  * This software was developed by Emmanuel Vadot under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef __LINUX_IRQ_WORK_H__
 #define	__LINUX_IRQ_WORK_H__
 
-#include <linux/workqueue.h>
+#include <sys/param.h>
+#include <sys/taskqueue.h>
+
+struct irq_work;
+typedef void (*irq_work_func_t)(struct irq_work *);
 
 struct irq_work {
-	struct work_struct work;
+	struct task irq_task;
+	irq_work_func_t func;
 };
 
+extern struct taskqueue *linux_irq_work_tq;
+
+#define	DEFINE_IRQ_WORK(name, _func)	struct irq_work name = {	\
+	.irq_task = TASK_INITIALIZER(0, linux_irq_work_fn, &(name)),	\
+	.func = (_func),						\
+}
+
+void	linux_irq_work_fn(void *, int);
+
 static inline void
-init_irq_work(struct irq_work *irqw, void (*func)(struct irq_work *))
+init_irq_work(struct irq_work *irqw, irq_work_func_t func)
 {
-	INIT_WORK(&irqw->work, (work_func_t)func);
+	TASK_INIT(&irqw->irq_task, 0, linux_irq_work_fn, irqw);
+	irqw->func = func;
 }
 
 static inline void
 irq_work_queue(struct irq_work *irqw)
 {
-	schedule_work(&irqw->work);
+	taskqueue_enqueue(linux_irq_work_tq, &irqw->irq_task);
 }
 
 #endif	/* __LINUX_IRQ_WORK_H__ */
diff --git a/sys/compat/linuxkpi/common/include/linux/llist.h b/sys/compat/linuxkpi/common/include/linux/llist.h
new file mode 100644
index 000000000000..b3c89516e710
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/linux/llist.h
@@ -0,0 +1,101 @@
+/* Public domain. */
+
+#ifndef _LINUX_LLIST_H
+#define _LINUX_LLIST_H
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+
+struct llist_node {
+	struct llist_node *next;
+};
+
+struct llist_head {
+	struct llist_node *first;
+};
+
+#define	LLIST_HEAD_INIT(name)	{ NULL }
+#define	LLIST_HEAD(name)	struct llist_head name = LLIST_HEAD_INIT(name)
+
+#define	llist_entry(ptr, type, member)	\
+	((ptr) ? container_of(ptr, type, member) : NULL)
+
+static inline struct llist_node *
+llist_del_all(struct llist_head *head)
+{
+	return ((void *)atomic_readandclear_ptr((uintptr_t *)&head->first));
+}
+
+static inline struct llist_node *
+llist_del_first(struct llist_head *head)
+{
+	struct llist_node *first, *next;
+
+	do {
+		first = head->first;
+		if (first == NULL)
+			return NULL;
+		next = first->next;
+	} while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+	    (uintptr_t)first, (uintptr_t)next) == 0);
+
+	return (first);
+}
+
+static inline bool
+llist_add(struct llist_node *new, struct llist_head *head)
+{
+	struct llist_node *first;
+
+	do {
+		new->next = first = head->first;
+	} while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+	    (uintptr_t)first, (uintptr_t)new) == 0);
+
+	return (first == NULL);
+}
+
+static inline bool
+llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+    struct llist_head *head)
+{
+	struct llist_node *first;
+
+	do {
+		new_last->next = first = head->first;
+	} while (atomic_cmpset_ptr((uintptr_t *)&head->first,
+	    (uintptr_t)first, (uintptr_t)new_first) == 0);
+
+	return (first == NULL);
+}
+
+static inline void
+init_llist_head(struct llist_head *head)
+{
+	head->first = NULL;
+}
+
+static inline bool
+llist_empty(struct llist_head *head)
+{
+	return (head->first == NULL);
+}
+
+#define	llist_for_each_safe(pos, n, node)		\
+	for ((pos) = (node);				\
+	    (pos) != NULL &&				\
+	    ((n) = (pos)->next, pos);			\
+	    (pos) = (n))
+
+#define	llist_for_each_entry_safe(pos, n, node, member) 		\
+	for (pos = llist_entry((node), __typeof(*pos), member); 	\
+	    pos != NULL &&						\
+	    (n = llist_entry(pos->member.next, __typeof(*pos), member), pos); \
+	    pos = n)
+
+#define	llist_for_each_entry(pos, node, member)	\
+	for ((pos) = llist_entry((node), __typeof(*(pos)), member);	\
+	    (pos) != NULL;						\
+	    (pos) = llist_entry((pos)->member.next, __typeof(*(pos)), member))
+
+#endif
diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h
index ae1c9d81843e..0cd748b7ecb9 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -1,193 +1,209 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013-2017 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_SLAB_H_ #define _LINUX_SLAB_H_ #include #include #include #include +#include #include #include #include +#include MALLOC_DECLARE(M_KMALLOC); #define kvmalloc(size, flags) kmalloc(size, flags) #define kvzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) #define kvcalloc(n, size, flags) kvmalloc_array(n, size, (flags) | __GFP_ZERO) #define kzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) #define kzalloc_node(size, flags, node) kmalloc(size, (flags) | __GFP_ZERO) #define kfree_const(ptr) kfree(ptr) #define vzalloc(size) __vmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 0) #define vfree(arg) kfree(arg) #define kvfree(arg) kfree(arg) #define vmalloc_node(size, node) __vmalloc(size, GFP_KERNEL, 0) #define vmalloc_user(size) __vmalloc(size, GFP_KERNEL | __GFP_ZERO, 0) #define vmalloc(size) __vmalloc(size, GFP_KERNEL, 0) #define __kmalloc(...) kmalloc(__VA_ARGS__) #define kmalloc_node(chunk, flags, n) kmalloc(chunk, flags) /* * Prefix some functions with linux_ to avoid namespace conflict * with the OpenSolaris code in the kernel. */ #define kmem_cache linux_kmem_cache #define kmem_cache_create(...) linux_kmem_cache_create(__VA_ARGS__) #define kmem_cache_alloc(...) linux_kmem_cache_alloc(__VA_ARGS__) #define kmem_cache_free(...) linux_kmem_cache_free(__VA_ARGS__) #define kmem_cache_destroy(...) linux_kmem_cache_destroy(__VA_ARGS__) #define KMEM_CACHE(__struct, flags) \ linux_kmem_cache_create(#__struct, sizeof(struct __struct), \ __alignof(struct __struct), (flags), NULL) typedef void linux_kmem_ctor_t (void *); struct linux_kmem_cache { uma_zone_t cache_zone; linux_kmem_ctor_t *cache_ctor; unsigned cache_flags; unsigned cache_size; }; #define SLAB_HWCACHE_ALIGN (1 << 0) #define SLAB_TYPESAFE_BY_RCU (1 << 1) #define SLAB_RECLAIM_ACCOUNT (1 << 2) #define SLAB_DESTROY_BY_RCU \ SLAB_TYPESAFE_BY_RCU #define ARCH_KMALLOC_MINALIGN \ __alignof(unsigned long long) +/* + * Critical section-friendly version of kfree(). + * Requires knowledge of the allocation size at build time. 
+ */ +#define kfree_async(ptr) do { \ + _Static_assert(sizeof(*(ptr)) >= sizeof(struct llist_node), \ + "Size of object to free is unknown or too small"); \ + if (curthread->td_critnest != 0) \ + linux_kfree_async(ptr); \ + else \ + kfree(ptr); \ +} while (0) + static inline gfp_t linux_check_m_flags(gfp_t flags) { const gfp_t m = M_NOWAIT | M_WAITOK; /* make sure either M_NOWAIT or M_WAITOK is set */ if ((flags & m) == 0) flags |= M_NOWAIT; else if ((flags & m) == m) flags &= ~M_WAITOK; /* mask away LinuxKPI specific flags */ return (flags & GFP_NATIVE_MASK); } static inline void * kmalloc(size_t size, gfp_t flags) { return (malloc(size, M_KMALLOC, linux_check_m_flags(flags))); } static inline void * kcalloc(size_t n, size_t size, gfp_t flags) { flags |= __GFP_ZERO; return (mallocarray(n, size, M_KMALLOC, linux_check_m_flags(flags))); } static inline void * __vmalloc(size_t size, gfp_t flags, int other) { return (malloc(size, M_KMALLOC, linux_check_m_flags(flags))); } static inline void * vmalloc_32(size_t size) { return (contigmalloc(size, M_KMALLOC, M_WAITOK, 0, UINT_MAX, 1, 1)); } static inline void * kmalloc_array(size_t n, size_t size, gfp_t flags) { return (mallocarray(n, size, M_KMALLOC, linux_check_m_flags(flags))); } static inline void * kvmalloc_array(size_t n, size_t size, gfp_t flags) { return (mallocarray(n, size, M_KMALLOC, linux_check_m_flags(flags))); } static inline void * krealloc(void *ptr, size_t size, gfp_t flags) { return (realloc(ptr, size, M_KMALLOC, linux_check_m_flags(flags))); } static inline void kfree(const void *ptr) { free(__DECONST(void *, ptr), M_KMALLOC); } static inline size_t ksize(const void *ptr) { return (malloc_usable_size(ptr)); } extern struct linux_kmem_cache *linux_kmem_cache_create(const char *name, size_t size, size_t align, unsigned flags, linux_kmem_ctor_t *ctor); static inline void * linux_kmem_cache_alloc(struct linux_kmem_cache *c, gfp_t flags) { return (uma_zalloc_arg(c->cache_zone, c, linux_check_m_flags(flags))); } static inline void * kmem_cache_zalloc(struct linux_kmem_cache *c, gfp_t flags) { return (uma_zalloc_arg(c->cache_zone, c, linux_check_m_flags(flags | M_ZERO))); } extern void linux_kmem_cache_free_rcu(struct linux_kmem_cache *, void *); static inline void linux_kmem_cache_free(struct linux_kmem_cache *c, void *m) { if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU)) linux_kmem_cache_free_rcu(c, m); else uma_zfree(c->cache_zone, m); } extern void linux_kmem_cache_destroy(struct linux_kmem_cache *); +void linux_kfree_async(void *); #endif /* _LINUX_SLAB_H_ */ diff --git a/sys/compat/linuxkpi/common/src/linux_slab.c b/sys/compat/linuxkpi/common/src/linux_slab.c index c8deab01731a..3304c34b1dee 100644 --- a/sys/compat/linuxkpi/common/src/linux_slab.c +++ b/sys/compat/linuxkpi/common/src/linux_slab.c @@ -1,128 +1,155 @@ /*- * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include +#include +#include + +#include +#include struct linux_kmem_rcu { struct rcu_head rcu_head; struct linux_kmem_cache *cache; }; #define LINUX_KMEM_TO_RCU(c, m) \ ((struct linux_kmem_rcu *)((char *)(m) + \ (c)->cache_size - sizeof(struct linux_kmem_rcu))) #define LINUX_RCU_TO_KMEM(r) \ ((void *)((char *)(r) + sizeof(struct linux_kmem_rcu) - \ (r)->cache->cache_size)) +static LLIST_HEAD(linux_kfree_async_list); + static int linux_kmem_ctor(void *mem, int size, void *arg, int flags) { struct linux_kmem_cache *c = arg; if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU)) { struct linux_kmem_rcu *rcu = LINUX_KMEM_TO_RCU(c, mem); /* duplicate cache pointer */ rcu->cache = c; } /* check for constructor */ if (likely(c->cache_ctor != NULL)) c->cache_ctor(mem); return (0); } static void linux_kmem_cache_free_rcu_callback(struct rcu_head *head) { struct linux_kmem_rcu *rcu = container_of(head, struct linux_kmem_rcu, rcu_head); uma_zfree(rcu->cache->cache_zone, LINUX_RCU_TO_KMEM(rcu)); } struct linux_kmem_cache * linux_kmem_cache_create(const char *name, size_t size, size_t align, unsigned flags, linux_kmem_ctor_t *ctor) { struct linux_kmem_cache *c; c = malloc(sizeof(*c), M_KMALLOC, M_WAITOK); if (flags & SLAB_HWCACHE_ALIGN) align = UMA_ALIGN_CACHE; else if (align != 0) align--; if (flags & SLAB_TYPESAFE_BY_RCU) { /* make room for RCU structure */ size = ALIGN(size, sizeof(void *)); size += sizeof(struct linux_kmem_rcu); /* create cache_zone */ c->cache_zone = uma_zcreate(name, size, linux_kmem_ctor, NULL, NULL, NULL, align, UMA_ZONE_ZINIT); } else { /* create cache_zone */ c->cache_zone = uma_zcreate(name, size, ctor ? 
linux_kmem_ctor : NULL, NULL, NULL, NULL, align, 0); } c->cache_flags = flags; c->cache_ctor = ctor; c->cache_size = size; return (c); } void linux_kmem_cache_free_rcu(struct linux_kmem_cache *c, void *m) { struct linux_kmem_rcu *rcu = LINUX_KMEM_TO_RCU(c, m); call_rcu(&rcu->rcu_head, linux_kmem_cache_free_rcu_callback); } void linux_kmem_cache_destroy(struct linux_kmem_cache *c) { if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU)) { /* make sure all free callbacks have been called */ rcu_barrier(); } uma_zdestroy(c->cache_zone); free(c, M_KMALLOC); } + +static void +linux_kfree_async_fn(void *context, int pending) +{ + struct llist_node *freed; + + while((freed = llist_del_first(&linux_kfree_async_list)) != NULL) + kfree(freed); +} +static struct task linux_kfree_async_task = + TASK_INITIALIZER(0, linux_kfree_async_fn, &linux_kfree_async_task); + +void +linux_kfree_async(void *addr) +{ + if (addr == NULL) + return; + llist_add(addr, &linux_kfree_async_list); + taskqueue_enqueue(linux_irq_work_tq, &linux_kfree_async_task); +} diff --git a/sys/compat/linuxkpi/common/src/linux_work.c b/sys/compat/linuxkpi/common/src/linux_work.c index 043c5b7d1aff..45025378baa9 100644 --- a/sys/compat/linuxkpi/common/src/linux_work.c +++ b/sys/compat/linuxkpi/common/src/linux_work.c @@ -1,685 +1,733 @@ /*- * Copyright (c) 2017-2019 Hans Petter Selasky * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include #include /* * Define all work struct states */ enum { WORK_ST_IDLE, /* idle - not started */ WORK_ST_TIMER, /* timer is being started */ WORK_ST_TASK, /* taskqueue is being queued */ WORK_ST_EXEC, /* callback is being called */ WORK_ST_CANCEL, /* cancel is being requested */ WORK_ST_MAX, }; /* * Define global workqueues */ static struct workqueue_struct *linux_system_short_wq; static struct workqueue_struct *linux_system_long_wq; struct workqueue_struct *system_wq; struct workqueue_struct *system_long_wq; struct workqueue_struct *system_unbound_wq; struct workqueue_struct *system_highpri_wq; struct workqueue_struct *system_power_efficient_wq; +struct taskqueue *linux_irq_work_tq; + static int linux_default_wq_cpus = 4; static void linux_delayed_work_timer_fn(void *); /* * This function atomically updates the work state and returns the * previous state at the time of update. */ static uint8_t linux_update_state(atomic_t *v, const uint8_t *pstate) { int c, old; c = v->counter; while ((old = atomic_cmpxchg(v, c, pstate[c])) != c) c = old; return (c); } /* * A LinuxKPI task is allowed to free itself inside the callback function * and cannot safely be referred after the callback function has * completed. This function gives the linux_work_fn() function a hint, * that the task is not going away and can have its state checked * again. Without this extra hint LinuxKPI tasks cannot be serialized * accross multiple worker threads. */ static bool linux_work_exec_unblock(struct work_struct *work) { struct workqueue_struct *wq; struct work_exec *exec; bool retval = false; wq = work->work_queue; if (unlikely(wq == NULL)) goto done; WQ_EXEC_LOCK(wq); TAILQ_FOREACH(exec, &wq->exec_head, entry) { if (exec->target == work) { exec->target = NULL; retval = true; break; } } WQ_EXEC_UNLOCK(wq); done: return (retval); } static void linux_delayed_work_enqueue(struct delayed_work *dwork) { struct taskqueue *tq; tq = dwork->work.work_queue->taskqueue; taskqueue_enqueue(tq, &dwork->work.work_task); } /* * This function queues the given work structure on the given * workqueue. It returns non-zero if the work was successfully * [re-]queued. Else the work is already pending for completion. */ bool linux_queue_work_on(int cpu __unused, struct workqueue_struct *wq, struct work_struct *work) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_TASK, /* start queuing task */ [WORK_ST_TIMER] = WORK_ST_TIMER, /* NOP */ [WORK_ST_TASK] = WORK_ST_TASK, /* NOP */ [WORK_ST_EXEC] = WORK_ST_TASK, /* queue task another time */ [WORK_ST_CANCEL] = WORK_ST_TASK, /* start queuing task again */ }; if (atomic_read(&wq->draining) != 0) return (!work_pending(work)); switch (linux_update_state(&work->state, states)) { case WORK_ST_EXEC: case WORK_ST_CANCEL: if (linux_work_exec_unblock(work) != 0) return (true); /* FALLTHROUGH */ case WORK_ST_IDLE: work->work_queue = wq; taskqueue_enqueue(wq->taskqueue, &work->work_task); return (true); default: return (false); /* already on a queue */ } } /* * Callback func for linux_queue_rcu_work */ static void rcu_work_func(struct rcu_head *rcu) { struct rcu_work *rwork; rwork = container_of(rcu, struct rcu_work, rcu); linux_queue_work_on(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); } /* * This function queue a work after a grace period * If the work was already pending it returns false, * if not it calls call_rcu and returns true. 
*/ bool linux_queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) { if (!linux_work_pending(&rwork->work)) { rwork->wq = wq; linux_call_rcu(RCU_TYPE_REGULAR, &rwork->rcu, rcu_work_func); return (true); } return (false); } /* * This function waits for the last execution of a work and then * flush the work. * It returns true if the work was pending and we waited, it returns * false otherwise. */ bool linux_flush_rcu_work(struct rcu_work *rwork) { if (linux_work_pending(&rwork->work)) { linux_rcu_barrier(RCU_TYPE_REGULAR); linux_flush_work(&rwork->work); return (true); } return (linux_flush_work(&rwork->work)); } /* * This function queues the given work structure on the given * workqueue after a given delay in ticks. It returns non-zero if the * work was successfully [re-]queued. Else the work is already pending * for completion. */ bool linux_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned delay) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_TIMER, /* start timeout */ [WORK_ST_TIMER] = WORK_ST_TIMER, /* NOP */ [WORK_ST_TASK] = WORK_ST_TASK, /* NOP */ [WORK_ST_EXEC] = WORK_ST_TIMER, /* start timeout */ [WORK_ST_CANCEL] = WORK_ST_TIMER, /* start timeout */ }; if (atomic_read(&wq->draining) != 0) return (!work_pending(&dwork->work)); switch (linux_update_state(&dwork->work.state, states)) { case WORK_ST_EXEC: case WORK_ST_CANCEL: if (delay == 0 && linux_work_exec_unblock(&dwork->work) != 0) { dwork->timer.expires = jiffies; return (true); } /* FALLTHROUGH */ case WORK_ST_IDLE: dwork->work.work_queue = wq; dwork->timer.expires = jiffies + delay; if (delay == 0) { linux_delayed_work_enqueue(dwork); } else if (unlikely(cpu != WORK_CPU_UNBOUND)) { mtx_lock(&dwork->timer.mtx); callout_reset_on(&dwork->timer.callout, delay, &linux_delayed_work_timer_fn, dwork, cpu); mtx_unlock(&dwork->timer.mtx); } else { mtx_lock(&dwork->timer.mtx); callout_reset(&dwork->timer.callout, delay, &linux_delayed_work_timer_fn, dwork); mtx_unlock(&dwork->timer.mtx); } return (true); default: return (false); /* already on a queue */ } } void linux_work_fn(void *context, int pending) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_IDLE, /* NOP */ [WORK_ST_TIMER] = WORK_ST_EXEC, /* delayed work w/o timeout */ [WORK_ST_TASK] = WORK_ST_EXEC, /* call callback */ [WORK_ST_EXEC] = WORK_ST_IDLE, /* complete callback */ [WORK_ST_CANCEL] = WORK_ST_EXEC, /* failed to cancel */ }; struct work_struct *work; struct workqueue_struct *wq; struct work_exec exec; struct task_struct *task; task = current; /* setup local variables */ work = context; wq = work->work_queue; /* store target pointer */ exec.target = work; /* insert executor into list */ WQ_EXEC_LOCK(wq); TAILQ_INSERT_TAIL(&wq->exec_head, &exec, entry); while (1) { switch (linux_update_state(&work->state, states)) { case WORK_ST_TIMER: case WORK_ST_TASK: case WORK_ST_CANCEL: WQ_EXEC_UNLOCK(wq); /* set current work structure */ task->work = work; /* call work function */ work->func(work); /* set current work structure */ task->work = NULL; WQ_EXEC_LOCK(wq); /* check if unblocked */ if (exec.target != work) { /* reapply block */ exec.target = work; break; } /* FALLTHROUGH */ default: goto done; } } done: /* remove executor from list */ TAILQ_REMOVE(&wq->exec_head, &exec, entry); WQ_EXEC_UNLOCK(wq); } void linux_delayed_work_fn(void *context, int pending) { struct delayed_work *dwork = context; /* * Make sure the timer belonging to the delayed 
work gets * drained before invoking the work function. Else the timer * mutex may still be in use which can lead to use-after-free * situations, because the work function might free the work * structure before returning. */ callout_drain(&dwork->timer.callout); linux_work_fn(&dwork->work, pending); } static void linux_delayed_work_timer_fn(void *arg) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_IDLE, /* NOP */ [WORK_ST_TIMER] = WORK_ST_TASK, /* start queueing task */ [WORK_ST_TASK] = WORK_ST_TASK, /* NOP */ [WORK_ST_EXEC] = WORK_ST_EXEC, /* NOP */ [WORK_ST_CANCEL] = WORK_ST_TASK, /* failed to cancel */ }; struct delayed_work *dwork = arg; switch (linux_update_state(&dwork->work.state, states)) { case WORK_ST_TIMER: case WORK_ST_CANCEL: linux_delayed_work_enqueue(dwork); break; default: break; } } /* * This function cancels the given work structure in a synchronous * fashion. It returns non-zero if the work was successfully * cancelled. Else the work was already cancelled. */ bool linux_cancel_work_sync(struct work_struct *work) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_IDLE, /* NOP */ [WORK_ST_TIMER] = WORK_ST_TIMER, /* can't happen */ [WORK_ST_TASK] = WORK_ST_IDLE, /* cancel and drain */ [WORK_ST_EXEC] = WORK_ST_IDLE, /* too late, drain */ [WORK_ST_CANCEL] = WORK_ST_IDLE, /* cancel and drain */ }; struct taskqueue *tq; bool retval = false; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "linux_cancel_work_sync() might sleep"); retry: switch (linux_update_state(&work->state, states)) { case WORK_ST_IDLE: case WORK_ST_TIMER: return (retval); case WORK_ST_EXEC: tq = work->work_queue->taskqueue; if (taskqueue_cancel(tq, &work->work_task, NULL) != 0) taskqueue_drain(tq, &work->work_task); goto retry; /* work may have restarted itself */ default: tq = work->work_queue->taskqueue; if (taskqueue_cancel(tq, &work->work_task, NULL) != 0) taskqueue_drain(tq, &work->work_task); retval = true; goto retry; } } /* * This function atomically stops the timer and callback. The timer * callback will not be called after this function returns. This * functions returns true when the timeout was cancelled. Else the * timeout was not started or has already been called. */ static inline bool linux_cancel_timer(struct delayed_work *dwork, bool drain) { bool cancelled; mtx_lock(&dwork->timer.mtx); cancelled = (callout_stop(&dwork->timer.callout) == 1); mtx_unlock(&dwork->timer.mtx); /* check if we should drain */ if (drain) callout_drain(&dwork->timer.callout); return (cancelled); } /* * This function cancels the given delayed work structure in a * non-blocking fashion. It returns non-zero if the work was * successfully cancelled. Else the work may still be busy or already * cancelled. 
*/ bool linux_cancel_delayed_work(struct delayed_work *dwork) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_IDLE, /* NOP */ [WORK_ST_TIMER] = WORK_ST_CANCEL, /* try to cancel */ [WORK_ST_TASK] = WORK_ST_CANCEL, /* try to cancel */ [WORK_ST_EXEC] = WORK_ST_EXEC, /* NOP */ [WORK_ST_CANCEL] = WORK_ST_CANCEL, /* NOP */ }; struct taskqueue *tq; switch (linux_update_state(&dwork->work.state, states)) { case WORK_ST_TIMER: case WORK_ST_CANCEL: if (linux_cancel_timer(dwork, 0)) { atomic_cmpxchg(&dwork->work.state, WORK_ST_CANCEL, WORK_ST_IDLE); return (true); } /* FALLTHROUGH */ case WORK_ST_TASK: tq = dwork->work.work_queue->taskqueue; if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) == 0) { atomic_cmpxchg(&dwork->work.state, WORK_ST_CANCEL, WORK_ST_IDLE); return (true); } /* FALLTHROUGH */ default: return (false); } } /* * This function cancels the given work structure in a synchronous * fashion. It returns non-zero if the work was successfully * cancelled. Else the work was already cancelled. */ bool linux_cancel_delayed_work_sync(struct delayed_work *dwork) { static const uint8_t states[WORK_ST_MAX] __aligned(8) = { [WORK_ST_IDLE] = WORK_ST_IDLE, /* NOP */ [WORK_ST_TIMER] = WORK_ST_IDLE, /* cancel and drain */ [WORK_ST_TASK] = WORK_ST_IDLE, /* cancel and drain */ [WORK_ST_EXEC] = WORK_ST_IDLE, /* too late, drain */ [WORK_ST_CANCEL] = WORK_ST_IDLE, /* cancel and drain */ }; struct taskqueue *tq; bool retval = false; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "linux_cancel_delayed_work_sync() might sleep"); retry: switch (linux_update_state(&dwork->work.state, states)) { case WORK_ST_IDLE: return (retval); case WORK_ST_EXEC: tq = dwork->work.work_queue->taskqueue; if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0) taskqueue_drain(tq, &dwork->work.work_task); goto retry; /* work may have restarted itself */ case WORK_ST_TIMER: case WORK_ST_CANCEL: if (linux_cancel_timer(dwork, 1)) { /* * Make sure taskqueue is also drained before * returning: */ tq = dwork->work.work_queue->taskqueue; taskqueue_drain(tq, &dwork->work.work_task); retval = true; goto retry; } /* FALLTHROUGH */ default: tq = dwork->work.work_queue->taskqueue; if (taskqueue_cancel(tq, &dwork->work.work_task, NULL) != 0) taskqueue_drain(tq, &dwork->work.work_task); retval = true; goto retry; } } /* * This function waits until the given work structure is completed. * It returns non-zero if the work was successfully * waited for. Else the work was not waited for. */ bool linux_flush_work(struct work_struct *work) { struct taskqueue *tq; bool retval; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "linux_flush_work() might sleep"); switch (atomic_read(&work->state)) { case WORK_ST_IDLE: return (false); default: tq = work->work_queue->taskqueue; retval = taskqueue_poll_is_busy(tq, &work->work_task); taskqueue_drain(tq, &work->work_task); return (retval); } } /* * This function waits until the given delayed work structure is * completed. It returns non-zero if the work was successfully waited * for. Else the work was not waited for. 
*/ bool linux_flush_delayed_work(struct delayed_work *dwork) { struct taskqueue *tq; bool retval; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "linux_flush_delayed_work() might sleep"); switch (atomic_read(&dwork->work.state)) { case WORK_ST_IDLE: return (false); case WORK_ST_TIMER: if (linux_cancel_timer(dwork, 1)) linux_delayed_work_enqueue(dwork); /* FALLTHROUGH */ default: tq = dwork->work.work_queue->taskqueue; retval = taskqueue_poll_is_busy(tq, &dwork->work.work_task); taskqueue_drain(tq, &dwork->work.work_task); return (retval); } } /* * This function returns true if the given work is pending, and not * yet executing: */ bool linux_work_pending(struct work_struct *work) { switch (atomic_read(&work->state)) { case WORK_ST_TIMER: case WORK_ST_TASK: case WORK_ST_CANCEL: return (true); default: return (false); } } /* * This function returns true if the given work is busy. */ bool linux_work_busy(struct work_struct *work) { struct taskqueue *tq; switch (atomic_read(&work->state)) { case WORK_ST_IDLE: return (false); case WORK_ST_EXEC: tq = work->work_queue->taskqueue; return (taskqueue_poll_is_busy(tq, &work->work_task)); default: return (true); } } struct workqueue_struct * linux_create_workqueue_common(const char *name, int cpus) { struct workqueue_struct *wq; /* * If zero CPUs are specified use the default number of CPUs: */ if (cpus == 0) cpus = linux_default_wq_cpus; wq = kmalloc(sizeof(*wq), M_WAITOK | M_ZERO); wq->taskqueue = taskqueue_create(name, M_WAITOK, taskqueue_thread_enqueue, &wq->taskqueue); atomic_set(&wq->draining, 0); taskqueue_start_threads(&wq->taskqueue, cpus, PWAIT, "%s", name); TAILQ_INIT(&wq->exec_head); mtx_init(&wq->exec_mtx, "linux_wq_exec", NULL, MTX_DEF); return (wq); } void linux_destroy_workqueue(struct workqueue_struct *wq) { atomic_inc(&wq->draining); drain_workqueue(wq); taskqueue_free(wq->taskqueue); mtx_destroy(&wq->exec_mtx); kfree(wq); } void linux_init_delayed_work(struct delayed_work *dwork, work_func_t func) { memset(dwork, 0, sizeof(*dwork)); dwork->work.func = func; TASK_INIT(&dwork->work.work_task, 0, linux_delayed_work_fn, dwork); mtx_init(&dwork->timer.mtx, spin_lock_name("lkpi-dwork"), NULL, MTX_DEF | MTX_NOWITNESS); callout_init_mtx(&dwork->timer.callout, &dwork->timer.mtx, 0); } struct work_struct * linux_current_work(void) { return (current->work); } static void linux_work_init(void *arg) { int max_wq_cpus = mp_ncpus + 1; /* avoid deadlock when there are too few threads */ if (max_wq_cpus < 4) max_wq_cpus = 4; /* set default number of CPUs */ linux_default_wq_cpus = max_wq_cpus; linux_system_short_wq = alloc_workqueue("linuxkpi_short_wq", 0, max_wq_cpus); linux_system_long_wq = alloc_workqueue("linuxkpi_long_wq", 0, max_wq_cpus); /* populate the workqueue pointers */ system_long_wq = linux_system_long_wq; system_wq = linux_system_short_wq; system_power_efficient_wq = linux_system_short_wq; system_unbound_wq = linux_system_short_wq; system_highpri_wq = linux_system_short_wq; } SYSINIT(linux_work_init, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_init, NULL); static void linux_work_uninit(void *arg) { destroy_workqueue(linux_system_short_wq); destroy_workqueue(linux_system_long_wq); /* clear workqueue pointers */ system_long_wq = NULL; system_wq = NULL; system_power_efficient_wq = NULL; system_unbound_wq = NULL; system_highpri_wq = NULL; } SYSUNINIT(linux_work_uninit, SI_SUB_TASKQ, SI_ORDER_THIRD, linux_work_uninit, NULL); + +void +linux_irq_work_fn(void *context, int pending) +{ + struct irq_work *irqw = context; + + irqw->func(irqw); +} 
+
+static void
+linux_irq_work_init_fn(void *context, int pending)
+{
+	/*
+	 * LinuxKPI performs lazy allocation of memory structures required by
+	 * current on the first access to it.  As some irq_work clients read
+	 * it with spinlock taken, we have to preallocate td_lkpi_task before
+	 * first call to irq_work_queue().  As irq_work uses a single thread,
+	 * it is enough to read current once at SYSINIT stage.
+	 */
+	if (current == NULL)
+		panic("irq_work taskqueue is not initialized");
+}
+static struct task linux_irq_work_init_task =
+    TASK_INITIALIZER(0, linux_irq_work_init_fn, &linux_irq_work_init_task);
+
+static void
+linux_irq_work_init(void *arg)
+{
+	linux_irq_work_tq = taskqueue_create_fast("linuxkpi_irq_wq",
+	    M_WAITOK, taskqueue_thread_enqueue, &linux_irq_work_tq);
+	taskqueue_start_threads(&linux_irq_work_tq, 1, PWAIT,
+	    "linuxkpi_irq_wq");
+	taskqueue_enqueue(linux_irq_work_tq, &linux_irq_work_init_task);
+}
+SYSINIT(linux_irq_work_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
+    linux_irq_work_init, NULL);
+
+static void
+linux_irq_work_uninit(void *arg)
+{
+	taskqueue_drain_all(linux_irq_work_tq);
+	taskqueue_free(linux_irq_work_tq);
+}
+SYSUNINIT(linux_irq_work_uninit, SI_SUB_TASKQ, SI_ORDER_SECOND,
+    linux_irq_work_uninit, NULL);
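
Reviewer note, not part of the patch: a minimal consumer-side sketch of how a LinuxKPI
client would use the reworked irq_work interface above. The driver, structure and
callback names (mydrv_*) are made up; only DEFINE_IRQ_WORK()/init_irq_work()/
irq_work_queue() from the new irq_work.h plus standard LinuxKPI headers are assumed.

	/* Hypothetical consumer of the reworked <linux/irq_work.h>. */
	#include <linux/kernel.h>	/* container_of() */
	#include <linux/slab.h>
	#include <linux/irq_work.h>

	struct mydrv_event {
		struct irq_work work;		/* deferred-processing hook */
		int code;
	};

	/* Runs later on the linuxkpi_irq_wq taskqueue thread, via linux_irq_work_fn(). */
	static void
	mydrv_event_fn(struct irq_work *w)
	{
		struct mydrv_event *ev = container_of(w, struct mydrv_event, work);

		/* ... process ev->code in a sleepable context ... */
		kfree(ev);
	}

	/* May be called from a context that cannot sleep, e.g. with a spin lock held. */
	static void
	mydrv_post_event(int code)
	{
		struct mydrv_event *ev;

		ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
		if (ev == NULL)
			return;
		ev->code = code;
		init_irq_work(&ev->work, mydrv_event_fn);	/* TASK_INIT() + callback */
		irq_work_queue(&ev->work);	/* taskqueue_enqueue() on linux_irq_work_tq */
	}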
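
Likewise a sketch of the lock-free producer/consumer pattern the new <linux/llist.h>
enables; this mirrors how linux_kfree_async() uses it in linux_slab.c. The mydrv_*
names are hypothetical.

	/* Hypothetical producer/consumer built on the new <linux/llist.h>. */
	#include <linux/llist.h>
	#include <linux/slab.h>

	struct mydrv_msg {
		struct llist_node node;		/* intrusive, lock-free link */
		int payload;
	};

	static LLIST_HEAD(mydrv_pending);	/* initialized to { NULL } */

	/* Producer side: a single atomic push, usable from any context. */
	static bool
	mydrv_msg_post(struct mydrv_msg *msg)
	{
		/* Returns true if the list was previously empty. */
		return (llist_add(&msg->node, &mydrv_pending));
	}

	/* Consumer side: detach the whole stack in one atomic swap, then walk it. */
	static void
	mydrv_msg_drain(void)
	{
		struct llist_node *batch;
		struct mydrv_msg *msg, *tmp;

		batch = llist_del_all(&mydrv_pending);	/* atomic_readandclear_ptr() */
		/* Entries come back in LIFO order; the _safe variant allows freeing. */
		llist_for_each_entry_safe(msg, tmp, batch, node)
			kfree(msg);
	}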
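
Finally, a sketch of the intended use of the new kfree_async() macro from slab.h:
freeing an object from a context that may be inside a critical section, where an
immediate free(9) is not safe. The mydrv_state type is hypothetical; it only has to
be at least as large as a struct llist_node, which the macro's _Static_assert enforces.

	/* Hypothetical caller of the new kfree_async(). */
	#include <linux/slab.h>

	struct mydrv_state {
		void	*cookie;
		uint64_t seq;	/* object >= sizeof(struct llist_node), so the assert holds */
	};

	static void
	mydrv_retire(struct mydrv_state *st)
	{
		/*
		 * This may run inside a critical section (td_critnest != 0), where
		 * free(9) must not be called because it can acquire regular mutexes.
		 * In that case kfree_async() reuses the object's storage as a
		 * llist_node, pushes it onto linux_kfree_async_list and lets the
		 * linuxkpi_irq_wq taskqueue perform the real kfree() later; outside
		 * of a critical section it is just a plain kfree().
		 */
		kfree_async(st);
	}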