diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h
index 8d5eb3e85e96..ecd39d711eb4 100644
--- a/sys/compat/linuxkpi/common/include/linux/slab.h
+++ b/sys/compat/linuxkpi/common/include/linux/slab.h
@@ -1,244 +1,214 @@
 /*-
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * Copyright (c) 2013-2021 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #ifndef	_LINUX_SLAB_H_
 #define	_LINUX_SLAB_H_
 
-#include
-#include
+#include
 #include
 #include
-#include
-#include
 #include
 #include
 #include
 #include
 
 MALLOC_DECLARE(M_KMALLOC);
 
 #define	kvmalloc(size, flags)		kmalloc(size, flags)
 #define	kvzalloc(size, flags)		kmalloc(size, (flags) | __GFP_ZERO)
 #define	kvcalloc(n, size, flags)	kvmalloc_array(n, size, (flags) | __GFP_ZERO)
 #define	kzalloc(size, flags)		kmalloc(size, (flags) | __GFP_ZERO)
 #define	kzalloc_node(size, flags, node)	kmalloc_node(size, (flags) | __GFP_ZERO, node)
 #define	kfree_const(ptr)		kfree(ptr)
 #define	vzalloc(size)			__vmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 0)
 #define	vfree(arg)			kfree(arg)
 #define	kvfree(arg)			kfree(arg)
 #define	vmalloc_node(size, node)	__vmalloc_node(size, GFP_KERNEL, node)
 #define	vmalloc_user(size)		__vmalloc(size, GFP_KERNEL | __GFP_ZERO, 0)
 #define	vmalloc(size)			__vmalloc(size, GFP_KERNEL, 0)
 #define	__kmalloc(...)			kmalloc(__VA_ARGS__)
 
 /*
  * Prefix some functions with linux_ to avoid namespace conflict
  * with the OpenSolaris code in the kernel.
  */
 #define	kmem_cache		linux_kmem_cache
 #define	kmem_cache_create(...)	linux_kmem_cache_create(__VA_ARGS__)
-#define	kmem_cache_alloc(...)	linux_kmem_cache_alloc(__VA_ARGS__)
-#define	kmem_cache_free(...)	linux_kmem_cache_free(__VA_ARGS__)
+#define	kmem_cache_alloc(...)	lkpi_kmem_cache_alloc(__VA_ARGS__)
+#define	kmem_cache_zalloc(...)	lkpi_kmem_cache_zalloc(__VA_ARGS__)
+#define	kmem_cache_free(...)	lkpi_kmem_cache_free(__VA_ARGS__)
 #define	kmem_cache_destroy(...)	linux_kmem_cache_destroy(__VA_ARGS__)
 
 #define	KMEM_CACHE(__struct, flags)					\
 	linux_kmem_cache_create(#__struct, sizeof(struct __struct),	\
 	    __alignof(struct __struct), (flags), NULL)
 
 typedef void linux_kmem_ctor_t (void *);
 
-struct linux_kmem_cache {
-	uma_zone_t cache_zone;
-	linux_kmem_ctor_t *cache_ctor;
-	unsigned cache_flags;
-	unsigned cache_size;
-};
+struct linux_kmem_cache;
 
 #define	SLAB_HWCACHE_ALIGN	(1 << 0)
 #define	SLAB_TYPESAFE_BY_RCU	(1 << 1)
 #define	SLAB_RECLAIM_ACCOUNT	(1 << 2)
 
 #define	SLAB_DESTROY_BY_RCU \
 	SLAB_TYPESAFE_BY_RCU
 
 #define	ARCH_KMALLOC_MINALIGN \
 	__alignof(unsigned long long)
 
 /*
  * Critical section-friendly version of kfree().
  * Requires knowledge of the allocation size at build time.
  */
 #define	kfree_async(ptr)	do {					\
 	_Static_assert(sizeof(*(ptr)) >= sizeof(struct llist_node),	\
 	    "Size of object to free is unknown or too small");		\
 	if (curthread->td_critnest != 0)				\
 		linux_kfree_async(ptr);					\
 	else								\
 		kfree(ptr);						\
 } while (0)
 
 static inline gfp_t
 linux_check_m_flags(gfp_t flags)
 {
 	const gfp_t m = M_NOWAIT | M_WAITOK;
 
 	/* make sure either M_NOWAIT or M_WAITOK is set */
 	if ((flags & m) == 0)
 		flags |= M_NOWAIT;
 	else if ((flags & m) == m)
 		flags &= ~M_WAITOK;
 
 	/* mask away LinuxKPI specific flags */
 	return (flags & GFP_NATIVE_MASK);
 }
 
 static inline void *
 kmalloc(size_t size, gfp_t flags)
 {
 	return (malloc(size, M_KMALLOC, linux_check_m_flags(flags)));
 }
 
 static inline void *
 kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return (malloc_domainset(size, M_KMALLOC,
 	    linux_get_vm_domain_set(node), linux_check_m_flags(flags)));
 }
 
 static inline void *
 kcalloc(size_t n, size_t size, gfp_t flags)
 {
 	flags |= __GFP_ZERO;
 	return (mallocarray(n, size, M_KMALLOC, linux_check_m_flags(flags)));
 }
 
 static inline void *
 kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
 {
 	flags |= __GFP_ZERO;
 	return (mallocarray_domainset(n, size, M_KMALLOC,
 	    linux_get_vm_domain_set(node), linux_check_m_flags(flags)));
 }
 
 static inline void *
 __vmalloc(size_t size, gfp_t flags, int other)
 {
 	return (malloc(size, M_KMALLOC, linux_check_m_flags(flags)));
 }
 
 static inline void *
 __vmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return (malloc_domainset(size, M_KMALLOC,
 	    linux_get_vm_domain_set(node), linux_check_m_flags(flags)));
 }
 
 static inline void *
 vmalloc_32(size_t size)
 {
 	return (contigmalloc(size, M_KMALLOC, M_WAITOK, 0, UINT_MAX, 1, 1));
 }
 
 static inline void *
 kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
 	return (mallocarray(n, size, M_KMALLOC, linux_check_m_flags(flags)));
 }
 
 static inline void *
 kmalloc_array_node(size_t n, size_t size, gfp_t flags, int node)
 {
 	return (mallocarray_domainset(n, size, M_KMALLOC,
 	    linux_get_vm_domain_set(node), linux_check_m_flags(flags)));
 }
 
 static inline void *
 kvmalloc_array(size_t n, size_t size, gfp_t flags)
 {
 	return (mallocarray(n, size, M_KMALLOC, linux_check_m_flags(flags)));
 }
 
 static inline void *
 krealloc(void *ptr, size_t size, gfp_t flags)
 {
 	return (realloc(ptr, size, M_KMALLOC, linux_check_m_flags(flags)));
 }
 
 static inline void
 kfree(const void *ptr)
 {
 	free(__DECONST(void *, ptr), M_KMALLOC);
 }
 
 static __inline void
 kfree_sensitive(const void *ptr)
 {
 	zfree(__DECONST(void *, ptr), M_KMALLOC);
 }
 
 static inline size_t
 ksize(const void *ptr)
 {
 	return (malloc_usable_size(ptr));
 }
 
 extern struct linux_kmem_cache *linux_kmem_cache_create(const char *name,
     size_t size, size_t align, unsigned flags, linux_kmem_ctor_t *ctor);
-
-static inline void *
-linux_kmem_cache_alloc(struct linux_kmem_cache *c, gfp_t flags)
-{
-	return (uma_zalloc_arg(c->cache_zone, c,
-	    linux_check_m_flags(flags)));
-}
-
-static inline void *
-kmem_cache_zalloc(struct linux_kmem_cache *c, gfp_t flags)
-{
-	return (uma_zalloc_arg(c->cache_zone, c,
-	    linux_check_m_flags(flags | M_ZERO)));
-}
-
-extern void linux_kmem_cache_free_rcu(struct linux_kmem_cache *, void *);
-
-static inline void
-linux_kmem_cache_free(struct linux_kmem_cache *c, void *m)
-{
-	if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU))
-		linux_kmem_cache_free_rcu(c, m);
-	else
-		uma_zfree(c->cache_zone, m);
-}
-
+extern void *lkpi_kmem_cache_alloc(struct linux_kmem_cache *, gfp_t);
+extern void *lkpi_kmem_cache_zalloc(struct linux_kmem_cache *, gfp_t);
+extern void lkpi_kmem_cache_free(struct linux_kmem_cache *, void *);
 extern void linux_kmem_cache_destroy(struct linux_kmem_cache *);
 
 void linux_kfree_async(void *);
 
 #endif					/* _LINUX_SLAB_H_ */
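
With struct linux_kmem_cache now opaque and the alloc/zalloc/free operations exported as functions, consumers of <linux/slab.h> are unaffected at the source level. The sketch below is not part of the patch; it only illustrates how a hypothetical LinuxKPI consumer (the names foo_ctx, foo_cache, foo_init and foo_exit are made up) keeps using the Linux-style macros, which now expand to the lkpi_*() functions implemented in linux_slab.c.

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/slab.h>

struct foo_ctx {
	int state;
};

static struct kmem_cache *foo_cache;

static int
foo_init(void)
{
	/* expands to linux_kmem_cache_create("foo_ctx", ...) */
	foo_cache = KMEM_CACHE(foo_ctx, SLAB_HWCACHE_ALIGN);
	return (foo_cache != NULL ? 0 : -ENOMEM);
}

static void
foo_exit(void)
{
	struct foo_ctx *p;

	/* these macros now expand to lkpi_kmem_cache_zalloc()/_free() */
	p = kmem_cache_zalloc(foo_cache, GFP_KERNEL);
	if (p != NULL)
		kmem_cache_free(foo_cache, p);
	kmem_cache_destroy(foo_cache);
}
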
diff --git a/sys/compat/linuxkpi/common/src/linux_rcu.c b/sys/compat/linuxkpi/common/src/linux_rcu.c
index 404c5cec4ae4..a39949cf5013 100644
--- a/sys/compat/linuxkpi/common/src/linux_rcu.c
+++ b/sys/compat/linuxkpi/common/src/linux_rcu.c
@@ -1,442 +1,428 @@
 /*-
  * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io)
  * Copyright (c) 2017-2021 Hans Petter Selasky (hselasky@freebsd.org)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include
 __FBSDID("$FreeBSD$");
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
 
 /*
  * By defining CONFIG_NO_RCU_SKIP LinuxKPI RCU locks and asserts will
  * not be skipped during panic().
  */
 #ifdef CONFIG_NO_RCU_SKIP
 #define	RCU_SKIP(void) 0
 #else
 #define	RCU_SKIP(void)	unlikely(SCHEDULER_STOPPED() || kdb_active)
 #endif
 
 struct callback_head {
-	STAILQ_ENTRY(callback_head) entry;
+	union {
+		STAILQ_ENTRY(callback_head) entry;
+		struct llist_node node;
+	};
 	rcu_callback_t func;
 };
 
 struct linux_epoch_head {
-	STAILQ_HEAD(, callback_head) cb_head;
-	struct mtx lock;
+	struct llist_head cb_head;
 	struct task task;
 } __aligned(CACHE_LINE_SIZE);
 
 struct linux_epoch_record {
 	ck_epoch_record_t epoch_record;
 	TAILQ_HEAD(, task_struct) ts_head;
 	int cpuid;
 	int type;
 } __aligned(CACHE_LINE_SIZE);
 
 /*
  * Verify that "struct rcu_head" is big enough to hold "struct
  * callback_head". This has been done to avoid having to add special
  * compile flags for including ck_epoch.h to all clients of the
  * LinuxKPI.
  */
 CTASSERT(sizeof(struct rcu_head) == sizeof(struct callback_head));
 
 /*
  * Verify that "rcu_section[0]" has the same size as
  * "ck_epoch_section_t". This has been done to avoid having to add
  * special compile flags for including ck_epoch.h to all clients of
  * the LinuxKPI.
  */
 CTASSERT(sizeof(((struct task_struct *)0)->rcu_section[0] ==
     sizeof(ck_epoch_section_t)));
 
 /*
  * Verify that "epoch_record" is at beginning of "struct
  * linux_epoch_record":
  */
 CTASSERT(offsetof(struct linux_epoch_record, epoch_record) == 0);
 
 CTASSERT(TS_RCU_TYPE_MAX == RCU_TYPE_MAX);
 
 static ck_epoch_t linux_epoch[RCU_TYPE_MAX];
 static struct linux_epoch_head linux_epoch_head[RCU_TYPE_MAX];
 DPCPU_DEFINE_STATIC(struct linux_epoch_record, linux_epoch_record[RCU_TYPE_MAX]);
 
 static void linux_rcu_cleaner_func(void *, int);
 
 static void
 linux_rcu_runtime_init(void *arg __unused)
 {
 	struct linux_epoch_head *head;
 	int i;
 	int j;
 
 	for (j = 0; j != RCU_TYPE_MAX; j++) {
 		ck_epoch_init(&linux_epoch[j]);
 
 		head = &linux_epoch_head[j];
 
-		mtx_init(&head->lock, "LRCU-HEAD", NULL, MTX_DEF);
 		TASK_INIT(&head->task, 0, linux_rcu_cleaner_func, head);
-		STAILQ_INIT(&head->cb_head);
+		init_llist_head(&head->cb_head);
 
 		CPU_FOREACH(i) {
 			struct linux_epoch_record *record;
 
 			record = &DPCPU_ID_GET(i, linux_epoch_record[j]);
 
 			record->cpuid = i;
 			record->type = j;
 			ck_epoch_register(&linux_epoch[j],
 			    &record->epoch_record, NULL);
 			TAILQ_INIT(&record->ts_head);
 		}
 	}
 }
 SYSINIT(linux_rcu_runtime, SI_SUB_CPU, SI_ORDER_ANY, linux_rcu_runtime_init, NULL);
 
-static void
-linux_rcu_runtime_uninit(void *arg __unused)
-{
-	struct linux_epoch_head *head;
-	int j;
-
-	for (j = 0; j != RCU_TYPE_MAX; j++) {
-		head = &linux_epoch_head[j];
-
-		mtx_destroy(&head->lock);
-	}
-}
-SYSUNINIT(linux_rcu_runtime, SI_SUB_LOCK, SI_ORDER_SECOND, linux_rcu_runtime_uninit, NULL);
-
 static void
 linux_rcu_cleaner_func(void *context, int pending __unused)
 {
-	struct linux_epoch_head *head;
+	struct linux_epoch_head *head = context;
 	struct callback_head *rcu;
 	STAILQ_HEAD(, callback_head) tmp_head;
+	struct llist_node *node, *next;
 	uintptr_t offset;
 
-	linux_set_current(curthread);
-
-	head = context;
-
 	/* move current callbacks into own queue */
-	mtx_lock(&head->lock);
 	STAILQ_INIT(&tmp_head);
-	STAILQ_CONCAT(&tmp_head, &head->cb_head);
-	mtx_unlock(&head->lock);
+	llist_for_each_safe(node, next, llist_del_all(&head->cb_head)) {
+		rcu = container_of(node, struct callback_head, node);
+		/* re-reverse list to restore chronological order */
+		STAILQ_INSERT_HEAD(&tmp_head, rcu, entry);
+	}
 
 	/* synchronize */
 	linux_synchronize_rcu(head - linux_epoch_head);
 
 	/* dispatch all callbacks, if any */
 	while ((rcu = STAILQ_FIRST(&tmp_head)) != NULL) {
 		STAILQ_REMOVE_HEAD(&tmp_head, entry);
 
 		offset = (uintptr_t)rcu->func;
 
 		if (offset < LINUX_KFREE_RCU_OFFSET_MAX)
 			kfree((char *)rcu - offset);
 		else
 			rcu->func((struct rcu_head *)rcu);
 	}
 }
 
 void
 linux_rcu_read_lock(unsigned type)
 {
 	struct linux_epoch_record *record;
 	struct task_struct *ts;
 
 	MPASS(type < RCU_TYPE_MAX);
 
 	if (RCU_SKIP())
 		return;
 
 	ts = current;
 
 	/* assert valid refcount */
 	MPASS(ts->rcu_recurse[type] != INT_MAX);
 
 	if (++(ts->rcu_recurse[type]) != 1)
 		return;
 
 	/*
 	 * Pin thread to current CPU so that the unlock code gets the
 	 * same per-CPU epoch record:
 	 */
 	sched_pin();
 
 	record = &DPCPU_GET(linux_epoch_record[type]);
 
 	/*
 	 * Use a critical section to prevent recursion inside
 	 * ck_epoch_begin(). Else this function supports recursion.
 	 */
 	critical_enter();
 	ck_epoch_begin(&record->epoch_record,
 	    (ck_epoch_section_t *)&ts->rcu_section[type]);
 	TAILQ_INSERT_TAIL(&record->ts_head, ts, rcu_entry[type]);
 	critical_exit();
 }
 
 void
 linux_rcu_read_unlock(unsigned type)
 {
 	struct linux_epoch_record *record;
 	struct task_struct *ts;
 
 	MPASS(type < RCU_TYPE_MAX);
 
 	if (RCU_SKIP())
 		return;
 
 	ts = current;
 
 	/* assert valid refcount */
 	MPASS(ts->rcu_recurse[type] > 0);
 
 	if (--(ts->rcu_recurse[type]) != 0)
 		return;
 
 	record = &DPCPU_GET(linux_epoch_record[type]);
 
 	/*
 	 * Use a critical section to prevent recursion inside
 	 * ck_epoch_end(). Else this function supports recursion.
 	 */
 	critical_enter();
 	ck_epoch_end(&record->epoch_record,
 	    (ck_epoch_section_t *)&ts->rcu_section[type]);
 	TAILQ_REMOVE(&record->ts_head, ts, rcu_entry[type]);
 	critical_exit();
 
 	sched_unpin();
 }
 
 static void
 linux_synchronize_rcu_cb(ck_epoch_t *epoch __unused, ck_epoch_record_t *epoch_record, void *arg __unused)
 {
 	struct linux_epoch_record *record =
 	    container_of(epoch_record, struct linux_epoch_record, epoch_record);
 	struct thread *td = curthread;
 	struct task_struct *ts;
 
 	/* check if blocked on the current CPU */
 	if (record->cpuid == PCPU_GET(cpuid)) {
 		bool is_sleeping = 0;
 		u_char prio = 0;
 
 		/*
 		 * Find the lowest priority or sleeping thread which
 		 * is blocking synchronization on this CPU core. All
 		 * the threads in the queue are CPU-pinned and cannot
 		 * go anywhere while the current thread is locked.
 		 */
 		TAILQ_FOREACH(ts, &record->ts_head, rcu_entry[record->type]) {
 			if (ts->task_thread->td_priority > prio)
 				prio = ts->task_thread->td_priority;
 			is_sleeping |= (ts->task_thread->td_inhibitors != 0);
 		}
 
 		if (is_sleeping) {
 			thread_unlock(td);
 			pause("W", 1);
 			thread_lock(td);
 		} else {
 			/* set new thread priority */
 			sched_prio(td, prio);
 			/* task switch */
 			mi_switch(SW_VOL | SWT_RELINQUISH);
 			/*
 			 * It is important the thread lock is dropped
 			 * while yielding to allow other threads to
 			 * acquire the lock pointed to by
 			 * TDQ_LOCKPTR(td). Currently mi_switch() will
 			 * unlock the thread lock before
 			 * returning. Else a deadlock like situation
 			 * might happen.
 			 */
 			thread_lock(td);
 		}
 	} else {
 		/*
 		 * To avoid spinning move execution to the other CPU
 		 * which is blocking synchronization. Set highest
 		 * thread priority so that code gets run. The thread
 		 * priority will be restored later.
 		 */
 		sched_prio(td, 0);
 		sched_bind(td, record->cpuid);
 	}
 }
 
 void
 linux_synchronize_rcu(unsigned type)
 {
 	struct thread *td;
 	int was_bound;
 	int old_cpu;
 	int old_pinned;
 	u_char old_prio;
 
 	MPASS(type < RCU_TYPE_MAX);
 
 	if (RCU_SKIP())
 		return;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "linux_synchronize_rcu() can sleep");
 
 	td = curthread;
 	DROP_GIANT();
 
 	/*
 	 * Synchronizing RCU might change the CPU core this function
 	 * is running on. Save current values:
 	 */
 	thread_lock(td);
 
 	old_cpu = PCPU_GET(cpuid);
 	old_pinned = td->td_pinned;
 	old_prio = td->td_priority;
 	was_bound = sched_is_bound(td);
 	sched_unbind(td);
 	td->td_pinned = 0;
 	sched_bind(td, old_cpu);
 
 	ck_epoch_synchronize_wait(&linux_epoch[type],
 	    &linux_synchronize_rcu_cb, NULL);
 
 	/* restore CPU binding, if any */
 	if (was_bound != 0) {
 		sched_bind(td, old_cpu);
 	} else {
 		/* get thread back to initial CPU, if any */
 		if (old_pinned != 0)
 			sched_bind(td, old_cpu);
 		sched_unbind(td);
 	}
 	/* restore pinned after bind */
 	td->td_pinned = old_pinned;
 
 	/* restore thread priority */
 	sched_prio(td, old_prio);
 	thread_unlock(td);
 
 	PICKUP_GIANT();
 }
 
 void
 linux_rcu_barrier(unsigned type)
 {
 	struct linux_epoch_head *head;
 
 	MPASS(type < RCU_TYPE_MAX);
 
 	linux_synchronize_rcu(type);
 
 	head = &linux_epoch_head[type];
 
 	/* wait for callbacks to complete */
-	taskqueue_drain(taskqueue_fast, &head->task);
+	taskqueue_drain(linux_irq_work_tq, &head->task);
 }
 
 void
 linux_call_rcu(unsigned type, struct rcu_head *context, rcu_callback_t func)
 {
 	struct callback_head *rcu;
 	struct linux_epoch_head *head;
 
 	MPASS(type < RCU_TYPE_MAX);
 
 	rcu = (struct callback_head *)context;
 	head = &linux_epoch_head[type];
 
-	mtx_lock(&head->lock);
 	rcu->func = func;
-	STAILQ_INSERT_TAIL(&head->cb_head, rcu, entry);
-	taskqueue_enqueue(taskqueue_fast, &head->task);
-	mtx_unlock(&head->lock);
+	llist_add(&rcu->node, &head->cb_head);
+	taskqueue_enqueue(linux_irq_work_tq, &head->task);
 }
 
 int
 init_srcu_struct(struct srcu_struct *srcu)
 {
 	return (0);
 }
 
 void
 cleanup_srcu_struct(struct srcu_struct *srcu)
 {
 }
 
 int
 srcu_read_lock(struct srcu_struct *srcu)
 {
 	linux_rcu_read_lock(RCU_TYPE_SLEEPABLE);
 	return (0);
 }
 
 void
 srcu_read_unlock(struct srcu_struct *srcu, int key __unused)
 {
 	linux_rcu_read_unlock(RCU_TYPE_SLEEPABLE);
 }
 
 void
 synchronize_srcu(struct srcu_struct *srcu)
 {
 	linux_synchronize_rcu(RCU_TYPE_SLEEPABLE);
 }
 
 void
 srcu_barrier(struct srcu_struct *srcu)
 {
 	linux_rcu_barrier(RCU_TYPE_SLEEPABLE);
 }
diff --git a/sys/compat/linuxkpi/common/src/linux_slab.c b/sys/compat/linuxkpi/common/src/linux_slab.c
index 3304c34b1dee..5dbd87b66d1a 100644
--- a/sys/compat/linuxkpi/common/src/linux_slab.c
+++ b/sys/compat/linuxkpi/common/src/linux_slab.c
@@ -1,155 +1,226 @@
 /*-
  * Copyright (c) 2017 Mellanox Technologies, Ltd.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include
 __FBSDID("$FreeBSD$");
 
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
 
 struct linux_kmem_rcu {
 	struct rcu_head rcu_head;
 	struct linux_kmem_cache *cache;
 };
 
+struct linux_kmem_cache {
+	uma_zone_t cache_zone;
+	linux_kmem_ctor_t *cache_ctor;
+	unsigned cache_flags;
+	unsigned cache_size;
+	struct llist_head cache_items;
+	struct task cache_task;
+};
+
 #define	LINUX_KMEM_TO_RCU(c, m)					\
 	((struct linux_kmem_rcu *)((char *)(m) +		\
 	(c)->cache_size - sizeof(struct linux_kmem_rcu)))
 
 #define	LINUX_RCU_TO_KMEM(r)					\
 	((void *)((char *)(r) + sizeof(struct linux_kmem_rcu) - \
 	(r)->cache->cache_size))
 
 static LLIST_HEAD(linux_kfree_async_list);
 
+static void lkpi_kmem_cache_free_async_fn(void *, int);
+
+void *
+lkpi_kmem_cache_alloc(struct linux_kmem_cache *c, gfp_t flags)
+{
+	return (uma_zalloc_arg(c->cache_zone, c,
+	    linux_check_m_flags(flags)));
+}
+
+void *
+lkpi_kmem_cache_zalloc(struct linux_kmem_cache *c, gfp_t flags)
+{
+	return (uma_zalloc_arg(c->cache_zone, c,
+	    linux_check_m_flags(flags | M_ZERO)));
+}
+
 static int
 linux_kmem_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct linux_kmem_cache *c = arg;
 
 	if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU)) {
 		struct linux_kmem_rcu *rcu = LINUX_KMEM_TO_RCU(c, mem);
 
 		/* duplicate cache pointer */
 		rcu->cache = c;
 	}
 
 	/* check for constructor */
 	if (likely(c->cache_ctor != NULL))
 		c->cache_ctor(mem);
 
 	return (0);
 }
 
 static void
 linux_kmem_cache_free_rcu_callback(struct rcu_head *head)
 {
 	struct linux_kmem_rcu *rcu =
 	    container_of(head, struct linux_kmem_rcu, rcu_head);
 
 	uma_zfree(rcu->cache->cache_zone, LINUX_RCU_TO_KMEM(rcu));
 }
 
 struct linux_kmem_cache *
 linux_kmem_cache_create(const char *name, size_t size, size_t align,
     unsigned flags, linux_kmem_ctor_t *ctor)
 {
 	struct linux_kmem_cache *c;
 
 	c = malloc(sizeof(*c), M_KMALLOC, M_WAITOK);
 
 	if (flags & SLAB_HWCACHE_ALIGN)
 		align = UMA_ALIGN_CACHE;
 	else if (align != 0)
 		align--;
 
 	if (flags & SLAB_TYPESAFE_BY_RCU) {
 		/* make room for RCU structure */
 		size = ALIGN(size, sizeof(void *));
 		size += sizeof(struct linux_kmem_rcu);
 
 		/* create cache_zone */
 		c->cache_zone = uma_zcreate(name, size,
 		    linux_kmem_ctor, NULL, NULL, NULL,
 		    align, UMA_ZONE_ZINIT);
 	} else {
+		/* make room for async task list items */
+		size = MAX(size, sizeof(struct llist_node));
+
 		/* create cache_zone */
 		c->cache_zone = uma_zcreate(name, size,
 		    ctor ? linux_kmem_ctor : NULL, NULL,
 		    NULL, NULL, align, 0);
 	}
 
 	c->cache_flags = flags;
 	c->cache_ctor = ctor;
 	c->cache_size = size;
+	init_llist_head(&c->cache_items);
+	TASK_INIT(&c->cache_task, 0, lkpi_kmem_cache_free_async_fn, c);
 	return (c);
 }
 
-void
-linux_kmem_cache_free_rcu(struct linux_kmem_cache *c, void *m)
+static inline void
+lkpi_kmem_cache_free_rcu(struct linux_kmem_cache *c, void *m)
 {
 	struct linux_kmem_rcu *rcu = LINUX_KMEM_TO_RCU(c, m);
 
 	call_rcu(&rcu->rcu_head, linux_kmem_cache_free_rcu_callback);
 }
 
+static inline void
+lkpi_kmem_cache_free_sync(struct linux_kmem_cache *c, void *m)
+{
+	uma_zfree(c->cache_zone, m);
+}
+
+static void
+lkpi_kmem_cache_free_async_fn(void *context, int pending)
+{
+	struct linux_kmem_cache *c = context;
+	struct llist_node *freed, *next;
+
+	llist_for_each_safe(freed, next, llist_del_all(&c->cache_items))
+		lkpi_kmem_cache_free_sync(c, freed);
+}
+
+static inline void
+lkpi_kmem_cache_free_async(struct linux_kmem_cache *c, void *m)
+{
+	if (m == NULL)
+		return;
+
+	llist_add(m, &c->cache_items);
+	taskqueue_enqueue(linux_irq_work_tq, &c->cache_task);
+}
+
+void
+lkpi_kmem_cache_free(struct linux_kmem_cache *c, void *m)
+{
+	if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU))
+		lkpi_kmem_cache_free_rcu(c, m);
+	else if (unlikely(curthread->td_critnest != 0))
+		lkpi_kmem_cache_free_async(c, m);
+	else
+		lkpi_kmem_cache_free_sync(c, m);
+}
+
 void
 linux_kmem_cache_destroy(struct linux_kmem_cache *c)
 {
 	if (unlikely(c->cache_flags & SLAB_TYPESAFE_BY_RCU)) {
 		/* make sure all free callbacks have been called */
 		rcu_barrier();
 	}
 
+	if (!llist_empty(&c->cache_items))
+		taskqueue_enqueue(linux_irq_work_tq, &c->cache_task);
+	taskqueue_drain(linux_irq_work_tq, &c->cache_task);
 	uma_zdestroy(c->cache_zone);
 	free(c, M_KMALLOC);
 }
 
 static void
 linux_kfree_async_fn(void *context, int pending)
 {
 	struct llist_node *freed;
 
 	while((freed = llist_del_first(&linux_kfree_async_list)) != NULL)
 		kfree(freed);
 }
 static struct task linux_kfree_async_task =
     TASK_INITIALIZER(0, linux_kfree_async_fn, &linux_kfree_async_task);
 
 void
 linux_kfree_async(void *addr)
 {
 	if (addr == NULL)
 		return;
 	llist_add(addr, &linux_kfree_async_list);
 	taskqueue_enqueue(linux_irq_work_tq, &linux_kfree_async_task);
 }
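
The net effect of the linux_slab.c change is that kmem_cache_free() no longer calls uma_zfree() directly when the current thread is inside a critical section; the object is parked on the per-cache llist and reclaimed later by lkpi_kmem_cache_free_async_fn() running on the linux_irq_work_tq taskqueue. The usage sketch below is not part of the patch; the names foo_cache, obj and foo_free_in_critical_section are illustrative only.

#include <sys/param.h>
#include <sys/systm.h>

#include <linux/slab.h>

static void
foo_free_in_critical_section(struct kmem_cache *foo_cache, void *obj)
{
	critical_enter();
	/* ... per-CPU bookkeeping that must not be preempted ... */
	kmem_cache_free(foo_cache, obj);	/* deferred: td_critnest != 0 */
	critical_exit();
}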