Index: head/sys/kern/subr_epoch.c =================================================================== --- head/sys/kern/subr_epoch.c (revision 356825) +++ head/sys/kern/subr_epoch.c (revision 356826) @@ -1,846 +1,846 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef EPOCH_TRACE #include #include #include #endif #include #include #include #include #include static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation"); #ifdef __amd64__ #define EPOCH_ALIGN CACHE_LINE_SIZE*2 #else #define EPOCH_ALIGN CACHE_LINE_SIZE #endif TAILQ_HEAD (epoch_tdlist, epoch_tracker); typedef struct epoch_record { ck_epoch_record_t er_record; struct epoch_context er_drain_ctx; struct epoch *er_parent; volatile struct epoch_tdlist er_tdlist; volatile uint32_t er_gen; uint32_t er_cpuid; } __aligned(EPOCH_ALIGN) *epoch_record_t; struct epoch { struct ck_epoch e_epoch __aligned(EPOCH_ALIGN); epoch_record_t e_pcpu_record; int e_idx; int e_flags; struct sx e_drain_sx; struct mtx e_drain_mtx; volatile int e_drain_count; const char *e_name; }; /* arbitrary --- needs benchmarking */ #define MAX_ADAPTIVE_SPIN 100 #define MAX_EPOCHS 64 CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context)); SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information"); SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats"); /* Stats. 
*/ static counter_u64_t block_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW, &block_count, "# of times a thread was in an epoch when epoch_wait was called"); static counter_u64_t migrate_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW, &migrate_count, "# of times thread was migrated to another CPU in epoch_wait"); static counter_u64_t turnstile_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW, &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait"); static counter_u64_t switch_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW, &switch_count, "# of times a thread voluntarily context switched in epoch_wait"); static counter_u64_t epoch_call_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW, &epoch_call_count, "# of times a callback was deferred"); static counter_u64_t epoch_call_task_count; SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW, &epoch_call_task_count, "# of times a callback task was run"); TAILQ_HEAD (threadlist, thread); CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry, ck_epoch_entry_container) epoch_t allepochs[MAX_EPOCHS]; DPCPU_DEFINE(struct grouptask, epoch_cb_task); DPCPU_DEFINE(int, epoch_cb_count); static __read_mostly int inited; static __read_mostly int epoch_count; __read_mostly epoch_t global_epoch; __read_mostly epoch_t global_epoch_preempt; static void epoch_call_task(void *context __unused); static uma_zone_t pcpu_zone_record; #ifdef EPOCH_TRACE struct stackentry { RB_ENTRY(stackentry) se_node; struct stack se_stack; }; static int stackentry_compare(struct stackentry *a, struct stackentry *b) { if (a->se_stack.depth > b->se_stack.depth) return (1); if (a->se_stack.depth < b->se_stack.depth) return (-1); for (int i = 0; i < a->se_stack.depth; i++) { if (a->se_stack.pcs[i] > b->se_stack.pcs[i]) return (1); if (a->se_stack.pcs[i] 
< b->se_stack.pcs[i]) return (-1); } return (0); } RB_HEAD(stacktree, stackentry) epoch_stacks = RB_INITIALIZER(&epoch_stacks); RB_GENERATE_STATIC(stacktree, stackentry, se_node, stackentry_compare); static struct mtx epoch_stacks_lock; MTX_SYSINIT(epochstacks, &epoch_stacks_lock, "epoch_stacks", MTX_DEF); static bool epoch_trace_stack_print = true; SYSCTL_BOOL(_kern_epoch, OID_AUTO, trace_stack_print, CTLFLAG_RWTUN, &epoch_trace_stack_print, 0, "Print stack traces on epoch reports"); static void epoch_trace_report(const char *fmt, ...) __printflike(1, 2); static inline void epoch_trace_report(const char *fmt, ...) { va_list ap; struct stackentry se, *new; stack_zero(&se.se_stack); /* XXX: is it really needed? */ stack_save(&se.se_stack); /* Tree is never reduced - go lockless. */ if (RB_FIND(stacktree, &epoch_stacks, &se) != NULL) return; new = malloc(sizeof(*new), M_STACK, M_NOWAIT); if (new != NULL) { bcopy(&se.se_stack, &new->se_stack, sizeof(struct stack)); mtx_lock(&epoch_stacks_lock); new = RB_INSERT(stacktree, &epoch_stacks, new); mtx_unlock(&epoch_stacks_lock); if (new != NULL) free(new, M_STACK); } va_start(ap, fmt); (void)vprintf(fmt, ap); va_end(ap); if (epoch_trace_stack_print) stack_print_ddb(&se.se_stack); } static inline void epoch_trace_enter(struct thread *td, epoch_t epoch, epoch_tracker_t et, const char *file, int line) { epoch_tracker_t iet; SLIST_FOREACH(iet, &td->td_epochs, et_tlink) if (iet->et_epoch == epoch) epoch_trace_report("Recursively entering epoch %s " "at %s:%d, previously entered at %s:%d\n", epoch->e_name, file, line, iet->et_file, iet->et_line); et->et_epoch = epoch; et->et_file = file; et->et_line = line; SLIST_INSERT_HEAD(&td->td_epochs, et, et_tlink); } static inline void epoch_trace_exit(struct thread *td, epoch_t epoch, epoch_tracker_t et, const char *file, int line) { if (SLIST_FIRST(&td->td_epochs) != et) { epoch_trace_report("Exiting epoch %s in a not nested order " "at %s:%d. 
Most recently entered %s at %s:%d\n", epoch->e_name, file, line, SLIST_FIRST(&td->td_epochs)->et_epoch->e_name, SLIST_FIRST(&td->td_epochs)->et_file, SLIST_FIRST(&td->td_epochs)->et_line); /* This will panic if et is not anywhere on td_epochs. */ SLIST_REMOVE(&td->td_epochs, et, epoch_tracker, et_tlink); } else SLIST_REMOVE_HEAD(&td->td_epochs, et_tlink); } /* Used by assertions that check thread state before going to sleep. */ void epoch_trace_list(struct thread *td) { epoch_tracker_t iet; SLIST_FOREACH(iet, &td->td_epochs, et_tlink) printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name, iet->et_file, iet->et_line); } #endif /* EPOCH_TRACE */ static void epoch_init(void *arg __unused) { int cpu; block_count = counter_u64_alloc(M_WAITOK); migrate_count = counter_u64_alloc(M_WAITOK); turnstile_count = counter_u64_alloc(M_WAITOK); switch_count = counter_u64_alloc(M_WAITOK); epoch_call_count = counter_u64_alloc(M_WAITOK); epoch_call_task_count = counter_u64_alloc(M_WAITOK); pcpu_zone_record = uma_zcreate("epoch_record pcpu", sizeof(struct epoch_record), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); CPU_FOREACH(cpu) { GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL); taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, NULL, NULL, "epoch call task"); } #ifdef EPOCH_TRACE SLIST_INIT(&thread0.td_epochs); #endif inited = 1; global_epoch = epoch_alloc("Global", 0); global_epoch_preempt = epoch_alloc("Global preemptible", EPOCH_PREEMPT); } SYSINIT(epoch, SI_SUB_EPOCH, SI_ORDER_FIRST, epoch_init, NULL); #if !defined(EARLY_AP_STARTUP) static void epoch_init_smp(void *dummy __unused) { inited = 2; } SYSINIT(epoch_smp, SI_SUB_SMP + 1, SI_ORDER_FIRST, epoch_init_smp, NULL); #endif static void epoch_ctor(epoch_t epoch) { epoch_record_t er; int cpu; epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK); CPU_FOREACH(cpu) { er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu); bzero(er, sizeof(*er)); 
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL); TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist); er->er_cpuid = cpu; er->er_parent = epoch; } } static void epoch_adjust_prio(struct thread *td, u_char prio) { thread_lock(td); sched_prio(td, prio); thread_unlock(td); } epoch_t epoch_alloc(const char *name, int flags) { epoch_t epoch; if (__predict_false(!inited)) panic("%s called too early in boot", __func__); epoch = malloc(sizeof(struct epoch), M_EPOCH, M_ZERO | M_WAITOK); ck_epoch_init(&epoch->e_epoch); epoch_ctor(epoch); MPASS(epoch_count < MAX_EPOCHS - 2); epoch->e_flags = flags; epoch->e_idx = epoch_count; epoch->e_name = name; sx_init(&epoch->e_drain_sx, "epoch-drain-sx"); mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF); allepochs[epoch_count++] = epoch; return (epoch); } void epoch_free(epoch_t epoch) { epoch_drain_callbacks(epoch); allepochs[epoch->e_idx] = NULL; epoch_wait(global_epoch); uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record); mtx_destroy(&epoch->e_drain_mtx); sx_destroy(&epoch->e_drain_sx); free(epoch, M_EPOCH); } static epoch_record_t epoch_currecord(epoch_t epoch) { return (zpcpu_get_cpu(epoch->e_pcpu_record, curcpu)); } #define INIT_CHECK(epoch) \ do { \ if (__predict_false((epoch) == NULL)) \ return; \ } while (0) void _epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE) { struct epoch_record *er; struct thread *td; MPASS(cold || epoch != NULL); MPASS(epoch->e_flags & EPOCH_PREEMPT); td = curthread; MPASS((vm_offset_t)et >= td->td_kstack && (vm_offset_t)et + sizeof(struct epoch_tracker) <= td->td_kstack + td->td_kstack_pages * PAGE_SIZE); INIT_CHECK(epoch); #ifdef EPOCH_TRACE epoch_trace_enter(td, epoch, et, file, line); #endif et->et_td = td; THREAD_NO_SLEEPING(); critical_enter(); sched_pin(); td->td_pre_epoch_prio = td->td_priority; er = epoch_currecord(epoch); TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link); ck_epoch_begin(&er->er_record, &et->et_section); critical_exit(); } 
void epoch_enter(epoch_t epoch) { epoch_record_t er; MPASS(cold || epoch != NULL); INIT_CHECK(epoch); critical_enter(); er = epoch_currecord(epoch); ck_epoch_begin(&er->er_record, NULL); } void _epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE) { struct epoch_record *er; struct thread *td; INIT_CHECK(epoch); td = curthread; critical_enter(); sched_unpin(); THREAD_SLEEPING_OK(); er = epoch_currecord(epoch); MPASS(epoch->e_flags & EPOCH_PREEMPT); MPASS(et != NULL); MPASS(et->et_td == td); #ifdef INVARIANTS et->et_td = (void*)0xDEADBEEF; #endif ck_epoch_end(&er->er_record, &et->et_section); TAILQ_REMOVE(&er->er_tdlist, et, et_link); er->er_gen++; if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) epoch_adjust_prio(td, td->td_pre_epoch_prio); critical_exit(); #ifdef EPOCH_TRACE epoch_trace_exit(td, epoch, et, file, line); #endif } void epoch_exit(epoch_t epoch) { epoch_record_t er; INIT_CHECK(epoch); er = epoch_currecord(epoch); ck_epoch_end(&er->er_record, NULL); critical_exit(); } /* * epoch_block_handler_preempt() is a callback from the CK code when another * thread is currently in an epoch section. */ static void epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t *cr, void *arg __unused) { epoch_record_t record; struct thread *td, *owner, *curwaittd; struct epoch_tracker *tdwait; struct turnstile *ts; struct lock_object *lock; int spincount, gen; int locksheld __unused; record = __containerof(cr, struct epoch_record, er_record); td = curthread; locksheld = td->td_locks; spincount = 0; counter_u64_add(block_count, 1); /* * We lost a race and there's no longer any threads * on the CPU in an epoch section. 
*/ if (TAILQ_EMPTY(&record->er_tdlist)) return; if (record->er_cpuid != curcpu) { /* * If the head of the list is running, we can wait for it * to remove itself from the list and thus save us the * overhead of a migration */ gen = record->er_gen; thread_unlock(td); /* * We can't actually check if the waiting thread is running * so we simply poll for it to exit before giving up and * migrating. */ do { cpu_spinwait(); } while (!TAILQ_EMPTY(&record->er_tdlist) && gen == record->er_gen && spincount++ < MAX_ADAPTIVE_SPIN); thread_lock(td); /* * If the generation has changed we can poll again * otherwise we need to migrate. */ if (gen != record->er_gen) return; /* * Being on the same CPU as that of the record on which * we need to wait allows us access to the thread * list associated with that CPU. We can then examine the * oldest thread in the queue and wait on its turnstile * until it resumes and so on until a grace period * elapses. * */ counter_u64_add(migrate_count, 1); sched_bind(td, record->er_cpuid); /* * At this point we need to return to the ck code * to scan to see if a grace period has elapsed. * We can't move on to check the thread list, because * in the meantime new threads may have arrived that * in fact belong to a different epoch. */ return; } /* * Try to find a thread in an epoch section on this CPU * waiting on a turnstile. Otherwise find the lowest * priority thread (highest prio value) and drop our priority * to match to allow it to run. */ TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) { /* * Propagate our priority to any other waiters to prevent us * from starving them. They will have their original priority * restore on exit from epoch_wait(). 
*/ curwaittd = tdwait->et_td; if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) { critical_enter(); thread_unlock(td); thread_lock(curwaittd); sched_prio(curwaittd, td->td_priority); thread_unlock(curwaittd); thread_lock(td); critical_exit(); } if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) && ((ts = curwaittd->td_blocked) != NULL)) { /* * We unlock td to allow turnstile_wait to reacquire * the thread lock. Before unlocking it we enter a * critical section to prevent preemption after we * reenable interrupts by dropping the thread lock in * order to prevent curwaittd from getting to run. */ critical_enter(); thread_unlock(td); if (turnstile_lock(ts, &lock, &owner)) { if (ts == curwaittd->td_blocked) { MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd)); critical_exit(); turnstile_wait(ts, owner, curwaittd->td_tsqueue); counter_u64_add(turnstile_count, 1); thread_lock(td); return; } turnstile_unlock(ts, lock); } thread_lock(td); critical_exit(); KASSERT(td->td_locks == locksheld, ("%d extra locks held", td->td_locks - locksheld)); } } /* * We didn't find any threads actually blocked on a lock * so we have nothing to do except context switch away. */ counter_u64_add(switch_count, 1); mi_switch(SW_VOL | SWT_RELINQUISH); /* * It is important the thread lock is dropped while yielding * to allow other threads to acquire the lock pointed to by * TDQ_LOCKPTR(td). Currently mi_switch() will unlock the * thread lock before returning. Else a deadlock like * situation might happen. 
*/ thread_lock(td); } void epoch_wait_preempt(epoch_t epoch) { struct thread *td; int was_bound; int old_cpu; int old_pinned; u_char old_prio; int locks __unused; MPASS(cold || epoch != NULL); INIT_CHECK(epoch); td = curthread; #ifdef INVARIANTS locks = curthread->td_locks; MPASS(epoch->e_flags & EPOCH_PREEMPT); if ((epoch->e_flags & EPOCH_LOCKED) == 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "epoch_wait() can be long running"); KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle " "of an epoch section of the same epoch")); #endif DROP_GIANT(); thread_lock(td); old_cpu = PCPU_GET(cpuid); old_pinned = td->td_pinned; old_prio = td->td_priority; was_bound = sched_is_bound(td); sched_unbind(td); td->td_pinned = 0; sched_bind(td, old_cpu); ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, NULL); /* restore CPU binding, if any */ if (was_bound != 0) { sched_bind(td, old_cpu); } else { /* get thread back to initial CPU, if any */ if (old_pinned != 0) sched_bind(td, old_cpu); sched_unbind(td); } /* restore pinned after bind */ td->td_pinned = old_pinned; /* restore thread priority */ sched_prio(td, old_prio); thread_unlock(td); PICKUP_GIANT(); KASSERT(td->td_locks == locks, ("%d residual locks held", td->td_locks - locks)); } static void epoch_block_handler(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused, void *arg __unused) { cpu_spinwait(); } void epoch_wait(epoch_t epoch) { MPASS(cold || epoch != NULL); INIT_CHECK(epoch); MPASS(epoch->e_flags == 0); critical_enter(); ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler, NULL); critical_exit(); } void -epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t)) +epoch_call(epoch_t epoch, epoch_callback_t callback, epoch_context_t ctx) { epoch_record_t er; ck_epoch_entry_t *cb; cb = (void *)ctx; MPASS(callback); /* too early in boot to have epoch set up */ if (__predict_false(epoch == NULL)) goto boottime; #if 
!defined(EARLY_AP_STARTUP) if (__predict_false(inited < 2)) goto boottime; #endif critical_enter(); *DPCPU_PTR(epoch_cb_count) += 1; er = epoch_currecord(epoch); ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback); critical_exit(); return; boottime: callback(ctx); } static void epoch_call_task(void *arg __unused) { ck_stack_entry_t *cursor, *head, *next; ck_epoch_record_t *record; epoch_record_t er; epoch_t epoch; ck_stack_t cb_stack; int i, npending, total; ck_stack_init(&cb_stack); critical_enter(); epoch_enter(global_epoch); for (total = i = 0; i < epoch_count; i++) { if (__predict_false((epoch = allepochs[i]) == NULL)) continue; er = epoch_currecord(epoch); record = &er->er_record; if ((npending = record->n_pending) == 0) continue; ck_epoch_poll_deferred(record, &cb_stack); total += npending - record->n_pending; } epoch_exit(global_epoch); *DPCPU_PTR(epoch_cb_count) -= total; critical_exit(); counter_u64_add(epoch_call_count, total); counter_u64_add(epoch_call_task_count, 1); head = ck_stack_batch_pop_npsc(&cb_stack); for (cursor = head; cursor != NULL; cursor = next) { struct ck_epoch_entry *entry = ck_epoch_entry_container(cursor); next = CK_STACK_NEXT(cursor); entry->function(entry); } } int in_epoch_verbose(epoch_t epoch, int dump_onfail) { struct epoch_tracker *tdwait; struct thread *td; epoch_record_t er; td = curthread; if (THREAD_CAN_SLEEP()) return (0); if (__predict_false((epoch) == NULL)) return (0); critical_enter(); er = epoch_currecord(epoch); TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) if (tdwait->et_td == td) { critical_exit(); return (1); } #ifdef INVARIANTS if (dump_onfail) { MPASS(td->td_pinned); printf("cpu: %d id: %d\n", curcpu, td->td_tid); TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link) printf("td_tid: %d ", tdwait->et_td->td_tid); printf("\n"); } #endif critical_exit(); return (0); } int in_epoch(epoch_t epoch) { return (in_epoch_verbose(epoch, 0)); } static void epoch_drain_cb(struct epoch_context *ctx) { struct epoch *epoch 
= __containerof(ctx, struct epoch_record, er_drain_ctx)->er_parent; if (atomic_fetchadd_int(&epoch->e_drain_count, -1) == 1) { mtx_lock(&epoch->e_drain_mtx); wakeup(epoch); mtx_unlock(&epoch->e_drain_mtx); } } void epoch_drain_callbacks(epoch_t epoch) { epoch_record_t er; struct thread *td; int was_bound; int old_pinned; int old_cpu; int cpu; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "epoch_drain_callbacks() may sleep!"); /* too early in boot to have epoch set up */ if (__predict_false(epoch == NULL)) return; #if !defined(EARLY_AP_STARTUP) if (__predict_false(inited < 2)) return; #endif DROP_GIANT(); sx_xlock(&epoch->e_drain_sx); mtx_lock(&epoch->e_drain_mtx); td = curthread; thread_lock(td); old_cpu = PCPU_GET(cpuid); old_pinned = td->td_pinned; was_bound = sched_is_bound(td); sched_unbind(td); td->td_pinned = 0; CPU_FOREACH(cpu) epoch->e_drain_count++; CPU_FOREACH(cpu) { er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu); sched_bind(td, cpu); - epoch_call(epoch, &er->er_drain_ctx, &epoch_drain_cb); + epoch_call(epoch, &epoch_drain_cb, &er->er_drain_ctx); } /* restore CPU binding, if any */ if (was_bound != 0) { sched_bind(td, old_cpu); } else { /* get thread back to initial CPU, if any */ if (old_pinned != 0) sched_bind(td, old_cpu); sched_unbind(td); } /* restore pinned after bind */ td->td_pinned = old_pinned; thread_unlock(td); while (epoch->e_drain_count != 0) msleep(epoch, &epoch->e_drain_mtx, PZERO, "EDRAIN", 0); mtx_unlock(&epoch->e_drain_mtx); sx_xunlock(&epoch->e_drain_sx); PICKUP_GIANT(); } Index: head/sys/net/pfil.c =================================================================== --- head/sys/net/pfil.c (revision 356825) +++ head/sys/net/pfil.c (revision 356826) @@ -1,682 +1,682 @@ /* $FreeBSD$ */ /* $NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2019 Gleb Smirnoff * Copyright (c) 1996 Matthew R. Green * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PFIL, "pfil", "pfil(9) packet filter hooks"); static int pfil_ioctl(struct cdev *, u_long, caddr_t, int, struct thread *); static struct cdevsw pfil_cdevsw = { .d_ioctl = pfil_ioctl, .d_name = PFILDEV, .d_version = D_VERSION, }; static struct cdev *pfil_dev; static struct mtx pfil_lock; MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF); #define PFIL_LOCK() mtx_lock(&pfil_lock) #define PFIL_UNLOCK() mtx_unlock(&pfil_lock) #define PFIL_LOCK_ASSERT() mtx_assert(&pfil_lock, MA_OWNED) #define PFIL_EPOCH net_epoch_preempt #define PFIL_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et)) #define PFIL_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et)) struct pfil_hook { pfil_func_t hook_func; void *hook_ruleset; int hook_flags; int hook_links; enum pfil_types hook_type; const char *hook_modname; const char *hook_rulname; LIST_ENTRY(pfil_hook) hook_list; }; struct pfil_link { CK_STAILQ_ENTRY(pfil_link) link_chain; pfil_func_t link_func; void *link_ruleset; int link_flags; struct pfil_hook *link_hook; struct epoch_context link_epoch_ctx; }; typedef CK_STAILQ_HEAD(pfil_chain, pfil_link) pfil_chain_t; struct pfil_head { int head_nhooksin; int head_nhooksout; pfil_chain_t head_in; pfil_chain_t head_out; int head_flags; enum pfil_types head_type; LIST_ENTRY(pfil_head) head_list; const char *head_name; }; LIST_HEAD(pfilheadhead, pfil_head); VNET_DEFINE_STATIC(struct pfilheadhead, pfil_head_list) = LIST_HEAD_INITIALIZER(pfil_head_list); #define V_pfil_head_list VNET(pfil_head_list) LIST_HEAD(pfilhookhead, pfil_hook); VNET_DEFINE_STATIC(struct pfilhookhead, pfil_hook_list) = LIST_HEAD_INITIALIZER(pfil_hook_list); #define V_pfil_hook_list VNET(pfil_hook_list) static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t ); static void 
pfil_link_free(epoch_context_t);

/*
 * Replace a memory-pointer packet with a freshly built mbuf.
 *
 * p     - packet descriptor; p->mem is read as the source buffer and the
 *         new mbuf pointer is stored through p->m.
 * flags - must have PFIL_MEMPTR set (asserted); PFIL_LENGTH(flags) is the
 *         number of bytes handed to m_devget().
 * ifp   - interface passed on to m_devget().
 *
 * Returns 0 on success or ENOMEM when m_devget() yields NULL, in which
 * case *p is left untouched.
 */
int
pfil_realloc(pfil_packet_t *p, int flags, struct ifnet *ifp)
{
	struct mbuf *m;

	MPASS(flags & PFIL_MEMPTR);

	if ((m = m_devget(p->mem, PFIL_LENGTH(flags), 0, ifp, NULL)) == NULL)
		return (ENOMEM);
	/* NOTE(review): pfil_packet_align() is defined elsewhere; presumably
	 * it turns the descriptor into its mbuf-pointer form - confirm. */
	*p = pfil_packet_align(*p);
	*p->m = m;

	return (0);
}

/*
 * Run one hook that does not accept memory pointers on a memory-pointer
 * packet by wrapping the buffer in an on-stack mbuf.
 *
 * The stack mbuf 'm' is initialised with M_NOFREE | M_PKTHDR, p->mem is
 * attached to it as external storage of PFIL_LENGTH(flags) bytes, and the
 * hook is called with PFIL_MEMPTR and PFIL_LENMASK cleared from flags.
 *
 * Returns the hook's verdict, except that PFIL_PASS is turned into
 * PFIL_REALLOCED when the hook handed back a different mbuf; in that case
 * the new mbuf pointer is stored through p->m.
 */
static __noinline int
pfil_fake_mbuf(pfil_func_t func, pfil_packet_t *p, struct ifnet *ifp,
    int flags, void *ruleset, struct inpcb *inp)
{
	struct mbuf m, *mp;
	pfil_return_t rv;

	(void)m_init(&m, M_NOWAIT, MT_DATA, M_NOFREE | M_PKTHDR);
	m_extadd(&m, p->mem, PFIL_LENGTH(flags), NULL, NULL, NULL, 0,
	    EXT_RXRING);
	m.m_len = m.m_pkthdr.len = PFIL_LENGTH(flags);
	mp = &m;
	flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);

	rv = func(&mp, ifp, flags, ruleset, inp);
	if (rv == PFIL_PASS && mp != &m) {
		/*
		 * Firewalls that need pfil_fake_mbuf() most likely don't
		 * know they need to return PFIL_REALLOCED.
		 */
		rv = PFIL_REALLOCED;
		*p = pfil_packet_align(*p);
		*p->m = mp;
	}

	return (rv);
}

/*
 * pfil_run_hooks() runs the specified packet filter hook chain.
*/
/*
 * head  - hook head whose inbound or outbound chain is walked.
 * p     - packet descriptor handed to each hook.
 * ifp   - interface passed through to each hook.
 * flags - PFIL_DIR(flags) selects the chain (PFIL_IN or PFIL_OUT; anything
 *         else panics); PFIL_MEMPTR marks 'p' as a memory pointer.
 * inp   - passed through to each hook.
 *
 * Returns the verdict of the last hook that ran, or PFIL_PASS for an
 * empty chain; a final PFIL_PASS is reported as PFIL_REALLOCED when any
 * hook along the way returned PFIL_REALLOCED.
 */
int
pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
    int flags, struct inpcb *inp)
{
	struct epoch_tracker et;
	pfil_chain_t *pch;
	struct pfil_link *link;
	pfil_return_t rv;
	bool realloc = false;

	if (PFIL_DIR(flags) == PFIL_IN)
		pch = &head->head_in;
	else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT))
		pch = &head->head_out;
	else
		panic("%s: bogus flags %d", __func__, flags);

	rv = PFIL_PASS;
	/* The chain is walked between PFIL_EPOCH_ENTER and PFIL_EPOCH_EXIT. */
	PFIL_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(link, pch, link_chain) {
		/*
		 * A memory-pointer packet goes through pfil_fake_mbuf() when
		 * the link itself is not flagged PFIL_MEMPTR; otherwise the
		 * hook is called directly.
		 */
		if ((flags & PFIL_MEMPTR) && !(link->link_flags & PFIL_MEMPTR))
			rv = pfil_fake_mbuf(link->link_func, &p, ifp, flags,
			    link->link_ruleset, inp);
		else
			rv = (*link->link_func)(p, ifp, flags,
			    link->link_ruleset, inp);
		/* DROPPED and CONSUMED end the walk immediately. */
		if (rv == PFIL_DROPPED || rv == PFIL_CONSUMED)
			break;
		else if (rv == PFIL_REALLOCED) {
			/*
			 * Clear the memory-pointer bits so the PFIL_MEMPTR
			 * test above is false for the remaining links, and
			 * remember that a reallocation happened.
			 */
			flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
			realloc = true;
		}
	}
	PFIL_EPOCH_EXIT(et);
	/* A later PFIL_PASS must not hide an earlier reallocation. */
	if (realloc && rv == PFIL_PASS)
		rv = PFIL_REALLOCED;
	return (rv);
}

/*
 * pfil_head_register() registers a pfil_head with the packet filter hook
 * mechanism.
 *
 * pa->pa_version is asserted to equal PFIL_VERSION.  The head is allocated
 * with M_WAITOK, filled in from pa (flags, type, and the head name, which
 * is stored by pointer), and inserted at the front of V_pfil_head_list
 * under PFIL_LOCK.  A head whose name matches an existing entry only
 * produces a console message; it is still inserted and returned.
 */
pfil_head_t
pfil_head_register(struct pfil_head_args *pa)
{
	struct pfil_head *head, *list;

	MPASS(pa->pa_version == PFIL_VERSION);

	head = malloc(sizeof(struct pfil_head), M_PFIL, M_WAITOK);

	/* No M_ZERO above: the fields are set explicitly below. */
	head->head_nhooksin = head->head_nhooksout = 0;
	head->head_flags = pa->pa_flags;
	head->head_type = pa->pa_type;
	head->head_name = pa->pa_headname;
	CK_STAILQ_INIT(&head->head_in);
	CK_STAILQ_INIT(&head->head_out);

	PFIL_LOCK();
	LIST_FOREACH(list, &V_pfil_head_list, head_list)
		if (strcmp(pa->pa_headname, list->head_name) == 0) {
			printf("pfil: duplicate head \"%s\"\n",
			    pa->pa_headname);
		}
	LIST_INSERT_HEAD(&V_pfil_head_list, head, head_list);
	PFIL_UNLOCK();

	return (head);
}

/*
 * pfil_head_unregister() removes a pfil_head from the packet filter hook
 * mechanism.  The producer of the hook promises that all outstanding
 * invocations of the hook have completed before it unregisters the hook.
*/ void pfil_head_unregister(pfil_head_t ph) { struct pfil_link *link, *next; PFIL_LOCK(); LIST_REMOVE(ph, head_list); CK_STAILQ_FOREACH_SAFE(link, &ph->head_in, link_chain, next) { link->link_hook->hook_links--; free(link, M_PFIL); } CK_STAILQ_FOREACH_SAFE(link, &ph->head_out, link_chain, next) { link->link_hook->hook_links--; free(link, M_PFIL); } PFIL_UNLOCK(); } pfil_hook_t pfil_add_hook(struct pfil_hook_args *pa) { struct pfil_hook *hook, *list; MPASS(pa->pa_version == PFIL_VERSION); hook = malloc(sizeof(struct pfil_hook), M_PFIL, M_WAITOK | M_ZERO); hook->hook_func = pa->pa_func; hook->hook_ruleset = pa->pa_ruleset; hook->hook_flags = pa->pa_flags; hook->hook_type = pa->pa_type; hook->hook_modname = pa->pa_modname; hook->hook_rulname = pa->pa_rulname; PFIL_LOCK(); LIST_FOREACH(list, &V_pfil_hook_list, hook_list) if (strcmp(pa->pa_modname, list->hook_modname) == 0 && strcmp(pa->pa_rulname, list->hook_rulname) == 0) { printf("pfil: duplicate hook \"%s:%s\"\n", pa->pa_modname, pa->pa_rulname); } LIST_INSERT_HEAD(&V_pfil_hook_list, hook, hook_list); PFIL_UNLOCK(); return (hook); } static int pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook) { struct pfil_link *in, *out; PFIL_LOCK_ASSERT(); if (pa->pa_flags & PFIL_IN) { in = pfil_link_remove(&head->head_in, hook); if (in != NULL) { head->head_nhooksin--; hook->hook_links--; } } else in = NULL; if (pa->pa_flags & PFIL_OUT) { out = pfil_link_remove(&head->head_out, hook); if (out != NULL) { head->head_nhooksout--; hook->hook_links--; } } else out = NULL; PFIL_UNLOCK(); if (in != NULL) - epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, pfil_link_free); + epoch_call(PFIL_EPOCH, pfil_link_free, &in->link_epoch_ctx); if (out != NULL) - epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, pfil_link_free); + epoch_call(PFIL_EPOCH, pfil_link_free, &out->link_epoch_ctx); if (in == NULL && out == NULL) return (ENOENT); else return (0); } int pfil_link(struct pfil_link_args *pa) { struct pfil_link *in, *out, 
*link; struct pfil_head *head; struct pfil_hook *hook; int error; MPASS(pa->pa_version == PFIL_VERSION); if ((pa->pa_flags & (PFIL_IN | PFIL_UNLINK)) == PFIL_IN) in = malloc(sizeof(*in), M_PFIL, M_WAITOK | M_ZERO); else in = NULL; if ((pa->pa_flags & (PFIL_OUT | PFIL_UNLINK)) == PFIL_OUT) out = malloc(sizeof(*out), M_PFIL, M_WAITOK | M_ZERO); else out = NULL; PFIL_LOCK(); if (pa->pa_flags & PFIL_HEADPTR) head = pa->pa_head; else LIST_FOREACH(head, &V_pfil_head_list, head_list) if (strcmp(pa->pa_headname, head->head_name) == 0) break; if (pa->pa_flags & PFIL_HOOKPTR) hook = pa->pa_hook; else LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) if (strcmp(pa->pa_modname, hook->hook_modname) == 0 && strcmp(pa->pa_rulname, hook->hook_rulname) == 0) break; if (head == NULL || hook == NULL) { error = ENOENT; goto fail; } if (pa->pa_flags & PFIL_UNLINK) return (pfil_unlink(pa, head, hook)); if (head->head_type != hook->hook_type || ((hook->hook_flags & pa->pa_flags) & ~head->head_flags)) { error = EINVAL; goto fail; } if (pa->pa_flags & PFIL_IN) CK_STAILQ_FOREACH(link, &head->head_in, link_chain) if (link->link_hook == hook) { error = EEXIST; goto fail; } if (pa->pa_flags & PFIL_OUT) CK_STAILQ_FOREACH(link, &head->head_out, link_chain) if (link->link_hook == hook) { error = EEXIST; goto fail; } if (pa->pa_flags & PFIL_IN) { in->link_hook = hook; in->link_func = hook->hook_func; in->link_flags = hook->hook_flags; in->link_ruleset = hook->hook_ruleset; if (pa->pa_flags & PFIL_APPEND) CK_STAILQ_INSERT_TAIL(&head->head_in, in, link_chain); else CK_STAILQ_INSERT_HEAD(&head->head_in, in, link_chain); hook->hook_links++; head->head_nhooksin++; } if (pa->pa_flags & PFIL_OUT) { out->link_hook = hook; out->link_func = hook->hook_func; out->link_flags = hook->hook_flags; out->link_ruleset = hook->hook_ruleset; if (pa->pa_flags & PFIL_APPEND) CK_STAILQ_INSERT_HEAD(&head->head_out, out, link_chain); else CK_STAILQ_INSERT_TAIL(&head->head_out, out, link_chain); hook->hook_links++; 
head->head_nhooksout++; } PFIL_UNLOCK(); return (0); fail: PFIL_UNLOCK(); free(in, M_PFIL); free(out, M_PFIL); return (error); } static void pfil_link_free(epoch_context_t ctx) { struct pfil_link *link; link = __containerof(ctx, struct pfil_link, link_epoch_ctx); free(link, M_PFIL); } /* * pfil_remove_hook removes a filter from all filtering points. */ void pfil_remove_hook(pfil_hook_t hook) { struct pfil_head *head; struct pfil_link *in, *out; PFIL_LOCK(); LIST_FOREACH(head, &V_pfil_head_list, head_list) { retry: in = pfil_link_remove(&head->head_in, hook); if (in != NULL) { head->head_nhooksin--; hook->hook_links--; - epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, - pfil_link_free); + epoch_call(PFIL_EPOCH, pfil_link_free, + &in->link_epoch_ctx); } out = pfil_link_remove(&head->head_out, hook); if (out != NULL) { head->head_nhooksout--; hook->hook_links--; - epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, - pfil_link_free); + epoch_call(PFIL_EPOCH, pfil_link_free, + &out->link_epoch_ctx); } if (in != NULL || out != NULL) /* What if some stupid admin put same filter twice? */ goto retry; } LIST_REMOVE(hook, hook_list); PFIL_UNLOCK(); MPASS(hook->hook_links == 0); free(hook, M_PFIL); } /* * Internal: Remove a pfil hook from a hook chain. 
*/ static struct pfil_link * pfil_link_remove(pfil_chain_t *chain, pfil_hook_t hook) { struct pfil_link *link; PFIL_LOCK_ASSERT(); CK_STAILQ_FOREACH(link, chain, link_chain) if (link->link_hook == hook) { CK_STAILQ_REMOVE(chain, link, pfil_link, link_chain); return (link); } return (NULL); } static void pfil_init(const void *unused __unused) { struct make_dev_args args; int error; make_dev_args_init(&args); args.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME; args.mda_devsw = &pfil_cdevsw; args.mda_uid = UID_ROOT; args.mda_gid = GID_WHEEL; args.mda_mode = 0600; error = make_dev_s(&args, &pfil_dev, PFILDEV); KASSERT(error == 0, ("%s: failed to create dev: %d", __func__, error)); } /* * Make sure the pfil bits are first before any possible subsystem which * might piggyback on the SI_SUB_PROTO_PFIL. */ SYSINIT(pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, pfil_init, NULL); /* * User control interface. */ static int pfilioc_listheads(struct pfilioc_list *); static int pfilioc_listhooks(struct pfilioc_list *); static int pfilioc_link(struct pfilioc_link *); static int pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { int error; CURVNET_SET(TD_TO_VNET(td)); error = 0; switch (cmd) { case PFILIOC_LISTHEADS: error = pfilioc_listheads((struct pfilioc_list *)addr); break; case PFILIOC_LISTHOOKS: error = pfilioc_listhooks((struct pfilioc_list *)addr); break; case PFILIOC_LINK: error = pfilioc_link((struct pfilioc_link *)addr); break; default: error = EINVAL; break; } CURVNET_RESTORE(); return (error); } static int pfilioc_listheads(struct pfilioc_list *req) { struct pfil_head *head; struct pfil_link *link; struct pfilioc_head *iohead; struct pfilioc_hook *iohook; u_int nheads, nhooks, hd, hk; int error; PFIL_LOCK(); restart: nheads = nhooks = 0; LIST_FOREACH(head, &V_pfil_head_list, head_list) { nheads++; nhooks += head->head_nhooksin + head->head_nhooksout; } PFIL_UNLOCK(); if (req->pio_nheads < nheads || req->pio_nhooks < nhooks) { 
req->pio_nheads = nheads; req->pio_nhooks = nhooks; return (0); } iohead = malloc(sizeof(*iohead) * nheads, M_TEMP, M_WAITOK); iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK); hd = hk = 0; PFIL_LOCK(); LIST_FOREACH(head, &V_pfil_head_list, head_list) { if (hd + 1 > nheads || hk + head->head_nhooksin + head->head_nhooksout > nhooks) { /* Configuration changed during malloc(). */ free(iohead, M_TEMP); free(iohook, M_TEMP); goto restart; } strlcpy(iohead[hd].pio_name, head->head_name, sizeof(iohead[0].pio_name)); iohead[hd].pio_nhooksin = head->head_nhooksin; iohead[hd].pio_nhooksout = head->head_nhooksout; iohead[hd].pio_type = head->head_type; CK_STAILQ_FOREACH(link, &head->head_in, link_chain) { strlcpy(iohook[hk].pio_module, link->link_hook->hook_modname, sizeof(iohook[0].pio_module)); strlcpy(iohook[hk].pio_ruleset, link->link_hook->hook_rulname, sizeof(iohook[0].pio_ruleset)); hk++; } CK_STAILQ_FOREACH(link, &head->head_out, link_chain) { strlcpy(iohook[hk].pio_module, link->link_hook->hook_modname, sizeof(iohook[0].pio_module)); strlcpy(iohook[hk].pio_ruleset, link->link_hook->hook_rulname, sizeof(iohook[0].pio_ruleset)); hk++; } hd++; } PFIL_UNLOCK(); error = copyout(iohead, req->pio_heads, sizeof(*iohead) * min(hd, req->pio_nheads)); if (error == 0) error = copyout(iohook, req->pio_hooks, sizeof(*iohook) * min(req->pio_nhooks, hk)); req->pio_nheads = hd; req->pio_nhooks = hk; free(iohead, M_TEMP); free(iohook, M_TEMP); return (error); } static int pfilioc_listhooks(struct pfilioc_list *req) { struct pfil_hook *hook; struct pfilioc_hook *iohook; u_int nhooks, hk; int error; PFIL_LOCK(); restart: nhooks = 0; LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) nhooks++; PFIL_UNLOCK(); if (req->pio_nhooks < nhooks) { req->pio_nhooks = nhooks; return (0); } iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK); hk = 0; PFIL_LOCK(); LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) { if (hk + 1 > nhooks) { /* Configuration changed during malloc(). 
*/ free(iohook, M_TEMP); goto restart; } strlcpy(iohook[hk].pio_module, hook->hook_modname, sizeof(iohook[0].pio_module)); strlcpy(iohook[hk].pio_ruleset, hook->hook_rulname, sizeof(iohook[0].pio_ruleset)); iohook[hk].pio_type = hook->hook_type; iohook[hk].pio_flags = hook->hook_flags; hk++; } PFIL_UNLOCK(); error = copyout(iohook, req->pio_hooks, sizeof(*iohook) * min(req->pio_nhooks, hk)); req->pio_nhooks = hk; free(iohook, M_TEMP); return (error); } static int pfilioc_link(struct pfilioc_link *req) { struct pfil_link_args args; if (req->pio_flags & ~(PFIL_IN | PFIL_OUT | PFIL_UNLINK | PFIL_APPEND)) return (EINVAL); args.pa_version = PFIL_VERSION; args.pa_flags = req->pio_flags; args.pa_headname = req->pio_name; args.pa_modname = req->pio_module; args.pa_rulname = req->pio_ruleset; return (pfil_link(&args)); } Index: head/sys/sys/epoch.h =================================================================== --- head/sys/sys/epoch.h (revision 356825) +++ head/sys/sys/epoch.h (revision 356826) @@ -1,108 +1,109 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2018, Matthew Macy * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_EPOCH_H_ #define _SYS_EPOCH_H_ struct epoch_context { void *data[2]; } __aligned(sizeof(void *)); typedef struct epoch_context *epoch_context_t; +typedef void epoch_callback_t(epoch_context_t); #ifdef _KERNEL #include #include #include struct epoch; typedef struct epoch *epoch_t; #define EPOCH_PREEMPT 0x1 #define EPOCH_LOCKED 0x2 extern epoch_t global_epoch; extern epoch_t global_epoch_preempt; struct epoch_tracker { TAILQ_ENTRY(epoch_tracker) et_link; struct thread *et_td; ck_epoch_section_t et_section; #ifdef EPOCH_TRACE struct epoch *et_epoch; SLIST_ENTRY(epoch_tracker) et_tlink; const char *et_file; int et_line; #endif } __aligned(sizeof(void *)); typedef struct epoch_tracker *epoch_tracker_t; epoch_t epoch_alloc(const char *name, int flags); void epoch_free(epoch_t epoch); void epoch_wait(epoch_t epoch); void epoch_wait_preempt(epoch_t epoch); void epoch_drain_callbacks(epoch_t epoch); -void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t)); +void epoch_call(epoch_t epoch, epoch_callback_t cb, epoch_context_t ctx); int in_epoch(epoch_t epoch); int in_epoch_verbose(epoch_t epoch, int dump_onfail); DPCPU_DECLARE(int, epoch_cb_count); DPCPU_DECLARE(struct grouptask, epoch_cb_task); #ifdef EPOCH_TRACE #define EPOCH_FILE_LINE , const char *file, int line #else #define EPOCH_FILE_LINE #endif void _epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE); void 
_epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE); #ifdef EPOCH_TRACE void epoch_trace_list(struct thread *); #define epoch_enter_preempt(epoch, et) _epoch_enter_preempt(epoch, et, __FILE__, __LINE__) #define epoch_exit_preempt(epoch, et) _epoch_exit_preempt(epoch, et, __FILE__, __LINE__) #else #define epoch_enter_preempt(epoch, et) _epoch_enter_preempt(epoch, et) #define epoch_exit_preempt(epoch, et) _epoch_exit_preempt(epoch, et) #endif void epoch_enter(epoch_t epoch); void epoch_exit(epoch_t epoch); /* * Globally recognized epochs in the FreeBSD kernel. */ /* Network preemptible epoch, declared in sys/net/if.c. */ extern epoch_t net_epoch_preempt; #define NET_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et)) #define NET_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et)) #define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt) -#define NET_EPOCH_CALL(f, c) epoch_call(net_epoch_preempt, (c), (f)) +#define NET_EPOCH_CALL(f, c) epoch_call(net_epoch_preempt, (f), (c)) #define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt)) #endif /* _KERNEL */ #endif /* _SYS_EPOCH_H_ */