Index: head/sys/kern/subr_epoch.c
===================================================================
--- head/sys/kern/subr_epoch.c	(revision 356825)
+++ head/sys/kern/subr_epoch.c	(revision 356826)
@@ -1,846 +1,846 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/epoch.h>
 #include <sys/gtaskqueue.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/turnstile.h>
 #ifdef EPOCH_TRACE
 #include <machine/stdarg.h>
 #include <sys/stack.h>
 #include <sys/tree.h>
 #endif
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/uma.h>
 
 #include <ck_epoch.h>
 
 static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation");
 
 /*
  * Alignment applied to struct epoch_record and to the ck_epoch embedded in
  * struct epoch.  amd64 uses two cache lines; other platforms use one.
  * The expansion is parenthesized so that the macro evaluates as a single
  * value when it appears inside a larger expression.
  */
 #ifdef __amd64__
 #define EPOCH_ALIGN (CACHE_LINE_SIZE * 2)
 #else
 #define EPOCH_ALIGN (CACHE_LINE_SIZE)
 #endif
 
 /* List of epoch trackers (one per thread inside a preemptible section). */
 TAILQ_HEAD (epoch_tdlist, epoch_tracker);
 /*
  * Per-CPU, per-epoch state.  One of these exists for every CPU in each
  * struct epoch; they are set up in epoch_ctor().
  */
 typedef struct epoch_record {
 	ck_epoch_record_t er_record;	/* CK record registered with e_epoch */
 	struct epoch_context er_drain_ctx; /* ctx used by epoch_drain_callbacks() */
 	struct epoch *er_parent;	/* owning epoch, set in epoch_ctor() */
 	volatile struct epoch_tdlist er_tdlist; /* trackers currently in section */
 	volatile uint32_t er_gen;	/* bumped on each _epoch_exit_preempt() */
 	uint32_t er_cpuid;		/* CPU this record belongs to */
 } __aligned(EPOCH_ALIGN)     *epoch_record_t;
 
 /*
  * An epoch instance, created by epoch_alloc() and destroyed by epoch_free().
  */
 struct epoch {
 	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);	/* CK global epoch state */
 	epoch_record_t e_pcpu_record;	/* per-CPU records (pcpu_zone_record) */
 	int	e_idx;			/* slot occupied in allepochs[] */
 	int	e_flags;		/* flags given to epoch_alloc() */
 	struct sx e_drain_sx;		/* serializes epoch_drain_callbacks() */
 	struct mtx e_drain_mtx;		/* protects the drain sleep/wakeup */
 	volatile int e_drain_count;	/* drain callbacks still outstanding */
 	const char *e_name;		/* name given to epoch_alloc() */
 };
 
 /* arbitrary --- needs benchmarking */
 /* Upper bound on spin iterations in epoch_block_handler_preempt(). */
 #define MAX_ADAPTIVE_SPIN 100
 /* Size of the allepochs[] table. */
 #define MAX_EPOCHS 64
 
 /*
  * epoch_call() casts an epoch_context to a ck_epoch_entry_t, so the two
  * types must have the same size.
  */
 CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
 SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information");
 SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats");
 
 /*
  * Stats.  Each counter is allocated in epoch_init() and exported under
  * kern.epoch.stats.
  */
 static counter_u64_t block_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW,
     &block_count, "# of times a thread was in an epoch when epoch_wait was called");
 static counter_u64_t migrate_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW,
     &migrate_count, "# of times thread was migrated to another CPU in epoch_wait");
 static counter_u64_t turnstile_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW,
     &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait");
 static counter_u64_t switch_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW,
     &switch_count, "# of times a thread voluntarily context switched in epoch_wait");
 /* Incremented in epoch_call_task() by the number of callbacks dispatched. */
 static counter_u64_t epoch_call_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW,
     &epoch_call_count, "# of times a callback was deferred");
 static counter_u64_t epoch_call_task_count;
 
 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW,
     &epoch_call_task_count, "# of times a callback task was run");
 
 TAILQ_HEAD (threadlist, thread);
 
 /* Generates ck_epoch_entry_container(): stack entry -> ck_epoch_entry. */
 CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
     ck_epoch_entry_container)
 
 /* Table of all allocated epochs, indexed by e_idx; freed slots are NULL. */
 epoch_t	allepochs[MAX_EPOCHS];
 
 /* Per-CPU task that runs epoch_call_task() and its pending-callback count. */
 DPCPU_DEFINE(struct grouptask, epoch_cb_task);
 DPCPU_DEFINE(int, epoch_cb_count);
 
 /*
  * 0 before epoch_init(), 1 after it, and 2 once epoch_init_smp() has run
  * (the latter only exists when EARLY_AP_STARTUP is not defined).
  */
 static __read_mostly int inited;
 /* Number of allepochs[] slots handed out so far. */
 static __read_mostly int epoch_count;
 /* Default epochs created by epoch_init(). */
 __read_mostly epoch_t global_epoch;
 __read_mostly epoch_t global_epoch_preempt;
 
 static void epoch_call_task(void *context __unused);
 /* UMA per-CPU zone backing every epoch's e_pcpu_record. */
 static 	uma_zone_t pcpu_zone_record;
 
 #ifdef EPOCH_TRACE
 /* A saved call stack kept in the epoch_stacks tree by epoch_trace_report(). */
 struct stackentry {
 	RB_ENTRY(stackentry) se_node;	/* red-black tree linkage */
 	struct stack se_stack;		/* the saved stack; also the tree key */
 };
 
 /*
  * Ordering function for the stacktree.  Stacks are ordered first by depth
  * and then frame by frame on their program counters.  Returns 1, -1 or 0.
  */
 static int
 stackentry_compare(struct stackentry *a, struct stackentry *b)
 {
 	int i;
 
 	if (a->se_stack.depth != b->se_stack.depth)
 		return (a->se_stack.depth > b->se_stack.depth ? 1 : -1);
 
 	/* Equal depth: the first differing frame decides. */
 	for (i = 0; i < a->se_stack.depth; i++) {
 		if (a->se_stack.pcs[i] == b->se_stack.pcs[i])
 			continue;
 		return (a->se_stack.pcs[i] > b->se_stack.pcs[i] ? 1 : -1);
 	}
 
 	return (0);
 }
 
 /* Tree of call stacks that have already been reported. */
 RB_HEAD(stacktree, stackentry) epoch_stacks = RB_INITIALIZER(&epoch_stacks);
 RB_GENERATE_STATIC(stacktree, stackentry, se_node, stackentry_compare);
 
 /* Taken around insertions into epoch_stacks. */
 static struct mtx epoch_stacks_lock;
 MTX_SYSINIT(epochstacks, &epoch_stacks_lock, "epoch_stacks", MTX_DEF);
 
 /* When true, epoch_trace_report() also prints the saved stack. */
 static bool epoch_trace_stack_print = true;
 SYSCTL_BOOL(_kern_epoch, OID_AUTO, trace_stack_print, CTLFLAG_RWTUN,
     &epoch_trace_stack_print, 0, "Print stack traces on epoch reports");
 
 static void epoch_trace_report(const char *fmt, ...) __printflike(1, 2);
 /*
  * Print a printf-style report, optionally followed by the current stack
  * trace.  The current stack is looked up in epoch_stacks first; if it is
  * already there the report is suppressed, so each distinct call stack is
  * normally reported only once.
  */
 static inline void
 epoch_trace_report(const char *fmt, ...)
 {
 	va_list ap;
 	struct stackentry se, *new, *existing;
 
 	stack_zero(&se.se_stack);	/* XXX: is it really needed? */
 	stack_save(&se.se_stack);
 
 	/* Tree is never reduced - go lockless. */
 	if (RB_FIND(stacktree, &epoch_stacks, &se) != NULL)
 		return;
 
 	new = malloc(sizeof(*new), M_STACK, M_NOWAIT);
 	if (new != NULL) {
 		bcopy(&se.se_stack, &new->se_stack, sizeof(struct stack));
 
 		mtx_lock(&epoch_stacks_lock);
 		existing = RB_INSERT(stacktree, &epoch_stacks, new);
 		mtx_unlock(&epoch_stacks_lock);
 		/*
 		 * RB_INSERT() returns NULL when the node was inserted and
 		 * the colliding element otherwise.  On a collision (another
 		 * thread inserted the same stack after our lockless lookup)
 		 * it is our unused copy that must be released; the element
 		 * returned by RB_INSERT() is still linked into the tree.
 		 */
 		if (existing != NULL)
 			free(new, M_STACK);
 	}
 
 	va_start(ap, fmt);
 	(void)vprintf(fmt, ap);
 	va_end(ap);
 	if (epoch_trace_stack_print)
 		stack_print_ddb(&se.se_stack);
 }
 
 /*
  * Record that thread td entered the given epoch at file:line.  A report is
  * issued for every tracker already on the thread's list that refers to the
  * same epoch, then et is filled in and pushed onto the head of the list.
  */
 static inline void
 epoch_trace_enter(struct thread *td, epoch_t epoch, epoch_tracker_t et,
     const char *file, int line)
 {
 	epoch_tracker_t prev;
 
 	SLIST_FOREACH(prev, &td->td_epochs, et_tlink) {
 		if (prev->et_epoch != epoch)
 			continue;
 		epoch_trace_report("Recursively entering epoch %s "
 		    "at %s:%d, previously entered at %s:%d\n",
 		    epoch->e_name, file, line,
 		    prev->et_file, prev->et_line);
 	}
 
 	et->et_epoch = epoch;
 	et->et_file = file;
 	et->et_line = line;
 	SLIST_INSERT_HEAD(&td->td_epochs, et, et_tlink);
 }
 
 /*
  * Record that thread td left the given epoch at file:line.  If et is the
  * most recently entered tracker it is simply popped; otherwise the
  * out-of-order exit is reported and et is removed from wherever it sits
  * on the list.
  */
 static inline void
 epoch_trace_exit(struct thread *td, epoch_t epoch, epoch_tracker_t et,
     const char *file, int line)
 {
 	epoch_tracker_t newest;
 
 	newest = SLIST_FIRST(&td->td_epochs);
 	if (newest == et) {
 		SLIST_REMOVE_HEAD(&td->td_epochs, et_tlink);
 		return;
 	}
 
 	epoch_trace_report("Exiting epoch %s in a not nested order "
 	    "at %s:%d. Most recently entered %s at %s:%d\n",
 	    epoch->e_name,
 	    file, line,
 	    newest->et_epoch->e_name,
 	    newest->et_file,
 	    newest->et_line);
 	/* This will panic if et is not anywhere on td_epochs. */
 	SLIST_REMOVE(&td->td_epochs, et, epoch_tracker, et_tlink);
 }
 
 /* Used by assertions that check thread state before going to sleep. */
 void
 epoch_trace_list(struct thread *td)
 {
 	epoch_tracker_t iet;
 
 	SLIST_FOREACH(iet, &td->td_epochs, et_tlink)
 		printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name,
 		    iet->et_file, iet->et_line);
 }
 #endif /* EPOCH_TRACE */
 
 /*
  * Boot-time initialization, run from SYSINIT at SI_SUB_EPOCH.  Allocates the
  * statistics counters, creates the per-CPU record zone, attaches one
  * callback task per CPU to the softirq task queue group and finally creates
  * the two global epochs.
  */
 static void
 epoch_init(void *arg __unused)
 {
 	int cpu;
 
 	block_count = counter_u64_alloc(M_WAITOK);
 	migrate_count = counter_u64_alloc(M_WAITOK);
 	turnstile_count = counter_u64_alloc(M_WAITOK);
 	switch_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_count = counter_u64_alloc(M_WAITOK);
 	epoch_call_task_count = counter_u64_alloc(M_WAITOK);
 
 	pcpu_zone_record = uma_zcreate("epoch_record pcpu",
 	    sizeof(struct epoch_record), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, UMA_ZONE_PCPU);
 	CPU_FOREACH(cpu) {
 		GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0,
 		    epoch_call_task, NULL);
 		taskqgroup_attach_cpu(qgroup_softirq,
 		    DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, NULL, NULL,
 		    "epoch call task");
 	}
 #ifdef EPOCH_TRACE
 	SLIST_INIT(&thread0.td_epochs);
 #endif
 	/* Must be set before epoch_alloc(), which panics while it is zero. */
 	inited = 1;
 	global_epoch = epoch_alloc("Global", 0);
 	global_epoch_preempt = epoch_alloc("Global preemptible", EPOCH_PREEMPT);
 }
 SYSINIT(epoch, SI_SUB_EPOCH, SI_ORDER_FIRST, epoch_init, NULL);
 
 #if !defined(EARLY_AP_STARTUP)
 /*
  * Runs from SYSINIT just after SI_SUB_SMP.  Until inited reaches 2,
  * epoch_call() invokes callbacks immediately and epoch_drain_callbacks()
  * returns without doing anything.
  */
 static void
 epoch_init_smp(void *dummy __unused)
 {
 	inited = 2;
 }
 SYSINIT(epoch_smp, SI_SUB_SMP + 1, SI_ORDER_FIRST, epoch_init_smp, NULL);
 #endif
 
 /*
  * Allocate the per-CPU records of a new epoch and, for every CPU, zero the
  * record, register it with the epoch's CK state, initialize its tracker
  * list and fill in its CPU id and parent pointer.
  */
 static void
 epoch_ctor(epoch_t epoch)
 {
 	epoch_record_t er;
 	int cpu;
 
 	epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK);
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		bzero(er, sizeof(*er));
 		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
 		/* The casts strip the volatile qualifier from er_tdlist. */
 		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
 		er->er_cpuid = cpu;
 		er->er_parent = epoch;
 	}
 }
 
 /*
  * Set the scheduling priority of td to prio, taking and releasing the
  * thread lock around the change.
  */
 static void
 epoch_adjust_prio(struct thread *td, u_char prio)
 {
 
 	thread_lock(td);
 	sched_prio(td, prio);
 	thread_unlock(td);
 }
 
 epoch_t
 epoch_alloc(const char *name, int flags)
 {
 	epoch_t epoch;
 
 	if (__predict_false(!inited))
 		panic("%s called too early in boot", __func__);
 	epoch = malloc(sizeof(struct epoch), M_EPOCH, M_ZERO | M_WAITOK);
 	ck_epoch_init(&epoch->e_epoch);
 	epoch_ctor(epoch);
 	MPASS(epoch_count < MAX_EPOCHS - 2);
 	epoch->e_flags = flags;
 	epoch->e_idx = epoch_count;
 	epoch->e_name = name;
 	sx_init(&epoch->e_drain_sx, "epoch-drain-sx");
 	mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF);
 	allepochs[epoch_count++] = epoch;
 	return (epoch);
 }
 
 /*
  * Destroy an epoch created by epoch_alloc(): drain its pending callbacks,
  * clear its allepochs[] slot, wait on global_epoch and then release the
  * per-CPU records, the drain locks and the epoch itself.
  */
 void
 epoch_free(epoch_t epoch)
 {
 
 	epoch_drain_callbacks(epoch);
 	allepochs[epoch->e_idx] = NULL;
 	/*
 	 * epoch_call_task() walks allepochs[] inside a global_epoch section,
 	 * so wait for such sections to finish before freeing the records.
 	 */
 	epoch_wait(global_epoch);
 	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
 	mtx_destroy(&epoch->e_drain_mtx);
 	sx_destroy(&epoch->e_drain_sx);
 	free(epoch, M_EPOCH);
 }
 
 /*
  * Return the record of the given epoch that belongs to the current CPU.
  */
 static epoch_record_t
 epoch_currecord(epoch_t epoch)
 {
 	epoch_record_t rec;
 
 	rec = zpcpu_get_cpu(epoch->e_pcpu_record, curcpu);
 	return (rec);
 }
 
 /*
  * Return from the enclosing (void) function when the epoch pointer is NULL.
  */
 #define INIT_CHECK(epoch)					\
 	do {							\
 		if (__predict_false((epoch) == NULL))		\
 			return;					\
 	} while (0)
 
 /*
  * Enter a preemptible epoch section.  The tracker et must live on the
  * calling thread's kernel stack; it is linked onto the current CPU's
  * tracker list and stays there until _epoch_exit_preempt().  The thread is
  * pinned and marked as not allowed to sleep for the duration of the
  * section.  A NULL epoch is silently ignored.
  */
 void
 _epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
 {
 	struct epoch_record *er;
 	struct thread *td;
 
 	MPASS(cold || epoch != NULL);
 	td = curthread;
 	MPASS((vm_offset_t)et >= td->td_kstack &&
 	    (vm_offset_t)et + sizeof(struct epoch_tracker) <=
 	    td->td_kstack + td->td_kstack_pages * PAGE_SIZE);
 
 	INIT_CHECK(epoch);
 	/*
 	 * Only look at the epoch's flags after the NULL check: the first
 	 * assertion allows a NULL epoch while cold.
 	 */
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 #ifdef EPOCH_TRACE
 	epoch_trace_enter(td, epoch, et, file, line);
 #endif
 	et->et_td = td;
 	THREAD_NO_SLEEPING();
 	critical_enter();
 	sched_pin();
 	/* Remembered so that _epoch_exit_preempt() can undo priority changes. */
 	td->td_pre_epoch_prio = td->td_priority;
 	er = epoch_currecord(epoch);
 	TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
 	ck_epoch_begin(&er->er_record, &et->et_section);
 	critical_exit();
 }
 
 /*
  * Enter a non-preemptible epoch section.  The critical section entered here
  * is left open on return; it is closed by the matching epoch_exit().  A
  * NULL epoch is silently ignored.
  */
 void
 epoch_enter(epoch_t epoch)
 {
 	epoch_record_t er;
 
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	critical_enter();
 	er = epoch_currecord(epoch);
 	ck_epoch_begin(&er->er_record, NULL);
 }
 
 /*
  * Leave a preemptible epoch section entered with _epoch_enter_preempt().
  * Unpins the thread, allows it to sleep again, removes the tracker from the
  * current CPU's list and restores the thread priority saved on entry if it
  * has changed in the meantime.  A NULL epoch is silently ignored.
  */
 void
 _epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
 {
 	struct epoch_record *er;
 	struct thread *td;
 
 	INIT_CHECK(epoch);
 	td = curthread;
 	critical_enter();
 	sched_unpin();
 	THREAD_SLEEPING_OK();
 	er = epoch_currecord(epoch);
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 	MPASS(et != NULL);
 	MPASS(et->et_td == td);
 #ifdef INVARIANTS
 	/* Poison the owner so that a second exit trips the assertion above. */
 	et->et_td = (void*)0xDEADBEEF;
 #endif
 	ck_epoch_end(&er->er_record, &et->et_section);
 	TAILQ_REMOVE(&er->er_tdlist, et, et_link);
 	/* Lets a spinning epoch_block_handler_preempt() notice the exit. */
 	er->er_gen++;
 	if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
 		epoch_adjust_prio(td, td->td_pre_epoch_prio);
 	critical_exit();
 #ifdef EPOCH_TRACE
 	epoch_trace_exit(td, epoch, et, file, line);
 #endif
 }
 
 /*
  * Leave a non-preemptible epoch section and close the critical section that
  * epoch_enter() left open.  A NULL epoch is silently ignored.
  */
 void
 epoch_exit(epoch_t epoch)
 {
 	epoch_record_t er;
 
 	INIT_CHECK(epoch);
 	er = epoch_currecord(epoch);
 	ck_epoch_end(&er->er_record, NULL);
 	critical_exit();
 }
 
 /*
  * epoch_block_handler_preempt() is a callback from the CK code when another
  * thread is currently in an epoch section.
  *
  * cr is the CK record of the CPU that is holding up the grace period; the
  * enclosing epoch_record is recovered from it.  The handler is entered and
  * left with the current thread's lock held (epoch_wait_preempt() takes it
  * before calling into CK), although it is dropped and retaken internally.
  * Depending on the situation the handler spins briefly, binds the caller
  * to the blocking CPU, lends its priority to threads in the section, waits
  * on a turnstile a section thread is blocked on, or yields the CPU.
  */
 static void
 epoch_block_handler_preempt(struct ck_epoch *global __unused,
     ck_epoch_record_t *cr, void *arg __unused)
 {
 	epoch_record_t record;
 	struct thread *td, *owner, *curwaittd;
 	struct epoch_tracker *tdwait;
 	struct turnstile *ts;
 	struct lock_object *lock;
 	int spincount, gen;
 	int locksheld __unused;
 
 	record = __containerof(cr, struct epoch_record, er_record);
 	td = curthread;
 	locksheld = td->td_locks;
 	spincount = 0;
 	counter_u64_add(block_count, 1);
 	/*
 	 * We lost a race and there's no longer any threads
 	 * on the CPU in an epoch section.
 	 */
 	if (TAILQ_EMPTY(&record->er_tdlist))
 		return;
 
 	if (record->er_cpuid != curcpu) {
 		/*
 		 * If the head of the list is running, we can wait for it
 		 * to remove itself from the list and thus save us the
 		 * overhead of a migration
 		 */
 		gen = record->er_gen;
 		thread_unlock(td);
 		/*
 		 * We can't actually check if the waiting thread is running
 		 * so we simply poll for it to exit before giving up and
 		 * migrating.
 		 */
 		do {
 			cpu_spinwait();
 		} while (!TAILQ_EMPTY(&record->er_tdlist) &&
 				 gen == record->er_gen &&
 				 spincount++ < MAX_ADAPTIVE_SPIN);
 		thread_lock(td);
 		/*
 		 * If the generation has changed we can poll again
 		 * otherwise we need to migrate.
 		 */
 		if (gen != record->er_gen)
 			return;
 		/*
 		 * Being on the same CPU as that of the record on which
 		 * we need to wait allows us access to the thread
 		 * list associated with that CPU. We can then examine the
 		 * oldest thread in the queue and wait on its turnstile
 		 * until it resumes and so on until a grace period
 		 * elapses.
 		 *
 		 */
 		counter_u64_add(migrate_count, 1);
 		sched_bind(td, record->er_cpuid);
 		/*
 		 * At this point we need to return to the ck code
 		 * to scan to see if a grace period has elapsed.
 		 * We can't move on to check the thread list, because
 		 * in the meantime new threads may have arrived that
 		 * in fact belong to a different epoch.
 		 */
 		return;
 	}
 	/*
 	 * Try to find a thread in an epoch section on this CPU
 	 * waiting on a turnstile. Otherwise find the lowest
 	 * priority thread (highest prio value) and drop our priority
 	 * to match to allow it to run.
 	 */
 	TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
 		/*
 		 * Propagate our priority to any other waiters to prevent us
 		 * from starving them. They will have their original priority
 		 * restored on exit from the epoch section.
 		 */
 		curwaittd = tdwait->et_td;
 		if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) {
 			critical_enter();
 			thread_unlock(td);
 			thread_lock(curwaittd);
 			sched_prio(curwaittd, td->td_priority);
 			thread_unlock(curwaittd);
 			thread_lock(td);
 			critical_exit();
 		}
 		if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
 		    ((ts = curwaittd->td_blocked) != NULL)) {
 			/*
 			 * We unlock td to allow turnstile_wait to reacquire
 			 * the thread lock. Before unlocking it we enter a
 			 * critical section to prevent preemption after we
 			 * reenable interrupts by dropping the thread lock in
 			 * order to prevent curwaittd from getting to run.
 			 */
 			critical_enter();
 			thread_unlock(td);
 
 			if (turnstile_lock(ts, &lock, &owner)) {
 				/* Recheck: the thread may have moved on. */
 				if (ts == curwaittd->td_blocked) {
 					MPASS(TD_IS_INHIBITED(curwaittd) &&
 					    TD_ON_LOCK(curwaittd));
 					critical_exit();
 					turnstile_wait(ts, owner,
 					    curwaittd->td_tsqueue);
 					counter_u64_add(turnstile_count, 1);
 					thread_lock(td);
 					return;
 				}
 				turnstile_unlock(ts, lock);
 			}
 			thread_lock(td);
 			critical_exit();
 			KASSERT(td->td_locks == locksheld,
 			    ("%d extra locks held", td->td_locks - locksheld));
 		}
 	}
 	/*
 	 * We didn't find any threads actually blocked on a lock
 	 * so we have nothing to do except context switch away.
 	 */
 	counter_u64_add(switch_count, 1);
 	mi_switch(SW_VOL | SWT_RELINQUISH);
 	/*
 	 * It is important the thread lock is dropped while yielding
 	 * to allow other threads to acquire the lock pointed to by
 	 * TDQ_LOCKPTR(td). Currently mi_switch() will unlock the
 	 * thread lock before returning. Else a deadlock like
 	 * situation might happen.
 	 */
 	thread_lock(td);
 }
 
 /*
  * Wait for a grace period on a preemptible epoch.  The caller's CPU
  * binding, pin count and priority are saved, the thread is bound to its
  * current CPU with pinning cleared, and CK is asked to synchronize using
  * epoch_block_handler_preempt() as the blocking callback.  The saved
  * scheduling state is restored before returning.  Giant is dropped for the
  * duration.  A NULL epoch is silently ignored.
  */
 void
 epoch_wait_preempt(epoch_t epoch)
 {
 	struct thread *td;
 	int was_bound;
 	int old_cpu;
 	int old_pinned;
 	u_char old_prio;
 	int locks __unused;
 
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	td = curthread;
 #ifdef INVARIANTS
 	/* Only assigned under INVARIANTS; only read by the KASSERT below. */
 	locks = curthread->td_locks;
 	MPASS(epoch->e_flags & EPOCH_PREEMPT);
 	if ((epoch->e_flags & EPOCH_LOCKED) == 0)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "epoch_wait() can be long running");
 	KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle "
 	    "of an epoch section of the same epoch"));
 #endif
 	DROP_GIANT();
 	thread_lock(td);
 
 	old_cpu = PCPU_GET(cpuid);
 	old_pinned = td->td_pinned;
 	old_prio = td->td_priority;
 	was_bound = sched_is_bound(td);
 	sched_unbind(td);
 	td->td_pinned = 0;
 	sched_bind(td, old_cpu);
 
 	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt,
 	    NULL);
 
 	/* restore CPU binding, if any */
 	if (was_bound != 0) {
 		sched_bind(td, old_cpu);
 	} else {
 		/* get thread back to initial CPU, if any */
 		if (old_pinned != 0)
 			sched_bind(td, old_cpu);
 		sched_unbind(td);
 	}
 	/* restore pinned after bind */
 	td->td_pinned = old_pinned;
 
 	/* restore thread priority */
 	sched_prio(td, old_prio);
 	thread_unlock(td);
 	PICKUP_GIANT();
 	KASSERT(td->td_locks == locks,
 	    ("%d residual locks held", td->td_locks - locks));
 }
 
 /*
  * Blocking callback used by epoch_wait() for non-preemptible epochs: it
  * only executes a spin-wait hint and returns to the CK code.
  */
 static void
 epoch_block_handler(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused,
     void *arg __unused)
 {
 	cpu_spinwait();
 }
 
 /*
  * Wait for a grace period on a non-preemptible epoch (one created with no
  * flags).  The wait runs inside a critical section and spins via
  * epoch_block_handler().  A NULL epoch is silently ignored.
  */
 void
 epoch_wait(epoch_t epoch)
 {
 
 	MPASS(cold || epoch != NULL);
 	INIT_CHECK(epoch);
 	MPASS(epoch->e_flags == 0);
 	critical_enter();
 	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler, NULL);
 	critical_exit();
 }
 
 /*
  * Queue callback to be invoked with ctx once it is safe to do so for the
  * given epoch.  The callback is queued on the current CPU's record and the
  * CPU's pending-callback count is incremented.  When the epoch is NULL, or
  * (without EARLY_AP_STARTUP) epoch_init_smp() has not run yet, the callback
  * is invoked immediately instead.
  */
 void
-epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t))
+epoch_call(epoch_t epoch, epoch_callback_t callback, epoch_context_t ctx)
 {
 	epoch_record_t er;
 	ck_epoch_entry_t *cb;
 
 	/* The context doubles as the CK entry; see the CTASSERT on sizes. */
 	cb = (void *)ctx;
 
 	MPASS(callback);
 	/* too early in boot to have epoch set up */
 	if (__predict_false(epoch == NULL))
 		goto boottime;
 #if !defined(EARLY_AP_STARTUP)
 	if (__predict_false(inited < 2))
 		goto boottime;
 #endif
 
 	critical_enter();
 	*DPCPU_PTR(epoch_cb_count) += 1;
 	er = epoch_currecord(epoch);
 	ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
 	critical_exit();
 	return;
 boottime:
 	callback(ctx);
 }
 
 /*
  * Per-CPU task body.  Inside a global_epoch section it polls the current
  * CPU's record of every allocated epoch for callbacks that may now run and
  * collects them on a local stack; the CPU's pending count and the
  * statistics are updated, and the collected callbacks are then invoked
  * outside the critical section.
  */
 static void
 epoch_call_task(void *arg __unused)
 {
 	ck_stack_entry_t *cursor, *next;
 	struct ck_epoch_entry *entry;
 	ck_epoch_record_t *record;
 	epoch_t epoch;
 	ck_stack_t cb_stack;
 	int idx, before, total;
 
 	ck_stack_init(&cb_stack);
 	total = 0;
 
 	critical_enter();
 	epoch_enter(global_epoch);
 	for (idx = 0; idx < epoch_count; idx++) {
 		epoch = allepochs[idx];
 		if (__predict_false(epoch == NULL))
 			continue;
 		record = &epoch_currecord(epoch)->er_record;
 		before = record->n_pending;
 		if (before == 0)
 			continue;
 		ck_epoch_poll_deferred(record, &cb_stack);
 		/* Whatever left the pending queue has been dispatched. */
 		total += before - record->n_pending;
 	}
 	epoch_exit(global_epoch);
 	*DPCPU_PTR(epoch_cb_count) -= total;
 	critical_exit();
 
 	counter_u64_add(epoch_call_count, total);
 	counter_u64_add(epoch_call_task_count, 1);
 
 	cursor = ck_stack_batch_pop_npsc(&cb_stack);
 	while (cursor != NULL) {
 		entry = ck_epoch_entry_container(cursor);
 		/* Fetch the successor before the callback runs. */
 		next = CK_STACK_NEXT(cursor);
 		entry->function(entry);
 		cursor = next;
 	}
 }
 
 /*
  * Return 1 if the current thread has a tracker on the current CPU's list
  * for the given epoch, 0 otherwise.  A thread that is allowed to sleep, or
  * a NULL epoch, yields 0 without looking at the list.  Under INVARIANTS a
  * non-zero dump_onfail prints the CPU, the thread id and the ids of the
  * threads that are on the list when the lookup fails.
  */
 int
 in_epoch_verbose(epoch_t epoch, int dump_onfail)
 {
 	struct epoch_tracker *iter;
 	struct thread *td;
 	epoch_record_t er;
 	int found;
 
 	td = curthread;
 	if (THREAD_CAN_SLEEP() || __predict_false(epoch == NULL))
 		return (0);
 
 	found = 0;
 	critical_enter();
 	er = epoch_currecord(epoch);
 	TAILQ_FOREACH(iter, &er->er_tdlist, et_link) {
 		if (iter->et_td == td) {
 			found = 1;
 			break;
 		}
 	}
 #ifdef INVARIANTS
 	if (found == 0 && dump_onfail) {
 		MPASS(td->td_pinned);
 		printf("cpu: %d id: %d\n", curcpu, td->td_tid);
 		TAILQ_FOREACH(iter, &er->er_tdlist, et_link)
 			printf("td_tid: %d ", iter->et_td->td_tid);
 		printf("\n");
 	}
 #endif
 	critical_exit();
 	return (found);
 }
 
 /*
  * Quiet variant of in_epoch_verbose(): same result, never dumps state.
  */
 int
 in_epoch(epoch_t epoch)
 {
 	return (in_epoch_verbose(epoch, 0));
 }
 
 static void
 epoch_drain_cb(struct epoch_context *ctx)
 {
 	struct epoch *epoch =
 	    __containerof(ctx, struct epoch_record, er_drain_ctx)->er_parent;
 
 	if (atomic_fetchadd_int(&epoch->e_drain_count, -1) == 1) {
 		mtx_lock(&epoch->e_drain_mtx);
 		wakeup(epoch);
 		mtx_unlock(&epoch->e_drain_mtx);
 	}
 }
 
 /*
  * Wait until every callback queued on the epoch so far, on any CPU, has
  * run.  The calling thread binds itself to each CPU in turn and queues
  * epoch_drain_cb() there, restores its original binding and pin count, and
  * then sleeps until the last of those callbacks has decremented
  * e_drain_count to zero.  Concurrent drains of the same epoch are
  * serialized by e_drain_sx.  May sleep.  Returns immediately when the epoch
  * is NULL or (without EARLY_AP_STARTUP) epoch_init_smp() has not run yet.
  */
 void
 epoch_drain_callbacks(epoch_t epoch)
 {
 	epoch_record_t er;
 	struct thread *td;
 	int was_bound;
 	int old_pinned;
 	int old_cpu;
 	int cpu;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "epoch_drain_callbacks() may sleep!");
 
 	/* too early in boot to have epoch set up */
 	if (__predict_false(epoch == NULL))
 		return;
 #if !defined(EARLY_AP_STARTUP)
 	if (__predict_false(inited < 2))
 		return;
 #endif
 	DROP_GIANT();
 
 	sx_xlock(&epoch->e_drain_sx);
 	mtx_lock(&epoch->e_drain_mtx);
 
 	td = curthread;
 	thread_lock(td);
 	old_cpu = PCPU_GET(cpuid);
 	old_pinned = td->td_pinned;
 	was_bound = sched_is_bound(td);
 	sched_unbind(td);
 	td->td_pinned = 0;
 
 	/* Account for all callbacks before the first one can possibly run. */
 	CPU_FOREACH(cpu)
 		epoch->e_drain_count++;
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
 		/* epoch_call() queues on the current CPU, so move there. */
 		sched_bind(td, cpu);
-		epoch_call(epoch, &er->er_drain_ctx, &epoch_drain_cb);
+		epoch_call(epoch, &epoch_drain_cb, &er->er_drain_ctx);
 	}
 
 	/* restore CPU binding, if any */
 	if (was_bound != 0) {
 		sched_bind(td, old_cpu);
 	} else {
 		/* get thread back to initial CPU, if any */
 		if (old_pinned != 0)
 			sched_bind(td, old_cpu);
 		sched_unbind(td);
 	}
 	/* restore pinned after bind */
 	td->td_pinned = old_pinned;
 
 	thread_unlock(td);
 
 	/* Woken by epoch_drain_cb() once the count drops to zero. */
 	while (epoch->e_drain_count != 0)
 		msleep(epoch, &epoch->e_drain_mtx, PZERO, "EDRAIN", 0);
 
 	mtx_unlock(&epoch->e_drain_mtx);
 	sx_xunlock(&epoch->e_drain_sx);
 
 	PICKUP_GIANT();
 }
Index: head/sys/net/pfil.c
===================================================================
--- head/sys/net/pfil.c	(revision 356825)
+++ head/sys/net/pfil.c	(revision 356826)
@@ -1,682 +1,682 @@
 /*	$FreeBSD$ */
 /*	$NetBSD: pfil.c,v 1.20 2001/11/12 23:49:46 lukem Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2019 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 1996 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/epoch.h>
 #include <sys/errno.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/ucred.h>
 #include <sys/jail.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/pfil.h>
 
 /* Malloc type for heads, hooks and links allocated in this file. */
 static MALLOC_DEFINE(M_PFIL, "pfil", "pfil(9) packet filter hooks");
 
 /* Character device (PFILDEV) that exposes pfil_ioctl(). */
 static int pfil_ioctl(struct cdev *, u_long, caddr_t, int, struct thread *);
 static struct cdevsw pfil_cdevsw = {
 	.d_ioctl =	pfil_ioctl,
 	.d_name =	PFILDEV,
 	.d_version =	D_VERSION,
 };
 static struct cdev *pfil_dev;
 
 /* Mutex taken around changes to the head, hook and link lists. */
 static struct mtx pfil_lock;
 MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF);
 #define	PFIL_LOCK()	mtx_lock(&pfil_lock)
 #define	PFIL_UNLOCK()	mtx_unlock(&pfil_lock)
 #define	PFIL_LOCK_ASSERT()	mtx_assert(&pfil_lock, MA_OWNED)
 
 /*
  * Hook chains are traversed inside the network preemptible epoch; removed
  * links are freed through epoch_call() on the same epoch.
  */
 #define	PFIL_EPOCH		net_epoch_preempt
 #define	PFIL_EPOCH_ENTER(et)	epoch_enter_preempt(net_epoch_preempt, &(et))
 #define	PFIL_EPOCH_EXIT(et)	epoch_exit_preempt(net_epoch_preempt, &(et))
 
 /* A registered filter function, created by pfil_add_hook(). */
 struct pfil_hook {
 	pfil_func_t	 hook_func;	/* filter function (pa_func) */
 	void		*hook_ruleset;	/* opaque argument (pa_ruleset) */
 	int		 hook_flags;	/* pa_flags at registration */
 	int		 hook_links;	/* decremented as links are removed */
 	enum pfil_types	 hook_type;	/* pa_type at registration */
 	const char	*hook_modname;	/* with hook_rulname: the lookup name */
 	const char	*hook_rulname;
 	LIST_ENTRY(pfil_hook) hook_list; /* linkage on V_pfil_hook_list */
 };
 
 /* One entry of a head's in or out chain, walked by pfil_run_hooks(). */
 struct pfil_link {
 	CK_STAILQ_ENTRY(pfil_link) link_chain;	/* chain linkage */
 	pfil_func_t		 link_func;	/* function called per packet */
 	void			*link_ruleset;	/* passed to link_func */
 	int			 link_flags;	/* tested for PFIL_MEMPTR */
 	struct pfil_hook	*link_hook;	/* hook this link belongs to */
 	struct epoch_context	 link_epoch_ctx; /* for deferred free */
 };
 
 typedef CK_STAILQ_HEAD(pfil_chain, pfil_link)	pfil_chain_t;
 /* A filtering point, created by pfil_head_register(). */
 struct pfil_head {
 	int		 head_nhooksin;	/* decremented when an in link goes */
 	int		 head_nhooksout; /* decremented when an out link goes */
 	pfil_chain_t	 head_in;	/* chain run for PFIL_IN */
 	pfil_chain_t	 head_out;	/* chain run for PFIL_OUT */
 	int		 head_flags;	/* pa_flags at registration */
 	enum pfil_types	 head_type;	/* pa_type at registration */
 	LIST_ENTRY(pfil_head) head_list; /* linkage on V_pfil_head_list */
 	const char	*head_name;	/* lookup name (pa_headname) */
 };
 
 /* Per-vnet list of all registered heads. */
 LIST_HEAD(pfilheadhead, pfil_head);
 VNET_DEFINE_STATIC(struct pfilheadhead, pfil_head_list) =
     LIST_HEAD_INITIALIZER(pfil_head_list);
 #define	V_pfil_head_list	VNET(pfil_head_list)
 
 /* Per-vnet list of all registered hooks. */
 LIST_HEAD(pfilhookhead, pfil_hook);
 VNET_DEFINE_STATIC(struct pfilhookhead, pfil_hook_list) =
     LIST_HEAD_INITIALIZER(pfil_hook_list);
 #define	V_pfil_hook_list	VNET(pfil_hook_list)
 
 static struct pfil_link *pfil_link_remove(pfil_chain_t *, pfil_hook_t );
 /* Epoch callback that releases a link removed by pfil_unlink(). */
 static void pfil_link_free(epoch_context_t);
 
 /*
  * Copy a packet that was passed as a memory pointer into a newly allocated
  * mbuf chain and make *p refer to it.  flags must have PFIL_MEMPTR set and
  * carries the length of the data.  Returns 0 on success or ENOMEM when the
  * mbuf chain cannot be allocated.
  */
 int
 pfil_realloc(pfil_packet_t *p, int flags, struct ifnet *ifp)
 {
 	struct mbuf *copy;
 
 	MPASS(flags & PFIL_MEMPTR);
 
 	copy = m_devget(p->mem, PFIL_LENGTH(flags), 0, ifp, NULL);
 	if (copy == NULL)
 		return (ENOMEM);
 
 	*p = pfil_packet_align(*p);
 	*p->m = copy;
 	return (0);
 }
 
 /*
  * Run a filter function that only understands mbufs on a packet that was
  * passed as a memory pointer.  An mbuf on the stack is pointed at the
  * packet memory and handed to func with PFIL_MEMPTR and the length bits
  * cleared from flags.  If func passes the packet but hands back a different
  * mbuf, *p is switched to that mbuf and PFIL_REALLOCED is returned;
  * otherwise the return value of func is returned unchanged.
  */
 static __noinline int
 pfil_fake_mbuf(pfil_func_t func, pfil_packet_t *p, struct ifnet *ifp, int flags,
     void *ruleset, struct inpcb *inp)
 {
 	struct mbuf m, *mp;
 	pfil_return_t rv;
 
 	(void)m_init(&m, M_NOWAIT, MT_DATA, M_NOFREE | M_PKTHDR);
 	m_extadd(&m, p->mem, PFIL_LENGTH(flags), NULL, NULL, NULL, 0,
 	    EXT_RXRING);
 	m.m_len = m.m_pkthdr.len = PFIL_LENGTH(flags);
 	mp = &m;
 	flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
 
 	rv = func(&mp, ifp, flags, ruleset, inp);
 	if (rv == PFIL_PASS && mp != &m) {
 		/*
 		 * Firewalls that need pfil_fake_mbuf() most likely don't
 		 * know they need return PFIL_REALLOCED.
 		 */
 		rv = PFIL_REALLOCED;
 		*p = pfil_packet_align(*p);
 		*p->m = mp;
 	}
 
 	return (rv);
 }
 
 /*
  * pfil_run_hooks() runs the specified packet filter hook chain.
  *
  * The direction encoded in flags selects the head's in or out chain; any
  * other direction panics.  Links are called in chain order inside the pfil
  * epoch until one drops or consumes the packet.  A link that lacks
  * PFIL_MEMPTR is called through pfil_fake_mbuf() while the packet is still
  * a memory pointer.  Once any link reports PFIL_REALLOCED the memory
  * pointer bits are cleared from flags, and a final PFIL_PASS is reported
  * to the caller as PFIL_REALLOCED.
  */
 int
 pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
     int flags, struct inpcb *inp)
 {
 	struct epoch_tracker et;
 	struct pfil_link *cur;
 	pfil_chain_t *chain;
 	pfil_return_t rv;
 	bool realloced;
 
 	if (PFIL_DIR(flags) == PFIL_IN)
 		chain = &head->head_in;
 	else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT))
 		chain = &head->head_out;
 	else
 		panic("%s: bogus flags %d", __func__, flags);
 
 	realloced = false;
 	rv = PFIL_PASS;
 	PFIL_EPOCH_ENTER(et);
 	CK_STAILQ_FOREACH(cur, chain, link_chain) {
 		if ((flags & PFIL_MEMPTR) && !(cur->link_flags & PFIL_MEMPTR)) {
 			rv = pfil_fake_mbuf(cur->link_func, &p, ifp, flags,
 			    cur->link_ruleset, inp);
 		} else {
 			rv = (*cur->link_func)(p, ifp, flags,
 			    cur->link_ruleset, inp);
 		}
 
 		if (rv == PFIL_DROPPED || rv == PFIL_CONSUMED)
 			break;
 		if (rv == PFIL_REALLOCED) {
 			/* The packet is an mbuf from here on. */
 			flags &= ~(PFIL_MEMPTR | PFIL_LENMASK);
 			realloced = true;
 		}
 	}
 	PFIL_EPOCH_EXIT(et);
 
 	if (realloced && rv == PFIL_PASS)
 		rv = PFIL_REALLOCED;
 	return (rv);
 }
 
 /*
  * pfil_head_register() registers a pfil_head with the packet filter hook
  * mechanism.
  */
 pfil_head_t
 pfil_head_register(struct pfil_head_args *pa)
 {
 	struct pfil_head *head, *list;
 
 	MPASS(pa->pa_version == PFIL_VERSION);
 
 	head = malloc(sizeof(struct pfil_head), M_PFIL, M_WAITOK);
 
 	head->head_nhooksin = head->head_nhooksout = 0;
 	head->head_flags = pa->pa_flags;
 	head->head_type = pa->pa_type;
 	head->head_name = pa->pa_headname;
 	CK_STAILQ_INIT(&head->head_in);
 	CK_STAILQ_INIT(&head->head_out);
 
 	PFIL_LOCK();
 	LIST_FOREACH(list, &V_pfil_head_list, head_list)
 		if (strcmp(pa->pa_headname, list->head_name) == 0) {
 			printf("pfil: duplicate head \"%s\"\n",
 			    pa->pa_headname);
 		}
 	LIST_INSERT_HEAD(&V_pfil_head_list, head, head_list);
 	PFIL_UNLOCK();
 
 	return (head);
 }
 
 /*
  * pfil_head_unregister() removes a pfil_head from the packet filter hook
  * mechanism.  The producer of the hook promises that all outstanding
  * invocations of the hook have completed before it unregisters the hook.
  *
  * Every link still on the head's in and out chains is freed immediately
  * (not through the epoch) and the link count of its hook is decremented.
  * NOTE(review): the head structure itself is not freed here -- confirm
  * where ownership of ph ends.
  */
 void
 pfil_head_unregister(pfil_head_t ph)
 {
 	struct pfil_link *link, *next;
 
 	PFIL_LOCK();
 	LIST_REMOVE(ph, head_list);
 
 	CK_STAILQ_FOREACH_SAFE(link, &ph->head_in, link_chain, next) {
 		link->link_hook->hook_links--;
 		free(link, M_PFIL);
 	}
 	CK_STAILQ_FOREACH_SAFE(link, &ph->head_out, link_chain, next) {
 		link->link_hook->hook_links--;
 		free(link, M_PFIL);
 	}
 	PFIL_UNLOCK();
 }
 
 pfil_hook_t
 pfil_add_hook(struct pfil_hook_args *pa)
 {
 	struct pfil_hook *hook, *list;
 
 	MPASS(pa->pa_version == PFIL_VERSION);
 
 	hook = malloc(sizeof(struct pfil_hook), M_PFIL, M_WAITOK | M_ZERO);
 	hook->hook_func = pa->pa_func;
 	hook->hook_ruleset = pa->pa_ruleset;
 	hook->hook_flags = pa->pa_flags;
 	hook->hook_type = pa->pa_type;
 	hook->hook_modname = pa->pa_modname;
 	hook->hook_rulname = pa->pa_rulname;
 
 	PFIL_LOCK();
 	LIST_FOREACH(list, &V_pfil_hook_list, hook_list)
 		if (strcmp(pa->pa_modname, list->hook_modname) == 0 &&
 		    strcmp(pa->pa_rulname, list->hook_rulname) == 0) {
 			printf("pfil: duplicate hook \"%s:%s\"\n",
 			    pa->pa_modname, pa->pa_rulname);
 		}
 	LIST_INSERT_HEAD(&V_pfil_hook_list, hook, hook_list);
 	PFIL_UNLOCK();
 
 	return (hook);
 }
 
 /*
  * Detach 'hook' from the input and/or output chain of 'head', as selected
  * by PFIL_IN / PFIL_OUT in pa->pa_flags.
  *
  * Must be entered with the PFIL lock held; the lock is released here
  * before returning.  Removed links are not freed directly but handed to
  * epoch_call() with pfil_link_free() as the callback.
  *
  * Returns 0 if at least one link was removed, ENOENT otherwise.
  */
 static int
 pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook)
 {
 	struct pfil_link *in, *out;
 
 	PFIL_LOCK_ASSERT();
 
 	if (pa->pa_flags & PFIL_IN) {
 		in = pfil_link_remove(&head->head_in, hook);
 		if (in != NULL) {
 			head->head_nhooksin--;
 			hook->hook_links--;
 		}
 	} else
 		in = NULL;
 	if (pa->pa_flags & PFIL_OUT) {
 		out = pfil_link_remove(&head->head_out, hook);
 		if (out != NULL) {
 			head->head_nhooksout--;
 			hook->hook_links--;
 		}
 	} else
 		out = NULL;
 	PFIL_UNLOCK();
 
 	/* Defer the actual free of whatever was unlinked to the epoch. */
 	if (in != NULL)
-		epoch_call(PFIL_EPOCH, &in->link_epoch_ctx, pfil_link_free);
+		epoch_call(PFIL_EPOCH, pfil_link_free, &in->link_epoch_ctx);
 	if (out != NULL)
-		epoch_call(PFIL_EPOCH, &out->link_epoch_ctx, pfil_link_free);
+		epoch_call(PFIL_EPOCH, pfil_link_free, &out->link_epoch_ctx);
 
 	if (in == NULL && out == NULL)
 		return (ENOENT);
 	else
 		return (0);
 }
 
 /*
  * pfil_link() links a hook to a head, or unlinks it when PFIL_UNLINK is
  * set in pa->pa_flags.
  *
  * The head is taken from pa->pa_head when PFIL_HEADPTR is set, otherwise
  * it is looked up by pa->pa_headname.  The hook is taken from pa->pa_hook
  * when PFIL_HOOKPTR is set, otherwise it is looked up by pa->pa_modname
  * and pa->pa_rulname.  PFIL_IN / PFIL_OUT select the chain(s) to act on.
  *
  * Returns 0 on success, ENOENT if the head or hook is not found, EINVAL
  * if the head and hook types differ or the requested hook flags are not
  * supported by the head, and EEXIST if the hook is already on a requested
  * chain.  For PFIL_UNLINK the result of pfil_unlink() is returned.
  */
 int
 pfil_link(struct pfil_link_args *pa)
 {
 	struct pfil_link *in, *out, *link;
 	struct pfil_head *head;
 	struct pfil_hook *hook;
 	int error;
 
 	MPASS(pa->pa_version == PFIL_VERSION);
 
 	/*
 	 * Allocate the link(s) up front, before the PFIL lock is taken.
 	 * Nothing is allocated for an unlink request.
 	 */
 	if ((pa->pa_flags & (PFIL_IN | PFIL_UNLINK)) == PFIL_IN)
 		in = malloc(sizeof(*in), M_PFIL, M_WAITOK | M_ZERO);
 	else
 		in = NULL;
 	if ((pa->pa_flags & (PFIL_OUT | PFIL_UNLINK)) == PFIL_OUT)
 		out = malloc(sizeof(*out), M_PFIL, M_WAITOK | M_ZERO);
 	else
 		out = NULL;
 
 	PFIL_LOCK();
 	/* A by-name lookup leaves the iterator NULL when nothing matches. */
 	if (pa->pa_flags & PFIL_HEADPTR)
 		head = pa->pa_head;
 	else
 		LIST_FOREACH(head, &V_pfil_head_list, head_list)
 			if (strcmp(pa->pa_headname, head->head_name) == 0)
 				break;
 	if (pa->pa_flags & PFIL_HOOKPTR)
 		hook = pa->pa_hook;
 	else
 		LIST_FOREACH(hook, &V_pfil_hook_list, hook_list)
 			if (strcmp(pa->pa_modname, hook->hook_modname) == 0 &&
 			    strcmp(pa->pa_rulname, hook->hook_rulname) == 0)
 				break;
 	if (head == NULL || hook == NULL) {
 		error = ENOENT;
 		goto fail;
 	}
 
 	/* pfil_unlink() releases the PFIL lock itself. */
 	if (pa->pa_flags & PFIL_UNLINK)
 		return (pfil_unlink(pa, head, hook));
 
 	if (head->head_type != hook->hook_type ||
 	    ((hook->hook_flags & pa->pa_flags) & ~head->head_flags)) {
 		error = EINVAL;
 		goto fail;
 	}
 
 	/* Check both chains for duplicates before modifying either one. */
 	if (pa->pa_flags & PFIL_IN)
 		CK_STAILQ_FOREACH(link, &head->head_in, link_chain)
 			if (link->link_hook == hook) {
 				error = EEXIST;
 				goto fail;
 			}
 	if (pa->pa_flags & PFIL_OUT)
 		CK_STAILQ_FOREACH(link, &head->head_out, link_chain)
 			if (link->link_hook == hook) {
 				error = EEXIST;
 				goto fail;
 			}
 
 	if (pa->pa_flags & PFIL_IN) {
 		in->link_hook = hook;
 		in->link_func = hook->hook_func;
 		in->link_flags = hook->hook_flags;
 		in->link_ruleset = hook->hook_ruleset;
 		if (pa->pa_flags & PFIL_APPEND)
 			CK_STAILQ_INSERT_TAIL(&head->head_in, in, link_chain);
 		else
 			CK_STAILQ_INSERT_HEAD(&head->head_in, in, link_chain);
 		hook->hook_links++;
 		head->head_nhooksin++;
 	}
 	if (pa->pa_flags & PFIL_OUT) {
 		out->link_hook = hook;
 		out->link_func = hook->hook_func;
 		out->link_flags = hook->hook_flags;
 		out->link_ruleset = hook->hook_ruleset;
 		/*
 		 * Note the placement is the reverse of the input chain:
 		 * PFIL_APPEND puts the link at the head of the output chain.
 		 */
 		if (pa->pa_flags & PFIL_APPEND)
 			CK_STAILQ_INSERT_HEAD(&head->head_out, out, link_chain);
 		else
 			CK_STAILQ_INSERT_TAIL(&head->head_out, out, link_chain);
 		hook->hook_links++;
 		head->head_nhooksout++;
 	}
 	PFIL_UNLOCK();
 
 	return (0);
 
 fail:
 	PFIL_UNLOCK();
 	/* Nothing was linked; either pointer may be NULL here. */
 	free(in, M_PFIL);
 	free(out, M_PFIL);
 	return (error);
 }
 
 /*
  * Epoch callback: release the pfil_link that embeds the given context.
  */
 static void
 pfil_link_free(epoch_context_t ctx)
 {
 
 	free(__containerof(ctx, struct pfil_link, link_epoch_ctx), M_PFIL);
 }
 
 /*
  * pfil_remove_hook removes a filter from all filtering points.
  *
  * Under the PFIL lock every registered head is scanned and each link that
  * refers to 'hook' is unlinked from the head's input and output chains
  * and handed to epoch_call() with pfil_link_free() as the callback.  The
  * hook is then taken off the hook list and, once the lock is dropped,
  * freed.
  */
 void
 pfil_remove_hook(pfil_hook_t hook)
 {
 	struct pfil_head *head;
 	struct pfil_link *in, *out;
 
 	PFIL_LOCK();
 	LIST_FOREACH(head, &V_pfil_head_list, head_list) {
 retry:
 		in = pfil_link_remove(&head->head_in, hook);
 		if (in != NULL) {
 			head->head_nhooksin--;
 			hook->hook_links--;
-			epoch_call(PFIL_EPOCH, &in->link_epoch_ctx,
-			    pfil_link_free);
+			epoch_call(PFIL_EPOCH, pfil_link_free,
+			    &in->link_epoch_ctx);
 		}
 		out = pfil_link_remove(&head->head_out, hook);
 		if (out != NULL) {
 			head->head_nhooksout--;
 			hook->hook_links--;
-			epoch_call(PFIL_EPOCH, &out->link_epoch_ctx,
-			    pfil_link_free);
+			epoch_call(PFIL_EPOCH, pfil_link_free,
+			    &out->link_epoch_ctx);
 		}
 		/*
 		 * pfil_link_remove() removes at most one link per call, so
 		 * rescan this head until it finds nothing more.
 		 */
 		if (in != NULL || out != NULL)
 			/* What if some stupid admin put same filter twice? */
 			goto retry;
 	}
 	LIST_REMOVE(hook, hook_list);
 	PFIL_UNLOCK();
 	/* Every link to the hook should have been accounted for above. */
 	MPASS(hook->hook_links == 0);
 	free(hook, M_PFIL);
 }
 
 /*
  * Internal: unlink and return the first link on 'chain' that refers to
  * 'hook', or NULL if there is none.  The PFIL lock must be held.  The
  * link is only taken off the chain; it is not freed here.
  */
 static struct pfil_link *
 pfil_link_remove(pfil_chain_t *chain, pfil_hook_t hook)
 {
 	struct pfil_link *cur;
 
 	PFIL_LOCK_ASSERT();
 
 	/* 'cur' is NULL if the walk runs off the end without a match. */
 	CK_STAILQ_FOREACH(cur, chain, link_chain) {
 		if (cur->link_hook != hook)
 			continue;
 		CK_STAILQ_REMOVE(chain, cur, pfil_link, link_chain);
 		break;
 	}
 
 	return (cur);
 }
 
 /*
  * Create the pfil control device node (PFILDEV), owned by root:wheel
  * with mode 0600.
  */
 static void
 pfil_init(const void *unused __unused)
 {
 	struct make_dev_args mda;
 	int rc;
 
 	make_dev_args_init(&mda);
 	mda.mda_devsw = &pfil_cdevsw;
 	mda.mda_flags = MAKEDEV_WAITOK | MAKEDEV_CHECKNAME;
 	mda.mda_mode = 0600;
 	mda.mda_uid = UID_ROOT;
 	mda.mda_gid = GID_WHEEL;
 
 	rc = make_dev_s(&mda, &pfil_dev, PFILDEV);
 	KASSERT(rc == 0, ("%s: failed to create dev: %d", __func__, rc));
 }
 /*
  * Make sure the pfil bits are first before any possible subsystem which
  * might piggyback on the SI_SUB_PROTO_PFIL.
  */
 SYSINIT(pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, pfil_init, NULL);
 
 /*
  * User control interface.
  */
 static int pfilioc_listheads(struct pfilioc_list *);
 static int pfilioc_listhooks(struct pfilioc_list *);
 static int pfilioc_link(struct pfilioc_link *);
 
 /*
  * ioctl entry point of the pfil control device.  The request is handled
  * in the vnet of the calling thread; unknown commands yield EINVAL.
  */
 static int
 pfil_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
     struct thread *td)
 {
 	int rv;
 
 	CURVNET_SET(TD_TO_VNET(td));
 	switch (cmd) {
 	case PFILIOC_LINK:
 		rv = pfilioc_link((struct pfilioc_link *)addr);
 		break;
 	case PFILIOC_LISTHOOKS:
 		rv = pfilioc_listhooks((struct pfilioc_list *)addr);
 		break;
 	case PFILIOC_LISTHEADS:
 		rv = pfilioc_listheads((struct pfilioc_list *)addr);
 		break;
 	default:
 		rv = EINVAL;
 		break;
 	}
 	CURVNET_RESTORE();
 
 	return (rv);
 }
 
 static int
 pfilioc_listheads(struct pfilioc_list *req)
 {
 	struct pfil_head *head;
 	struct pfil_link *link;
 	struct pfilioc_head *iohead;
 	struct pfilioc_hook *iohook;
 	u_int nheads, nhooks, hd, hk;
 	int error;
 
 	PFIL_LOCK();
 restart:
 	nheads = nhooks = 0;
 	LIST_FOREACH(head, &V_pfil_head_list, head_list) {
 		nheads++;
 		nhooks += head->head_nhooksin + head->head_nhooksout;
 	}
 	PFIL_UNLOCK();
 
 	if (req->pio_nheads < nheads || req->pio_nhooks < nhooks) {
 		req->pio_nheads = nheads;
 		req->pio_nhooks = nhooks;
 		return (0);
 	}
 
 	iohead = malloc(sizeof(*iohead) * nheads, M_TEMP, M_WAITOK);
 	iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK);
 
 	hd = hk = 0;
 	PFIL_LOCK();
 	LIST_FOREACH(head, &V_pfil_head_list, head_list) {
 		if (hd + 1 > nheads ||
 		    hk + head->head_nhooksin + head->head_nhooksout > nhooks) {
 			/* Configuration changed during malloc(). */
 			free(iohead, M_TEMP);
 			free(iohook, M_TEMP);
 			goto restart;
 		}
 		strlcpy(iohead[hd].pio_name, head->head_name,
 			sizeof(iohead[0].pio_name));
 		iohead[hd].pio_nhooksin = head->head_nhooksin;
 		iohead[hd].pio_nhooksout = head->head_nhooksout;
 		iohead[hd].pio_type = head->head_type;
 		CK_STAILQ_FOREACH(link, &head->head_in, link_chain) {
 			strlcpy(iohook[hk].pio_module,
 			    link->link_hook->hook_modname,
 			    sizeof(iohook[0].pio_module));
 			strlcpy(iohook[hk].pio_ruleset,
 			    link->link_hook->hook_rulname,
 			    sizeof(iohook[0].pio_ruleset));
 			hk++;
 		}
 		CK_STAILQ_FOREACH(link, &head->head_out, link_chain) {
 			strlcpy(iohook[hk].pio_module,
 			    link->link_hook->hook_modname,
 			    sizeof(iohook[0].pio_module));
 			strlcpy(iohook[hk].pio_ruleset,
 			    link->link_hook->hook_rulname,
 			    sizeof(iohook[0].pio_ruleset));
 			hk++;
 		}
 		hd++;
 	}
 	PFIL_UNLOCK();
 
 	error = copyout(iohead, req->pio_heads,
 	    sizeof(*iohead) * min(hd, req->pio_nheads));
 	if (error == 0)
 		error = copyout(iohook, req->pio_hooks,
 		    sizeof(*iohook) * min(req->pio_nhooks, hk));
 
 	req->pio_nheads = hd;
 	req->pio_nhooks = hk;
 
 	free(iohead, M_TEMP);
 	free(iohook, M_TEMP);
 
 	return (error);
 }
 
 static int
 pfilioc_listhooks(struct pfilioc_list *req)
 {
 	struct pfil_hook *hook;
 	struct pfilioc_hook *iohook;
 	u_int nhooks, hk;
 	int error;
 
 	PFIL_LOCK();
 restart:
 	nhooks = 0;
 	LIST_FOREACH(hook, &V_pfil_hook_list, hook_list)
 		nhooks++;
 	PFIL_UNLOCK();
 
 	if (req->pio_nhooks < nhooks) {
 		req->pio_nhooks = nhooks;
 		return (0);
 	}
 
 	iohook = malloc(sizeof(*iohook) * nhooks, M_TEMP, M_WAITOK);
 
 	hk = 0;
 	PFIL_LOCK();
 	LIST_FOREACH(hook, &V_pfil_hook_list, hook_list) {
 		if (hk + 1 > nhooks) {
 			/* Configuration changed during malloc(). */
 			free(iohook, M_TEMP);
 			goto restart;
 		}
 		strlcpy(iohook[hk].pio_module, hook->hook_modname,
 		    sizeof(iohook[0].pio_module));
 		strlcpy(iohook[hk].pio_ruleset, hook->hook_rulname,
 		    sizeof(iohook[0].pio_ruleset));
 		iohook[hk].pio_type = hook->hook_type;
 		iohook[hk].pio_flags = hook->hook_flags;
 		hk++;
 	}
 	PFIL_UNLOCK();
 
 	error = copyout(iohook, req->pio_hooks,
 	    sizeof(*iohook) * min(req->pio_nhooks, hk));
 	req->pio_nhooks = hk;
 	free(iohook, M_TEMP);
 
 	return (error);
 }
 
 static int
 pfilioc_link(struct pfilioc_link *req)
 {
 	struct pfil_link_args args;
 
 	if (req->pio_flags & ~(PFIL_IN | PFIL_OUT | PFIL_UNLINK | PFIL_APPEND))
 		return (EINVAL);
 
 	args.pa_version = PFIL_VERSION;
 	args.pa_flags = req->pio_flags;
 	args.pa_headname = req->pio_name;
 	args.pa_modname = req->pio_module;
 	args.pa_rulname = req->pio_ruleset;
 
 	return (pfil_link(&args));
 }
Index: head/sys/sys/epoch.h
===================================================================
--- head/sys/sys/epoch.h	(revision 356825)
+++ head/sys/sys/epoch.h	(revision 356826)
@@ -1,108 +1,109 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_EPOCH_H_
 #define _SYS_EPOCH_H_
 
 /*
  * Context passed to epoch_call() and handed back to the callback: two
  * pointers' worth of storage, aligned to pointer size.  The contents are
  * not interpreted by users of this header.
  */
 struct epoch_context {
 	void   *data[2];
 } __aligned(sizeof(void *));
 
 typedef struct epoch_context *epoch_context_t;
+typedef	void epoch_callback_t(epoch_context_t);
 
 #ifdef _KERNEL
 #include <sys/lock.h>
 #include <sys/pcpu.h>
 #include <ck_epoch.h>
 
 struct epoch;
 typedef struct epoch *epoch_t;
 
 #define EPOCH_PREEMPT 0x1
 #define EPOCH_LOCKED 0x2
 
 extern epoch_t global_epoch;
 extern epoch_t global_epoch_preempt;
 
 /*
  * Per-section state passed to _epoch_enter_preempt() and
  * _epoch_exit_preempt() (normally via the epoch_enter_preempt() /
  * epoch_exit_preempt() macros).  Aligned to pointer size.
  */
 struct epoch_tracker {
 	TAILQ_ENTRY(epoch_tracker) et_link;	/* tail queue linkage */
 	struct thread *et_td;			/* associated thread */
 	ck_epoch_section_t et_section;		/* ck_epoch section state */
 #ifdef EPOCH_TRACE
 	/* Debugging aids, only present in EPOCH_TRACE kernels. */
 	struct epoch *et_epoch;
 	SLIST_ENTRY(epoch_tracker) et_tlink;
 	const char *et_file;	/* presumably the caller's __FILE__ */
 	int et_line;		/* presumably the caller's __LINE__ */
 #endif
 }  __aligned(sizeof(void *));
 typedef struct epoch_tracker *epoch_tracker_t;
 
 epoch_t	epoch_alloc(const char *name, int flags);
 void	epoch_free(epoch_t epoch);
 void	epoch_wait(epoch_t epoch);
 void	epoch_wait_preempt(epoch_t epoch);
 void	epoch_drain_callbacks(epoch_t epoch);
-void	epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t));
+void	epoch_call(epoch_t epoch, epoch_callback_t cb, epoch_context_t ctx);
 int	in_epoch(epoch_t epoch);
 int in_epoch_verbose(epoch_t epoch, int dump_onfail);
 DPCPU_DECLARE(int, epoch_cb_count);
 DPCPU_DECLARE(struct grouptask, epoch_cb_task);
 
 #ifdef EPOCH_TRACE
 #define	EPOCH_FILE_LINE	, const char *file, int line
 #else
 #define	EPOCH_FILE_LINE
 #endif
 
 void _epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE);
 void _epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE);
 #ifdef EPOCH_TRACE
 void epoch_trace_list(struct thread *);
 #define	epoch_enter_preempt(epoch, et)	_epoch_enter_preempt(epoch, et, __FILE__, __LINE__)
 #define	epoch_exit_preempt(epoch, et)	_epoch_exit_preempt(epoch, et, __FILE__, __LINE__)
 #else
 #define epoch_enter_preempt(epoch, et)	_epoch_enter_preempt(epoch, et)
 #define	epoch_exit_preempt(epoch, et)	_epoch_exit_preempt(epoch, et)
 #endif
 void epoch_enter(epoch_t epoch);
 void epoch_exit(epoch_t epoch);
 
 /*
  * Globally recognized epochs in the FreeBSD kernel.
  */
 /* Network preemptible epoch, declared in sys/net/if.c. */
 extern epoch_t net_epoch_preempt;
 #define	NET_EPOCH_ENTER(et)	epoch_enter_preempt(net_epoch_preempt, &(et))
 #define	NET_EPOCH_EXIT(et)	epoch_exit_preempt(net_epoch_preempt, &(et))
 #define	NET_EPOCH_WAIT()	epoch_wait_preempt(net_epoch_preempt)
-#define	NET_EPOCH_CALL(f, c)	epoch_call(net_epoch_preempt, (c), (f))
+#define	NET_EPOCH_CALL(f, c)	epoch_call(net_epoch_preempt, (f), (c))
 #define	NET_EPOCH_ASSERT()	MPASS(in_epoch(net_epoch_preempt))
 
 #endif	/* _KERNEL */
 #endif	/* _SYS_EPOCH_H_ */