Index: sys/kern/subr_epoch.c
===================================================================
--- sys/kern/subr_epoch.c
+++ sys/kern/subr_epoch.c
@@ -74,15 +74,19 @@
 	volatile struct epoch_tdlist er_tdlist;
 	volatile uint32_t er_gen;
 	uint32_t er_cpuid;
+	int er_drain_state;
 } __aligned(EPOCH_ALIGN) *epoch_record_t;
 
+#define	EPOCH_DRAIN_START	2
+#define	EPOCH_DRAIN_RUNNING	1
+#define	EPOCH_DRAIN_DONE	0
+
 struct epoch {
 	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
 	epoch_record_t e_pcpu_record;
 	int	e_idx;
 	int	e_flags;
 	struct sx e_drain_sx;
-	struct mtx e_drain_mtx;
 	volatile int e_drain_count;
 	const char *e_name;
 };
@@ -337,7 +341,6 @@
 	epoch->e_idx = epoch_count;
 	epoch->e_name = name;
 	sx_init(&epoch->e_drain_sx, "epoch-drain-sx");
-	mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF);
 	allepochs[epoch_count++] = epoch;
 	return (epoch);
 }
@@ -350,7 +353,6 @@
 	allepochs[epoch->e_idx] = NULL;
 	epoch_wait(global_epoch);
 	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
-	mtx_destroy(&epoch->e_drain_mtx);
 	sx_destroy(&epoch->e_drain_sx);
 	free(epoch, M_EPOCH);
 }
@@ -701,14 +703,24 @@
 	epoch_t epoch;
 	ck_stack_t cb_stack;
 	int i, npending, total;
+	bool draining;
+
+	KASSERT(curthread->td_pinned > 0,
+	    ("%s: callback task thread is not pinned", __func__));
 
 	ck_stack_init(&cb_stack);
 	critical_enter();
 	epoch_enter(global_epoch);
-	for (total = i = 0; i < epoch_count; i++) {
+	for (total = i = 0, draining = false; i < epoch_count; i++) {
 		if (__predict_false((epoch = allepochs[i]) == NULL))
 			continue;
 		er = epoch_currecord(epoch);
+		if (atomic_load_int(&er->er_drain_state) == EPOCH_DRAIN_START) {
+			atomic_store_int(&er->er_drain_state,
+			    EPOCH_DRAIN_RUNNING);
+			draining = true;
+		}
+
 		record = &er->er_record;
 		if ((npending = record->n_pending) == 0)
 			continue;
@@ -730,6 +742,20 @@
 		next = CK_STACK_NEXT(cursor);
 		entry->function(entry);
 	}
+
+	if (__predict_false(draining)) {
+		epoch_enter(global_epoch);
+		for (i = 0; i < epoch_count; i++) {
+			if (__predict_false((epoch = allepochs[i]) == NULL))
+				continue;
+			er = epoch_currecord(epoch);
+			if (atomic_load_int(&er->er_drain_state) ==
+			    EPOCH_DRAIN_RUNNING)
+				atomic_store_int(&er->er_drain_state,
+				    EPOCH_DRAIN_DONE);
+		}
+		epoch_exit(global_epoch);
+	}
 }
 
 int
@@ -771,27 +797,18 @@
 }
 
 static void
-epoch_drain_cb(struct epoch_context *ctx)
+epoch_drain_handler(struct ck_epoch *global __unused,
+    ck_epoch_record_t *cr __unused, void *arg __unused)
 {
-	struct epoch *epoch =
-	    __containerof(ctx, struct epoch_record, er_drain_ctx)->er_parent;
-
-	if (atomic_fetchadd_int(&epoch->e_drain_count, -1) == 1) {
-		mtx_lock(&epoch->e_drain_mtx);
-		wakeup(epoch);
-		mtx_unlock(&epoch->e_drain_mtx);
-	}
+	maybe_yield();
 }
 
 void
 epoch_drain_callbacks(epoch_t epoch)
 {
 	epoch_record_t er;
-	struct thread *td;
-	int was_bound;
-	int old_pinned;
-	int old_cpu;
-	int cpu;
+	int cpu, state;
+	bool pending;
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 	    "epoch_drain_callbacks() may sleep!");
@@ -804,45 +821,28 @@
 		return;
 #endif
 	DROP_GIANT();
-
 	sx_xlock(&epoch->e_drain_sx);
-	mtx_lock(&epoch->e_drain_mtx);
-
-	td = curthread;
-	thread_lock(td);
-	old_cpu = PCPU_GET(cpuid);
-	old_pinned = td->td_pinned;
-	was_bound = sched_is_bound(td);
-	sched_unbind(td);
-	td->td_pinned = 0;
+
+	/* Make sure that all pending callbacks are available. */
+	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_drain_handler, NULL);
 
-	CPU_FOREACH(cpu)
-		epoch->e_drain_count++;
 	CPU_FOREACH(cpu) {
 		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
-		sched_bind(td, cpu);
-		epoch_call(epoch, &epoch_drain_cb, &er->er_drain_ctx);
+		atomic_store_int(&er->er_drain_state, EPOCH_DRAIN_START);
+		GROUPTASK_ENQUEUE(DPCPU_ID_PTR(cpu, epoch_cb_task));
 	}
 
-	/* restore CPU binding, if any */
-	if (was_bound != 0) {
-		sched_bind(td, old_cpu);
-	} else {
-		/* get thread back to initial CPU, if any */
-		if (old_pinned != 0)
-			sched_bind(td, old_cpu);
-		sched_unbind(td);
-	}
-	/* restore pinned after bind */
-	td->td_pinned = old_pinned;
-
-	thread_unlock(td);
-
-	while (epoch->e_drain_count != 0)
-		msleep(epoch, &epoch->e_drain_mtx, PZERO, "EDRAIN", 0);
+	do {
+		pending = false;
+		CPU_FOREACH(cpu) {
+			er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
+			state = atomic_load_int(&er->er_drain_state);
+			if (state != EPOCH_DRAIN_DONE)
+				pending = true;
+		}
+		pause("edrain", 1);
+	} while (pending);
 
-	mtx_unlock(&epoch->e_drain_mtx);
 	sx_xunlock(&epoch->e_drain_sx);
-
 	PICKUP_GIANT();
 }
Index: sys/kern/subr_gtaskqueue.c
===================================================================
--- sys/kern/subr_gtaskqueue.c
+++ sys/kern/subr_gtaskqueue.c
@@ -54,7 +54,7 @@
 static int	task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
 static void	gtaskqueue_drain_locked(struct gtaskqueue *queue,
 		    struct gtask *gtask);
-TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
+TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, PI_SOFT);
 
 struct gtaskqueue_busy {
 	struct gtask	*tb_running;
@@ -609,7 +609,7 @@
 };
 
 static void
-taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
+taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, int pri)
 {
 	struct taskqgroup_cpu *qcpu;
 
@@ -617,7 +617,7 @@
 	LIST_INIT(&qcpu->tgc_tasks);
 	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
-	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
+	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri,
 	    "%s_%d", qgroup->tqg_name, idx);
 	qcpu->tgc_cpu = cpu;
 }
@@ -795,7 +795,7 @@
 }
 
 struct taskqgroup *
-taskqgroup_create(const char *name, int cnt, int stride)
+taskqgroup_create(const char *name, int pri, int cnt, int stride)
 {
 	struct taskqgroup *qgroup;
 	int cpu, i, j;
@@ -806,7 +806,7 @@
 	qgroup->tqg_cnt = cnt;
 
 	for (cpu = i = 0; i < cnt; i++) {
-		taskqgroup_cpu_create(qgroup, i, cpu);
+		taskqgroup_cpu_create(qgroup, i, cpu, pri);
 		for (j = 0; j < stride; j++)
 			cpu = CPU_NEXT(cpu);
 	}
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -563,8 +564,8 @@
 MODULE_DEPEND(iflib, pci, 1, 1, 1);
 MODULE_DEPEND(iflib, ether, 1, 1, 1);
 
-TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
-TASKQGROUP_DEFINE(if_config_tqg, 1, 1);
+TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1, PI_SWI(SWI_NET));
+TASKQGROUP_DEFINE(if_config_tqg, 1, 1, PI_SWI(SWI_NET));
 
 #ifndef IFLIB_DEBUG_COUNTERS
 #ifdef INVARIANTS
Index: sys/sys/gtaskqueue.h
===================================================================
--- sys/sys/gtaskqueue.h
+++ sys/sys/gtaskqueue.h
@@ -77,7 +77,8 @@
 	    struct grouptask *grptask, void *uniq, int cpu, device_t dev,
 	    struct resource *irq, const char *name);
 void	taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
-struct taskqgroup *taskqgroup_create(const char *name, int cnt, int stride);
+struct taskqgroup *taskqgroup_create(const char *name, int pri, int cnt,
+	    int stride);
 void	taskqgroup_destroy(struct taskqgroup *qgroup);
 void	taskqgroup_bind(struct taskqgroup *qgroup);
 
@@ -97,14 +98,14 @@
 #define	TASKQGROUP_DECLARE(name)					\
 extern struct taskqgroup *qgroup_##name
 
-#define	TASKQGROUP_DEFINE(name, cnt, stride)				\
+#define	TASKQGROUP_DEFINE(name, cnt, stride, pri)			\
 									\
 struct taskqgroup *qgroup_##name;					\
 									\
 static void								\
 taskqgroup_define_##name(void *arg)					\
 {									\
-	qgroup_##name = taskqgroup_create(#name, (cnt), (stride));	\
+	qgroup_##name = taskqgroup_create(#name, (pri), (cnt), (stride)); \
 }									\
 SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST,		\
 	taskqgroup_define_##name, NULL);				\
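
For readers tracing the new drain protocol in the subr_epoch.c hunks: the patch
drops the old sched_bind()/msleep() machinery and instead has the drainer mark
every per-CPU record EPOCH_DRAIN_START, kick that CPU's epoch_cb_task, and poll
with pause("edrain", 1) until each task has walked its record through RUNNING
to DONE. The program below is a minimal userland model of that handshake, not
kernel code: the pthread workers, the drain_state array, NCPU, and the sleep
intervals are all invented stand-ins; only the three-state protocol and the
polling loop mirror the patch.

/*
 * Userland sketch of the per-CPU drain handshake.
 * Build: cc -pthread -o edrain edrain.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define	NCPU		4
#define	DRAIN_DONE	0	/* task finished a drain request */
#define	DRAIN_RUNNING	1	/* task saw the request; callbacks running */
#define	DRAIN_START	2	/* drainer requested a drain on this slot */

static atomic_int drain_state[NCPU];	/* zero-initialized: all DONE */

/* Plays the role of the pinned per-CPU epoch_cb_task. */
static void *
cb_task(void *arg)
{
	int cpu = *(int *)arg;

	for (;;) {
		if (atomic_load(&drain_state[cpu]) == DRAIN_START) {
			atomic_store(&drain_state[cpu], DRAIN_RUNNING);
			usleep(1000);	/* stands in for running callbacks */
			atomic_store(&drain_state[cpu], DRAIN_DONE);
			return (NULL);
		}
		usleep(100);		/* waiting to be "enqueued" */
	}
}

int
main(void)
{
	pthread_t td[NCPU];
	int id[NCPU];
	bool pending;

	for (int i = 0; i < NCPU; i++) {
		id[i] = i;
		pthread_create(&td[i], NULL, cb_task, &id[i]);
	}

	/* Request a drain on every slot, like the first CPU_FOREACH. */
	for (int i = 0; i < NCPU; i++)
		atomic_store(&drain_state[i], DRAIN_START);

	/* Poll until every slot reports DONE, like the do/while loop. */
	do {
		pending = false;
		for (int i = 0; i < NCPU; i++)
			if (atomic_load(&drain_state[i]) != DRAIN_DONE)
				pending = true;
		usleep(1000);		/* stands in for pause("edrain", 1) */
	} while (pending);

	for (int i = 0; i < NCPU; i++)
		pthread_join(td[i], NULL);
	printf("all slots drained\n");
	return (0);
}

Because the drainer only ever raises a slot to START and the task only ever
lowers it through RUNNING to DONE, plain atomic loads and stores are enough;
no lock or wakeup channel is needed, which is exactly why e_drain_mtx and
epoch_drain_cb() can be deleted.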
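The gtaskqueue side of the patch is mechanical: the group's per-CPU threads no
longer hard-code PI_SOFT, the creator passes a priority instead, and every
TASKQGROUP_DEFINE() site gains a trailing argument (note the macro takes the
priority last while taskqgroup_create() takes it second). A hypothetical
out-of-tree consumer, with an invented group name, would convert like this:

/* Before: group threads implicitly ran at PI_SOFT. */
TASKQGROUP_DEFINE(foo_io_tqg, mp_ncpus, 1);

/* After: the priority is chosen explicitly at definition time. */
TASKQGROUP_DEFINE(foo_io_tqg, mp_ncpus, 1, PI_SWI(SWI_NET));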