Index: sys/kern/subr_epoch.c
===================================================================
--- sys/kern/subr_epoch.c
+++ sys/kern/subr_epoch.c
@@ -58,8 +58,6 @@
 #include 

-static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation");
-
 #ifdef __amd64__
 #define EPOCH_ALIGN CACHE_LINE_SIZE*2
 #else
@@ -79,12 +77,14 @@
 struct epoch {
 	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
 	epoch_record_t e_pcpu_record;
-	int e_idx;
 	int e_flags;
 	struct sx e_drain_sx;
 	struct mtx e_drain_mtx;
 	volatile int e_drain_count;
-	const char *e_name;
+	union {
+		const char *ptr;
+		uintptr_t val;
+	} e_name;
 };

 /* arbitrary --- needs benchmarking */
@@ -128,19 +128,23 @@
 CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
     ck_epoch_entry_container)

-epoch_t allepochs[MAX_EPOCHS];
+static struct epoch epoch_array[MAX_EPOCHS];

 DPCPU_DEFINE(struct grouptask, epoch_cb_task);
 DPCPU_DEFINE(int, epoch_cb_count);

 static __read_mostly int inited;
-static __read_mostly int epoch_count;
 __read_mostly epoch_t global_epoch;
 __read_mostly epoch_t global_epoch_preempt;

 static void epoch_call_task(void *context __unused);
 static uma_zone_t pcpu_zone_record;

+static struct sx epoch_sx;
+
+#define	EPOCH_LOCK() sx_xlock(&epoch_sx)
+#define	EPOCH_UNLOCK() sx_xunlock(&epoch_sx)
+
 #ifdef EPOCH_TRACE
 struct stackentry {
 	RB_ENTRY(stackentry) se_node;
@@ -217,7 +221,7 @@
 	if (iet->et_epoch == epoch)
 		epoch_trace_report("Recursively entering epoch %s "
 		    "at %s:%d, previously entered at %s:%d\n",
-		    epoch->e_name, file, line,
+		    epoch->e_name.ptr, file, line,
 		    iet->et_file, iet->et_line);
 	et->et_epoch = epoch;
 	et->et_file = file;
@@ -233,9 +237,9 @@
 	if (SLIST_FIRST(&td->td_epochs) != et) {
 		epoch_trace_report("Exiting epoch %s in a not nested order "
 		    "at %s:%d. Most recently entered %s at %s:%d\n",
-		    epoch->e_name,
+		    epoch->e_name.ptr,
 		    file, line,
-		    SLIST_FIRST(&td->td_epochs)->et_epoch->e_name,
+		    SLIST_FIRST(&td->td_epochs)->et_epoch->e_name.ptr,
 		    SLIST_FIRST(&td->td_epochs)->et_file,
 		    SLIST_FIRST(&td->td_epochs)->et_line);
 		/* This will panic if et is not anywhere on td_epochs. */
@@ -251,7 +255,7 @@
 	epoch_tracker_t iet;

 	SLIST_FOREACH(iet, &td->td_epochs, et_tlink)
-		printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name,
+		printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name.ptr,
 		    iet->et_file, iet->et_line);
 }
 #endif /* EPOCH_TRACE */
@@ -281,6 +285,7 @@
 #ifdef EPOCH_TRACE
 	SLIST_INIT(&thread0.td_epochs);
 #endif
+	sx_init(&epoch_sx, "epoch-sx");
 	inited = 1;
 	global_epoch = epoch_alloc("Global", 0);
 	global_epoch_preempt = epoch_alloc("Global preemptible", EPOCH_PREEMPT);
@@ -326,19 +331,45 @@
 epoch_alloc(const char *name, int flags)
 {
 	epoch_t epoch;
+	int i;

+	MPASS(name != NULL);
+
 	if (__predict_false(!inited))
 		panic("%s called too early in boot", __func__);
-	epoch = malloc(sizeof(struct epoch), M_EPOCH, M_ZERO | M_WAITOK);
+
+	EPOCH_LOCK();
+
+	/*
+	 * Find a free index in the epoch array. If no free index is
+	 * found, the epoch table is full and the allocation fails.
+	 */
+	for (i = 0; i != MAX_EPOCHS; i++) {
+		if (epoch_array[i].e_name.val == 0)
+			break;
+	}
+
+	/* If too many epochs are currently allocated, return NULL. */
+	if (i == MAX_EPOCHS) {
+		epoch = NULL;
+		goto done;
+	}
+
+	epoch = epoch_array + i;
 	ck_epoch_init(&epoch->e_epoch);
 	epoch_ctor(epoch);
-	MPASS(epoch_count < MAX_EPOCHS - 2);
 	epoch->e_flags = flags;
-	epoch->e_idx = epoch_count;
-	epoch->e_name = name;
 	sx_init(&epoch->e_drain_sx, "epoch-drain-sx");
 	mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF);
-	allepochs[epoch_count++] = epoch;
+
+	/*
+	 * Set e_name last, because when this field is set the
+	 * epoch_call_task() function will start scanning this epoch
+	 * structure.
+	 */
+	atomic_store_rel_ptr(&epoch->e_name.val, (uintptr_t)name);
+done:
+	EPOCH_UNLOCK();
 	return (epoch);
 }

@@ -346,13 +377,24 @@
 epoch_free(epoch_t epoch)
 {

+	EPOCH_LOCK();
+
+	MPASS(epoch->e_name.val != 0);
+
 	epoch_drain_callbacks(epoch);
-	allepochs[epoch->e_idx] = NULL;
+
+	atomic_store_rel_ptr(&epoch->e_name.val, 0);
+	/*
+	 * Make sure the epoch_call_task() function sees e_name equal
+	 * to zero by calling epoch_wait() on the global_epoch:
+	 */
 	epoch_wait(global_epoch);
 	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
 	mtx_destroy(&epoch->e_drain_mtx);
 	sx_destroy(&epoch->e_drain_sx);
-	free(epoch, M_EPOCH);
+	memset(epoch, 0, sizeof(*epoch));
+
+	EPOCH_UNLOCK();
 }

 static epoch_record_t
@@ -705,8 +747,10 @@
 	ck_stack_init(&cb_stack);
 	critical_enter();
 	epoch_enter(global_epoch);
-	for (total = i = 0; i < epoch_count; i++) {
-		if (__predict_false((epoch = allepochs[i]) == NULL))
+	for (total = i = 0; i != MAX_EPOCHS; i++) {
+		epoch = epoch_array + i;
+		if (__predict_false(
+		    atomic_load_acq_ptr(&epoch->e_name.val) == 0))
 			continue;
 		er = epoch_currecord(epoch);
 		record = &er->er_record;
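
Caller-visible change worth noting: epoch_alloc() previously allocated with malloc(..., M_ZERO | M_WAITOK) and could not return NULL, but with the fixed-size epoch_array it returns NULL once all MAX_EPOCHS slots are in use. The sketch below is a hypothetical consumer, not part of the patch; the mymod_* names and the ENOMEM return value are illustrative assumptions.

/*
 * Hypothetical consumer of epoch_alloc()/epoch_free() showing the
 * NULL check that the fixed-size epoch table now makes necessary.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/epoch.h>

static epoch_t mymod_epoch;	/* placeholder module epoch */

static int
mymod_init(void)
{

	mymod_epoch = epoch_alloc("mymod", EPOCH_PREEMPT);
	if (mymod_epoch == NULL)
		return (ENOMEM);	/* all MAX_EPOCHS slots are in use */
	return (0);
}

static void
mymod_fini(void)
{

	if (mymod_epoch != NULL) {
		epoch_free(mymod_epoch);
		mymod_epoch = NULL;
	}
}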