Index: vendor-sys/ck/dist/include/ck_epoch.h
===================================================================
--- vendor-sys/ck/dist/include/ck_epoch.h	(revision 333761)
+++ vendor-sys/ck/dist/include/ck_epoch.h	(revision 333762)
@@ -1,280 +1,281 @@
 /*
  * Copyright 2011-2015 Samy Al Bahra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef CK_EPOCH_H
 #define CK_EPOCH_H
 
 /*
  * The implementation here is inspired from the work described in:
  *   Fraser, K. 2004. Practical Lock-Freedom. PhD Thesis, University
  *   of Cambridge Computing Laboratory.
  */
 
 #include <ck_cc.h>
 #include <ck_md.h>
 #include <ck_pr.h>
 #include <ck_stack.h>
 #include <ck_stdbool.h>
 
 #ifndef CK_EPOCH_LENGTH
 #define CK_EPOCH_LENGTH 4
 #endif
 
 /*
  * This is used for sense detection with-respect to concurrent
  * epoch sections.
  */
 #define CK_EPOCH_SENSE		(2)
 
 struct ck_epoch_entry;
 typedef struct ck_epoch_entry ck_epoch_entry_t;
 typedef void ck_epoch_cb_t(ck_epoch_entry_t *);
 
 /*
  * This should be embedded into objects you wish to be the target of
  * ck_epoch_cb_t functions (with ck_epoch_call).
  */
 struct ck_epoch_entry {
 	ck_epoch_cb_t *function;
 	ck_stack_entry_t stack_entry;
 };
 
 /*
  * A section object may be passed to every begin-end pair to allow for
  * forward progress guarantees with-in prolonged active sections.
  */
 struct ck_epoch_section {
 	unsigned int bucket;
 };
 typedef struct ck_epoch_section ck_epoch_section_t;
 
 /*
  * Return pointer to ck_epoch_entry container object.
  */
 #define CK_EPOCH_CONTAINER(T, M, N) \
 	CK_CC_CONTAINER(struct ck_epoch_entry, T, M, N)
 
 struct ck_epoch_ref {
 	unsigned int epoch;
 	unsigned int count;
 };
 
 struct ck_epoch_record {
 	ck_stack_entry_t record_next;
 	struct ck_epoch *global;
 	unsigned int state;
 	unsigned int epoch;
 	unsigned int active;
 	struct {
 		struct ck_epoch_ref bucket[CK_EPOCH_SENSE];
 	} local CK_CC_CACHELINE;
 	unsigned int n_pending;
 	unsigned int n_peak;
 	unsigned int n_dispatch;
 	void *ct;
 	ck_stack_t pending[CK_EPOCH_LENGTH];
 } CK_CC_CACHELINE;
 typedef struct ck_epoch_record ck_epoch_record_t;
 
 struct ck_epoch {
 	unsigned int epoch;
 	unsigned int n_free;
 	ck_stack_t records;
 };
 typedef struct ck_epoch ck_epoch_t;
 
 /*
  * Internal functions.
  */
 void _ck_epoch_addref(ck_epoch_record_t *, ck_epoch_section_t *);
 bool _ck_epoch_delref(ck_epoch_record_t *, ck_epoch_section_t *);
 
 CK_CC_FORCE_INLINE static void *
 ck_epoch_record_ct(const ck_epoch_record_t *record)
 {
 
 	return ck_pr_load_ptr(&record->ct);
 }
 
 /*
  * Marks the beginning of an epoch-protected section.
  */
 CK_CC_FORCE_INLINE static void
 ck_epoch_begin(ck_epoch_record_t *record, ck_epoch_section_t *section)
 {
 	struct ck_epoch *epoch = record->global;
 
 	/*
 	 * Only observe new epoch if thread is not recursing into a read
 	 * section.
 	 */
 	if (record->active == 0) {
 		unsigned int g_epoch;
 
 		/*
 		 * It is possible for loads to be re-ordered before the store
 		 * is committed into the caller's epoch and active fields.
 		 * For this reason, store to load serialization is necessary.
 		 */
 #if defined(CK_MD_TSO)
 		ck_pr_fas_uint(&record->active, 1);
 		ck_pr_fence_atomic_load();
 #else
 		ck_pr_store_uint(&record->active, 1);
 		ck_pr_fence_memory();
 #endif
 
 		/*
 		 * This load is allowed to be re-ordered prior to setting
 		 * active flag due to monotonic nature of the global epoch.
 		 * However, stale values lead to measurable performance
 		 * degradation in some torture tests so we disallow early load
 		 * of global epoch.
 		 */
 		g_epoch = ck_pr_load_uint(&epoch->epoch);
 		ck_pr_store_uint(&record->epoch, g_epoch);
 	} else {
 		ck_pr_store_uint(&record->active, record->active + 1);
 	}
 
 	if (section != NULL)
 		_ck_epoch_addref(record, section);
 
 	return;
 }
 
 /*
  * Marks the end of an epoch-protected section. Returns true if no more
  * sections exist for the caller.
  */
 CK_CC_FORCE_INLINE static bool
 ck_epoch_end(ck_epoch_record_t *record, ck_epoch_section_t *section)
 {
 
 	ck_pr_fence_release();
 	ck_pr_store_uint(&record->active, record->active - 1);
 
 	if (section != NULL)
 		return _ck_epoch_delref(record, section);
 
 	return record->active == 0;
 }
 
 /*
  * Defers the execution of the function pointed to by the "cb"
  * argument until an epoch counter loop. This allows for a
  * non-blocking deferral.
  *
  * We can get away without a fence here due to the monotonic nature
  * of the epoch counter. Worst case, this will result in some delays
  * before object destruction.
  */
 CK_CC_FORCE_INLINE static void
 ck_epoch_call(ck_epoch_record_t *record,
 	      ck_epoch_entry_t *entry,
 	      ck_epoch_cb_t *function)
 {
 	struct ck_epoch *epoch = record->global;
 	unsigned int e = ck_pr_load_uint(&epoch->epoch);
 	unsigned int offset = e & (CK_EPOCH_LENGTH - 1);
 
 	record->n_pending++;
 	entry->function = function;
 	ck_stack_push_spnc(&record->pending[offset], &entry->stack_entry);
 	return;
 }
 
 /*
  * Same as ck_epoch_call, but allows for records to be shared and is reentrant.
  */
 CK_CC_FORCE_INLINE static void
 ck_epoch_call_strict(ck_epoch_record_t *record,
 	      ck_epoch_entry_t *entry,
 	      ck_epoch_cb_t *function)
 {
 	struct ck_epoch *epoch = record->global;
 	unsigned int e = ck_pr_load_uint(&epoch->epoch);
 	unsigned int offset = e & (CK_EPOCH_LENGTH - 1);
 
 	ck_pr_inc_uint(&record->n_pending);
 	entry->function = function;
 
 	/* Store fence is implied by push operation. */
 	ck_stack_push_upmc(&record->pending[offset], &entry->stack_entry);
 	return;
 }
 
 /*
  * This callback is used for synchronize_wait to allow for custom blocking
  * behavior.
  */
 typedef void ck_epoch_wait_cb_t(ck_epoch_t *, ck_epoch_record_t *,
     void *);
 
 /*
  * Return latest epoch value. This operation provides load ordering.
  */
 CK_CC_FORCE_INLINE static unsigned int
 ck_epoch_value(const ck_epoch_t *ep)
 {
 
 	ck_pr_fence_load();
 	return ck_pr_load_uint(&ep->epoch);
 }
 
 void ck_epoch_init(ck_epoch_t *);
 
 /*
  * Attempts to recycle an unused epoch record. If one is successfully
  * allocated, the record context pointer is also updated.
  */
 ck_epoch_record_t *ck_epoch_recycle(ck_epoch_t *, void *);
 
 /*
  * Registers an epoch record. An optional context pointer may be passed that
  * is retrievable with ck_epoch_record_ct.
  */
 void ck_epoch_register(ck_epoch_t *, ck_epoch_record_t *, void *);
 
 /*
  * Marks a record as available for re-use by a subsequent recycle operation.
  * Note that the record cannot be physically destroyed.
  */
 void ck_epoch_unregister(ck_epoch_record_t *);
 
 bool ck_epoch_poll(ck_epoch_record_t *);
+bool ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred);
 void ck_epoch_synchronize(ck_epoch_record_t *);
 void ck_epoch_synchronize_wait(ck_epoch_t *, ck_epoch_wait_cb_t *, void *);
 void ck_epoch_barrier(ck_epoch_record_t *);
 void ck_epoch_barrier_wait(ck_epoch_record_t *, ck_epoch_wait_cb_t *, void *);
 
 /*
  * Reclaim entries associated with a record. This is safe to call only on
  * the caller's record or records that are using call_strict.
  */
 void ck_epoch_reclaim(ck_epoch_record_t *);
 
 #endif /* CK_EPOCH_H */
Index: vendor-sys/ck/dist/src/ck_epoch.c
===================================================================
--- vendor-sys/ck/dist/src/ck_epoch.c	(revision 333761)
+++ vendor-sys/ck/dist/src/ck_epoch.c	(revision 333762)
@@ -1,585 +1,595 @@
 /*
  * Copyright 2011-2015 Samy Al Bahra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * The implementation here is inspired from the work described in:
  *   Fraser, K. 2004. Practical Lock-Freedom. PhD Thesis, University
  *   of Cambridge Computing Laboratory.
  */
 
 #include <ck_backoff.h>
 #include <ck_cc.h>
 #include <ck_epoch.h>
 #include <ck_pr.h>
 #include <ck_stack.h>
 #include <ck_stdbool.h>
 #include <ck_string.h>
 
 /*
  * Only three distinct values are used for reclamation, but reclamation occurs
  * at e+2 rather than e+1. Any thread in a "critical section" would have
  * acquired some snapshot (e) of the global epoch value (e_g) and set an active
  * flag. Any hazardous references will only occur after a full memory barrier.
  * For example, assume an initial e_g value of 1, e value of 0 and active value
  * of 0.
  *
  * ck_epoch_begin(...)
  *   e = e_g
  *   active = 1
  *   memory_barrier();
  *
  * Any serialized reads may observe e = 0 or e = 1 with active = 0, or e = 0 or
  * e = 1 with active = 1. The e_g value can only go from 1 to 2 if every thread
  * has already observed the value of "1" (or the value we are incrementing
  * from). This guarantees us that for any given value e_g, any threads with-in
  * critical sections (referred to as "active" threads from here on) would have
  * an e value of e_g-1 or e_g. This also means that hazardous references may be
  * shared in both e_g-1 and e_g even if they are logically deleted in e_g.
  *
  * For example, assume all threads have an e value of e_g. Another thread may
  * increment to e_g to e_g+1. Older threads may have a reference to an object
  * which is only deleted in e_g+1. It could be that reader threads are
  * executing some hash table look-ups, while some other writer thread (which
  * causes epoch counter tick) actually deletes the same items that reader
  * threads are looking up (this writer thread having an e value of e_g+1).
  * This is possible if the writer thread re-observes the epoch after the
  * counter tick.
  *
  * Psuedo-code for writer:
  *   ck_epoch_begin()
  *   ht_delete(x)
  *   ck_epoch_end()
  *   ck_epoch_begin()
  *   ht_delete(x)
  *   ck_epoch_end()
  *
  * Psuedo-code for reader:
  *   for (;;) {
  *      x = ht_lookup(x)
  *      ck_pr_inc(&x->value);
  *   }
  *
  * Of course, it is also possible for references logically deleted at e_g-1 to
  * still be accessed at e_g as threads are "active" at the same time
  * (real-world time) mutating shared objects.
  *
  * Now, if the epoch counter is ticked to e_g+1, then no new hazardous
  * references could exist to objects logically deleted at e_g-1. The reason for
  * this is that at e_g+1, all epoch read-side critical sections started at
  * e_g-1 must have been completed. If any epoch read-side critical sections at
  * e_g-1 were still active, then we would never increment to e_g+1 (active != 0
  * ^ e != e_g).  Additionally, e_g may still have hazardous references to
  * objects logically deleted at e_g-1 which means objects logically deleted at
  * e_g-1 cannot be deleted at e_g+1 unless all threads have observed e_g+1
  * (since it is valid for active threads to be at e_g and threads at e_g still
  * require safe memory accesses).
  *
  * However, at e_g+2, all active threads must be either at e_g+1 or e_g+2.
  * Though e_g+2 may share hazardous references with e_g+1, and e_g+1 shares
  * hazardous references to e_g, no active threads are at e_g or e_g-1. This
  * means no hazardous references could exist to objects deleted at e_g-1 (at
  * e_g+2).
  *
  * To summarize these important points,
  *   1) Active threads will always have a value of e_g or e_g-1.
  *   2) Items that are logically deleted e_g or e_g-1 cannot be physically
  *      deleted.
  *   3) Objects logically deleted at e_g-1 can be physically destroyed at e_g+2
  *      or at e_g+1 if no threads are at e_g.
  *
  * Last but not least, if we are at e_g+2, then no active thread is at e_g
  * which means it is safe to apply modulo-3 arithmetic to e_g value in order to
  * re-use e_g to represent the e_g+3 state. This means it is sufficient to
  * represent e_g using only the values 0, 1 or 2. Every time a thread re-visits
  * a e_g (which can be determined with a non-empty deferral list) it can assume
  * objects in the e_g deferral list involved at least three e_g transitions and
  * are thus, safe, for physical deletion.
  *
  * Blocking semantics for epoch reclamation have additional restrictions.
  * Though we only require three deferral lists, reasonable blocking semantics
  * must be able to more gracefully handle bursty write work-loads which could
  * easily cause e_g wrap-around if modulo-3 arithmetic is used. This allows for
  * easy-to-trigger live-lock situations. The work-around to this is to not
  * apply modulo arithmetic to e_g but only to deferral list indexing.
  */
 #define CK_EPOCH_GRACE 3U
 
 enum {
 	CK_EPOCH_STATE_USED = 0,
 	CK_EPOCH_STATE_FREE = 1
 };
 
 CK_STACK_CONTAINER(struct ck_epoch_record, record_next,
     ck_epoch_record_container)
 CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
     ck_epoch_entry_container)
 
 #define CK_EPOCH_SENSE_MASK	(CK_EPOCH_SENSE - 1)
 
 bool
 _ck_epoch_delref(struct ck_epoch_record *record,
     struct ck_epoch_section *section)
 {
 	struct ck_epoch_ref *current, *other;
 	unsigned int i = section->bucket;
 
 	current = &record->local.bucket[i];
 	current->count--;
 
 	if (current->count > 0)
 		return false;
 
 	/*
 	 * If the current bucket no longer has any references, then
 	 * determine whether we have already transitioned into a newer
 	 * epoch. If so, then make sure to update our shared snapshot
 	 * to allow for forward progress.
 	 *
 	 * If no other active bucket exists, then the record will go
 	 * inactive in order to allow for forward progress.
 	 */
 	other = &record->local.bucket[(i + 1) & CK_EPOCH_SENSE_MASK];
 	if (other->count > 0 &&
 	    ((int)(current->epoch - other->epoch) < 0)) {
 		/*
 		 * The other epoch value is actually the newest,
 		 * transition to it.
 		 */
 		ck_pr_store_uint(&record->epoch, other->epoch);
 	}
 
 	return true;
 }
 
 void
 _ck_epoch_addref(struct ck_epoch_record *record,
     struct ck_epoch_section *section)
 {
 	struct ck_epoch *global = record->global;
 	struct ck_epoch_ref *ref;
 	unsigned int epoch, i;
 
 	epoch = ck_pr_load_uint(&global->epoch);
 	i = epoch & CK_EPOCH_SENSE_MASK;
 	ref = &record->local.bucket[i];
 
 	if (ref->count++ == 0) {
 #ifndef CK_MD_TSO
 		struct ck_epoch_ref *previous;
 
 		/*
 		 * The system has already ticked. If another non-zero bucket
 		 * exists, make sure to order our observations with respect
 		 * to it. Otherwise, it is possible to acquire a reference
 		 * from the previous epoch generation.
 		 *
 		 * On TSO architectures, the monoticity of the global counter
 		 * and load-{store, load} ordering are sufficient to guarantee
 		 * this ordering.
 		 */
 		previous = &record->local.bucket[(i + 1) &
 		    CK_EPOCH_SENSE_MASK];
 		if (previous->count > 0)
 			ck_pr_fence_acqrel();
 #endif /* !CK_MD_TSO */
 
 		/*
 		 * If this is this is a new reference into the current
 		 * bucket then cache the associated epoch value.
 		 */
 		ref->epoch = epoch;
 	}
 
 	section->bucket = i;
 	return;
 }
 
 void
 ck_epoch_init(struct ck_epoch *global)
 {
 
 	ck_stack_init(&global->records);
 	global->epoch = 1;
 	global->n_free = 0;
 	ck_pr_fence_store();
 	return;
 }
 
 struct ck_epoch_record *
 ck_epoch_recycle(struct ck_epoch *global, void *ct)
 {
 	struct ck_epoch_record *record;
 	ck_stack_entry_t *cursor;
 	unsigned int state;
 
 	if (ck_pr_load_uint(&global->n_free) == 0)
 		return NULL;
 
 	CK_STACK_FOREACH(&global->records, cursor) {
 		record = ck_epoch_record_container(cursor);
 
 		if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) {
 			/* Serialize with respect to deferral list clean-up. */
 			ck_pr_fence_load();
 			state = ck_pr_fas_uint(&record->state,
 			    CK_EPOCH_STATE_USED);
 			if (state == CK_EPOCH_STATE_FREE) {
 				ck_pr_dec_uint(&global->n_free);
 				ck_pr_store_ptr(&record->ct, ct);
 
 				/*
 				 * The context pointer is ordered by a
 				 * subsequent protected section.
 				 */
 				return record;
 			}
 		}
 	}
 
 	return NULL;
 }
 
 void
 ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
     void *ct)
 {
 	size_t i;
 
 	record->global = global;
 	record->state = CK_EPOCH_STATE_USED;
 	record->active = 0;
 	record->epoch = 0;
 	record->n_dispatch = 0;
 	record->n_peak = 0;
 	record->n_pending = 0;
 	record->ct = ct;
 	memset(&record->local, 0, sizeof record->local);
 
 	for (i = 0; i < CK_EPOCH_LENGTH; i++)
 		ck_stack_init(&record->pending[i]);
 
 	ck_pr_fence_store();
 	ck_stack_push_upmc(&global->records, &record->record_next);
 	return;
 }
 
 void
 ck_epoch_unregister(struct ck_epoch_record *record)
 {
 	struct ck_epoch *global = record->global;
 	size_t i;
 
 	record->active = 0;
 	record->epoch = 0;
 	record->n_dispatch = 0;
 	record->n_peak = 0;
 	record->n_pending = 0;
 	memset(&record->local, 0, sizeof record->local);
 
 	for (i = 0; i < CK_EPOCH_LENGTH; i++)
 		ck_stack_init(&record->pending[i]);
 
 	ck_pr_store_ptr(&record->ct, NULL);
 	ck_pr_fence_store();
 	ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
 	ck_pr_inc_uint(&global->n_free);
 	return;
 }
 
 static struct ck_epoch_record *
 ck_epoch_scan(struct ck_epoch *global,
     struct ck_epoch_record *cr,
     unsigned int epoch,
     bool *af)
 {
 	ck_stack_entry_t *cursor;
 
 	if (cr == NULL) {
 		cursor = CK_STACK_FIRST(&global->records);
 		*af = false;
 	} else {
 		cursor = &cr->record_next;
 		*af = true;
 	}
 
 	while (cursor != NULL) {
 		unsigned int state, active;
 
 		cr = ck_epoch_record_container(cursor);
 
 		state = ck_pr_load_uint(&cr->state);
 		if (state & CK_EPOCH_STATE_FREE) {
 			cursor = CK_STACK_NEXT(cursor);
 			continue;
 		}
 
 		active = ck_pr_load_uint(&cr->active);
 		*af |= active;
 
 		if (active != 0 && ck_pr_load_uint(&cr->epoch) != epoch)
 			return cr;
 
 		cursor = CK_STACK_NEXT(cursor);
 	}
 
 	return NULL;
 }
 
 static void
-ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e)
+ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *deferred)
 {
 	unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
 	ck_stack_entry_t *head, *next, *cursor;
 	unsigned int n_pending, n_peak;
 	unsigned int i = 0;
 
 	head = ck_stack_batch_pop_upmc(&record->pending[epoch]);
 	for (cursor = head; cursor != NULL; cursor = next) {
 		struct ck_epoch_entry *entry =
 		    ck_epoch_entry_container(cursor);
 
 		next = CK_STACK_NEXT(cursor);
-		entry->function(entry);
+		if (deferred != NULL)
+			ck_stack_push_spnc(deferred, &entry->stack_entry);
+		else
+			entry->function(entry);
 		i++;
 	}
 
 	n_peak = ck_pr_load_uint(&record->n_peak);
 	n_pending = ck_pr_load_uint(&record->n_pending);
 
 	/* We don't require accuracy around peak calculation. */
 	if (n_pending > n_peak)
 		ck_pr_store_uint(&record->n_peak, n_peak);
 
 	if (i > 0) {
 		ck_pr_add_uint(&record->n_dispatch, i);
 		ck_pr_sub_uint(&record->n_pending, i);
 	}
 
 	return;
 }
 
 /*
  * Reclaim all objects associated with a record.
  */
 void
 ck_epoch_reclaim(struct ck_epoch_record *record)
 {
 	unsigned int epoch;
 
 	for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
-		ck_epoch_dispatch(record, epoch);
+		ck_epoch_dispatch(record, epoch, NULL);
 
 	return;
 }
 
 CK_CC_FORCE_INLINE static void
 epoch_block(struct ck_epoch *global, struct ck_epoch_record *cr,
     ck_epoch_wait_cb_t *cb, void *ct)
 {
 
 	if (cb != NULL)
 		cb(global, cr, ct);
 
 	return;
 }
 
 /*
  * This function must not be called with-in read section.
  */
 void
 ck_epoch_synchronize_wait(struct ck_epoch *global,
     ck_epoch_wait_cb_t *cb, void *ct)
 {
 	struct ck_epoch_record *cr;
 	unsigned int delta, epoch, goal, i;
 	bool active;
 
 	ck_pr_fence_memory();
 
 	/*
 	 * The observation of the global epoch must be ordered with respect to
 	 * all prior operations. The re-ordering of loads is permitted given
 	 * monoticity of global epoch counter.
 	 *
 	 * If UINT_MAX concurrent mutations were to occur then it is possible
 	 * to encounter an ABA-issue. If this is a concern, consider tuning
 	 * write-side concurrency.
 	 */
 	delta = epoch = ck_pr_load_uint(&global->epoch);
 	goal = epoch + CK_EPOCH_GRACE;
 
 	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
 		bool r;
 
 		/*
 		 * Determine whether all threads have observed the current
 		 * epoch with respect to the updates on invocation.
 		 */
 		while (cr = ck_epoch_scan(global, cr, delta, &active),
 		    cr != NULL) {
 			unsigned int e_d;
 
 			ck_pr_stall();
 
 			/*
 			 * Another writer may have already observed a grace
 			 * period.
 			 */
 			e_d = ck_pr_load_uint(&global->epoch);
 			if (e_d == delta) {
 				epoch_block(global, cr, cb, ct);
 				continue;
 			}
 
 			/*
 			 * If the epoch has been updated, we may have already
 			 * met our goal.
 			 */
 			delta = e_d;
 			if ((goal > epoch) & (delta >= goal))
 				goto leave;
 
 			epoch_block(global, cr, cb, ct);
 
 			/*
 			 * If the epoch has been updated, then a grace period
 			 * requires that all threads are observed idle at the
 			 * same epoch.
 			 */
 			cr = NULL;
 		}
 
 		/*
 		 * If we have observed all threads as inactive, then we assume
 		 * we are at a grace period.
 		 */
 		if (active == false)
 			break;
 
 		/*
 		 * Increment current epoch. CAS semantics are used to eliminate
 		 * increment operations for synchronization that occurs for the
 		 * same global epoch value snapshot.
 		 *
 		 * If we can guarantee there will only be one active barrier or
 		 * epoch tick at a given time, then it is sufficient to use an
 		 * increment operation. In a multi-barrier workload, however,
 		 * it is possible to overflow the epoch value if we apply
 		 * modulo-3 arithmetic.
 		 */
 		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
 		    &delta);
 
 		/* Order subsequent thread active checks. */
 		ck_pr_fence_atomic_load();
 
 		/*
 		 * If CAS has succeeded, then set delta to latest snapshot.
 		 * Otherwise, we have just acquired latest snapshot.
 		 */
 		delta = delta + r;
 	}
 
 	/*
 	 * A majority of use-cases will not require full barrier semantics.
 	 * However, if non-temporal instructions are used, full barrier
 	 * semantics are necessary.
 	 */
 leave:
 	ck_pr_fence_memory();
 	return;
 }
 
 void
 ck_epoch_synchronize(struct ck_epoch_record *record)
 {
 
 	ck_epoch_synchronize_wait(record->global, NULL, NULL);
 	return;
 }
 
 void
 ck_epoch_barrier(struct ck_epoch_record *record)
 {
 
 	ck_epoch_synchronize(record);
 	ck_epoch_reclaim(record);
 	return;
 }
 
 void
 ck_epoch_barrier_wait(struct ck_epoch_record *record, ck_epoch_wait_cb_t *cb,
     void *ct)
 {
 
 	ck_epoch_synchronize_wait(record->global, cb, ct);
 	ck_epoch_reclaim(record);
 	return;
 }
 
 /*
  * It may be worth it to actually apply these deferral semantics to an epoch
  * that was observed at ck_epoch_call time. The problem is that the latter
  * would require a full fence.
  *
  * ck_epoch_call will dispatch to the latest epoch snapshot that was observed.
  * There are cases where it will fail to reclaim as early as it could. If this
  * becomes a problem, we could actually use a heap for epoch buckets but that
  * is far from ideal too.
  */
 bool
-ck_epoch_poll(struct ck_epoch_record *record)
+ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred)
 {
 	bool active;
 	unsigned int epoch;
 	struct ck_epoch_record *cr = NULL;
 	struct ck_epoch *global = record->global;
 
 	epoch = ck_pr_load_uint(&global->epoch);
 
 	/* Serialize epoch snapshots with respect to global epoch. */
 	ck_pr_fence_memory();
 	cr = ck_epoch_scan(global, cr, epoch, &active);
 	if (cr != NULL) {
 		record->epoch = epoch;
 		return false;
 	}
 
 	/* We are at a grace period if all threads are inactive. */
 	if (active == false) {
 		record->epoch = epoch;
 		for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
-			ck_epoch_dispatch(record, epoch);
+			ck_epoch_dispatch(record, epoch, deferred);
 
 		return true;
 	}
 
 	/* If an active thread exists, rely on epoch observation. */
 	(void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1);
 
-	ck_epoch_dispatch(record, epoch + 1);
+	ck_epoch_dispatch(record, epoch + 1, deferred);
 	return true;
+}
+
+bool
+ck_epoch_poll(struct ck_epoch_record *record)
+{
+
+	return ck_epoch_poll_deferred(record, NULL);
 }