Index: sys/kern/subr_smr.c
===================================================================
--- sys/kern/subr_smr.c
+++ sys/kern/subr_smr.c
@@ -174,6 +174,25 @@
 static counter_u64_t poll_scan = EARLY_COUNTER;
 SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RD,
     &poll_scan, "");
 
+/*
+ * Advance a lazy write sequence number.  These move forward at the rate of
+ * ticks.  Grace is two ticks in the future.  Lazy write sequence numbers can
+ * be odd but not SMR_SEQ_INVALID, so we pause time for a tick when we wrap.
+ */
+static smr_seq_t
+smr_lazy_advance(smr_shared_t s)
+{
+        smr_seq_t wr_seq;
+        int t;
+
+        t = ticks;
+        wr_seq = atomic_load_int(&s->s_wr_seq);
+        if (t != SMR_SEQ_INVALID && SMR_SEQ_GT(t, wr_seq) &&
+            atomic_cmpset_int(&s->s_wr_seq, wr_seq, t))
+                wr_seq = t;
+
+        return (wr_seq + SMR_LAZY_GRACE);
+}
+
 /*
  * Advance the write sequence and return the new value for use as the
@@ -188,26 +207,34 @@
 smr_advance(smr_t smr)
 {
         smr_shared_t s;
+        smr_t self;
         smr_seq_t goal, s_rd_seq;
 
         /*
          * It is illegal to enter while in an smr section.
          */
         SMR_ASSERT_NOT_ENTERED(smr);
 
+        self = zpcpu_get(smr);
+        s = self->c_shared;
+
         /*
-         * Modifications not done in a smr section need to be visible
-         * before advancing the seq.
+         * Lazy SMRs simply return a goal one grace period ahead.
          */
-        atomic_thread_fence_rel();
+        if ((self->c_flags & SMR_LAZY) != 0)
+                return (smr_lazy_advance(s));
 
         /*
          * Load the current read seq before incrementing the goal so
          * we are guaranteed it is always < goal.
          */
-        s = zpcpu_get(smr)->c_shared;
         s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
 
+        /*
+         * Modifications not done in a smr section need to be visible
+         * before advancing the seq.
+         */
+        atomic_thread_fence_rel();
+
         /*
          * Increment the shared write sequence by 2.  Since it is
          * initialized to 1 this means the only valid values are
@@ -234,17 +261,17 @@
 smr_advance_deferred(smr_t smr, int limit)
 {
         smr_seq_t goal;
-        smr_t csmr;
+        smr_t self;
 
         SMR_ASSERT_NOT_ENTERED(smr);
 
         critical_enter();
-        csmr = zpcpu_get(smr);
-        if (++csmr->c_deferred >= limit) {
-                goal = SMR_SEQ_INVALID;
-                csmr->c_deferred = 0;
-        } else
-                goal = smr_shared_current(csmr->c_shared) + SMR_SEQ_INCR;
+        self = zpcpu_get(smr);
+        goal = SMR_SEQ_INVALID;
+        if (++self->c_deferred >= limit)
+                self->c_deferred = 0;
+        else if ((self->c_flags & SMR_LAZY) == 0)
+                goal = smr_shared_current(self->c_shared) + SMR_SEQ_INCR;
         critical_exit();
         if (goal != SMR_SEQ_INVALID)
                 return (goal);
@@ -268,7 +295,7 @@
 smr_poll(smr_t smr, smr_seq_t goal, bool wait)
 {
         smr_shared_t s;
-        smr_t c;
+        smr_t c, self;
         smr_seq_t s_wr_seq, s_rd_seq, rd_seq, c_seq;
         int i;
         bool success;
@@ -278,6 +305,8 @@
          */
         KASSERT(!wait || !SMR_ENTERED(smr),
             ("smr_poll: Blocking not allowed in a SMR section."));
+        KASSERT(!wait || (zpcpu_get(smr)->c_flags & SMR_LAZY) == 0,
+            ("smr_poll: Blocking not allowed on lazy smrs."));
 
         /*
          * Use a critical section so that we can avoid ABA races
@@ -285,7 +314,8 @@
          */
         success = true;
         critical_enter();
-        s = zpcpu_get(smr)->c_shared;
+        self = zpcpu_get(smr);
+        s = self->c_shared;
         counter_u64_add_protected(poll, 1);
 
         /*
@@ -295,20 +325,35 @@
         s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
 
         /*
-         * wr_seq must be loaded prior to any c_seq value so that a stale
-         * c_seq can only reference time after this wr_seq.
+         * wr_seq must be loaded prior to any c_seq value so that a
+         * stale c_seq can only reference time after this wr_seq.
          */
         s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
 
-        /*
-         * This may have come from a deferred advance.  Consider one
-         * increment past the current wr_seq valid and make sure we
-         * have advanced far enough to succeed.  We simply add to avoid
-         * an additional fence.
-         */
-        if (goal == s_wr_seq + SMR_SEQ_INCR) {
-                atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
-                s_wr_seq = goal;
+        if ((self->c_flags & SMR_LAZY) == 0) {
+                /*
+                 * This may have come from a deferred advance.  Consider one
+                 * increment past the current wr_seq valid and make sure we
+                 * have advanced far enough to succeed.  We simply add to avoid
+                 * an additional fence.
+                 */
+                if (SMR_SEQ_DELTA(goal, s_wr_seq) == SMR_SEQ_INCR) {
+                        atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
+                        s_wr_seq = goal;
+                }
+        } else {
+                /*
+                 * If this goal is in the future we can't succeed.  We
+                 * assume that read sections are frequent enough that the
+                 * idle test will not be fruitful.
+                 */
+                if (SMR_SEQ_LT(s_wr_seq, ticks))
+                        s_wr_seq = smr_lazy_advance(s) - SMR_LAZY_GRACE;
+                if (SMR_SEQ_GT(goal, s_wr_seq) &&
+                    SMR_SEQ_DELTA(goal, s_wr_seq) <= SMR_LAZY_GRACE) {
+                        success = false;
+                        goto out;
+                }
         }
 
         /*
@@ -407,7 +452,7 @@
 }
 
 smr_t
-smr_create(const char *name)
+smr_create(const char *name, int flags)
 {
         smr_t smr, c;
         smr_shared_t s;
@@ -417,13 +462,18 @@
         smr = uma_zalloc_pcpu(smr_zone, M_WAITOK);
 
         s->s_name = name;
-        s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+        if ((flags & SMR_LAZY) == 0)
+                s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+        else
+                s->s_rd_seq = s->s_wr_seq = ticks;
 
         /* Initialize all CPUS, not just those running. */
         for (i = 0; i <= mp_maxid; i++) {
                 c = zpcpu_get_cpu(smr, i);
                 c->c_seq = SMR_SEQ_INVALID;
                 c->c_shared = s;
+                c->c_deferred = 0;
+                c->c_flags = flags;
         }
         atomic_thread_fence_seq_cst();
Index: sys/sys/smr.h
===================================================================
--- sys/sys/smr.h
+++ sys/sys/smr.h
@@ -66,8 +66,13 @@
         smr_seq_t       c_seq;          /* Current observed sequence. */
         smr_shared_t    c_shared;       /* Shared SMR state. */
         int             c_deferred;     /* Deferred advance counter. */
+        int             c_flags;        /* SMR configuration. */
 };
 
+#define SMR_LAZY        0x0001          /* Higher latency write, fast read. */
+
+#define SMR_LAZY_GRACE  2               /* Grace period for lazy SMR. */
+
 #define SMR_ENTERED(smr)                                                \
     (curthread->td_critnest != 0 && zpcpu_get((smr))->c_seq != SMR_SEQ_INVALID)
@@ -170,7 +175,8 @@
 } while (0)
 
 /*
- * Return the current write sequence number.
+ * Return the current write sequence number.  This is not the same as the
+ * current goal, which may be in the future.
  */
 static inline smr_seq_t
 smr_shared_current(smr_shared_t s)
@@ -195,6 +201,8 @@
 
         critical_enter();
         smr = zpcpu_get(smr);
+        KASSERT((smr->c_flags & SMR_LAZY) == 0,
+            ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
         KASSERT(smr->c_seq == 0,
             ("smr_enter(%s) does not support recursion.",
             smr->c_shared->s_name));
@@ -228,6 +236,8 @@
 
         smr = zpcpu_get(smr);
         CRITICAL_ASSERT(curthread);
+        KASSERT((smr->c_flags & SMR_LAZY) == 0,
+            ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
         KASSERT(smr->c_seq != SMR_SEQ_INVALID,
             ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));
@@ -242,6 +252,57 @@
         critical_exit();
 }
 
+/*
+ * Enter a lazy smr section.  This is used for read-mostly state that
+ * can tolerate a high free latency.
+ */
+static inline void
+smr_lazy_enter(smr_t smr)
+{
+
+        critical_enter();
+        smr = zpcpu_get(smr);
+        KASSERT((smr->c_flags & SMR_LAZY) != 0,
+            ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
+        KASSERT(smr->c_seq == 0,
+            ("smr_lazy_enter(%s) does not support recursion.",
+            smr->c_shared->s_name));
+
+        /*
+         * This needs no serialization.  If an interrupt occurs before we
+         * assign wr_seq to c_seq any speculative loads will be discarded.
+         * If we assign a stale wr_seq value due to interrupt we use the
+         * same algorithm that renders smr_enter() safe.
+         */
+        smr->c_seq = smr_shared_current(smr->c_shared);
+}
+
+/*
+ * Exit a lazy smr section.  This is used for read-mostly state that
+ * can tolerate a high free latency.
+ */
+static inline void
+smr_lazy_exit(smr_t smr)
+{
+
+        smr = zpcpu_get(smr);
+        CRITICAL_ASSERT(curthread);
+        KASSERT((smr->c_flags & SMR_LAZY) != 0,
+            ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
+        KASSERT(smr->c_seq != SMR_SEQ_INVALID,
+            ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));
+
+        /*
+         * All loads/stores must be retired before the sequence becomes
+         * visible.  The fence compiles away on amd64.  Another
+         * alternative would be to omit the fence but store the exit
+         * time and wait 1 tick longer.
+         */
+        atomic_thread_fence_rel();
+        smr->c_seq = SMR_SEQ_INVALID;
+        critical_exit();
+}
+
 /*
  * Advances the write sequence number.  Returns the sequence number
  * required to ensure that all modifications are visible to readers.
@@ -262,7 +323,9 @@
 bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);
 
 /* Create a new SMR context. */
-smr_t smr_create(const char *name);
+smr_t smr_create(const char *name, int flags);
+
+/* Destroy the context. */
 void smr_destroy(smr_t smr);
 
 /*
Index: sys/tools/umaperf/umaperf.c
===================================================================
--- sys/tools/umaperf/umaperf.c
+++ sys/tools/umaperf/umaperf.c
@@ -164,6 +164,7 @@
         PLAIN,
 #ifdef __FreeBSD__
         SMR,
+        LAZY_SMR,
         EPOCH,
         EPOCH_PRE,
 #else
@@ -176,6 +177,7 @@
         [PLAIN] = "PLAIN",
 #ifdef __FreeBSD__
         [SMR] = "SMR",
+        [LAZY_SMR] = "SMR_LAZY",
         [EPOCH] = "EPOCH",
         [EPOCH_PRE] = "EPOCH_PREEMPT"
 #else
@@ -224,6 +226,9 @@
         case SMR:
                 smr_enter(umaperf_smr);
                 break;
+        case LAZY_SMR:
+                smr_lazy_enter(umaperf_smr);
+                break;
         case EPOCH:
                 epoch_enter(umaperf_epoch);
                 break;
@@ -249,6 +254,9 @@
         case SMR:
                 smr_exit(umaperf_smr);
                 break;
+        case LAZY_SMR:
+                smr_lazy_exit(umaperf_smr);
+                break;
         case EPOCH:
                 epoch_exit(umaperf_epoch);
                 break;
@@ -280,6 +288,7 @@
                 break;
 #ifdef __FreeBSD__
         case SMR:
+        case LAZY_SMR:
                 uma_zfree_smr(umaperf_zone, p);
                 break;
         case EPOCH:
@@ -301,6 +310,7 @@
         switch (umaperf_type) {
 #ifdef __FreeBSD__
         case SMR:
+        case LAZY_SMR:
                 return uma_zalloc_smr(umaperf_zone, M_WAITOK);
         case EPOCH:
         case EPOCH_PRE:
@@ -606,10 +616,15 @@
         switch (umaperf_type) {
 #ifdef __FreeBSD__
         case PLAIN:
-                flags = UMA_ZONE_ROUNDROBIN;
+                flags = UMA_ZONE_FIRSTTOUCH;
                 break;
         case SMR:
-                flags = UMA_ZONE_ROUNDROBIN | UMA_ZONE_SMR;
+                umaperf_smr = smr_create("umaperf", 0);
+                flags = UMA_ZONE_FIRSTTOUCH;
+                break;
+        case LAZY_SMR:
+                umaperf_smr = smr_create("umaperf", SMR_LAZY);
+                flags = UMA_ZONE_FIRSTTOUCH;
                 break;
         case EPOCH:
                 umaperf_epoch = epoch_alloc("umaperf", 0);
@@ -629,7 +644,8 @@
         umaperf_zone = uma_zcreate("umaperf", umaperf_zone_size,
             NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, flags);
 #ifdef __FreeBSD__
-        umaperf_smr = uma_zone_get_smr(umaperf_zone);
+        if (umaperf_smr != NULL)
+                uma_zone_set_smr(umaperf_zone, umaperf_smr);
 #endif
         umaperf_init_cpus();
 }
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -2707,7 +2707,7 @@
 
         /* Caller requests a private SMR context. */
         if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
-                zone->uz_smr = smr_create(zone->uz_name);
+                zone->uz_smr = smr_create(zone->uz_name, 0);
 
         KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
             (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
Index: tools/uma/smrstress/smrstress.c
===================================================================
--- tools/uma/smrstress/smrstress.c
+++ tools/uma/smrstress/smrstress.c
@@ -64,12 +64,14 @@
 static void
 smrs_error(struct smrs *smrs, const char *fmt, ...)
 {
+        smr_t self;
         va_list ap;
 
+        self = zpcpu_get(smrs_smr);
         atomic_add_int(&smrs_failures, 1);
         printf("SMR ERROR: wr_seq %d, rd_seq %d, c_seq %d, generation %d, count %d ",
-            smrs_smr->c_shared->s_wr_seq, smrs_smr->c_shared->s_rd_seq,
-            zpcpu_get(smrs_smr)->c_seq, smrs->generation, smrs->count);
+            smr_current(smrs_smr), self->c_shared->s_rd_seq, self->c_seq,
+            smrs->generation, smrs->count);
         va_start(ap, fmt);
         (void)vprintf(fmt, ap);
         va_end(ap);
@@ -83,7 +85,7 @@
 
         /* Wait for the writer to exit. */
         while (smrs_completed == 0) {
-                smr_enter(smrs_smr);
+                smr_lazy_enter(smrs_smr);
                 cur = (void *)atomic_load_acq_ptr(&smrs_current);
                 if (cur->generation == -1)
                         smrs_error(cur, "read early: Use after free!\n");
@@ -94,7 +96,7 @@
                         smrs_error(cur, "read late: Use after free!\n");
                 else if (cnt <= 0)
                         smrs_error(cur, "Invalid ref\n");
-                smr_exit(smrs_smr);
+                smr_lazy_exit(smrs_smr);
                 maybe_yield();
         }
 }
@@ -190,8 +192,9 @@
 
         smrs_zone = uma_zcreate("smrs", sizeof(struct smrs),
             smrs_ctor, smrs_dtor, NULL, NULL, UMA_ALIGN_PTR,
-            UMA_ZONE_SMR | UMA_ZONE_ZINIT);
-        smrs_smr = uma_zone_get_smr(smrs_zone);
+            UMA_ZONE_ZINIT);
+        smrs_smr = smr_create("smrs", SMR_LAZY);
+        uma_zone_set_smr(smrs_zone, smrs_smr);
 }
 
 static void
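
Usage sketch: the fragment below shows how a consumer might wire the SMR_LAZY API above
into a UMA zone, following the same pattern as the smrstress.c hunks.  The struct foo type,
the foo_* names, and the single published pointer are hypothetical illustrations; only
smr_create(..., SMR_LAZY), smr_lazy_enter()/smr_lazy_exit(), uma_zone_set_smr(), and
uma_zalloc_smr()/uma_zfree_smr() come from this patch.

/*
 * Hypothetical consumer of the lazy SMR API: a single published record
 * that is read frequently and replaced rarely.  Readers pay only a
 * critical section and a per-CPU sequence store; freed records are
 * recycled by UMA once the two-tick grace period has expired.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smr.h>

#include <vm/uma.h>

struct foo {
        int     f_key;
        int     f_value;
};

static uma_zone_t foo_zone;
static smr_t foo_smr;
static uintptr_t foo_current;           /* Points at the live struct foo. */

static void
foo_init(void)
{

        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        /* Attach a private lazy context rather than using UMA_ZONE_SMR. */
        foo_smr = smr_create("foo", SMR_LAZY);
        uma_zone_set_smr(foo_zone, foo_smr);
}

/* Reader: no fences or atomic read-modify-writes on this path. */
static int
foo_lookup(int key, int *valuep)
{
        struct foo *p;
        int found;

        smr_lazy_enter(foo_smr);
        p = (struct foo *)atomic_load_acq_ptr(&foo_current);
        found = (p != NULL && p->f_key == key);
        if (found)
                *valuep = p->f_value;
        smr_lazy_exit(foo_smr);

        return (found);
}

/* Writer: publish a replacement and defer the free to the zone's SMR. */
static void
foo_replace(int key, int value)
{
        struct foo *new, *old;

        new = uma_zalloc_smr(foo_zone, M_WAITOK);
        new->f_key = key;
        new->f_value = value;
        old = (struct foo *)foo_current;
        atomic_store_rel_ptr(&foo_current, (uintptr_t)new);
        if (old != NULL)
                uma_zfree_smr(foo_zone, old);
}

Readers stay on the cheap smr_lazy_enter()/smr_lazy_exit() path, while frees ride the
zone's SMR state and are held back until the lazy grace period (SMR_LAZY_GRACE ticks)
has passed, trading free latency for read-side cost.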