D23738.id68477.diff
Index: sys/kern/subr_smr.c
===================================================================
--- sys/kern/subr_smr.c
+++ sys/kern/subr_smr.c
@@ -188,26 +188,34 @@
smr_advance(smr_t smr)
{
smr_shared_t s;
+ smr_t self;
smr_seq_t goal, s_rd_seq;
/*
* It is illegal to enter while in an smr section.
*/
SMR_ASSERT_NOT_ENTERED(smr);
+ self = zpcpu_get(smr);
+ s = self->c_shared;
/*
- * Modifications not done in a smr section need to be visible
- * before advancing the seq.
+ * Lazy SMRs simply return a goal one grace period ahead of ticks.
*/
- atomic_thread_fence_rel();
+ if ((self->c_flags & SMR_LAZY) != 0)
+ return (ticks + SMR_LAZY_GRACE);
/*
* Load the current read seq before incrementing the goal so
* we are guaranteed it is always < goal.
*/
- s = zpcpu_get(smr)->c_shared;
s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
+ /*
+ * Modifications not done in a smr section need to be visible
+ * before advancing the seq.
+ */
+ atomic_thread_fence_rel();
+
/*
* Increment the shared write sequence by 2. Since it is
* initialized to 1 this means the only valid values are
@@ -234,17 +242,19 @@
smr_advance_deferred(smr_t smr, int limit)
{
smr_seq_t goal;
- smr_t csmr;
+ smr_t self;
SMR_ASSERT_NOT_ENTERED(smr);
critical_enter();
- csmr = zpcpu_get(smr);
- if (++csmr->c_deferred >= limit) {
+ self = zpcpu_get(smr);
+ if ((self->c_flags & SMR_LAZY) != 0) {
+ goal = ticks + SMR_LAZY_GRACE;
+ } else if (++self->c_deferred >= limit) {
goal = SMR_SEQ_INVALID;
- csmr->c_deferred = 0;
+ self->c_deferred = 0;
} else
- goal = smr_shared_current(csmr->c_shared) + SMR_SEQ_INCR;
+ goal = smr_shared_current(self->c_shared) + SMR_SEQ_INCR;
critical_exit();
if (goal != SMR_SEQ_INVALID)
return (goal);
@@ -268,7 +278,7 @@
smr_poll(smr_t smr, smr_seq_t goal, bool wait)
{
smr_shared_t s;
- smr_t c;
+ smr_t c, self;
smr_seq_t s_wr_seq, s_rd_seq, rd_seq, c_seq;
int i;
bool success;
@@ -278,6 +288,8 @@
*/
KASSERT(!wait || !SMR_ENTERED(smr),
("smr_poll: Blocking not allowed in a SMR section."));
+ KASSERT(!wait || (zpcpu_get(smr)->c_flags & SMR_LAZY) == 0,
+ ("smr_poll: Blocking not allowed on lazy smrs."));
/*
* Use a critical section so that we can avoid ABA races
@@ -285,7 +297,8 @@
*/
success = true;
critical_enter();
- s = zpcpu_get(smr)->c_shared;
+ self = zpcpu_get(smr);
+ s = self->c_shared;
counter_u64_add_protected(poll, 1);
/*
@@ -294,21 +307,35 @@
*/
s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
- /*
- * wr_seq must be loaded prior to any c_seq value so that a stale
- * c_seq can only reference time after this wr_seq.
- */
- s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
+ if ((self->c_flags & SMR_LAZY) == 0) {
+ /*
+ * wr_seq must be loaded prior to any c_seq value so that a
+ * stale c_seq can only reference time after this wr_seq.
+ */
+ s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
- /*
- * This may have come from a deferred advance. Consider one
- * increment past the current wr_seq valid and make sure we
- * have advanced far enough to succeed. We simply add to avoid
- * an additional fence.
- */
- if (goal == s_wr_seq + SMR_SEQ_INCR) {
- atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
- s_wr_seq = goal;
+ /*
+ * This may have come from a deferred advance. Consider one
+ * increment past the current wr_seq valid and make sure we
+ * have advanced far enough to succeed. We simply add to avoid
+ * an additional fence.
+ */
+ if (goal == s_wr_seq + SMR_SEQ_INCR) {
+ atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
+ s_wr_seq = goal;
+ }
+ } else {
+ s_wr_seq = atomic_load_acq_int(&ticks);
+ s_wr_seq = s_wr_seq == SMR_SEQ_INVALID ? s_wr_seq + 1 : s_wr_seq;
+ /*
+ * If this goal is in the future we can't succeed. The
+ * grace may be adjusted once in each direction around 0.
+ */
+ if (SMR_SEQ_GT(goal, s_wr_seq) &&
+ goal - s_wr_seq <= SMR_LAZY_GRACE + 2) {
+ success = false;
+ goto out;
+ }
}
/*
@@ -407,7 +434,7 @@
}
smr_t
-smr_create(const char *name)
+smr_create(const char *name, int flags)
{
smr_t smr, c;
smr_shared_t s;
@@ -417,13 +444,18 @@
smr = uma_zalloc_pcpu(smr_zone, M_WAITOK);
s->s_name = name;
- s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+ if ((flags & SMR_LAZY) == 0)
+ s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+ else
+ s->s_rd_seq = s->s_wr_seq = ticks;
/* Initialize all CPUS, not just those running. */
for (i = 0; i <= mp_maxid; i++) {
c = zpcpu_get_cpu(smr, i);
c->c_seq = SMR_SEQ_INVALID;
c->c_shared = s;
+ c->c_deferred = 0;
+ c->c_flags = flags;
}
atomic_thread_fence_seq_cst();
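
For reference, the writer-side pattern the hunks above serve: record a goal with smr_advance() at retire time and reclaim once smr_poll() reports it has expired. Below is a minimal sketch under stated assumptions; "struct my_obj", "my_retire", "my_try_reclaim" and the M_TEMP malloc type are stand-ins that are not part of this change, and with SMR_LAZY the poll must be non-blocking per the new assertion.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smr.h>

struct my_obj {
        smr_seq_t        mo_goal;       /* Goal recorded at retire time. */
        void            *mo_data;
};

static void
my_retire(smr_t smr, struct my_obj *obj)
{
        /*
         * For a plain SMR this is wr_seq + SMR_SEQ_INCR; for SMR_LAZY it
         * is the ticks + SMR_LAZY_GRACE deadline returned by the hunk above.
         */
        obj->mo_goal = smr_advance(smr);
}

static bool
my_try_reclaim(smr_t smr, struct my_obj *obj)
{
        /*
         * Lazy SMRs assert against wait == true, so poll without blocking
         * and let the caller retry on failure.
         */
        if (!smr_poll(smr, obj->mo_goal, false))
                return (false);
        free(obj, M_TEMP);
        return (true);
}
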
Index: sys/sys/smr.h
===================================================================
--- sys/sys/smr.h
+++ sys/sys/smr.h
@@ -66,8 +66,13 @@
smr_seq_t c_seq; /* Current observed sequence. */
smr_shared_t c_shared; /* Shared SMR state. */
int c_deferred; /* Deferred advance counter. */
+ int c_flags; /* SMR configuration. */
};
+#define SMR_LAZY 0x0001 /* High-latency free, inexpensive read. */
+
+#define SMR_LAZY_GRACE 2 /* Grace period for lazy smr. */
+
#define SMR_ENTERED(smr) \
(curthread->td_critnest != 0 && zpcpu_get((smr))->c_seq != SMR_SEQ_INVALID)
@@ -182,8 +187,14 @@
static inline smr_seq_t
smr_current(smr_t smr)
{
+ int t;
- return (smr_shared_current(zpcpu_get(smr)->c_shared));
+ smr = zpcpu_get(smr);
+ if ((smr->c_flags & SMR_LAZY) == 0)
+ return (smr_shared_current(smr->c_shared));
+ /* Correct ticks against SMR_SEQ_INVALID. Wait one extra on wrap. */
+ t = ticks + SMR_LAZY_GRACE;
+ return (t == SMR_SEQ_INVALID ? SMR_SEQ_INVALID + 1 : t);
}
/*
@@ -195,6 +206,8 @@
critical_enter();
smr = zpcpu_get(smr);
+ KASSERT((smr->c_flags & SMR_LAZY) == 0,
+ ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
KASSERT(smr->c_seq == 0,
("smr_enter(%s) does not support recursion.",
smr->c_shared->s_name));
@@ -228,6 +241,8 @@
smr = zpcpu_get(smr);
CRITICAL_ASSERT(curthread);
+ KASSERT((smr->c_flags & SMR_LAZY) == 0,
+ ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
KASSERT(smr->c_seq != SMR_SEQ_INVALID,
("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));
@@ -242,6 +257,60 @@
critical_exit();
}
+/*
+ * Enter a lazy smr section. This is used for read-mostly state that
+ * can tolerate a high free latency.
+ */
+static inline void
+smr_lazy_enter(smr_t smr)
+{
+ smr_seq_t t;
+
+ critical_enter();
+ smr = zpcpu_get(smr);
+ KASSERT((smr->c_flags & SMR_LAZY) != 0,
+ ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
+ KASSERT(smr->c_seq == 0,
+ ("smr_lazy_enter(%s) does not support recursion.",
+ smr->c_shared->s_name));
+
+ /*
+ * This needs no serialization. If an interrupt occurs before we
+ * assign ticks to seq any speculative loads will be discarded. If
+ * we assign a stale ticks value due to interrupt we use the same
+ * algorithm that renders smr_enter() safe.
+ */
+ t = ticks;
+ /* Correct ticks against SMR_SEQ_INVALID. Wait one extra on wrap. */
+ smr->c_seq = t == SMR_SEQ_INVALID ? t - 1 : t;
+}
+
+/*
+ * Exit a lazy smr section. This is used for read-mostly state that
+ * can tolerate a high free latency.
+ */
+static inline void
+smr_lazy_exit(smr_t smr)
+{
+
+ smr = zpcpu_get(smr);
+ CRITICAL_ASSERT(curthread);
+ KASSERT((smr->c_flags & SMR_LAZY) != 0,
+ ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
+ KASSERT(smr->c_seq != SMR_SEQ_INVALID,
+ ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));
+
+ /*
+ * All loads/stores must be retired before the sequence becomes
+ * visible. The fence compiles away on amd64. Another
+ * alternative would be to omit the fence but store the exit
+ * time and wait 1 tick longer.
+ */
+ atomic_thread_fence_rel();
+ smr->c_seq = SMR_SEQ_INVALID;
+ critical_exit();
+}
+
/*
* Advances the write sequence number. Returns the sequence number
* required to ensure that all modifications are visible to readers.
@@ -262,7 +331,9 @@
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);
/* Create a new SMR context. */
-smr_t smr_create(const char *name);
+smr_t smr_create(const char *name, int flags);
+
+/* Destroy the context. */
void smr_destroy(smr_t smr);
/*
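
A minimal sketch of the reader side these declarations enable, mirroring the smrstress changes further down; "my_current", "my_read" and "lazy_smr" are hypothetical names, not part of this change.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smr.h>

static uintptr_t my_current;            /* Published pointer to the live object. */

static void
my_read(smr_t lazy_smr)
{
        void *p;

        smr_lazy_enter(lazy_smr);       /* Records ticks in c_seq; no fence needed. */
        p = (void *)atomic_load_acq_ptr(&my_current);
        if (p != NULL) {
                /*
                 * Inspect the object here; it is not reused until this
                 * section exits and the lazy grace period has passed.
                 */
        }
        smr_lazy_exit(lazy_smr);        /* Release fence, then clear c_seq. */
}
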
Index: sys/tools/umaperf/umaperf.c
===================================================================
--- sys/tools/umaperf/umaperf.c
+++ sys/tools/umaperf/umaperf.c
@@ -164,6 +164,7 @@
PLAIN,
#ifdef __FreeBSD__
SMR,
+ LAZY_SMR,
EPOCH,
EPOCH_PRE,
#else
@@ -176,6 +177,7 @@
[PLAIN] = "PLAIN",
#ifdef __FreeBSD__
[SMR] = "SMR",
+ [LAZY_SMR] = "SMR_LAZY",
[EPOCH] = "EPOCH",
[EPOCH_PRE] = "EPOCH_PREEMPT"
#else
@@ -224,6 +226,9 @@
case SMR:
smr_enter(umaperf_smr);
break;
+ case LAZY_SMR:
+ smr_lazy_enter(umaperf_smr);
+ break;
case EPOCH:
epoch_enter(umaperf_epoch);
break;
@@ -249,6 +254,9 @@
case SMR:
smr_exit(umaperf_smr);
break;
+ case LAZY_SMR:
+ smr_lazy_exit(umaperf_smr);
+ break;
case EPOCH:
epoch_exit(umaperf_epoch);
break;
@@ -280,6 +288,7 @@
break;
#ifdef __FreeBSD__
case SMR:
+ case LAZY_SMR:
uma_zfree_smr(umaperf_zone, p);
break;
case EPOCH:
@@ -301,6 +310,7 @@
switch (umaperf_type) {
#ifdef __FreeBSD__
case SMR:
+ case LAZY_SMR:
return uma_zalloc_smr(umaperf_zone, M_WAITOK);
case EPOCH:
case EPOCH_PRE:
@@ -468,6 +478,12 @@
smr_exit(umaperf_smr);
}
break;
+ case LAZY_SMR:
+ for (i = 0; i < umaperf_iterations; i++) {
+ smr_lazy_enter(umaperf_smr);
+ smr_lazy_exit(umaperf_smr);
+ }
+ break;
case EPOCH:
for (i = 0; i < umaperf_iterations; i++) {
epoch_enter(umaperf_epoch);
@@ -594,10 +610,15 @@
switch (umaperf_type) {
#ifdef __FreeBSD__
case PLAIN:
- flags = UMA_ZONE_ROUNDROBIN;
+ flags = UMA_ZONE_FIRSTTOUCH;
break;
case SMR:
- flags = UMA_ZONE_ROUNDROBIN | UMA_ZONE_SMR;
+ umaperf_smr = smr_create("umaperf", 0);
+ flags = UMA_ZONE_FIRSTTOUCH;
+ break;
+ case LAZY_SMR:
+ umaperf_smr = smr_create("umaperf", SMR_LAZY);
+ flags = UMA_ZONE_FIRSTTOUCH;
break;
case EPOCH:
umaperf_epoch = epoch_alloc("umaperf", 0);
@@ -617,7 +638,8 @@
umaperf_zone = uma_zcreate("umaperf", umaperf_zone_size,
NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, flags);
#ifdef __FreeBSD__
- umaperf_smr = uma_zone_get_smr(umaperf_zone);
+ if (umaperf_smr != NULL)
+ uma_zone_set_smr(umaperf_zone, umaperf_smr);
#endif
umaperf_init_cpus();
}
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -2707,7 +2707,7 @@
/* Caller requests a private SMR context. */
if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
- zone->uz_smr = smr_create(zone->uz_name);
+ zone->uz_smr = smr_create(zone->uz_name, 0);
KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
(UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
Index: tools/uma/smrstress/smrstress.c
===================================================================
--- tools/uma/smrstress/smrstress.c
+++ tools/uma/smrstress/smrstress.c
@@ -64,12 +64,14 @@
static void
smrs_error(struct smrs *smrs, const char *fmt, ...)
{
+ smr_t self;
va_list ap;
+ self = zpcpu_get(smrs_smr);
atomic_add_int(&smrs_failures, 1);
printf("SMR ERROR: wr_seq %d, rd_seq %d, c_seq %d, generation %d, count %d ",
- smrs_smr->c_shared->s_wr_seq, smrs_smr->c_shared->s_rd_seq,
- zpcpu_get(smrs_smr)->c_seq, smrs->generation, smrs->count);
+ smr_current(smrs_smr), self->c_shared->s_rd_seq, self->c_seq,
+ smrs->generation, smrs->count);
va_start(ap, fmt);
(void)vprintf(fmt, ap);
va_end(ap);
@@ -83,7 +85,7 @@
/* Wait for the writer to exit. */
while (smrs_completed == 0) {
- smr_enter(smrs_smr);
+ smr_lazy_enter(smrs_smr);
cur = (void *)atomic_load_acq_ptr(&smrs_current);
if (cur->generation == -1)
smrs_error(cur, "read early: Use after free!\n");
@@ -94,7 +96,7 @@
smrs_error(cur, "read late: Use after free!\n");
else if (cnt <= 0)
smrs_error(cur, "Invalid ref\n");
- smr_exit(smrs_smr);
+ smr_lazy_exit(smrs_smr);
maybe_yield();
}
}
@@ -190,8 +192,9 @@
smrs_zone = uma_zcreate("smrs", sizeof(struct smrs),
smrs_ctor, smrs_dtor, NULL, NULL, UMA_ALIGN_PTR,
- UMA_ZONE_SMR | UMA_ZONE_ZINIT);
- smrs_smr = uma_zone_get_smr(smrs_zone);
+ UMA_ZONE_ZINIT);
+ smrs_smr = smr_create("smrs", SMR_LAZY);
+ uma_zone_set_smr(smrs_zone, smrs_smr);
}
static void

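
Finally, a sketch of the zone setup pattern the umaperf and smrstress hunks switch to: create the SMR context with the new flags argument and attach it via uma_zone_set_smr() instead of relying on UMA_ZONE_SMR to allocate a private context. "struct my_item", "my_zone_init" and related names are illustrative only.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smr.h>
#include <vm/uma.h>

struct my_item {
        int     mi_generation;
};

static uma_zone_t my_zone;
static smr_t my_lazy_smr;

static void
my_zone_init(void)
{
        /* Cheap read sections, ticks-based grace period for frees. */
        my_lazy_smr = smr_create("my_zone", SMR_LAZY);
        my_zone = uma_zcreate("my_zone", sizeof(struct my_item),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        uma_zone_set_smr(my_zone, my_lazy_smr);
}

static struct my_item *
my_item_alloc(void)
{
        /* SMR zones use the dedicated alloc/free entry points. */
        return (uma_zalloc_smr(my_zone, M_WAITOK));
}

static void
my_item_free(struct my_item *item)
{
        /* Reuse is deferred until concurrent lazy readers have drained. */
        uma_zfree_smr(my_zone, item);
}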