sys/dev/cxgbe/t4_mp_ring.c
/* ... 28 lines not shown ... */
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>

#include "t4_mp_ring.h"

#if defined(__i386__)
#define atomic_cmpset_acq_64 atomic_cmpset_64
#define atomic_cmpset_rel_64 atomic_cmpset_64
#endif
/*
 * mp_ring handles multiple threads (producers) enqueueing data to a tx queue.
 * The thread that is writing the hardware descriptors is the consumer and it
 * runs with the consumer lock held.  A producer becomes the consumer if there
 * isn't one already.  The consumer runs with the flags set to BUSY and either
 * consumes everything (ending at IDLE or COALESCING) or gets STALLED.  If it
 * is running over its budget it sets the flags to TOO_BUSY.  A producer that
 * observes a TOO_BUSY consumer will become the new consumer by setting the
 * flags to TAKING_OVER.  The original consumer stops and sets the flags back
 * to BUSY for the new consumer.
 *
 * COALESCING is the same as IDLE except there are items being held in the
 * hope that they can be coalesced with items that follow.  The driver must
 * arrange for a tx update or some other event that transmits all the held
 * items in a timely manner if nothing else is enqueued.
 */
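/*
 * Illustrative sketch (not part of this change): how a driver thread is
 * expected to use the ring.  'txq' and its members are hypothetical names.
 *
 *	// Producer path: reserve slots, copy the items in, and possibly
 *	// become the consumer and write hardware descriptors too.
 *	error = mp_ring_enqueue(txq->r, (void **)&m, 1, budget);
 *
 *	// Deferred path (e.g. interrupt or timer): restart a STALLED
 *	// consumer, or flush items held in the COALESCING state.
 *	mp_ring_check_drainage(txq->r, budget);
 */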
union ring_state {
	struct {
		uint16_t pidx_head;
		uint16_t pidx_tail;
		uint16_t cidx;
		uint16_t flags;
	};
	uint64_t state;
};
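/*
 * Note (illustrative, follows from the union above): the four 16-bit fields
 * overlay one uint64_t, so a single 64-bit atomic op reads or writes the
 * whole ring state consistently.  For example:
 *
 *	union ring_state s;
 *
 *	s.state = atomic_load_acq_64(&r->state);
 *	// s.pidx_head, s.pidx_tail, s.cidx, and s.flags now form one
 *	// consistent snapshot; no lock was needed to read them together.
 */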
enum {
	IDLE = 0,	/* tx is all caught up, nothing to do. */
	COALESCING,	/* IDLE, but tx frames are being held for coalescing */
	BUSY,		/* consumer is running already, or will be shortly. */
	TOO_BUSY,	/* consumer is running and is beyond its budget */
	TAKING_OVER,	/* new consumer taking over from a TOO_BUSY consumer */
	STALLED,	/* consumer stopped due to lack of resources. */
};
/* How a producer ended up as the consumer; indexes into r->consumer[]. */
enum {
	C_FAST = 0,	/* ring was idle and empty; fastest path */
	C_2,		/* ring was idle but not empty */
	C_3,		/* became consumer while updating pidx_tail */
	C_TAKEOVER,	/* took over from a TOO_BUSY consumer */
};
static inline uint16_t
space_available(struct mp_ring *r, union ring_state s)
{
	uint16_t x = r->size - 1;

	if (s.cidx == s.pidx_head)
		return (x);
	else if (s.cidx > s.pidx_head)
		return (s.cidx - s.pidx_head - 1);
	else
		return (x - s.pidx_head + s.cidx);
}
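/*
 * Worked example (illustrative): with r->size = 8 the ring holds at most 7
 * items; one slot always stays empty so that cidx == pidx_head means "empty"
 * rather than "full".  With cidx = 2 and pidx_head = 6, the space available
 * is 7 - 6 + 2 = 3 slots.
 */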
static inline uint16_t
increment_idx(struct mp_ring *r, uint16_t idx, uint16_t n)
{
	int x = r->size - idx;

	MPASS(x > 0);
	return (x > n ? idx + n : n - x);
}
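/*
 * Worked example (illustrative): with r->size = 8, advancing idx = 6 by
 * n = 3 wraps around the end of the ring: x = 8 - 6 = 2, so the result is
 * n - x = 1, i.e. the index moves 6 -> 7 -> 0 -> 1.
 */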
/*
 * Consumer.  Called with the consumer lock held and a guarantee that there is
 * work to do.
 */
static void
drain_ring(struct mp_ring *r, int budget)
{
	union ring_state os, ns;
	int n, pending, total;
	uint16_t cidx;
	uint16_t pidx;
	bool coalescing;

	mtx_assert(r->cons_lock, MA_OWNED);

	os.state = atomic_load_acq_64(&r->state);
	MPASS(os.flags == BUSY);

	cidx = os.cidx;
	pidx = os.pidx_tail;
	MPASS(cidx != pidx);

	pending = 0;
	total = 0;

	while (cidx != pidx) {

		/* Items from cidx to pidx are available for consumption. */
		n = r->drain(r, cidx, pidx, &coalescing);
		if (n == 0) {
			critical_enter();
			os.state = r->state;
jhb: Maybe use `atomic_load_64` here (no need for _acq and it's just a NOP, but more for documentation)
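A sketch of that suggestion (the atomic wrapper compiles to the same plain
load here; it only documents the intent):

	os.state = atomic_load_64(&r->state);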
			do {
				ns.state = os.state;
				ns.cidx = cidx;

				MPASS(os.flags == BUSY ||
				    os.flags == TOO_BUSY ||
				    os.flags == TAKING_OVER);

				if (os.flags == TAKING_OVER)
					ns.flags = BUSY;
				else
					ns.flags = STALLED;
			} while (atomic_fcmpset_64(&r->state, &os.state,
			    ns.state) == 0);
			critical_exit();
			if (os.flags == TAKING_OVER)
				counter_u64_add(r->abdications, 1);
			else if (ns.flags == STALLED)
				counter_u64_add(r->stalls, 1);
			break;
		}
		cidx = increment_idx(r, cidx, n);
		pending += n;
		total += n;
		counter_u64_add(r->consumed, n);

		os.state = atomic_load_acq_64(&r->state);
jhb: I don't think you need the 'acq' barrier/fence here, only in the atomic_fcmpset() below.
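A sketch of that suggestion (the acquire semantics would then come only from
the atomic_fcmpset_acq_64() loop that follows):

	os.state = atomic_load_64(&r->state);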
		do {
			MPASS(os.flags == BUSY || os.flags == TOO_BUSY ||
			    os.flags == TAKING_OVER);

			ns.state = os.state;
			ns.cidx = cidx;
			if (__predict_false(os.flags == TAKING_OVER)) {
				MPASS(total >= budget);
				ns.flags = BUSY;
				continue;
			}
			if (cidx == os.pidx_tail) {
				ns.flags = coalescing ? COALESCING : IDLE;
				continue;
			}
			if (total >= budget) {
				ns.flags = TOO_BUSY;
				continue;
			}
			MPASS(os.flags == BUSY);
			if (pending < 32)
				break;
		} while (atomic_fcmpset_acq_64(&r->state, &os.state, ns.state) == 0);

		if (__predict_false(os.flags == TAKING_OVER)) {
			MPASS(ns.flags == BUSY);
			counter_u64_add(r->abdications, 1);
			break;
		}

		if (ns.flags == IDLE || ns.flags == COALESCING) {
			MPASS(ns.pidx_tail == cidx);
			if (ns.pidx_head != ns.pidx_tail)
				counter_u64_add(r->cons_idle2, 1);
			else
				counter_u64_add(r->cons_idle, 1);
			break;
		}

		/*
		 * The acquire style atomic above guarantees visibility of items
		 * associated with any pidx change that we notice here.
		 */
		pidx = ns.pidx_tail;
		pending = 0;
	}
#ifdef INVARIANTS
	if (os.flags == TAKING_OVER)
		MPASS(ns.flags == BUSY);
	else {
		MPASS(ns.flags == IDLE || ns.flags == COALESCING ||
		    ns.flags == STALLED);
	}
#endif
}
static void
drain_txpkts(struct mp_ring *r, union ring_state os, int budget)
{
	union ring_state ns;
	uint16_t cidx = os.cidx;
	uint16_t pidx = os.pidx_tail;
	bool coalescing;

	mtx_assert(r->cons_lock, MA_OWNED);
	MPASS(os.flags == BUSY);
	MPASS(cidx == pidx);

	r->drain(r, cidx, pidx, &coalescing);
	MPASS(coalescing == false);
	critical_enter();
	os.state = r->state;
	do {
		ns.state = os.state;
		MPASS(os.flags == BUSY);
		MPASS(os.cidx == cidx);
		if (ns.cidx == ns.pidx_tail)
			ns.flags = IDLE;
		else
			ns.flags = BUSY;
	} while (atomic_fcmpset_acq_64(&r->state, &os.state, ns.state) == 0);
	critical_exit();

	if (ns.flags == BUSY)
		drain_ring(r, budget);
}
int
mp_ring_alloc(struct mp_ring **pr, int size, void *cookie, ring_drain_t drain,
    ring_can_drain_t can_drain, struct malloc_type *mt, struct mtx *lck,
    int flags)
{
	struct mp_ring *r;
	int i;

	/* All idx are 16b so size can be 65536 at most */
	if (pr == NULL || size < 2 || size > 65536 || drain == NULL ||
	    can_drain == NULL)
		return (EINVAL);
	*pr = NULL;
	flags &= M_NOWAIT | M_WAITOK;
	MPASS(flags != 0);

	r = malloc(__offsetof(struct mp_ring, items[size]), mt, flags | M_ZERO);
	if (r == NULL)
		return (ENOMEM);
	r->size = size;
	r->cookie = cookie;
	r->mt = mt;
	r->drain = drain;
	r->can_drain = can_drain;
	r->cons_lock = lck;
	if ((r->dropped = counter_u64_alloc(flags)) == NULL)
		goto failed;
	for (i = 0; i < nitems(r->consumer); i++) {
		if ((r->consumer[i] = counter_u64_alloc(flags)) == NULL)
			goto failed;
	}
	if ((r->not_consumer = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->abdications = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->stalls = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->consumed = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->cons_idle = counter_u64_alloc(flags)) == NULL)
		goto failed;
	if ((r->cons_idle2 = counter_u64_alloc(flags)) == NULL)
		goto failed;
	*pr = r;
	return (0);
failed:
	mp_ring_free(r);
	return (ENOMEM);
}
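/*
 * Illustrative sketch (hypothetical names, not part of this change): a driver
 * would allocate the ring at attach time, passing its drain callbacks and the
 * mutex that will serve as the consumer lock:
 *
 *	rc = mp_ring_alloc(&txq->r, eq_size, txq, drain_cb, can_drain_cb,
 *	    M_DEVBUF, &txq->cons_lock, M_WAITOK);
 */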
void
mp_ring_free(struct mp_ring *r)
{
	int i;

	if (r == NULL)
		return;

	if (r->dropped != NULL)
		counter_u64_free(r->dropped);
	for (i = 0; i < nitems(r->consumer); i++) {
		if (r->consumer[i] != NULL)
			counter_u64_free(r->consumer[i]);
	}
	if (r->not_consumer != NULL)
		counter_u64_free(r->not_consumer);
	if (r->abdications != NULL)
		counter_u64_free(r->abdications);
	if (r->stalls != NULL)
		counter_u64_free(r->stalls);
	if (r->consumed != NULL)
		counter_u64_free(r->consumed);
	if (r->cons_idle != NULL)
		counter_u64_free(r->cons_idle);
	if (r->cons_idle2 != NULL)
		counter_u64_free(r->cons_idle2);

	free(r, r->mt);
}
/*
 * Enqueue n items and maybe drain the ring for some time.
 *
 * Returns an errno.
 */
int
mp_ring_enqueue(struct mp_ring *r, void **items, int n, int budget)
{
	union ring_state os, ns;
	uint16_t pidx_start, pidx_stop;
	int i, nospc, cons;
	bool consumer;

	MPASS(items != NULL);
	MPASS(n > 0);

	/*
	 * Reserve room for the new items.  Our reservation, if successful, is
	 * from 'pidx_start' to 'pidx_stop'.
	 */
	nospc = 0;
	os.state = r->state;
	for (;;) {
		for (;;) {
			if (__predict_true(space_available(r, os) >= n))
				break;

			/* Not enough room in the ring. */
			MPASS(os.flags != IDLE);
			MPASS(os.flags != COALESCING);
			if (__predict_false(++nospc > 100)) {
				counter_u64_add(r->dropped, n);
				return (ENOBUFS);
			}
			if (os.flags == STALLED)
				mp_ring_check_drainage(r, 64);
			else
				cpu_spinwait();
			os.state = r->state;
		}

		/* There is room in the ring. */
		cons = -1;
		ns.state = os.state;
		ns.pidx_head = increment_idx(r, os.pidx_head, n);
		if (os.flags == IDLE || os.flags == COALESCING) {
			MPASS(os.pidx_tail == os.cidx);
			if (os.pidx_head == os.pidx_tail) {
				cons = C_FAST;
				ns.pidx_tail = increment_idx(r, os.pidx_tail, n);
			} else
				cons = C_2;
			ns.flags = BUSY;
		} else if (os.flags == TOO_BUSY) {
			cons = C_TAKEOVER;
			ns.flags = TAKING_OVER;
		}
		critical_enter();
		if (atomic_fcmpset_64(&r->state, &os.state, ns.state))
			break;
		critical_exit();
		cpu_spinwait();
	}
	pidx_start = os.pidx_head;
	pidx_stop = ns.pidx_head;

	if (cons == C_FAST) {
		i = pidx_start;
		do {
			r->items[i] = *items++;
			if (__predict_false(++i == r->size))
				i = 0;
		} while (i != pidx_stop);
		critical_exit();
		counter_u64_add(r->consumer[C_FAST], 1);
		mtx_lock(r->cons_lock);
		drain_ring(r, budget);
		mtx_unlock(r->cons_lock);
		return (0);
	}
	/*
	 * Wait for other producers who got in ahead of us to enqueue their
	 * items, one producer at a time.  It is our turn when the ring's
	 * pidx_tail reaches the beginning of our reservation (pidx_start).
	 */
	while (ns.pidx_tail != pidx_start) {
		cpu_spinwait();
		ns.state = r->state;
	}

	/* Now it is our turn to fill up the area we reserved earlier. */
	i = pidx_start;
	do {
		r->items[i] = *items++;
		if (__predict_false(++i == r->size))
			i = 0;
	} while (i != pidx_stop);
	/*
	 * Update the ring's pidx_tail.  The release style atomic guarantees
	 * that the items are visible to any thread that sees the updated pidx.
	 */
	os.state = r->state;
	do {
		consumer = false;
		ns.state = os.state;
		ns.pidx_tail = pidx_stop;
		if (os.flags == IDLE || os.flags == COALESCING ||
		    (os.flags == STALLED && r->can_drain(r))) {
			MPASS(cons == -1);
			consumer = true;
			ns.flags = BUSY;
		}
	} while (atomic_fcmpset_rel_64(&r->state, &os.state, ns.state) == 0);
	critical_exit();

	if (cons == -1) {
		if (consumer)
			cons = C_3;
		else {
			counter_u64_add(r->not_consumer, 1);
			return (0);
		}
	}
	MPASS(cons > C_FAST && cons < nitems(r->consumer));
	counter_u64_add(r->consumer[cons], 1);
	mtx_lock(r->cons_lock);
	drain_ring(r, budget);
	mtx_unlock(r->cons_lock);

	return (0);
}
void
mp_ring_check_drainage(struct mp_ring *r, int budget)
{
	union ring_state os, ns;

	os.state = r->state;
	if (os.flags == STALLED && r->can_drain(r)) {
		MPASS(os.cidx != os.pidx_tail);	/* implied by STALLED */
		ns.state = os.state;
		ns.flags = BUSY;

		/*
		 * The acquire style atomic guarantees visibility of items
		 * associated with the pidx that we read here.
		 */
		if (atomic_cmpset_acq_64(&r->state, os.state, ns.state)) {
			mtx_lock(r->cons_lock);
			drain_ring(r, budget);
			mtx_unlock(r->cons_lock);
		}
	} else if (os.flags == COALESCING) {
		MPASS(os.cidx == os.pidx_tail);
		ns.state = os.state;
		ns.flags = BUSY;
		if (atomic_cmpset_acq_64(&r->state, os.state, ns.state)) {
			mtx_lock(r->cons_lock);
			drain_txpkts(r, ns, budget);
			mtx_unlock(r->cons_lock);
		}
	}
}
void
mp_ring_reset_stats(struct mp_ring *r)
{
	int i;

	counter_u64_zero(r->dropped);
	for (i = 0; i < nitems(r->consumer); i++)
		counter_u64_zero(r->consumer[i]);
	counter_u64_zero(r->not_consumer);
	counter_u64_zero(r->abdications);
	counter_u64_zero(r->stalls);
	counter_u64_zero(r->consumed);
	counter_u64_zero(r->cons_idle);
	counter_u64_zero(r->cons_idle2);
}
bool
mp_ring_is_idle(struct mp_ring *r)
{
	union ring_state s;

	s.state = r->state;
	if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx &&
	    s.flags == IDLE)
		return (true);

	return (false);
}
void
mp_ring_sysctls(struct mp_ring *r, struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *children)
{
	struct sysctl_oid *oid;

	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "mp_ring", CTLFLAG_RD |
	    CTLFLAG_MPSAFE, NULL, "mp_ring statistics");
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_U64(ctx, children, OID_AUTO, "state", CTLFLAG_RD,
	    __DEVOLATILE(uint64_t *, &r->state), 0, "ring state");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "dropped", CTLFLAG_RD,
	    &r->dropped, "# of items dropped");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumed",
	    CTLFLAG_RD, &r->consumed, "# of items consumed");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "fast_consumer",
	    CTLFLAG_RD, &r->consumer[C_FAST],
	    "# of times producer became consumer (fast)");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumer2",
	    CTLFLAG_RD, &r->consumer[C_2],
	    "# of times producer became consumer (2)");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "consumer3",
	    CTLFLAG_RD, &r->consumer[C_3],
	    "# of times producer became consumer (3)");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "takeovers",
	    CTLFLAG_RD, &r->consumer[C_TAKEOVER],
	    "# of times producer took over from another consumer.");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "not_consumer",
	    CTLFLAG_RD, &r->not_consumer,
	    "# of times producer did not become consumer");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "abdications",
	    CTLFLAG_RD, &r->abdications, "# of consumer abdications");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "stalls",
	    CTLFLAG_RD, &r->stalls, "# of consumer stalls");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "cons_idle",
	    CTLFLAG_RD, &r->cons_idle,
	    "# of times consumer ran fully to completion");
	SYSCTL_ADD_COUNTER_U64(ctx, children, OID_AUTO, "cons_idle2",
	    CTLFLAG_RD, &r->cons_idle2,
	    "# of times consumer idled when another enqueue was in progress");
}