diff --git a/include/ck_backoff.h b/include/ck_backoff.h index 82a4f2152e3c..a1f7616a55db 100644 --- a/include/ck_backoff.h +++ b/include/ck_backoff.h @@ -1,57 +1,57 @@ /* * Copyright 2009-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_BACKOFF_H #define CK_BACKOFF_H #include #include #ifndef CK_BACKOFF_CEILING #define CK_BACKOFF_CEILING ((1 << 20) - 1) #endif #define CK_BACKOFF_INITIALIZER (1 << 9) typedef unsigned int ck_backoff_t; /* * This is a exponential back-off implementation. */ CK_CC_INLINE static void ck_backoff_eb(unsigned int *c) { unsigned int i, ceiling; ceiling = *c; for (i = 0; i < ceiling; i++) ck_pr_barrier(); - *c = ceiling <<= ceiling < CK_BACKOFF_CEILING; + *c = ceiling << (ceiling < CK_BACKOFF_CEILING); return; } #endif /* CK_BACKOFF_H */ diff --git a/include/ck_cc.h b/include/ck_cc.h index 9a152a3cddab..1b4ff4635fa6 100644 --- a/include/ck_cc.h +++ b/include/ck_cc.h @@ -1,173 +1,175 @@ /* * Copyright 2009-2015 Samy Al Bahra. * Copyright 2014 Paul Khuong. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef CK_CC_H #define CK_CC_H #if defined(__GNUC__) || defined(__SUNPRO_C) #include "gcc/ck_cc.h" #endif #ifndef CK_CC_RESTRICT #define CK_CC_RESTRICT #endif #ifndef CK_CC_INLINE #define CK_CC_INLINE inline #endif #ifndef CK_CC_FORCE_INLINE #define CK_CC_FORCE_INLINE inline #endif #define CK_CC_DECONST_PTR(X) ((void *)(uintptr_t)(X)) /* * Container function. * This relies on (compiler) implementation-defined behavior. */ +#ifndef CK_CC_CONTAINER #define CK_CC_CONTAINER(F, T, M, N) \ CK_CC_INLINE static T * \ N(F *p) \ { \ F *n = p; \ return (T *)(void *)(((char *)n) - ((size_t)&((T *)0)->M)); \ } +#endif #define CK_CC_PAD(x) union { char pad[x]; } #ifndef CK_CC_ALIASED #define CK_CC_ALIASED #endif #ifndef CK_CC_UNUSED #define CK_CC_UNUSED #endif #ifndef CK_CC_USED #define CK_CC_USED #endif #ifndef CK_CC_IMM #define CK_CC_IMM #endif #ifndef CK_CC_PACKED #define CK_CC_PACKED #endif #ifndef CK_CC_WEAKREF #define CK_CC_WEAKREF #endif #ifndef CK_CC_ALIGN #define CK_CC_ALIGN(X) #endif #ifndef CK_CC_CACHELINE #define CK_CC_CACHELINE #endif #ifndef CK_CC_LIKELY #define CK_CC_LIKELY(x) x #endif #ifndef CK_CC_UNLIKELY #define CK_CC_UNLIKELY(x) x #endif #ifndef CK_CC_TYPEOF #define CK_CC_TYPEOF(X, DEFAULT) (DEFAULT) #endif #define CK_F_CC_FFS_G(L, T) \ CK_CC_INLINE static int \ ck_cc_##L(T v) \ { \ unsigned int i; \ \ if (v == 0) \ return 0; \ \ for (i = 1; (v & 1) == 0; i++, v >>= 1); \ return i; \ } #ifndef CK_F_CC_FFS #define CK_F_CC_FFS CK_F_CC_FFS_G(ffs, unsigned int) #endif /* CK_F_CC_FFS */ #ifndef CK_F_CC_FFSL #define CK_F_CC_FFSL CK_F_CC_FFS_G(ffsl, unsigned long) #endif /* CK_F_CC_FFSL */ #ifndef CK_F_CC_FFSLL #define CK_F_CC_FFSLL CK_F_CC_FFS_G(ffsll, unsigned long long) #endif /* CK_F_CC_FFSLL */ #undef CK_F_CC_FFS_G #ifndef CK_F_CC_CTZ #define CK_F_CC_CTZ CK_CC_INLINE static int ck_cc_ctz(unsigned int x) { unsigned int i; if (x == 0) return 0; for (i = 0; (x & 1) == 0; i++, x >>= 1); return i; } #endif #ifndef CK_F_CC_POPCOUNT #define CK_F_CC_POPCOUNT CK_CC_INLINE static int ck_cc_popcount(unsigned int x) { unsigned int acc; for (acc = 0; x != 0; x >>= 1) acc += x & 1; return acc; } #endif #ifdef __cplusplus #define CK_CPP_CAST(type, arg) static_cast(arg) #else #define CK_CPP_CAST(type, arg) arg #endif #endif /* CK_CC_H */ diff --git a/include/ck_ec.h b/include/ck_ec.h new file mode 100644 index 000000000000..cd2a36813a79 --- /dev/null +++ b/include/ck_ec.h @@ -0,0 +1,945 @@ +/* + * Copyright 2018 Paul Khuong, Google LLC. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Overview + * ======== + * + * ck_ec implements 32- and 64- bit event counts. Event counts let us + * easily integrate OS-level blocking (e.g., futexes) in lock-free + * protocols. Waiters block conditionally, if the event count's value + * is still equal to some old value. + * + * Event counts come in four variants: 32 and 64 bit (with one bit + * stolen for internal signaling, so 31 and 63 bit counters), and + * single or multiple producers (wakers). Waiters are always multiple + * consumers. The 32 bit variants are smaller, and more efficient, + * especially in single producer mode. The 64 bit variants are larger, + * but practically invulnerable to ABA. + * + * The 32 bit variant is always available. The 64 bit variant is only + * available if CK supports 64-bit atomic operations. Currently, + * specialization for single producer is only implemented for x86 and + * x86-64, on compilers that support GCC extended inline assembly; + * other platforms fall back to the multiple producer code path. + * + * A typical usage pattern is: + * + * 1. On the producer side: + * + * - Make changes to some shared data structure, without involving + * the event count at all. + * - After each change, call ck_ec_inc on the event count. The call + * acts as a write-write barrier, and wakes up any consumer blocked + * on the event count (waiting for new changes). + * + * 2. On the consumer side: + * + * - Snapshot ck_ec_value of the event count. The call acts as a + * read barrier. + * - Read and process the shared data structure. + * - Wait for new changes by calling ck_ec_wait with the snapshot value. + * + * Some data structures may opt for tighter integration with their + * event count. For example, an SPMC ring buffer or disruptor might + * use the event count's value as the write pointer. If the buffer is + * regularly full, it might also make sense to store the read pointer + * in an MP event count. + * + * This event count implementation supports tighter integration in two + * ways. + * + * Producers may opt to increment by an arbitrary value (less than + * INT32_MAX / INT64_MAX), in order to encode, e.g., byte + * offsets. Larger increment values make wraparound more likely, so + * the increments should still be relatively small. + * + * Consumers may pass a predicate to ck_ec_wait_pred. This predicate + * can make `ck_ec_wait_pred` return early, before the event count's + * value changes, and can override the deadline passed to futex_wait. + * This lets consumer block on one eventcount, while optimistically + * looking at other waking conditions. + * + * API Reference + * ============= + * + * When compiled as C11 or later, this header defines type-generic + * macros for ck_ec32 and ck_ec64; the reference describes this + * type-generic API. + * + * ck_ec needs additional OS primitives to determine the current time, + * to wait on an address, and to wake all threads waiting on a given + * address. These are defined with fields in a struct ck_ec_ops. 
Each + * ck_ec_ops may additionally define the number of spin loop + * iterations in the slow path, as well as the initial wait time in + * the internal exponential backoff, the exponential scale factor, and + * the right shift count (< 32). + * + * The ops, in addition to the single/multiple producer flag, are + * encapsulated in a struct ck_ec_mode, passed to most ck_ec + * operations. + * + * ec is a struct ck_ec32 *, or a struct ck_ec64 *. + * + * value is an uint32_t for ck_ec32, and an uint64_t for ck_ec64. It + * never exceeds INT32_MAX and INT64_MAX respectively. + * + * mode is a struct ck_ec_mode *. + * + * deadline is either NULL, or a `const struct timespec *` that will + * be treated as an absolute deadline. + * + * `void ck_ec_init(ec, value)`: initializes the event count to value. + * + * `value ck_ec_value(ec)`: returns the current value of the event + * counter. This read acts as a read (acquire) barrier. + * + * `bool ck_ec_has_waiters(ec)`: returns whether some thread has + * marked the event count as requiring an OS wakeup. + * + * `void ck_ec_inc(ec, mode)`: increments the value of the event + * counter by one. This writes acts as a write barrier. Wakes up + * any waiting thread. + * + * `value ck_ec_add(ec, mode, value)`: increments the event counter by + * `value`, and returns the event counter's previous value. This + * write acts as a write barrier. Wakes up any waiting thread. + * + * `int ck_ec_deadline(struct timespec *new_deadline, + * mode, + * const struct timespec *timeout)`: + * computes a deadline `timeout` away from the current time. If + * timeout is NULL, computes a deadline in the infinite future. The + * resulting deadline is written to `new_deadline`. Returns 0 on + * success, and -1 if ops->gettime failed (without touching errno). + * + * `int ck_ec_wait(ec, mode, value, deadline)`: waits until the event + * counter's value differs from `value`, or, if `deadline` is + * provided and non-NULL, until the current time is after that + * deadline. Use a deadline with tv_sec = 0 for a non-blocking + * execution. Returns 0 if the event counter has changed, and -1 on + * timeout. This function acts as a read (acquire) barrier. + * + * `int ck_ec_wait_pred(ec, mode, value, pred, data, deadline)`: waits + * until the event counter's value differs from `value`, or until + * `pred` returns non-zero, or, if `deadline` is provided and + * non-NULL, until the current time is after that deadline. Use a + * deadline with tv_sec = 0 for a non-blocking execution. Returns 0 if + * the event counter has changed, `pred`'s return value if non-zero, + * and -1 on timeout. This function acts as a read (acquire) barrier. + * + * `pred` is always called as `pred(data, iteration_deadline, now)`, + * where `iteration_deadline` is a timespec of the deadline for this + * exponential backoff iteration, and `now` is the current time. If + * `pred` returns a non-zero value, that value is immediately returned + * to the waiter. Otherwise, `pred` is free to modify + * `iteration_deadline` (moving it further in the future is a bad + * idea). + * + * Implementation notes + * ==================== + * + * The multiple producer implementation is a regular locked event + * count, with a single flag bit to denote the need to wake up waiting + * threads. 
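+ *
+ * Concretely, for the 32 bit event count the flag is the sign bit
+ * and the value lives in the low 31 bits, mirroring the inline
+ * accessors defined below (sketch):
+ *
+ *     value       = counter & ~(1UL << 31);
+ *     has_waiters = counter &  (1UL << 31);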
+ * + * The single producer specialization is heavily tied to + * [x86-TSO](https://www.cl.cam.ac.uk/~pes20/weakmemory/cacm.pdf), and + * to non-atomic read-modify-write instructions (e.g., `inc mem`); + * these non-atomic RMW let us write to the same memory locations with + * atomic and non-atomic instructions, without suffering from process + * scheduling stalls. + * + * The reason we can mix atomic and non-atomic writes to the `counter` + * word is that every non-atomic write obviates the need for the + * atomically flipped flag bit: we only use non-atomic writes to + * update the event count, and the atomic flag only informs the + * producer that we would like a futex_wake, because of the update. + * We only require the non-atomic RMW counter update to prevent + * preemption from introducing arbitrarily long worst case delays. + * + * Correctness does not rely on the usual ordering argument: in the + * absence of fences, there is no strict ordering between atomic and + * non-atomic writes. The key is instead x86-TSO's guarantee that a + * read is satisfied from the most recent buffered write in the local + * store queue if there is one, or from memory if there is no write to + * that address in the store queue. + * + * x86-TSO's constraint on reads suffices to guarantee that the + * producer will never forget about a counter update. If the last + * update is still queued, the new update will be based on the queued + * value. Otherwise, the new update will be based on the value in + * memory, which may or may not have had its flag flipped. In either + * case, the value of the counter (modulo flag) is correct. + * + * When the producer forwards the counter's value from its store + * queue, the new update might not preserve a flag flip. Any waiter + * thus has to check from time to time to determine if it wasn't + * woken up because the flag bit was silently cleared. + * + * In reality, the store queue in x86-TSO stands for in-flight + * instructions in the chip's out-of-order backend. In the vast + * majority of cases, instructions will only remain in flight for a + * few hundred or thousand of cycles. That's why ck_ec_wait spins on + * the `counter` word for ~100 iterations after flipping its flag bit: + * if the counter hasn't changed after that many iterations, it is + * very likely that the producer's next counter update will observe + * the flag flip. + * + * That's still not a hard guarantee of correctness. Conservatively, + * we can expect that no instruction will remain in flight for more + * than 1 second... if only because some interrupt will have forced + * the chip to store its architectural state in memory, at which point + * an instruction is either fully retired or rolled back. Interrupts, + * particularly the pre-emption timer, are why single-producer updates + * must happen in a single non-atomic read-modify-write instruction. + * Having a single instruction as the critical section means we only + * have to consider the worst-case execution time for that + * instruction. That's easier than doing the same for a pair of + * instructions, which an unlucky pre-emption could delay for + * arbitrarily long. + * + * Thus, after a short spin loop, ck_ec_wait enters an exponential + * backoff loop, where each "sleep" is instead a futex_wait. The + * backoff is only necessary to handle rare cases where the flag flip + * was overwritten after the spin loop. 
Eventually, more than one + * second will have elapsed since the flag flip, and the sleep timeout + * becomes infinite: since the flag bit has been set for much longer + * than the time for which an instruction may remain in flight, the + * flag will definitely be observed at the next counter update. + * + * The 64 bit ck_ec_wait pulls another trick: futexes only handle 32 + * bit ints, so we must treat the 64 bit counter's low 32 bits as an + * int in futex_wait. That's a bit dodgy, but fine in practice, given + * that the OS's futex code will always read whatever value is + * currently in memory: even if the producer thread were to wait on + * its own event count, the syscall and ring transition would empty + * the store queue (the out-of-order execution backend). + * + * Finally, what happens when the producer is migrated to another core + * or otherwise pre-empted? Migration must already incur a barrier, so + * that thread always sees its own writes, so that's safe. As for + * pre-emption, that requires storing the architectural state, which + * means every instruction must either be executed fully or not at + * all when pre-emption happens. + */ + +#ifndef CK_EC_H +#define CK_EC_H +#include +#include +#include +#include +#include +#include + +/* + * If we have ck_pr_faa_64 (and, presumably, ck_pr_load_64), we + * support 63 bit counters. + */ +#ifdef CK_F_PR_FAA_64 +#define CK_F_EC64 +#endif /* CK_F_PR_FAA_64 */ + +/* + * GCC inline assembly lets us exploit non-atomic read-modify-write + * instructions on x86/x86_64 for a fast single-producer mode. + * + * If we CK_F_EC_SP is not defined, CK_EC always uses the slower + * multiple producer code. + */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define CK_F_EC_SP +#endif /* GNUC && (__i386__ || __x86_64__) */ + +struct ck_ec_ops; + +struct ck_ec_wait_state { + struct timespec start; /* Time when we entered ck_ec_wait. */ + struct timespec now; /* Time now. */ + const struct ck_ec_ops *ops; + void *data; /* Opaque pointer for the predicate's internal state. */ + +}; + +/* + * ck_ec_ops define system-specific functions to get the current time, + * atomically wait on an address if it still has some expected value, + * and to wake all threads waiting on an address. + * + * Each platform is expected to have few (one) opaque pointer to a + * const ops struct, and reuse it for all ck_ec_mode structs. + */ +struct ck_ec_ops { + /* Populates out with the current time. Returns non-zero on failure. */ + int (*gettime)(const struct ck_ec_ops *, struct timespec *out); + + /* + * Waits on address if its value is still `expected`. If + * deadline is non-NULL, stops waiting once that deadline is + * reached. May return early for any reason. + */ + void (*wait32)(const struct ck_ec_wait_state *, const uint32_t *, + uint32_t expected, const struct timespec *deadline); + + /* + * Same as wait32, but for a 64 bit counter. Only used if + * CK_F_EC64 is defined. + * + * If underlying blocking primitive only supports 32 bit + * control words, it should be safe to block on the least + * significant half of the 64 bit address. + */ + void (*wait64)(const struct ck_ec_wait_state *, const uint64_t *, + uint64_t expected, const struct timespec *deadline); + + /* Wakes up all threads waiting on address. */ + void (*wake32)(const struct ck_ec_ops *, const uint32_t *address); + + /* + * Same as wake32, but for a 64 bit counter. Only used if + * CK_F_EC64 is defined. 
+ * + * When wait64 truncates the control word at address to `only` + * consider its least significant half, wake64 should perform + * any necessary fixup (e.g., on big endian platforms). + */ + void (*wake64)(const struct ck_ec_ops *, const uint64_t *address); + + /* + * Number of iterations for the initial busy wait. 0 defaults + * to 100 (not ABI stable). + */ + uint32_t busy_loop_iter; + + /* + * Delay in nanoseconds for the first iteration of the + * exponential backoff. 0 defaults to 2 ms (not ABI stable). + */ + uint32_t initial_wait_ns; + + /* + * Scale factor for the exponential backoff. 0 defaults to 8x + * (not ABI stable). + */ + uint32_t wait_scale_factor; + + /* + * Right shift count for the exponential backoff. The update + * after each iteration is + * wait_ns = (wait_ns * wait_scale_factor) >> wait_shift_count, + * until one second has elapsed. After that, the deadline goes + * to infinity. + */ + uint32_t wait_shift_count; +}; + +/* + * ck_ec_mode wraps the ops table, and informs the fast path whether + * it should attempt to specialize for single producer mode. + * + * mode structs are expected to be exposed by value, e.g., + * + * extern const struct ck_ec_ops system_ec_ops; + * + * static const struct ck_ec_mode ec_sp = { + * .ops = &system_ec_ops, + * .single_producer = true + * }; + * + * static const struct ck_ec_mode ec_mp = { + * .ops = &system_ec_ops, + * .single_producer = false + * }; + * + * ck_ec_mode structs are only passed to inline functions defined in + * this header, and never escape to their slow paths, so they should + * not result in any object file size increase. + */ +struct ck_ec_mode { + const struct ck_ec_ops *ops; + /* + * If single_producer is true, the event count has a unique + * incrementer. The implementation will specialize ck_ec_inc + * and ck_ec_add if possible (if CK_F_EC_SP is defined). + */ + bool single_producer; +}; + +struct ck_ec32 { + /* Flag is "sign" bit, value in bits 0:30. */ + uint32_t counter; +}; + +typedef struct ck_ec32 ck_ec32_t; + +#ifdef CK_F_EC64 +struct ck_ec64 { + /* + * Flag is bottom bit, value in bits 1:63. Eventcount only + * works on x86-64 (i.e., little endian), so the futex int + * lies in the first 4 (bottom) bytes. + */ + uint64_t counter; +}; + +typedef struct ck_ec64 ck_ec64_t; +#endif /* CK_F_EC64 */ + +#define CK_EC_INITIALIZER { .counter = 0 } + +/* + * Initializes the event count to `value`. The value must not + * exceed INT32_MAX. + */ +static void ck_ec32_init(struct ck_ec32 *ec, uint32_t value); + +#ifndef CK_F_EC64 +#define ck_ec_init ck_ec32_init +#else +/* + * Initializes the event count to `value`. The value must not + * exceed INT64_MAX. + */ +static void ck_ec64_init(struct ck_ec64 *ec, uint64_t value); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_init(EC, VALUE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_init, \ + struct ck_ec64 : ck_ec64_init)((EC), (VALUE))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Returns the counter value in the event count. The value is at most + * INT32_MAX. + */ +static uint32_t ck_ec32_value(const struct ck_ec32* ec); + +#ifndef CK_F_EC64 +#define ck_ec_value ck_ec32_value +#else +/* + * Returns the counter value in the event count. The value is at most + * INT64_MAX. 
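+ *
+ * For example, a consumer built on the usage pattern from the
+ * overview might loop as follows (sketch; the `mode` struct and the
+ * process_updates() callback are hypothetical):
+ *
+ *     for (;;) {
+ *             uint64_t snapshot = ck_ec64_value(ec);
+ *
+ *             process_updates();
+ *             ck_ec64_wait(ec, &mode, snapshot, NULL);
+ *     }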
+ */ +static uint64_t ck_ec64_value(const struct ck_ec64* ec); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_value(EC) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_value, \ + struct ck_ec64 : ck_ec64_value)((EC))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Returns whether there may be slow pathed waiters that need an + * explicit OS wakeup for this event count. + */ +static bool ck_ec32_has_waiters(const struct ck_ec32 *ec); + +#ifndef CK_F_EC64 +#define ck_ec_has_waiters ck_ec32_has_waiters +#else +static bool ck_ec64_has_waiters(const struct ck_ec64 *ec); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_has_waiters(EC) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_has_waiters, \ + struct ck_ec64 : ck_ec64_has_waiters)((EC))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Increments the counter value in the event count by one, and wakes + * up any waiter. + */ +static void ck_ec32_inc(struct ck_ec32 *ec, const struct ck_ec_mode *mode); + +#ifndef CK_F_EC64 +#define ck_ec_inc ck_ec32_inc +#else +static void ck_ec64_inc(struct ck_ec64 *ec, const struct ck_ec_mode *mode); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_inc(EC, MODE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_inc, \ + struct ck_ec64 : ck_ec64_inc)((EC), (MODE))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Increments the counter value in the event count by delta, wakes + * up any waiter, and returns the previous counter value. + */ +static uint32_t ck_ec32_add(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta); + +#ifndef CK_F_EC64 +#define ck_ec_add ck_ec32_add +#else +static uint64_t ck_ec64_add(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_add(EC, MODE, DELTA) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_add, \ + struct ck_ec64 : ck_ec64_add)((EC), (MODE), (DELTA))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Populates `new_deadline` with a deadline `timeout` in the future. + * Returns 0 on success, and -1 if clock_gettime failed, in which + * case errno is left as is. + */ +static int ck_ec_deadline(struct timespec *new_deadline, + const struct ck_ec_mode *mode, + const struct timespec *timeout); + +/* + * Waits until the counter value in the event count differs from + * old_value, or, if deadline is non-NULL, until CLOCK_MONOTONIC is + * past the deadline. + * + * Returns 0 on success, and -1 on timeout. + */ +static int ck_ec32_wait(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + const struct timespec *deadline); + +#ifndef CK_F_EC64 +#define ck_ec_wait ck_ec32_wait +#else +static int ck_ec64_wait(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + const struct timespec *deadline); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_wait(EC, MODE, OLD_VALUE, DEADLINE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_wait, \ + struct ck_ec64 : ck_ec64_wait)((EC), (MODE), \ + (OLD_VALUE), (DEADLINE))) + +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Waits until the counter value in the event count differs from + * old_value, pred returns non-zero, or, if deadline is non-NULL, + * until CLOCK_MONOTONIC is past the deadline. + * + * Returns 0 on success, -1 on timeout, and the return value of pred + * if it returns non-zero. + * + * A NULL pred represents a function that always returns 0. 
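+ *
+ * For example, a predicate that also stops waiting once a
+ * hypothetical application flag (reached through the opaque data
+ * pointer) is set could look like this (sketch):
+ *
+ *     static int
+ *     stop_requested(const struct ck_ec_wait_state *state,
+ *         struct timespec *deadline)
+ *     {
+ *             const int *stop = state->data;
+ *
+ *             (void)deadline;
+ *             return (*stop != 0) ? -2 : 0;
+ *     }
+ *
+ * passed as ck_ec32_wait_pred(ec, mode, value, stop_requested,
+ * &stop_flag, deadline); the -2 is then returned to the caller.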
+ */ +static int ck_ec32_wait_pred(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + +#ifndef CK_F_EC64 +#define ck_ec_wait_pred ck_ec32_wait_pred +#else +static int ck_ec64_wait_pred(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + +#if __STDC_VERSION__ >= 201112L +#define ck_ec_wait_pred(EC, MODE, OLD_VALUE, PRED, DATA, DEADLINE) \ + (_Generic(*(EC), \ + struct ck_ec32 : ck_ec32_wait_pred, \ + struct ck_ec64 : ck_ec64_wait_pred) \ + ((EC), (MODE), (OLD_VALUE), (PRED), (DATA), (DEADLINE))) +#endif /* __STDC_VERSION__ */ +#endif /* CK_F_EC64 */ + +/* + * Inline implementation details. 32 bit first, then 64 bit + * conditionally. + */ +CK_CC_FORCE_INLINE void ck_ec32_init(struct ck_ec32 *ec, uint32_t value) +{ + ec->counter = value & ~(1UL << 31); + return; +} + +CK_CC_FORCE_INLINE uint32_t ck_ec32_value(const struct ck_ec32 *ec) +{ + uint32_t ret = ck_pr_load_32(&ec->counter) & ~(1UL << 31); + + ck_pr_fence_acquire(); + return ret; +} + +CK_CC_FORCE_INLINE bool ck_ec32_has_waiters(const struct ck_ec32 *ec) +{ + return ck_pr_load_32(&ec->counter) & (1UL << 31); +} + +/* Slow path for ck_ec{32,64}_{inc,add} */ +void ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops); + +CK_CC_FORCE_INLINE void ck_ec32_inc(struct ck_ec32 *ec, + const struct ck_ec_mode *mode) +{ +#if !defined(CK_F_EC_SP) + /* Nothing to specialize if we don't have EC_SP. */ + ck_ec32_add(ec, mode, 1); + return; +#else + char flagged; + +#if __GNUC__ >= 6 + /* + * We don't want to wake if the sign bit is 0. We do want to + * wake if the sign bit just flipped from 1 to 0. We don't + * care what happens when our increment caused the sign bit to + * flip from 0 to 1 (that's once per 2^31 increment). + * + * This leaves us with four cases: + * + * old sign bit | new sign bit | SF | OF | ZF + * ------------------------------------------- + * 0 | 0 | 0 | 0 | ? + * 0 | 1 | 1 | 0 | ? + * 1 | 1 | 1 | 0 | ? + * 1 | 0 | 0 | 0 | 1 + * + * In the first case, we don't want to hit ck_ec32_wake. In + * the last two cases, we do want to call ck_ec32_wake. In the + * second case, we don't care, so we arbitrarily choose to + * call ck_ec32_wake. + * + * The "le" condition checks if SF != OF, or ZF == 1, which + * meets our requirements. + */ +#define CK_EC32_INC_ASM(PREFIX) \ + __asm__ volatile(PREFIX " incl %0" \ + : "+m"(ec->counter), "=@ccle"(flagged) \ + :: "cc", "memory") +#else +#define CK_EC32_INC_ASM(PREFIX) \ + __asm__ volatile(PREFIX " incl %0; setle %1" \ + : "+m"(ec->counter), "=r"(flagged) \ + :: "cc", "memory") +#endif /* __GNUC__ */ + + if (mode->single_producer == true) { + ck_pr_fence_store(); + CK_EC32_INC_ASM(""); + } else { + ck_pr_fence_store_atomic(); + CK_EC32_INC_ASM("lock"); + } +#undef CK_EC32_INC_ASM + + if (CK_CC_UNLIKELY(flagged)) { + ck_ec32_wake(ec, mode->ops); + } + + return; +#endif /* CK_F_EC_SP */ +} + +CK_CC_FORCE_INLINE uint32_t ck_ec32_add_epilogue(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old) +{ + const uint32_t flag_mask = 1U << 31; + uint32_t ret; + + ret = old & ~flag_mask; + /* These two only differ if the flag bit is set. 
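+	 * If they do, a waiter has marked the count as needing an OS
+	 * wake-up.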
*/ + if (CK_CC_UNLIKELY(old != ret)) { + ck_ec32_wake(ec, mode->ops); + } + + return ret; +} + +static CK_CC_INLINE uint32_t ck_ec32_add_mp(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta) +{ + uint32_t old; + + ck_pr_fence_store_atomic(); + old = ck_pr_faa_32(&ec->counter, delta); + return ck_ec32_add_epilogue(ec, mode, old); +} + +#ifdef CK_F_EC_SP +static CK_CC_INLINE uint32_t ck_ec32_add_sp(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta) +{ + uint32_t old; + + /* + * Correctness of this racy write depends on actually + * having an update to write. Exit here if the update + * is a no-op. + */ + if (CK_CC_UNLIKELY(delta == 0)) { + return ck_ec32_value(ec); + } + + ck_pr_fence_store(); + old = delta; + __asm__ volatile("xaddl %1, %0" + : "+m"(ec->counter), "+r"(old) + :: "cc", "memory"); + return ck_ec32_add_epilogue(ec, mode, old); +} +#endif /* CK_F_EC_SP */ + +CK_CC_FORCE_INLINE uint32_t ck_ec32_add(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t delta) +{ +#ifdef CK_F_EC_SP + if (mode->single_producer == true) { + return ck_ec32_add_sp(ec, mode, delta); + } +#endif + + return ck_ec32_add_mp(ec, mode, delta); +} + +int ck_ec_deadline_impl(struct timespec *new_deadline, + const struct ck_ec_ops *ops, + const struct timespec *timeout); + +CK_CC_FORCE_INLINE int ck_ec_deadline(struct timespec *new_deadline, + const struct ck_ec_mode *mode, + const struct timespec *timeout) +{ + return ck_ec_deadline_impl(new_deadline, mode->ops, timeout); +} + + +int ck_ec32_wait_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + const struct timespec *deadline); + +CK_CC_FORCE_INLINE int ck_ec32_wait(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + const struct timespec *deadline) +{ + if (ck_ec32_value(ec) != old_value) { + return 0; + } + + return ck_ec32_wait_slow(ec, mode->ops, old_value, deadline); +} + +int ck_ec32_wait_pred_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + +CK_CC_FORCE_INLINE int +ck_ec32_wait_pred(struct ck_ec32 *ec, + const struct ck_ec_mode *mode, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline) +{ + if (ck_ec32_value(ec) != old_value) { + return 0; + } + + return ck_ec32_wait_pred_slow(ec, mode->ops, old_value, + pred, data, deadline); +} + +#ifdef CK_F_EC64 +CK_CC_FORCE_INLINE void ck_ec64_init(struct ck_ec64 *ec, uint64_t value) +{ + ec->counter = value << 1; + return; +} + +CK_CC_FORCE_INLINE uint64_t ck_ec64_value(const struct ck_ec64 *ec) +{ + uint64_t ret = ck_pr_load_64(&ec->counter) >> 1; + + ck_pr_fence_acquire(); + return ret; +} + +CK_CC_FORCE_INLINE bool ck_ec64_has_waiters(const struct ck_ec64 *ec) +{ + return ck_pr_load_64(&ec->counter) & 1; +} + +void ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops); + +CK_CC_FORCE_INLINE void ck_ec64_inc(struct ck_ec64 *ec, + const struct ck_ec_mode *mode) +{ + /* We always xadd, so there's no special optimization here. 
*/ + (void)ck_ec64_add(ec, mode, 1); + return; +} + +CK_CC_FORCE_INLINE uint64_t ck_ec_add64_epilogue(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old) +{ + uint64_t ret = old >> 1; + + if (CK_CC_UNLIKELY(old & 1)) { + ck_ec64_wake(ec, mode->ops); + } + + return ret; +} + +static CK_CC_INLINE uint64_t ck_ec64_add_mp(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta) +{ + uint64_t inc = 2 * delta; /* The low bit is the flag bit. */ + + ck_pr_fence_store_atomic(); + return ck_ec_add64_epilogue(ec, mode, ck_pr_faa_64(&ec->counter, inc)); +} + +#ifdef CK_F_EC_SP +/* Single-producer specialisation. */ +static CK_CC_INLINE uint64_t ck_ec64_add_sp(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta) +{ + uint64_t old; + + /* + * Correctness of this racy write depends on actually + * having an update to write. Exit here if the update + * is a no-op. + */ + if (CK_CC_UNLIKELY(delta == 0)) { + return ck_ec64_value(ec); + } + + ck_pr_fence_store(); + old = 2 * delta; /* The low bit is the flag bit. */ + __asm__ volatile("xaddq %1, %0" + : "+m"(ec->counter), "+r"(old) + :: "cc", "memory"); + return ck_ec_add64_epilogue(ec, mode, old); +} +#endif /* CK_F_EC_SP */ + +/* + * Dispatch on mode->single_producer in this FORCE_INLINE function: + * the end result is always small, but not all compilers have enough + * foresight to inline and get the reduction. + */ +CK_CC_FORCE_INLINE uint64_t ck_ec64_add(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t delta) +{ +#ifdef CK_F_EC_SP + if (mode->single_producer == true) { + return ck_ec64_add_sp(ec, mode, delta); + } +#endif + + return ck_ec64_add_mp(ec, mode, delta); +} + +int ck_ec64_wait_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + const struct timespec *deadline); + +CK_CC_FORCE_INLINE int ck_ec64_wait(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + const struct timespec *deadline) +{ + if (ck_ec64_value(ec) != old_value) { + return 0; + } + + return ck_ec64_wait_slow(ec, mode->ops, old_value, deadline); +} + +int ck_ec64_wait_pred_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline); + + +CK_CC_FORCE_INLINE int +ck_ec64_wait_pred(struct ck_ec64 *ec, + const struct ck_ec_mode *mode, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline) +{ + if (ck_ec64_value(ec) != old_value) { + return 0; + } + + return ck_ec64_wait_pred_slow(ec, mode->ops, old_value, + pred, data, deadline); +} +#endif /* CK_F_EC64 */ +#endif /* !CK_EC_H */ diff --git a/include/ck_fifo.h b/include/ck_fifo.h index 6d500708c445..c9a6f3d9a87d 100644 --- a/include/ck_fifo.h +++ b/include/ck_fifo.h @@ -1,478 +1,478 @@ /* * Copyright 2010-2015 Samy Al Bahra. * Copyright 2011 David Joseph. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_FIFO_H #define CK_FIFO_H #include #include #include #include #include #ifndef CK_F_FIFO_SPSC #define CK_F_FIFO_SPSC struct ck_fifo_spsc_entry { void *value; struct ck_fifo_spsc_entry *next; }; typedef struct ck_fifo_spsc_entry ck_fifo_spsc_entry_t; struct ck_fifo_spsc { ck_spinlock_t m_head; struct ck_fifo_spsc_entry *head; char pad[CK_MD_CACHELINE - sizeof(struct ck_fifo_spsc_entry *) - sizeof(ck_spinlock_t)]; ck_spinlock_t m_tail; struct ck_fifo_spsc_entry *tail; struct ck_fifo_spsc_entry *head_snapshot; struct ck_fifo_spsc_entry *garbage; }; typedef struct ck_fifo_spsc ck_fifo_spsc_t; CK_CC_INLINE static bool ck_fifo_spsc_enqueue_trylock(struct ck_fifo_spsc *fifo) { return ck_spinlock_trylock(&fifo->m_tail); } CK_CC_INLINE static void ck_fifo_spsc_enqueue_lock(struct ck_fifo_spsc *fifo) { ck_spinlock_lock(&fifo->m_tail); return; } CK_CC_INLINE static void ck_fifo_spsc_enqueue_unlock(struct ck_fifo_spsc *fifo) { ck_spinlock_unlock(&fifo->m_tail); return; } CK_CC_INLINE static bool ck_fifo_spsc_dequeue_trylock(struct ck_fifo_spsc *fifo) { return ck_spinlock_trylock(&fifo->m_head); } CK_CC_INLINE static void ck_fifo_spsc_dequeue_lock(struct ck_fifo_spsc *fifo) { ck_spinlock_lock(&fifo->m_head); return; } CK_CC_INLINE static void ck_fifo_spsc_dequeue_unlock(struct ck_fifo_spsc *fifo) { ck_spinlock_unlock(&fifo->m_head); return; } CK_CC_INLINE static void ck_fifo_spsc_init(struct ck_fifo_spsc *fifo, struct ck_fifo_spsc_entry *stub) { ck_spinlock_init(&fifo->m_head); ck_spinlock_init(&fifo->m_tail); stub->next = NULL; fifo->head = fifo->tail = fifo->head_snapshot = fifo->garbage = stub; return; } CK_CC_INLINE static void ck_fifo_spsc_deinit(struct ck_fifo_spsc *fifo, struct ck_fifo_spsc_entry **garbage) { - *garbage = fifo->head; + *garbage = fifo->garbage; fifo->head = fifo->tail = NULL; return; } CK_CC_INLINE static void ck_fifo_spsc_enqueue(struct ck_fifo_spsc *fifo, struct ck_fifo_spsc_entry *entry, void *value) { entry->value = value; entry->next = NULL; /* If stub->next is visible, guarantee that entry is consistent. */ ck_pr_fence_store(); ck_pr_store_ptr(&fifo->tail->next, entry); fifo->tail = entry; return; } CK_CC_INLINE static bool ck_fifo_spsc_dequeue(struct ck_fifo_spsc *fifo, void *value) { struct ck_fifo_spsc_entry *entry; /* * The head pointer is guaranteed to always point to a stub entry. * If the stub entry does not point to an entry, then the queue is * empty. */ entry = ck_pr_load_ptr(&fifo->head->next); if (entry == NULL) return false; /* If entry is visible, guarantee store to value is visible. */ ck_pr_store_ptr_unsafe(value, entry->value); ck_pr_fence_store(); ck_pr_store_ptr(&fifo->head, entry); return true; } /* * Recycle a node. This technique for recycling nodes is based on * Dmitriy Vyukov's work. 
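 *
 * A producer typically tries to recycle a node before allocating a
 * new one, e.g. (sketch; the malloc fallback and error handling are
 * the caller's responsibility):
 *
 *     ck_fifo_spsc_entry_t *entry = ck_fifo_spsc_recycle(fifo);
 *
 *     if (entry == NULL)
 *             entry = malloc(sizeof *entry);
 *     ck_fifo_spsc_enqueue(fifo, entry, value);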
*/ CK_CC_INLINE static struct ck_fifo_spsc_entry * ck_fifo_spsc_recycle(struct ck_fifo_spsc *fifo) { struct ck_fifo_spsc_entry *garbage; if (fifo->head_snapshot == fifo->garbage) { fifo->head_snapshot = ck_pr_load_ptr(&fifo->head); if (fifo->head_snapshot == fifo->garbage) return NULL; } garbage = fifo->garbage; fifo->garbage = garbage->next; return garbage; } CK_CC_INLINE static bool ck_fifo_spsc_isempty(struct ck_fifo_spsc *fifo) { struct ck_fifo_spsc_entry *head = ck_pr_load_ptr(&fifo->head); return ck_pr_load_ptr(&head->next) == NULL; } #define CK_FIFO_SPSC_ISEMPTY(f) ((f)->head->next == NULL) #define CK_FIFO_SPSC_FIRST(f) ((f)->head->next) #define CK_FIFO_SPSC_NEXT(m) ((m)->next) #define CK_FIFO_SPSC_SPARE(f) ((f)->head) #define CK_FIFO_SPSC_FOREACH(fifo, entry) \ for ((entry) = CK_FIFO_SPSC_FIRST(fifo); \ (entry) != NULL; \ (entry) = CK_FIFO_SPSC_NEXT(entry)) #define CK_FIFO_SPSC_FOREACH_SAFE(fifo, entry, T) \ for ((entry) = CK_FIFO_SPSC_FIRST(fifo); \ (entry) != NULL && ((T) = (entry)->next, 1); \ (entry) = (T)) #endif /* CK_F_FIFO_SPSC */ #ifdef CK_F_PR_CAS_PTR_2 #ifndef CK_F_FIFO_MPMC #define CK_F_FIFO_MPMC struct ck_fifo_mpmc_entry; struct ck_fifo_mpmc_pointer { struct ck_fifo_mpmc_entry *pointer; char *generation CK_CC_PACKED; } CK_CC_ALIGN(16); struct ck_fifo_mpmc_entry { void *value; struct ck_fifo_mpmc_pointer next; }; typedef struct ck_fifo_mpmc_entry ck_fifo_mpmc_entry_t; struct ck_fifo_mpmc { struct ck_fifo_mpmc_pointer head; char pad[CK_MD_CACHELINE - sizeof(struct ck_fifo_mpmc_pointer)]; struct ck_fifo_mpmc_pointer tail; }; typedef struct ck_fifo_mpmc ck_fifo_mpmc_t; CK_CC_INLINE static void ck_fifo_mpmc_init(struct ck_fifo_mpmc *fifo, struct ck_fifo_mpmc_entry *stub) { stub->next.pointer = NULL; stub->next.generation = NULL; fifo->head.pointer = fifo->tail.pointer = stub; fifo->head.generation = fifo->tail.generation = NULL; return; } CK_CC_INLINE static void ck_fifo_mpmc_deinit(struct ck_fifo_mpmc *fifo, struct ck_fifo_mpmc_entry **garbage) { *garbage = fifo->head.pointer; fifo->head.pointer = fifo->tail.pointer = NULL; return; } CK_CC_INLINE static void ck_fifo_mpmc_enqueue(struct ck_fifo_mpmc *fifo, struct ck_fifo_mpmc_entry *entry, void *value) { struct ck_fifo_mpmc_pointer tail, next, update; /* * Prepare the upcoming node and make sure to commit the updates * before publishing. */ entry->value = value; entry->next.pointer = NULL; entry->next.generation = 0; ck_pr_fence_store_atomic(); for (;;) { tail.generation = ck_pr_load_ptr(&fifo->tail.generation); ck_pr_fence_load(); tail.pointer = ck_pr_load_ptr(&fifo->tail.pointer); next.generation = ck_pr_load_ptr(&tail.pointer->next.generation); ck_pr_fence_load(); next.pointer = ck_pr_load_ptr(&tail.pointer->next.pointer); if (ck_pr_load_ptr(&fifo->tail.generation) != tail.generation) continue; if (next.pointer != NULL) { /* * If the tail pointer has an entry following it then * it needs to be forwarded to the next entry. This * helps us guarantee we are always operating on the * last entry. */ update.pointer = next.pointer; update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); } else { /* * Attempt to commit new entry to the end of the * current tail. */ update.pointer = entry; update.generation = next.generation + 1; if (ck_pr_cas_ptr_2(&tail.pointer->next, &next, &update) == true) break; } } ck_pr_fence_atomic(); /* After a successful insert, forward the tail to the new entry. 
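 * The CAS below may fail if another thread has already helped
 * forward the tail; that is harmless, so its result is ignored.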
*/ update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); return; } CK_CC_INLINE static bool ck_fifo_mpmc_tryenqueue(struct ck_fifo_mpmc *fifo, struct ck_fifo_mpmc_entry *entry, void *value) { struct ck_fifo_mpmc_pointer tail, next, update; entry->value = value; entry->next.pointer = NULL; entry->next.generation = 0; ck_pr_fence_store_atomic(); tail.generation = ck_pr_load_ptr(&fifo->tail.generation); ck_pr_fence_load(); tail.pointer = ck_pr_load_ptr(&fifo->tail.pointer); next.generation = ck_pr_load_ptr(&tail.pointer->next.generation); ck_pr_fence_load(); next.pointer = ck_pr_load_ptr(&tail.pointer->next.pointer); if (ck_pr_load_ptr(&fifo->tail.generation) != tail.generation) return false; if (next.pointer != NULL) { /* * If the tail pointer has an entry following it then * it needs to be forwarded to the next entry. This * helps us guarantee we are always operating on the * last entry. */ update.pointer = next.pointer; update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); return false; } else { /* * Attempt to commit new entry to the end of the * current tail. */ update.pointer = entry; update.generation = next.generation + 1; if (ck_pr_cas_ptr_2(&tail.pointer->next, &next, &update) == false) return false; } ck_pr_fence_atomic(); /* After a successful insert, forward the tail to the new entry. */ update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); return true; } CK_CC_INLINE static bool ck_fifo_mpmc_dequeue(struct ck_fifo_mpmc *fifo, void *value, struct ck_fifo_mpmc_entry **garbage) { struct ck_fifo_mpmc_pointer head, tail, next, update; for (;;) { head.generation = ck_pr_load_ptr(&fifo->head.generation); ck_pr_fence_load(); head.pointer = ck_pr_load_ptr(&fifo->head.pointer); tail.generation = ck_pr_load_ptr(&fifo->tail.generation); ck_pr_fence_load(); tail.pointer = ck_pr_load_ptr(&fifo->tail.pointer); next.generation = ck_pr_load_ptr(&head.pointer->next.generation); ck_pr_fence_load(); next.pointer = ck_pr_load_ptr(&head.pointer->next.pointer); update.pointer = next.pointer; if (head.pointer == tail.pointer) { /* * The head is guaranteed to always point at a stub * entry. If the stub entry has no references then the * queue is empty. */ if (next.pointer == NULL) return false; /* Forward the tail pointer if necessary. */ update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); } else { /* * It is possible for head snapshot to have been * re-used. Avoid deferencing during enqueue * re-use. */ if (next.pointer == NULL) continue; /* Save value before commit. */ *(void **)value = ck_pr_load_ptr(&next.pointer->value); /* Forward the head pointer to the next entry. 
*/ update.generation = head.generation + 1; if (ck_pr_cas_ptr_2(&fifo->head, &head, &update) == true) break; } } *garbage = head.pointer; return true; } CK_CC_INLINE static bool ck_fifo_mpmc_trydequeue(struct ck_fifo_mpmc *fifo, void *value, struct ck_fifo_mpmc_entry **garbage) { struct ck_fifo_mpmc_pointer head, tail, next, update; head.generation = ck_pr_load_ptr(&fifo->head.generation); ck_pr_fence_load(); head.pointer = ck_pr_load_ptr(&fifo->head.pointer); tail.generation = ck_pr_load_ptr(&fifo->tail.generation); ck_pr_fence_load(); tail.pointer = ck_pr_load_ptr(&fifo->tail.pointer); next.generation = ck_pr_load_ptr(&head.pointer->next.generation); ck_pr_fence_load(); next.pointer = ck_pr_load_ptr(&head.pointer->next.pointer); update.pointer = next.pointer; if (head.pointer == tail.pointer) { /* * The head is guaranteed to always point at a stub * entry. If the stub entry has no references then the * queue is empty. */ if (next.pointer == NULL) return false; /* Forward the tail pointer if necessary. */ update.generation = tail.generation + 1; ck_pr_cas_ptr_2(&fifo->tail, &tail, &update); return false; } else { /* * It is possible for head snapshot to have been * re-used. Avoid deferencing during enqueue. */ if (next.pointer == NULL) return false; /* Save value before commit. */ *(void **)value = ck_pr_load_ptr(&next.pointer->value); /* Forward the head pointer to the next entry. */ update.generation = head.generation + 1; if (ck_pr_cas_ptr_2(&fifo->head, &head, &update) == false) return false; } *garbage = head.pointer; return true; } #define CK_FIFO_MPMC_ISEMPTY(f) ((f)->head.pointer->next.pointer == NULL) #define CK_FIFO_MPMC_FIRST(f) ((f)->head.pointer->next.pointer) #define CK_FIFO_MPMC_NEXT(m) ((m)->next.pointer) #define CK_FIFO_MPMC_FOREACH(fifo, entry) \ for ((entry) = CK_FIFO_MPMC_FIRST(fifo); \ (entry) != NULL; \ (entry) = CK_FIFO_MPMC_NEXT(entry)) #define CK_FIFO_MPMC_FOREACH_SAFE(fifo, entry, T) \ for ((entry) = CK_FIFO_MPMC_FIRST(fifo); \ (entry) != NULL && ((T) = (entry)->next.pointer, 1); \ (entry) = (T)) #endif /* CK_F_FIFO_MPMC */ #endif /* CK_F_PR_CAS_PTR_2 */ #endif /* CK_FIFO_H */ diff --git a/include/ck_hs.h b/include/ck_hs.h index 3c12b6e602a7..cd3e5dac87aa 100644 --- a/include/ck_hs.h +++ b/include/ck_hs.h @@ -1,136 +1,144 @@ /* * Copyright 2012-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_HS_H #define CK_HS_H #include #include #include #include #include #include #include /* * Indicates a single-writer many-reader workload. Mutually * exclusive with CK_HS_MODE_MPMC */ #define CK_HS_MODE_SPMC 1 /* * Indicates that values to be stored are not pointers but * values. Allows for full precision. Mutually exclusive * with CK_HS_MODE_OBJECT. */ #define CK_HS_MODE_DIRECT 2 /* * Indicates that the values to be stored are pointers. * Allows for space optimizations in the presence of pointer * packing. Mutually exclusive with CK_HS_MODE_DIRECT. */ #define CK_HS_MODE_OBJECT 8 /* * Indicates a delete-heavy workload. This will reduce the * need for garbage collection at the cost of approximately * 12% to 20% increased memory usage. */ #define CK_HS_MODE_DELETE 16 /* Currently unsupported. */ #define CK_HS_MODE_MPMC (void) /* * Hash callback function. */ typedef unsigned long ck_hs_hash_cb_t(const void *, unsigned long); /* * Returns pointer to object if objects are equivalent. */ typedef bool ck_hs_compare_cb_t(const void *, const void *); #if defined(CK_MD_POINTER_PACK_ENABLE) && defined(CK_MD_VMA_BITS) #define CK_HS_PP #define CK_HS_KEY_MASK ((1U << ((sizeof(void *) * 8) - CK_MD_VMA_BITS)) - 1) #endif struct ck_hs_map; struct ck_hs { struct ck_malloc *m; struct ck_hs_map *map; unsigned int mode; unsigned long seed; ck_hs_hash_cb_t *hf; ck_hs_compare_cb_t *compare; }; typedef struct ck_hs ck_hs_t; struct ck_hs_stat { unsigned long tombstones; unsigned long n_entries; unsigned int probe_maximum; }; struct ck_hs_iterator { void **cursor; unsigned long offset; struct ck_hs_map *map; }; typedef struct ck_hs_iterator ck_hs_iterator_t; #define CK_HS_ITERATOR_INITIALIZER { NULL, 0, NULL } /* Convenience wrapper to table hash function. */ #define CK_HS_HASH(T, F, K) F((K), (T)->seed) +/* Computes the hash of n bytes of k for the specified hash map. 
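+ *
+ * For example (sketch), a caller can reuse the computed hash for a
+ * subsequent lookup:
+ *
+ *     unsigned long h = ck_hs_hash(hs, key);
+ *     void *object = ck_hs_get(hs, h, key);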
*/ +static inline unsigned long +ck_hs_hash(const struct ck_hs *hs, const void *k) +{ + + return hs->hf(k, hs->seed); +} + typedef void *ck_hs_apply_fn_t(void *, void *); bool ck_hs_apply(ck_hs_t *, unsigned long, const void *, ck_hs_apply_fn_t *, void *); void ck_hs_iterator_init(ck_hs_iterator_t *); bool ck_hs_next(ck_hs_t *, ck_hs_iterator_t *, void **); bool ck_hs_next_spmc(ck_hs_t *, ck_hs_iterator_t *, void **); bool ck_hs_move(ck_hs_t *, ck_hs_t *, ck_hs_hash_cb_t *, ck_hs_compare_cb_t *, struct ck_malloc *); bool ck_hs_init(ck_hs_t *, unsigned int, ck_hs_hash_cb_t *, ck_hs_compare_cb_t *, struct ck_malloc *, unsigned long, unsigned long); void ck_hs_destroy(ck_hs_t *); void *ck_hs_get(ck_hs_t *, unsigned long, const void *); bool ck_hs_put(ck_hs_t *, unsigned long, const void *); bool ck_hs_put_unique(ck_hs_t *, unsigned long, const void *); bool ck_hs_set(ck_hs_t *, unsigned long, const void *, void **); bool ck_hs_fas(ck_hs_t *, unsigned long, const void *, void **); void *ck_hs_remove(ck_hs_t *, unsigned long, const void *); bool ck_hs_grow(ck_hs_t *, unsigned long); bool ck_hs_rebuild(ck_hs_t *); bool ck_hs_gc(ck_hs_t *, unsigned long, unsigned long); unsigned long ck_hs_count(ck_hs_t *); bool ck_hs_reset(ck_hs_t *); bool ck_hs_reset_size(ck_hs_t *, unsigned long); void ck_hs_stat(ck_hs_t *, struct ck_hs_stat *); #endif /* CK_HS_H */ diff --git a/include/ck_pr.h b/include/ck_pr.h index 2de6e13ec3c9..8ebf855692dd 100644 --- a/include/ck_pr.h +++ b/include/ck_pr.h @@ -1,1262 +1,1275 @@ /* * Copyright 2009-2015 Samy Al Bahra. * Copyright 2011 David Joseph. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_PR_H #define CK_PR_H #include #include #include #include #include -#ifndef CK_USE_CC_BUILTINS +/* + * Default to using builtins for clang analyzer, coverity, and sparse: + * inline assembly is often too opaque for useful analysis. Override + * the defaults by defining CK_USE_CC_BUILTINS=0 or 1. 
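+ *
+ * For example, a build can force the builtin-based implementation
+ * with something like (sketch):
+ *
+ *     cc -DCK_USE_CC_BUILTINS=1 -c foo.c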
+ */ +#if !defined(CK_USE_CC_BUILTINS) +#if defined(__clang_analyzer__) || defined(__COVERITY__) || defined(__CHECKER__) +#define CK_USE_CC_BUILTINS 1 +#else +#define CK_USE_CC_BUILTINS 0 +#endif +#endif + +#if !CK_USE_CC_BUILTINS #if defined(__x86_64__) #include "gcc/x86_64/ck_pr.h" #elif defined(__x86__) #include "gcc/x86/ck_pr.h" #elif defined(__sparcv9__) #include "gcc/sparcv9/ck_pr.h" #elif defined(__ppc64__) #include "gcc/ppc64/ck_pr.h" #elif defined(__s390x__) #include "gcc/s390x/ck_pr.h" #elif defined(__ppc__) #include "gcc/ppc/ck_pr.h" #elif defined(__arm__) #include "gcc/arm/ck_pr.h" #elif defined(__aarch64__) #include "gcc/aarch64/ck_pr.h" #elif !defined(__GNUC__) #error Your platform is unsupported #endif #endif /* !CK_USE_CC_BUILTINS */ #if defined(__GNUC__) #include "gcc/ck_pr.h" #endif #define CK_PR_FENCE_EMIT(T) \ CK_CC_INLINE static void \ ck_pr_fence_##T(void) \ { \ ck_pr_fence_strict_##T(); \ return; \ } #define CK_PR_FENCE_NOOP(T) \ CK_CC_INLINE static void \ ck_pr_fence_##T(void) \ { \ ck_pr_barrier(); \ return; \ } /* * None of the currently supported platforms allow for data-dependent * load ordering. */ CK_PR_FENCE_NOOP(load_depends) #define ck_pr_fence_strict_load_depends ck_pr_fence_load_depends /* * In memory models where atomic operations do not have serializing * effects, atomic read-modify-write operations are modeled as stores. */ #if defined(CK_MD_RMO) /* * Only stores to the same location have a global * ordering. */ CK_PR_FENCE_EMIT(atomic) CK_PR_FENCE_EMIT(atomic_load) CK_PR_FENCE_EMIT(atomic_store) CK_PR_FENCE_EMIT(store_atomic) CK_PR_FENCE_EMIT(load_atomic) CK_PR_FENCE_EMIT(load_store) CK_PR_FENCE_EMIT(store_load) CK_PR_FENCE_EMIT(load) CK_PR_FENCE_EMIT(store) CK_PR_FENCE_EMIT(memory) CK_PR_FENCE_EMIT(acquire) CK_PR_FENCE_EMIT(release) CK_PR_FENCE_EMIT(acqrel) CK_PR_FENCE_EMIT(lock) CK_PR_FENCE_EMIT(unlock) #elif defined(CK_MD_PSO) /* * Anything can be re-ordered with respect to stores. * Otherwise, loads are executed in-order. */ CK_PR_FENCE_EMIT(atomic) CK_PR_FENCE_NOOP(atomic_load) CK_PR_FENCE_EMIT(atomic_store) CK_PR_FENCE_EMIT(store_atomic) CK_PR_FENCE_NOOP(load_atomic) CK_PR_FENCE_EMIT(load_store) CK_PR_FENCE_EMIT(store_load) CK_PR_FENCE_NOOP(load) CK_PR_FENCE_EMIT(store) CK_PR_FENCE_EMIT(memory) CK_PR_FENCE_EMIT(acquire) CK_PR_FENCE_EMIT(release) CK_PR_FENCE_EMIT(acqrel) CK_PR_FENCE_EMIT(lock) CK_PR_FENCE_EMIT(unlock) #elif defined(CK_MD_TSO) /* * Only loads are re-ordered and only with respect to * prior stores. Atomic operations are serializing. */ CK_PR_FENCE_NOOP(atomic) CK_PR_FENCE_NOOP(atomic_load) CK_PR_FENCE_NOOP(atomic_store) CK_PR_FENCE_NOOP(store_atomic) CK_PR_FENCE_NOOP(load_atomic) CK_PR_FENCE_NOOP(load_store) CK_PR_FENCE_EMIT(store_load) CK_PR_FENCE_NOOP(load) CK_PR_FENCE_NOOP(store) CK_PR_FENCE_EMIT(memory) CK_PR_FENCE_NOOP(acquire) CK_PR_FENCE_NOOP(release) CK_PR_FENCE_NOOP(acqrel) CK_PR_FENCE_NOOP(lock) CK_PR_FENCE_NOOP(unlock) #else #error "No memory model has been defined." 
#endif /* CK_MD_TSO */ #undef CK_PR_FENCE_EMIT #undef CK_PR_FENCE_NOOP #ifndef CK_F_PR_RFO #define CK_F_PR_RFO CK_CC_INLINE static void ck_pr_rfo(const void *m) { (void)m; return; } #endif /* CK_F_PR_RFO */ #define CK_PR_STORE_SAFE(DST, VAL, TYPE) \ ck_pr_md_store_##TYPE( \ ((void)sizeof(*(DST) = (VAL)), (DST)), \ (VAL)) #define ck_pr_store_ptr(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), ptr) #define ck_pr_store_char(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), char) #ifndef CK_PR_DISABLE_DOUBLE #define ck_pr_store_double(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), double) #endif #define ck_pr_store_uint(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), uint) #define ck_pr_store_int(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), int) #define ck_pr_store_32(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), 32) #define ck_pr_store_16(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), 16) #define ck_pr_store_8(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), 8) #define ck_pr_store_ptr_unsafe(DST, VAL) ck_pr_md_store_ptr((DST), (VAL)) #ifdef CK_F_PR_LOAD_64 #define ck_pr_store_64(DST, VAL) CK_PR_STORE_SAFE((DST), (VAL), 64) #endif /* CK_F_PR_LOAD_64 */ #define CK_PR_LOAD_PTR_SAFE(SRC) (CK_CC_TYPEOF(*(SRC), (void *)))ck_pr_md_load_ptr((SRC)) #define ck_pr_load_ptr(SRC) CK_PR_LOAD_PTR_SAFE((SRC)) #define CK_PR_LOAD_SAFE(SRC, TYPE) ck_pr_md_load_##TYPE((SRC)) #define ck_pr_load_char(SRC) CK_PR_LOAD_SAFE((SRC), char) #ifndef CK_PR_DISABLE_DOUBLE #define ck_pr_load_double(SRC) CK_PR_LOAD_SAFE((SRC), double) #endif #define ck_pr_load_uint(SRC) CK_PR_LOAD_SAFE((SRC), uint) #define ck_pr_load_int(SRC) CK_PR_LOAD_SAFE((SRC), int) #define ck_pr_load_32(SRC) CK_PR_LOAD_SAFE((SRC), 32) #define ck_pr_load_16(SRC) CK_PR_LOAD_SAFE((SRC), 16) #define ck_pr_load_8(SRC) CK_PR_LOAD_SAFE((SRC), 8) #ifdef CK_F_PR_LOAD_64 #define ck_pr_load_64(SRC) CK_PR_LOAD_SAFE((SRC), 64) #endif /* CK_F_PR_LOAD_64 */ #define CK_PR_BIN(K, S, M, T, P, C) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target, T value) \ { \ T previous; \ C punt; \ punt = ck_pr_md_load_##S(target); \ previous = (T)punt; \ while (ck_pr_cas_##S##_value(target, \ (C)previous, \ (C)(previous P value), \ &previous) == false) \ ck_pr_stall(); \ \ return; \ } #define CK_PR_BIN_S(K, S, T, P) CK_PR_BIN(K, S, T, T, P, T) #if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE) #ifndef CK_F_PR_ADD_CHAR #define CK_F_PR_ADD_CHAR CK_PR_BIN_S(add, char, char, +) #endif /* CK_F_PR_ADD_CHAR */ #ifndef CK_F_PR_SUB_CHAR #define CK_F_PR_SUB_CHAR CK_PR_BIN_S(sub, char, char, -) #endif /* CK_F_PR_SUB_CHAR */ #ifndef CK_F_PR_AND_CHAR #define CK_F_PR_AND_CHAR CK_PR_BIN_S(and, char, char, &) #endif /* CK_F_PR_AND_CHAR */ #ifndef CK_F_PR_XOR_CHAR #define CK_F_PR_XOR_CHAR CK_PR_BIN_S(xor, char, char, ^) #endif /* CK_F_PR_XOR_CHAR */ #ifndef CK_F_PR_OR_CHAR #define CK_F_PR_OR_CHAR CK_PR_BIN_S(or, char, char, |) #endif /* CK_F_PR_OR_CHAR */ #endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */ #if defined(CK_F_PR_LOAD_INT) && defined(CK_F_PR_CAS_INT_VALUE) #ifndef CK_F_PR_ADD_INT #define CK_F_PR_ADD_INT CK_PR_BIN_S(add, int, int, +) #endif /* CK_F_PR_ADD_INT */ #ifndef CK_F_PR_SUB_INT #define CK_F_PR_SUB_INT CK_PR_BIN_S(sub, int, int, -) #endif /* CK_F_PR_SUB_INT */ #ifndef CK_F_PR_AND_INT #define CK_F_PR_AND_INT CK_PR_BIN_S(and, int, int, &) #endif /* CK_F_PR_AND_INT */ #ifndef CK_F_PR_XOR_INT #define CK_F_PR_XOR_INT CK_PR_BIN_S(xor, int, int, ^) #endif /* CK_F_PR_XOR_INT */ #ifndef CK_F_PR_OR_INT #define CK_F_PR_OR_INT CK_PR_BIN_S(or, int, int, |) #endif /* CK_F_PR_OR_INT */ #endif /* CK_F_PR_LOAD_INT && 
CK_F_PR_CAS_INT_VALUE */ #if defined(CK_F_PR_LOAD_DOUBLE) && defined(CK_F_PR_CAS_DOUBLE_VALUE) && \ !defined(CK_PR_DISABLE_DOUBLE) #ifndef CK_F_PR_ADD_DOUBLE #define CK_F_PR_ADD_DOUBLE CK_PR_BIN_S(add, double, double, +) #endif /* CK_F_PR_ADD_DOUBLE */ #ifndef CK_F_PR_SUB_DOUBLE #define CK_F_PR_SUB_DOUBLE CK_PR_BIN_S(sub, double, double, -) #endif /* CK_F_PR_SUB_DOUBLE */ #endif /* CK_F_PR_LOAD_DOUBLE && CK_F_PR_CAS_DOUBLE_VALUE && !CK_PR_DISABLE_DOUBLE */ #if defined(CK_F_PR_LOAD_UINT) && defined(CK_F_PR_CAS_UINT_VALUE) #ifndef CK_F_PR_ADD_UINT #define CK_F_PR_ADD_UINT CK_PR_BIN_S(add, uint, unsigned int, +) #endif /* CK_F_PR_ADD_UINT */ #ifndef CK_F_PR_SUB_UINT #define CK_F_PR_SUB_UINT CK_PR_BIN_S(sub, uint, unsigned int, -) #endif /* CK_F_PR_SUB_UINT */ #ifndef CK_F_PR_AND_UINT #define CK_F_PR_AND_UINT CK_PR_BIN_S(and, uint, unsigned int, &) #endif /* CK_F_PR_AND_UINT */ #ifndef CK_F_PR_XOR_UINT #define CK_F_PR_XOR_UINT CK_PR_BIN_S(xor, uint, unsigned int, ^) #endif /* CK_F_PR_XOR_UINT */ #ifndef CK_F_PR_OR_UINT #define CK_F_PR_OR_UINT CK_PR_BIN_S(or, uint, unsigned int, |) #endif /* CK_F_PR_OR_UINT */ #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */ #if defined(CK_F_PR_LOAD_PTR) && defined(CK_F_PR_CAS_PTR_VALUE) #ifndef CK_F_PR_ADD_PTR #define CK_F_PR_ADD_PTR CK_PR_BIN(add, ptr, void, uintptr_t, +, void *) #endif /* CK_F_PR_ADD_PTR */ #ifndef CK_F_PR_SUB_PTR #define CK_F_PR_SUB_PTR CK_PR_BIN(sub, ptr, void, uintptr_t, -, void *) #endif /* CK_F_PR_SUB_PTR */ #ifndef CK_F_PR_AND_PTR #define CK_F_PR_AND_PTR CK_PR_BIN(and, ptr, void, uintptr_t, &, void *) #endif /* CK_F_PR_AND_PTR */ #ifndef CK_F_PR_XOR_PTR #define CK_F_PR_XOR_PTR CK_PR_BIN(xor, ptr, void, uintptr_t, ^, void *) #endif /* CK_F_PR_XOR_PTR */ #ifndef CK_F_PR_OR_PTR #define CK_F_PR_OR_PTR CK_PR_BIN(or, ptr, void, uintptr_t, |, void *) #endif /* CK_F_PR_OR_PTR */ #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */ #if defined(CK_F_PR_LOAD_64) && defined(CK_F_PR_CAS_64_VALUE) #ifndef CK_F_PR_ADD_64 #define CK_F_PR_ADD_64 CK_PR_BIN_S(add, 64, uint64_t, +) #endif /* CK_F_PR_ADD_64 */ #ifndef CK_F_PR_SUB_64 #define CK_F_PR_SUB_64 CK_PR_BIN_S(sub, 64, uint64_t, -) #endif /* CK_F_PR_SUB_64 */ #ifndef CK_F_PR_AND_64 #define CK_F_PR_AND_64 CK_PR_BIN_S(and, 64, uint64_t, &) #endif /* CK_F_PR_AND_64 */ #ifndef CK_F_PR_XOR_64 #define CK_F_PR_XOR_64 CK_PR_BIN_S(xor, 64, uint64_t, ^) #endif /* CK_F_PR_XOR_64 */ #ifndef CK_F_PR_OR_64 #define CK_F_PR_OR_64 CK_PR_BIN_S(or, 64, uint64_t, |) #endif /* CK_F_PR_OR_64 */ #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */ #if defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_CAS_32_VALUE) #ifndef CK_F_PR_ADD_32 #define CK_F_PR_ADD_32 CK_PR_BIN_S(add, 32, uint32_t, +) #endif /* CK_F_PR_ADD_32 */ #ifndef CK_F_PR_SUB_32 #define CK_F_PR_SUB_32 CK_PR_BIN_S(sub, 32, uint32_t, -) #endif /* CK_F_PR_SUB_32 */ #ifndef CK_F_PR_AND_32 #define CK_F_PR_AND_32 CK_PR_BIN_S(and, 32, uint32_t, &) #endif /* CK_F_PR_AND_32 */ #ifndef CK_F_PR_XOR_32 #define CK_F_PR_XOR_32 CK_PR_BIN_S(xor, 32, uint32_t, ^) #endif /* CK_F_PR_XOR_32 */ #ifndef CK_F_PR_OR_32 #define CK_F_PR_OR_32 CK_PR_BIN_S(or, 32, uint32_t, |) #endif /* CK_F_PR_OR_32 */ #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */ #if defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_CAS_16_VALUE) #ifndef CK_F_PR_ADD_16 #define CK_F_PR_ADD_16 CK_PR_BIN_S(add, 16, uint16_t, +) #endif /* CK_F_PR_ADD_16 */ #ifndef CK_F_PR_SUB_16 #define CK_F_PR_SUB_16 CK_PR_BIN_S(sub, 16, uint16_t, -) #endif /* CK_F_PR_SUB_16 */ #ifndef CK_F_PR_AND_16 #define CK_F_PR_AND_16 
CK_PR_BIN_S(and, 16, uint16_t, &) #endif /* CK_F_PR_AND_16 */ #ifndef CK_F_PR_XOR_16 #define CK_F_PR_XOR_16 CK_PR_BIN_S(xor, 16, uint16_t, ^) #endif /* CK_F_PR_XOR_16 */ #ifndef CK_F_PR_OR_16 #define CK_F_PR_OR_16 CK_PR_BIN_S(or, 16, uint16_t, |) #endif /* CK_F_PR_OR_16 */ #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */ #if defined(CK_F_PR_LOAD_8) && defined(CK_F_PR_CAS_8_VALUE) #ifndef CK_F_PR_ADD_8 #define CK_F_PR_ADD_8 CK_PR_BIN_S(add, 8, uint8_t, +) #endif /* CK_F_PR_ADD_8 */ #ifndef CK_F_PR_SUB_8 #define CK_F_PR_SUB_8 CK_PR_BIN_S(sub, 8, uint8_t, -) #endif /* CK_F_PR_SUB_8 */ #ifndef CK_F_PR_AND_8 #define CK_F_PR_AND_8 CK_PR_BIN_S(and, 8, uint8_t, &) #endif /* CK_F_PR_AND_8 */ #ifndef CK_F_PR_XOR_8 #define CK_F_PR_XOR_8 CK_PR_BIN_S(xor, 8, uint8_t, ^) #endif /* CK_F_PR_XOR_8 */ #ifndef CK_F_PR_OR_8 #define CK_F_PR_OR_8 CK_PR_BIN_S(or, 8, uint8_t, |) #endif /* CK_F_PR_OR_8 */ #endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */ #undef CK_PR_BIN_S #undef CK_PR_BIN #define CK_PR_BTX(K, S, M, T, P, C, R) \ CK_CC_INLINE static bool \ ck_pr_##K##_##S(M *target, unsigned int offset) \ { \ T previous; \ C punt; \ punt = ck_pr_md_load_##S(target); \ previous = (T)punt; \ while (ck_pr_cas_##S##_value(target, (C)previous, \ (C)(previous P (R ((T)1 << offset))), &previous) == false) \ ck_pr_stall(); \ return ((previous >> offset) & 1); \ } #define CK_PR_BTX_S(K, S, T, P, R) CK_PR_BTX(K, S, T, T, P, T, R) #if defined(CK_F_PR_LOAD_INT) && defined(CK_F_PR_CAS_INT_VALUE) #ifndef CK_F_PR_BTC_INT #define CK_F_PR_BTC_INT CK_PR_BTX_S(btc, int, int, ^,) #endif /* CK_F_PR_BTC_INT */ #ifndef CK_F_PR_BTR_INT #define CK_F_PR_BTR_INT CK_PR_BTX_S(btr, int, int, &, ~) #endif /* CK_F_PR_BTR_INT */ #ifndef CK_F_PR_BTS_INT #define CK_F_PR_BTS_INT CK_PR_BTX_S(bts, int, int, |,) #endif /* CK_F_PR_BTS_INT */ #endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */ #if defined(CK_F_PR_LOAD_UINT) && defined(CK_F_PR_CAS_UINT_VALUE) #ifndef CK_F_PR_BTC_UINT #define CK_F_PR_BTC_UINT CK_PR_BTX_S(btc, uint, unsigned int, ^,) #endif /* CK_F_PR_BTC_UINT */ #ifndef CK_F_PR_BTR_UINT #define CK_F_PR_BTR_UINT CK_PR_BTX_S(btr, uint, unsigned int, &, ~) #endif /* CK_F_PR_BTR_UINT */ #ifndef CK_F_PR_BTS_UINT #define CK_F_PR_BTS_UINT CK_PR_BTX_S(bts, uint, unsigned int, |,) #endif /* CK_F_PR_BTS_UINT */ #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */ #if defined(CK_F_PR_LOAD_PTR) && defined(CK_F_PR_CAS_PTR_VALUE) #ifndef CK_F_PR_BTC_PTR #define CK_F_PR_BTC_PTR CK_PR_BTX(btc, ptr, void, uintptr_t, ^, void *,) #endif /* CK_F_PR_BTC_PTR */ #ifndef CK_F_PR_BTR_PTR #define CK_F_PR_BTR_PTR CK_PR_BTX(btr, ptr, void, uintptr_t, &, void *, ~) #endif /* CK_F_PR_BTR_PTR */ #ifndef CK_F_PR_BTS_PTR #define CK_F_PR_BTS_PTR CK_PR_BTX(bts, ptr, void, uintptr_t, |, void *,) #endif /* CK_F_PR_BTS_PTR */ #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */ #if defined(CK_F_PR_LOAD_64) && defined(CK_F_PR_CAS_64_VALUE) #ifndef CK_F_PR_BTC_64 #define CK_F_PR_BTC_64 CK_PR_BTX_S(btc, 64, uint64_t, ^,) #endif /* CK_F_PR_BTC_64 */ #ifndef CK_F_PR_BTR_64 #define CK_F_PR_BTR_64 CK_PR_BTX_S(btr, 64, uint64_t, &, ~) #endif /* CK_F_PR_BTR_64 */ #ifndef CK_F_PR_BTS_64 #define CK_F_PR_BTS_64 CK_PR_BTX_S(bts, 64, uint64_t, |,) #endif /* CK_F_PR_BTS_64 */ #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */ #if defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_CAS_32_VALUE) #ifndef CK_F_PR_BTC_32 #define CK_F_PR_BTC_32 CK_PR_BTX_S(btc, 32, uint32_t, ^,) #endif /* CK_F_PR_BTC_32 */ #ifndef CK_F_PR_BTR_32 #define CK_F_PR_BTR_32 CK_PR_BTX_S(btr, 32, uint32_t, &, ~) 
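/*
 * A minimal sketch of the bit-test helpers generated by CK_PR_BTX: they
 * return the previous value of the addressed bit, so bts/btr can serve as
 * a crude test-and-set flag on targets without a native bit-test
 * primitive (busy is a hypothetical flag word; ordering is added by the
 * caller):
 *
 *     static unsigned int busy;
 *
 *     if (ck_pr_bts_uint(&busy, 0) == false) {
 *             ck_pr_fence_acquire();
 *             ... exclusive work ...
 *             ck_pr_fence_release();
 *             ck_pr_btr_uint(&busy, 0);
 *     }
 */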
#endif /* CK_F_PR_BTR_32 */ #ifndef CK_F_PR_BTS_32 #define CK_F_PR_BTS_32 CK_PR_BTX_S(bts, 32, uint32_t, |,) #endif /* CK_F_PR_BTS_32 */ #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */ #if defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_CAS_16_VALUE) #ifndef CK_F_PR_BTC_16 #define CK_F_PR_BTC_16 CK_PR_BTX_S(btc, 16, uint16_t, ^,) #endif /* CK_F_PR_BTC_16 */ #ifndef CK_F_PR_BTR_16 #define CK_F_PR_BTR_16 CK_PR_BTX_S(btr, 16, uint16_t, &, ~) #endif /* CK_F_PR_BTR_16 */ #ifndef CK_F_PR_BTS_16 #define CK_F_PR_BTS_16 CK_PR_BTX_S(bts, 16, uint16_t, |,) #endif /* CK_F_PR_BTS_16 */ #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */ #undef CK_PR_BTX_S #undef CK_PR_BTX #define CK_PR_UNARY(K, X, S, M, T) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target) \ { \ ck_pr_##X##_##S(target, (T)1); \ return; \ } #define CK_PR_UNARY_Z(K, S, M, T, P, C, Z) \ CK_CC_INLINE static bool \ ck_pr_##K##_##S##_is_zero(M *target) \ { \ T previous; \ C punt; \ punt = (C)ck_pr_md_load_##S(target); \ previous = (T)punt; \ while (ck_pr_cas_##S##_value(target, \ (C)previous, \ (C)(previous P 1), \ &previous) == false) \ ck_pr_stall(); \ return previous == (T)Z; \ } #define CK_PR_UNARY_Z_STUB(K, S, M) \ CK_CC_INLINE static void \ ck_pr_##K##_##S##_zero(M *target, bool *zero) \ { \ *zero = ck_pr_##K##_##S##_is_zero(target); \ return; \ } #define CK_PR_UNARY_S(K, X, S, M) CK_PR_UNARY(K, X, S, M, M) #define CK_PR_UNARY_Z_S(K, S, M, P, Z) \ CK_PR_UNARY_Z(K, S, M, M, P, M, Z) \ CK_PR_UNARY_Z_STUB(K, S, M) #if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE) #ifndef CK_F_PR_INC_CHAR #define CK_F_PR_INC_CHAR CK_PR_UNARY_S(inc, add, char, char) #endif /* CK_F_PR_INC_CHAR */ #ifndef CK_F_PR_INC_CHAR_ZERO #define CK_F_PR_INC_CHAR_ZERO CK_PR_UNARY_Z_S(inc, char, char, +, -1) #else CK_PR_UNARY_Z_STUB(inc, char, char) #endif /* CK_F_PR_INC_CHAR_ZERO */ #ifndef CK_F_PR_DEC_CHAR #define CK_F_PR_DEC_CHAR CK_PR_UNARY_S(dec, sub, char, char) #endif /* CK_F_PR_DEC_CHAR */ #ifndef CK_F_PR_DEC_CHAR_ZERO #define CK_F_PR_DEC_CHAR_ZERO CK_PR_UNARY_Z_S(dec, char, char, -, 1) #else CK_PR_UNARY_Z_STUB(dec, char, char) #endif /* CK_F_PR_DEC_CHAR_ZERO */ #endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */ #if defined(CK_F_PR_LOAD_INT) && defined(CK_F_PR_CAS_INT_VALUE) #ifndef CK_F_PR_INC_INT #define CK_F_PR_INC_INT CK_PR_UNARY_S(inc, add, int, int) #endif /* CK_F_PR_INC_INT */ #ifndef CK_F_PR_INC_INT_ZERO #define CK_F_PR_INC_INT_ZERO CK_PR_UNARY_Z_S(inc, int, int, +, -1) #else CK_PR_UNARY_Z_STUB(inc, int, int) #endif /* CK_F_PR_INC_INT_ZERO */ #ifndef CK_F_PR_DEC_INT #define CK_F_PR_DEC_INT CK_PR_UNARY_S(dec, sub, int, int) #endif /* CK_F_PR_DEC_INT */ #ifndef CK_F_PR_DEC_INT_ZERO #define CK_F_PR_DEC_INT_ZERO CK_PR_UNARY_Z_S(dec, int, int, -, 1) #else CK_PR_UNARY_Z_STUB(dec, int, int) #endif /* CK_F_PR_DEC_INT_ZERO */ #endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */ #if defined(CK_F_PR_LOAD_DOUBLE) && defined(CK_F_PR_CAS_DOUBLE_VALUE) && \ !defined(CK_PR_DISABLE_DOUBLE) #ifndef CK_F_PR_INC_DOUBLE #define CK_F_PR_INC_DOUBLE CK_PR_UNARY_S(inc, add, double, double) #endif /* CK_F_PR_INC_DOUBLE */ #ifndef CK_F_PR_DEC_DOUBLE #define CK_F_PR_DEC_DOUBLE CK_PR_UNARY_S(dec, sub, double, double) #endif /* CK_F_PR_DEC_DOUBLE */ #endif /* CK_F_PR_LOAD_DOUBLE && CK_F_PR_CAS_DOUBLE_VALUE && !CK_PR_DISABLE_DOUBLE */ #if defined(CK_F_PR_LOAD_UINT) && defined(CK_F_PR_CAS_UINT_VALUE) #ifndef CK_F_PR_INC_UINT #define CK_F_PR_INC_UINT CK_PR_UNARY_S(inc, add, uint, unsigned int) #endif /* CK_F_PR_INC_UINT */ #ifndef CK_F_PR_INC_UINT_ZERO 
#define CK_F_PR_INC_UINT_ZERO CK_PR_UNARY_Z_S(inc, uint, unsigned int, +, UINT_MAX) #else CK_PR_UNARY_Z_STUB(inc, uint, unsigned int) #endif /* CK_F_PR_INC_UINT_ZERO */ #ifndef CK_F_PR_DEC_UINT #define CK_F_PR_DEC_UINT CK_PR_UNARY_S(dec, sub, uint, unsigned int) #endif /* CK_F_PR_DEC_UINT */ #ifndef CK_F_PR_DEC_UINT_ZERO #define CK_F_PR_DEC_UINT_ZERO CK_PR_UNARY_Z_S(dec, uint, unsigned int, -, 1) #else CK_PR_UNARY_Z_STUB(dec, uint, unsigned int) #endif /* CK_F_PR_DEC_UINT_ZERO */ #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */ #if defined(CK_F_PR_LOAD_PTR) && defined(CK_F_PR_CAS_PTR_VALUE) #ifndef CK_F_PR_INC_PTR #define CK_F_PR_INC_PTR CK_PR_UNARY(inc, add, ptr, void, uintptr_t) #endif /* CK_F_PR_INC_PTR */ #ifndef CK_F_PR_INC_PTR_ZERO #define CK_F_PR_INC_PTR_ZERO CK_PR_UNARY_Z(inc, ptr, void, uintptr_t, +, void *, UINT_MAX) #else CK_PR_UNARY_Z_STUB(inc, ptr, void) #endif /* CK_F_PR_INC_PTR_ZERO */ #ifndef CK_F_PR_DEC_PTR #define CK_F_PR_DEC_PTR CK_PR_UNARY(dec, sub, ptr, void, uintptr_t) #endif /* CK_F_PR_DEC_PTR */ #ifndef CK_F_PR_DEC_PTR_ZERO #define CK_F_PR_DEC_PTR_ZERO CK_PR_UNARY_Z(dec, ptr, void, uintptr_t, -, void *, 1) #else CK_PR_UNARY_Z_STUB(dec, ptr, void) #endif /* CK_F_PR_DEC_PTR_ZERO */ #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */ #if defined(CK_F_PR_LOAD_64) && defined(CK_F_PR_CAS_64_VALUE) #ifndef CK_F_PR_INC_64 #define CK_F_PR_INC_64 CK_PR_UNARY_S(inc, add, 64, uint64_t) #endif /* CK_F_PR_INC_64 */ #ifndef CK_F_PR_INC_64_ZERO #define CK_F_PR_INC_64_ZERO CK_PR_UNARY_Z_S(inc, 64, uint64_t, +, UINT64_MAX) #else CK_PR_UNARY_Z_STUB(inc, 64, uint64_t) #endif /* CK_F_PR_INC_64_ZERO */ #ifndef CK_F_PR_DEC_64 #define CK_F_PR_DEC_64 CK_PR_UNARY_S(dec, sub, 64, uint64_t) #endif /* CK_F_PR_DEC_64 */ #ifndef CK_F_PR_DEC_64_ZERO #define CK_F_PR_DEC_64_ZERO CK_PR_UNARY_Z_S(dec, 64, uint64_t, -, 1) #else CK_PR_UNARY_Z_STUB(dec, 64, uint64_t) #endif /* CK_F_PR_DEC_64_ZERO */ #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */ #if defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_CAS_32_VALUE) #ifndef CK_F_PR_INC_32 #define CK_F_PR_INC_32 CK_PR_UNARY_S(inc, add, 32, uint32_t) #endif /* CK_F_PR_INC_32 */ #ifndef CK_F_PR_INC_32_ZERO #define CK_F_PR_INC_32_ZERO CK_PR_UNARY_Z_S(inc, 32, uint32_t, +, UINT32_MAX) #else CK_PR_UNARY_Z_STUB(inc, 32, uint32_t) #endif /* CK_F_PR_INC_32_ZERO */ #ifndef CK_F_PR_DEC_32 #define CK_F_PR_DEC_32 CK_PR_UNARY_S(dec, sub, 32, uint32_t) #endif /* CK_F_PR_DEC_32 */ #ifndef CK_F_PR_DEC_32_ZERO #define CK_F_PR_DEC_32_ZERO CK_PR_UNARY_Z_S(dec, 32, uint32_t, -, 1) #else CK_PR_UNARY_Z_STUB(dec, 32, uint32_t) #endif /* CK_F_PR_DEC_32_ZERO */ #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */ #if defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_CAS_16_VALUE) #ifndef CK_F_PR_INC_16 #define CK_F_PR_INC_16 CK_PR_UNARY_S(inc, add, 16, uint16_t) #endif /* CK_F_PR_INC_16 */ #ifndef CK_F_PR_INC_16_ZERO #define CK_F_PR_INC_16_ZERO CK_PR_UNARY_Z_S(inc, 16, uint16_t, +, UINT16_MAX) #else CK_PR_UNARY_Z_STUB(inc, 16, uint16_t) #endif /* CK_F_PR_INC_16_ZERO */ #ifndef CK_F_PR_DEC_16 #define CK_F_PR_DEC_16 CK_PR_UNARY_S(dec, sub, 16, uint16_t) #endif /* CK_F_PR_DEC_16 */ #ifndef CK_F_PR_DEC_16_ZERO #define CK_F_PR_DEC_16_ZERO CK_PR_UNARY_Z_S(dec, 16, uint16_t, -, 1) #else CK_PR_UNARY_Z_STUB(dec, 16, uint16_t) #endif /* CK_F_PR_DEC_16_ZERO */ #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */ #if defined(CK_F_PR_LOAD_8) && defined(CK_F_PR_CAS_8_VALUE) #ifndef CK_F_PR_INC_8 #define CK_F_PR_INC_8 CK_PR_UNARY_S(inc, add, 8, uint8_t) #endif /* CK_F_PR_INC_8 */ #ifndef 
CK_F_PR_INC_8_ZERO #define CK_F_PR_INC_8_ZERO CK_PR_UNARY_Z_S(inc, 8, uint8_t, +, UINT8_MAX) #else CK_PR_UNARY_Z_STUB(inc, 8, uint8_t) #endif /* CK_F_PR_INC_8_ZERO */ #ifndef CK_F_PR_DEC_8 #define CK_F_PR_DEC_8 CK_PR_UNARY_S(dec, sub, 8, uint8_t) #endif /* CK_F_PR_DEC_8 */ #ifndef CK_F_PR_DEC_8_ZERO #define CK_F_PR_DEC_8_ZERO CK_PR_UNARY_Z_S(dec, 8, uint8_t, -, 1) #else CK_PR_UNARY_Z_STUB(dec, 8, uint8_t) #endif /* CK_F_PR_DEC_8_ZERO */ #endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */ #undef CK_PR_UNARY_Z_S #undef CK_PR_UNARY_S #undef CK_PR_UNARY_Z #undef CK_PR_UNARY #define CK_PR_N(K, S, M, T, P, C) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target) \ { \ T previous; \ C punt; \ punt = (C)ck_pr_md_load_##S(target); \ previous = (T)punt; \ while (ck_pr_cas_##S##_value(target, \ (C)previous, \ (C)(P previous), \ &previous) == false) \ ck_pr_stall(); \ \ return; \ } #define CK_PR_N_Z(S, M, T, C) \ CK_CC_INLINE static void \ ck_pr_neg_##S##_zero(M *target, bool *zero) \ { \ T previous; \ C punt; \ punt = (C)ck_pr_md_load_##S(target); \ previous = (T)punt; \ while (ck_pr_cas_##S##_value(target, \ (C)previous, \ (C)(-previous), \ &previous) == false) \ ck_pr_stall(); \ \ *zero = previous == 0; \ return; \ } #define CK_PR_N_S(K, S, M, P) CK_PR_N(K, S, M, M, P, M) #define CK_PR_N_Z_S(S, M) CK_PR_N_Z(S, M, M, M) #if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE) #ifndef CK_F_PR_NOT_CHAR #define CK_F_PR_NOT_CHAR CK_PR_N_S(not, char, char, ~) #endif /* CK_F_PR_NOT_CHAR */ #ifndef CK_F_PR_NEG_CHAR #define CK_F_PR_NEG_CHAR CK_PR_N_S(neg, char, char, -) #endif /* CK_F_PR_NEG_CHAR */ #ifndef CK_F_PR_NEG_CHAR_ZERO #define CK_F_PR_NEG_CHAR_ZERO CK_PR_N_Z_S(char, char) #endif /* CK_F_PR_NEG_CHAR_ZERO */ #endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */ #if defined(CK_F_PR_LOAD_INT) && defined(CK_F_PR_CAS_INT_VALUE) #ifndef CK_F_PR_NOT_INT #define CK_F_PR_NOT_INT CK_PR_N_S(not, int, int, ~) #endif /* CK_F_PR_NOT_INT */ #ifndef CK_F_PR_NEG_INT #define CK_F_PR_NEG_INT CK_PR_N_S(neg, int, int, -) #endif /* CK_F_PR_NEG_INT */ #ifndef CK_F_PR_NEG_INT_ZERO #define CK_F_PR_NEG_INT_ZERO CK_PR_N_Z_S(int, int) #endif /* CK_F_PR_NEG_INT_ZERO */ #endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */ #if defined(CK_F_PR_LOAD_DOUBLE) && defined(CK_F_PR_CAS_DOUBLE_VALUE) && \ !defined(CK_PR_DISABLE_DOUBLE) #ifndef CK_F_PR_NEG_DOUBLE #define CK_F_PR_NEG_DOUBLE CK_PR_N_S(neg, double, double, -) #endif /* CK_F_PR_NEG_DOUBLE */ #endif /* CK_F_PR_LOAD_DOUBLE && CK_F_PR_CAS_DOUBLE_VALUE && !CK_PR_DISABLE_DOUBLE */ #if defined(CK_F_PR_LOAD_UINT) && defined(CK_F_PR_CAS_UINT_VALUE) #ifndef CK_F_PR_NOT_UINT #define CK_F_PR_NOT_UINT CK_PR_N_S(not, uint, unsigned int, ~) #endif /* CK_F_PR_NOT_UINT */ #ifndef CK_F_PR_NEG_UINT #define CK_F_PR_NEG_UINT CK_PR_N_S(neg, uint, unsigned int, -) #endif /* CK_F_PR_NEG_UINT */ #ifndef CK_F_PR_NEG_UINT_ZERO #define CK_F_PR_NEG_UINT_ZERO CK_PR_N_Z_S(uint, unsigned int) #endif /* CK_F_PR_NEG_UINT_ZERO */ #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */ #if defined(CK_F_PR_LOAD_PTR) && defined(CK_F_PR_CAS_PTR_VALUE) #ifndef CK_F_PR_NOT_PTR #define CK_F_PR_NOT_PTR CK_PR_N(not, ptr, void, uintptr_t, ~, void *) #endif /* CK_F_PR_NOT_PTR */ #ifndef CK_F_PR_NEG_PTR #define CK_F_PR_NEG_PTR CK_PR_N(neg, ptr, void, uintptr_t, -, void *) #endif /* CK_F_PR_NEG_PTR */ #ifndef CK_F_PR_NEG_PTR_ZERO #define CK_F_PR_NEG_PTR_ZERO CK_PR_N_Z(ptr, void, uintptr_t, void *) #endif /* CK_F_PR_NEG_PTR_ZERO */ #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */ #if 
defined(CK_F_PR_LOAD_64) && defined(CK_F_PR_CAS_64_VALUE) #ifndef CK_F_PR_NOT_64 #define CK_F_PR_NOT_64 CK_PR_N_S(not, 64, uint64_t, ~) #endif /* CK_F_PR_NOT_64 */ #ifndef CK_F_PR_NEG_64 #define CK_F_PR_NEG_64 CK_PR_N_S(neg, 64, uint64_t, -) #endif /* CK_F_PR_NEG_64 */ #ifndef CK_F_PR_NEG_64_ZERO #define CK_F_PR_NEG_64_ZERO CK_PR_N_Z_S(64, uint64_t) #endif /* CK_F_PR_NEG_64_ZERO */ #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */ #if defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_CAS_32_VALUE) #ifndef CK_F_PR_NOT_32 #define CK_F_PR_NOT_32 CK_PR_N_S(not, 32, uint32_t, ~) #endif /* CK_F_PR_NOT_32 */ #ifndef CK_F_PR_NEG_32 #define CK_F_PR_NEG_32 CK_PR_N_S(neg, 32, uint32_t, -) #endif /* CK_F_PR_NEG_32 */ #ifndef CK_F_PR_NEG_32_ZERO #define CK_F_PR_NEG_32_ZERO CK_PR_N_Z_S(32, uint32_t) #endif /* CK_F_PR_NEG_32_ZERO */ #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */ #if defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_CAS_16_VALUE) #ifndef CK_F_PR_NOT_16 #define CK_F_PR_NOT_16 CK_PR_N_S(not, 16, uint16_t, ~) #endif /* CK_F_PR_NOT_16 */ #ifndef CK_F_PR_NEG_16 #define CK_F_PR_NEG_16 CK_PR_N_S(neg, 16, uint16_t, -) #endif /* CK_F_PR_NEG_16 */ #ifndef CK_F_PR_NEG_16_ZERO #define CK_F_PR_NEG_16_ZERO CK_PR_N_Z_S(16, uint16_t) #endif /* CK_F_PR_NEG_16_ZERO */ #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */ #if defined(CK_F_PR_LOAD_8) && defined(CK_F_PR_CAS_8_VALUE) #ifndef CK_F_PR_NOT_8 #define CK_F_PR_NOT_8 CK_PR_N_S(not, 8, uint8_t, ~) #endif /* CK_F_PR_NOT_8 */ #ifndef CK_F_PR_NEG_8 #define CK_F_PR_NEG_8 CK_PR_N_S(neg, 8, uint8_t, -) #endif /* CK_F_PR_NEG_8 */ #ifndef CK_F_PR_NEG_8_ZERO #define CK_F_PR_NEG_8_ZERO CK_PR_N_Z_S(8, uint8_t) #endif /* CK_F_PR_NEG_8_ZERO */ #endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */ #undef CK_PR_N_Z_S #undef CK_PR_N_S #undef CK_PR_N_Z #undef CK_PR_N #define CK_PR_FAA(S, M, T, C) \ CK_CC_INLINE static C \ ck_pr_faa_##S(M *target, T delta) \ { \ T previous; \ C punt; \ punt = (C)ck_pr_md_load_##S(target); \ previous = (T)punt; \ while (ck_pr_cas_##S##_value(target, \ (C)previous, \ (C)(previous + delta), \ &previous) == false) \ ck_pr_stall(); \ \ return ((C)previous); \ } #define CK_PR_FAS(S, M, C) \ CK_CC_INLINE static C \ ck_pr_fas_##S(M *target, C update) \ { \ C previous; \ previous = ck_pr_md_load_##S(target); \ while (ck_pr_cas_##S##_value(target, \ previous, \ update, \ &previous) == false) \ ck_pr_stall(); \ \ return (previous); \ } #define CK_PR_FAA_S(S, M) CK_PR_FAA(S, M, M, M) #define CK_PR_FAS_S(S, M) CK_PR_FAS(S, M, M) #if defined(CK_F_PR_LOAD_CHAR) && defined(CK_F_PR_CAS_CHAR_VALUE) #ifndef CK_F_PR_FAA_CHAR #define CK_F_PR_FAA_CHAR CK_PR_FAA_S(char, char) #endif /* CK_F_PR_FAA_CHAR */ #ifndef CK_F_PR_FAS_CHAR #define CK_F_PR_FAS_CHAR CK_PR_FAS_S(char, char) #endif /* CK_F_PR_FAS_CHAR */ #endif /* CK_F_PR_LOAD_CHAR && CK_F_PR_CAS_CHAR_VALUE */ #if defined(CK_F_PR_LOAD_INT) && defined(CK_F_PR_CAS_INT_VALUE) #ifndef CK_F_PR_FAA_INT #define CK_F_PR_FAA_INT CK_PR_FAA_S(int, int) #endif /* CK_F_PR_FAA_INT */ #ifndef CK_F_PR_FAS_INT #define CK_F_PR_FAS_INT CK_PR_FAS_S(int, int) #endif /* CK_F_PR_FAS_INT */ #endif /* CK_F_PR_LOAD_INT && CK_F_PR_CAS_INT_VALUE */ #if defined(CK_F_PR_LOAD_DOUBLE) && defined(CK_F_PR_CAS_DOUBLE_VALUE) && \ !defined(CK_PR_DISABLE_DOUBLE) #ifndef CK_F_PR_FAA_DOUBLE #define CK_F_PR_FAA_DOUBLE CK_PR_FAA_S(double, double) #endif /* CK_F_PR_FAA_DOUBLE */ #ifndef CK_F_PR_FAS_DOUBLE #define CK_F_PR_FAS_DOUBLE CK_PR_FAS_S(double, double) #endif /* CK_F_PR_FAS_DOUBLE */ #endif /* CK_F_PR_LOAD_DOUBLE && CK_F_PR_CAS_DOUBLE_VALUE && 
!CK_PR_DISABLE_DOUBLE */ #if defined(CK_F_PR_LOAD_UINT) && defined(CK_F_PR_CAS_UINT_VALUE) #ifndef CK_F_PR_FAA_UINT #define CK_F_PR_FAA_UINT CK_PR_FAA_S(uint, unsigned int) #endif /* CK_F_PR_FAA_UINT */ #ifndef CK_F_PR_FAS_UINT #define CK_F_PR_FAS_UINT CK_PR_FAS_S(uint, unsigned int) #endif /* CK_F_PR_FAS_UINT */ #endif /* CK_F_PR_LOAD_UINT && CK_F_PR_CAS_UINT_VALUE */ #if defined(CK_F_PR_LOAD_PTR) && defined(CK_F_PR_CAS_PTR_VALUE) #ifndef CK_F_PR_FAA_PTR #define CK_F_PR_FAA_PTR CK_PR_FAA(ptr, void, uintptr_t, void *) #endif /* CK_F_PR_FAA_PTR */ #ifndef CK_F_PR_FAS_PTR #define CK_F_PR_FAS_PTR CK_PR_FAS(ptr, void, void *) #endif /* CK_F_PR_FAS_PTR */ #endif /* CK_F_PR_LOAD_PTR && CK_F_PR_CAS_PTR_VALUE */ #if defined(CK_F_PR_LOAD_64) && defined(CK_F_PR_CAS_64_VALUE) #ifndef CK_F_PR_FAA_64 #define CK_F_PR_FAA_64 CK_PR_FAA_S(64, uint64_t) #endif /* CK_F_PR_FAA_64 */ #ifndef CK_F_PR_FAS_64 #define CK_F_PR_FAS_64 CK_PR_FAS_S(64, uint64_t) #endif /* CK_F_PR_FAS_64 */ #endif /* CK_F_PR_LOAD_64 && CK_F_PR_CAS_64_VALUE */ #if defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_CAS_32_VALUE) #ifndef CK_F_PR_FAA_32 #define CK_F_PR_FAA_32 CK_PR_FAA_S(32, uint32_t) #endif /* CK_F_PR_FAA_32 */ #ifndef CK_F_PR_FAS_32 #define CK_F_PR_FAS_32 CK_PR_FAS_S(32, uint32_t) #endif /* CK_F_PR_FAS_32 */ #endif /* CK_F_PR_LOAD_32 && CK_F_PR_CAS_32_VALUE */ #if defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_CAS_16_VALUE) #ifndef CK_F_PR_FAA_16 #define CK_F_PR_FAA_16 CK_PR_FAA_S(16, uint16_t) #endif /* CK_F_PR_FAA_16 */ #ifndef CK_F_PR_FAS_16 #define CK_F_PR_FAS_16 CK_PR_FAS_S(16, uint16_t) #endif /* CK_F_PR_FAS_16 */ #endif /* CK_F_PR_LOAD_16 && CK_F_PR_CAS_16_VALUE */ #if defined(CK_F_PR_LOAD_8) && defined(CK_F_PR_CAS_8_VALUE) #ifndef CK_F_PR_FAA_8 #define CK_F_PR_FAA_8 CK_PR_FAA_S(8, uint8_t) #endif /* CK_F_PR_FAA_8 */ #ifndef CK_F_PR_FAS_8 #define CK_F_PR_FAS_8 CK_PR_FAS_S(8, uint8_t) #endif /* CK_F_PR_FAS_8 */ #endif /* CK_F_PR_LOAD_8 && CK_F_PR_CAS_8_VALUE */ #undef CK_PR_FAA_S #undef CK_PR_FAS_S #undef CK_PR_FAA #undef CK_PR_FAS #endif /* CK_PR_H */ diff --git a/include/ck_queue.h b/include/ck_queue.h index 3f503aa6c3e5..fd38d8a583fa 100644 --- a/include/ck_queue.h +++ b/include/ck_queue.h @@ -1,438 +1,438 @@ /* * Copyright 2012-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $FreeBSD$ + * $FreeBSD: release/9.0.0/sys/sys/queue.h 221843 2011-05-13 15:49:23Z mdf $ */ #ifndef CK_QUEUE_H #define CK_QUEUE_H #include /* * This file defines three types of data structures: singly-linked lists, * singly-linked tail queues and lists. * * A singly-linked list is headed by a single forward pointer. The elements * are singly linked for minimum space and pointer manipulation overhead at * the expense of O(n) removal for arbitrary elements. New elements can be * added to the list after an existing element or at the head of the list. * Elements being removed from the head of the list should use the explicit * macro for this purpose for optimum efficiency. A singly-linked list may * only be traversed in the forward direction. Singly-linked lists are ideal * for applications with large datasets and few or no removals or for * implementing a LIFO queue. * * A singly-linked tail queue is headed by a pair of pointers, one to the * head of the list and the other to the tail of the list. The elements are * singly linked for minimum space and pointer manipulation overhead at the * expense of O(n) removal for arbitrary elements. New elements can be added * to the list after an existing element, at the head of the list, or at the * end of the list. Elements being removed from the head of the tail queue * should use the explicit macro for this purpose for optimum efficiency. * A singly-linked tail queue may only be traversed in the forward direction. * Singly-linked tail queues are ideal for applications with large datasets * and few or no removals or for implementing a FIFO queue. * * A list is headed by a single forward pointer (or an array of forward * pointers for a hash table header). The elements are doubly linked * so that an arbitrary element can be removed without a need to * traverse the list. New elements can be added to the list before * or after an existing element or at the head of the list. 
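 * As a minimal sketch (consume() and the element pointer e are
 * placeholders), with
 *
 *     struct entry { int value; CK_LIST_ENTRY(entry) link; };
 *     CK_LIST_HEAD(entry_head, entry) head = CK_LIST_HEAD_INITIALIZER(head);
 *
 * a single serialized writer may publish with CK_LIST_INSERT_HEAD(&head, e,
 * link) while readers concurrently walk the list:
 *
 *     struct entry *cursor;
 *     CK_LIST_FOREACH(cursor, &head, link)
 *             consume(cursor->value);
 *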
A list * may only be traversed in the forward direction. * * It is safe to use _FOREACH/_FOREACH_SAFE in the presence of concurrent * modifications to the list. Writers to these lists must, on the other hand, * implement writer-side synchronization. The _SWAP operations are not atomic. * This facility is currently unsupported on architectures such as the Alpha * which require load-depend memory fences. * * CK_SLIST CK_LIST CK_STAILQ * _HEAD + + + * _HEAD_INITIALIZER + + + * _ENTRY + + + * _INIT + + + * _EMPTY + + + * _FIRST + + + * _NEXT + + + * _FOREACH + + + * _FOREACH_SAFE + + + * _INSERT_HEAD + + + * _INSERT_BEFORE - + - * _INSERT_AFTER + + + * _INSERT_TAIL - - + * _REMOVE_AFTER + - + * _REMOVE_HEAD + - + * _REMOVE + + + * _SWAP + + + * _MOVE + + + */ /* * Singly-linked List declarations. */ #define CK_SLIST_HEAD(name, type) \ struct name { \ struct type *cslh_first; /* first element */ \ } #define CK_SLIST_HEAD_INITIALIZER(head) \ { NULL } #define CK_SLIST_ENTRY(type) \ struct { \ struct type *csle_next; /* next element */ \ } /* * Singly-linked List functions. */ #define CK_SLIST_EMPTY(head) \ (ck_pr_load_ptr(&(head)->cslh_first) == NULL) #define CK_SLIST_FIRST(head) \ (ck_pr_load_ptr(&(head)->cslh_first)) #define CK_SLIST_NEXT(elm, field) \ ck_pr_load_ptr(&((elm)->field.csle_next)) #define CK_SLIST_FOREACH(var, head, field) \ for ((var) = CK_SLIST_FIRST((head)); \ - (var) && (ck_pr_fence_load(), 1); \ + (var); \ (var) = CK_SLIST_NEXT((var), field)) -#define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = CK_SLIST_FIRST(head); \ - (var) && (ck_pr_fence_load(), (tvar) = CK_SLIST_NEXT(var, field), 1);\ +#define CK_SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = CK_SLIST_FIRST(head); \ + (var) && ((tvar) = CK_SLIST_NEXT(var, field), 1); \ (var) = (tvar)) #define CK_SLIST_FOREACH_PREVPTR(var, varp, head, field) \ for ((varp) = &(head)->cslh_first; \ - ((var) = ck_pr_load_ptr(varp)) != NULL && (ck_pr_fence_load(), 1); \ + ((var) = ck_pr_load_ptr(varp)) != NULL; \ (varp) = &(var)->field.csle_next) #define CK_SLIST_INIT(head) do { \ ck_pr_store_ptr(&(head)->cslh_first, NULL); \ ck_pr_fence_store(); \ } while (0) #define CK_SLIST_INSERT_AFTER(a, b, field) do { \ (b)->field.csle_next = (a)->field.csle_next; \ ck_pr_fence_store(); \ ck_pr_store_ptr(&(a)->field.csle_next, b); \ } while (0) #define CK_SLIST_INSERT_HEAD(head, elm, field) do { \ (elm)->field.csle_next = (head)->cslh_first; \ ck_pr_fence_store(); \ ck_pr_store_ptr(&(head)->cslh_first, elm); \ } while (0) #define CK_SLIST_INSERT_PREVPTR(prevp, slistelm, elm, field) do { \ (elm)->field.csle_next = (slistelm); \ ck_pr_fence_store(); \ ck_pr_store_ptr(prevp, elm); \ } while (0) #define CK_SLIST_REMOVE_AFTER(elm, field) do { \ ck_pr_store_ptr(&(elm)->field.csle_next, \ (elm)->field.csle_next->field.csle_next); \ } while (0) #define CK_SLIST_REMOVE(head, elm, type, field) do { \ if ((head)->cslh_first == (elm)) { \ CK_SLIST_REMOVE_HEAD((head), field); \ } else { \ struct type *curelm = (head)->cslh_first; \ while (curelm->field.csle_next != (elm)) \ curelm = curelm->field.csle_next; \ CK_SLIST_REMOVE_AFTER(curelm, field); \ } \ } while (0) #define CK_SLIST_REMOVE_HEAD(head, field) do { \ ck_pr_store_ptr(&(head)->cslh_first, \ (head)->cslh_first->field.csle_next); \ } while (0) #define CK_SLIST_REMOVE_PREVPTR(prevp, elm, field) do { \ ck_pr_store_ptr(prevptr, (elm)->field.csle_next); \ } while (0) #define CK_SLIST_MOVE(head1, head2, field) do { \ ck_pr_store_ptr(&(head1)->cslh_first, (head2)->cslh_first); \ } 
while (0) /* * This operation is not applied atomically. */ #define CK_SLIST_SWAP(a, b, type) do { \ struct type *swap_first = (a)->cslh_first; \ (a)->cslh_first = (b)->cslh_first; \ (b)->cslh_first = swap_first; \ } while (0) /* * Singly-linked Tail queue declarations. */ #define CK_STAILQ_HEAD(name, type) \ struct name { \ struct type *cstqh_first;/* first element */ \ struct type **cstqh_last;/* addr of last next element */ \ } #define CK_STAILQ_HEAD_INITIALIZER(head) \ { NULL, &(head).cstqh_first } #define CK_STAILQ_ENTRY(type) \ struct { \ struct type *cstqe_next; /* next element */ \ } /* * Singly-linked Tail queue functions. */ #define CK_STAILQ_CONCAT(head1, head2) do { \ if ((head2)->cstqh_first != NULL) { \ ck_pr_store_ptr((head1)->cstqh_last, (head2)->cstqh_first); \ ck_pr_fence_store(); \ (head1)->cstqh_last = (head2)->cstqh_last; \ CK_STAILQ_INIT((head2)); \ } \ } while (0) #define CK_STAILQ_EMPTY(head) (ck_pr_load_ptr(&(head)->cstqh_first) == NULL) #define CK_STAILQ_FIRST(head) (ck_pr_load_ptr(&(head)->cstqh_first)) #define CK_STAILQ_FOREACH(var, head, field) \ for((var) = CK_STAILQ_FIRST((head)); \ - (var) && (ck_pr_fence_load(), 1); \ + (var); \ (var) = CK_STAILQ_NEXT((var), field)) #define CK_STAILQ_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_STAILQ_FIRST((head)); \ - (var) && (ck_pr_fence_load(), (tvar) = \ + (var) && ((tvar) = \ CK_STAILQ_NEXT((var), field), 1); \ (var) = (tvar)) #define CK_STAILQ_INIT(head) do { \ ck_pr_store_ptr(&(head)->cstqh_first, NULL); \ ck_pr_fence_store(); \ (head)->cstqh_last = &(head)->cstqh_first; \ } while (0) #define CK_STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ (elm)->field.cstqe_next = (tqelm)->field.cstqe_next; \ ck_pr_fence_store(); \ ck_pr_store_ptr(&(tqelm)->field.cstqe_next, elm); \ if ((elm)->field.cstqe_next == NULL) \ (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_INSERT_HEAD(head, elm, field) do { \ (elm)->field.cstqe_next = (head)->cstqh_first; \ ck_pr_fence_store(); \ ck_pr_store_ptr(&(head)->cstqh_first, elm); \ if ((elm)->field.cstqe_next == NULL) \ (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.cstqe_next = NULL; \ ck_pr_fence_store(); \ ck_pr_store_ptr((head)->cstqh_last, (elm)); \ (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_NEXT(elm, field) \ (ck_pr_load_ptr(&(elm)->field.cstqe_next)) #define CK_STAILQ_REMOVE(head, elm, type, field) do { \ if ((head)->cstqh_first == (elm)) { \ CK_STAILQ_REMOVE_HEAD((head), field); \ } else { \ struct type *curelm = (head)->cstqh_first; \ while (curelm->field.cstqe_next != (elm)) \ curelm = curelm->field.cstqe_next; \ CK_STAILQ_REMOVE_AFTER(head, curelm, field); \ } \ } while (0) #define CK_STAILQ_REMOVE_AFTER(head, elm, field) do { \ ck_pr_store_ptr(&(elm)->field.cstqe_next, \ (elm)->field.cstqe_next->field.cstqe_next); \ if ((elm)->field.cstqe_next == NULL) \ (head)->cstqh_last = &(elm)->field.cstqe_next; \ } while (0) #define CK_STAILQ_REMOVE_HEAD(head, field) do { \ ck_pr_store_ptr(&(head)->cstqh_first, \ (head)->cstqh_first->field.cstqe_next); \ if ((head)->cstqh_first == NULL) \ (head)->cstqh_last = &(head)->cstqh_first; \ } while (0) #define CK_STAILQ_MOVE(head1, head2, field) do { \ ck_pr_store_ptr(&(head1)->cstqh_first, (head2)->cstqh_first); \ (head1)->cstqh_last = (head2)->cstqh_last; \ if ((head2)->cstqh_last == &(head2)->cstqh_first) \ (head1)->cstqh_last = &(head1)->cstqh_first; \ } while (0) /* * This 
operation is not applied atomically. */ #define CK_STAILQ_SWAP(head1, head2, type) do { \ struct type *swap_first = CK_STAILQ_FIRST(head1); \ struct type **swap_last = (head1)->cstqh_last; \ CK_STAILQ_FIRST(head1) = CK_STAILQ_FIRST(head2); \ (head1)->cstqh_last = (head2)->cstqh_last; \ CK_STAILQ_FIRST(head2) = swap_first; \ (head2)->cstqh_last = swap_last; \ if (CK_STAILQ_EMPTY(head1)) \ (head1)->cstqh_last = &(head1)->cstqh_first; \ if (CK_STAILQ_EMPTY(head2)) \ (head2)->cstqh_last = &(head2)->cstqh_first; \ } while (0) /* * List declarations. */ #define CK_LIST_HEAD(name, type) \ struct name { \ struct type *clh_first; /* first element */ \ } #define CK_LIST_HEAD_INITIALIZER(head) \ { NULL } #define CK_LIST_ENTRY(type) \ struct { \ struct type *cle_next; /* next element */ \ struct type **cle_prev; /* address of previous next element */ \ } #define CK_LIST_FIRST(head) ck_pr_load_ptr(&(head)->clh_first) #define CK_LIST_EMPTY(head) (CK_LIST_FIRST(head) == NULL) #define CK_LIST_NEXT(elm, field) ck_pr_load_ptr(&(elm)->field.cle_next) #define CK_LIST_FOREACH(var, head, field) \ for ((var) = CK_LIST_FIRST((head)); \ - (var) && (ck_pr_fence_load(), 1); \ + (var); \ (var) = CK_LIST_NEXT((var), field)) #define CK_LIST_FOREACH_SAFE(var, head, field, tvar) \ for ((var) = CK_LIST_FIRST((head)); \ - (var) && (ck_pr_fence_load(), (tvar) = CK_LIST_NEXT((var), field), 1);\ + (var) && ((tvar) = CK_LIST_NEXT((var), field), 1); \ (var) = (tvar)) #define CK_LIST_INIT(head) do { \ ck_pr_store_ptr(&(head)->clh_first, NULL); \ ck_pr_fence_store(); \ } while (0) #define CK_LIST_INSERT_AFTER(listelm, elm, field) do { \ (elm)->field.cle_next = (listelm)->field.cle_next; \ (elm)->field.cle_prev = &(listelm)->field.cle_next; \ ck_pr_fence_store(); \ if ((listelm)->field.cle_next != NULL) \ (listelm)->field.cle_next->field.cle_prev = &(elm)->field.cle_next;\ ck_pr_store_ptr(&(listelm)->field.cle_next, elm); \ } while (0) #define CK_LIST_INSERT_BEFORE(listelm, elm, field) do { \ (elm)->field.cle_prev = (listelm)->field.cle_prev; \ (elm)->field.cle_next = (listelm); \ ck_pr_fence_store(); \ ck_pr_store_ptr((listelm)->field.cle_prev, (elm)); \ (listelm)->field.cle_prev = &(elm)->field.cle_next; \ } while (0) #define CK_LIST_INSERT_HEAD(head, elm, field) do { \ (elm)->field.cle_next = (head)->clh_first; \ ck_pr_fence_store(); \ if ((elm)->field.cle_next != NULL) \ (head)->clh_first->field.cle_prev = &(elm)->field.cle_next; \ ck_pr_store_ptr(&(head)->clh_first, elm); \ (elm)->field.cle_prev = &(head)->clh_first; \ } while (0) #define CK_LIST_REMOVE(elm, field) do { \ ck_pr_store_ptr((elm)->field.cle_prev, (elm)->field.cle_next); \ if ((elm)->field.cle_next != NULL) \ (elm)->field.cle_next->field.cle_prev = (elm)->field.cle_prev; \ } while (0) #define CK_LIST_MOVE(head1, head2, field) do { \ ck_pr_store_ptr(&(head1)->clh_first, (head2)->clh_first); \ if ((head1)->clh_first != NULL) \ (head1)->clh_first->field.cle_prev = &(head1)->clh_first; \ } while (0) /* * This operation is not applied atomically. 
*/ #define CK_LIST_SWAP(head1, head2, type, field) do { \ struct type *swap_tmp = (head1)->clh_first; \ (head1)->clh_first = (head2)->clh_first; \ (head2)->clh_first = swap_tmp; \ if ((swap_tmp = (head1)->clh_first) != NULL) \ swap_tmp->field.cle_prev = &(head1)->clh_first; \ if ((swap_tmp = (head2)->clh_first) != NULL) \ swap_tmp->field.cle_prev = &(head2)->clh_first; \ } while (0) #endif /* CK_QUEUE_H */ diff --git a/include/ck_ring.h b/include/ck_ring.h index e5f0712ef7cf..9f6754e0cd24 100644 --- a/include/ck_ring.h +++ b/include/ck_ring.h @@ -1,687 +1,1037 @@ /* * Copyright 2009-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_RING_H #define CK_RING_H #include #include #include #include #include /* * Concurrent ring buffer. */ struct ck_ring { unsigned int c_head; char pad[CK_MD_CACHELINE - sizeof(unsigned int)]; unsigned int p_tail; unsigned int p_head; char _pad[CK_MD_CACHELINE - sizeof(unsigned int) * 2]; unsigned int size; unsigned int mask; }; typedef struct ck_ring ck_ring_t; struct ck_ring_buffer { void *value; }; typedef struct ck_ring_buffer ck_ring_buffer_t; CK_CC_INLINE static unsigned int ck_ring_size(const struct ck_ring *ring) { unsigned int c, p; c = ck_pr_load_uint(&ring->c_head); p = ck_pr_load_uint(&ring->p_tail); return (p - c) & ring->mask; } CK_CC_INLINE static unsigned int ck_ring_capacity(const struct ck_ring *ring) { + return ring->size; } +/* + * This function is only safe to call when there are no concurrent operations + * on the ring. This is primarily meant for persistent ck_ring use-cases. The + * function returns true if any mutations were performed on the ring. + */ +CK_CC_INLINE static bool +ck_ring_repair(struct ck_ring *ring) +{ + bool r = false; + + if (ring->p_tail != ring->p_head) { + ring->p_tail = ring->p_head; + r = true; + } + + return r; +} + +/* + * This can be called when no concurrent updates are occurring on the ring + * structure to check for consistency. This is primarily meant to be used for + * persistent storage of the ring. If this functions returns false, the ring + * is in an inconsistent state. 
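 *
 * A minimal recovery sketch (log_warn() is a placeholder for
 * application-level reporting): a process re-attaching a ring kept in
 * persistent or shared memory can validate and repair it before resuming
 * traffic,
 *
 *     if (ck_ring_valid(ring) == false)
 *             abort();
 *     if (ck_ring_repair(ring) == true)
 *             log_warn("producer counters were out of sync; repaired");
 *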
+ */ +CK_CC_INLINE static bool +ck_ring_valid(const struct ck_ring *ring) +{ + unsigned int size = ring->size; + unsigned int c_head = ring->c_head; + unsigned int p_head = ring->p_head; + + /* The ring must be a power of 2. */ + if (size & (size - 1)) + return false; + + /* The consumer counter must always be smaller than the producer. */ + if (c_head > p_head) + return false; + + /* The producer may only be up to size slots ahead of consumer. */ + if (p_head - c_head >= size) + return false; + + return true; +} + CK_CC_INLINE static void ck_ring_init(struct ck_ring *ring, unsigned int size) { ring->size = size; ring->mask = size - 1; ring->p_tail = 0; ring->p_head = 0; ring->c_head = 0; return; } /* * The _ck_ring_* namespace is internal only and must not used externally. */ + +/* + * This function will return a region of memory to write for the next value + * for a single producer. + */ +CK_CC_FORCE_INLINE static void * +_ck_ring_enqueue_reserve_sp(struct ck_ring *ring, + void *CK_CC_RESTRICT buffer, + unsigned int ts, + unsigned int *size) +{ + const unsigned int mask = ring->mask; + unsigned int consumer, producer, delta; + + consumer = ck_pr_load_uint(&ring->c_head); + producer = ring->p_tail; + delta = producer + 1; + if (size != NULL) + *size = (producer - consumer) & mask; + + if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) + return NULL; + + return (char *)buffer + ts * (producer & mask); +} + +/* + * This is to be called to commit and make visible a region of previously + * reserved with reverse_sp. + */ +CK_CC_FORCE_INLINE static void +_ck_ring_enqueue_commit_sp(struct ck_ring *ring) +{ + + ck_pr_fence_store(); + ck_pr_store_uint(&ring->p_tail, ring->p_tail + 1); + return; +} + CK_CC_FORCE_INLINE static bool _ck_ring_enqueue_sp(struct ck_ring *ring, void *CK_CC_RESTRICT buffer, const void *CK_CC_RESTRICT entry, unsigned int ts, unsigned int *size) { const unsigned int mask = ring->mask; unsigned int consumer, producer, delta; consumer = ck_pr_load_uint(&ring->c_head); producer = ring->p_tail; delta = producer + 1; if (size != NULL) *size = (producer - consumer) & mask; if (CK_CC_UNLIKELY((delta & mask) == (consumer & mask))) return false; buffer = (char *)buffer + ts * (producer & mask); memcpy(buffer, entry, ts); /* * Make sure to update slot value before indicating * that the slot is available for consumption. */ ck_pr_fence_store(); ck_pr_store_uint(&ring->p_tail, delta); return true; } CK_CC_FORCE_INLINE static bool _ck_ring_enqueue_sp_size(struct ck_ring *ring, void *CK_CC_RESTRICT buffer, const void *CK_CC_RESTRICT entry, unsigned int ts, unsigned int *size) { unsigned int sz; bool r; r = _ck_ring_enqueue_sp(ring, buffer, entry, ts, &sz); *size = sz; return r; } CK_CC_FORCE_INLINE static bool _ck_ring_dequeue_sc(struct ck_ring *ring, const void *CK_CC_RESTRICT buffer, void *CK_CC_RESTRICT target, unsigned int size) { const unsigned int mask = ring->mask; unsigned int consumer, producer; consumer = ring->c_head; producer = ck_pr_load_uint(&ring->p_tail); if (CK_CC_UNLIKELY(consumer == producer)) return false; /* * Make sure to serialize with respect to our snapshot * of the producer counter. */ ck_pr_fence_load(); buffer = (const char *)buffer + size * (consumer & mask); memcpy(target, buffer, size); /* * Make sure copy is completed with respect to consumer * update. 
*/ ck_pr_fence_store(); ck_pr_store_uint(&ring->c_head, consumer + 1); return true; } +CK_CC_FORCE_INLINE static void * +_ck_ring_enqueue_reserve_mp(struct ck_ring *ring, + void *buffer, + unsigned int ts, + unsigned int *ticket, + unsigned int *size) +{ + const unsigned int mask = ring->mask; + unsigned int producer, consumer, delta; + + producer = ck_pr_load_uint(&ring->p_head); + + for (;;) { + ck_pr_fence_load(); + consumer = ck_pr_load_uint(&ring->c_head); + + delta = producer + 1; + + if (CK_CC_LIKELY((producer - consumer) < mask)) { + if (ck_pr_cas_uint_value(&ring->p_head, + producer, delta, &producer) == true) { + break; + } + } else { + unsigned int new_producer; + + ck_pr_fence_load(); + new_producer = ck_pr_load_uint(&ring->p_head); + + if (producer == new_producer) { + if (size != NULL) + *size = (producer - consumer) & mask; + + return false; + } + + producer = new_producer; + } + } + + *ticket = producer; + if (size != NULL) + *size = (producer - consumer) & mask; + + return (char *)buffer + ts * (producer & mask); +} + +CK_CC_FORCE_INLINE static void +_ck_ring_enqueue_commit_mp(struct ck_ring *ring, unsigned int producer) +{ + + while (ck_pr_load_uint(&ring->p_tail) != producer) + ck_pr_stall(); + + ck_pr_fence_store(); + ck_pr_store_uint(&ring->p_tail, producer + 1); + return; +} + CK_CC_FORCE_INLINE static bool _ck_ring_enqueue_mp(struct ck_ring *ring, void *buffer, const void *entry, unsigned int ts, unsigned int *size) { const unsigned int mask = ring->mask; unsigned int producer, consumer, delta; bool r = true; producer = ck_pr_load_uint(&ring->p_head); for (;;) { /* * The snapshot of producer must be up to date with respect to * consumer. */ ck_pr_fence_load(); consumer = ck_pr_load_uint(&ring->c_head); delta = producer + 1; /* * Only try to CAS if the producer is not clearly stale (not * less than consumer) and the buffer is definitely not full. */ if (CK_CC_LIKELY((producer - consumer) < mask)) { if (ck_pr_cas_uint_value(&ring->p_head, producer, delta, &producer) == true) { break; } } else { unsigned int new_producer; /* * Slow path. Either the buffer is full or we have a * stale snapshot of p_head. Execute a second read of * p_read that must be ordered wrt the snapshot of * c_head. */ ck_pr_fence_load(); new_producer = ck_pr_load_uint(&ring->p_head); /* * Only fail if we haven't made forward progress in * production: the buffer must have been full when we * read new_producer (or we wrapped around UINT_MAX * during this iteration). */ if (producer == new_producer) { r = false; goto leave; } /* * p_head advanced during this iteration. Try again. */ producer = new_producer; } } buffer = (char *)buffer + ts * (producer & mask); memcpy(buffer, entry, ts); /* * Wait until all concurrent producers have completed writing * their data into the ring buffer. */ while (ck_pr_load_uint(&ring->p_tail) != producer) ck_pr_stall(); /* * Ensure that copy is completed before updating shared producer * counter. 
*/ ck_pr_fence_store(); ck_pr_store_uint(&ring->p_tail, delta); leave: if (size != NULL) *size = (producer - consumer) & mask; return r; } CK_CC_FORCE_INLINE static bool _ck_ring_enqueue_mp_size(struct ck_ring *ring, void *buffer, const void *entry, unsigned int ts, unsigned int *size) { unsigned int sz; bool r; r = _ck_ring_enqueue_mp(ring, buffer, entry, ts, &sz); *size = sz; return r; } CK_CC_FORCE_INLINE static bool _ck_ring_trydequeue_mc(struct ck_ring *ring, const void *buffer, void *data, unsigned int size) { const unsigned int mask = ring->mask; unsigned int consumer, producer; consumer = ck_pr_load_uint(&ring->c_head); ck_pr_fence_load(); producer = ck_pr_load_uint(&ring->p_tail); if (CK_CC_UNLIKELY(consumer == producer)) return false; ck_pr_fence_load(); buffer = (const char *)buffer + size * (consumer & mask); memcpy(data, buffer, size); ck_pr_fence_store_atomic(); return ck_pr_cas_uint(&ring->c_head, consumer, consumer + 1); } CK_CC_FORCE_INLINE static bool _ck_ring_dequeue_mc(struct ck_ring *ring, const void *buffer, void *data, unsigned int ts) { const unsigned int mask = ring->mask; unsigned int consumer, producer; consumer = ck_pr_load_uint(&ring->c_head); do { const char *target; /* * Producer counter must represent state relative to * our latest consumer snapshot. */ ck_pr_fence_load(); producer = ck_pr_load_uint(&ring->p_tail); if (CK_CC_UNLIKELY(consumer == producer)) return false; ck_pr_fence_load(); target = (const char *)buffer + ts * (consumer & mask); memcpy(data, target, ts); /* Serialize load with respect to head update. */ ck_pr_fence_store_atomic(); } while (ck_pr_cas_uint_value(&ring->c_head, consumer, consumer + 1, &consumer) == false); return true; } /* * The ck_ring_*_spsc namespace is the public interface for interacting with a * ring buffer containing pointers. Correctness is only provided if there is up * to one concurrent consumer and up to one concurrent producer. */ CK_CC_INLINE static bool ck_ring_enqueue_spsc_size(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry, unsigned int *size) { return _ck_ring_enqueue_sp_size(ring, buffer, &entry, sizeof(entry), size); } CK_CC_INLINE static bool ck_ring_enqueue_spsc(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry) { return _ck_ring_enqueue_sp(ring, buffer, &entry, sizeof(entry), NULL); } +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spsc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), + size); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spsc(struct ck_ring *ring, + struct ck_ring_buffer *buffer) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), + NULL); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_spsc(struct ck_ring *ring) +{ + + _ck_ring_enqueue_commit_sp(ring); + return; +} + CK_CC_INLINE static bool ck_ring_dequeue_spsc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, void *data) { return _ck_ring_dequeue_sc(ring, buffer, (void **)data, sizeof(void *)); } /* * The ck_ring_*_mpmc namespace is the public interface for interacting with a * ring buffer containing pointers. Correctness is provided for any number of * producers and consumers. 
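 *
 * A minimal setup sketch (process() is a placeholder and object is a void
 * pointer): the ring pairs with a caller-allocated, power-of-two sized
 * array of ck_ring_buffer slots,
 *
 *     #define SLOTS 1024
 *     static struct ck_ring ring;
 *     static struct ck_ring_buffer slots[SLOTS];
 *
 *     ck_ring_init(&ring, SLOTS);
 *
 *     producer:  if (ck_ring_enqueue_mpmc(&ring, slots, object) == false)
 *                        ... ring is full ...
 *     consumer:  if (ck_ring_dequeue_mpmc(&ring, slots, &object) == true)
 *                        process(object);
 *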
*/ CK_CC_INLINE static bool ck_ring_enqueue_mpmc(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry) { - return _ck_ring_enqueue_mp(ring, buffer, &entry, - sizeof(entry), NULL); + return _ck_ring_enqueue_mp(ring, buffer, &entry, sizeof(entry), NULL); } CK_CC_INLINE static bool ck_ring_enqueue_mpmc_size(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry, unsigned int *size) { - return _ck_ring_enqueue_mp_size(ring, buffer, &entry, - sizeof(entry), size); + return _ck_ring_enqueue_mp_size(ring, buffer, &entry, sizeof(entry), + size); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpmc(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, NULL); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpmc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, size); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_mpmc(struct ck_ring *ring, unsigned int ticket) +{ + + _ck_ring_enqueue_commit_mp(ring, ticket); + return; } CK_CC_INLINE static bool ck_ring_trydequeue_mpmc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, void *data) { return _ck_ring_trydequeue_mc(ring, buffer, (void **)data, sizeof(void *)); } CK_CC_INLINE static bool ck_ring_dequeue_mpmc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, void *data) { return _ck_ring_dequeue_mc(ring, buffer, (void **)data, sizeof(void *)); } /* * The ck_ring_*_spmc namespace is the public interface for interacting with a * ring buffer containing pointers. Correctness is provided for any number of * consumers with up to one concurrent producer. */ +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spmc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), size); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_spmc(struct ck_ring *ring, + struct ck_ring_buffer *buffer) +{ + + return _ck_ring_enqueue_reserve_sp(ring, buffer, sizeof(void *), NULL); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_spmc(struct ck_ring *ring) +{ + + _ck_ring_enqueue_commit_sp(ring); + return; +} + CK_CC_INLINE static bool ck_ring_enqueue_spmc_size(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry, unsigned int *size) { return _ck_ring_enqueue_sp_size(ring, buffer, &entry, sizeof(entry), size); } CK_CC_INLINE static bool ck_ring_enqueue_spmc(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry) { return _ck_ring_enqueue_sp(ring, buffer, &entry, sizeof(entry), NULL); } CK_CC_INLINE static bool ck_ring_trydequeue_spmc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, void *data) { return _ck_ring_trydequeue_mc(ring, buffer, (void **)data, sizeof(void *)); } CK_CC_INLINE static bool ck_ring_dequeue_spmc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, void *data) { return _ck_ring_dequeue_mc(ring, buffer, (void **)data, sizeof(void *)); } /* * The ck_ring_*_mpsc namespace is the public interface for interacting with a * ring buffer containing pointers. Correctness is provided for any number of * producers with up to one concurrent consumers. 
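 *
 * A minimal sketch of the new reserve/commit interface (ring and slots are
 * assumed to be set up as in the mpmc sketch above; object is a void
 * pointer): a producer may construct its entry in place instead of copying
 * a fully built value through enqueue,
 *
 *     unsigned int ticket;
 *     void **slot;
 *
 *     slot = ck_ring_enqueue_reserve_mpsc(&ring, slots, &ticket);
 *     if (slot != NULL) {
 *             *slot = object;
 *             ck_ring_enqueue_commit_mpsc(&ring, ticket);
 *     }
 *
 * Commits are serialized by ticket: ck_ring_enqueue_commit_mpsc waits until
 * all earlier reservations have been committed before publishing.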
*/ +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpsc(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, NULL); +} + +CK_CC_INLINE static void * +ck_ring_enqueue_reserve_mpsc_size(struct ck_ring *ring, + struct ck_ring_buffer *buffer, + unsigned int *ticket, + unsigned int *size) +{ + + return _ck_ring_enqueue_reserve_mp(ring, buffer, sizeof(void *), + ticket, size); +} + +CK_CC_INLINE static void +ck_ring_enqueue_commit_mpsc(struct ck_ring *ring, unsigned int ticket) +{ + + _ck_ring_enqueue_commit_mp(ring, ticket); + return; +} + CK_CC_INLINE static bool ck_ring_enqueue_mpsc(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry) { return _ck_ring_enqueue_mp(ring, buffer, &entry, sizeof(entry), NULL); } CK_CC_INLINE static bool ck_ring_enqueue_mpsc_size(struct ck_ring *ring, struct ck_ring_buffer *buffer, const void *entry, unsigned int *size) { return _ck_ring_enqueue_mp_size(ring, buffer, &entry, sizeof(entry), size); } CK_CC_INLINE static bool ck_ring_dequeue_mpsc(struct ck_ring *ring, const struct ck_ring_buffer *buffer, void *data) { return _ck_ring_dequeue_sc(ring, buffer, (void **)data, sizeof(void *)); } /* * CK_RING_PROTOTYPE is used to define a type-safe interface for inlining * values of a particular type in the ring the buffer. */ -#define CK_RING_PROTOTYPE(name, type) \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spsc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_sp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_sp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_spsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_sc(a, b, c, \ - sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spmc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_sp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_spmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_sp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_trydequeue_mc(a, \ - b, c, sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_spmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_mc(a, b, c, \ - sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_mp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpsc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_mp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_mpsc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_sc(a, b, c, \ - 
sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpmc_size_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c, \ - unsigned int *d) \ -{ \ - \ - return _ck_ring_enqueue_mp_size(a, b, c, \ - sizeof(struct type), d); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_enqueue_mpmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_enqueue_mp(a, b, c, \ - sizeof(struct type), NULL); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_trydequeue_mpmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_trydequeue_mc(a, \ - b, c, sizeof(struct type)); \ -} \ - \ -CK_CC_INLINE static bool \ -ck_ring_dequeue_mpmc_##name(struct ck_ring *a, \ - struct type *b, \ - struct type *c) \ -{ \ - \ - return _ck_ring_dequeue_mc(a, b, c, \ - sizeof(struct type)); \ +#define CK_RING_PROTOTYPE(name, type) \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spsc_##name(struct ck_ring *a, \ + struct type *b) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), c); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_sp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_sp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_spsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_sc(a, b, c, \ + sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spmc_##name(struct ck_ring *a, \ + struct type *b) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_spmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_sp(a, b, \ + sizeof(struct type), c); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_sp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_spmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_sp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_trydequeue_spmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_trydequeue_mc(a, \ + b, c, sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_spmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_mc(a, b, c, \ + sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpsc_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, NULL); \ +} \ + \ +CK_CC_INLINE 
static struct type * \ +ck_ring_enqueue_reserve_mpsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_mp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpsc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_mp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_mpsc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_sc(a, b, c, \ + sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, NULL); \ +} \ + \ +CK_CC_INLINE static struct type * \ +ck_ring_enqueue_reserve_mpmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + unsigned int *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_reserve_mp(a, b, \ + sizeof(struct type), c, d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpmc_size_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c, \ + unsigned int *d) \ +{ \ + \ + return _ck_ring_enqueue_mp_size(a, b, c, \ + sizeof(struct type), d); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_enqueue_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_enqueue_mp(a, b, c, \ + sizeof(struct type), NULL); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_trydequeue_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_trydequeue_mc(a, \ + b, c, sizeof(struct type)); \ +} \ + \ +CK_CC_INLINE static bool \ +ck_ring_dequeue_mpmc_##name(struct ck_ring *a, \ + struct type *b, \ + struct type *c) \ +{ \ + \ + return _ck_ring_dequeue_mc(a, b, c, \ + sizeof(struct type)); \ } /* * A single producer with one concurrent consumer. */ -#define CK_RING_ENQUEUE_SPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_SPSC(name, a, b, c) \ ck_ring_enqueue_spsc_##name(a, b, c) -#define CK_RING_ENQUEUE_SPSC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_SPSC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_spsc_size_##name(a, b, c, d) -#define CK_RING_DEQUEUE_SPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_SPSC(name, a, b, c) \ + ck_ring_enqueue_reserve_spsc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_SPSC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_spsc_size_##name(a, b, c, d) +#define CK_RING_DEQUEUE_SPSC(name, a, b, c) \ ck_ring_dequeue_spsc_##name(a, b, c) /* * A single producer with any number of concurrent consumers. 
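For the typed interface, a short sketch of how CK_RING_PROTOTYPE and the newly generated reserve variants might be instantiated. The entry record, the ring_buf array (a power-of-two-sized array of the inlined type), and the wrapper are made up for illustration:

struct entry {
	uint64_t value;
};

/* Generates ck_ring_*_entry() wrappers that inline struct entry in the ring. */
CK_RING_PROTOTYPE(entry, entry)

static bool
publish_entry(struct ck_ring *ring, struct entry *ring_buf, uint64_t v)
{
	struct entry *slot;

	slot = ck_ring_enqueue_reserve_spsc_entry(ring, ring_buf);
	if (slot == NULL)
		return false;

	slot->value = v;
	ck_ring_enqueue_commit_spsc(ring);
	return true;
}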
*/ -#define CK_RING_ENQUEUE_SPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_SPMC(name, a, b, c) \ ck_ring_enqueue_spmc_##name(a, b, c) -#define CK_RING_ENQUEUE_SPMC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_SPMC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_spmc_size_##name(a, b, c, d) -#define CK_RING_TRYDEQUEUE_SPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_SPMC(name, a, b, c) \ + ck_ring_enqueue_reserve_spmc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_SPMC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_spmc_size_##name(a, b, c, d) +#define CK_RING_TRYDEQUEUE_SPMC(name, a, b, c) \ ck_ring_trydequeue_spmc_##name(a, b, c) -#define CK_RING_DEQUEUE_SPMC(name, a, b, c) \ +#define CK_RING_DEQUEUE_SPMC(name, a, b, c) \ ck_ring_dequeue_spmc_##name(a, b, c) /* * Any number of concurrent producers with up to one * concurrent consumer. */ -#define CK_RING_ENQUEUE_MPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_MPSC(name, a, b, c) \ ck_ring_enqueue_mpsc_##name(a, b, c) -#define CK_RING_ENQUEUE_MPSC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_MPSC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_mpsc_size_##name(a, b, c, d) -#define CK_RING_DEQUEUE_MPSC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_MPSC(name, a, b, c) \ + ck_ring_enqueue_reserve_mpsc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_MPSC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_mpsc_size_##name(a, b, c, d) +#define CK_RING_DEQUEUE_MPSC(name, a, b, c) \ ck_ring_dequeue_mpsc_##name(a, b, c) /* * Any number of concurrent producers and consumers. */ -#define CK_RING_ENQUEUE_MPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_MPMC(name, a, b, c) \ ck_ring_enqueue_mpmc_##name(a, b, c) -#define CK_RING_ENQUEUE_MPMC_SIZE(name, a, b, c, d) \ +#define CK_RING_ENQUEUE_MPMC_SIZE(name, a, b, c, d) \ ck_ring_enqueue_mpmc_size_##name(a, b, c, d) -#define CK_RING_TRYDEQUEUE_MPMC(name, a, b, c) \ +#define CK_RING_ENQUEUE_RESERVE_MPMC(name, a, b, c) \ + ck_ring_enqueue_reserve_mpmc_##name(a, b, c) +#define CK_RING_ENQUEUE_RESERVE_MPMC_SIZE(name, a, b, c, d) \ + ck_ring_enqueue_reserve_mpmc_size_##name(a, b, c, d) +#define CK_RING_TRYDEQUEUE_MPMC(name, a, b, c) \ ck_ring_trydequeue_mpmc_##name(a, b, c) -#define CK_RING_DEQUEUE_MPMC(name, a, b, c) \ +#define CK_RING_DEQUEUE_MPMC(name, a, b, c) \ ck_ring_dequeue_mpmc_##name(a, b, c) #endif /* CK_RING_H */ diff --git a/include/gcc/aarch64/ck_pr.h b/include/gcc/aarch64/ck_pr.h index e739c4d5b18e..0a473072fffd 100644 --- a/include/gcc/aarch64/ck_pr.h +++ b/include/gcc/aarch64/ck_pr.h @@ -1,227 +1,227 @@ /* * Copyright 2009-2016 Samy Al Bahra. * Copyright 2013-2016 Olivier Houchard. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_PR_AARCH64_H #define CK_PR_AARCH64_H #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif #include #include /* * The following represent supported atomic operations. * These operations may be emulated. */ #include "ck_f_pr.h" /* * Minimum interface requirement met. */ #define CK_F_PR CK_CC_INLINE static void ck_pr_stall(void) { __asm__ __volatile__("" ::: "memory"); return; } #define CK_DMB_SY __asm __volatile("dmb ish" : : "r" (0) : "memory") #define CK_DMB_LD __asm __volatile("dmb ishld" : : "r" (0) : "memory") #define CK_DMB_ST __asm __volatile("dmb ishst" : : "r" (0) : "memory") #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ I; \ } CK_PR_FENCE(atomic, CK_DMB_ST) CK_PR_FENCE(atomic_store, CK_DMB_ST) CK_PR_FENCE(atomic_load, CK_DMB_SY) CK_PR_FENCE(store_atomic, CK_DMB_ST) CK_PR_FENCE(load_atomic, CK_DMB_SY) CK_PR_FENCE(store, CK_DMB_ST) CK_PR_FENCE(store_load, CK_DMB_SY) CK_PR_FENCE(load, CK_DMB_LD) CK_PR_FENCE(load_store, CK_DMB_SY) CK_PR_FENCE(memory, CK_DMB_SY) CK_PR_FENCE(acquire, CK_DMB_SY) CK_PR_FENCE(release, CK_DMB_SY) CK_PR_FENCE(acqrel, CK_DMB_SY) CK_PR_FENCE(lock, CK_DMB_SY) CK_PR_FENCE(unlock, CK_DMB_SY) #undef CK_PR_FENCE #undef CK_DMB_SI #undef CK_DMB_LD #undef CK_DMB_ST #define CK_PR_LOAD(S, M, T, I) \ CK_CC_INLINE static T \ ck_pr_md_load_##S(const M *target) \ { \ long r = 0; \ - __asm__ __volatile__(I " %w0, [%1];" \ + __asm__ __volatile__(I " %w0, [%1]\n" \ : "=r" (r) \ : "r" (target) \ : "memory"); \ return ((T)r); \ } #define CK_PR_LOAD_64(S, M, T, I) \ CK_CC_INLINE static T \ ck_pr_md_load_##S(const M *target) \ { \ long r = 0; \ - __asm__ __volatile__(I " %0, [%1];" \ + __asm__ __volatile__(I " %0, [%1]\n" \ : "=r" (r) \ : "r" (target) \ : "memory"); \ return ((T)r); \ } CK_PR_LOAD_64(ptr, void, void *, "ldr") #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, I) #define CK_PR_LOAD_S_64(S, T, I) CK_PR_LOAD_64(S, T, T, I) CK_PR_LOAD_S_64(64, uint64_t, "ldr") CK_PR_LOAD_S(32, uint32_t, "ldr") CK_PR_LOAD_S(16, uint16_t, "ldrh") CK_PR_LOAD_S(8, uint8_t, "ldrb") CK_PR_LOAD_S(uint, unsigned int, "ldr") CK_PR_LOAD_S(int, int, "ldr") CK_PR_LOAD_S(short, short, "ldrh") CK_PR_LOAD_S(char, char, "ldrb") #ifndef CK_PR_DISABLE_DOUBLE CK_PR_LOAD_S_64(double, double, "ldr") #endif #undef CK_PR_LOAD_S #undef CK_PR_LOAD_S_64 #undef CK_PR_LOAD #undef CK_PR_LAOD_64 #define CK_PR_STORE(S, M, T, I) \ CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %w1, [%0]" \ : \ : "r" (target), \ "r" (v) \ : "memory"); \ return; \ } #define CK_PR_STORE_64(S, M, T, I) \ CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %1, [%0]" \ : \ : "r" (target), \ "r" (v) \ : "memory"); \ return; \ } CK_PR_STORE_64(ptr, void, const void *, "str") #define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, I) #define CK_PR_STORE_S_64(S, T, I) CK_PR_STORE_64(S, T, T, I) CK_PR_STORE_S_64(64, uint64_t, "str") CK_PR_STORE_S(32, uint32_t, "str") 
CK_PR_STORE_S(16, uint16_t, "strh") CK_PR_STORE_S(8, uint8_t, "strb") CK_PR_STORE_S(uint, unsigned int, "str") CK_PR_STORE_S(int, int, "str") CK_PR_STORE_S(short, short, "strh") CK_PR_STORE_S(char, char, "strb") #ifndef CK_PR_DISABLE_DOUBLE CK_PR_STORE_S_64(double, double, "str") #endif #undef CK_PR_STORE_S #undef CK_PR_STORE_S_64 #undef CK_PR_STORE #undef CK_PR_STORE_64 #ifdef CK_MD_LSE_ENABLE #include "ck_pr_lse.h" #else #include "ck_pr_llsc.h" #endif /* * ck_pr_neg_*() functions can only be implemented via LL/SC, as there are no * LSE alternatives. */ #define CK_PR_NEG(N, M, T, W, R) \ CK_CC_INLINE static void \ ck_pr_neg_##N(M *target) \ { \ T previous = 0; \ T tmp = 0; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "neg %" R "0, %" R "0;" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + "neg %" R "0, %" R "0\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target) \ : "memory", "cc"); \ return; \ } CK_PR_NEG(ptr, void, void *, "", "") CK_PR_NEG(64, uint64_t, uint64_t, "", "") #define CK_PR_NEG_S(S, T, W) \ CK_PR_NEG(S, T, T, W, "w") \ CK_PR_NEG_S(32, uint32_t, "") CK_PR_NEG_S(uint, unsigned int, "") CK_PR_NEG_S(int, int, "") CK_PR_NEG_S(16, uint16_t, "h") CK_PR_NEG_S(8, uint8_t, "b") CK_PR_NEG_S(short, short, "h") CK_PR_NEG_S(char, char, "b") #undef CK_PR_NEG_S #undef CK_PR_NEG #endif /* CK_PR_AARCH64_H */ diff --git a/include/gcc/aarch64/ck_pr_llsc.h b/include/gcc/aarch64/ck_pr_llsc.h index aa4e3090fa3a..6500d9661c08 100644 --- a/include/gcc/aarch64/ck_pr_llsc.h +++ b/include/gcc/aarch64/ck_pr_llsc.h @@ -1,352 +1,352 @@ /* * Copyright 2009-2016 Samy Al Bahra. * Copyright 2013-2016 Olivier Houchard. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef CK_PR_AARCH64_LLSC_H #define CK_PR_AARCH64_LLSC_H #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif CK_CC_INLINE static bool ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], uint64_t value[2]) { uint64_t tmp1, tmp2; __asm__ __volatile__("1:" - "ldxp %0, %1, [%4];" - "mov %2, %0;" - "mov %3, %1;" - "eor %0, %0, %5;" - "eor %1, %1, %6;" - "orr %1, %0, %1;" - "mov %w0, #0;" - "cbnz %1, 2f;" - "stxp %w0, %7, %8, [%4];" - "cbnz %w0, 1b;" - "mov %w0, #1;" + "ldxp %0, %1, [%4]\n" + "mov %2, %0\n" + "mov %3, %1\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %7, %8, [%4]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" "2:" : "=&r" (tmp1), "=&r" (tmp2), "=&r" (value[0]), "=&r" (value[1]) : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1]) : "cc", "memory"); return (tmp1); } CK_CC_INLINE static bool ck_pr_cas_ptr_2_value(void *target, void *compare, void *set, void *value) { return (ck_pr_cas_64_2_value(CK_CPP_CAST(uint64_t *, target), CK_CPP_CAST(uint64_t *, compare), CK_CPP_CAST(uint64_t *, set), CK_CPP_CAST(uint64_t *, value))); } CK_CC_INLINE static bool ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) { uint64_t tmp1, tmp2; __asm__ __volatile__("1:" - "ldxp %0, %1, [%2];" - "eor %0, %0, %3;" - "eor %1, %1, %4;" - "orr %1, %0, %1;" - "mov %w0, #0;" - "cbnz %1, 2f;" - "stxp %w0, %5, %6, [%2];" - "cbnz %w0, 1b;" - "mov %w0, #1;" + "ldxp %0, %1, [%2]\n" + "eor %0, %0, %3\n" + "eor %1, %1, %4\n" + "orr %1, %0, %1\n" + "mov %w0, #0\n" + "cbnz %1, 2f\n" + "stxp %w0, %5, %6, [%2]\n" + "cbnz %w0, 1b\n" + "mov %w0, #1\n" "2:" : "=&r" (tmp1), "=&r" (tmp2) : "r" (target), "r" (compare[0]), "r" (compare[1]), "r" (set[0]), "r" (set[1]) : "cc", "memory"); return (tmp1); } CK_CC_INLINE static bool ck_pr_cas_ptr_2(void *target, void *compare, void *set) { return (ck_pr_cas_64_2(CK_CPP_CAST(uint64_t *, target), CK_CPP_CAST(uint64_t *, compare), CK_CPP_CAST(uint64_t *, set))); } #define CK_PR_CAS(N, M, T, W, R) \ CK_CC_INLINE static bool \ ck_pr_cas_##N##_value(M *target, T compare, T set, M *value) \ { \ T previous; \ T tmp; \ - __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "cmp %" R "0, %" R "4;" \ - "b.ne 2f;" \ - "stxr" W " %w1, %" R "3, [%2];" \ - "cbnz %w1, 1b;" \ + __asm__ __volatile__("1:\n" \ + "ldxr" W " %" R "0, [%2]\n" \ + "cmp %" R "0, %" R "4\n" \ + "b.ne 2f\n" \ + "stxr" W " %w1, %" R "3, [%2]\n" \ + "cbnz %w1, 1b\n" \ "2:" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ "r" (set), \ "r" (compare) \ : "memory", "cc"); \ *(T *)value = previous; \ return (previous == compare); \ } \ CK_CC_INLINE static bool \ ck_pr_cas_##N(M *target, T compare, T set) \ { \ T previous; \ T tmp; \ __asm__ __volatile__( \ "1:" \ - "ldxr" W " %" R "0, [%2];" \ - "cmp %" R "0, %" R "4;" \ - "b.ne 2f;" \ - "stxr" W " %w1, %" R "3, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n" \ + "cmp %" R "0, %" R "4\n" \ + "b.ne 2f\n" \ + "stxr" W " %w1, %" R "3, [%2]\n" \ + "cbnz %w1, 1b\n" \ "2:" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ "r" (set), \ "r" (compare) \ : "memory", "cc"); \ return (previous == compare); \ } CK_PR_CAS(ptr, void, void *, "", "") #define CK_PR_CAS_S(N, M, W, R) CK_PR_CAS(N, M, M, W, R) CK_PR_CAS_S(64, uint64_t, "", "") #ifndef CK_PR_DISABLE_DOUBLE CK_PR_CAS_S(double, double, "", "") #endif CK_PR_CAS_S(32, uint32_t, "", "w") CK_PR_CAS_S(uint, unsigned int, "", "w") CK_PR_CAS_S(int, 
int, "", "w") CK_PR_CAS_S(16, uint16_t, "h", "w") CK_PR_CAS_S(8, uint8_t, "b", "w") CK_PR_CAS_S(short, short, "h", "w") CK_PR_CAS_S(char, char, "b", "w") #undef CK_PR_CAS_S #undef CK_PR_CAS #define CK_PR_FAS(N, M, T, W, R) \ CK_CC_INLINE static T \ ck_pr_fas_##N(M *target, T v) \ { \ T previous; \ T tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - "stxr" W " %w1, %" R "3, [%2];"\ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + "stxr" W " %w1, %" R "3, [%2]\n"\ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ "r" (v) \ : "memory", "cc"); \ return (previous); \ } CK_PR_FAS(64, uint64_t, uint64_t, "", "") CK_PR_FAS(32, uint32_t, uint32_t, "", "w") CK_PR_FAS(ptr, void, void *, "", "") CK_PR_FAS(int, int, int, "", "w") CK_PR_FAS(uint, unsigned int, unsigned int, "", "w") CK_PR_FAS(16, uint16_t, uint16_t, "h", "w") CK_PR_FAS(8, uint8_t, uint8_t, "b", "w") CK_PR_FAS(short, short, short, "h", "w") CK_PR_FAS(char, char, char, "b", "w") #undef CK_PR_FAS #define CK_PR_UNARY(O, N, M, T, I, W, R) \ CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target) \ { \ T previous = 0; \ T tmp = 0; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];" \ - I ";" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + I "\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target) \ : "memory", "cc"); \ return; \ } CK_PR_UNARY(inc, ptr, void, void *, "add %0, %0, #1", "", "") CK_PR_UNARY(dec, ptr, void, void *, "sub %0, %0, #1", "", "") CK_PR_UNARY(not, ptr, void, void *, "mvn %0, %0", "", "") CK_PR_UNARY(inc, 64, uint64_t, uint64_t, "add %0, %0, #1", "", "") CK_PR_UNARY(dec, 64, uint64_t, uint64_t, "sub %0, %0, #1", "", "") CK_PR_UNARY(not, 64, uint64_t, uint64_t, "mvn %0, %0", "", "") #define CK_PR_UNARY_S(S, T, W) \ CK_PR_UNARY(inc, S, T, T, "add %w0, %w0, #1", W, "w") \ CK_PR_UNARY(dec, S, T, T, "sub %w0, %w0, #1", W, "w") \ CK_PR_UNARY(not, S, T, T, "mvn %w0, %w0", W, "w") \ CK_PR_UNARY_S(32, uint32_t, "") CK_PR_UNARY_S(uint, unsigned int, "") CK_PR_UNARY_S(int, int, "") CK_PR_UNARY_S(16, uint16_t, "h") CK_PR_UNARY_S(8, uint8_t, "b") CK_PR_UNARY_S(short, short, "h") CK_PR_UNARY_S(char, char, "b") #undef CK_PR_UNARY_S #undef CK_PR_UNARY #define CK_PR_BINARY(O, N, M, T, I, W, R) \ CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target, T delta) \ { \ T previous; \ T tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %" R "0, [%2];"\ - I " %" R "0, %" R "0, %" R "3;" \ - "stxr" W " %w1, %" R "0, [%2];" \ - "cbnz %w1, 1b;" \ + "ldxr" W " %" R "0, [%2]\n"\ + I " %" R "0, %" R "0, %" R "3\n" \ + "stxr" W " %w1, %" R "0, [%2]\n" \ + "cbnz %w1, 1b\n" \ : "=&r" (previous), \ "=&r" (tmp) \ : "r" (target), \ "r" (delta) \ : "memory", "cc"); \ return; \ } CK_PR_BINARY(and, ptr, void, uintptr_t, "and", "", "") CK_PR_BINARY(add, ptr, void, uintptr_t, "add", "", "") CK_PR_BINARY(or, ptr, void, uintptr_t, "orr", "", "") CK_PR_BINARY(sub, ptr, void, uintptr_t, "sub", "", "") CK_PR_BINARY(xor, ptr, void, uintptr_t, "eor", "", "") CK_PR_BINARY(and, 64, uint64_t, uint64_t, "and", "", "") CK_PR_BINARY(add, 64, uint64_t, uint64_t, "add", "", "") CK_PR_BINARY(or, 64, uint64_t, uint64_t, "orr", "", "") CK_PR_BINARY(sub, 64, uint64_t, uint64_t, "sub", "", "") CK_PR_BINARY(xor, 64, uint64_t, uint64_t, "eor", "", "") #define CK_PR_BINARY_S(S, T, W) \ CK_PR_BINARY(and, S, T, T, "and", W, "w") \ CK_PR_BINARY(add, S, T, T, "add", W, "w") \ CK_PR_BINARY(or, S, T, T, "orr", W, "w") \ CK_PR_BINARY(sub, S, T, 
T, "sub", W, "w") \ CK_PR_BINARY(xor, S, T, T, "eor", W, "w") CK_PR_BINARY_S(32, uint32_t, "") CK_PR_BINARY_S(uint, unsigned int, "") CK_PR_BINARY_S(int, int, "") CK_PR_BINARY_S(16, uint16_t, "h") CK_PR_BINARY_S(8, uint8_t, "b") CK_PR_BINARY_S(short, short, "h") CK_PR_BINARY_S(char, char, "b") #undef CK_PR_BINARY_S #undef CK_PR_BINARY CK_CC_INLINE static void * ck_pr_faa_ptr(void *target, uintptr_t delta) { uintptr_t previous, r, tmp; __asm__ __volatile__("1:" - "ldxr %0, [%3];" - "add %1, %4, %0;" - "stxr %w2, %1, [%3];" - "cbnz %w2, 1b;" + "ldxr %0, [%3]\n" + "add %1, %4, %0\n" + "stxr %w2, %1, [%3]\n" + "cbnz %w2, 1b\n" : "=&r" (previous), "=&r" (r), "=&r" (tmp) : "r" (target), "r" (delta) : "memory", "cc"); return (void *)(previous); } CK_CC_INLINE static uint64_t ck_pr_faa_64(uint64_t *target, uint64_t delta) { uint64_t previous, r, tmp; __asm__ __volatile__("1:" - "ldxr %0, [%3];" - "add %1, %4, %0;" - "stxr %w2, %1, [%3];" + "ldxr %0, [%3]\n" + "add %1, %4, %0\n" + "stxr %w2, %1, [%3]\n" "cbnz %w2, 1b;" : "=&r" (previous), "=&r" (r), "=&r" (tmp) : "r" (target), "r" (delta) : "memory", "cc"); return (previous); } #define CK_PR_FAA(S, T, W) \ CK_CC_INLINE static T \ ck_pr_faa_##S(T *target, T delta) \ { \ T previous, r, tmp; \ __asm__ __volatile__("1:" \ - "ldxr" W " %w0, [%3];" \ - "add %w1, %w4, %w0;" \ - "stxr" W " %w2, %w1, [%3];" \ - "cbnz %w2, 1b;" \ + "ldxr" W " %w0, [%3]\n" \ + "add %w1, %w4, %w0\n" \ + "stxr" W " %w2, %w1, [%3]\n" \ + "cbnz %w2, 1b\n" \ : "=&r" (previous), \ "=&r" (r), \ "=&r" (tmp) \ : "r" (target), \ "r" (delta) \ : "memory", "cc"); \ return (previous); \ } CK_PR_FAA(32, uint32_t, "") CK_PR_FAA(uint, unsigned int, "") CK_PR_FAA(int, int, "") CK_PR_FAA(16, uint16_t, "h") CK_PR_FAA(8, uint8_t, "b") CK_PR_FAA(short, short, "h") CK_PR_FAA(char, char, "b") #undef CK_PR_FAA #endif /* CK_PR_AARCH64_LLSC_H */ diff --git a/include/gcc/aarch64/ck_pr_lse.h b/include/gcc/aarch64/ck_pr_lse.h index e2c9554c8b4a..e450e72d60ec 100644 --- a/include/gcc/aarch64/ck_pr_lse.h +++ b/include/gcc/aarch64/ck_pr_lse.h @@ -1,298 +1,299 @@ /* * Copyright 2009-2016 Samy Al Bahra. * Copyright 2013-2016 Olivier Houchard. * Copyright 2016 Alexey Kopytov. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef CK_PR_AARCH64_LSE_H #define CK_PR_AARCH64_LSE_H +#error bite #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif CK_CC_INLINE static bool ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], uint64_t value[2]) { uint64_t tmp1; uint64_t tmp2; register uint64_t x0 __asm__ ("x0") = compare[0]; register uint64_t x1 __asm__ ("x1") = compare[1]; register uint64_t x2 __asm__ ("x2") = set[0]; register uint64_t x3 __asm__ ("x3") = set[1]; - __asm__ __volatile__("casp %0, %1, %4, %5, [%6];" - "eor %2, %0, %7;" - "eor %3, %1, %8;" - "orr %2, %2, %3;" + __asm__ __volatile__("casp %0, %1, %4, %5, [%6]\n" + "eor %2, %0, %7\n" + "eor %3, %1, %8\n" + "orr %2, %2, %3\n" : "+&r" (x0), "+&r" (x1), "=&r" (tmp1), "=&r" (tmp2) : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1]) : "memory"); value[0] = x0; value[1] = x1; return (!!tmp1); } CK_CC_INLINE static bool ck_pr_cas_ptr_2_value(void *target, void *compare, void *set, void *value) { return (ck_pr_cas_64_2_value(CK_CPP_CAST(uint64_t *, target), CK_CPP_CAST(uint64_t *, compare), CK_CPP_CAST(uint64_t *, set), CK_CPP_CAST(uint64_t *, value))); } CK_CC_INLINE static bool ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) { register uint64_t x0 __asm__ ("x0") = compare[0]; register uint64_t x1 __asm__ ("x1") = compare[1]; register uint64_t x2 __asm__ ("x2") = set[0]; register uint64_t x3 __asm__ ("x3") = set[1]; - __asm__ __volatile__("casp %0, %1, %2, %3, [%4];" - "eor %0, %0, %5;" - "eor %1, %1, %6;" - "orr %0, %0, %1;" + __asm__ __volatile__("casp %0, %1, %2, %3, [%4]\n" + "eor %0, %0, %5\n" + "eor %1, %1, %6\n" + "orr %0, %0, %1\n" : "+&r" (x0), "+&r" (x1) : "r" (x2), "r" (x3), "r" (target), "r" (compare[0]), "r" (compare[1]) : "memory"); return (!!x0); } CK_CC_INLINE static bool ck_pr_cas_ptr_2(void *target, void *compare, void *set) { return (ck_pr_cas_64_2(CK_CPP_CAST(uint64_t *, target), CK_CPP_CAST(uint64_t *, compare), CK_CPP_CAST(uint64_t *, set))); } #define CK_PR_CAS(N, M, T, W, R) \ CK_CC_INLINE static bool \ ck_pr_cas_##N##_value(M *target, T compare, T set, M *value) \ { \ *(T *)value = compare; \ __asm__ __volatile__( \ - "cas" W " %" R "0, %" R "2, [%1];" \ + "cas" W " %" R "0, %" R "2, [%1]\n"\ : "+&r" (*(T *)value) \ : "r" (target), \ "r" (set) \ : "memory"); \ return (*(T *)value == compare); \ } \ CK_CC_INLINE static bool \ ck_pr_cas_##N(M *target, T compare, T set) \ { \ T previous = compare; \ __asm__ __volatile__( \ - "cas" W " %" R "0, %" R "2, [%1];" \ + "cas" W " %" R "0, %" R "2, [%1]\n"\ : "+&r" (previous) \ : "r" (target), \ "r" (set) \ : "memory"); \ return (previous == compare); \ } CK_PR_CAS(ptr, void, void *, "", "") #define CK_PR_CAS_S(N, M, W, R) CK_PR_CAS(N, M, M, W, R) CK_PR_CAS_S(64, uint64_t, "", "") #ifndef CK_PR_DISABLE_DOUBLE CK_PR_CAS_S(double, double, "", "") #endif CK_PR_CAS_S(32, uint32_t, "", "w") CK_PR_CAS_S(uint, unsigned int, "", "w") CK_PR_CAS_S(int, int, "", "w") CK_PR_CAS_S(16, uint16_t, "h", "w") CK_PR_CAS_S(8, uint8_t, "b", "w") CK_PR_CAS_S(short, short, "h", "w") CK_PR_CAS_S(char, char, "b", "w") #undef CK_PR_CAS_S #undef CK_PR_CAS #define CK_PR_FAS(N, M, T, W, R) \ CK_CC_INLINE static T \ ck_pr_fas_##N(M *target, T v) \ { \ T previous; \ __asm__ __volatile__( \ - "swp" W " %" R "2, %" R "0, [%1];" \ + "swp" W " %" R "2, %" R "0, [%1]\n"\ : "=&r" (previous) \ : "r" (target), \ "r" (v) \ : "memory"); \ return (previous); \ } CK_PR_FAS(64, uint64_t, uint64_t, "", "") CK_PR_FAS(32, uint32_t, uint32_t, "", 
"w") CK_PR_FAS(ptr, void, void *, "", "") CK_PR_FAS(int, int, int, "", "w") CK_PR_FAS(uint, unsigned int, unsigned int, "", "w") CK_PR_FAS(16, uint16_t, uint16_t, "h", "w") CK_PR_FAS(8, uint8_t, uint8_t, "b", "w") CK_PR_FAS(short, short, short, "h", "w") CK_PR_FAS(char, char, char, "b", "w") #undef CK_PR_FAS #define CK_PR_UNARY(O, N, M, T, I, W, R, S) \ CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target) \ { \ - __asm__ __volatile__(I ";" \ - "st" S W " " R "0, [%0];" \ + __asm__ __volatile__(I "\n" \ + "st" S W " " R "0, [%0]\n" \ : \ : "r" (target) \ : "x0", "memory"); \ return; \ } CK_PR_UNARY(inc, ptr, void, void *, "mov x0, 1", "", "x", "add") CK_PR_UNARY(dec, ptr, void, void *, "mov x0, -1", "", "x", "add") CK_PR_UNARY(not, ptr, void, void *, "mov x0, -1", "", "x", "eor") CK_PR_UNARY(inc, 64, uint64_t, uint64_t, "mov x0, 1", "", "x", "add") CK_PR_UNARY(dec, 64, uint64_t, uint64_t, "mov x0, -1", "", "x", "add") CK_PR_UNARY(not, 64, uint64_t, uint64_t, "mov x0, -1", "", "x", "eor") #define CK_PR_UNARY_S(S, T, W) \ CK_PR_UNARY(inc, S, T, T, "mov w0, 1", W, "w", "add") \ CK_PR_UNARY(dec, S, T, T, "mov w0, -1", W, "w", "add") \ CK_PR_UNARY(not, S, T, T, "mov w0, -1", W, "w", "eor") \ CK_PR_UNARY_S(32, uint32_t, "") CK_PR_UNARY_S(uint, unsigned int, "") CK_PR_UNARY_S(int, int, "") CK_PR_UNARY_S(16, uint16_t, "h") CK_PR_UNARY_S(8, uint8_t, "b") CK_PR_UNARY_S(short, short, "h") CK_PR_UNARY_S(char, char, "b") #undef CK_PR_UNARY_S #undef CK_PR_UNARY #define CK_PR_BINARY(O, N, M, T, S, W, R, I) \ CK_CC_INLINE static void \ ck_pr_##O##_##N(M *target, T delta) \ { \ - __asm__ __volatile__(I ";" \ - "st" S W " %" R "0, [%1];" \ + __asm__ __volatile__(I "\n" \ + "st" S W " %" R "0, [%1]\n"\ : "+&r" (delta) \ : "r" (target) \ : "memory"); \ return; \ } CK_PR_BINARY(and, ptr, void, uintptr_t, "clr", "", "", "mvn %0, %0") CK_PR_BINARY(add, ptr, void, uintptr_t, "add", "", "", "") CK_PR_BINARY(or, ptr, void, uintptr_t, "set", "", "", "") CK_PR_BINARY(sub, ptr, void, uintptr_t, "add", "", "", "neg %0, %0") CK_PR_BINARY(xor, ptr, void, uintptr_t, "eor", "", "", "") CK_PR_BINARY(and, 64, uint64_t, uint64_t, "clr", "", "", "mvn %0, %0") CK_PR_BINARY(add, 64, uint64_t, uint64_t, "add", "", "", "") CK_PR_BINARY(or, 64, uint64_t, uint64_t, "set", "", "", "") CK_PR_BINARY(sub, 64, uint64_t, uint64_t, "add", "", "", "neg %0, %0") CK_PR_BINARY(xor, 64, uint64_t, uint64_t, "eor", "", "", "") #define CK_PR_BINARY_S(S, T, W) \ CK_PR_BINARY(and, S, T, T, "clr", W, "w", "mvn %w0, %w0") \ CK_PR_BINARY(add, S, T, T, "add", W, "w", "") \ CK_PR_BINARY(or, S, T, T, "set", W, "w", "") \ CK_PR_BINARY(sub, S, T, T, "add", W, "w", "neg %w0, %w0") \ CK_PR_BINARY(xor, S, T, T, "eor", W, "w", "") CK_PR_BINARY_S(32, uint32_t, "") CK_PR_BINARY_S(uint, unsigned int, "") CK_PR_BINARY_S(int, int, "") CK_PR_BINARY_S(16, uint16_t, "h") CK_PR_BINARY_S(8, uint8_t, "b") CK_PR_BINARY_S(short, short, "h") CK_PR_BINARY_S(char, char, "b") #undef CK_PR_BINARY_S #undef CK_PR_BINARY CK_CC_INLINE static void * ck_pr_faa_ptr(void *target, uintptr_t delta) { uintptr_t previous; __asm__ __volatile__( - "ldadd %2, %0, [%1];" + "ldadd %2, %0, [%1]\n" : "=r" (previous) : "r" (target), "r" (delta) : "memory"); return (void *)(previous); } CK_CC_INLINE static uint64_t ck_pr_faa_64(uint64_t *target, uint64_t delta) { uint64_t previous; __asm__ __volatile__( - "ldadd %2, %0, [%1];" + "ldadd %2, %0, [%1]\n" : "=r" (previous) : "r" (target), "r" (delta) : "memory"); return (previous); } #define CK_PR_FAA(S, T, W) \ CK_CC_INLINE static T \ ck_pr_faa_##S(T 
*target, T delta) \ { \ T previous; \ __asm__ __volatile__( \ - "ldadd" W " %w2, %w0, [%1];" \ + "ldadd" W " %w2, %w0, [%1]\n" \ : "=r" (previous) \ : "r" (target), \ "r" (delta) \ : "memory"); \ return (previous); \ } CK_PR_FAA(32, uint32_t, "") CK_PR_FAA(uint, unsigned int, "") CK_PR_FAA(int, int, "") CK_PR_FAA(16, uint16_t, "h") CK_PR_FAA(8, uint8_t, "b") CK_PR_FAA(short, short, "h") CK_PR_FAA(char, char, "b") #undef CK_PR_FAA #endif /* CK_PR_AARCH64_LSE_H */ diff --git a/include/gcc/ck_cc.h b/include/gcc/ck_cc.h index 6ebc59cb5921..0a6d17b93569 100644 --- a/include/gcc/ck_cc.h +++ b/include/gcc/ck_cc.h @@ -1,141 +1,150 @@ /* * Copyright 2009-2015 Samy Al Bahra. * Copyright 2014 Paul Khuong. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_GCC_CC_H #define CK_GCC_CC_H #include #ifdef __SUNPRO_C #define CK_CC_UNUSED #define CK_CC_USED #define CK_CC_IMM #define CK_CC_IMM_U32 #else #define CK_CC_UNUSED __attribute__((unused)) #define CK_CC_USED __attribute__((used)) #define CK_CC_IMM "i" + +#define CK_CC_CONTAINER(F, T, M, N) \ + CK_CC_INLINE static T * \ + N(F *p) \ + { \ + \ + return (T *)(void *)((char *)p - __builtin_offsetof(T, M)); \ + } + #if defined(__x86_64__) || defined(__x86__) #define CK_CC_IMM_U32 "Z" #define CK_CC_IMM_S32 "e" #else #define CK_CC_IMM_U32 CK_CC_IMM #define CK_CC_IMM_S32 CK_CC_IMM #endif /* __x86_64__ || __x86__ */ #endif #ifdef __OPTIMIZE__ #define CK_CC_INLINE CK_CC_UNUSED inline #else #define CK_CC_INLINE CK_CC_UNUSED #endif #define CK_CC_FORCE_INLINE CK_CC_UNUSED __attribute__((always_inline)) inline #define CK_CC_RESTRICT __restrict__ /* * Packed attribute. */ #define CK_CC_PACKED __attribute__((packed)) /* * Weak reference. */ #define CK_CC_WEAKREF __attribute__((weakref)) /* * Alignment attribute. */ #define CK_CC_ALIGN(B) __attribute__((aligned(B))) /* * Cache align. */ #define CK_CC_CACHELINE CK_CC_ALIGN(CK_MD_CACHELINE) /* * These are functions which should be avoided. */ #ifdef __freestanding__ #pragma GCC poison malloc free #endif /* * Branch execution hints. */ #define CK_CC_LIKELY(x) (__builtin_expect(!!(x), 1)) #define CK_CC_UNLIKELY(x) (__builtin_expect(!!(x), 0)) /* * Some compilers are overly strict regarding aliasing semantics. 
* Unfortunately, in many cases it makes more sense to pay aliasing * cost rather than overly expensive register spillage. */ #define CK_CC_ALIASED __attribute__((__may_alias__)) /* * Compile-time typeof */ #define CK_CC_TYPEOF(X, DEFAULT) __typeof__(X) /* * Portability wrappers for bitwise operations. */ #ifndef CK_MD_CC_BUILTIN_DISABLE #define CK_F_CC_FFS CK_CC_INLINE static int ck_cc_ffs(unsigned int x) { return __builtin_ffsl(x); } #define CK_F_CC_FFSL CK_CC_INLINE static int ck_cc_ffsl(unsigned long x) { return __builtin_ffsll(x); } #define CK_F_CC_CTZ CK_CC_INLINE static int ck_cc_ctz(unsigned int x) { return __builtin_ctz(x); } #define CK_F_CC_POPCOUNT CK_CC_INLINE static int ck_cc_popcount(unsigned int x) { return __builtin_popcount(x); } #endif /* CK_MD_CC_BUILTIN_DISABLE */ #endif /* CK_GCC_CC_H */ diff --git a/include/gcc/x86/ck_pr.h b/include/gcc/x86/ck_pr.h index e678e830e0b4..12291c830dfd 100644 --- a/include/gcc/x86/ck_pr.h +++ b/include/gcc/x86/ck_pr.h @@ -1,408 +1,419 @@ /* * Copyright 2009-2015 Samy Al Bahra. * Copyright 2011 Devon H. O'Dell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_PR_X86_H #define CK_PR_X86_H #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif #include #include #include /* * The following represent supported atomic operations. * These operations may be emulated. */ #include "ck_f_pr.h" /* Minimum requirements for the CK_PR interface are met. */ #define CK_F_PR /* * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined * delay". */ CK_CC_INLINE static void ck_pr_stall(void) { __asm__ __volatile__("pause" ::: "memory"); return; } #ifdef CK_MD_UMP #define CK_PR_LOCK_PREFIX #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__("" ::: "memory"); \ return; \ } #else #define CK_PR_LOCK_PREFIX "lock " #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__(I ::: "memory"); \ return; \ } #endif /* CK_MD_UMP */ #if defined(CK_MD_SSE_DISABLE) /* If SSE is disabled, then use atomic operations for serialization. 
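Looping back to the gcc/ck_cc.h hunk above: a small sketch of how the __builtin_offsetof-based CK_CC_CONTAINER might be instantiated. The struct and function names are hypothetical; given a pointer to the embedded member, the generated helper recovers the enclosing object:

struct hook {
	struct hook *next;
};

struct item {
	int key;
	struct hook link;
};

/* Generates: static struct item *item_from_hook(struct hook *). */
CK_CC_CONTAINER(struct hook, struct item, link, item_from_hook)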
*/ #define CK_MD_X86_MFENCE "lock addl $0, (%%esp)" #define CK_MD_X86_SFENCE CK_MD_X86_MFENCE #define CK_MD_X86_LFENCE CK_MD_X86_MFENCE #else #define CK_MD_X86_SFENCE "sfence" #define CK_MD_X86_LFENCE "lfence" #define CK_MD_X86_MFENCE "mfence" #endif /* !CK_MD_SSE_DISABLE */ CK_PR_FENCE(atomic, "") CK_PR_FENCE(atomic_store, "") CK_PR_FENCE(atomic_load, "") CK_PR_FENCE(store_atomic, "") CK_PR_FENCE(load_atomic, "") CK_PR_FENCE(load, CK_MD_X86_LFENCE) CK_PR_FENCE(load_store, CK_MD_X86_MFENCE) CK_PR_FENCE(store, CK_MD_X86_SFENCE) CK_PR_FENCE(store_load, CK_MD_X86_MFENCE) CK_PR_FENCE(memory, CK_MD_X86_MFENCE) CK_PR_FENCE(release, CK_MD_X86_MFENCE) CK_PR_FENCE(acquire, CK_MD_X86_MFENCE) CK_PR_FENCE(acqrel, CK_MD_X86_MFENCE) CK_PR_FENCE(lock, CK_MD_X86_MFENCE) CK_PR_FENCE(unlock, CK_MD_X86_MFENCE) #undef CK_PR_FENCE /* * Atomic fetch-and-store operations. */ #define CK_PR_FAS(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_fas_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %0, %1" \ : "+m" (*(C *)target), \ "+q" (v) \ : \ : "memory"); \ return v; \ } -CK_PR_FAS(ptr, void, void *, char, "xchgl") +CK_PR_FAS(ptr, void, void *, uint32_t, "xchgl") #define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I) CK_PR_FAS_S(char, char, "xchgb") CK_PR_FAS_S(uint, unsigned int, "xchgl") CK_PR_FAS_S(int, int, "xchgl") CK_PR_FAS_S(32, uint32_t, "xchgl") CK_PR_FAS_S(16, uint16_t, "xchgw") CK_PR_FAS_S(8, uint8_t, "xchgb") #undef CK_PR_FAS_S #undef CK_PR_FAS #define CK_PR_LOAD(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_md_load_##S(const M *target) \ { \ T r; \ __asm__ __volatile__(I " %1, %0" \ : "=q" (r) \ : "m" (*(const C *)target) \ : "memory"); \ return (r); \ } -CK_PR_LOAD(ptr, void, void *, char, "movl") +CK_PR_LOAD(ptr, void, void *, uint32_t, "movl") #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I) CK_PR_LOAD_S(char, char, "movb") CK_PR_LOAD_S(uint, unsigned int, "movl") CK_PR_LOAD_S(int, int, "movl") CK_PR_LOAD_S(32, uint32_t, "movl") CK_PR_LOAD_S(16, uint16_t, "movw") CK_PR_LOAD_S(8, uint8_t, "movb") #undef CK_PR_LOAD_S #undef CK_PR_LOAD #define CK_PR_STORE(S, M, T, C, I) \ CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %1, %0" \ : "=m" (*(C *)target) \ : CK_CC_IMM "q" (v) \ : "memory"); \ return; \ } -CK_PR_STORE(ptr, void, const void *, char, "movl") +CK_PR_STORE(ptr, void, const void *, uint32_t, "movl") #define CK_PR_STORE_S(S, T, I) CK_PR_STORE(S, T, T, T, I) CK_PR_STORE_S(char, char, "movb") CK_PR_STORE_S(uint, unsigned int, "movl") CK_PR_STORE_S(int, int, "movl") CK_PR_STORE_S(32, uint32_t, "movl") CK_PR_STORE_S(16, uint16_t, "movw") CK_PR_STORE_S(8, uint8_t, "movb") #undef CK_PR_STORE_S #undef CK_PR_STORE /* * Atomic fetch-and-add operations. */ #define CK_PR_FAA(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_faa_##S(M *target, T d) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \ : "+m" (*(C *)target), \ "+q" (d) \ : \ : "memory", "cc"); \ return (d); \ } -CK_PR_FAA(ptr, void, uintptr_t, char, "xaddl") +CK_PR_FAA(ptr, void, uintptr_t, uint32_t, "xaddl") #define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I) CK_PR_FAA_S(char, char, "xaddb") CK_PR_FAA_S(uint, unsigned int, "xaddl") CK_PR_FAA_S(int, int, "xaddl") CK_PR_FAA_S(32, uint32_t, "xaddl") CK_PR_FAA_S(16, uint16_t, "xaddw") CK_PR_FAA_S(8, uint8_t, "xaddb") #undef CK_PR_FAA_S #undef CK_PR_FAA /* * Atomic store-only unary operations. 
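As a usage note for the fetch-and-add and fetch-and-store families defined here, a tiny sketch (the counter and the wrappers are hypothetical):

static unsigned int counter;

static unsigned int
bump(void)
{

	/* Atomically adds 1 and returns the value held before the add. */
	return ck_pr_faa_uint(&counter, 1);
}

static unsigned int
reset(void)
{

	/* Atomically installs 0 and returns the previous value. */
	return ck_pr_fas_uint(&counter, 0);
}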
*/ #define CK_PR_UNARY(K, S, T, C, I) \ CK_PR_UNARY_R(K, S, T, C, I) \ CK_PR_UNARY_V(K, S, T, C, I) #define CK_PR_UNARY_R(K, S, T, C, I) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(T *target) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0" \ : "+m" (*(C *)target) \ : \ : "memory", "cc"); \ return; \ } #define CK_PR_UNARY_V(K, S, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_##K##_##S##_is_zero(T *target) \ { \ bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ - "=rm" (ret) \ + "=qm" (ret) \ : \ : "memory", "cc"); \ return ret; \ } #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ - CK_PR_UNARY(K, ptr, void, char, #K "l") \ + CK_PR_UNARY(K, ptr, void, uint32_t, #K "l") \ CK_PR_UNARY_S(K, char, char, #K "b") \ CK_PR_UNARY_S(K, int, int, #K "l") \ CK_PR_UNARY_S(K, uint, unsigned int, #K "l") \ CK_PR_UNARY_S(K, 32, uint32_t, #K "l") \ CK_PR_UNARY_S(K, 16, uint16_t, #K "w") \ CK_PR_UNARY_S(K, 8, uint8_t, #K "b") CK_PR_GENERATE(inc) CK_PR_GENERATE(dec) CK_PR_GENERATE(neg) /* not does not affect condition flags. */ #undef CK_PR_UNARY_V #define CK_PR_UNARY_V(a, b, c, d, e) CK_PR_GENERATE(not) #undef CK_PR_GENERATE #undef CK_PR_UNARY_S #undef CK_PR_UNARY_V #undef CK_PR_UNARY_R #undef CK_PR_UNARY /* * Atomic store-only binary operations. */ #define CK_PR_BINARY(K, S, M, T, C, I) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target, T d) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \ : "+m" (*(C *)target) \ : CK_CC_IMM "q" (d) \ : "memory", "cc"); \ return; \ } #define CK_PR_BINARY_S(K, S, T, I) CK_PR_BINARY(K, S, T, T, T, I) #define CK_PR_GENERATE(K) \ - CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "l") \ + CK_PR_BINARY(K, ptr, void, uintptr_t, uint32_t, #K "l") \ CK_PR_BINARY_S(K, char, char, #K "b") \ CK_PR_BINARY_S(K, int, int, #K "l") \ CK_PR_BINARY_S(K, uint, unsigned int, #K "l") \ CK_PR_BINARY_S(K, 32, uint32_t, #K "l") \ CK_PR_BINARY_S(K, 16, uint16_t, #K "w") \ CK_PR_BINARY_S(K, 8, uint8_t, #K "b") CK_PR_GENERATE(add) CK_PR_GENERATE(sub) CK_PR_GENERATE(and) CK_PR_GENERATE(or) CK_PR_GENERATE(xor) #undef CK_PR_GENERATE #undef CK_PR_BINARY_S #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. 
*/ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ { \ bool z; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1" \ : "+m" (*(C *)target), \ "=a" (z) \ : "q" (set), \ "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif -CK_PR_CAS(ptr, void, void *, char, "cmpxchgl") +CK_PR_CAS(ptr, void, void *, uint32_t, "cmpxchgl") #define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I) CK_PR_CAS_S(char, char, "cmpxchgb") CK_PR_CAS_S(int, int, "cmpxchgl") CK_PR_CAS_S(uint, unsigned int, "cmpxchgl") CK_PR_CAS_S(32, uint32_t, "cmpxchgl") CK_PR_CAS_S(16, uint16_t, "cmpxchgw") CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS_S #undef CK_PR_CAS -/* - * Compare and swap, set *v to old value of target. - */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return (bool)z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "l", "eax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - /* * Atomic bit test operations. */ #define CK_PR_BT(K, S, T, P, C, I) \ CK_CC_INLINE static bool \ ck_pr_##K##_##S(T *target, unsigned int b) \ { \ bool c; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1" \ : "+m" (*(C *)target), \ "=q" (c) \ : "q" ((P)b) \ : "memory", "cc"); \ return (bool)c; \ } #define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I) -#define CK_PR_GENERATE(K) \ - CK_PR_BT(K, ptr, void, uint32_t, char, #K "l %2, %0") \ - CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ - CK_PR_BT_S(K, int, int, #K "l %2, %0") \ - CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ +#define CK_PR_GENERATE(K) \ + CK_PR_BT(K, ptr, void, uint32_t, uint32_t, #K "l %2, %0") \ + CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ + CK_PR_BT_S(K, int, int, #K "l %2, %0") \ + CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0") CK_PR_GENERATE(btc) CK_PR_GENERATE(bts) CK_PR_GENERATE(btr) #undef CK_PR_GENERATE #undef CK_PR_BT #endif /* CK_PR_X86_H */ diff --git a/include/gcc/x86_64/ck_pr.h b/include/gcc/x86_64/ck_pr.h index fb2804e8d8e5..37678b12b44a 100644 --- a/include/gcc/x86_64/ck_pr.h +++ b/include/gcc/x86_64/ck_pr.h @@ -1,606 +1,613 @@ /* * Copyright 2009-2015 Samy Al Bahra. 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef CK_PR_X86_64_H #define CK_PR_X86_64_H #ifndef CK_PR_H #error Do not include this file directly, use ck_pr.h #endif #include #include #include /* * The following represent supported atomic operations. * These operations may be emulated. */ #include "ck_f_pr.h" /* * Support for TSX extensions. */ #ifdef CK_MD_RTM_ENABLE #include "ck_pr_rtm.h" #endif /* Minimum requirements for the CK_PR interface are met. */ #define CK_F_PR #ifdef CK_MD_UMP #define CK_PR_LOCK_PREFIX #else #define CK_PR_LOCK_PREFIX "lock " #endif /* * Prevent speculative execution in busy-wait loops (P4 <=) or "predefined * delay". */ CK_CC_INLINE static void ck_pr_stall(void) { __asm__ __volatile__("pause" ::: "memory"); return; } #define CK_PR_FENCE(T, I) \ CK_CC_INLINE static void \ ck_pr_fence_strict_##T(void) \ { \ __asm__ __volatile__(I ::: "memory"); \ } /* Atomic operations are always serializing. */ CK_PR_FENCE(atomic, "") CK_PR_FENCE(atomic_store, "") CK_PR_FENCE(atomic_load, "") CK_PR_FENCE(store_atomic, "") CK_PR_FENCE(load_atomic, "") /* Traditional fence interface. */ CK_PR_FENCE(load, "lfence") CK_PR_FENCE(load_store, "mfence") CK_PR_FENCE(store, "sfence") CK_PR_FENCE(store_load, "mfence") CK_PR_FENCE(memory, "mfence") /* Below are stdatomic-style fences. */ /* * Provides load-store and store-store ordering. However, Intel specifies that * the WC memory model is relaxed. It is likely an sfence *is* sufficient (in * particular, stores are not re-ordered with respect to prior loads and it is * really just the stores that are subject to re-ordering). However, we take * the conservative route as the manuals are too ambiguous for my taste. */ CK_PR_FENCE(release, "mfence") /* * Provides load-load and load-store ordering. The lfence instruction ensures * all prior load operations are complete before any subsequent instructions * actually begin execution. However, the manual also ends up going to describe * WC memory as a relaxed model. */ CK_PR_FENCE(acquire, "mfence") CK_PR_FENCE(acqrel, "mfence") CK_PR_FENCE(lock, "mfence") CK_PR_FENCE(unlock, "mfence") #undef CK_PR_FENCE /* * Read for ownership. Older compilers will generate the 32-bit * 3DNow! variant which is binary compatible with x86-64 variant * of prefetchw. 
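To illustrate the stdatomic-style fences discussed above, a sketch of the usual publication pattern; it mirrors the producer/consumer ordering used by ck_ring earlier in this diff. The ready/payload variables and helper functions are hypothetical:

static unsigned int ready;
static unsigned int payload;

static void
producer(unsigned int v)
{

	ck_pr_store_uint(&payload, v);
	ck_pr_fence_store();		/* order the payload before the flag */
	ck_pr_store_uint(&ready, 1);
}

static unsigned int
consumer(void)
{

	while (ck_pr_load_uint(&ready) == 0)
		ck_pr_stall();

	ck_pr_fence_load();		/* order the flag before the payload */
	return ck_pr_load_uint(&payload);
}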
*/ #ifndef CK_F_PR_RFO #define CK_F_PR_RFO CK_CC_INLINE static void ck_pr_rfo(const void *m) { __asm__ __volatile__("prefetchw (%0)" : : "r" (m) : "memory"); return; } #endif /* CK_F_PR_RFO */ /* * Atomic fetch-and-store operations. */ #define CK_PR_FAS(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_fas_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %0, %1" \ : "+m" (*(C *)target), \ "+q" (v) \ : \ : "memory"); \ return v; \ } -CK_PR_FAS(ptr, void, void *, char, "xchgq") +CK_PR_FAS(ptr, void, void *, uint64_t, "xchgq") #define CK_PR_FAS_S(S, T, I) CK_PR_FAS(S, T, T, T, I) #ifndef CK_PR_DISABLE_DOUBLE CK_PR_FAS_S(double, double, "xchgq") #endif CK_PR_FAS_S(char, char, "xchgb") CK_PR_FAS_S(uint, unsigned int, "xchgl") CK_PR_FAS_S(int, int, "xchgl") CK_PR_FAS_S(64, uint64_t, "xchgq") CK_PR_FAS_S(32, uint32_t, "xchgl") CK_PR_FAS_S(16, uint16_t, "xchgw") CK_PR_FAS_S(8, uint8_t, "xchgb") #undef CK_PR_FAS_S #undef CK_PR_FAS /* * Atomic load-from-memory operations. */ #define CK_PR_LOAD(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_md_load_##S(const M *target) \ { \ T r; \ __asm__ __volatile__(I " %1, %0" \ : "=q" (r) \ : "m" (*(const C *)target) \ : "memory"); \ return (r); \ } -CK_PR_LOAD(ptr, void, void *, char, "movq") +CK_PR_LOAD(ptr, void, void *, uint64_t, "movq") #define CK_PR_LOAD_S(S, T, I) CK_PR_LOAD(S, T, T, T, I) CK_PR_LOAD_S(char, char, "movb") CK_PR_LOAD_S(uint, unsigned int, "movl") CK_PR_LOAD_S(int, int, "movl") #ifndef CK_PR_DISABLE_DOUBLE CK_PR_LOAD_S(double, double, "movq") #endif CK_PR_LOAD_S(64, uint64_t, "movq") CK_PR_LOAD_S(32, uint32_t, "movl") CK_PR_LOAD_S(16, uint16_t, "movw") CK_PR_LOAD_S(8, uint8_t, "movb") #undef CK_PR_LOAD_S #undef CK_PR_LOAD CK_CC_INLINE static void ck_pr_load_64_2(const uint64_t target[2], uint64_t v[2]) { __asm__ __volatile__("movq %%rdx, %%rcx;" "movq %%rax, %%rbx;" CK_PR_LOCK_PREFIX "cmpxchg16b %2;" : "=a" (v[0]), "=d" (v[1]) : "m" (*(const uint64_t *)target) : "rbx", "rcx", "memory", "cc"); return; } CK_CC_INLINE static void ck_pr_load_ptr_2(const void *t, void *v) { ck_pr_load_64_2(CK_CPP_CAST(const uint64_t *, t), CK_CPP_CAST(uint64_t *, v)); return; } #define CK_PR_LOAD_2(S, W, T) \ CK_CC_INLINE static void \ ck_pr_md_load_##S##_##W(const T t[2], T v[2]) \ { \ ck_pr_load_64_2((const uint64_t *)(const void *)t, \ (uint64_t *)(void *)v); \ return; \ } CK_PR_LOAD_2(char, 16, char) CK_PR_LOAD_2(int, 4, int) CK_PR_LOAD_2(uint, 4, unsigned int) CK_PR_LOAD_2(32, 4, uint32_t) CK_PR_LOAD_2(16, 8, uint16_t) CK_PR_LOAD_2(8, 16, uint8_t) #undef CK_PR_LOAD_2 /* * Atomic store-to-memory operations. 
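/*
 * A usage sketch, with made-up names: taking a consistent snapshot of a
 * 16-byte record with the cmpxchg16b-based ck_pr_load_64_2() defined
 * above.  Because the load is implemented with a locked cmpxchg16b, the
 * target is assumed to be 16-byte aligned and to live in writable memory.
 */
#include <stdint.h>
#include <ck_pr.h>

static _Alignas(16) uint64_t record[2];         /* e.g. { sequence, value } */

static void
record_snapshot(uint64_t snapshot[2])
{
        /* Both 64-bit words are observed atomically, as a single unit. */
        ck_pr_load_64_2(record, snapshot);
        return;
}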
*/ #define CK_PR_STORE_IMM(S, M, T, C, I, K) \ CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %1, %0" \ : "=m" (*(C *)target) \ : K "q" (v) \ : "memory"); \ return; \ } #define CK_PR_STORE(S, M, T, C, I) \ CK_CC_INLINE static void \ ck_pr_md_store_##S(M *target, T v) \ { \ __asm__ __volatile__(I " %1, %0" \ : "=m" (*(C *)target) \ : "q" (v) \ : "memory"); \ return; \ } -CK_PR_STORE_IMM(ptr, void, const void *, char, "movq", CK_CC_IMM_U32) +CK_PR_STORE_IMM(ptr, void, const void *, uint64_t, "movq", CK_CC_IMM_U32) #ifndef CK_PR_DISABLE_DOUBLE CK_PR_STORE(double, double, double, double, "movq") #endif #define CK_PR_STORE_S(S, T, I, K) CK_PR_STORE_IMM(S, T, T, T, I, K) CK_PR_STORE_S(char, char, "movb", CK_CC_IMM_S32) CK_PR_STORE_S(int, int, "movl", CK_CC_IMM_S32) CK_PR_STORE_S(uint, unsigned int, "movl", CK_CC_IMM_U32) CK_PR_STORE_S(64, uint64_t, "movq", CK_CC_IMM_U32) CK_PR_STORE_S(32, uint32_t, "movl", CK_CC_IMM_U32) CK_PR_STORE_S(16, uint16_t, "movw", CK_CC_IMM_U32) CK_PR_STORE_S(8, uint8_t, "movb", CK_CC_IMM_U32) #undef CK_PR_STORE_S #undef CK_PR_STORE_IMM #undef CK_PR_STORE /* * Atomic fetch-and-add operations. */ #define CK_PR_FAA(S, M, T, C, I) \ CK_CC_INLINE static T \ ck_pr_faa_##S(M *target, T d) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \ : "+m" (*(C *)target), \ "+q" (d) \ : \ : "memory", "cc"); \ return (d); \ } -CK_PR_FAA(ptr, void, uintptr_t, char, "xaddq") +CK_PR_FAA(ptr, void, uintptr_t, uint64_t, "xaddq") #define CK_PR_FAA_S(S, T, I) CK_PR_FAA(S, T, T, T, I) CK_PR_FAA_S(char, char, "xaddb") CK_PR_FAA_S(uint, unsigned int, "xaddl") CK_PR_FAA_S(int, int, "xaddl") CK_PR_FAA_S(64, uint64_t, "xaddq") CK_PR_FAA_S(32, uint32_t, "xaddl") CK_PR_FAA_S(16, uint16_t, "xaddw") CK_PR_FAA_S(8, uint8_t, "xaddb") #undef CK_PR_FAA_S #undef CK_PR_FAA /* * Atomic store-only unary operations. */ #define CK_PR_UNARY(K, S, T, C, I) \ CK_PR_UNARY_R(K, S, T, C, I) \ CK_PR_UNARY_V(K, S, T, C, I) #define CK_PR_UNARY_R(K, S, T, C, I) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(T *target) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0" \ : "+m" (*(C *)target) \ : \ : "memory", "cc"); \ return; \ } #define CK_PR_UNARY_V(K, S, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_##K##_##S##_is_zero(T *target) \ { \ bool ret; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %0; setz %1" \ : "+m" (*(C *)target), \ "=rm" (ret) \ : \ : "memory", "cc"); \ return ret; \ } #define CK_PR_UNARY_S(K, S, T, I) CK_PR_UNARY(K, S, T, T, I) #define CK_PR_GENERATE(K) \ - CK_PR_UNARY(K, ptr, void, char, #K "q") \ + CK_PR_UNARY(K, ptr, void, uint64_t, #K "q") \ CK_PR_UNARY_S(K, char, char, #K "b") \ CK_PR_UNARY_S(K, int, int, #K "l") \ CK_PR_UNARY_S(K, uint, unsigned int, #K "l") \ CK_PR_UNARY_S(K, 64, uint64_t, #K "q") \ CK_PR_UNARY_S(K, 32, uint32_t, #K "l") \ CK_PR_UNARY_S(K, 16, uint16_t, #K "w") \ CK_PR_UNARY_S(K, 8, uint8_t, #K "b") CK_PR_GENERATE(inc) CK_PR_GENERATE(dec) CK_PR_GENERATE(neg) /* not does not affect condition flags. */ #undef CK_PR_UNARY_V #define CK_PR_UNARY_V(a, b, c, d, e) CK_PR_GENERATE(not) #undef CK_PR_GENERATE #undef CK_PR_UNARY_S #undef CK_PR_UNARY_V #undef CK_PR_UNARY_R #undef CK_PR_UNARY /* * Atomic store-only binary operations. 
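/*
 * For illustration: common uses of the xadd-based fetch-and-add and of
 * the inc/dec "_is_zero" helpers generated above.  The object type, its
 * free() call and the ticket counter are placeholders.
 */
#include <stdint.h>
#include <stdlib.h>
#include <ck_pr.h>

struct object {
        unsigned int ref;
        /* ... payload ... */
};

static uint64_t next_ticket;

static uint64_t
ticket_take(void)
{
        /* xadd returns the value of next_ticket prior to the addition. */
        return ck_pr_faa_64(&next_ticket, 1);
}

static void
object_acquire(struct object *o)
{
        ck_pr_inc_uint(&o->ref);
        return;
}

static void
object_release(struct object *o)
{
        /* The locked dec sets ZF exactly when the count reaches zero. */
        if (ck_pr_dec_uint_is_zero(&o->ref) == true)
                free(o);

        return;
}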
*/ #define CK_PR_BINARY(K, S, M, T, C, I, O) \ CK_CC_INLINE static void \ ck_pr_##K##_##S(M *target, T d) \ { \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %1, %0" \ : "+m" (*(C *)target) \ : O "q" (d) \ : "memory", "cc"); \ return; \ } #define CK_PR_BINARY_S(K, S, T, I, O) CK_PR_BINARY(K, S, T, T, T, I, O) #define CK_PR_GENERATE(K) \ - CK_PR_BINARY(K, ptr, void, uintptr_t, char, #K "q", CK_CC_IMM_U32) \ + CK_PR_BINARY(K, ptr, void, uintptr_t, uint64_t, #K "q", CK_CC_IMM_U32) \ CK_PR_BINARY_S(K, char, char, #K "b", CK_CC_IMM_S32) \ CK_PR_BINARY_S(K, int, int, #K "l", CK_CC_IMM_S32) \ CK_PR_BINARY_S(K, uint, unsigned int, #K "l", CK_CC_IMM_U32) \ CK_PR_BINARY_S(K, 64, uint64_t, #K "q", CK_CC_IMM_U32) \ CK_PR_BINARY_S(K, 32, uint32_t, #K "l", CK_CC_IMM_U32) \ CK_PR_BINARY_S(K, 16, uint16_t, #K "w", CK_CC_IMM_U32) \ CK_PR_BINARY_S(K, 8, uint8_t, #K "b", CK_CC_IMM_U32) CK_PR_GENERATE(add) CK_PR_GENERATE(sub) CK_PR_GENERATE(and) CK_PR_GENERATE(or) CK_PR_GENERATE(xor) #undef CK_PR_GENERATE #undef CK_PR_BINARY_S #undef CK_PR_BINARY /* - * Atomic compare and swap. + * Atomic compare and swap, with a variant that sets *v to the old value of target. */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +#define CK_PR_CAS(S, M, T, C, I) \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S(M *target, T compare, T set) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + /* RAX is clobbered by cmpxchg. */ \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + : "+m" (*(C *)target), \ + "=@ccz" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ + } +#else #define CK_PR_CAS(S, M, T, C, I) \ CK_CC_INLINE static bool \ ck_pr_cas_##S(M *target, T compare, T set) \ { \ bool z; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %2, %0; setz %1" \ : "+m" (*(C *)target), \ "=a" (z) \ : "q" (set), \ "a" (compare) \ : "memory", "cc"); \ return z; \ + } \ + \ + CK_CC_INLINE static bool \ + ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ + { \ + bool z; \ + __asm__ __volatile__(CK_PR_LOCK_PREFIX I " %3, %0;" \ + "setz %1;" \ + : "+m" (*(C *)target), \ + "=q" (z), \ + "+a" (compare) \ + : "q" (set) \ + : "memory", "cc"); \ + *(T *)v = compare; \ + return z; \ } +#endif -CK_PR_CAS(ptr, void, void *, char, "cmpxchgq") +CK_PR_CAS(ptr, void, void *, uint64_t, "cmpxchgq") #define CK_PR_CAS_S(S, T, I) CK_PR_CAS(S, T, T, T, I) CK_PR_CAS_S(char, char, "cmpxchgb") CK_PR_CAS_S(int, int, "cmpxchgl") CK_PR_CAS_S(uint, unsigned int, "cmpxchgl") #ifndef CK_PR_DISABLE_DOUBLE CK_PR_CAS_S(double, double, "cmpxchgq") #endif CK_PR_CAS_S(64, uint64_t, "cmpxchgq") CK_PR_CAS_S(32, uint32_t, "cmpxchgl") CK_PR_CAS_S(16, uint16_t, "cmpxchgw") CK_PR_CAS_S(8, uint8_t, "cmpxchgb") #undef CK_PR_CAS_S #undef CK_PR_CAS -/* - * Compare and swap, set *v to old value of target. 
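/*
 * A sketch of a typical retry loop built on the ck_pr_cas_*_value() form
 * defined above.  On failure, the _value variant hands back the value
 * cmpxchg actually observed, so the loop does not need a separate
 * reload.  The "atomic maximum" operation is only an example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <ck_pr.h>

static void
atomic_max_64(uint64_t *target, uint64_t candidate)
{
        uint64_t snapshot = ck_pr_load_64(target);

        while (snapshot < candidate) {
                uint64_t witnessed;

                if (ck_pr_cas_64_value(target, snapshot, candidate,
                    &witnessed) == true)
                        break;

                /* CAS failed: retry against the value that was observed. */
                snapshot = witnessed;
        }

        return;
}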
- */ -#define CK_PR_CAS_O(S, M, T, C, I, R) \ - CK_CC_INLINE static bool \ - ck_pr_cas_##S##_value(M *target, T compare, T set, M *v) \ - { \ - bool z; \ - __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg" I " %3, %0;" \ - "mov %% " R ", %2;" \ - "setz %1;" \ - : "+m" (*(C *)target), \ - "=a" (z), \ - "=m" (*(C *)v) \ - : "q" (set), \ - "a" (compare) \ - : "memory", "cc"); \ - return z; \ - } - -CK_PR_CAS_O(ptr, void, void *, char, "q", "rax") - -#define CK_PR_CAS_O_S(S, T, I, R) \ - CK_PR_CAS_O(S, T, T, T, I, R) - -CK_PR_CAS_O_S(char, char, "b", "al") -CK_PR_CAS_O_S(int, int, "l", "eax") -CK_PR_CAS_O_S(uint, unsigned int, "l", "eax") -#ifndef CK_PR_DISABLE_DOUBLE -CK_PR_CAS_O_S(double, double, "q", "rax") -#endif -CK_PR_CAS_O_S(64, uint64_t, "q", "rax") -CK_PR_CAS_O_S(32, uint32_t, "l", "eax") -CK_PR_CAS_O_S(16, uint16_t, "w", "ax") -CK_PR_CAS_O_S(8, uint8_t, "b", "al") - -#undef CK_PR_CAS_O_S -#undef CK_PR_CAS_O - /* * Contrary to C-interface, alignment requirements are that of uint64_t[2]. */ CK_CC_INLINE static bool ck_pr_cas_64_2(uint64_t target[2], uint64_t compare[2], uint64_t set[2]) { bool z; __asm__ __volatile__("movq 0(%4), %%rax;" "movq 8(%4), %%rdx;" CK_PR_LOCK_PREFIX "cmpxchg16b %0; setz %1" : "+m" (*target), "=q" (z) : "b" (set[0]), "c" (set[1]), "q" (compare) : "memory", "cc", "%rax", "%rdx"); return z; } CK_CC_INLINE static bool ck_pr_cas_ptr_2(void *t, void *c, void *s) { return ck_pr_cas_64_2(CK_CPP_CAST(uint64_t *, t), CK_CPP_CAST(uint64_t *, c), CK_CPP_CAST(uint64_t *, s)); } CK_CC_INLINE static bool ck_pr_cas_64_2_value(uint64_t target[2], uint64_t compare[2], uint64_t set[2], uint64_t v[2]) { bool z; __asm__ __volatile__(CK_PR_LOCK_PREFIX "cmpxchg16b %0;" "setz %3" : "+m" (*target), "=a" (v[0]), "=d" (v[1]), "=q" (z) : "a" (compare[0]), "d" (compare[1]), "b" (set[0]), "c" (set[1]) : "memory", "cc"); return z; } CK_CC_INLINE static bool ck_pr_cas_ptr_2_value(void *t, void *c, void *s, void *v) { return ck_pr_cas_64_2_value(CK_CPP_CAST(uint64_t *,t), CK_CPP_CAST(uint64_t *,c), CK_CPP_CAST(uint64_t *,s), CK_CPP_CAST(uint64_t *,v)); } #define CK_PR_CAS_V(S, W, T) \ CK_CC_INLINE static bool \ ck_pr_cas_##S##_##W(T t[W], T c[W], T s[W]) \ { \ return ck_pr_cas_64_2((uint64_t *)(void *)t, \ (uint64_t *)(void *)c, \ (uint64_t *)(void *)s); \ } \ CK_CC_INLINE static bool \ ck_pr_cas_##S##_##W##_value(T *t, T c[W], T s[W], T *v) \ { \ return ck_pr_cas_64_2_value((uint64_t *)(void *)t, \ (uint64_t *)(void *)c, \ (uint64_t *)(void *)s, \ (uint64_t *)(void *)v); \ } #ifndef CK_PR_DISABLE_DOUBLE CK_PR_CAS_V(double, 2, double) #endif CK_PR_CAS_V(char, 16, char) CK_PR_CAS_V(int, 4, int) CK_PR_CAS_V(uint, 4, unsigned int) CK_PR_CAS_V(32, 4, uint32_t) CK_PR_CAS_V(16, 8, uint16_t) CK_PR_CAS_V(8, 16, uint8_t) #undef CK_PR_CAS_V /* * Atomic bit test operations. 
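/*
 * An illustrative sketch, placed here before the bit-test operations:
 * the double-width CAS wrappers above are commonly used to update a
 * {pointer, generation} pair in one shot, so that pointer reuse cannot
 * be mistaken for "no change" (ABA).  The layout and names below are
 * invented; 16-byte alignment is assumed since cmpxchg16b requires it.
 */
#include <stdbool.h>
#include <stdint.h>
#include <ck_pr.h>

struct tagged_ptr {
        void *ptr;
        uint64_t generation;
};

static _Alignas(16) struct tagged_ptr head;

static bool
head_update(struct tagged_ptr *expected, void *new_ptr)
{
        struct tagged_ptr desired;

        desired.ptr = new_ptr;
        desired.generation = expected->generation + 1;

        /* Both words must match the expected snapshot for the swap to land. */
        return ck_pr_cas_ptr_2(&head, expected, &desired);
}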
*/ #define CK_PR_BT(K, S, T, P, C, I) \ CK_CC_INLINE static bool \ ck_pr_##K##_##S(T *target, unsigned int b) \ { \ bool c; \ __asm__ __volatile__(CK_PR_LOCK_PREFIX I "; setc %1" \ : "+m" (*(C *)target), \ "=q" (c) \ : "q" ((P)b) \ : "memory", "cc"); \ return c; \ } #define CK_PR_BT_S(K, S, T, I) CK_PR_BT(K, S, T, T, T, I) -#define CK_PR_GENERATE(K) \ - CK_PR_BT(K, ptr, void, uint64_t, char, #K "q %2, %0") \ - CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ - CK_PR_BT_S(K, int, int, #K "l %2, %0") \ - CK_PR_BT_S(K, 64, uint64_t, #K "q %2, %0") \ - CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ +#define CK_PR_GENERATE(K) \ + CK_PR_BT(K, ptr, void, uint64_t, uint64_t, #K "q %2, %0") \ + CK_PR_BT_S(K, uint, unsigned int, #K "l %2, %0") \ + CK_PR_BT_S(K, int, int, #K "l %2, %0") \ + CK_PR_BT_S(K, 64, uint64_t, #K "q %2, %0") \ + CK_PR_BT_S(K, 32, uint32_t, #K "l %2, %0") \ CK_PR_BT_S(K, 16, uint16_t, #K "w %w2, %0") CK_PR_GENERATE(btc) CK_PR_GENERATE(bts) CK_PR_GENERATE(btr) #undef CK_PR_GENERATE #undef CK_PR_BT #endif /* CK_PR_X86_64_H */ diff --git a/include/spinlock/fas.h b/include/spinlock/fas.h index 4e6c1230eaf1..bfe91fed2f9f 100644 --- a/include/spinlock/fas.h +++ b/include/spinlock/fas.h @@ -1,118 +1,119 @@ /* * Copyright 2010-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef CK_SPINLOCK_FAS_H #define CK_SPINLOCK_FAS_H #include #include #include #include #include #ifndef CK_F_SPINLOCK_FAS #define CK_F_SPINLOCK_FAS struct ck_spinlock_fas { unsigned int value; }; typedef struct ck_spinlock_fas ck_spinlock_fas_t; #define CK_SPINLOCK_FAS_INITIALIZER {false} CK_CC_INLINE static void ck_spinlock_fas_init(struct ck_spinlock_fas *lock) { lock->value = false; ck_pr_barrier(); return; } CK_CC_INLINE static bool ck_spinlock_fas_trylock(struct ck_spinlock_fas *lock) { bool value; value = ck_pr_fas_uint(&lock->value, true); ck_pr_fence_lock(); return !value; } CK_CC_INLINE static bool ck_spinlock_fas_locked(struct ck_spinlock_fas *lock) { bool r; r = ck_pr_load_uint(&lock->value); ck_pr_fence_acquire(); return r; } CK_CC_INLINE static void ck_spinlock_fas_lock(struct ck_spinlock_fas *lock) { - while (ck_pr_fas_uint(&lock->value, true) == true) { - while (ck_pr_load_uint(&lock->value) == true) - ck_pr_stall(); - } + while (CK_CC_UNLIKELY(ck_pr_fas_uint(&lock->value, true) == true)) { + do { + ck_pr_stall(); + } while (ck_pr_load_uint(&lock->value) == true); + } ck_pr_fence_lock(); return; } CK_CC_INLINE static void ck_spinlock_fas_lock_eb(struct ck_spinlock_fas *lock) { ck_backoff_t backoff = CK_BACKOFF_INITIALIZER; while (ck_pr_fas_uint(&lock->value, true) == true) ck_backoff_eb(&backoff); ck_pr_fence_lock(); return; } CK_CC_INLINE static void ck_spinlock_fas_unlock(struct ck_spinlock_fas *lock) { ck_pr_fence_unlock(); ck_pr_store_uint(&lock->value, false); return; } CK_ELIDE_PROTOTYPE(ck_spinlock_fas, ck_spinlock_fas_t, ck_spinlock_fas_locked, ck_spinlock_fas_lock, ck_spinlock_fas_locked, ck_spinlock_fas_unlock) CK_ELIDE_TRYLOCK_PROTOTYPE(ck_spinlock_fas, ck_spinlock_fas_t, ck_spinlock_fas_locked, ck_spinlock_fas_trylock) #endif /* CK_F_SPINLOCK_FAS */ #endif /* CK_SPINLOCK_FAS_H */ diff --git a/src/ck_ec.c b/src/ck_ec.c new file mode 100644 index 000000000000..9b24e762947c --- /dev/null +++ b/src/ck_ec.c @@ -0,0 +1,425 @@ +#include +#include + +#include "ck_ec_timeutil.h" + +#define DEFAULT_BUSY_LOOP_ITER 100U + +/* + * The 2ms, 8x/iter default parameter hit 1.024 seconds after 3 + * iterations. + */ +#define DEFAULT_INITIAL_WAIT_NS 2000000L /* Start at 2 ms */ +/* Grow the wait time 8x/iteration. */ +#define DEFAULT_WAIT_SCALE_FACTOR 8 +#define DEFAULT_WAIT_SHIFT_COUNT 0 + +struct ck_ec32_slow_path_state { + struct ck_ec32 *ec; + uint32_t flagged_word; +}; + +#ifdef CK_F_EC64 +struct ck_ec64_slow_path_state { + struct ck_ec64 *ec; + uint64_t flagged_word; +}; +#endif + +/* Once we've waited for >= 1 sec, go for the full deadline. */ +static const struct timespec final_wait_time = { + .tv_sec = 1 +}; + +void +ck_ec32_wake(struct ck_ec32 *ec, const struct ck_ec_ops *ops) +{ + /* Spurious wake-ups are OK. Clear the flag before futexing. 
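/*
 * A minimal usage sketch of the ck_spinlock_fas interface whose lock
 * loop is reworked above into a test-and-test-and-set form.  The
 * protected counter stands in for whatever the critical section really
 * guards.
 */
#include <ck_spinlock.h>

static ck_spinlock_fas_t lock = CK_SPINLOCK_FAS_INITIALIZER;
static unsigned long counter;

static void
counter_increment(void)
{
        ck_spinlock_fas_lock(&lock);    /* Spins on plain loads until the lock looks free. */
        counter++;
        ck_spinlock_fas_unlock(&lock);
        return;
}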
*/ + ck_pr_and_32(&ec->counter, (1U << 31) - 1); + ops->wake32(ops, &ec->counter); + return; +} + +int +ck_ec32_wait_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + const struct timespec *deadline) +{ + return ck_ec32_wait_pred_slow(ec, ops, old_value, + NULL, NULL, deadline); +} + +#ifdef CK_F_EC64 +void +ck_ec64_wake(struct ck_ec64 *ec, const struct ck_ec_ops *ops) +{ + ck_pr_and_64(&ec->counter, ~1); + ops->wake64(ops, &ec->counter); + return; +} + +int +ck_ec64_wait_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + const struct timespec *deadline) +{ + return ck_ec64_wait_pred_slow(ec, ops, old_value, + NULL, NULL, deadline); +} +#endif + +int +ck_ec_deadline_impl(struct timespec *new_deadline, + const struct ck_ec_ops *ops, + const struct timespec *timeout) +{ + struct timespec now; + int r; + + if (timeout == NULL) { + new_deadline->tv_sec = TIME_MAX; + new_deadline->tv_nsec = NSEC_MAX; + return 0; + } + + r = ops->gettime(ops, &now); + if (r != 0) { + return -1; + } + + *new_deadline = timespec_add(now, *timeout); + return 0; +} + +/* The rest of the file implements wait_pred_slow. */ + +/* + * Returns a timespec value for deadline_ptr. If deadline_ptr is NULL, + * returns a timespec far in the future. + */ +static struct timespec +canonical_deadline(const struct timespec *deadline_ptr) +{ + + if (deadline_ptr == NULL) { + return (struct timespec) { .tv_sec = TIME_MAX }; + } + + return *deadline_ptr; +} + +/* + * Really slow (sleeping) path for ck_ec_wait. Drives the exponential + * backoff scheme to sleep for longer and longer periods of time, + * until either the sleep function returns true (the eventcount's + * value has changed), or the predicate returns non-0 (something else + * has changed). + * + * If deadline is ever reached, returns -1 (timeout). + * + * TODO: add some form of randomisation to the intermediate timeout + * values. + */ +static int +exponential_backoff(struct ck_ec_wait_state *wait_state, + bool (*sleep)(const void *sleep_state, + const struct ck_ec_wait_state *wait_state, + const struct timespec *partial_deadline), + const void *sleep_state, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + const struct timespec *deadline) +{ + struct timespec begin; + struct timespec stop_backoff; + const struct ck_ec_ops *ops = wait_state->ops; + const uint32_t scale_factor = (ops->wait_scale_factor != 0) + ? ops->wait_scale_factor + : DEFAULT_WAIT_SCALE_FACTOR; + const uint32_t shift_count = (ops->wait_shift_count != 0) + ? ops->wait_shift_count + : DEFAULT_WAIT_SHIFT_COUNT; + uint32_t wait_ns = (ops->initial_wait_ns != 0) + ? ops->initial_wait_ns + : DEFAULT_INITIAL_WAIT_NS; + bool first = true; + + for (;;) { + struct timespec now; + struct timespec partial_deadline; + + if (check_deadline(&now, ops, *deadline) == true) { + /* Timeout. Bail out. */ + return -1; + } + + if (first) { + begin = now; + wait_state->start = begin; + stop_backoff = timespec_add(begin, final_wait_time); + first = false; + } + + wait_state->now = now; + if (timespec_cmp(now, stop_backoff) >= 0) { + partial_deadline = *deadline; + } else { + do { + partial_deadline = + timespec_add_ns(begin, wait_ns); + wait_ns = + wait_time_scale(wait_ns, + scale_factor, + shift_count); + } while (timespec_cmp(partial_deadline, now) <= 0); + } + + if (pred != NULL) { + int r = pred(wait_state, &partial_deadline); + if (r != 0) { + return r; + } + } + + /* Canonicalize deadlines in the far future to NULL. 
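/*
 * For illustration only: how the intermediate timeouts computed above
 * grow under the default parameters (2 ms initial wait, scaled 8x per
 * iteration, no shift).  This standalone program only prints the
 * schedule; it is not part of the wait path.
 */
#include <stdio.h>

int
main(void)
{
        unsigned long long wait_ns = 2000000;   /* DEFAULT_INITIAL_WAIT_NS */
        int i;

        for (i = 0; i < 4; i++) {
                printf("sleep %d: begin + %llu ns\n", i, wait_ns);
                wait_ns *= 8;                   /* DEFAULT_WAIT_SCALE_FACTOR */
        }

        /*
         * 2 ms, 16 ms, 128 ms, 1.024 s: after roughly one second of
         * back-off (final_wait_time), the loop above switches to
         * sleeping until the caller's full deadline.
         */
        return 0;
}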
*/ + if (sleep(sleep_state, wait_state, + ((partial_deadline.tv_sec == TIME_MAX) + ? NULL : &partial_deadline)) == true) { + return 0; + } + } +} + +/* + * Loops up to BUSY_LOOP_ITER times, or until ec's counter value + * (including the flag) differs from old_value. + * + * Returns the new value in ec. + */ +#define DEF_WAIT_EASY(W) \ + static uint##W##_t ck_ec##W##_wait_easy(struct ck_ec##W* ec, \ + const struct ck_ec_ops *ops, \ + uint##W##_t expected) \ + { \ + uint##W##_t current = ck_pr_load_##W(&ec->counter); \ + size_t n = (ops->busy_loop_iter != 0) \ + ? ops->busy_loop_iter \ + : DEFAULT_BUSY_LOOP_ITER; \ + \ + for (size_t i = 0; \ + i < n && current == expected; \ + i++) { \ + ck_pr_stall(); \ + current = ck_pr_load_##W(&ec->counter); \ + } \ + \ + return current; \ + } + +DEF_WAIT_EASY(32) +#ifdef CK_F_EC64 +DEF_WAIT_EASY(64) +#endif +#undef DEF_WAIT_EASY +/* + * Attempts to upgrade ec->counter from unflagged to flagged. + * + * Returns true if the event count has changed. Otherwise, ec's + * counter word is equal to flagged on return, or has been at some + * time before the return. + */ +#define DEF_UPGRADE(W) \ + static bool ck_ec##W##_upgrade(struct ck_ec##W* ec, \ + uint##W##_t current, \ + uint##W##_t unflagged, \ + uint##W##_t flagged) \ + { \ + uint##W##_t old_word; \ + \ + if (current == flagged) { \ + /* Nothing to do, no change. */ \ + return false; \ + } \ + \ + if (current != unflagged) { \ + /* We have a different counter value! */ \ + return true; \ + } \ + \ + /* \ + * Flag the counter value. The CAS only fails if the \ + * counter is already flagged, or has a new value. \ + */ \ + return (ck_pr_cas_##W##_value(&ec->counter, \ + unflagged, flagged, \ + &old_word) == false && \ + old_word != flagged); \ + } + +DEF_UPGRADE(32) +#ifdef CK_F_EC64 +DEF_UPGRADE(64) +#endif +#undef DEF_UPGRADE + +/* + * Blocks until partial_deadline on the ck_ec. Returns true if the + * eventcount's value has changed. If partial_deadline is NULL, wait + * forever. + */ +static bool +ck_ec32_wait_slow_once(const void *vstate, + const struct ck_ec_wait_state *wait_state, + const struct timespec *partial_deadline) +{ + const struct ck_ec32_slow_path_state *state = vstate; + const struct ck_ec32 *ec = state->ec; + const uint32_t flagged_word = state->flagged_word; + + wait_state->ops->wait32(wait_state, &ec->counter, + flagged_word, partial_deadline); + return ck_pr_load_32(&ec->counter) != flagged_word; +} + +#ifdef CK_F_EC64 +static bool +ck_ec64_wait_slow_once(const void *vstate, + const struct ck_ec_wait_state *wait_state, + const struct timespec *partial_deadline) +{ + const struct ck_ec64_slow_path_state *state = vstate; + const struct ck_ec64 *ec = state->ec; + const uint64_t flagged_word = state->flagged_word; + + /* futex_wait will only compare the low 32 bits. Perform a + * full comparison here to maximise the changes of catching an + * ABA in the low 32 bits. + */ + if (ck_pr_load_64(&ec->counter) != flagged_word) { + return true; + } + + wait_state->ops->wait64(wait_state, &ec->counter, + flagged_word, partial_deadline); + return ck_pr_load_64(&ec->counter) != flagged_word; +} +#endif + +/* + * The full wait logic is a lot of code (> 1KB). Encourage the + * compiler to lay this all out linearly with LIKELY annotations on + * every early exit. 
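/*
 * An illustration of the flag layout that the wake and upgrade logic
 * above relies on.  A 32-bit eventcount keeps a "waiters present" flag
 * in the top bit of the counter word, while the 64-bit variant stores
 * the count shifted left by one and uses the low bit as the flag.  The
 * helper names are invented for illustration.
 */
#include <stdbool.h>
#include <stdint.h>

#define EC32_FLAG_BIT (1U << 31)

static inline uint32_t
ec32_flag(uint32_t word)
{
        return word | EC32_FLAG_BIT;            /* What a waiter CASes in. */
}

static inline uint32_t
ec32_unflag(uint32_t word)
{
        return word & (EC32_FLAG_BIT - 1);      /* What ck_ec32_wake stores back. */
}

static inline bool
ec64_has_waiters(uint64_t word)
{
        return (word & 1) != 0;                 /* Count lives in the upper 63 bits. */
}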
+ */ +#define WAIT_SLOW_BODY(W, ec, ops, pred, data, deadline_ptr, \ + old_value, unflagged, flagged) \ + do { \ + struct ck_ec_wait_state wait_state = { \ + .ops = ops, \ + .data = data \ + }; \ + const struct ck_ec##W##_slow_path_state state = { \ + .ec = ec, \ + .flagged_word = flagged \ + }; \ + const struct timespec deadline = \ + canonical_deadline(deadline_ptr); \ + \ + /* Detect infinite past deadlines. */ \ + if (CK_CC_LIKELY(deadline.tv_sec <= 0)) { \ + return -1; \ + } \ + \ + for (;;) { \ + uint##W##_t current; \ + int r; \ + \ + current = ck_ec##W##_wait_easy(ec, ops, unflagged); \ + \ + /* \ + * We're about to wait harder (i.e., \ + * potentially with futex). Make sure the \ + * counter word is flagged. \ + */ \ + if (CK_CC_LIKELY( \ + ck_ec##W##_upgrade(ec, current, \ + unflagged, flagged) == true)) { \ + ck_pr_fence_acquire(); \ + return 0; \ + } \ + \ + /* \ + * By now, ec->counter == flagged_word (at \ + * some point in the past). Spin some more to \ + * heuristically let any in-flight SP inc/add \ + * to retire. This does not affect \ + * correctness, but practically eliminates \ + * lost wake-ups. \ + */ \ + current = ck_ec##W##_wait_easy(ec, ops, flagged); \ + if (CK_CC_LIKELY(current != flagged_word)) { \ + ck_pr_fence_acquire(); \ + return 0; \ + } \ + \ + r = exponential_backoff(&wait_state, \ + ck_ec##W##_wait_slow_once, \ + &state, \ + pred, &deadline); \ + if (r != 0) { \ + return r; \ + } \ + \ + if (ck_ec##W##_value(ec) != old_value) { \ + ck_pr_fence_acquire(); \ + return 0; \ + } \ + \ + /* Spurious wake-up. Redo the slow path. */ \ + } \ + } while (0) + +int +ck_ec32_wait_pred_slow(struct ck_ec32 *ec, + const struct ck_ec_ops *ops, + uint32_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline_ptr) +{ + const uint32_t unflagged_word = old_value; + const uint32_t flagged_word = old_value | (1UL << 31); + + if (CK_CC_UNLIKELY(ck_ec32_value(ec) != old_value)) { + return 0; + } + + WAIT_SLOW_BODY(32, ec, ops, pred, data, deadline_ptr, + old_value, unflagged_word, flagged_word); +} + +#ifdef CK_F_EC64 +int +ck_ec64_wait_pred_slow(struct ck_ec64 *ec, + const struct ck_ec_ops *ops, + uint64_t old_value, + int (*pred)(const struct ck_ec_wait_state *state, + struct timespec *deadline), + void *data, + const struct timespec *deadline_ptr) +{ + const uint64_t unflagged_word = old_value << 1; + const uint64_t flagged_word = unflagged_word | 1; + + if (CK_CC_UNLIKELY(ck_ec64_value(ec) != old_value)) { + return 0; + } + + WAIT_SLOW_BODY(64, ec, ops, pred, data, deadline_ptr, + old_value, unflagged_word, flagged_word); +} +#endif + +#undef WAIT_SLOW_BODY diff --git a/src/ck_ec_timeutil.h b/src/ck_ec_timeutil.h new file mode 100644 index 000000000000..50cfb67bf4a4 --- /dev/null +++ b/src/ck_ec_timeutil.h @@ -0,0 +1,150 @@ +#ifndef CK_EC_TIMEUTIL_H +#define CK_EC_TIMEUTIL_H +#include +#include +#include +#include +#include + +#define TIME_MAX ((time_t)((1ULL << ((sizeof(time_t) * CHAR_BIT) - 1)) - 1)) +#define NSEC_MAX ((1000L * 1000 * 1000) - 1) + +/* + * Approximates (nsec * multiplier) >> shift. Clamps to UINT32_MAX on + * overflow. + */ +CK_CC_UNUSED static uint32_t +wait_time_scale(uint32_t nsec, + uint32_t multiplier, + unsigned int shift) +{ + uint64_t temp = (uint64_t)nsec * multiplier; + uint64_t max = (uint64_t)UINT32_MAX << shift; + + if (temp >= max) { + return UINT32_MAX; + } + + return temp >> shift; +} + + +/* + * Returns ts + ns. ns is clamped to at most 1 second. 
Clamps the + * return value to TIME_MAX, NSEC_MAX on overflow. + * + */ +CK_CC_UNUSED static struct timespec timespec_add_ns(const struct timespec ts, + uint32_t ns) +{ + struct timespec ret = { + .tv_sec = TIME_MAX, + .tv_nsec = NSEC_MAX + }; + time_t sec; + uint32_t sum_ns; + + if (ns > (uint32_t)NSEC_MAX) { + if (ts.tv_sec >= TIME_MAX) { + return ret; + } + + ret.tv_sec = ts.tv_sec + 1; + ret.tv_nsec = ts.tv_nsec; + return ret; + } + + sec = ts.tv_sec; + sum_ns = ns + ts.tv_nsec; + if (sum_ns > NSEC_MAX) { + if (sec >= TIME_MAX) { + return ret; + } + + sec++; + sum_ns -= (NSEC_MAX + 1); + } + + ret.tv_sec = sec; + ret.tv_nsec = sum_ns; + return ret; +} + + +/* + * Returns ts + inc. If inc is negative, it is normalized to 0. + * Clamps the return value to TIME_MAX, NSEC_MAX on overflow. + */ +CK_CC_UNUSED static struct timespec timespec_add(const struct timespec ts, + const struct timespec inc) +{ + /* Initial return value is clamped to infinite future. */ + struct timespec ret = { + .tv_sec = TIME_MAX, + .tv_nsec = NSEC_MAX + }; + time_t sec; + unsigned long nsec; + + /* Non-positive delta is a no-op. Invalid nsec is another no-op. */ + if (inc.tv_sec < 0 || inc.tv_nsec < 0 || inc.tv_nsec > NSEC_MAX) { + return ts; + } + + /* Detect overflow early. */ + if (inc.tv_sec > TIME_MAX - ts.tv_sec) { + return ret; + } + + sec = ts.tv_sec + inc.tv_sec; + /* This sum can't overflow if the inputs are valid.*/ + nsec = (unsigned long)ts.tv_nsec + inc.tv_nsec; + + if (nsec > NSEC_MAX) { + if (sec >= TIME_MAX) { + return ret; + } + + sec++; + nsec -= (NSEC_MAX + 1); + } + + ret.tv_sec = sec; + ret.tv_nsec = nsec; + return ret; +} + +/* Compares two timespecs. Returns -1 if x < y, 0 if x == y, and 1 if x > y. */ +CK_CC_UNUSED static int timespec_cmp(const struct timespec x, + const struct timespec y) +{ + if (x.tv_sec != y.tv_sec) { + return (x.tv_sec < y.tv_sec) ? -1 : 1; + } + + if (x.tv_nsec != y.tv_nsec) { + return (x.tv_nsec < y.tv_nsec) ? -1 : 1; + } + + return 0; +} + +/* + * Overwrites now with the current CLOCK_MONOTONIC time, and returns + * true if the current time is greater than or equal to the deadline, + * or the clock is somehow broken. + */ +CK_CC_UNUSED static bool check_deadline(struct timespec *now, + const struct ck_ec_ops *ops, + const struct timespec deadline) +{ + int r; + + r = ops->gettime(ops, now); + if (r != 0) { + return true; + } + + return timespec_cmp(*now, deadline) >= 0; +} +#endif /* !CK_EC_TIMEUTIL_H */ diff --git a/src/ck_hs.c b/src/ck_hs.c index a7e15eaddbeb..246bceb2a0ab 100644 --- a/src/ck_hs.c +++ b/src/ck_hs.c @@ -1,958 +1,963 @@ /* * Copyright 2012-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
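/*
 * A sketch, assuming it lives in a file that includes ck_ec_timeutil.h,
 * of how the helpers above combine to build and test a short deadline.
 * POSIX clock_gettime(CLOCK_MONOTONIC) stands in for the ops->gettime
 * callback used by the real code.
 */
#include <stdbool.h>
#include <time.h>

#include "ck_ec_timeutil.h"

static bool
expires_within_2ms(const struct timespec deadline)
{
        struct timespec now, soon;

        if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
                return true;    /* Treat a broken clock as expired, like check_deadline. */

        /* An ns argument above NSEC_MAX would be clamped to one full second. */
        soon = timespec_add_ns(now, 2 * 1000 * 1000);
        return timespec_cmp(deadline, soon) <= 0;
}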
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include "ck_internal.h" #ifndef CK_HS_PROBE_L1_SHIFT #define CK_HS_PROBE_L1_SHIFT 3ULL #endif /* CK_HS_PROBE_L1_SHIFT */ #define CK_HS_PROBE_L1 (1 << CK_HS_PROBE_L1_SHIFT) #define CK_HS_PROBE_L1_MASK (CK_HS_PROBE_L1 - 1) #ifndef CK_HS_PROBE_L1_DEFAULT #define CK_HS_PROBE_L1_DEFAULT CK_MD_CACHELINE #endif #define CK_HS_VMA_MASK ((uintptr_t)((1ULL << CK_MD_VMA_BITS) - 1)) #define CK_HS_VMA(x) \ ((void *)((uintptr_t)(x) & CK_HS_VMA_MASK)) #define CK_HS_EMPTY NULL #define CK_HS_TOMBSTONE ((void *)~(uintptr_t)0) #define CK_HS_G (2) #define CK_HS_G_MASK (CK_HS_G - 1) #if defined(CK_F_PR_LOAD_8) && defined(CK_F_PR_STORE_8) #define CK_HS_WORD uint8_t #define CK_HS_WORD_MAX UINT8_MAX #define CK_HS_STORE(x, y) ck_pr_store_8(x, y) #define CK_HS_LOAD(x) ck_pr_load_8(x) #elif defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_STORE_16) #define CK_HS_WORD uint16_t #define CK_HS_WORD_MAX UINT16_MAX #define CK_HS_STORE(x, y) ck_pr_store_16(x, y) #define CK_HS_LOAD(x) ck_pr_load_16(x) #elif defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_STORE_32) #define CK_HS_WORD uint32_t #define CK_HS_WORD_MAX UINT32_MAX #define CK_HS_STORE(x, y) ck_pr_store_32(x, y) #define CK_HS_LOAD(x) ck_pr_load_32(x) #else #error "ck_hs is not supported on your platform." #endif enum ck_hs_probe_behavior { CK_HS_PROBE = 0, /* Default behavior. */ CK_HS_PROBE_TOMBSTONE, /* Short-circuit on tombstone. */ CK_HS_PROBE_INSERT /* Short-circuit on probe bound if tombstone found. */ }; struct ck_hs_map { unsigned int generation[CK_HS_G]; unsigned int probe_maximum; unsigned long mask; unsigned long step; unsigned int probe_limit; unsigned int tombstones; unsigned long n_entries; unsigned long capacity; unsigned long size; CK_HS_WORD *probe_bound; const void **entries; }; static inline void ck_hs_map_signal(struct ck_hs_map *map, unsigned long h) { h &= CK_HS_G_MASK; ck_pr_store_uint(&map->generation[h], map->generation[h] + 1); ck_pr_fence_store(); return; } static bool -_ck_hs_next(struct ck_hs *hs, struct ck_hs_map *map, struct ck_hs_iterator *i, void **key) +_ck_hs_next(struct ck_hs *hs, struct ck_hs_map *map, + struct ck_hs_iterator *i, void **key) { void *value; + if (i->offset >= map->capacity) return false; do { value = CK_CC_DECONST_PTR(map->entries[i->offset]); if (value != CK_HS_EMPTY && value != CK_HS_TOMBSTONE) { #ifdef CK_HS_PP if (hs->mode & CK_HS_MODE_OBJECT) value = CK_HS_VMA(value); #else (void)hs; /* Avoid unused parameter warning. 
*/ #endif i->offset++; *key = value; return true; } } while (++i->offset < map->capacity); return false; } void ck_hs_iterator_init(struct ck_hs_iterator *iterator) { iterator->cursor = NULL; iterator->offset = 0; iterator->map = NULL; return; } bool ck_hs_next(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) { + return _ck_hs_next(hs, hs->map, i, key); } bool ck_hs_next_spmc(struct ck_hs *hs, struct ck_hs_iterator *i, void **key) { struct ck_hs_map *m = i->map; + if (m == NULL) { m = i->map = ck_pr_load_ptr(&hs->map); } + return _ck_hs_next(hs, m, i, key); } void ck_hs_stat(struct ck_hs *hs, struct ck_hs_stat *st) { struct ck_hs_map *map = hs->map; st->n_entries = map->n_entries; st->tombstones = map->tombstones; st->probe_maximum = map->probe_maximum; return; } unsigned long ck_hs_count(struct ck_hs *hs) { return hs->map->n_entries; } static void ck_hs_map_destroy(struct ck_malloc *m, struct ck_hs_map *map, bool defer) { m->free(map, map->size, defer); return; } void ck_hs_destroy(struct ck_hs *hs) { ck_hs_map_destroy(hs->m, hs->map, false); return; } static struct ck_hs_map * ck_hs_map_create(struct ck_hs *hs, unsigned long entries) { struct ck_hs_map *map; unsigned long size, n_entries, prefix, limit; n_entries = ck_internal_power_2(entries); if (n_entries < CK_HS_PROBE_L1) n_entries = CK_HS_PROBE_L1; size = sizeof(struct ck_hs_map) + (sizeof(void *) * n_entries + CK_MD_CACHELINE - 1); if (hs->mode & CK_HS_MODE_DELETE) { prefix = sizeof(CK_HS_WORD) * n_entries; size += prefix; } else { prefix = 0; } map = hs->m->malloc(size); if (map == NULL) return NULL; map->size = size; /* We should probably use a more intelligent heuristic for default probe length. */ limit = ck_internal_max(n_entries >> (CK_HS_PROBE_L1_SHIFT + 2), CK_HS_PROBE_L1_DEFAULT); if (limit > UINT_MAX) limit = UINT_MAX; map->probe_limit = (unsigned int)limit; map->probe_maximum = 0; map->capacity = n_entries; map->step = ck_cc_ffsl(n_entries); map->mask = n_entries - 1; map->n_entries = 0; /* Align map allocation to cache line. */ map->entries = (void *)(((uintptr_t)&map[1] + prefix + CK_MD_CACHELINE - 1) & ~(CK_MD_CACHELINE - 1)); memset(map->entries, 0, sizeof(void *) * n_entries); memset(map->generation, 0, sizeof map->generation); if (hs->mode & CK_HS_MODE_DELETE) { map->probe_bound = (CK_HS_WORD *)&map[1]; memset(map->probe_bound, 0, prefix); } else { map->probe_bound = NULL; } /* Commit entries purge with respect to map publication. 
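/*
 * A usage sketch for the iterator interface above.  ck_hs_next() is for
 * single-threaded use; ck_hs_next_spmc() additionally caches the map
 * pointer so iteration can run concurrently with a writer.  The visit()
 * callback is a placeholder.
 */
#include <ck_hs.h>

static void
set_visit_all(struct ck_hs *hs, void (*visit)(void *))
{
        struct ck_hs_iterator iterator;
        void *entry;

        ck_hs_iterator_init(&iterator);
        while (ck_hs_next(hs, &iterator, &entry) == true)
                visit(entry);

        return;
}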
*/ ck_pr_fence_store(); return map; } bool ck_hs_reset_size(struct ck_hs *hs, unsigned long capacity) { struct ck_hs_map *map, *previous; previous = hs->map; map = ck_hs_map_create(hs, capacity); if (map == NULL) return false; ck_pr_store_ptr(&hs->map, map); ck_hs_map_destroy(hs->m, previous, true); return true; } bool ck_hs_reset(struct ck_hs *hs) { struct ck_hs_map *previous; previous = hs->map; return ck_hs_reset_size(hs, previous->capacity); } static inline unsigned long ck_hs_map_probe_next(struct ck_hs_map *map, unsigned long offset, unsigned long h, unsigned long level, unsigned long probes) { unsigned long r, stride; r = (h >> map->step) >> level; stride = (r & ~CK_HS_PROBE_L1_MASK) << 1 | (r & CK_HS_PROBE_L1_MASK); return (offset + (probes >> CK_HS_PROBE_L1_SHIFT) + (stride | CK_HS_PROBE_L1)) & map->mask; } static inline void ck_hs_map_bound_set(struct ck_hs_map *m, unsigned long h, unsigned long n_probes) { unsigned long offset = h & m->mask; if (n_probes > m->probe_maximum) ck_pr_store_uint(&m->probe_maximum, n_probes); if (m->probe_bound != NULL && m->probe_bound[offset] < n_probes) { if (n_probes > CK_HS_WORD_MAX) n_probes = CK_HS_WORD_MAX; CK_HS_STORE(&m->probe_bound[offset], n_probes); ck_pr_fence_store(); } return; } static inline unsigned int ck_hs_map_bound_get(struct ck_hs_map *m, unsigned long h) { unsigned long offset = h & m->mask; unsigned int r = CK_HS_WORD_MAX; if (m->probe_bound != NULL) { r = CK_HS_LOAD(&m->probe_bound[offset]); if (r == CK_HS_WORD_MAX) r = ck_pr_load_uint(&m->probe_maximum); } else { r = ck_pr_load_uint(&m->probe_maximum); } return r; } bool ck_hs_grow(struct ck_hs *hs, unsigned long capacity) { struct ck_hs_map *map, *update; unsigned long k, i, j, offset, probes; const void *previous, **bucket; restart: map = hs->map; if (map->capacity > capacity) return false; update = ck_hs_map_create(hs, capacity); if (update == NULL) return false; for (k = 0; k < map->capacity; k++) { unsigned long h; previous = map->entries[k]; if (previous == CK_HS_EMPTY || previous == CK_HS_TOMBSTONE) continue; #ifdef CK_HS_PP if (hs->mode & CK_HS_MODE_OBJECT) previous = CK_HS_VMA(previous); #endif h = hs->hf(previous, hs->seed); offset = h & update->mask; i = probes = 0; for (;;) { bucket = (const void **)((uintptr_t)&update->entries[offset] & ~(CK_MD_CACHELINE - 1)); for (j = 0; j < CK_HS_PROBE_L1; j++) { const void **cursor = bucket + ((j + offset) & (CK_HS_PROBE_L1 - 1)); if (probes++ == update->probe_limit) break; if (CK_CC_LIKELY(*cursor == CK_HS_EMPTY)) { *cursor = map->entries[k]; update->n_entries++; ck_hs_map_bound_set(update, h, probes); break; } } if (j < CK_HS_PROBE_L1) break; offset = ck_hs_map_probe_next(update, offset, h, i++, probes); } if (probes > update->probe_limit) { /* * We have hit the probe limit, map needs to be even larger. 
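/*
 * A standalone copy, for illustration, of the two-level probe arithmetic
 * implemented by ck_hs_map_probe_next() above, kept only to make the
 * sequence easier to experiment with.  The first CK_HS_PROBE_L1 slots
 * come from the home bucket's cache line; later blocks are selected by a
 * per-hash stride.
 */
#define PROBE_L1_SHIFT 3UL
#define PROBE_L1 (1UL << PROBE_L1_SHIFT)
#define PROBE_L1_MASK (PROBE_L1 - 1)

unsigned long
probe_next(unsigned long offset, unsigned long h, unsigned long level,
    unsigned long probes, unsigned long step, unsigned long mask)
{
        unsigned long r = (h >> step) >> level;
        unsigned long stride = (r & ~PROBE_L1_MASK) << 1 | (r & PROBE_L1_MASK);

        return (offset + (probes >> PROBE_L1_SHIFT) +
            (stride | PROBE_L1)) & mask;
}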
*/ ck_hs_map_destroy(hs->m, update, false); capacity <<= 1; goto restart; } } ck_pr_fence_store(); ck_pr_store_ptr(&hs->map, update); ck_hs_map_destroy(hs->m, map, true); return true; } static void ck_hs_map_postinsert(struct ck_hs *hs, struct ck_hs_map *map) { map->n_entries++; if ((map->n_entries << 1) > map->capacity) ck_hs_grow(hs, map->capacity << 1); return; } bool ck_hs_rebuild(struct ck_hs *hs) { return ck_hs_grow(hs, hs->map->capacity); } static const void ** ck_hs_map_probe(struct ck_hs *hs, struct ck_hs_map *map, unsigned long *n_probes, const void ***priority, unsigned long h, const void *key, const void **object, unsigned long probe_limit, enum ck_hs_probe_behavior behavior) { const void **bucket, **cursor, *k, *compare; const void **pr = NULL; unsigned long offset, j, i, probes, opl; #ifdef CK_HS_PP /* If we are storing object pointers, then we may leverage pointer packing. */ unsigned long hv = 0; if (hs->mode & CK_HS_MODE_OBJECT) { hv = (h >> 25) & CK_HS_KEY_MASK; compare = CK_HS_VMA(key); } else { compare = key; } #else compare = key; #endif offset = h & map->mask; *object = NULL; i = probes = 0; opl = probe_limit; if (behavior == CK_HS_PROBE_INSERT) probe_limit = ck_hs_map_bound_get(map, h); for (;;) { bucket = (const void **)((uintptr_t)&map->entries[offset] & ~(CK_MD_CACHELINE - 1)); for (j = 0; j < CK_HS_PROBE_L1; j++) { cursor = bucket + ((j + offset) & (CK_HS_PROBE_L1 - 1)); if (probes++ == probe_limit) { if (probe_limit == opl || pr != NULL) { k = CK_HS_EMPTY; goto leave; } /* * If no eligible slot has been found yet, continue probe * sequence with original probe limit. */ probe_limit = opl; } k = ck_pr_load_ptr(cursor); if (k == CK_HS_EMPTY) goto leave; if (k == CK_HS_TOMBSTONE) { if (pr == NULL) { pr = cursor; *n_probes = probes; if (behavior == CK_HS_PROBE_TOMBSTONE) { k = CK_HS_EMPTY; goto leave; } } continue; } #ifdef CK_HS_PP if (hs->mode & CK_HS_MODE_OBJECT) { if (((uintptr_t)k >> CK_MD_VMA_BITS) != hv) continue; k = CK_HS_VMA(k); } #endif if (k == compare) goto leave; if (hs->compare == NULL) continue; if (hs->compare(k, key) == true) goto leave; } offset = ck_hs_map_probe_next(map, offset, h, i++, probes); } leave: if (probes > probe_limit) { cursor = NULL; } else { *object = k; } if (pr == NULL) *n_probes = probes; *priority = pr; return cursor; } static inline const void * ck_hs_marshal(unsigned int mode, const void *key, unsigned long h) { #ifdef CK_HS_PP const void *insert; if (mode & CK_HS_MODE_OBJECT) { insert = (void *)((uintptr_t)CK_HS_VMA(key) | ((h >> 25) << CK_MD_VMA_BITS)); } else { insert = key; } return insert; #else (void)mode; (void)h; return key; #endif } bool ck_hs_gc(struct ck_hs *hs, unsigned long cycles, unsigned long seed) { unsigned long size = 0; unsigned long i; struct ck_hs_map *map = hs->map; unsigned int maximum; CK_HS_WORD *bounds = NULL; if (map->n_entries == 0) { ck_pr_store_uint(&map->probe_maximum, 0); if (map->probe_bound != NULL) memset(map->probe_bound, 0, sizeof(CK_HS_WORD) * map->capacity); return true; } if (cycles == 0) { maximum = 0; if (map->probe_bound != NULL) { size = sizeof(CK_HS_WORD) * map->capacity; bounds = hs->m->malloc(size); if (bounds == NULL) return false; memset(bounds, 0, size); } } else { maximum = map->probe_maximum; } for (i = 0; i < map->capacity; i++) { const void **first, *object, **slot, *entry; unsigned long n_probes, offset, h; entry = map->entries[(i + seed) & map->mask]; if (entry == CK_HS_EMPTY || entry == CK_HS_TOMBSTONE) continue; #ifdef CK_HS_PP if (hs->mode & CK_HS_MODE_OBJECT) entry = 
CK_HS_VMA(entry); #endif h = hs->hf(entry, hs->seed); offset = h & map->mask; slot = ck_hs_map_probe(hs, map, &n_probes, &first, h, entry, &object, ck_hs_map_bound_get(map, h), CK_HS_PROBE); if (first != NULL) { const void *insert = ck_hs_marshal(hs->mode, entry, h); ck_pr_store_ptr(first, insert); ck_hs_map_signal(map, h); ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); } if (cycles == 0) { if (n_probes > maximum) maximum = n_probes; if (n_probes > CK_HS_WORD_MAX) n_probes = CK_HS_WORD_MAX; if (bounds != NULL && n_probes > bounds[offset]) bounds[offset] = n_probes; } else if (--cycles == 0) break; } /* * The following only apply to garbage collection involving * a full scan of all entries. */ if (maximum != map->probe_maximum) ck_pr_store_uint(&map->probe_maximum, maximum); if (bounds != NULL) { for (i = 0; i < map->capacity; i++) CK_HS_STORE(&map->probe_bound[i], bounds[i]); hs->m->free(bounds, size, false); } return true; } bool ck_hs_fas(struct ck_hs *hs, unsigned long h, const void *key, void **previous) { const void **slot, **first, *object, *insert; struct ck_hs_map *map = hs->map; unsigned long n_probes; *previous = NULL; slot = ck_hs_map_probe(hs, map, &n_probes, &first, h, key, &object, ck_hs_map_bound_get(map, h), CK_HS_PROBE); /* Replacement semantics presume existence. */ if (object == NULL) return false; insert = ck_hs_marshal(hs->mode, key, h); if (first != NULL) { ck_pr_store_ptr(first, insert); ck_hs_map_signal(map, h); ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); } else { ck_pr_store_ptr(slot, insert); } *previous = CK_CC_DECONST_PTR(object); return true; } /* * An apply function takes two arguments. The first argument is a pointer to a * pre-existing object. The second argument is a pointer to the fifth argument * passed to ck_hs_apply. If a non-NULL pointer is passed to the first argument * and the return value of the apply function is NULL, then the pre-existing * value is deleted. If the return pointer is the same as the one passed to the * apply function then no changes are made to the hash table. If the first * argument is non-NULL and the return pointer is different than that passed to * the apply function, then the pre-existing value is replaced. For * replacement, it is required that the value itself is identical to the * previous value. */ bool ck_hs_apply(struct ck_hs *hs, unsigned long h, const void *key, ck_hs_apply_fn_t *fn, void *cl) { const void **slot, **first, *object, *delta, *insert; unsigned long n_probes; struct ck_hs_map *map; restart: map = hs->map; slot = ck_hs_map_probe(hs, map, &n_probes, &first, h, key, &object, map->probe_limit, CK_HS_PROBE_INSERT); if (slot == NULL && first == NULL) { if (ck_hs_grow(hs, map->capacity << 1) == false) return false; goto restart; } delta = fn(CK_CC_DECONST_PTR(object), cl); if (delta == NULL) { /* * The apply function has requested deletion. If the object doesn't exist, * then exit early. */ if (CK_CC_UNLIKELY(object == NULL)) return true; /* Otherwise, mark slot as deleted. */ ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); map->n_entries--; map->tombstones++; return true; } /* The apply function has not requested hash set modification so exit early. */ if (delta == object) return true; /* A modification or insertion has been requested. */ ck_hs_map_bound_set(map, h, n_probes); insert = ck_hs_marshal(hs->mode, delta, h); if (first != NULL) { /* * This follows the same semantics as ck_hs_set, please refer to that * function for documentation. 
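/*
 * A sketch of an apply callback written against the contract described
 * above (return NULL to delete, the same pointer to leave the set
 * untouched, a different pointer to insert or replace).  It implements
 * get-or-insert; the closure layout and names are invented, and h is
 * assumed to be the hash of the candidate under the set's registered
 * hash function.
 */
#include <stdbool.h>
#include <ck_hs.h>

struct upsert_closure {
        void *candidate;        /* Object to insert when the key is absent. */
};

static void *
get_or_insert_fn(void *existing, void *closure)
{
        struct upsert_closure *c = closure;

        if (existing != NULL)
                return existing;        /* Same pointer back: no modification. */

        return c->candidate;            /* Absent: request insertion. */
}

static bool
set_get_or_insert(struct ck_hs *hs, unsigned long h, struct upsert_closure *c)
{
        return ck_hs_apply(hs, h, c->candidate, get_or_insert_fn, c);
}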
*/ ck_pr_store_ptr(first, insert); if (object != NULL) { ck_hs_map_signal(map, h); ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); } } else { /* * If we are storing into same slot, then atomic store is sufficient * for replacement. */ ck_pr_store_ptr(slot, insert); } if (object == NULL) ck_hs_map_postinsert(hs, map); return true; } bool ck_hs_set(struct ck_hs *hs, unsigned long h, const void *key, void **previous) { const void **slot, **first, *object, *insert; unsigned long n_probes; struct ck_hs_map *map; *previous = NULL; restart: map = hs->map; slot = ck_hs_map_probe(hs, map, &n_probes, &first, h, key, &object, map->probe_limit, CK_HS_PROBE_INSERT); if (slot == NULL && first == NULL) { if (ck_hs_grow(hs, map->capacity << 1) == false) return false; goto restart; } ck_hs_map_bound_set(map, h, n_probes); insert = ck_hs_marshal(hs->mode, key, h); if (first != NULL) { /* If an earlier bucket was found, then store entry there. */ ck_pr_store_ptr(first, insert); /* * If a duplicate key was found, then delete it after * signaling concurrent probes to restart. Optionally, * it is possible to install tombstone after grace * period if we can guarantee earlier position of * duplicate key. */ if (object != NULL) { ck_hs_map_signal(map, h); ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); } } else { /* * If we are storing into same slot, then atomic store is sufficient * for replacement. */ ck_pr_store_ptr(slot, insert); } if (object == NULL) ck_hs_map_postinsert(hs, map); *previous = CK_CC_DECONST_PTR(object); return true; } CK_CC_INLINE static bool ck_hs_put_internal(struct ck_hs *hs, unsigned long h, const void *key, enum ck_hs_probe_behavior behavior) { const void **slot, **first, *object, *insert; unsigned long n_probes; struct ck_hs_map *map; restart: map = hs->map; slot = ck_hs_map_probe(hs, map, &n_probes, &first, h, key, &object, map->probe_limit, behavior); if (slot == NULL && first == NULL) { if (ck_hs_grow(hs, map->capacity << 1) == false) return false; goto restart; } /* Fail operation if a match was found. */ if (object != NULL) return false; ck_hs_map_bound_set(map, h, n_probes); insert = ck_hs_marshal(hs->mode, key, h); if (first != NULL) { /* Insert key into first bucket in probe sequence. */ ck_pr_store_ptr(first, insert); } else { /* An empty slot was found. 
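/*
 * A sketch of insert-or-replace through ck_hs_set() as defined above,
 * which hands back any displaced object so the caller can release it
 * once readers are known to be done with it.  The release callback is a
 * placeholder.
 */
#include <stdbool.h>
#include <ck_hs.h>

static bool
set_replace(struct ck_hs *hs, unsigned long h, void *object,
    void (*release)(void *))
{
        void *previous;

        if (ck_hs_set(hs, h, object, &previous) == false)
                return false;           /* Growing the map failed. */

        if (previous != NULL)
                release(previous);      /* e.g. deferred via safe memory reclamation. */

        return true;
}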
*/ ck_pr_store_ptr(slot, insert); } ck_hs_map_postinsert(hs, map); return true; } bool ck_hs_put(struct ck_hs *hs, unsigned long h, const void *key) { return ck_hs_put_internal(hs, h, key, CK_HS_PROBE_INSERT); } bool ck_hs_put_unique(struct ck_hs *hs, unsigned long h, const void *key) { return ck_hs_put_internal(hs, h, key, CK_HS_PROBE_TOMBSTONE); } void * ck_hs_get(struct ck_hs *hs, unsigned long h, const void *key) { const void **first, *object; struct ck_hs_map *map; unsigned long n_probes; unsigned int g, g_p, probe; unsigned int *generation; do { map = ck_pr_load_ptr(&hs->map); generation = &map->generation[h & CK_HS_G_MASK]; g = ck_pr_load_uint(generation); probe = ck_hs_map_bound_get(map, h); ck_pr_fence_load(); ck_hs_map_probe(hs, map, &n_probes, &first, h, key, &object, probe, CK_HS_PROBE); ck_pr_fence_load(); g_p = ck_pr_load_uint(generation); } while (g != g_p); return CK_CC_DECONST_PTR(object); } void * ck_hs_remove(struct ck_hs *hs, unsigned long h, const void *key) { const void **slot, **first, *object; struct ck_hs_map *map = hs->map; unsigned long n_probes; slot = ck_hs_map_probe(hs, map, &n_probes, &first, h, key, &object, ck_hs_map_bound_get(map, h), CK_HS_PROBE); if (object == NULL) return NULL; ck_pr_store_ptr(slot, CK_HS_TOMBSTONE); map->n_entries--; map->tombstones++; return CK_CC_DECONST_PTR(object); } bool ck_hs_move(struct ck_hs *hs, struct ck_hs *source, ck_hs_hash_cb_t *hf, ck_hs_compare_cb_t *compare, struct ck_malloc *m) { if (m == NULL || m->malloc == NULL || m->free == NULL || hf == NULL) return false; hs->mode = source->mode; hs->seed = source->seed; hs->map = source->map; hs->m = m; hs->hf = hf; hs->compare = compare; return true; } bool ck_hs_init(struct ck_hs *hs, unsigned int mode, ck_hs_hash_cb_t *hf, ck_hs_compare_cb_t *compare, struct ck_malloc *m, unsigned long n_entries, unsigned long seed) { if (m == NULL || m->malloc == NULL || m->free == NULL || hf == NULL) return false; hs->m = m; hs->mode = mode; hs->seed = seed; hs->hf = hf; hs->compare = compare; hs->map = ck_hs_map_create(hs, n_entries); return hs->map != NULL; } diff --git a/src/ck_ht.c b/src/ck_ht.c index 48b04c9678d9..66c7315038c1 100644 --- a/src/ck_ht.c +++ b/src/ck_ht.c @@ -1,1036 +1,1033 @@ /* * Copyright 2012-2015 Samy Al Bahra. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #define CK_HT_IM #include /* * This implementation borrows several techniques from Josh Dybnis's * nbds library which can be found at http://code.google.com/p/nbds - * - * This release currently only includes support for 64-bit platforms. - * We can address 32-bit platforms in a future release. */ #include #include #include #include #include #include #include "ck_ht_hash.h" #include "ck_internal.h" #ifndef CK_HT_BUCKET_LENGTH #ifdef CK_HT_PP #define CK_HT_BUCKET_SHIFT 2ULL #else #define CK_HT_BUCKET_SHIFT 1ULL #endif #define CK_HT_BUCKET_LENGTH (1U << CK_HT_BUCKET_SHIFT) #define CK_HT_BUCKET_MASK (CK_HT_BUCKET_LENGTH - 1) #endif #ifndef CK_HT_PROBE_DEFAULT #define CK_HT_PROBE_DEFAULT 64ULL #endif #if defined(CK_F_PR_LOAD_8) && defined(CK_F_PR_STORE_8) #define CK_HT_WORD uint8_t #define CK_HT_WORD_MAX UINT8_MAX #define CK_HT_STORE(x, y) ck_pr_store_8(x, y) #define CK_HT_LOAD(x) ck_pr_load_8(x) #elif defined(CK_F_PR_LOAD_16) && defined(CK_F_PR_STORE_16) #define CK_HT_WORD uint16_t #define CK_HT_WORD_MAX UINT16_MAX #define CK_HT_STORE(x, y) ck_pr_store_16(x, y) #define CK_HT_LOAD(x) ck_pr_load_16(x) #elif defined(CK_F_PR_LOAD_32) && defined(CK_F_PR_STORE_32) #define CK_HT_WORD uint32_t #define CK_HT_WORD_MAX UINT32_MAX #define CK_HT_STORE(x, y) ck_pr_store_32(x, y) #define CK_HT_LOAD(x) ck_pr_load_32(x) #else #error "ck_ht is not supported on your platform." #endif struct ck_ht_map { unsigned int mode; CK_HT_TYPE deletions; CK_HT_TYPE probe_maximum; CK_HT_TYPE probe_length; CK_HT_TYPE probe_limit; CK_HT_TYPE size; CK_HT_TYPE n_entries; CK_HT_TYPE mask; CK_HT_TYPE capacity; CK_HT_TYPE step; CK_HT_WORD *probe_bound; struct ck_ht_entry *entries; }; void ck_ht_stat(struct ck_ht *table, struct ck_ht_stat *st) { struct ck_ht_map *map = table->map; st->n_entries = map->n_entries; st->probe_maximum = map->probe_maximum; return; } void ck_ht_hash(struct ck_ht_hash *h, struct ck_ht *table, const void *key, uint16_t key_length) { table->h(h, key, key_length, table->seed); return; } void ck_ht_hash_direct(struct ck_ht_hash *h, struct ck_ht *table, uintptr_t key) { ck_ht_hash(h, table, &key, sizeof(key)); return; } static void ck_ht_hash_wrapper(struct ck_ht_hash *h, const void *key, size_t length, uint64_t seed) { h->value = (unsigned long)MurmurHash64A(key, length, seed); return; } static struct ck_ht_map * ck_ht_map_create(struct ck_ht *table, CK_HT_TYPE entries) { struct ck_ht_map *map; CK_HT_TYPE size; uintptr_t prefix; uint32_t n_entries; n_entries = ck_internal_power_2(entries); if (n_entries < CK_HT_BUCKET_LENGTH) n_entries = CK_HT_BUCKET_LENGTH; size = sizeof(struct ck_ht_map) + (sizeof(struct ck_ht_entry) * n_entries + CK_MD_CACHELINE - 1); if (table->mode & CK_HT_WORKLOAD_DELETE) { prefix = sizeof(CK_HT_WORD) * n_entries; size += prefix; } else { prefix = 0; } map = table->m->malloc(size); if (map == NULL) return NULL; map->mode = table->mode; map->size = size; map->probe_limit = ck_internal_max_64(n_entries >> (CK_HT_BUCKET_SHIFT + 2), CK_HT_PROBE_DEFAULT); map->deletions = 0; map->probe_maximum = 0; map->capacity = n_entries; map->step = ck_cc_ffsll(map->capacity); map->mask = map->capacity - 1; map->n_entries = 0; map->entries = (struct ck_ht_entry *)(((uintptr_t)&map[1] + prefix + CK_MD_CACHELINE - 1) & ~(CK_MD_CACHELINE - 1)); if (table->mode & CK_HT_WORKLOAD_DELETE) { map->probe_bound = (CK_HT_WORD *)&map[1]; memset(map->probe_bound, 0, prefix); } else { map->probe_bound = NULL; } memset(map->entries, 0, sizeof(struct ck_ht_entry) * n_entries); ck_pr_fence_store(); return map; } static 
inline void ck_ht_map_bound_set(struct ck_ht_map *m, struct ck_ht_hash h, CK_HT_TYPE n_probes) { CK_HT_TYPE offset = h.value & m->mask; if (n_probes > m->probe_maximum) CK_HT_TYPE_STORE(&m->probe_maximum, n_probes); if (m->probe_bound != NULL && m->probe_bound[offset] < n_probes) { if (n_probes >= CK_HT_WORD_MAX) n_probes = CK_HT_WORD_MAX; CK_HT_STORE(&m->probe_bound[offset], n_probes); ck_pr_fence_store(); } return; } static inline CK_HT_TYPE ck_ht_map_bound_get(struct ck_ht_map *m, struct ck_ht_hash h) { CK_HT_TYPE offset = h.value & m->mask; CK_HT_TYPE r = CK_HT_WORD_MAX; if (m->probe_bound != NULL) { r = CK_HT_LOAD(&m->probe_bound[offset]); if (r == CK_HT_WORD_MAX) r = CK_HT_TYPE_LOAD(&m->probe_maximum); } else { r = CK_HT_TYPE_LOAD(&m->probe_maximum); } return r; } static void ck_ht_map_destroy(struct ck_malloc *m, struct ck_ht_map *map, bool defer) { m->free(map, map->size, defer); return; } static inline size_t ck_ht_map_probe_next(struct ck_ht_map *map, size_t offset, ck_ht_hash_t h, size_t probes) { ck_ht_hash_t r; size_t stride; unsigned long level = (unsigned long)probes >> CK_HT_BUCKET_SHIFT; r.value = (h.value >> map->step) >> level; stride = (r.value & ~CK_HT_BUCKET_MASK) << 1 | (r.value & CK_HT_BUCKET_MASK); return (offset + level + (stride | CK_HT_BUCKET_LENGTH)) & map->mask; } bool ck_ht_init(struct ck_ht *table, unsigned int mode, ck_ht_hash_cb_t *h, struct ck_malloc *m, CK_HT_TYPE entries, uint64_t seed) { if (m == NULL || m->malloc == NULL || m->free == NULL) return false; table->m = m; table->mode = mode; table->seed = seed; if (h == NULL) { table->h = ck_ht_hash_wrapper; } else { table->h = h; } table->map = ck_ht_map_create(table, entries); return table->map != NULL; } static struct ck_ht_entry * ck_ht_map_probe_wr(struct ck_ht_map *map, ck_ht_hash_t h, ck_ht_entry_t *snapshot, ck_ht_entry_t **available, const void *key, uint16_t key_length, CK_HT_TYPE *probe_limit, CK_HT_TYPE *probe_wr) { struct ck_ht_entry *bucket, *cursor; struct ck_ht_entry *first = NULL; size_t offset, i, j; CK_HT_TYPE probes = 0; CK_HT_TYPE limit; if (probe_limit == NULL) { limit = ck_ht_map_bound_get(map, h); } else { limit = CK_HT_TYPE_MAX; } offset = h.value & map->mask; for (i = 0; i < map->probe_limit; i++) { /* * Probe on a complete cache line first. Scan forward and wrap around to * the beginning of the cache line. Only when the complete cache line has * been scanned do we move on to the next row. */ bucket = (void *)((uintptr_t)(map->entries + offset) & ~(CK_MD_CACHELINE - 1)); for (j = 0; j < CK_HT_BUCKET_LENGTH; j++) { uint16_t k; if (probes++ > limit) break; cursor = bucket + ((j + offset) & (CK_HT_BUCKET_LENGTH - 1)); /* * It is probably worth it to encapsulate probe state * in order to prevent a complete reprobe sequence in * the case of intermittent writers. */ if (cursor->key == CK_HT_KEY_TOMBSTONE) { if (first == NULL) { first = cursor; *probe_wr = probes; } continue; } if (cursor->key == CK_HT_KEY_EMPTY) goto leave; if (cursor->key == (uintptr_t)key) goto leave; if (map->mode & CK_HT_MODE_BYTESTRING) { void *pointer; /* * Check memoized portion of hash value before * expensive full-length comparison. 
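/*
 * An illustration, outside ck_ht, of the "memoized hash" test used
 * above.  A short slice of the full hash is stored next to each entry
 * so that most non-matching keys are rejected with one integer compare
 * instead of a memcmp().  The types, names and choice of hash bits are
 * invented for the example.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct bucket_entry {
        uint16_t hash_prefix;   /* Memoized slice of the key's hash. */
        uint16_t key_length;
        const void *key;
};

static bool
entry_matches(const struct bucket_entry *e, const void *key,
    uint16_t key_length, uint64_t h)
{
        if (e->key_length != key_length)
                return false;

        /* Cheap rejection before the expensive full-length comparison. */
        if (e->hash_prefix != (uint16_t)(h >> 48))
                return false;

        return memcmp(e->key, key, key_length) == 0;
}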
*/ k = ck_ht_entry_key_length(cursor); if (k != key_length) continue; #ifdef CK_HT_PP if ((cursor->value >> CK_MD_VMA_BITS) != ((h.value >> 32) & CK_HT_KEY_MASK)) continue; #else if (cursor->hash != h.value) continue; #endif pointer = ck_ht_entry_key(cursor); if (memcmp(pointer, key, key_length) == 0) goto leave; } } offset = ck_ht_map_probe_next(map, offset, h, probes); } cursor = NULL; leave: if (probe_limit != NULL) { *probe_limit = probes; } else if (first == NULL) { *probe_wr = probes; } *available = first; if (cursor != NULL) { *snapshot = *cursor; } return cursor; } bool ck_ht_gc(struct ck_ht *ht, unsigned long cycles, unsigned long seed) { CK_HT_WORD *bounds = NULL; struct ck_ht_map *map = ht->map; CK_HT_TYPE maximum, i; CK_HT_TYPE size = 0; if (map->n_entries == 0) { CK_HT_TYPE_STORE(&map->probe_maximum, 0); if (map->probe_bound != NULL) memset(map->probe_bound, 0, sizeof(CK_HT_WORD) * map->capacity); return true; } if (cycles == 0) { maximum = 0; if (map->probe_bound != NULL) { size = sizeof(CK_HT_WORD) * map->capacity; bounds = ht->m->malloc(size); if (bounds == NULL) return false; memset(bounds, 0, size); } } else { maximum = map->probe_maximum; } for (i = 0; i < map->capacity; i++) { struct ck_ht_entry *entry, *priority, snapshot; struct ck_ht_hash h; CK_HT_TYPE probes_wr; CK_HT_TYPE offset; entry = &map->entries[(i + seed) & map->mask]; if (entry->key == CK_HT_KEY_EMPTY || entry->key == CK_HT_KEY_TOMBSTONE) { continue; } if (ht->mode & CK_HT_MODE_BYTESTRING) { #ifndef CK_HT_PP h.value = entry->hash; #else ht->h(&h, ck_ht_entry_key(entry), ck_ht_entry_key_length(entry), ht->seed); #endif entry = ck_ht_map_probe_wr(map, h, &snapshot, &priority, ck_ht_entry_key(entry), ck_ht_entry_key_length(entry), NULL, &probes_wr); } else { #ifndef CK_HT_PP h.value = entry->hash; #else ht->h(&h, &entry->key, sizeof(entry->key), ht->seed); #endif entry = ck_ht_map_probe_wr(map, h, &snapshot, &priority, (void *)entry->key, sizeof(entry->key), NULL, &probes_wr); } offset = h.value & map->mask; if (priority != NULL) { CK_HT_TYPE_STORE(&map->deletions, map->deletions + 1); ck_pr_fence_store(); #ifndef CK_HT_PP CK_HT_TYPE_STORE(&priority->key_length, entry->key_length); CK_HT_TYPE_STORE(&priority->hash, entry->hash); #endif ck_pr_store_ptr_unsafe(&priority->value, (void *)entry->value); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&priority->key, (void *)entry->key); ck_pr_fence_store(); CK_HT_TYPE_STORE(&map->deletions, map->deletions + 1); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&entry->key, (void *)CK_HT_KEY_TOMBSTONE); ck_pr_fence_store(); } if (cycles == 0) { if (probes_wr > maximum) maximum = probes_wr; if (probes_wr >= CK_HT_WORD_MAX) probes_wr = CK_HT_WORD_MAX; if (bounds != NULL && probes_wr > bounds[offset]) bounds[offset] = probes_wr; } else if (--cycles == 0) break; } if (maximum != map->probe_maximum) CK_HT_TYPE_STORE(&map->probe_maximum, maximum); if (bounds != NULL) { for (i = 0; i < map->capacity; i++) CK_HT_STORE(&map->probe_bound[i], bounds[i]); ht->m->free(bounds, size, false); } return true; } static struct ck_ht_entry * ck_ht_map_probe_rd(struct ck_ht_map *map, ck_ht_hash_t h, ck_ht_entry_t *snapshot, const void *key, uint16_t key_length) { struct ck_ht_entry *bucket, *cursor; size_t offset, i, j; CK_HT_TYPE probes = 0; CK_HT_TYPE probe_maximum; #ifndef CK_HT_PP CK_HT_TYPE d = 0; CK_HT_TYPE d_prime = 0; retry: #endif probe_maximum = ck_ht_map_bound_get(map, h); offset = h.value & map->mask; for (i = 0; i < map->probe_limit; i++) { /* * Probe on a complete cache line first. 
Scan forward and wrap around to * the beginning of the cache line. Only when the complete cache line has * been scanned do we move on to the next row. */ bucket = (void *)((uintptr_t)(map->entries + offset) & ~(CK_MD_CACHELINE - 1)); for (j = 0; j < CK_HT_BUCKET_LENGTH; j++) { uint16_t k; if (probes++ > probe_maximum) return NULL; cursor = bucket + ((j + offset) & (CK_HT_BUCKET_LENGTH - 1)); #ifdef CK_HT_PP snapshot->key = (uintptr_t)ck_pr_load_ptr(&cursor->key); ck_pr_fence_load(); snapshot->value = (uintptr_t)ck_pr_load_ptr(&cursor->value); #else d = CK_HT_TYPE_LOAD(&map->deletions); snapshot->key = (uintptr_t)ck_pr_load_ptr(&cursor->key); ck_pr_fence_load(); snapshot->key_length = CK_HT_TYPE_LOAD(&cursor->key_length); snapshot->hash = CK_HT_TYPE_LOAD(&cursor->hash); snapshot->value = (uintptr_t)ck_pr_load_ptr(&cursor->value); #endif /* * It is probably worth it to encapsulate probe state * in order to prevent a complete reprobe sequence in * the case of intermittent writers. */ if (snapshot->key == CK_HT_KEY_TOMBSTONE) continue; if (snapshot->key == CK_HT_KEY_EMPTY) goto leave; if (snapshot->key == (uintptr_t)key) goto leave; if (map->mode & CK_HT_MODE_BYTESTRING) { void *pointer; /* * Check memoized portion of hash value before * expensive full-length comparison. */ k = ck_ht_entry_key_length(snapshot); if (k != key_length) continue; #ifdef CK_HT_PP if ((snapshot->value >> CK_MD_VMA_BITS) != ((h.value >> 32) & CK_HT_KEY_MASK)) continue; #else if (snapshot->hash != h.value) continue; d_prime = CK_HT_TYPE_LOAD(&map->deletions); /* * It is possible that the slot was * replaced, initiate a re-probe. */ if (d != d_prime) goto retry; #endif pointer = ck_ht_entry_key(snapshot); if (memcmp(pointer, key, key_length) == 0) goto leave; } } offset = ck_ht_map_probe_next(map, offset, h, probes); } return NULL; leave: return cursor; } CK_HT_TYPE ck_ht_count(struct ck_ht *table) { struct ck_ht_map *map = ck_pr_load_ptr(&table->map); return CK_HT_TYPE_LOAD(&map->n_entries); } bool ck_ht_next(struct ck_ht *table, struct ck_ht_iterator *i, struct ck_ht_entry **entry) { struct ck_ht_map *map = table->map; uintptr_t key; if (i->offset >= map->capacity) return false; do { key = map->entries[i->offset].key; if (key != CK_HT_KEY_EMPTY && key != CK_HT_KEY_TOMBSTONE) break; } while (++i->offset < map->capacity); if (i->offset >= map->capacity) return false; *entry = map->entries + i->offset++; return true; } bool ck_ht_reset_size_spmc(struct ck_ht *table, CK_HT_TYPE size) { struct ck_ht_map *map, *update; map = table->map; update = ck_ht_map_create(table, size); if (update == NULL) return false; ck_pr_store_ptr_unsafe(&table->map, update); ck_ht_map_destroy(table->m, map, true); return true; } bool ck_ht_reset_spmc(struct ck_ht *table) { struct ck_ht_map *map = table->map; return ck_ht_reset_size_spmc(table, map->capacity); } bool ck_ht_grow_spmc(struct ck_ht *table, CK_HT_TYPE capacity) { struct ck_ht_map *map, *update; struct ck_ht_entry *bucket, *previous; struct ck_ht_hash h; size_t k, i, j, offset; CK_HT_TYPE probes; restart: map = table->map; if (map->capacity >= capacity) return false; update = ck_ht_map_create(table, capacity); if (update == NULL) return false; for (k = 0; k < map->capacity; k++) { previous = &map->entries[k]; if (previous->key == CK_HT_KEY_EMPTY || previous->key == CK_HT_KEY_TOMBSTONE) continue; if (table->mode & CK_HT_MODE_BYTESTRING) { #ifdef CK_HT_PP void *key; uint16_t key_length; key = ck_ht_entry_key(previous); key_length = ck_ht_entry_key_length(previous); #endif #ifndef 
CK_HT_PP h.value = previous->hash; #else table->h(&h, key, key_length, table->seed); #endif } else { #ifndef CK_HT_PP h.value = previous->hash; #else table->h(&h, &previous->key, sizeof(previous->key), table->seed); #endif } offset = h.value & update->mask; probes = 0; for (i = 0; i < update->probe_limit; i++) { bucket = (void *)((uintptr_t)(update->entries + offset) & ~(CK_MD_CACHELINE - 1)); for (j = 0; j < CK_HT_BUCKET_LENGTH; j++) { struct ck_ht_entry *cursor = bucket + ((j + offset) & (CK_HT_BUCKET_LENGTH - 1)); probes++; if (CK_CC_LIKELY(cursor->key == CK_HT_KEY_EMPTY)) { *cursor = *previous; update->n_entries++; ck_ht_map_bound_set(update, h, probes); break; } } if (j < CK_HT_BUCKET_LENGTH) break; offset = ck_ht_map_probe_next(update, offset, h, probes); } if (i == update->probe_limit) { /* * We have hit the probe limit; the map needs to be even * larger. */ ck_ht_map_destroy(table->m, update, false); capacity <<= 1; goto restart; } } ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&table->map, update); ck_ht_map_destroy(table->m, map, true); return true; } bool ck_ht_remove_spmc(struct ck_ht *table, ck_ht_hash_t h, ck_ht_entry_t *entry) { struct ck_ht_map *map; struct ck_ht_entry *candidate, snapshot; map = table->map; if (table->mode & CK_HT_MODE_BYTESTRING) { candidate = ck_ht_map_probe_rd(map, h, &snapshot, ck_ht_entry_key(entry), ck_ht_entry_key_length(entry)); } else { candidate = ck_ht_map_probe_rd(map, h, &snapshot, (void *)entry->key, sizeof(entry->key)); } /* No matching entry was found. */ if (candidate == NULL || snapshot.key == CK_HT_KEY_EMPTY) return false; *entry = snapshot; ck_pr_store_ptr_unsafe(&candidate->key, (void *)CK_HT_KEY_TOMBSTONE); ck_pr_fence_store(); CK_HT_TYPE_STORE(&map->n_entries, map->n_entries - 1); return true; } bool ck_ht_get_spmc(struct ck_ht *table, ck_ht_hash_t h, ck_ht_entry_t *entry) { struct ck_ht_entry *candidate, snapshot; struct ck_ht_map *map; CK_HT_TYPE d, d_prime; restart: map = ck_pr_load_ptr(&table->map); /* * Platforms that cannot read key and key_length atomically must reprobe * on the scan of any single entry. */ d = CK_HT_TYPE_LOAD(&map->deletions); if (table->mode & CK_HT_MODE_BYTESTRING) { candidate = ck_ht_map_probe_rd(map, h, &snapshot, ck_ht_entry_key(entry), ck_ht_entry_key_length(entry)); } else { candidate = ck_ht_map_probe_rd(map, h, &snapshot, (void *)entry->key, sizeof(entry->key)); } d_prime = CK_HT_TYPE_LOAD(&map->deletions); if (d != d_prime) { /* * It is possible we have read (K, V'). Only valid states are * (K, V), (K', V') and (T, V). Restart the load operation in the face * of concurrent deletions or replacements.
*/ goto restart; } if (candidate == NULL || snapshot.key == CK_HT_KEY_EMPTY) return false; *entry = snapshot; return true; } bool ck_ht_set_spmc(struct ck_ht *table, ck_ht_hash_t h, ck_ht_entry_t *entry) { struct ck_ht_entry snapshot, *candidate, *priority; struct ck_ht_map *map; CK_HT_TYPE probes, probes_wr; bool empty = false; for (;;) { map = table->map; if (table->mode & CK_HT_MODE_BYTESTRING) { candidate = ck_ht_map_probe_wr(map, h, &snapshot, &priority, ck_ht_entry_key(entry), ck_ht_entry_key_length(entry), &probes, &probes_wr); } else { candidate = ck_ht_map_probe_wr(map, h, &snapshot, &priority, (void *)entry->key, sizeof(entry->key), &probes, &probes_wr); } if (priority != NULL) { probes = probes_wr; break; } if (candidate != NULL) break; if (ck_ht_grow_spmc(table, map->capacity << 1) == false) return false; } if (candidate == NULL) { candidate = priority; empty = true; } if (candidate->key != CK_HT_KEY_EMPTY && priority != NULL && candidate != priority) { /* * The entry is moved into another position in the probe sequence. * We avoid a state of (K, B) (where [K, B] -> [K', B]) by * guaranteeing a forced reprobe before transitioning from K to * T. (K, B) implies (K, B, D') so we will reprobe successfully * from this transient state. */ probes = probes_wr; #ifndef CK_HT_PP CK_HT_TYPE_STORE(&priority->key_length, entry->key_length); CK_HT_TYPE_STORE(&priority->hash, entry->hash); #endif /* * Readers must observe version counter change before they * observe re-use. If they observe re-use, it is at most * a tombstone. */ if (priority->value == CK_HT_KEY_TOMBSTONE) { CK_HT_TYPE_STORE(&map->deletions, map->deletions + 1); ck_pr_fence_store(); } ck_pr_store_ptr_unsafe(&priority->value, (void *)entry->value); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&priority->key, (void *)entry->key); ck_pr_fence_store(); /* * Make sure that readers who observe the tombstone would * also observe counter change. */ CK_HT_TYPE_STORE(&map->deletions, map->deletions + 1); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&candidate->key, (void *)CK_HT_KEY_TOMBSTONE); ck_pr_fence_store(); } else { /* * In this case we are inserting a new entry or replacing * an existing entry. Yes, this can be combined into the above branch, * but isn't because you are actually looking at dying code * (ck_ht is effectively deprecated and is being replaced soon). */ bool replace = candidate->key != CK_HT_KEY_EMPTY && candidate->key != CK_HT_KEY_TOMBSTONE; if (priority != NULL) { if (priority->key == CK_HT_KEY_TOMBSTONE) { CK_HT_TYPE_STORE(&map->deletions, map->deletions + 1); ck_pr_fence_store(); } candidate = priority; probes = probes_wr; } #ifdef CK_HT_PP ck_pr_store_ptr_unsafe(&candidate->value, (void *)entry->value); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&candidate->key, (void *)entry->key); #else CK_HT_TYPE_STORE(&candidate->key_length, entry->key_length); CK_HT_TYPE_STORE(&candidate->hash, entry->hash); ck_pr_store_ptr_unsafe(&candidate->value, (void *)entry->value); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&candidate->key, (void *)entry->key); #endif /* * If we are inserting a new entry then increment the number * of entries associated with the map. */ if (replace == false) CK_HT_TYPE_STORE(&map->n_entries, map->n_entries + 1); } ck_ht_map_bound_set(map, h, probes); /* Enforce a load factor of 0.5.
*/ if (map->n_entries * 2 > map->capacity) ck_ht_grow_spmc(table, map->capacity << 1); if (empty == true) { entry->key = CK_HT_KEY_EMPTY; } else { *entry = snapshot; } return true; } bool ck_ht_put_spmc(struct ck_ht *table, ck_ht_hash_t h, ck_ht_entry_t *entry) { struct ck_ht_entry snapshot, *candidate, *priority; struct ck_ht_map *map; CK_HT_TYPE probes, probes_wr; for (;;) { map = table->map; if (table->mode & CK_HT_MODE_BYTESTRING) { candidate = ck_ht_map_probe_wr(map, h, &snapshot, &priority, ck_ht_entry_key(entry), ck_ht_entry_key_length(entry), &probes, &probes_wr); } else { candidate = ck_ht_map_probe_wr(map, h, &snapshot, &priority, (void *)entry->key, sizeof(entry->key), &probes, &probes_wr); } if (candidate != NULL || priority != NULL) break; if (ck_ht_grow_spmc(table, map->capacity << 1) == false) return false; } if (priority != NULL) { /* Version counter is updated before re-use. */ CK_HT_TYPE_STORE(&map->deletions, map->deletions + 1); ck_pr_fence_store(); /* Re-use tombstone if one was found. */ candidate = priority; probes = probes_wr; } else if (candidate->key != CK_HT_KEY_EMPTY && candidate->key != CK_HT_KEY_TOMBSTONE) { /* * If the snapshot key is non-empty and the value field is not * a tombstone then an identical key was found. As store does * not implement replacement, we will fail. */ return false; } ck_ht_map_bound_set(map, h, probes); #ifdef CK_HT_PP ck_pr_store_ptr_unsafe(&candidate->value, (void *)entry->value); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&candidate->key, (void *)entry->key); #else CK_HT_TYPE_STORE(&candidate->key_length, entry->key_length); CK_HT_TYPE_STORE(&candidate->hash, entry->hash); ck_pr_store_ptr_unsafe(&candidate->value, (void *)entry->value); ck_pr_fence_store(); ck_pr_store_ptr_unsafe(&candidate->key, (void *)entry->key); #endif CK_HT_TYPE_STORE(&map->n_entries, map->n_entries + 1); /* Enforce a load factor of 0.5. */ if (map->n_entries * 2 > map->capacity) ck_ht_grow_spmc(table, map->capacity << 1); return true; } void ck_ht_destroy(struct ck_ht *table) { ck_ht_map_destroy(table->m, table->map, false); return; }
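Both ck_ht_get_spmc() and ck_ht_map_probe_rd() above bracket their probe with two loads of the map's deletions counter and restart when the loads disagree, so a reader never trusts a snapshot that may span a slot re-use. The following is a minimal single-slot sketch of that retry pattern, expressed with C11 atomics rather than ck_pr; the record and table_sketch types are hypothetical stand-ins for ck_ht's entry layout.

/*
 * Illustrative sketch, not part of the patch: reader-side retry on a
 * version counter, mirroring the d/d_prime comparison in ck_ht_get_spmc.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct record {
    _Atomic uintptr_t key;
    _Atomic uintptr_t value;
};

struct table_sketch {
    _Atomic uint64_t deletions;    /* bumped by the writer before re-use */
    struct record slot;            /* a single slot, for brevity */
};

static bool
lookup_sketch(struct table_sketch *t, uintptr_t key, uintptr_t *value)
{
    uint64_t d, d_prime;
    uintptr_t k, v;

    do {
        d = atomic_load(&t->deletions);
        k = atomic_load(&t->slot.key);
        v = atomic_load(&t->slot.value);
        d_prime = atomic_load(&t->deletions);
        /* A counter change means the slot may have been reused; retry. */
    } while (d != d_prime);

    if (k != key)
        return false;

    *value = v;
    return true;
}

The comparison is sufficient only because the writer side (see ck_ht_set_spmc() and ck_ht_gc() above) increments the counter, with a store fence, before it re-uses a slot.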
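The probe loops in ck_ht_map_probe_wr(), ck_ht_map_probe_rd() and ck_ht_grow_spmc() scan one cache-line-sized bucket at a time and only then call ck_ht_map_probe_next() to jump to the next bucket. Below is a standalone restatement of that probe schedule; BUCKET_SHIFT, the step value and the mask are hypothetical stand-ins for what ck_ht_map_create() derives from the table capacity.

/*
 * Illustrative sketch, not part of the patch: the bucket-to-bucket probe
 * schedule, with assumed constants.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define BUCKET_SHIFT  2
#define BUCKET_LENGTH (1U << BUCKET_SHIFT)
#define BUCKET_MASK   (BUCKET_LENGTH - 1)

static uint64_t
probe_next_sketch(uint64_t offset, uint64_t hash, uint64_t probes,
    unsigned int step, uint64_t mask)
{
    uint64_t r, stride, level;

    level = probes >> BUCKET_SHIFT;
    /* Derive a per-level stride from hash bits above the home bucket. */
    r = (hash >> step) >> level;
    stride = ((r & ~(uint64_t)BUCKET_MASK) << 1) | (r & BUCKET_MASK);

    return (offset + level + (stride | BUCKET_LENGTH)) & mask;
}

int
main(void)
{
    uint64_t mask = 1024 - 1;             /* capacity of 1024 slots */
    unsigned int step = 11;               /* ffsll(1024), as in ck_ht_map_create */
    uint64_t hash = 0x9e3779b97f4a7c15ULL;
    uint64_t offset = hash & mask;
    uint64_t probes = 0;

    for (int i = 0; i < 4; i++) {
        printf("probe round %d starts at slot %" PRIu64 "\n", i, offset);
        probes += BUCKET_LENGTH;          /* one full cache-line bucket scanned */
        offset = probe_next_sketch(offset, hash, probes, step, mask);
    }

    return 0;
}

Scanning the whole cache line before jumping keeps each probe round within a single line already fetched by the first access.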
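Before paying for a full memcmp(), the byte-string probe paths reject candidates by key length and then by a memoized portion of the hash (packed into the value word under CK_HT_PP, kept in a dedicated hash field otherwise). A simplified sketch of that short-circuit comparison follows; the entry_sketch layout is hypothetical and not ck_ht's packed representation.

/*
 * Illustrative sketch, not part of the patch: cheap rejections before the
 * full-length key comparison.
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct entry_sketch {
    const void *key;
    uint16_t key_length;
    uint64_t hash;          /* memoized hash of the stored key */
};

static bool
entry_matches_sketch(const struct entry_sketch *e, const void *key,
    uint16_t key_length, uint64_t hash)
{
    /* Reject on length first, then on the memoized hash. */
    if (e->key_length != key_length)
        return false;

    if (e->hash != hash)
        return false;

    /* Only now pay for the full-length comparison. */
    return memcmp(e->key, key, key_length) == 0;
}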
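ck_ht_map_bound_set() and ck_ht_map_bound_get() keep a per-home-slot probe bound that saturates at CK_HT_WORD_MAX and falls back to the table-wide probe_maximum once saturated, which is what lets readers stop probing early. The sketch below restates that bookkeeping in plain single-threaded C; the real code publishes the bounds with CK_HT_STORE/CK_HT_TYPE_STORE and store fences, and the types here are simplified stand-ins.

/*
 * Illustrative sketch, not part of the patch: saturating per-slot probe
 * bounds with a table-wide fallback.
 */
#include <stdint.h>

#define BOUND_MAX UINT8_MAX

struct bounds_sketch {
    uint64_t probe_maximum;     /* table-wide worst case */
    uint8_t *probe_bound;       /* per-home-slot bound, may be NULL */
    uint64_t mask;
};

static void
bound_set_sketch(struct bounds_sketch *b, uint64_t hash, uint64_t n_probes)
{
    uint64_t offset = hash & b->mask;

    if (n_probes > b->probe_maximum)
        b->probe_maximum = n_probes;

    if (b->probe_bound != NULL && b->probe_bound[offset] < n_probes) {
        /* Saturate: a bound of BOUND_MAX means "consult probe_maximum". */
        if (n_probes >= BOUND_MAX)
            n_probes = BOUND_MAX;
        b->probe_bound[offset] = (uint8_t)n_probes;
    }
}

static uint64_t
bound_get_sketch(const struct bounds_sketch *b, uint64_t hash)
{
    uint64_t offset = hash & b->mask;
    uint64_t r;

    if (b->probe_bound == NULL)
        return b->probe_maximum;

    r = b->probe_bound[offset];
    return (r == BOUND_MAX) ? b->probe_maximum : r;
}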
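Finally, both ck_ht_set_spmc() and ck_ht_put_spmc() end by enforcing a load factor of 0.5: once more than half of the power-of-two capacity is occupied, the table is doubled. A trivial sketch of that growth predicate, with hypothetical parameter names:

/*
 * Illustrative sketch, not part of the patch: the 0.5 load-factor check
 * that triggers ck_ht_grow_spmc(table, capacity << 1).
 */
#include <stdbool.h>
#include <stdint.h>

static bool
needs_grow_sketch(uint64_t n_entries, uint64_t capacity)
{
    /* Doubling brings the load factor back down to roughly 0.25. */
    return n_entries * 2 > capacity;
}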