Index: head/sys/amd64/include/atomic.h =================================================================== --- head/sys/amd64/include/atomic.h (revision 285282) +++ head/sys/amd64/include/atomic.h (revision 285283) @@ -1,545 +1,577 @@ /*- * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif #define mb() __asm __volatile("mfence;" : : : "memory") #define wmb() __asm __volatile("sfence;" : : : "memory") #define rmb() __asm __volatile("lfence;" : : : "memory") /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. * * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) * atomic_add_char(P, V) (*(u_char *)(P) += (V)) * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) * * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) * atomic_add_short(P, V) (*(u_short *)(P) += (V)) * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) * * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) * atomic_add_int(P, V) (*(u_int *)(P) += (V)) * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) * * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) * atomic_add_long(P, V) (*(u_long *)(P) += (V)) * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) */ /* * The above functions are expanded inline in the statically-linked * kernel. Lock prefixes are generated if an SMP kernel is being * built. * * Kernel modules call real functions which are built into the kernel. * This allows kernel modules to be portable between UP and SMP systems. */ #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM) #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src); u_int atomic_fetchadd_int(volatile u_int *p, u_int v); u_long atomic_fetchadd_long(volatile u_long *p, u_long v); int atomic_testandset_int(volatile u_int *p, u_int v); int atomic_testandset_long(volatile u_long *p, u_int v); +void atomic_thread_fence_acq(void); +void atomic_thread_fence_acq_rel(void); +void atomic_thread_fence_rel(void); +void atomic_thread_fence_seq_cst(void); #define ATOMIC_LOAD(TYPE) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) #define ATOMIC_STORE(TYPE) \ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) #else /* !KLD_MODULE && __GNUCLIKE_ASM */ /* * For userland, always use lock prefixes so that the binaries will run * on both SMP and !SMP systems. */ #if defined(SMP) || !defined(_KERNEL) #define MPLOCKED "lock ; " #else #define MPLOCKED #endif /* * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. */ #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ } \ \ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ } \ struct __hack /* * Atomic compare and set, used by the mutex functions * * if (*dst == expect) *dst = src (all 32 bit words) * * Returns 0 on failure, non-zero on success */ static __inline int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src) { u_char res; __asm __volatile( " " MPLOCKED " " " cmpxchgl %3,%1 ; " " sete %0 ; " "# atomic_cmpset_int" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+a" (expect) /* 2 */ : "r" (src) /* 3 */ : "memory", "cc"); return (res); } static __inline int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src) { u_char res; __asm __volatile( " " MPLOCKED " " " cmpxchgq %3,%1 ; " " sete %0 ; " "# atomic_cmpset_long" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+a" (expect) /* 2 */ : "r" (src) /* 3 */ : "memory", "cc"); return (res); } /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( " " MPLOCKED " " " xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } /* * Atomically add the value of v to the long integer pointed to by p and return * the previous value of *p. */ static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { __asm __volatile( " " MPLOCKED " " " xaddq %0,%1 ; " "# atomic_fetchadd_long" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } static __inline int atomic_testandset_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " " MPLOCKED " " " btsl %2,%1 ; " " setc %0 ; " "# atomic_testandset_int" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } static __inline int atomic_testandset_long(volatile u_long *p, u_int v) { u_char res; __asm __volatile( " " MPLOCKED " " " btsq %2,%1 ; " " setc %0 ; " "# atomic_testandset_long" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Jr" ((u_long)(v & 0x3f)) /* 2 */ : "cc"); return (res); } /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. * * However, a load may pass a store if they are performed on distinct * addresses, so for atomic_load_acq we introduce a Store/Load barrier * before the load in SMP kernels. We use "lock addl $0,mem", as * recommended by the AMD Software Optimization Guide, and not mfence. * In the kernel, we use a private per-cpu cache line as the target * for the locked addition, to avoid introducing false data * dependencies. In userspace, a word in the red zone on the stack * (-8(%rsp)) is utilized. * * For UP kernels, however, the memory of the single processor is * always consistent, so we only need to stop the compiler from * reordering accesses in a way that violates the semantics of acquire * and release. */ #if defined(_KERNEL) /* * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). * * The open-coded number is used instead of the symbolic expression to * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in amd64/vm_machdep.c ensures that the value is correct. */ #define OFFSETOF_MONITORBUF 0x180 #if defined(SMP) static __inline void __storeload_barrier(void) { __asm __volatile("lock; addl $0,%%gs:%0" : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc"); } #else /* _KERNEL && UP */ static __inline void __storeload_barrier(void) { __compiler_membar(); } #endif /* SMP */ #else /* !_KERNEL */ static __inline void __storeload_barrier(void) { __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc"); } #endif /* _KERNEL*/ /* * C11-standard acq/rel semantics only apply when the variable in the * call is the same for acq as it is for rel. However, our previous * (x86) implementations provided much stronger ordering than required * (essentially what is called seq_cst order in C11). This * implementation provides the historical strong ordering since some * callers depend on it. */ #define ATOMIC_LOAD(TYPE) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE res; \ \ __storeload_barrier(); \ res = *p; \ __compiler_membar(); \ return (res); \ } \ struct __hack #define ATOMIC_STORE(TYPE) \ static __inline void \ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ { \ \ __compiler_membar(); \ *p = v; \ } \ struct __hack + +static __inline void +atomic_thread_fence_acq(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + __storeload_barrier(); +} #endif /* KLD_MODULE || !__GNUCLIKE_ASM */ ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); ATOMIC_ASM(set, long, "orq %1,%0", "ir", v); ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addq %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v); #define ATOMIC_LOADSTORE(TYPE) \ ATOMIC_LOAD(TYPE); \ ATOMIC_STORE(TYPE) ATOMIC_LOADSTORE(char); ATOMIC_LOADSTORE(short); ATOMIC_LOADSTORE(int); ATOMIC_LOADSTORE(long); #undef ATOMIC_ASM #undef ATOMIC_LOAD #undef ATOMIC_STORE #undef ATOMIC_LOADSTORE #ifndef WANT_FUNCTIONS /* Read the current value and store a new value in the destination. */ #ifdef __GNUCLIKE_ASM static __inline u_int atomic_swap_int(volatile u_int *p, u_int v) { __asm __volatile( " xchgl %1,%0 ; " "# atomic_swap_int" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } static __inline u_long atomic_swap_long(volatile u_long *p, u_long v) { __asm __volatile( " xchgq %1,%0 ; " "# atomic_swap_long" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } #else /* !__GNUCLIKE_ASM */ u_int atomic_swap_int(volatile u_int *p, u_int v); u_long atomic_swap_long(volatile u_long *p, u_long v); #endif /* __GNUCLIKE_ASM */ #define atomic_set_acq_char atomic_set_barr_char #define atomic_set_rel_char atomic_set_barr_char #define atomic_clear_acq_char atomic_clear_barr_char #define atomic_clear_rel_char atomic_clear_barr_char #define atomic_add_acq_char atomic_add_barr_char #define atomic_add_rel_char atomic_add_barr_char #define atomic_subtract_acq_char atomic_subtract_barr_char #define atomic_subtract_rel_char atomic_subtract_barr_char #define atomic_set_acq_short atomic_set_barr_short #define atomic_set_rel_short atomic_set_barr_short #define atomic_clear_acq_short atomic_clear_barr_short #define atomic_clear_rel_short atomic_clear_barr_short #define atomic_add_acq_short atomic_add_barr_short #define atomic_add_rel_short atomic_add_barr_short #define atomic_subtract_acq_short atomic_subtract_barr_short #define atomic_subtract_rel_short atomic_subtract_barr_short #define atomic_set_acq_int atomic_set_barr_int #define atomic_set_rel_int atomic_set_barr_int #define atomic_clear_acq_int atomic_clear_barr_int #define atomic_clear_rel_int atomic_clear_barr_int #define atomic_add_acq_int atomic_add_barr_int #define atomic_add_rel_int atomic_add_barr_int #define atomic_subtract_acq_int atomic_subtract_barr_int #define atomic_subtract_rel_int atomic_subtract_barr_int #define atomic_cmpset_acq_int atomic_cmpset_int #define atomic_cmpset_rel_int atomic_cmpset_int #define atomic_set_acq_long atomic_set_barr_long #define atomic_set_rel_long atomic_set_barr_long #define atomic_clear_acq_long atomic_clear_barr_long #define atomic_clear_rel_long atomic_clear_barr_long #define atomic_add_acq_long atomic_add_barr_long #define atomic_add_rel_long atomic_add_barr_long #define atomic_subtract_acq_long atomic_subtract_barr_long #define atomic_subtract_rel_long atomic_subtract_barr_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_readandclear_int(p) atomic_swap_int(p, 0) #define atomic_readandclear_long(p) atomic_swap_long(p, 0) /* Operations on 8-bit bytes. */ #define atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char #define atomic_clear_8 atomic_clear_char #define atomic_clear_acq_8 atomic_clear_acq_char #define atomic_clear_rel_8 atomic_clear_rel_char #define atomic_add_8 atomic_add_char #define atomic_add_acq_8 atomic_add_acq_char #define atomic_add_rel_8 atomic_add_rel_char #define atomic_subtract_8 atomic_subtract_char #define atomic_subtract_acq_8 atomic_subtract_acq_char #define atomic_subtract_rel_8 atomic_subtract_rel_char #define atomic_load_acq_8 atomic_load_acq_char #define atomic_store_rel_8 atomic_store_rel_char /* Operations on 16-bit words. */ #define atomic_set_16 atomic_set_short #define atomic_set_acq_16 atomic_set_acq_short #define atomic_set_rel_16 atomic_set_rel_short #define atomic_clear_16 atomic_clear_short #define atomic_clear_acq_16 atomic_clear_acq_short #define atomic_clear_rel_16 atomic_clear_rel_short #define atomic_add_16 atomic_add_short #define atomic_add_acq_16 atomic_add_acq_short #define atomic_add_rel_16 atomic_add_rel_short #define atomic_subtract_16 atomic_subtract_short #define atomic_subtract_acq_16 atomic_subtract_acq_short #define atomic_subtract_rel_16 atomic_subtract_rel_short #define atomic_load_acq_16 atomic_load_acq_short #define atomic_store_rel_16 atomic_store_rel_short /* Operations on 32-bit double words. */ #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int #define atomic_clear_32 atomic_clear_int #define atomic_clear_acq_32 atomic_clear_acq_int #define atomic_clear_rel_32 atomic_clear_rel_int #define atomic_add_32 atomic_add_int #define atomic_add_acq_32 atomic_add_acq_int #define atomic_add_rel_32 atomic_add_rel_int #define atomic_subtract_32 atomic_subtract_int #define atomic_subtract_acq_32 atomic_subtract_acq_int #define atomic_subtract_rel_32 atomic_subtract_rel_int #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int #define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #define atomic_swap_32 atomic_swap_int #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int /* Operations on 64-bit quad words. */ #define atomic_set_64 atomic_set_long #define atomic_set_acq_64 atomic_set_acq_long #define atomic_set_rel_64 atomic_set_rel_long #define atomic_clear_64 atomic_clear_long #define atomic_clear_acq_64 atomic_clear_acq_long #define atomic_clear_rel_64 atomic_clear_rel_long #define atomic_add_64 atomic_add_long #define atomic_add_acq_64 atomic_add_acq_long #define atomic_add_rel_64 atomic_add_rel_long #define atomic_subtract_64 atomic_subtract_long #define atomic_subtract_acq_64 atomic_subtract_acq_long #define atomic_subtract_rel_64 atomic_subtract_rel_long #define atomic_load_acq_64 atomic_load_acq_long #define atomic_store_rel_64 atomic_store_rel_long #define atomic_cmpset_64 atomic_cmpset_long #define atomic_cmpset_acq_64 atomic_cmpset_acq_long #define atomic_cmpset_rel_64 atomic_cmpset_rel_long #define atomic_swap_64 atomic_swap_long #define atomic_readandclear_64 atomic_readandclear_long #define atomic_testandset_64 atomic_testandset_long /* Operations on pointers. */ #define atomic_set_ptr atomic_set_long #define atomic_set_acq_ptr atomic_set_acq_long #define atomic_set_rel_ptr atomic_set_rel_long #define atomic_clear_ptr atomic_clear_long #define atomic_clear_acq_ptr atomic_clear_acq_long #define atomic_clear_rel_ptr atomic_clear_rel_long #define atomic_add_ptr atomic_add_long #define atomic_add_acq_ptr atomic_add_acq_long #define atomic_add_rel_ptr atomic_add_rel_long #define atomic_subtract_ptr atomic_subtract_long #define atomic_subtract_acq_ptr atomic_subtract_acq_long #define atomic_subtract_rel_ptr atomic_subtract_rel_long #define atomic_load_acq_ptr atomic_load_acq_long #define atomic_store_rel_ptr atomic_store_rel_long #define atomic_cmpset_ptr atomic_cmpset_long #define atomic_cmpset_acq_ptr atomic_cmpset_acq_long #define atomic_cmpset_rel_ptr atomic_cmpset_rel_long #define atomic_swap_ptr atomic_swap_long #define atomic_readandclear_ptr atomic_readandclear_long #endif /* !WANT_FUNCTIONS */ #endif /* !_MACHINE_ATOMIC_H_ */ Index: head/sys/arm/include/atomic.h =================================================================== --- head/sys/arm/include/atomic.h (revision 285282) +++ head/sys/arm/include/atomic.h (revision 285283) @@ -1,1134 +1,1162 @@ /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */ /*- * Copyright (C) 2003-2004 Olivier Houchard * Copyright (C) 1994-1997 Mark Brinicombe * Copyright (C) 1994 Brini * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Brini. * 4. The name of Brini may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #include #include #ifndef _KERNEL #include #else #include #endif #if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__) #define isb() __asm __volatile("isb" : : : "memory") #define dsb() __asm __volatile("dsb" : : : "memory") #define dmb() __asm __volatile("dmb" : : : "memory") #elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \ defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \ defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__) #define isb() __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory") #define dsb() __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory") #define dmb() __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory") #else #define isb() __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory") #define dsb() __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory") #define dmb() dsb() #endif #define mb() dmb() #define wmb() dmb() #define rmb() dmb() /* * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h * here, but that header can't be included here because this is C * code. I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition * out of asm.h so it can be used in both asm and C code. - kientzle@ */ #if defined (__ARM_ARCH_7__) || \ defined (__ARM_ARCH_7A__) || \ defined (__ARM_ARCH_6__) || \ defined (__ARM_ARCH_6J__) || \ defined (__ARM_ARCH_6K__) || \ defined (__ARM_ARCH_6T2__) || \ defined (__ARM_ARCH_6Z__) || \ defined (__ARM_ARCH_6ZK__) #define ARM_HAVE_ATOMIC64 static __inline void __do_dmb(void) { #if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__) __asm __volatile("dmb" : : : "memory"); #else __asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory"); #endif } #define ATOMIC_ACQ_REL_LONG(NAME) \ static __inline void \ atomic_##NAME##_acq_long(__volatile u_long *p, u_long v) \ { \ atomic_##NAME##_long(p, v); \ __do_dmb(); \ } \ \ static __inline void \ atomic_##NAME##_rel_long(__volatile u_long *p, u_long v) \ { \ __do_dmb(); \ atomic_##NAME##_long(p, v); \ } #define ATOMIC_ACQ_REL(NAME, WIDTH) \ static __inline void \ atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ atomic_##NAME##_##WIDTH(p, v); \ __do_dmb(); \ } \ \ static __inline void \ atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ __do_dmb(); \ atomic_##NAME##_##WIDTH(p, v); \ } static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { uint32_t tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "orr %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) , "+r" (address), "+r" (setmask) : : "cc", "memory"); } static __inline void atomic_set_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; uint32_t exflag; __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " orr %Q[tmp], %Q[val]\n" " orr %R[tmp], %R[val]\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [exf] "=&r" (exflag), [tmp] "=&r" (tmp) : [ptr] "r" (p), [val] "r" (val) : "cc", "memory"); } static __inline void atomic_set_long(volatile u_long *address, u_long setmask) { u_long tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "orr %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) , "+r" (address), "+r" (setmask) : : "cc", "memory"); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t setmask) { uint32_t tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "bic %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) ,"+r" (address), "+r" (setmask) : : "cc", "memory"); } static __inline void atomic_clear_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; uint32_t exflag; __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " bic %Q[tmp], %Q[val]\n" " bic %R[tmp], %R[val]\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [exf] "=&r" (exflag), [tmp] "=&r" (tmp) : [ptr] "r" (p), [val] "r" (val) : "cc", "memory"); } static __inline void atomic_clear_long(volatile u_long *address, u_long setmask) { u_long tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "bic %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) ,"+r" (address), "+r" (setmask) : : "cc", "memory"); } static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { uint32_t ret; __asm __volatile("1: ldrex %0, [%1]\n" "cmp %0, %2\n" "itt ne\n" "movne %0, #0\n" "bne 2f\n" "strex %0, %3, [%1]\n" "cmp %0, #0\n" "ite eq\n" "moveq %0, #1\n" "bne 1b\n" "2:" : "=&r" (ret) ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory"); return (ret); } static __inline int atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { uint64_t tmp; uint32_t ret; __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %Q[tmp], %Q[cmpval]\n" " itee eq \n" " teqeq %R[tmp], %R[cmpval]\n" " movne %[ret], #0\n" " bne 2f\n" " strexd %[ret], %Q[newval], %R[newval], [%[ptr]]\n" " teq %[ret], #0\n" " it ne \n" " bne 1b\n" " mov %[ret], #1\n" "2: \n" : [ret] "=&r" (ret), [tmp] "=&r" (tmp) : [ptr] "r" (p), [cmpval] "r" (cmpval), [newval] "r" (newval) : "cc", "memory"); return (ret); } static __inline u_long atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval) { u_long ret; __asm __volatile("1: ldrex %0, [%1]\n" "cmp %0, %2\n" "itt ne\n" "movne %0, #0\n" "bne 2f\n" "strex %0, %3, [%1]\n" "cmp %0, #0\n" "ite eq\n" "moveq %0, #1\n" "bne 1b\n" "2:" : "=&r" (ret) ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory"); return (ret); } static __inline u_int32_t atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { u_int32_t ret = atomic_cmpset_32(p, cmpval, newval); __do_dmb(); return (ret); } static __inline uint64_t atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval) { uint64_t ret = atomic_cmpset_64(p, cmpval, newval); __do_dmb(); return (ret); } static __inline u_long atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval) { u_long ret = atomic_cmpset_long(p, cmpval, newval); __do_dmb(); return (ret); } static __inline u_int32_t atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { __do_dmb(); return (atomic_cmpset_32(p, cmpval, newval)); } static __inline uint64_t atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval) { __do_dmb(); return (atomic_cmpset_64(p, cmpval, newval)); } static __inline u_long atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval) { __do_dmb(); return (atomic_cmpset_long(p, cmpval, newval)); } static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { uint32_t tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "add %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) ,"+r" (p), "+r" (val) : : "cc", "memory"); } static __inline void atomic_add_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; uint32_t exflag; __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " adds %Q[tmp], %Q[val]\n" " adc %R[tmp], %R[tmp], %R[val]\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [exf] "=&r" (exflag), [tmp] "=&r" (tmp) : [ptr] "r" (p), [val] "r" (val) : "cc", "memory"); } static __inline void atomic_add_long(volatile u_long *p, u_long val) { u_long tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "add %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) ,"+r" (p), "+r" (val) : : "cc", "memory"); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { uint32_t tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "sub %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) ,"+r" (p), "+r" (val) : : "cc", "memory"); } static __inline void atomic_subtract_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; uint32_t exflag; __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " subs %Q[tmp], %Q[val]\n" " sbc %R[tmp], %R[tmp], %R[val]\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [exf] "=&r" (exflag), [tmp] "=&r" (tmp) : [ptr] "r" (p), [val] "r" (val) : "cc", "memory"); } static __inline void atomic_subtract_long(volatile u_long *p, u_long val) { u_long tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%2]\n" "sub %0, %0, %3\n" "strex %1, %0, [%2]\n" "cmp %1, #0\n" "it ne\n" "bne 1b\n" : "=&r" (tmp), "+r" (tmp2) ,"+r" (p), "+r" (val) : : "cc", "memory"); } ATOMIC_ACQ_REL(clear, 32) ATOMIC_ACQ_REL(add, 32) ATOMIC_ACQ_REL(subtract, 32) ATOMIC_ACQ_REL(set, 32) ATOMIC_ACQ_REL(clear, 64) ATOMIC_ACQ_REL(add, 64) ATOMIC_ACQ_REL(subtract, 64) ATOMIC_ACQ_REL(set, 64) ATOMIC_ACQ_REL_LONG(clear) ATOMIC_ACQ_REL_LONG(add) ATOMIC_ACQ_REL_LONG(subtract) ATOMIC_ACQ_REL_LONG(set) #undef ATOMIC_ACQ_REL #undef ATOMIC_ACQ_REL_LONG static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp = 0, tmp2 = 0, ret = 0; __asm __volatile("1: ldrex %0, [%3]\n" "add %1, %0, %4\n" "strex %2, %1, [%3]\n" "cmp %2, #0\n" "it ne\n" "bne 1b\n" : "+r" (ret), "=&r" (tmp), "+r" (tmp2) ,"+r" (p), "+r" (val) : : "cc", "memory"); return (ret); } static __inline uint32_t atomic_readandclear_32(volatile u_int32_t *p) { uint32_t ret, tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%3]\n" "mov %1, #0\n" "strex %2, %1, [%3]\n" "cmp %2, #0\n" "it ne\n" "bne 1b\n" : "=r" (ret), "=&r" (tmp), "+r" (tmp2) ,"+r" (p) : : "cc", "memory"); return (ret); } static __inline uint32_t atomic_load_acq_32(volatile uint32_t *p) { uint32_t v; v = *p; __do_dmb(); return (v); } static __inline void atomic_store_rel_32(volatile uint32_t *p, uint32_t v) { __do_dmb(); *p = v; } static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t val) { uint64_t ret, tmp; uint32_t exflag; __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " adds %Q[tmp], %Q[ret], %Q[val]\n" " adc %R[tmp], %R[ret], %R[val]\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [ret] "=&r" (ret), [exf] "=&r" (exflag), [tmp] "=&r" (tmp) : [ptr] "r" (p), [val] "r" (val) : "cc", "memory"); return (ret); } static __inline uint64_t atomic_readandclear_64(volatile uint64_t *p) { uint64_t ret, tmp; uint32_t exflag; __asm __volatile( "1: \n" " ldrexd %Q[ret], %R[ret], [%[ptr]]\n" " mov %Q[tmp], #0\n" " mov %R[tmp], #0\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [ret] "=&r" (ret), [exf] "=&r" (exflag), [tmp] "=&r" (tmp) : [ptr] "r" (p) : "cc", "memory"); return (ret); } static __inline uint64_t atomic_load_64(volatile uint64_t *p) { uint64_t ret; /* * The only way to atomically load 64 bits is with LDREXD which puts the * exclusive monitor into the exclusive state, so reset it to open state * with CLREX because we don't actually need to store anything. */ __asm __volatile( "1: \n" " ldrexd %Q[ret], %R[ret], [%[ptr]]\n" " clrex \n" : [ret] "=&r" (ret) : [ptr] "r" (p) : "cc", "memory"); return (ret); } static __inline uint64_t atomic_load_acq_64(volatile uint64_t *p) { uint64_t ret; ret = atomic_load_64(p); __do_dmb(); return (ret); } static __inline void atomic_store_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; uint32_t exflag; /* * The only way to atomically store 64 bits is with STREXD, which will * succeed only if paired up with a preceeding LDREXD using the same * address, so we read and discard the existing value before storing. */ __asm __volatile( "1: \n" " ldrexd %Q[tmp], %R[tmp], [%[ptr]]\n" " strexd %[exf], %Q[tmp], %R[tmp], [%[ptr]]\n" " teq %[exf], #0\n" " it ne \n" " bne 1b\n" : [tmp] "=&r" (tmp), [exf] "=&r" (exflag) : [ptr] "r" (p), [val] "r" (val) : "cc", "memory"); } static __inline void atomic_store_rel_64(volatile uint64_t *p, uint64_t val) { __do_dmb(); atomic_store_64(p, val); } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long val) { u_long tmp = 0, tmp2 = 0, ret = 0; __asm __volatile("1: ldrex %0, [%3]\n" "add %1, %0, %4\n" "strex %2, %1, [%3]\n" "cmp %2, #0\n" "it ne\n" "bne 1b\n" : "+r" (ret), "=&r" (tmp), "+r" (tmp2) ,"+r" (p), "+r" (val) : : "cc", "memory"); return (ret); } static __inline u_long atomic_readandclear_long(volatile u_long *p) { u_long ret, tmp = 0, tmp2 = 0; __asm __volatile("1: ldrex %0, [%3]\n" "mov %1, #0\n" "strex %2, %1, [%3]\n" "cmp %2, #0\n" "it ne\n" "bne 1b\n" : "=r" (ret), "=&r" (tmp), "+r" (tmp2) ,"+r" (p) : : "cc", "memory"); return (ret); } static __inline u_long atomic_load_acq_long(volatile u_long *p) { u_long v; v = *p; __do_dmb(); return (v); } static __inline void atomic_store_rel_long(volatile u_long *p, u_long v) { __do_dmb(); *p = v; } #else /* < armv6 */ #define __with_interrupts_disabled(expr) \ do { \ u_int cpsr_save, tmp; \ \ __asm __volatile( \ "mrs %0, cpsr;" \ "orr %1, %0, %2;" \ "msr cpsr_fsxc, %1;" \ : "=r" (cpsr_save), "=r" (tmp) \ : "I" (PSR_I | PSR_F) \ : "cc" ); \ (expr); \ __asm __volatile( \ "msr cpsr_fsxc, %0" \ : /* no output */ \ : "r" (cpsr_save) \ : "cc" ); \ } while(0) static __inline uint32_t __swp(uint32_t val, volatile uint32_t *ptr) { __asm __volatile("swp %0, %2, [%3]" : "=&r" (val), "=m" (*ptr) : "r" (val), "r" (ptr), "m" (*ptr) : "memory"); return (val); } #ifdef _KERNEL #define ARM_HAVE_ATOMIC64 static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { __with_interrupts_disabled(*address |= setmask); } static __inline void atomic_set_64(volatile uint64_t *address, uint64_t setmask) { __with_interrupts_disabled(*address |= setmask); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t clearmask) { __with_interrupts_disabled(*address &= ~clearmask); } static __inline void atomic_clear_64(volatile uint64_t *address, uint64_t clearmask) { __with_interrupts_disabled(*address &= ~clearmask); } static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { int ret; __with_interrupts_disabled( { if (*p == cmpval) { *p = newval; ret = 1; } else { ret = 0; } }); return (ret); } static __inline u_int64_t atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval) { int ret; __with_interrupts_disabled( { if (*p == cmpval) { *p = newval; ret = 1; } else { ret = 0; } }); return (ret); } static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { __with_interrupts_disabled(*p += val); } static __inline void atomic_add_64(volatile u_int64_t *p, u_int64_t val) { __with_interrupts_disabled(*p += val); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { __with_interrupts_disabled(*p -= val); } static __inline void atomic_subtract_64(volatile u_int64_t *p, u_int64_t val) { __with_interrupts_disabled(*p -= val); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t v) { uint32_t value; __with_interrupts_disabled( { value = *p; *p += v; }); return (value); } static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t v) { uint64_t value; __with_interrupts_disabled( { value = *p; *p += v; }); return (value); } static __inline uint64_t atomic_load_64(volatile uint64_t *p) { uint64_t value; __with_interrupts_disabled(value = *p); return (value); } static __inline void atomic_store_64(volatile uint64_t *p, uint64_t value) { __with_interrupts_disabled(*p = value); } #else /* !_KERNEL */ static __inline u_int32_t atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval) { register int done, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "cmp %1, %3\n" "streq %4, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" "moveq %1, #1\n" "movne %1, #0\n" : "+r" (ras_start), "=r" (done) ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory"); return (done); } static __inline void atomic_add_32(volatile u_int32_t *p, u_int32_t val) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "add %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val) : : "memory"); } static __inline void atomic_subtract_32(volatile u_int32_t *p, u_int32_t val) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "sub %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val) : : "memory"); } static __inline void atomic_set_32(volatile uint32_t *address, uint32_t setmask) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "orr %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask) : : "memory"); } static __inline void atomic_clear_32(volatile uint32_t *address, uint32_t clearmask) { int start, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%2]\n" "bic %1, %1, %3\n" "str %1, [%2]\n" "2:\n" "mov %1, #0\n" "str %1, [%0]\n" "mov %1, #0xffffffff\n" "str %1, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask) : : "memory"); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t v) { uint32_t start, tmp, ras_start = ARM_RAS_START; __asm __volatile("1:\n" "adr %1, 1b\n" "str %1, [%0]\n" "adr %1, 2f\n" "str %1, [%0, #4]\n" "ldr %1, [%3]\n" "mov %2, %1\n" "add %2, %2, %4\n" "str %2, [%3]\n" "2:\n" "mov %2, #0\n" "str %2, [%0]\n" "mov %2, #0xffffffff\n" "str %2, [%0, #4]\n" : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v) : : "memory"); return (start); } #endif /* _KERNEL */ static __inline uint32_t atomic_readandclear_32(volatile u_int32_t *p) { return (__swp(0, p)); } #define atomic_cmpset_rel_32 atomic_cmpset_32 #define atomic_cmpset_acq_32 atomic_cmpset_32 #define atomic_set_rel_32 atomic_set_32 #define atomic_set_acq_32 atomic_set_32 #define atomic_clear_rel_32 atomic_clear_32 #define atomic_clear_acq_32 atomic_clear_32 #define atomic_add_rel_32 atomic_add_32 #define atomic_add_acq_32 atomic_add_32 #define atomic_subtract_rel_32 atomic_subtract_32 #define atomic_subtract_acq_32 atomic_subtract_32 #define atomic_store_rel_32 atomic_store_32 #define atomic_store_rel_long atomic_store_long #define atomic_load_acq_32 atomic_load_32 #define atomic_load_acq_long atomic_load_long #define atomic_add_acq_long atomic_add_long #define atomic_add_rel_long atomic_add_long #define atomic_subtract_acq_long atomic_subtract_long #define atomic_subtract_rel_long atomic_subtract_long #define atomic_clear_acq_long atomic_clear_long #define atomic_clear_rel_long atomic_clear_long #define atomic_set_acq_long atomic_set_long #define atomic_set_rel_long atomic_set_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_load_acq_long atomic_load_long #undef __with_interrupts_disabled static __inline void atomic_add_long(volatile u_long *p, u_long v) { atomic_add_32((volatile uint32_t *)p, v); } static __inline void atomic_clear_long(volatile u_long *p, u_long v) { atomic_clear_32((volatile uint32_t *)p, v); } static __inline int atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe) { return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe)); } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { return (atomic_fetchadd_32((volatile uint32_t *)p, v)); } static __inline void atomic_readandclear_long(volatile u_long *p) { atomic_readandclear_32((volatile uint32_t *)p); } static __inline void atomic_set_long(volatile u_long *p, u_long v) { atomic_set_32((volatile uint32_t *)p, v); } static __inline void atomic_subtract_long(volatile u_long *p, u_long v) { atomic_subtract_32((volatile uint32_t *)p, v); } #endif /* Arch >= v6 */ static __inline int atomic_load_32(volatile uint32_t *v) { return (*v); } static __inline void atomic_store_32(volatile uint32_t *dst, uint32_t src) { *dst = src; } static __inline int atomic_load_long(volatile u_long *v) { return (*v); } static __inline void atomic_store_long(volatile u_long *dst, u_long src) { *dst = src; } +static __inline void +atomic_thread_fence_acq(void) +{ + + dmb(); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + dmb(); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + dmb(); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + dmb(); +} + #define atomic_clear_ptr atomic_clear_32 #define atomic_set_ptr atomic_set_32 #define atomic_cmpset_ptr atomic_cmpset_32 #define atomic_cmpset_rel_ptr atomic_cmpset_rel_32 #define atomic_cmpset_acq_ptr atomic_cmpset_acq_32 #define atomic_store_ptr atomic_store_32 #define atomic_store_rel_ptr atomic_store_rel_32 #define atomic_add_int atomic_add_32 #define atomic_add_acq_int atomic_add_acq_32 #define atomic_add_rel_int atomic_add_rel_32 #define atomic_subtract_int atomic_subtract_32 #define atomic_subtract_acq_int atomic_subtract_acq_32 #define atomic_subtract_rel_int atomic_subtract_rel_32 #define atomic_clear_int atomic_clear_32 #define atomic_clear_acq_int atomic_clear_acq_32 #define atomic_clear_rel_int atomic_clear_rel_32 #define atomic_set_int atomic_set_32 #define atomic_set_acq_int atomic_set_acq_32 #define atomic_set_rel_int atomic_set_rel_32 #define atomic_cmpset_int atomic_cmpset_32 #define atomic_cmpset_acq_int atomic_cmpset_acq_32 #define atomic_cmpset_rel_int atomic_cmpset_rel_32 #define atomic_fetchadd_int atomic_fetchadd_32 #define atomic_readandclear_int atomic_readandclear_32 #define atomic_load_acq_int atomic_load_acq_32 #define atomic_store_rel_int atomic_store_rel_32 #endif /* _MACHINE_ATOMIC_H_ */ Index: head/sys/arm64/include/atomic.h =================================================================== --- head/sys/arm64/include/atomic.h (revision 285282) +++ head/sys/arm64/include/atomic.h (revision 285283) @@ -1,746 +1,774 @@ /*- * Copyright (c) 2013 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #define isb() __asm __volatile("isb" : : : "memory") /* * Options for DMB and DSB: * oshld Outer Shareable, load * oshst Outer Shareable, store * osh Outer Shareable, all * nshld Non-shareable, load * nshst Non-shareable, store * nsh Non-shareable, all * ishld Inner Shareable, load * ishst Inner Shareable, store * ish Inner Shareable, all * ld Full system, load * st Full system, store * sy Full system, all */ #define dsb(opt) __asm __volatile("dsb " __STRING(opt) : : : "memory") #define dmb(opt) __asm __volatile("dmb " __STRING(opt) : : : "memory") #define mb() dmb(sy) /* Full system memory barrier all */ #define wmb() dmb(st) /* Full system memory barrier store */ #define rmb() dmb(ld) /* Full system memory barrier load */ static __inline void atomic_add_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " add %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } static __inline void atomic_clear_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " bic %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } static __inline int atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { uint32_t tmp; int res; __asm __volatile( "1: mov %w1, #1 \n" " ldxr %w0, [%2] \n" " cmp %w0, %w3 \n" " b.ne 2f \n" " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc" ); return (!res); } static __inline uint32_t atomic_fetchadd_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp, ret; int res; __asm __volatile( "1: ldxr %w4, [%2] \n" " add %w0, %w4, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc" ); return (ret); } static __inline uint32_t atomic_readandclear_32(volatile uint32_t *p) { uint32_t tmp, ret; int res; __asm __volatile( " mov %w0, #0 \n" "1: ldxr %w3, [%2] \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc" ); return (ret); } static __inline void atomic_set_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " orr %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } static __inline void atomic_subtract_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " sub %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } #define atomic_add_int atomic_add_32 #define atomic_clear_int atomic_clear_32 #define atomic_cmpset_int atomic_cmpset_32 #define atomic_fetchadd_int atomic_fetchadd_32 #define atomic_readandclear_int atomic_readandclear_32 #define atomic_set_int atomic_set_32 #define atomic_subtract_int atomic_subtract_32 static __inline void atomic_add_acq_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldaxr %w0, [%2] \n" " add %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_clear_acq_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldaxr %w0, [%2] \n" " bic %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline int atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { uint32_t tmp; int res; __asm __volatile( "1: mov %w1, #1 \n" " ldaxr %w0, [%2] \n" " cmp %w0, %w3 \n" " b.ne 2f \n" " stxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory" ); return (!res); } static __inline uint32_t atomic_load_acq_32(volatile uint32_t *p) { uint32_t ret; __asm __volatile( "ldar %w0, [%1] \n" : "=&r" (ret) : "r" (p) : "memory"); return (ret); } static __inline void atomic_set_acq_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldaxr %w0, [%2] \n" " orr %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_subtract_acq_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldaxr %w0, [%2] \n" " sub %w0, %w0, %w3 \n" " stxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } #define atomic_add_acq_int atomic_add_acq_32 #define atomic_clear_acq_int atomic_clear_acq_32 #define atomic_cmpset_acq_int atomic_cmpset_acq_32 #define atomic_load_acq_int atomic_load_acq_32 #define atomic_set_acq_int atomic_set_acq_32 #define atomic_subtract_acq_int atomic_subtract_acq_32 /* The atomic functions currently are both acq and rel, we should fix this. */ static __inline void atomic_add_rel_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " add %w0, %w0, %w3 \n" " stlxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_clear_rel_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " bic %w0, %w0, %w3 \n" " stlxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline int atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { uint32_t tmp; int res; __asm __volatile( "1: mov %w1, #1 \n" " ldxr %w0, [%2] \n" " cmp %w0, %w3 \n" " b.ne 2f \n" " stlxr %w1, %w4, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory" ); return (!res); } static __inline void atomic_set_rel_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " orr %w0, %w0, %w3 \n" " stlxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_store_rel_32(volatile uint32_t *p, uint32_t val) { __asm __volatile( "stlr %w0, [%1] \n" : : "r" (val), "r" (p) : "memory"); } static __inline void atomic_subtract_rel_32(volatile uint32_t *p, uint32_t val) { uint32_t tmp; int res; __asm __volatile( "1: ldxr %w0, [%2] \n" " sub %w0, %w0, %w3 \n" " stlxr %w1, %w0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } #define atomic_add_rel_int atomic_add_rel_32 #define atomic_clear_rel_int atomic_add_rel_32 #define atomic_cmpset_rel_int atomic_cmpset_rel_32 #define atomic_set_rel_int atomic_set_rel_32 #define atomic_subtract_rel_int atomic_subtract_rel_32 #define atomic_store_rel_int atomic_store_rel_32 static __inline void atomic_add_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " add %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (val) : : "cc" ); } static __inline void atomic_clear_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " bic %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } static __inline int atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { uint64_t tmp; int res; __asm __volatile( "1: mov %w1, #1 \n" " ldxr %0, [%2] \n" " cmp %0, %3 \n" " b.ne 2f \n" " stxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r" (tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory" ); return (!res); } static __inline uint64_t atomic_fetchadd_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp, ret; int res; __asm __volatile( "1: ldxr %4, [%2] \n" " add %0, %4, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc" ); return (ret); } static __inline uint64_t atomic_readandclear_64(volatile uint64_t *p) { uint64_t tmp, ret; int res; __asm __volatile( " mov %0, #0 \n" "1: ldxr %3, [%2] \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc" ); return (ret); } static __inline void atomic_set_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " orr %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } static __inline void atomic_subtract_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " sub %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc" ); } static __inline uint64_t atomic_swap_64(volatile uint64_t *p, uint64_t val) { uint64_t old; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " stxr %w1, %3, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(old), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); return (old); } #define atomic_add_long atomic_add_64 #define atomic_clear_long atomic_clear_64 #define atomic_cmpset_long atomic_cmpset_64 #define atomic_fetchadd_long atomic_fetchadd_64 #define atomic_readandclear_long atomic_readandclear_64 #define atomic_set_long atomic_set_64 #define atomic_subtract_long atomic_subtract_64 #define atomic_add_ptr atomic_add_64 #define atomic_clear_ptr atomic_clear_64 #define atomic_cmpset_ptr atomic_cmpset_64 #define atomic_fetchadd_ptr atomic_fetchadd_64 #define atomic_readandclear_ptr atomic_readandclear_64 #define atomic_set_ptr atomic_set_64 #define atomic_subtract_ptr atomic_subtract_64 static __inline void atomic_add_acq_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldaxr %0, [%2] \n" " add %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_clear_acq_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldaxr %0, [%2] \n" " bic %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline int atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { uint64_t tmp; int res; __asm __volatile( "1: mov %w1, #1 \n" " ldaxr %0, [%2] \n" " cmp %0, %3 \n" " b.ne 2f \n" " stxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory" ); return (!res); } static __inline uint64_t atomic_load_acq_64(volatile uint64_t *p) { uint64_t ret; __asm __volatile( "ldar %0, [%1] \n" : "=&r" (ret) : "r" (p) : "memory"); return (ret); } static __inline void atomic_set_acq_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldaxr %0, [%2] \n" " orr %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_subtract_acq_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldaxr %0, [%2] \n" " sub %0, %0, %3 \n" " stxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } #define atomic_add_acq_long atomic_add_acq_64 #define atomic_clear_acq_long atomic_add_acq_64 #define atomic_cmpset_acq_long atomic_cmpset_acq_64 #define atomic_load_acq_long atomic_load_acq_64 #define atomic_set_acq_long atomic_set_acq_64 #define atomic_subtract_acq_long atomic_subtract_acq_64 #define atomic_add_acq_ptr atomic_add_acq_64 #define atomic_clear_acq_ptr atomic_add_acq_64 #define atomic_cmpset_acq_ptr atomic_cmpset_acq_64 #define atomic_load_acq_ptr atomic_load_acq_64 #define atomic_set_acq_ptr atomic_set_acq_64 #define atomic_subtract_acq_ptr atomic_subtract_acq_64 /* * TODO: The atomic functions currently are both acq and rel, we should fix * this. */ static __inline void atomic_add_rel_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " add %0, %0, %3 \n" " stlxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_clear_rel_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " bic %0, %0, %3 \n" " stlxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline int atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { uint64_t tmp; int res; __asm __volatile( "1: mov %w1, #1 \n" " ldxr %0, [%2] \n" " cmp %0, %3 \n" " b.ne 2f \n" " stlxr %w1, %4, [%2] \n" " cbnz %w1, 1b \n" "2:" : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory" ); return (!res); } static __inline void atomic_set_rel_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " orr %0, %0, %3 \n" " stlxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } static __inline void atomic_store_rel_64(volatile uint64_t *p, uint64_t val) { __asm __volatile( "stlr %0, [%1] \n" : : "r" (val), "r" (p) : "memory"); } static __inline void atomic_subtract_rel_64(volatile uint64_t *p, uint64_t val) { uint64_t tmp; int res; __asm __volatile( "1: ldxr %0, [%2] \n" " sub %0, %0, %3 \n" " stlxr %w1, %0, [%2] \n" " cbnz %w1, 1b \n" : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory" ); } +static __inline void +atomic_thread_fence_acq(void) +{ + + dmb(ld); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + dmb(sy); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + dmb(sy); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + dmb(sy); +} + #define atomic_add_rel_long atomic_add_rel_64 #define atomic_clear_rel_long atomic_clear_rel_64 #define atomic_cmpset_rel_long atomic_cmpset_rel_64 #define atomic_set_rel_long atomic_set_rel_64 #define atomic_subtract_rel_long atomic_subtract_rel_64 #define atomic_store_rel_long atomic_store_rel_64 #define atomic_add_rel_ptr atomic_add_rel_64 #define atomic_clear_rel_ptr atomic_clear_rel_64 #define atomic_cmpset_rel_ptr atomic_cmpset_rel_64 #define atomic_set_rel_ptr atomic_set_rel_64 #define atomic_subtract_rel_ptr atomic_subtract_rel_64 #define atomic_store_rel_ptr atomic_store_rel_64 #endif /* _MACHINE_ATOMIC_H_ */ Index: head/sys/i386/include/atomic.h =================================================================== --- head/sys/i386/include/atomic.h (revision 285282) +++ head/sys/i386/include/atomic.h (revision 285283) @@ -1,757 +1,789 @@ /*- * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif #ifdef _KERNEL #include #include #endif #define mb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc") #define wmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc") #define rmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc") /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. * * atomic_set_char(P, V) (*(u_char *)(P) |= (V)) * atomic_clear_char(P, V) (*(u_char *)(P) &= ~(V)) * atomic_add_char(P, V) (*(u_char *)(P) += (V)) * atomic_subtract_char(P, V) (*(u_char *)(P) -= (V)) * * atomic_set_short(P, V) (*(u_short *)(P) |= (V)) * atomic_clear_short(P, V) (*(u_short *)(P) &= ~(V)) * atomic_add_short(P, V) (*(u_short *)(P) += (V)) * atomic_subtract_short(P, V) (*(u_short *)(P) -= (V)) * * atomic_set_int(P, V) (*(u_int *)(P) |= (V)) * atomic_clear_int(P, V) (*(u_int *)(P) &= ~(V)) * atomic_add_int(P, V) (*(u_int *)(P) += (V)) * atomic_subtract_int(P, V) (*(u_int *)(P) -= (V)) * atomic_swap_int(P, V) (return (*(u_int *)(P)); *(u_int *)(P) = (V);) * atomic_readandclear_int(P) (return (*(u_int *)(P)); *(u_int *)(P) = 0;) * * atomic_set_long(P, V) (*(u_long *)(P) |= (V)) * atomic_clear_long(P, V) (*(u_long *)(P) &= ~(V)) * atomic_add_long(P, V) (*(u_long *)(P) += (V)) * atomic_subtract_long(P, V) (*(u_long *)(P) -= (V)) * atomic_swap_long(P, V) (return (*(u_long *)(P)); *(u_long *)(P) = (V);) * atomic_readandclear_long(P) (return (*(u_long *)(P)); *(u_long *)(P) = 0;) */ /* * The above functions are expanded inline in the statically-linked * kernel. Lock prefixes are generated if an SMP kernel is being * built. * * Kernel modules call real functions which are built into the kernel. * This allows kernel modules to be portable between UP and SMP systems. */ #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM) #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v); \ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); u_int atomic_fetchadd_int(volatile u_int *p, u_int v); int atomic_testandset_int(volatile u_int *p, u_int v); +void atomic_thread_fence_acq(void); +void atomic_thread_fence_acq_rel(void); +void atomic_thread_fence_rel(void); +void atomic_thread_fence_seq_cst(void); #define ATOMIC_LOAD(TYPE) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) #define ATOMIC_STORE(TYPE) \ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t); uint64_t atomic_load_acq_64(volatile uint64_t *); void atomic_store_rel_64(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64(volatile uint64_t *, uint64_t); #else /* !KLD_MODULE && __GNUCLIKE_ASM */ /* * For userland, always use lock prefixes so that the binaries will run * on both SMP and !SMP systems. */ #if defined(SMP) || !defined(_KERNEL) #define MPLOCKED "lock ; " #else #define MPLOCKED #endif /* * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. */ #define ATOMIC_ASM(NAME, TYPE, OP, CONS, V) \ static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ } \ \ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ __asm __volatile(MPLOCKED OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ } \ struct __hack /* * Atomic compare and set, used by the mutex functions * * if (*dst == expect) *dst = src (all 32 bit words) * * Returns 0 on failure, non-zero on success */ #ifdef CPU_DISABLE_CMPXCHG static __inline int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src) { u_char res; __asm __volatile( " pushfl ; " " cli ; " " cmpl %3,%1 ; " " jne 1f ; " " movl %2,%1 ; " "1: " " sete %0 ; " " popfl ; " "# atomic_cmpset_int" : "=q" (res), /* 0 */ "+m" (*dst) /* 1 */ : "r" (src), /* 2 */ "r" (expect) /* 3 */ : "memory"); return (res); } #else /* !CPU_DISABLE_CMPXCHG */ static __inline int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src) { u_char res; __asm __volatile( " " MPLOCKED " " " cmpxchgl %3,%1 ; " " sete %0 ; " "# atomic_cmpset_int" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+a" (expect) /* 2 */ : "r" (src) /* 3 */ : "memory", "cc"); return (res); } #endif /* CPU_DISABLE_CMPXCHG */ /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( " " MPLOCKED " " " xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ : : "cc"); return (v); } static __inline int atomic_testandset_int(volatile u_int *p, u_int v) { u_char res; __asm __volatile( " " MPLOCKED " " " btsl %2,%1 ; " " setc %0 ; " "# atomic_testandset_int" : "=q" (res), /* 0 */ "+m" (*p) /* 1 */ : "Ir" (v & 0x1f) /* 2 */ : "cc"); return (res); } /* * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. * * However, a load may pass a store if they are performed on distinct * addresses, so for atomic_load_acq we introduce a Store/Load barrier * before the load in SMP kernels. We use "lock addl $0,mem", as * recommended by the AMD Software Optimization Guide, and not mfence. * In the kernel, we use a private per-cpu cache line as the target * for the locked addition, to avoid introducing false data * dependencies. In userspace, a word at the top of the stack is * utilized. * * For UP kernels, however, the memory of the single processor is * always consistent, so we only need to stop the compiler from * reordering accesses in a way that violates the semantics of acquire * and release. */ #if defined(_KERNEL) /* * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). * * The open-coded number is used instead of the symbolic expression to * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in i386/vm_machdep.c ensures that the value is correct. */ #define OFFSETOF_MONITORBUF 0x180 #if defined(SMP) static __inline void __storeload_barrier(void) { __asm __volatile("lock; addl $0,%%fs:%0" : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc"); } #else /* _KERNEL && UP */ static __inline void __storeload_barrier(void) { __compiler_membar(); } #endif /* SMP */ #else /* !_KERNEL */ static __inline void __storeload_barrier(void) { __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc"); } #endif /* _KERNEL*/ /* * C11-standard acq/rel semantics only apply when the variable in the * call is the same for acq as it is for rel. However, our previous * (x86) implementations provided much stronger ordering than required * (essentially what is called seq_cst order in C11). This * implementation provides the historical strong ordering since some * callers depend on it. */ #define ATOMIC_LOAD(TYPE) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE res; \ \ __storeload_barrier(); \ res = *p; \ __compiler_membar(); \ return (res); \ } \ struct __hack #define ATOMIC_STORE(TYPE) \ static __inline void \ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ { \ \ __compiler_membar(); \ *p = v; \ } \ struct __hack + +static __inline void +atomic_thread_fence_acq(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + __storeload_barrier(); +} #ifdef _KERNEL #ifdef WANT_FUNCTIONS int atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t); int atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t); uint64_t atomic_load_acq_64_i386(volatile uint64_t *); uint64_t atomic_load_acq_64_i586(volatile uint64_t *); void atomic_store_rel_64_i386(volatile uint64_t *, uint64_t); void atomic_store_rel_64_i586(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64_i386(volatile uint64_t *, uint64_t); uint64_t atomic_swap_64_i586(volatile uint64_t *, uint64_t); #endif /* I486 does not support SMP or CMPXCHG8B. */ static __inline int atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src) { volatile uint32_t *p; u_char res; p = (volatile uint32_t *)dst; __asm __volatile( " pushfl ; " " cli ; " " xorl %1,%%eax ; " " xorl %2,%%edx ; " " orl %%edx,%%eax ; " " jne 1f ; " " movl %4,%1 ; " " movl %5,%2 ; " "1: " " sete %3 ; " " popfl" : "+A" (expect), /* 0 */ "+m" (*p), /* 1 */ "+m" (*(p + 1)), /* 2 */ "=q" (res) /* 3 */ : "r" ((uint32_t)src), /* 4 */ "r" ((uint32_t)(src >> 32)) /* 5 */ : "memory", "cc"); return (res); } static __inline uint64_t atomic_load_acq_64_i386(volatile uint64_t *p) { volatile uint32_t *q; uint64_t res; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %1,%%eax ; " " movl %2,%%edx ; " " popfl" : "=&A" (res) /* 0 */ : "m" (*q), /* 1 */ "m" (*(q + 1)) /* 2 */ : "memory"); return (res); } static __inline void atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v) { volatile uint32_t *q; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %%eax,%0 ; " " movl %%edx,%1 ; " " popfl" : "=m" (*q), /* 0 */ "=m" (*(q + 1)) /* 1 */ : "A" (v) /* 2 */ : "memory"); } static __inline uint64_t atomic_swap_64_i386(volatile uint64_t *p, uint64_t v) { volatile uint32_t *q; uint64_t res; q = (volatile uint32_t *)p; __asm __volatile( " pushfl ; " " cli ; " " movl %1,%%eax ; " " movl %2,%%edx ; " " movl %4,%2 ; " " movl %3,%1 ; " " popfl" : "=&A" (res), /* 0 */ "+m" (*q), /* 1 */ "+m" (*(q + 1)) /* 2 */ : "r" ((uint32_t)v), /* 3 */ "r" ((uint32_t)(v >> 32))); /* 4 */ return (res); } static __inline int atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src) { u_char res; __asm __volatile( " " MPLOCKED " " " cmpxchg8b %1 ; " " sete %0" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ "+A" (expect) /* 2 */ : "b" ((uint32_t)src), /* 3 */ "c" ((uint32_t)(src >> 32)) /* 4 */ : "memory", "cc"); return (res); } static __inline uint64_t atomic_load_acq_64_i586(volatile uint64_t *p) { uint64_t res; __asm __volatile( " movl %%ebx,%%eax ; " " movl %%ecx,%%edx ; " " " MPLOCKED " " " cmpxchg8b %1" : "=&A" (res), /* 0 */ "+m" (*p) /* 1 */ : : "memory", "cc"); return (res); } static __inline void atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v) { __asm __volatile( " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " " " MPLOCKED " " " cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ : : "ebx", "ecx", "memory", "cc"); } static __inline uint64_t atomic_swap_64_i586(volatile uint64_t *p, uint64_t v) { __asm __volatile( " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " " " MPLOCKED " " " cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ : : "ebx", "ecx", "memory", "cc"); return (v); } static __inline int atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_cmpset_64_i386(dst, expect, src)); else return (atomic_cmpset_64_i586(dst, expect, src)); } static __inline uint64_t atomic_load_acq_64(volatile uint64_t *p) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_load_acq_64_i386(p)); else return (atomic_load_acq_64_i586(p)); } static __inline void atomic_store_rel_64(volatile uint64_t *p, uint64_t v) { if ((cpu_feature & CPUID_CX8) == 0) atomic_store_rel_64_i386(p, v); else atomic_store_rel_64_i586(p, v); } static __inline uint64_t atomic_swap_64(volatile uint64_t *p, uint64_t v) { if ((cpu_feature & CPUID_CX8) == 0) return (atomic_swap_64_i386(p, v)); else return (atomic_swap_64_i586(p, v)); } #endif /* _KERNEL */ #endif /* KLD_MODULE || !__GNUCLIKE_ASM */ ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v); ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v); ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v); ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v); ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v); ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v); ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v); ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v); ATOMIC_ASM(set, int, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, int, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v); ATOMIC_ASM(set, long, "orl %1,%0", "ir", v); ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v); #define ATOMIC_LOADSTORE(TYPE) \ ATOMIC_LOAD(TYPE); \ ATOMIC_STORE(TYPE) ATOMIC_LOADSTORE(char); ATOMIC_LOADSTORE(short); ATOMIC_LOADSTORE(int); ATOMIC_LOADSTORE(long); #undef ATOMIC_ASM #undef ATOMIC_LOAD #undef ATOMIC_STORE #undef ATOMIC_LOADSTORE #ifndef WANT_FUNCTIONS static __inline int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src) { return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect, (u_int)src)); } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v)); } static __inline int atomic_testandset_long(volatile u_long *p, u_int v) { return (atomic_testandset_int((volatile u_int *)p, v)); } /* Read the current value and store a new value in the destination. */ #ifdef __GNUCLIKE_ASM static __inline u_int atomic_swap_int(volatile u_int *p, u_int v) { __asm __volatile( " xchgl %1,%0 ; " "# atomic_swap_int" : "+r" (v), /* 0 */ "+m" (*p)); /* 1 */ return (v); } static __inline u_long atomic_swap_long(volatile u_long *p, u_long v) { return (atomic_swap_int((volatile u_int *)p, (u_int)v)); } #else /* !__GNUCLIKE_ASM */ u_int atomic_swap_int(volatile u_int *p, u_int v); u_long atomic_swap_long(volatile u_long *p, u_long v); #endif /* __GNUCLIKE_ASM */ #define atomic_set_acq_char atomic_set_barr_char #define atomic_set_rel_char atomic_set_barr_char #define atomic_clear_acq_char atomic_clear_barr_char #define atomic_clear_rel_char atomic_clear_barr_char #define atomic_add_acq_char atomic_add_barr_char #define atomic_add_rel_char atomic_add_barr_char #define atomic_subtract_acq_char atomic_subtract_barr_char #define atomic_subtract_rel_char atomic_subtract_barr_char #define atomic_set_acq_short atomic_set_barr_short #define atomic_set_rel_short atomic_set_barr_short #define atomic_clear_acq_short atomic_clear_barr_short #define atomic_clear_rel_short atomic_clear_barr_short #define atomic_add_acq_short atomic_add_barr_short #define atomic_add_rel_short atomic_add_barr_short #define atomic_subtract_acq_short atomic_subtract_barr_short #define atomic_subtract_rel_short atomic_subtract_barr_short #define atomic_set_acq_int atomic_set_barr_int #define atomic_set_rel_int atomic_set_barr_int #define atomic_clear_acq_int atomic_clear_barr_int #define atomic_clear_rel_int atomic_clear_barr_int #define atomic_add_acq_int atomic_add_barr_int #define atomic_add_rel_int atomic_add_barr_int #define atomic_subtract_acq_int atomic_subtract_barr_int #define atomic_subtract_rel_int atomic_subtract_barr_int #define atomic_cmpset_acq_int atomic_cmpset_int #define atomic_cmpset_rel_int atomic_cmpset_int #define atomic_set_acq_long atomic_set_barr_long #define atomic_set_rel_long atomic_set_barr_long #define atomic_clear_acq_long atomic_clear_barr_long #define atomic_clear_rel_long atomic_clear_barr_long #define atomic_add_acq_long atomic_add_barr_long #define atomic_add_rel_long atomic_add_barr_long #define atomic_subtract_acq_long atomic_subtract_barr_long #define atomic_subtract_rel_long atomic_subtract_barr_long #define atomic_cmpset_acq_long atomic_cmpset_long #define atomic_cmpset_rel_long atomic_cmpset_long #define atomic_readandclear_int(p) atomic_swap_int(p, 0) #define atomic_readandclear_long(p) atomic_swap_long(p, 0) /* Operations on 8-bit bytes. */ #define atomic_set_8 atomic_set_char #define atomic_set_acq_8 atomic_set_acq_char #define atomic_set_rel_8 atomic_set_rel_char #define atomic_clear_8 atomic_clear_char #define atomic_clear_acq_8 atomic_clear_acq_char #define atomic_clear_rel_8 atomic_clear_rel_char #define atomic_add_8 atomic_add_char #define atomic_add_acq_8 atomic_add_acq_char #define atomic_add_rel_8 atomic_add_rel_char #define atomic_subtract_8 atomic_subtract_char #define atomic_subtract_acq_8 atomic_subtract_acq_char #define atomic_subtract_rel_8 atomic_subtract_rel_char #define atomic_load_acq_8 atomic_load_acq_char #define atomic_store_rel_8 atomic_store_rel_char /* Operations on 16-bit words. */ #define atomic_set_16 atomic_set_short #define atomic_set_acq_16 atomic_set_acq_short #define atomic_set_rel_16 atomic_set_rel_short #define atomic_clear_16 atomic_clear_short #define atomic_clear_acq_16 atomic_clear_acq_short #define atomic_clear_rel_16 atomic_clear_rel_short #define atomic_add_16 atomic_add_short #define atomic_add_acq_16 atomic_add_acq_short #define atomic_add_rel_16 atomic_add_rel_short #define atomic_subtract_16 atomic_subtract_short #define atomic_subtract_acq_16 atomic_subtract_acq_short #define atomic_subtract_rel_16 atomic_subtract_rel_short #define atomic_load_acq_16 atomic_load_acq_short #define atomic_store_rel_16 atomic_store_rel_short /* Operations on 32-bit double words. */ #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int #define atomic_clear_32 atomic_clear_int #define atomic_clear_acq_32 atomic_clear_acq_int #define atomic_clear_rel_32 atomic_clear_rel_int #define atomic_add_32 atomic_add_int #define atomic_add_acq_32 atomic_add_acq_int #define atomic_add_rel_32 atomic_add_rel_int #define atomic_subtract_32 atomic_subtract_int #define atomic_subtract_acq_32 atomic_subtract_acq_int #define atomic_subtract_rel_32 atomic_subtract_rel_int #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int #define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #define atomic_swap_32 atomic_swap_int #define atomic_readandclear_32 atomic_readandclear_int #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_testandset_32 atomic_testandset_int /* Operations on pointers. */ #define atomic_set_ptr(p, v) \ atomic_set_int((volatile u_int *)(p), (u_int)(v)) #define atomic_set_acq_ptr(p, v) \ atomic_set_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_set_rel_ptr(p, v) \ atomic_set_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_ptr(p, v) \ atomic_clear_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_acq_ptr(p, v) \ atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_rel_ptr(p, v) \ atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_ptr(p, v) \ atomic_add_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_acq_ptr(p, v) \ atomic_add_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_add_rel_ptr(p, v) \ atomic_add_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_ptr(p, v) \ atomic_subtract_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_acq_ptr(p, v) \ atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_rel_ptr(p, v) \ atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v)) #define atomic_load_acq_ptr(p) \ atomic_load_acq_int((volatile u_int *)(p)) #define atomic_store_rel_ptr(p, v) \ atomic_store_rel_int((volatile u_int *)(p), (v)) #define atomic_cmpset_ptr(dst, old, new) \ atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new)) #define atomic_cmpset_acq_ptr(dst, old, new) \ atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \ (u_int)(new)) #define atomic_cmpset_rel_ptr(dst, old, new) \ atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \ (u_int)(new)) #define atomic_swap_ptr(p, v) \ atomic_swap_int((volatile u_int *)(p), (u_int)(v)) #define atomic_readandclear_ptr(p) \ atomic_readandclear_int((volatile u_int *)(p)) #endif /* !WANT_FUNCTIONS */ #endif /* !_MACHINE_ATOMIC_H_ */ Index: head/sys/mips/include/atomic.h =================================================================== --- head/sys/mips/include/atomic.h (revision 285282) +++ head/sys/mips/include/atomic.h (revision 285283) @@ -1,644 +1,672 @@ /*- * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: src/sys/alpha/include/atomic.h,v 1.21.2.3 2005/10/06 18:12:05 jhb * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif /* * Note: All the 64-bit atomic operations are only atomic when running * in 64-bit mode. It is assumed that code compiled for n32 and n64 * fits into this definition and no further safeties are needed. * * It is also assumed that the add, subtract and other arithmetic is * done on numbers not pointers. The special rules for n32 pointers * do not have atomic operations defined for them, but generally shouldn't * need atomic operations. */ #ifndef __MIPS_PLATFORM_SYNC_NOPS #define __MIPS_PLATFORM_SYNC_NOPS "" #endif static __inline void mips_sync(void) { __asm __volatile (".set noreorder\n" "\tsync\n" __MIPS_PLATFORM_SYNC_NOPS ".set reorder\n" : : : "memory"); } #define mb() mips_sync() #define wmb() mips_sync() #define rmb() mips_sync() /* * Various simple arithmetic on memory which is atomic in the presence * of interrupts and SMP safe. */ void atomic_set_8(__volatile uint8_t *, uint8_t); void atomic_clear_8(__volatile uint8_t *, uint8_t); void atomic_add_8(__volatile uint8_t *, uint8_t); void atomic_subtract_8(__volatile uint8_t *, uint8_t); void atomic_set_16(__volatile uint16_t *, uint16_t); void atomic_clear_16(__volatile uint16_t *, uint16_t); void atomic_add_16(__volatile uint16_t *, uint16_t); void atomic_subtract_16(__volatile uint16_t *, uint16_t); static __inline void atomic_set_32(__volatile uint32_t *p, uint32_t v) { uint32_t temp; __asm __volatile ( "1:\tll %0, %3\n\t" /* load old value */ "or %0, %2, %0\n\t" /* calculate new value */ "sc %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline void atomic_clear_32(__volatile uint32_t *p, uint32_t v) { uint32_t temp; v = ~v; __asm __volatile ( "1:\tll %0, %3\n\t" /* load old value */ "and %0, %2, %0\n\t" /* calculate new value */ "sc %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline void atomic_add_32(__volatile uint32_t *p, uint32_t v) { uint32_t temp; __asm __volatile ( "1:\tll %0, %3\n\t" /* load old value */ "addu %0, %2, %0\n\t" /* calculate new value */ "sc %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline void atomic_subtract_32(__volatile uint32_t *p, uint32_t v) { uint32_t temp; __asm __volatile ( "1:\tll %0, %3\n\t" /* load old value */ "subu %0, %2\n\t" /* calculate new value */ "sc %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline uint32_t atomic_readandclear_32(__volatile uint32_t *addr) { uint32_t result,temp; __asm __volatile ( "1:\tll %0,%3\n\t" /* load current value, asserting lock */ "li %1,0\n\t" /* value to store */ "sc %1,%2\n\t" /* attempt to store */ "beqz %1, 1b\n\t" /* if the store failed, spin */ : "=&r"(result), "=&r"(temp), "=m" (*addr) : "m" (*addr) : "memory"); return result; } static __inline uint32_t atomic_readandset_32(__volatile uint32_t *addr, uint32_t value) { uint32_t result,temp; __asm __volatile ( "1:\tll %0,%3\n\t" /* load current value, asserting lock */ "or %1,$0,%4\n\t" "sc %1,%2\n\t" /* attempt to store */ "beqz %1, 1b\n\t" /* if the store failed, spin */ : "=&r"(result), "=&r"(temp), "=m" (*addr) : "m" (*addr), "r" (value) : "memory"); return result; } #if defined(__mips_n64) || defined(__mips_n32) static __inline void atomic_set_64(__volatile uint64_t *p, uint64_t v) { uint64_t temp; __asm __volatile ( "1:\n\t" "lld %0, %3\n\t" /* load old value */ "or %0, %2, %0\n\t" /* calculate new value */ "scd %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline void atomic_clear_64(__volatile uint64_t *p, uint64_t v) { uint64_t temp; v = ~v; __asm __volatile ( "1:\n\t" "lld %0, %3\n\t" /* load old value */ "and %0, %2, %0\n\t" /* calculate new value */ "scd %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline void atomic_add_64(__volatile uint64_t *p, uint64_t v) { uint64_t temp; __asm __volatile ( "1:\n\t" "lld %0, %3\n\t" /* load old value */ "daddu %0, %2, %0\n\t" /* calculate new value */ "scd %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline void atomic_subtract_64(__volatile uint64_t *p, uint64_t v) { uint64_t temp; __asm __volatile ( "1:\n\t" "lld %0, %3\n\t" /* load old value */ "dsubu %0, %2\n\t" /* calculate new value */ "scd %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* spin if failed */ : "=&r" (temp), "=m" (*p) : "r" (v), "m" (*p) : "memory"); } static __inline uint64_t atomic_readandclear_64(__volatile uint64_t *addr) { uint64_t result,temp; __asm __volatile ( "1:\n\t" "lld %0, %3\n\t" /* load old value */ "li %1, 0\n\t" /* value to store */ "scd %1, %2\n\t" /* attempt to store */ "beqz %1, 1b\n\t" /* if the store failed, spin */ : "=&r"(result), "=&r"(temp), "=m" (*addr) : "m" (*addr) : "memory"); return result; } static __inline uint64_t atomic_readandset_64(__volatile uint64_t *addr, uint64_t value) { uint64_t result,temp; __asm __volatile ( "1:\n\t" "lld %0,%3\n\t" /* Load old value*/ "or %1,$0,%4\n\t" "scd %1,%2\n\t" /* attempt to store */ "beqz %1, 1b\n\t" /* if the store failed, spin */ : "=&r"(result), "=&r"(temp), "=m" (*addr) : "m" (*addr), "r" (value) : "memory"); return result; } #endif #define ATOMIC_ACQ_REL(NAME, WIDTH) \ static __inline void \ atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ atomic_##NAME##_##WIDTH(p, v); \ mips_sync(); \ } \ \ static __inline void \ atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ mips_sync(); \ atomic_##NAME##_##WIDTH(p, v); \ } /* Variants of simple arithmetic with memory barriers. */ ATOMIC_ACQ_REL(set, 8) ATOMIC_ACQ_REL(clear, 8) ATOMIC_ACQ_REL(add, 8) ATOMIC_ACQ_REL(subtract, 8) ATOMIC_ACQ_REL(set, 16) ATOMIC_ACQ_REL(clear, 16) ATOMIC_ACQ_REL(add, 16) ATOMIC_ACQ_REL(subtract, 16) ATOMIC_ACQ_REL(set, 32) ATOMIC_ACQ_REL(clear, 32) ATOMIC_ACQ_REL(add, 32) ATOMIC_ACQ_REL(subtract, 32) #if defined(__mips_n64) || defined(__mips_n32) ATOMIC_ACQ_REL(set, 64) ATOMIC_ACQ_REL(clear, 64) ATOMIC_ACQ_REL(add, 64) ATOMIC_ACQ_REL(subtract, 64) #endif #undef ATOMIC_ACQ_REL /* * We assume that a = b will do atomic loads and stores. */ #define ATOMIC_STORE_LOAD(WIDTH) \ static __inline uint##WIDTH##_t \ atomic_load_acq_##WIDTH(__volatile uint##WIDTH##_t *p) \ { \ uint##WIDTH##_t v; \ \ v = *p; \ mips_sync(); \ return (v); \ } \ \ static __inline void \ atomic_store_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\ { \ mips_sync(); \ *p = v; \ } ATOMIC_STORE_LOAD(32) ATOMIC_STORE_LOAD(64) #if !defined(__mips_n64) && !defined(__mips_n32) void atomic_store_64(__volatile uint64_t *, uint64_t *); void atomic_load_64(__volatile uint64_t *, uint64_t *); #else static __inline void atomic_store_64(__volatile uint64_t *p, uint64_t *v) { *p = *v; } static __inline void atomic_load_64(__volatile uint64_t *p, uint64_t *v) { *v = *p; } #endif #undef ATOMIC_STORE_LOAD /* * Atomically compare the value stored at *p with cmpval and if the * two values are equal, update the value of *p with newval. Returns * zero if the compare failed, nonzero otherwise. */ static __inline uint32_t atomic_cmpset_32(__volatile uint32_t* p, uint32_t cmpval, uint32_t newval) { uint32_t ret; __asm __volatile ( "1:\tll %0, %4\n\t" /* load old value */ "bne %0, %2, 2f\n\t" /* compare */ "move %0, %3\n\t" /* value to store */ "sc %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* if it failed, spin */ "j 3f\n\t" "2:\n\t" "li %0, 0\n\t" "3:\n" : "=&r" (ret), "=m" (*p) : "r" (cmpval), "r" (newval), "m" (*p) : "memory"); return ret; } /* * Atomically compare the value stored at *p with cmpval and if the * two values are equal, update the value of *p with newval. Returns * zero if the compare failed, nonzero otherwise. */ static __inline uint32_t atomic_cmpset_acq_32(__volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { int retval; retval = atomic_cmpset_32(p, cmpval, newval); mips_sync(); return (retval); } static __inline uint32_t atomic_cmpset_rel_32(__volatile uint32_t *p, uint32_t cmpval, uint32_t newval) { mips_sync(); return (atomic_cmpset_32(p, cmpval, newval)); } /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline uint32_t atomic_fetchadd_32(__volatile uint32_t *p, uint32_t v) { uint32_t value, temp; __asm __volatile ( "1:\tll %0, %1\n\t" /* load old value */ "addu %2, %3, %0\n\t" /* calculate new value */ "sc %2, %1\n\t" /* attempt to store */ "beqz %2, 1b\n\t" /* spin if failed */ : "=&r" (value), "=m" (*p), "=&r" (temp) : "r" (v), "m" (*p)); return (value); } #if defined(__mips_n64) || defined(__mips_n32) /* * Atomically compare the value stored at *p with cmpval and if the * two values are equal, update the value of *p with newval. Returns * zero if the compare failed, nonzero otherwise. */ static __inline uint64_t atomic_cmpset_64(__volatile uint64_t* p, uint64_t cmpval, uint64_t newval) { uint64_t ret; __asm __volatile ( "1:\n\t" "lld %0, %4\n\t" /* load old value */ "bne %0, %2, 2f\n\t" /* compare */ "move %0, %3\n\t" /* value to store */ "scd %0, %1\n\t" /* attempt to store */ "beqz %0, 1b\n\t" /* if it failed, spin */ "j 3f\n\t" "2:\n\t" "li %0, 0\n\t" "3:\n" : "=&r" (ret), "=m" (*p) : "r" (cmpval), "r" (newval), "m" (*p) : "memory"); return ret; } /* * Atomically compare the value stored at *p with cmpval and if the * two values are equal, update the value of *p with newval. Returns * zero if the compare failed, nonzero otherwise. */ static __inline uint64_t atomic_cmpset_acq_64(__volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { int retval; retval = atomic_cmpset_64(p, cmpval, newval); mips_sync(); return (retval); } static __inline uint64_t atomic_cmpset_rel_64(__volatile uint64_t *p, uint64_t cmpval, uint64_t newval) { mips_sync(); return (atomic_cmpset_64(p, cmpval, newval)); } /* * Atomically add the value of v to the integer pointed to by p and return * the previous value of *p. */ static __inline uint64_t atomic_fetchadd_64(__volatile uint64_t *p, uint64_t v) { uint64_t value, temp; __asm __volatile ( "1:\n\t" "lld %0, %1\n\t" /* load old value */ "daddu %2, %3, %0\n\t" /* calculate new value */ "scd %2, %1\n\t" /* attempt to store */ "beqz %2, 1b\n\t" /* spin if failed */ : "=&r" (value), "=m" (*p), "=&r" (temp) : "r" (v), "m" (*p)); return (value); } #endif +static __inline void +atomic_thread_fence_acq(void) +{ + + mips_sync(); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + mips_sync(); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + mips_sync(); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + mips_sync(); +} + /* Operations on chars. */ #define atomic_set_char atomic_set_8 #define atomic_set_acq_char atomic_set_acq_8 #define atomic_set_rel_char atomic_set_rel_8 #define atomic_clear_char atomic_clear_8 #define atomic_clear_acq_char atomic_clear_acq_8 #define atomic_clear_rel_char atomic_clear_rel_8 #define atomic_add_char atomic_add_8 #define atomic_add_acq_char atomic_add_acq_8 #define atomic_add_rel_char atomic_add_rel_8 #define atomic_subtract_char atomic_subtract_8 #define atomic_subtract_acq_char atomic_subtract_acq_8 #define atomic_subtract_rel_char atomic_subtract_rel_8 /* Operations on shorts. */ #define atomic_set_short atomic_set_16 #define atomic_set_acq_short atomic_set_acq_16 #define atomic_set_rel_short atomic_set_rel_16 #define atomic_clear_short atomic_clear_16 #define atomic_clear_acq_short atomic_clear_acq_16 #define atomic_clear_rel_short atomic_clear_rel_16 #define atomic_add_short atomic_add_16 #define atomic_add_acq_short atomic_add_acq_16 #define atomic_add_rel_short atomic_add_rel_16 #define atomic_subtract_short atomic_subtract_16 #define atomic_subtract_acq_short atomic_subtract_acq_16 #define atomic_subtract_rel_short atomic_subtract_rel_16 /* Operations on ints. */ #define atomic_set_int atomic_set_32 #define atomic_set_acq_int atomic_set_acq_32 #define atomic_set_rel_int atomic_set_rel_32 #define atomic_clear_int atomic_clear_32 #define atomic_clear_acq_int atomic_clear_acq_32 #define atomic_clear_rel_int atomic_clear_rel_32 #define atomic_add_int atomic_add_32 #define atomic_add_acq_int atomic_add_acq_32 #define atomic_add_rel_int atomic_add_rel_32 #define atomic_subtract_int atomic_subtract_32 #define atomic_subtract_acq_int atomic_subtract_acq_32 #define atomic_subtract_rel_int atomic_subtract_rel_32 #define atomic_cmpset_int atomic_cmpset_32 #define atomic_cmpset_acq_int atomic_cmpset_acq_32 #define atomic_cmpset_rel_int atomic_cmpset_rel_32 #define atomic_load_acq_int atomic_load_acq_32 #define atomic_store_rel_int atomic_store_rel_32 #define atomic_readandclear_int atomic_readandclear_32 #define atomic_readandset_int atomic_readandset_32 #define atomic_fetchadd_int atomic_fetchadd_32 /* * I think the following is right, even for n32. For n32 the pointers * are still 32-bits, so we need to operate on them as 32-bit quantities, * even though they are sign extended in operation. For longs, there's * no question because they are always 32-bits. */ #ifdef __mips_n64 /* Operations on longs. */ #define atomic_set_long atomic_set_64 #define atomic_set_acq_long atomic_set_acq_64 #define atomic_set_rel_long atomic_set_rel_64 #define atomic_clear_long atomic_clear_64 #define atomic_clear_acq_long atomic_clear_acq_64 #define atomic_clear_rel_long atomic_clear_rel_64 #define atomic_add_long atomic_add_64 #define atomic_add_acq_long atomic_add_acq_64 #define atomic_add_rel_long atomic_add_rel_64 #define atomic_subtract_long atomic_subtract_64 #define atomic_subtract_acq_long atomic_subtract_acq_64 #define atomic_subtract_rel_long atomic_subtract_rel_64 #define atomic_cmpset_long atomic_cmpset_64 #define atomic_cmpset_acq_long atomic_cmpset_acq_64 #define atomic_cmpset_rel_long atomic_cmpset_rel_64 #define atomic_load_acq_long atomic_load_acq_64 #define atomic_store_rel_long atomic_store_rel_64 #define atomic_fetchadd_long atomic_fetchadd_64 #define atomic_readandclear_long atomic_readandclear_64 #else /* !__mips_n64 */ /* Operations on longs. */ #define atomic_set_long(p, v) \ atomic_set_32((volatile u_int *)(p), (u_int)(v)) #define atomic_set_acq_long(p, v) \ atomic_set_acq_32((volatile u_int *)(p), (u_int)(v)) #define atomic_set_rel_long(p, v) \ atomic_set_rel_32((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_long(p, v) \ atomic_clear_32((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_acq_long(p, v) \ atomic_clear_acq_32((volatile u_int *)(p), (u_int)(v)) #define atomic_clear_rel_long(p, v) \ atomic_clear_rel_32((volatile u_int *)(p), (u_int)(v)) #define atomic_add_long(p, v) \ atomic_add_32((volatile u_int *)(p), (u_int)(v)) #define atomic_add_acq_long(p, v) \ atomic_add_32((volatile u_int *)(p), (u_int)(v)) #define atomic_add_rel_long(p, v) \ atomic_add_32((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_long(p, v) \ atomic_subtract_32((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_acq_long(p, v) \ atomic_subtract_acq_32((volatile u_int *)(p), (u_int)(v)) #define atomic_subtract_rel_long(p, v) \ atomic_subtract_rel_32((volatile u_int *)(p), (u_int)(v)) #define atomic_cmpset_long(p, cmpval, newval) \ atomic_cmpset_32((volatile u_int *)(p), (u_int)(cmpval), \ (u_int)(newval)) #define atomic_cmpset_acq_long(p, cmpval, newval) \ atomic_cmpset_acq_32((volatile u_int *)(p), (u_int)(cmpval), \ (u_int)(newval)) #define atomic_cmpset_rel_long(p, cmpval, newval) \ atomic_cmpset_rel_32((volatile u_int *)(p), (u_int)(cmpval), \ (u_int)(newval)) #define atomic_load_acq_long(p) \ (u_long)atomic_load_acq_32((volatile u_int *)(p)) #define atomic_store_rel_long(p, v) \ atomic_store_rel_32((volatile u_int *)(p), (u_int)(v)) #define atomic_fetchadd_long(p, v) \ atomic_fetchadd_32((volatile u_int *)(p), (u_int)(v)) #define atomic_readandclear_long(p) \ atomic_readandclear_32((volatile u_int *)(p)) #endif /* __mips_n64 */ /* Operations on pointers. */ #define atomic_set_ptr atomic_set_long #define atomic_set_acq_ptr atomic_set_acq_long #define atomic_set_rel_ptr atomic_set_rel_long #define atomic_clear_ptr atomic_clear_long #define atomic_clear_acq_ptr atomic_clear_acq_long #define atomic_clear_rel_ptr atomic_clear_rel_long #define atomic_add_ptr atomic_add_long #define atomic_add_acq_ptr atomic_add_acq_long #define atomic_add_rel_ptr atomic_add_rel_long #define atomic_subtract_ptr atomic_subtract_long #define atomic_subtract_acq_ptr atomic_subtract_acq_long #define atomic_subtract_rel_ptr atomic_subtract_rel_long #define atomic_cmpset_ptr atomic_cmpset_long #define atomic_cmpset_acq_ptr atomic_cmpset_acq_long #define atomic_cmpset_rel_ptr atomic_cmpset_rel_long #define atomic_load_acq_ptr atomic_load_acq_long #define atomic_store_rel_ptr atomic_store_rel_long #define atomic_readandclear_ptr atomic_readandclear_long #endif /* ! _MACHINE_ATOMIC_H_ */ Index: head/sys/powerpc/include/atomic.h =================================================================== --- head/sys/powerpc/include/atomic.h (revision 285282) +++ head/sys/powerpc/include/atomic.h (revision 285283) @@ -1,733 +1,774 @@ /*- * Copyright (c) 2008 Marcel Moolenaar * Copyright (c) 2001 Benno Rice * Copyright (c) 2001 David E. O'Brien * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif /* * The __ATOMIC_REL/ACQ() macros provide memory barriers only in conjunction * with the atomic lXarx/stXcx. sequences below. They are not exposed outside * of this file. See also Appendix B.2 of Book II of the architecture manual. * * Note that not all Book-E processors accept the light-weight sync variant. * In particular, early models of E500 cores are known to wedge. Bank on all * 64-bit capable CPUs to accept lwsync properly and pressimize 32-bit CPUs * to use the heavier-weight sync. */ #ifdef __powerpc64__ #define mb() __asm __volatile("lwsync" : : : "memory") #define rmb() __asm __volatile("lwsync" : : : "memory") #define wmb() __asm __volatile("lwsync" : : : "memory") #define __ATOMIC_REL() __asm __volatile("lwsync" : : : "memory") #define __ATOMIC_ACQ() __asm __volatile("isync" : : : "memory") #else #define mb() __asm __volatile("sync" : : : "memory") #define rmb() __asm __volatile("sync" : : : "memory") #define wmb() __asm __volatile("sync" : : : "memory") #define __ATOMIC_REL() __asm __volatile("sync" : : : "memory") #define __ATOMIC_ACQ() __asm __volatile("isync" : : : "memory") #endif /* * atomic_add(p, v) * { *p += v; } */ #define __atomic_add_int(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " add %0, %3, %0\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_add_int */ #ifdef __powerpc64__ #define __atomic_add_long(p, v, t) \ __asm __volatile( \ "1: ldarx %0, 0, %2\n" \ " add %0, %3, %0\n" \ " stdcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_add_long */ #else #define __atomic_add_long(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " add %0, %3, %0\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_add_long */ #endif #define _ATOMIC_ADD(type) \ static __inline void \ atomic_add_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_add_##type(p, v, t); \ } \ \ static __inline void \ atomic_add_acq_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_add_##type(p, v, t); \ __ATOMIC_ACQ(); \ } \ \ static __inline void \ atomic_add_rel_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __ATOMIC_REL(); \ __atomic_add_##type(p, v, t); \ } \ /* _ATOMIC_ADD */ _ATOMIC_ADD(int) _ATOMIC_ADD(long) #define atomic_add_32 atomic_add_int #define atomic_add_acq_32 atomic_add_acq_int #define atomic_add_rel_32 atomic_add_rel_int #ifdef __powerpc64__ #define atomic_add_64 atomic_add_long #define atomic_add_acq_64 atomic_add_acq_long #define atomic_add_rel_64 atomic_add_rel_long #define atomic_add_ptr atomic_add_long #define atomic_add_acq_ptr atomic_add_acq_long #define atomic_add_rel_ptr atomic_add_rel_long #else #define atomic_add_ptr atomic_add_int #define atomic_add_acq_ptr atomic_add_acq_int #define atomic_add_rel_ptr atomic_add_rel_int #endif #undef _ATOMIC_ADD #undef __atomic_add_long #undef __atomic_add_int /* * atomic_clear(p, v) * { *p &= ~v; } */ #define __atomic_clear_int(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " andc %0, %0, %3\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_clear_int */ #ifdef __powerpc64__ #define __atomic_clear_long(p, v, t) \ __asm __volatile( \ "1: ldarx %0, 0, %2\n" \ " andc %0, %0, %3\n" \ " stdcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_clear_long */ #else #define __atomic_clear_long(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " andc %0, %0, %3\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_clear_long */ #endif #define _ATOMIC_CLEAR(type) \ static __inline void \ atomic_clear_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_clear_##type(p, v, t); \ } \ \ static __inline void \ atomic_clear_acq_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_clear_##type(p, v, t); \ __ATOMIC_ACQ(); \ } \ \ static __inline void \ atomic_clear_rel_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __ATOMIC_REL(); \ __atomic_clear_##type(p, v, t); \ } \ /* _ATOMIC_CLEAR */ _ATOMIC_CLEAR(int) _ATOMIC_CLEAR(long) #define atomic_clear_32 atomic_clear_int #define atomic_clear_acq_32 atomic_clear_acq_int #define atomic_clear_rel_32 atomic_clear_rel_int #ifdef __powerpc64__ #define atomic_clear_64 atomic_clear_long #define atomic_clear_acq_64 atomic_clear_acq_long #define atomic_clear_rel_64 atomic_clear_rel_long #define atomic_clear_ptr atomic_clear_long #define atomic_clear_acq_ptr atomic_clear_acq_long #define atomic_clear_rel_ptr atomic_clear_rel_long #else #define atomic_clear_ptr atomic_clear_int #define atomic_clear_acq_ptr atomic_clear_acq_int #define atomic_clear_rel_ptr atomic_clear_rel_int #endif #undef _ATOMIC_CLEAR #undef __atomic_clear_long #undef __atomic_clear_int /* * atomic_cmpset(p, o, n) */ /* TODO -- see below */ /* * atomic_load_acq(p) */ /* TODO -- see below */ /* * atomic_readandclear(p) */ /* TODO -- see below */ /* * atomic_set(p, v) * { *p |= v; } */ #define __atomic_set_int(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " or %0, %3, %0\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_set_int */ #ifdef __powerpc64__ #define __atomic_set_long(p, v, t) \ __asm __volatile( \ "1: ldarx %0, 0, %2\n" \ " or %0, %3, %0\n" \ " stdcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_set_long */ #else #define __atomic_set_long(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " or %0, %3, %0\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_set_long */ #endif #define _ATOMIC_SET(type) \ static __inline void \ atomic_set_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_set_##type(p, v, t); \ } \ \ static __inline void \ atomic_set_acq_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_set_##type(p, v, t); \ __ATOMIC_ACQ(); \ } \ \ static __inline void \ atomic_set_rel_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __ATOMIC_REL(); \ __atomic_set_##type(p, v, t); \ } \ /* _ATOMIC_SET */ _ATOMIC_SET(int) _ATOMIC_SET(long) #define atomic_set_32 atomic_set_int #define atomic_set_acq_32 atomic_set_acq_int #define atomic_set_rel_32 atomic_set_rel_int #ifdef __powerpc64__ #define atomic_set_64 atomic_set_long #define atomic_set_acq_64 atomic_set_acq_long #define atomic_set_rel_64 atomic_set_rel_long #define atomic_set_ptr atomic_set_long #define atomic_set_acq_ptr atomic_set_acq_long #define atomic_set_rel_ptr atomic_set_rel_long #else #define atomic_set_ptr atomic_set_int #define atomic_set_acq_ptr atomic_set_acq_int #define atomic_set_rel_ptr atomic_set_rel_int #endif #undef _ATOMIC_SET #undef __atomic_set_long #undef __atomic_set_int /* * atomic_subtract(p, v) * { *p -= v; } */ #define __atomic_subtract_int(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " subf %0, %3, %0\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_subtract_int */ #ifdef __powerpc64__ #define __atomic_subtract_long(p, v, t) \ __asm __volatile( \ "1: ldarx %0, 0, %2\n" \ " subf %0, %3, %0\n" \ " stdcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_subtract_long */ #else #define __atomic_subtract_long(p, v, t) \ __asm __volatile( \ "1: lwarx %0, 0, %2\n" \ " subf %0, %3, %0\n" \ " stwcx. %0, 0, %2\n" \ " bne- 1b\n" \ : "=&r" (t), "=m" (*p) \ : "r" (p), "r" (v), "m" (*p) \ : "cr0", "memory") \ /* __atomic_subtract_long */ #endif #define _ATOMIC_SUBTRACT(type) \ static __inline void \ atomic_subtract_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_subtract_##type(p, v, t); \ } \ \ static __inline void \ atomic_subtract_acq_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __atomic_subtract_##type(p, v, t); \ __ATOMIC_ACQ(); \ } \ \ static __inline void \ atomic_subtract_rel_##type(volatile u_##type *p, u_##type v) { \ u_##type t; \ __ATOMIC_REL(); \ __atomic_subtract_##type(p, v, t); \ } \ /* _ATOMIC_SUBTRACT */ _ATOMIC_SUBTRACT(int) _ATOMIC_SUBTRACT(long) #define atomic_subtract_32 atomic_subtract_int #define atomic_subtract_acq_32 atomic_subtract_acq_int #define atomic_subtract_rel_32 atomic_subtract_rel_int #ifdef __powerpc64__ #define atomic_subtract_64 atomic_subtract_long #define atomic_subtract_acq_64 atomic_subract_acq_long #define atomic_subtract_rel_64 atomic_subtract_rel_long #define atomic_subtract_ptr atomic_subtract_long #define atomic_subtract_acq_ptr atomic_subtract_acq_long #define atomic_subtract_rel_ptr atomic_subtract_rel_long #else #define atomic_subtract_ptr atomic_subtract_int #define atomic_subtract_acq_ptr atomic_subtract_acq_int #define atomic_subtract_rel_ptr atomic_subtract_rel_int #endif #undef _ATOMIC_SUBTRACT #undef __atomic_subtract_long #undef __atomic_subtract_int /* * atomic_store_rel(p, v) */ /* TODO -- see below */ /* * Old/original implementations that still need revisiting. */ static __inline u_int atomic_readandclear_int(volatile u_int *addr) { u_int result,temp; #ifdef __GNUCLIKE_ASM __asm __volatile ( "\tsync\n" /* drain writes */ "1:\tlwarx %0, 0, %3\n\t" /* load old value */ "li %1, 0\n\t" /* load new value */ "stwcx. %1, 0, %3\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ : "=&r"(result), "=&r"(temp), "=m" (*addr) : "r" (addr), "m" (*addr) : "cr0", "memory"); #endif return (result); } #ifdef __powerpc64__ static __inline u_long atomic_readandclear_long(volatile u_long *addr) { u_long result,temp; #ifdef __GNUCLIKE_ASM __asm __volatile ( "\tsync\n" /* drain writes */ "1:\tldarx %0, 0, %3\n\t" /* load old value */ "li %1, 0\n\t" /* load new value */ "stdcx. %1, 0, %3\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ : "=&r"(result), "=&r"(temp), "=m" (*addr) : "r" (addr), "m" (*addr) : "cr0", "memory"); #endif return (result); } #endif #define atomic_readandclear_32 atomic_readandclear_int #ifdef __powerpc64__ #define atomic_readandclear_64 atomic_readandclear_long #define atomic_readandclear_ptr atomic_readandclear_long #else static __inline u_long atomic_readandclear_long(volatile u_long *addr) { return ((u_long)atomic_readandclear_int((volatile u_int *)addr)); } #define atomic_readandclear_ptr atomic_readandclear_int #endif /* * We assume that a = b will do atomic loads and stores. */ #define ATOMIC_STORE_LOAD(TYPE) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE v; \ \ v = *p; \ mb(); \ return (v); \ } \ \ static __inline void \ atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ { \ mb(); \ *p = v; \ } ATOMIC_STORE_LOAD(int) #define atomic_load_acq_32 atomic_load_acq_int #define atomic_store_rel_32 atomic_store_rel_int #ifdef __powerpc64__ ATOMIC_STORE_LOAD(long) #define atomic_load_acq_64 atomic_load_acq_long #define atomic_store_rel_64 atomic_store_rel_long #define atomic_load_acq_ptr atomic_load_acq_long #define atomic_store_rel_ptr atomic_store_rel_long #else static __inline u_long atomic_load_acq_long(volatile u_long *addr) { return ((u_long)atomic_load_acq_int((volatile u_int *)addr)); } static __inline void atomic_store_rel_long(volatile u_long *addr, u_long val) { atomic_store_rel_int((volatile u_int *)addr, (u_int)val); } #define atomic_load_acq_ptr atomic_load_acq_int #define atomic_store_rel_ptr atomic_store_rel_int #endif #undef ATOMIC_STORE_LOAD /* * Atomically compare the value stored at *p with cmpval and if the * two values are equal, update the value of *p with newval. Returns * zero if the compare failed, nonzero otherwise. */ static __inline int atomic_cmpset_int(volatile u_int* p, u_int cmpval, u_int newval) { int ret; #ifdef __GNUCLIKE_ASM __asm __volatile ( "1:\tlwarx %0, 0, %2\n\t" /* load old value */ "cmplw %3, %0\n\t" /* compare */ "bne 2f\n\t" /* exit if not equal */ "stwcx. %4, 0, %2\n\t" /* attempt to store */ "bne- 1b\n\t" /* spin if failed */ "li %0, 1\n\t" /* success - retval = 1 */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" "stwcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ "li %0, 0\n\t" /* failure - retval = 0 */ "3:\n\t" : "=&r" (ret), "=m" (*p) : "r" (p), "r" (cmpval), "r" (newval), "m" (*p) : "cr0", "memory"); #endif return (ret); } static __inline int atomic_cmpset_long(volatile u_long* p, u_long cmpval, u_long newval) { int ret; #ifdef __GNUCLIKE_ASM __asm __volatile ( #ifdef __powerpc64__ "1:\tldarx %0, 0, %2\n\t" /* load old value */ "cmpld %3, %0\n\t" /* compare */ "bne 2f\n\t" /* exit if not equal */ "stdcx. %4, 0, %2\n\t" /* attempt to store */ #else "1:\tlwarx %0, 0, %2\n\t" /* load old value */ "cmplw %3, %0\n\t" /* compare */ "bne 2f\n\t" /* exit if not equal */ "stwcx. %4, 0, %2\n\t" /* attempt to store */ #endif "bne- 1b\n\t" /* spin if failed */ "li %0, 1\n\t" /* success - retval = 1 */ "b 3f\n\t" /* we've succeeded */ "2:\n\t" #ifdef __powerpc64__ "stdcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ #else "stwcx. %0, 0, %2\n\t" /* clear reservation (74xx) */ #endif "li %0, 0\n\t" /* failure - retval = 0 */ "3:\n\t" : "=&r" (ret), "=m" (*p) : "r" (p), "r" (cmpval), "r" (newval), "m" (*p) : "cr0", "memory"); #endif return (ret); } static __inline int atomic_cmpset_acq_int(volatile u_int *p, u_int cmpval, u_int newval) { int retval; retval = atomic_cmpset_int(p, cmpval, newval); __ATOMIC_ACQ(); return (retval); } static __inline int atomic_cmpset_rel_int(volatile u_int *p, u_int cmpval, u_int newval) { __ATOMIC_REL(); return (atomic_cmpset_int(p, cmpval, newval)); } static __inline int atomic_cmpset_acq_long(volatile u_long *p, u_long cmpval, u_long newval) { u_long retval; retval = atomic_cmpset_long(p, cmpval, newval); __ATOMIC_ACQ(); return (retval); } static __inline int atomic_cmpset_rel_long(volatile u_long *p, u_long cmpval, u_long newval) { __ATOMIC_REL(); return (atomic_cmpset_long(p, cmpval, newval)); } #define atomic_cmpset_32 atomic_cmpset_int #define atomic_cmpset_acq_32 atomic_cmpset_acq_int #define atomic_cmpset_rel_32 atomic_cmpset_rel_int #ifdef __powerpc64__ #define atomic_cmpset_64 atomic_cmpset_long #define atomic_cmpset_acq_64 atomic_cmpset_acq_long #define atomic_cmpset_rel_64 atomic_cmpset_rel_long #define atomic_cmpset_ptr atomic_cmpset_long #define atomic_cmpset_acq_ptr atomic_cmpset_acq_long #define atomic_cmpset_rel_ptr atomic_cmpset_rel_long #else #define atomic_cmpset_ptr atomic_cmpset_int #define atomic_cmpset_acq_ptr atomic_cmpset_acq_int #define atomic_cmpset_rel_ptr atomic_cmpset_rel_int #endif static __inline u_int atomic_fetchadd_int(volatile u_int *p, u_int v) { u_int value; do { value = *p; } while (!atomic_cmpset_int(p, value, value + v)); return (value); } static __inline u_long atomic_fetchadd_long(volatile u_long *p, u_long v) { u_long value; do { value = *p; } while (!atomic_cmpset_long(p, value, value + v)); return (value); } static __inline u_int atomic_swap_32(volatile u_int *p, u_int v) { u_int prev; __asm __volatile( "1: lwarx %0,0,%2\n" " stwcx. %3,0,%2\n" " bne- 1b\n" : "=&r" (prev), "+m" (*(volatile u_int *)p) : "r" (p), "r" (v) : "cr0", "memory"); return (prev); } #ifdef __powerpc64__ static __inline u_long atomic_swap_64(volatile u_long *p, u_long v) { u_long prev; __asm __volatile( "1: ldarx %0,0,%2\n" " stdcx. %3,0,%2\n" " bne- 1b\n" : "=&r" (prev), "+m" (*(volatile u_long *)p) : "r" (p), "r" (v) : "cr0", "memory"); return (prev); } #endif #define atomic_fetchadd_32 atomic_fetchadd_int #define atomic_swap_int atomic_swap_32 #ifdef __powerpc64__ #define atomic_fetchadd_64 atomic_fetchadd_long #define atomic_swap_long atomic_swap_64 #define atomic_swap_ptr atomic_swap_64 #endif #undef __ATOMIC_REL #undef __ATOMIC_ACQ +static __inline void +atomic_thread_fence_acq(void) +{ + + /* See above comment about lwsync being broken on Book-E. */ +#ifdef __powerpc64__ + __asm __volatile("lwsync" : : : "memory"); +#else + __asm __volatile("sync" : : : "memory"); +#endif +} + +static __inline void +atomic_thread_fence_rel(void) +{ + +#ifdef __powerpc64__ + __asm __volatile("lwsync" : : : "memory"); +#else + __asm __volatile("sync" : : : "memory"); +#endif +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + +#ifdef __powerpc64__ + __asm __volatile("lwsync" : : : "memory"); +#else + __asm __volatile("sync" : : : "memory"); +#endif +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + __asm __volatile("sync" : : : "memory"); +} + #endif /* ! _MACHINE_ATOMIC_H_ */ Index: head/sys/sparc64/include/atomic.h =================================================================== --- head/sys/sparc64/include/atomic.h (revision 285282) +++ head/sys/sparc64/include/atomic.h (revision 285283) @@ -1,307 +1,336 @@ /*- * Copyright (c) 1998 Doug Rabson. * Copyright (c) 2001 Jake Burkholder. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: FreeBSD: src/sys/i386/include/atomic.h,v 1.20 2001/02/11 * $FreeBSD$ */ #ifndef _MACHINE_ATOMIC_H_ #define _MACHINE_ATOMIC_H_ #include #define mb() __asm__ __volatile__ ("membar #MemIssue": : :"memory") #define wmb() mb() #define rmb() mb() /* Userland needs different ASI's. */ #ifdef _KERNEL #define __ASI_ATOMIC ASI_N #else #define __ASI_ATOMIC ASI_P #endif /* * Various simple arithmetic on memory which is atomic in the presence * of interrupts and multiple processors. See atomic(9) for details. * Note that efficient hardware support exists only for the 32 and 64 * bit variants; the 8 and 16 bit versions are not provided and should * not be used in MI code. * * This implementation takes advantage of the fact that the sparc64 * cas instruction is both a load and a store. The loop is often coded * as follows: * * do { * expect = *p; * new = expect + 1; * } while (cas(p, expect, new) != expect); * * which performs an unnnecessary load on each iteration that the cas * operation fails. Modified as follows: * * expect = *p; * for (;;) { * new = expect + 1; * result = cas(p, expect, new); * if (result == expect) * break; * expect = result; * } * * the return value of cas is used to avoid the extra reload. * * We only include a memory barrier in the rel variants as in total store * order which we use for running the kernel and all of the userland atomic * loads and stores behave as if the were followed by a membar with a mask * of #LoadLoad | #LoadStore | #StoreStore. In order to be also sufficient * for use of relaxed memory ordering, the atomic_cas() in the acq variants * additionally would have to be followed by a membar #LoadLoad | #LoadStore. * Due to the suggested assembly syntax of the membar operands containing a * # character, they cannot be used in macros. The cmask and mmask bits thus * are hard coded in machine/cpufunc.h and used here through macros. * Hopefully the bit numbers won't change in the future. */ #define itype(sz) uint ## sz ## _t #define atomic_cas_32(p, e, s) casa((p), (e), (s), __ASI_ATOMIC) #define atomic_cas_64(p, e, s) casxa((p), (e), (s), __ASI_ATOMIC) #define atomic_cas(p, e, s, sz) \ atomic_cas_ ## sz((p), (e), (s)) #define atomic_cas_acq(p, e, s, sz) ({ \ itype(sz) v; \ v = atomic_cas((p), (e), (s), sz); \ __compiler_membar(); \ v; \ }) #define atomic_cas_rel(p, e, s, sz) ({ \ itype(sz) v; \ membar(LoadStore | StoreStore); \ v = atomic_cas((p), (e), (s), sz); \ v; \ }) #define atomic_op(p, op, v, sz) ({ \ itype(sz) e, r, s; \ for (e = *(volatile itype(sz) *)(p);; e = r) { \ s = e op (v); \ r = atomic_cas_ ## sz((p), e, s); \ if (r == e) \ break; \ } \ e; \ }) #define atomic_op_acq(p, op, v, sz) ({ \ itype(sz) t; \ t = atomic_op((p), op, (v), sz); \ __compiler_membar(); \ t; \ }) #define atomic_op_rel(p, op, v, sz) ({ \ itype(sz) t; \ membar(LoadStore | StoreStore); \ t = atomic_op((p), op, (v), sz); \ t; \ }) #define atomic_ld_acq(p, sz) ({ \ itype(sz) v; \ v = atomic_cas((p), 0, 0, sz); \ __compiler_membar(); \ v; \ }) #define atomic_ld_clear(p, sz) ({ \ itype(sz) e, r; \ for (e = *(volatile itype(sz) *)(p);; e = r) { \ r = atomic_cas((p), e, 0, sz); \ if (r == e) \ break; \ } \ e; \ }) #define atomic_st(p, v, sz) do { \ itype(sz) e, r; \ for (e = *(volatile itype(sz) *)(p);; e = r) { \ r = atomic_cas((p), e, (v), sz); \ if (r == e) \ break; \ } \ } while (0) #define atomic_st_acq(p, v, sz) do { \ atomic_st((p), (v), sz); \ __compiler_membar(); \ } while (0) #define atomic_st_rel(p, v, sz) do { \ membar(LoadStore | StoreStore); \ atomic_st((p), (v), sz); \ } while (0) #define ATOMIC_GEN(name, ptype, vtype, atype, sz) \ \ static __inline vtype \ atomic_add_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op((p), +, (v), sz)); \ } \ static __inline vtype \ atomic_add_acq_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_acq((p), +, (v), sz)); \ } \ static __inline vtype \ atomic_add_rel_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_rel((p), +, (v), sz)); \ } \ \ static __inline vtype \ atomic_clear_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op((p), &, ~(v), sz)); \ } \ static __inline vtype \ atomic_clear_acq_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_acq((p), &, ~(v), sz)); \ } \ static __inline vtype \ atomic_clear_rel_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_rel((p), &, ~(v), sz)); \ } \ \ static __inline int \ atomic_cmpset_ ## name(volatile ptype p, vtype e, vtype s) \ { \ return (((vtype)atomic_cas((p), (e), (s), sz)) == (e)); \ } \ static __inline int \ atomic_cmpset_acq_ ## name(volatile ptype p, vtype e, vtype s) \ { \ return (((vtype)atomic_cas_acq((p), (e), (s), sz)) == (e)); \ } \ static __inline int \ atomic_cmpset_rel_ ## name(volatile ptype p, vtype e, vtype s) \ { \ return (((vtype)atomic_cas_rel((p), (e), (s), sz)) == (e)); \ } \ \ static __inline vtype \ atomic_load_ ## name(volatile ptype p) \ { \ return ((vtype)atomic_cas((p), 0, 0, sz)); \ } \ static __inline vtype \ atomic_load_acq_ ## name(volatile ptype p) \ { \ return ((vtype)atomic_cas_acq((p), 0, 0, sz)); \ } \ \ static __inline vtype \ atomic_readandclear_ ## name(volatile ptype p) \ { \ return ((vtype)atomic_ld_clear((p), sz)); \ } \ \ static __inline vtype \ atomic_set_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op((p), |, (v), sz)); \ } \ static __inline vtype \ atomic_set_acq_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_acq((p), |, (v), sz)); \ } \ static __inline vtype \ atomic_set_rel_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_rel((p), |, (v), sz)); \ } \ \ static __inline vtype \ atomic_subtract_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op((p), -, (v), sz)); \ } \ static __inline vtype \ atomic_subtract_acq_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_acq((p), -, (v), sz)); \ } \ static __inline vtype \ atomic_subtract_rel_ ## name(volatile ptype p, atype v) \ { \ return ((vtype)atomic_op_rel((p), -, (v), sz)); \ } \ \ static __inline void \ atomic_store_acq_ ## name(volatile ptype p, vtype v) \ { \ atomic_st_acq((p), (v), sz); \ } \ static __inline void \ atomic_store_rel_ ## name(volatile ptype p, vtype v) \ { \ atomic_st_rel((p), (v), sz); \ } +static __inline void +atomic_thread_fence_acq(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_acq_rel(void) +{ + + __compiler_membar(); +} + +static __inline void +atomic_thread_fence_seq_cst(void) +{ + + membar(LoadLoad | LoadStore | StoreStore | StoreLoad); +} + + ATOMIC_GEN(int, u_int *, u_int, u_int, 32); ATOMIC_GEN(32, uint32_t *, uint32_t, uint32_t, 32); ATOMIC_GEN(long, u_long *, u_long, u_long, 64); ATOMIC_GEN(64, uint64_t *, uint64_t, uint64_t, 64); ATOMIC_GEN(ptr, uintptr_t *, uintptr_t, uintptr_t, 64); #define atomic_fetchadd_int atomic_add_int #define atomic_fetchadd_32 atomic_add_32 #define atomic_fetchadd_long atomic_add_long #undef ATOMIC_GEN #undef atomic_cas #undef atomic_cas_acq #undef atomic_cas_rel #undef atomic_op #undef atomic_op_acq #undef atomic_op_rel #undef atomic_ld_acq #undef atomic_ld_clear #undef atomic_st #undef atomic_st_acq #undef atomic_st_rel #endif /* !_MACHINE_ATOMIC_H_ */