diff --git a/lib/libcompiler_rt/Makefile b/lib/libcompiler_rt/Makefile index 507e87c9ed00..8c560d7b3681 100644 --- a/lib/libcompiler_rt/Makefile +++ b/lib/libcompiler_rt/Makefile @@ -1,199 +1,200 @@ # $FreeBSD$ .include LIB= compiler_rt NO_PIC= WARNS?= 2 CFLAGS+=${PICFLAG} -fvisibility=hidden -DVISIBILITY_HIDDEN .if ${MACHINE_CPUARCH} == "amd64" CRTARCH=x86_64 .else CRTARCH=${MACHINE_CPUARCH} .endif CRTSRC=${.CURDIR}/../../contrib/compiler-rt/lib .PATH: ${CRTSRC}/${CRTARCH} ${CRTSRC} SRCF= absvdi2 \ absvsi2 \ absvti2 \ addvdi3 \ addvsi3 \ addvti3 \ ashldi3 \ ashlti3 \ ashrdi3 \ ashrti3 \ clear_cache \ clzdi2 \ clzsi2 \ clzti2 \ cmpdi2 \ cmpti2 \ comparedf2 \ comparesf2 \ ctzdi2 \ ctzsi2 \ ctzti2 \ divdc3 \ divdi3 \ divmoddi4 \ divmodsi4 \ divsc3 \ divti3 \ divxc3 \ enable_execute_stack \ eprintf \ ffsdi2 \ ffsti2 \ fixdfdi \ fixdfti \ fixsfdi \ fixsfti \ fixunsdfdi \ fixunsdfsi \ fixunsdfti \ fixunssfdi \ fixunssfsi \ fixunssfti \ fixunsxfdi \ fixunsxfsi \ fixunsxfti \ fixxfdi \ fixxfti \ floatdidf \ floatdisf \ floatdixf \ floattidf \ floattisf \ floattixf \ floatundidf \ floatundisf \ floatundixf \ floatunsidf \ floatunsisf \ floatuntidf \ floatuntisf \ floatuntixf \ int_util \ lshrdi3 \ lshrti3 \ moddi3 \ modti3 \ muldc3 \ muldi3 \ mulodi4 \ mulosi4 \ muloti4 \ mulsc3 \ multi3 \ mulvdi3 \ mulvsi3 \ mulvti3 \ mulxc3 \ negdf2 \ negdi2 \ negsf2 \ negti2 \ negvdi2 \ negvsi2 \ negvti2 \ paritydi2 \ paritysi2 \ parityti2 \ popcountdi2 \ popcountsi2 \ popcountti2 \ powidf2 \ powisf2 \ powitf2 \ powixf2 \ subvdi3 \ subvsi3 \ subvti3 \ trampoline_setup \ ucmpdi2 \ ucmpti2 \ udivdi3 \ udivmoddi4 \ udivmodsi4 \ udivmodti4 \ udivti3 \ umoddi3 \ umodti3 # These are already shipped by libc.a on arm and mips .if ${MACHINE_CPUARCH} != "arm" && ${MACHINE_CPUARCH} != "mips" SRCF+= adddf3 \ addsf3 \ divdf3 \ divsf3 \ extendsfdf2 \ fixdfsi \ fixsfsi \ floatsidf \ floatsisf \ muldf3 \ mulsf3 \ subdf3 \ subsf3 \ truncdfsf2 .endif .if ${MACHINE_CPUARCH} != "mips" && \ (${MACHINE_CPUARCH} != "arm" || ${MK_ARM_EABI} != "no") SRCF+= divsi3 \ modsi3 \ udivsi3 \ umodsi3 .endif # FreeBSD-specific atomic intrinsics. -.if ${MACHINE_CPUARCH} == "arm" +.if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "armv6" .PATH: ${.CURDIR}/../../sys/arm/arm SRCF+= stdatomic +CFLAGS+= -DEMIT_SYNC_ATOMICS .elif ${MACHINE_CPUARCH} == "mips" .PATH: ${.CURDIR}/../../sys/mips/mips SRCF+= stdatomic .endif .for file in ${SRCF} . if ${MACHINE_CPUARCH} != "arm" && exists(${CRTSRC}/${CRTARCH}/${file}.S) SRCS+= ${file}.S . else SRCS+= ${file}.c . endif .endfor .if ${MACHINE_CPUARCH} == "arm" && ${MK_ARM_EABI} != "no" SRCS+= aeabi_idivmod.S \ aeabi_ldivmod.S \ aeabi_memcmp.S \ aeabi_memcpy.S \ aeabi_memmove.S \ aeabi_memset.S \ aeabi_uidivmod.S \ aeabi_uldivmod.S .endif .if ${MK_INSTALLLIB} != "no" SYMLINKS+=libcompiler_rt.a ${LIBDIR}/libgcc.a .endif .if ${MK_PROFILE} != "no" SYMLINKS+=libcompiler_rt_p.a ${LIBDIR}/libgcc_p.a .endif .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" || \ ${MACHINE_CPUARCH} == "powerpc" AFLAGS+=--noexecstack ACFLAGS+=-Wa,--noexecstack .endif .include diff --git a/sys/arm/arm/stdatomic.c b/sys/arm/arm/stdatomic.c index 0b501fb51029..6720234c5e78 100644 --- a/sys/arm/arm/stdatomic.c +++ b/sys/arm/arm/stdatomic.c @@ -1,814 +1,865 @@ /*- * Copyright (c) 2013 Ed Schouten * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef _KERNEL #include "opt_global.h" #endif /* * Executing statements with interrupts disabled. */ #if defined(_KERNEL) && !defined(SMP) #define WITHOUT_INTERRUPTS(s) do { \ register_t regs; \ \ regs = intr_disable(); \ do s while (0); \ intr_restore(regs); \ } while (0) #endif /* _KERNEL && !SMP */ /* * Memory barriers. * * It turns out __sync_synchronize() does not emit any code when used * with GCC 4.2. Implement our own version that does work reliably. * * Although __sync_lock_test_and_set() should only perform an acquire * barrier, make it do a full barrier like the other functions. This * should make 's atomic_exchange_explicit() work reliably. */ #if defined(_KERNEL) && !defined(SMP) static inline void do_sync(void) { __asm volatile ("" : : : "memory"); } #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) static inline void do_sync(void) { __asm volatile ("dmb" : : : "memory"); } #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ defined(__ARM_ARCH_6ZK__) static inline void do_sync(void) { __asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory"); } #endif #if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS) /* * New C11 __atomic_* API. */ #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ defined(__ARM_ARCH_6ZK__) || \ defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) /* These systems should be supported by the compiler. */ #else /* __ARM_ARCH_5__ */ /* Clang doesn't allow us to reimplement builtins without this. */ #ifdef __clang__ #pragma redefine_extname __sync_synchronize_ext __sync_synchronize #define __sync_synchronize __sync_synchronize_ext #endif void __sync_synchronize(void) { } #ifdef _KERNEL #ifdef SMP #error "On SMP systems we should have proper atomic operations." #endif /* * On uniprocessor systems, we can perform the atomic operations by * disabling interrupts. 
*/ #define EMIT_LOAD_N(N, uintN_t) \ uintN_t \ __atomic_load_##N(uintN_t *mem, int model __unused) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ }); \ return (ret); \ } #define EMIT_STORE_N(N, uintN_t) \ void \ __atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ \ WITHOUT_INTERRUPTS({ \ *mem = val; \ }); \ } #define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ _Bool \ __atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \ uintN_t desired, int success __unused, int failure __unused) \ { \ _Bool ret; \ \ WITHOUT_INTERRUPTS({ \ if (*mem == *expected) { \ *mem = desired; \ ret = 1; \ } else { \ *expected = *mem; \ ret = 0; \ } \ }); \ return (ret); \ } #define EMIT_FETCH_OP_N(N, uintN_t, name, op) \ uintN_t \ __atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ *mem op val; \ }); \ return (ret); \ } #define EMIT_ALL_OPS_N(N, uintN_t) \ EMIT_LOAD_N(N, uintN_t) \ EMIT_STORE_N(N, uintN_t) \ EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \ EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \ EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=) EMIT_ALL_OPS_N(1, uint8_t) EMIT_ALL_OPS_N(2, uint16_t) EMIT_ALL_OPS_N(4, uint32_t) EMIT_ALL_OPS_N(8, uint64_t) +#undef EMIT_ALL_OPS_N #else /* !_KERNEL */ /* * For userspace on uniprocessor systems, we can implement the atomic * operations by using a Restartable Atomic Sequence. This makes the * kernel restart the code from the beginning when interrupted. */ #define EMIT_LOAD_N(N, uintN_t) \ uintN_t \ __atomic_load_##N(uintN_t *mem, int model __unused) \ { \ \ return (*mem); \ } #define EMIT_STORE_N(N, uintN_t) \ void \ __atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ \ *mem = val; \ } #define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \ uintN_t \ __atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"str" %3, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \ _Bool \ __atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \ uintN_t desired, int success __unused, int failure __unused) \ { \ uint32_t expected, old, temp, ras_start; \ \ expected = *pexpected; \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%6]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%6, #4]\n" \ \ "\t"ldr" %0, %5\n" /* Load old value. */ \ "\tcmp %0, %3\n" /* Compare to expected value. */\ "\t"streq" %4, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. 
*/ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%6]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%6, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (expected), "r" (desired), "m" (*mem), \ "r" (ras_start)); \ if (old == expected) { \ return (1); \ } else { \ *pexpected = old; \ return (0); \ } \ } #define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op) \ uintN_t \ __atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"op" %2, %0, %3\n" /* Calculate new value. */ \ "\t"str" %2, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \ EMIT_LOAD_N(N, uintN_t) \ EMIT_STORE_N(N, uintN_t) \ EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \ EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \ EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add") \ EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and") \ EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr") \ EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub") \ EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor") EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq") EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq") EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq") +#undef EMIT_ALL_OPS_N #endif /* _KERNEL */ #endif #endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */ -#if defined(__SYNC_ATOMICS) +#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS) + +#ifdef __clang__ +#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1 +#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2 +#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4 +#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1 +#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2 +#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4 +#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1 +#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2 +#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4 +#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1 +#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2 +#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4 +#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1 +#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2 +#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4 +#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1 +#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2 +#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4 +#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1 +#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2 +#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4 +#endif /* * 
Old __sync_* API. */ #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ defined(__ARM_ARCH_6ZK__) || \ defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) /* Implementations for old GCC versions, lacking support for atomics. */ typedef union { uint8_t v8[4]; uint32_t v32; } reg_t; /* * Given a memory address pointing to an 8-bit or 16-bit integer, return * the address of the 32-bit word containing it. */ static inline uint32_t * round_to_word(void *ptr) { return ((uint32_t *)((intptr_t)ptr & ~3)); } /* * Utility functions for loading and storing 8-bit and 16-bit integers * in 32-bit words at an offset corresponding with the location of the * atomic variable. */ static inline void put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val) { size_t offset; offset = (intptr_t)offset_ptr & 3; r->v8[offset] = val; } static inline uint8_t get_1(const reg_t *r, const uint8_t *offset_ptr) { size_t offset; offset = (intptr_t)offset_ptr & 3; return (r->v8[offset]); } static inline void put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val) { size_t offset; union { uint16_t in; uint8_t out[2]; } bytes; offset = (intptr_t)offset_ptr & 3; bytes.in = val; r->v8[offset] = bytes.out[0]; r->v8[offset + 1] = bytes.out[1]; } static inline uint16_t get_2(const reg_t *r, const uint16_t *offset_ptr) { size_t offset; union { uint8_t in[2]; uint16_t out; } bytes; offset = (intptr_t)offset_ptr & 3; bytes.in[0] = r->v8[offset]; bytes.in[1] = r->v8[offset + 1]; return (bytes.out); } /* * 8-bit and 16-bit routines. * * These operations are not natively supported by the CPU, so we use * some shifting and bitmasking on top of the 32-bit instructions. */ #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \ uintN_t \ -__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \ +__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t *mem32; \ reg_t val32, negmask, old; \ uint32_t temp1, temp2; \ \ mem32 = round_to_word(mem); \ val32.v32 = 0x00000000; \ put_##N(&val32, mem, val); \ negmask.v32 = 0xffffffff; \ put_##N(&negmask, mem, 0); \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %6\n" /* Load old value. */ \ "\tand %2, %5, %0\n" /* Remove the old value. */ \ "\torr %2, %2, %4\n" /* Put in the new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_LOCK_TEST_AND_SET_N(1, uint8_t) EMIT_LOCK_TEST_AND_SET_N(2, uint16_t) #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ uintN_t \ -__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ +__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ uintN_t desired) \ { \ uint32_t *mem32; \ reg_t expected32, desired32, posmask, old; \ uint32_t negmask, temp1, temp2; \ \ mem32 = round_to_word(mem); \ expected32.v32 = 0x00000000; \ put_##N(&expected32, mem, expected); \ desired32.v32 = 0x00000000; \ put_##N(&desired32, mem, desired); \ posmask.v32 = 0x00000000; \ put_##N(&posmask, mem, ~0); \ negmask = ~posmask.v32; \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %8\n" /* Load old value. */ \ "\tand %2, %6, %0\n" /* Isolate the old value. */ \ "\tcmp %2, %4\n" /* Compare to expected value. */\ "\tbne 2f\n" /* Values are unequal. */ \ "\tand %2, %7, %0\n" /* Remove the old value. 
*/ \ "\torr %2, %5\n" /* Put in the new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ "2:" \ : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (expected32.v32), "r" (desired32.v32), \ "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t) EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t) #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \ uintN_t \ -__sync_##name##_##N(uintN_t *mem, uintN_t val) \ +__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t *mem32; \ reg_t val32, posmask, old; \ uint32_t negmask, temp1, temp2; \ \ mem32 = round_to_word(mem); \ val32.v32 = 0x00000000; \ put_##N(&val32, mem, val); \ posmask.v32 = 0x00000000; \ put_##N(&posmask, mem, ~0); \ negmask = ~posmask.v32; \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %7\n" /* Load old value. */ \ "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ "\tand %2, %5\n" /* Isolate the new value. */ \ "\tand %3, %6, %0\n" /* Remove the old value. */ \ "\torr %2, %2, %3\n" /* Put in the new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \ "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add") EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub") EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add") EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub") #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \ uintN_t \ -__sync_##name##_##N(uintN_t *mem, uintN_t val) \ +__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t *mem32; \ reg_t val32, old; \ uint32_t temp1, temp2; \ \ mem32 = round_to_word(mem); \ val32.v32 = idempotence ? 0xffffffff : 0x00000000; \ put_##N(&val32, mem, val); \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %5\n" /* Load old value. */ \ "\t"op" %2, %4, %0\n" /* Calculate new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val32.v32), "m" (*mem32)); \ return (get_##N(&old, mem)); \ } EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1) EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0) EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0) EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1) EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0) EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0) /* * 32-bit routines. */ uint32_t -__sync_lock_test_and_set_4(uint32_t *mem, uint32_t val) +__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val) { uint32_t old, temp; do_sync(); __asm volatile ( "1:" "\tldrex %0, %4\n" /* Load old value. */ "\tstrex %2, %3, %1\n" /* Attempt to store. */ "\tcmp %2, #0\n" /* Did it succeed? */ "\tbne 1b\n" /* Spin if failed. 
*/ : "=&r" (old), "=m" (*mem), "=&r" (temp) : "r" (val), "m" (*mem)); return (old); } uint32_t -__sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected, +__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected, uint32_t desired) { uint32_t old, temp; do_sync(); __asm volatile ( "1:" "\tldrex %0, %5\n" /* Load old value. */ "\tcmp %0, %3\n" /* Compare to expected value. */ "\tbne 2f\n" /* Values are unequal. */ "\tstrex %2, %4, %1\n" /* Attempt to store. */ "\tcmp %2, #0\n" /* Did it succeed? */ "\tbne 1b\n" /* Spin if failed. */ "2:" : "=&r" (old), "=m" (*mem), "=&r" (temp) : "r" (expected), "r" (desired), "m" (*mem)); return (old); } #define EMIT_FETCH_AND_OP_4(name, op) \ uint32_t \ -__sync_##name##_4(uint32_t *mem, uint32_t val) \ +__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \ { \ uint32_t old, temp1, temp2; \ \ do_sync(); \ __asm volatile ( \ "1:" \ "\tldrex %0, %5\n" /* Load old value. */ \ "\t"op" %2, %0, %4\n" /* Calculate new value. */ \ "\tstrex %3, %2, %1\n" /* Attempt to store. */ \ "\tcmp %3, #0\n" /* Did it succeed? */ \ "\tbne 1b\n" /* Spin if failed. */ \ : "=&r" (old), "=m" (*mem), "=&r" (temp1), \ "=&r" (temp2) \ : "r" (val), "m" (*mem)); \ return (old); \ } EMIT_FETCH_AND_OP_4(fetch_and_add, "add") EMIT_FETCH_AND_OP_4(fetch_and_and, "and") EMIT_FETCH_AND_OP_4(fetch_and_or, "orr") EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub") EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor") #else /* __ARM_ARCH_5__ */ #ifdef _KERNEL #ifdef SMP #error "On SMP systems we should have proper atomic operations." #endif /* * On uniprocessor systems, we can perform the atomic operations by * disabling interrupts. */ #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ uintN_t \ __sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ uintN_t desired) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ if (*mem == expected) \ *mem = desired; \ }); \ return (ret); \ } #define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \ uintN_t \ __sync_##name##_##N(uintN_t *mem, uintN_t val) \ { \ uintN_t ret; \ \ WITHOUT_INTERRUPTS({ \ ret = *mem; \ *mem op val; \ }); \ return (ret); \ } #define EMIT_ALL_OPS_N(N, uintN_t) \ EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \ EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \ EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=) EMIT_ALL_OPS_N(1, uint8_t) EMIT_ALL_OPS_N(2, uint16_t) EMIT_ALL_OPS_N(4, uint32_t) EMIT_ALL_OPS_N(8, uint64_t) +#undef EMIT_ALL_OPS_N #else /* !_KERNEL */ /* * For userspace on uniprocessor systems, we can implement the atomic * operations by using a Restartable Atomic Sequence. This makes the * kernel restart the code from the beginning when interrupted. */ #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \ uintN_t \ -__sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \ +__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"str" %3, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. 
*/ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \ uintN_t \ -__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \ +__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \ uintN_t desired) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%6]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%6, #4]\n" \ \ "\t"ldr" %0, %5\n" /* Load old value. */ \ "\tcmp %0, %3\n" /* Compare to expected value. */\ "\t"streq" %4, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%6]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%6, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (expected), "r" (desired), "m" (*mem), \ "r" (ras_start)); \ return (old); \ } #define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \ uintN_t \ -__sync_##name##_##N(uintN_t *mem, uintN_t val) \ +__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \ { \ uint32_t old, temp, ras_start; \ \ ras_start = ARM_RAS_START; \ __asm volatile ( \ /* Set up Restartable Atomic Sequence. */ \ "1:" \ "\tadr %2, 1b\n" \ "\tstr %2, [%5]\n" \ "\tadr %2, 2f\n" \ "\tstr %2, [%5, #4]\n" \ \ "\t"ldr" %0, %4\n" /* Load old value. */ \ "\t"op" %2, %0, %3\n" /* Calculate new value. */ \ "\t"str" %2, %1\n" /* Store new value. */ \ \ /* Tear down Restartable Atomic Sequence. */ \ "2:" \ "\tmov %2, #0x00000000\n" \ "\tstr %2, [%5]\n" \ "\tmov %2, #0xffffffff\n" \ "\tstr %2, [%5, #4]\n" \ : "=&r" (old), "=m" (*mem), "=&r" (temp) \ : "r" (val), "m" (*mem), "r" (ras_start)); \ return (old); \ } #define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \ EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \ EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \ EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor") EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb") EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh") EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq") +#ifndef __clang__ +__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1); +__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2); +__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4); +__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1); +__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2); +__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4); +__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1); +__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2); +__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4); +__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1); +__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2); +__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4); +__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1); 
+__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
+__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
+__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
+__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
+__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
+__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
+__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
+__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
+#endif
+
 #endif /* _KERNEL */
 
 #endif
 
 #endif /* __SYNC_ATOMICS */
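
Note on the Makefile hunk: stdatomic.c is now compiled into libcompiler_rt for arm and armv6 with -DEMIT_SYNC_ATOMICS, so the library (installed as libgcc.a via the SYMLINKS rule) carries the __sync_* entry points that compilers emit as libcalls when they do not inline an atomic operation. A minimal consumer-side sketch, not part of the patch, showing how such a call comes into existence:

/*
 * Sketch only: code like this is what pulls the __sync_* symbols out
 * of libcompiler_rt.  When the compiler cannot or will not inline the
 * operation, the builtin below is lowered to a call to
 * __sync_fetch_and_add_4(), resolved at link time from libgcc.a.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t counter;

int
main(void)
{
	uint32_t old;

	/* May become an inline ldrex/strex loop or a libcall. */
	old = __sync_fetch_and_add(&counter, 1);
	printf("old=%u, counter=%u\n", (unsigned)old, (unsigned)counter);
	return (0);
}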
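
Note on the 8-bit and 16-bit routines: rather than using byte/halfword exclusives, the code operates on the aligned 32-bit word that contains the variable; round_to_word() strips the low two address bits and the put_N()/get_N() helpers build the value and masks at the matching byte offset. The same arithmetic written as plain, non-atomic C, for illustration only (hypothetical helper name, little-endian shift computation assumed; the real code does this inside an ldrex/strex loop):

/*
 * Illustration only: the masking behind __sync_lock_test_and_set_1_c(),
 * without the exclusive monitor, so NOT safe for concurrent use.
 */
#include <stdint.h>

static uint8_t
nonatomic_swap_8(uint8_t *mem, uint8_t val)
{
	uint32_t *mem32;
	uint32_t shift, mask, old;

	/* Address of the aligned 32-bit word containing *mem. */
	mem32 = (uint32_t *)((uintptr_t)mem & ~(uintptr_t)3);
	/* Bit position of the byte within that word (little-endian). */
	shift = ((uintptr_t)mem & 3) * 8;
	mask = (uint32_t)0xff << shift;

	old = *mem32;
	/* Remove the old byte, then put in the new value. */
	*mem32 = (old & ~mask) | ((uint32_t)val << shift);
	return ((old & mask) >> shift);
}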
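
Note on the _c suffix: the definitions are renamed at the C level and then re-exported under the original __sync_* names, via #pragma redefine_extname for clang and __strong_reference() (from <sys/cdefs.h>) for GCC, so the compiler never sees a C definition of a name it treats as a builtin while the object file still provides the expected symbols. A reduced sketch of the pattern, with a hypothetical function name:

/*
 * Sketch of the aliasing pattern used above, reduced to one function.
 * my_impl_c()/my_impl are hypothetical stand-ins.
 */
#include <sys/cdefs.h>

#ifdef __clang__
/* Emit my_impl_c under the assembler name "my_impl". */
#pragma redefine_extname my_impl_c my_impl
#endif

int
my_impl_c(int x)
{

	return (x + 1);
}

#ifndef __clang__
/* GCC path: export "my_impl" as an alias for my_impl_c. */
__strong_reference(my_impl_c, my_impl);
#endif

With clang only the renamed symbol ends up in the object file; with GCC both the _c definition and its alias are emitted, which is fine for a static archive.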