Index: vendor/compiler-rt/dist/lib/builtins/arm/comparesf2.S =================================================================== --- vendor/compiler-rt/dist/lib/builtins/arm/comparesf2.S (revision 312959) +++ vendor/compiler-rt/dist/lib/builtins/arm/comparesf2.S (revision 312960) @@ -1,296 +1,296 @@ //===-- comparesf2.S - Implement single-precision soft-float comparisons --===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the following soft-fp_t comparison routines: // // __eqsf2 __gesf2 __unordsf2 // __lesf2 __gtsf2 // __ltsf2 // __nesf2 // // The semantics of the routines grouped in each column are identical, so there // is a single implementation for each, with multiple names. // // The routines behave as follows: // // __lesf2(a,b) returns -1 if a < b // 0 if a == b // 1 if a > b // 1 if either a or b is NaN // // __gesf2(a,b) returns -1 if a < b // 0 if a == b // 1 if a > b // -1 if either a or b is NaN // // __unordsf2(a,b) returns 0 if both a and b are numbers // 1 if either a or b is NaN // // Note that __lesf2( ) and __gesf2( ) are identical except in their handling of // NaN values. // //===----------------------------------------------------------------------===// #include "../assembly.h" .syntax unified #if __ARM_ARCH_ISA_THUMB == 2 .thumb #endif @ int __eqsf2(float a, float b) .p2align 2 DEFINE_COMPILERRT_FUNCTION(__eqsf2) #if defined(COMPILER_RT_ARMHF_TARGET) vmov r0, s0 vmov r1, s1 #endif // Make copies of a and b with the sign bit shifted off the top. These will // be used to detect zeros and NaNs. #if __ARM_ARCH_ISA_THUMB == 1 push {r6, lr} lsls r2, r0, #1 lsls r3, r1, #1 #else mov r2, r0, lsl #1 mov r3, r1, lsl #1 #endif // We do the comparison in three stages (ignoring NaN values for the time // being). 
First, we orr the absolute values of a and b; this sets the Z // flag if both a and b are zero (of either sign). The shift of r3 doesn't // affect this at all, but it *does* make sure that the C flag is clear for // the subsequent operations. #if __ARM_ARCH_ISA_THUMB == 1 lsrs r6, r3, #1 orrs r6, r2, r6 #else orrs r12, r2, r3, lsr #1 #endif // Next, we check if a and b have the same or different signs. If they have // opposite signs, this eor will set the N flag. #if __ARM_ARCH_ISA_THUMB == 1 beq 1f movs r6, r0 eors r6, r1 1: #else it ne eorsne r12, r0, r1 #endif // If a and b are equal (either both zeros or bit identical; again, we're // ignoring NaNs for now), this subtract will zero out r0. If they have the // same sign, the flags are updated as they would be for a comparison of the // absolute values of a and b. #if __ARM_ARCH_ISA_THUMB == 1 bmi 1f subs r0, r2, r3 1: #else it pl subspl r0, r2, r3 #endif // If a is smaller in magnitude than b and both have the same sign, place // the negation of the sign of b in r0. Thus, if both are negative and // a > b, this sets r0 to 0; if both are positive and a < b, this sets // r0 to -1. // // This is also done if a and b have opposite signs and are not both zero, // because in that case the subtract was not performed and the C flag is // still clear from the shift argument in orrs; if a is positive and b // negative, this places 0 in r0; if a is negative and b positive, -1 is // placed in r0. #if __ARM_ARCH_ISA_THUMB == 1 bhs 1f // Here if a and b have the same sign and absA < absB, the result is thus // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring NaN). movs r0, #1 lsrs r1, #31 bne LOCAL_LABEL(CHECK_NAN) negs r0, r0 b LOCAL_LABEL(CHECK_NAN) 1: #else it lo mvnlo r0, r1, asr #31 #endif // If a is greater in magnitude than b and both have the same sign, place // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed // in r0, which is the desired result. 
Conversely, if both are positive // and a > b, zero is placed in r0. #if __ARM_ARCH_ISA_THUMB == 1 bls 1f // Here both have the same sign and absA > absB. movs r0, #1 lsrs r1, #31 beq LOCAL_LABEL(CHECK_NAN) negs r0, r0 1: #else it hi movhi r0, r1, asr #31 #endif // If you've been keeping track, at this point r0 contains -1 if a < b and // 0 if a >= b. All that remains to be done is to set it to 1 if a > b. // If a == b, then the Z flag is set, so we can get the correct final value // into r0 by simply or'ing with 1 if Z is clear. // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b. #if __ARM_ARCH_ISA_THUMB != 1 it ne orrne r0, r0, #1 #endif // Finally, we need to deal with NaNs. If either argument is NaN, replace // the value in r0 with 1. #if __ARM_ARCH_ISA_THUMB == 1 LOCAL_LABEL(CHECK_NAN): movs r6, #0xff lsls r6, #24 cmp r2, r6 bhi 1f cmp r3, r6 1: bls 2f movs r0, #1 2: pop {r6, pc} #else cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #1 JMP(lr) #endif END_COMPILERRT_FUNCTION(__eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) @ int __gtsf2(float a, float b) .p2align 2 DEFINE_COMPILERRT_FUNCTION(__gtsf2) // Identical to the preceding except in that we return -1 for NaN values. // Given that the two paths share so much code, one might be tempted to // unify them; however, the extra code needed to do so makes the code size // to performance tradeoff very hard to justify for such small functions. 
#if defined(COMPILER_RT_ARMHF_TARGET) vmov r0, s0 vmov r1, s1 #endif #if __ARM_ARCH_ISA_THUMB == 1 push {r6, lr} lsls r2, r0, #1 lsls r3, r1, #1 lsrs r6, r3, #1 orrs r6, r2, r6 beq 1f movs r6, r0 eors r6, r1 1: bmi 2f subs r0, r2, r3 2: bhs 3f movs r0, #1 lsrs r1, #31 bne LOCAL_LABEL(CHECK_NAN_2) negs r0, r0 b LOCAL_LABEL(CHECK_NAN_2) 3: bls 4f movs r0, #1 lsrs r1, #31 beq LOCAL_LABEL(CHECK_NAN_2) negs r0, r0 4: LOCAL_LABEL(CHECK_NAN_2): movs r6, #0xff lsls r6, #24 cmp r2, r6 bhi 5f cmp r3, r6 5: bls 6f movs r0, #1 negs r0, r0 6: pop {r6, pc} #else mov r2, r0, lsl #1 mov r3, r1, lsl #1 orrs r12, r2, r3, lsr #1 it ne eorsne r12, r0, r1 it pl subspl r0, r2, r3 it lo mvnlo r0, r1, asr #31 it hi movhi r0, r1, asr #31 it ne orrne r0, r0, #1 cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #-1 JMP(lr) #endif END_COMPILERRT_FUNCTION(__gtsf2) DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) @ int __unordsf2(float a, float b) .p2align 2 DEFINE_COMPILERRT_FUNCTION(__unordsf2) #if defined(COMPILER_RT_ARMHF_TARGET) vmov r0, s0 vmov r1, s1 #endif // Return 1 for NaN values, 0 otherwise. 
lsls r2, r0, #1 lsls r3, r1, #1 movs r0, #0 #if __ARM_ARCH_ISA_THUMB == 1 movs r1, #0xff lsls r1, #24 cmp r2, r1 bhi 1f cmp r3, r1 1: bls 2f movs r0, #1 2: #else cmp r2, #0xff000000 ite ls cmpls r3, #0xff000000 movhi r0, #1 #endif JMP(lr) END_COMPILERRT_FUNCTION(__unordsf2) #if defined(COMPILER_RT_ARMHF_TARGET) -DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum): +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpum) vmov s0, r0 vmov s1, r1 b SYMBOL_NAME(__unordsf2) END_COMPILERRT_FUNCTION(__aeabi_fcmpum) #else DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) #endif NO_EXEC_STACK_DIRECTIVE Index: vendor/compiler-rt/dist/lib/xray/xray_arm.cc =================================================================== --- vendor/compiler-rt/dist/lib/xray/xray_arm.cc (revision 312959) +++ vendor/compiler-rt/dist/lib/xray/xray_arm.cc (revision 312960) @@ -1,156 +1,161 @@ //===-- xray_arm.cc ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file is a part of XRay, a dynamic runtime instrumentation system. // // Implementation of ARM-specific routines (32-bit). // //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_common.h" #include "xray_defs.h" #include "xray_emulate_tsc.h" #include "xray_interface_internal.h" #include <atomic> #include <cassert> +extern "C" void __clear_cache(void* start, void* end); + namespace __xray { uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does // not have a constant frequency like TSC on x86[_64]; it may go faster or // slower depending on CPU's turbo or power saving modes. Furthermore, to // read from CP15 on ARM a kernel modification or a driver is needed. 
// We can not require this from users of compiler-rt. // So on ARM we use clock_gettime(2) which gives the result in nanoseconds. // To get the measurements per second, we scale this by the number of // nanoseconds per second, pretending that the TSC frequency is 1GHz and // one TSC tick is 1 nanosecond. return NanosecondsPerSecond; } // The machine codes for some instructions used in runtime patching. enum class PatchOpcodes : uint32_t { PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} PO_BlxIp = 0xE12FFF3C, // BLX ip PO_PopR0Lr = 0xE8BD4001, // POP {r0, lr} PO_B20 = 0xEA000005 // B #20 }; // 0xUUUUWXYZ -> 0x000W0XYZ inline static uint32_t getMovwMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { return (Value & 0xfff) | ((Value & 0xf000) << 4); } // 0xWXYZUUUU -> 0x000W0XYZ inline static uint32_t getMovtMask(const uint32_t Value) XRAY_NEVER_INSTRUMENT { return getMovwMask(Value >> 16); } // Writes the following instructions: // MOVW R<regNo>, #<lower 16 bits of the |Value|> // MOVT R<regNo>, #<higher 16 bits of the |Value|> inline static uint32_t * write32bitLoadReg(uint8_t regNo, uint32_t *Address, const uint32_t Value) XRAY_NEVER_INSTRUMENT { // This is a fatal error: we cannot just report it and continue execution. 
assert(regNo <= 15 && "Register number must be 0 to 15."); // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ *Address = (0xE3000000 | (uint32_t(regNo) << 12) | getMovwMask(Value)); Address++; // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ *Address = (0xE3400000 | (uint32_t(regNo) << 12) | getMovtMask(Value)); return Address + 1; } // Writes the following instructions: // MOVW r0, #<lower 16 bits of the |Value|> // MOVT r0, #<higher 16 bits of the |Value|> inline static uint32_t * Write32bitLoadR0(uint32_t *Address, const uint32_t Value) XRAY_NEVER_INSTRUMENT { return write32bitLoadReg(0, Address, Value); } // Writes the following instructions: // MOVW ip, #<lower 16 bits of the |Value|> // MOVT ip, #<higher 16 bits of the |Value|> inline static uint32_t * Write32bitLoadIP(uint32_t *Address, const uint32_t Value) XRAY_NEVER_INSTRUMENT { return write32bitLoadReg(12, Address, Value); } inline static bool patchSled(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled, void (*TracingHook)()) XRAY_NEVER_INSTRUMENT { // When |Enable| == true, // We replace the following compile-time stub (sled): // // xray_sled_n: // B #20 // 6 NOPs (24 bytes) // // With the following runtime patch: // // xray_sled_n: // PUSH {r0, lr} // MOVW r0, #<lower 16 bits of function ID> // MOVT r0, #<higher 16 bits of function ID> // MOVW ip, #<lower 16 bits of address of TracingHook> // MOVT ip, #<higher 16 bits of address of TracingHook> // BLX ip // POP {r0, lr} // // Replacement of the first 4-byte instruction should be the last and atomic // operation, so that the user code which reaches the sled concurrently // either jumps over the whole sled, or executes the whole sled when the // latter is ready. 
// // When |Enable|==false, we set back the first instruction in the sled to be // B #20 uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address); + uint32_t *CurAddress = FirstAddress + 1; if (Enable) { - uint32_t *CurAddress = FirstAddress + 1; CurAddress = Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId)); CurAddress = Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook)); *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp); CurAddress++; *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr); + CurAddress++; std::atomic_store_explicit( reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release); } else { std::atomic_store_explicit( reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); } + __clear_cache(reinterpret_cast<char*>(FirstAddress), + reinterpret_cast<char*>(CurAddress)); return true; } bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry); } bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); } bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { // FIXME: In the future we'd need to distinguish between non-tail exits and // tail exits for better information preservation. return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); } } // namespace __xray Index: vendor/compiler-rt/dist/test/xray/lit.cfg =================================================================== --- vendor/compiler-rt/dist/test/xray/lit.cfg (revision 312959) +++ vendor/compiler-rt/dist/test/xray/lit.cfg (revision 312960) @@ -1,39 +1,45 @@ # -*- Python -*- import os # Setup config name. config.name = 'XRay' + config.name_suffix # Setup source root. 
config.test_source_root = os.path.dirname(__file__) # Setup default compiler flags use with -fxray-instrument option. clang_xray_cflags = (['-fxray-instrument', config.target_cflags]) clang_xray_cxxflags = config.cxx_mode_flags + clang_xray_cflags def build_invocation(compile_flags): return ' ' + ' '.join([config.clang] + compile_flags) + ' ' # Setup substitutions. config.substitutions.append( ('%clang ', build_invocation([config.target_cflags]))) config.substitutions.append( ('%clangxx ', build_invocation(config.cxx_mode_flags + [config.target_cflags]))) config.substitutions.append( ('%clang_xray ', build_invocation(clang_xray_cflags))) config.substitutions.append( ('%clangxx_xray', build_invocation(clang_xray_cxxflags))) # Default test suffixes. config.suffixes = ['.c', '.cc', '.cpp'] -if config.host_os not in ['Linux'] or config.host_arch.find('64') == -1: +if config.host_os not in ['Linux']: config.unsupported = True +elif '64' not in config.host_arch: + if 'arm' in config.host_arch: + if '-mthumb' in config.target_cflags: + config.unsupported = True + else: + config.unsupported = True # Allow tests to use REQUIRES=stable-runtime. For use when you cannot use XFAIL # e.g. because the test sometimes passes, sometimes fails. if config.target_arch != 'aarch64': config.available_features.add('stable-runtime')