diff --git a/libc/shared/fp_bits.h b/libc/shared/fp_bits.h new file mode 100644 index 000000000000..e6bb1e17b80c --- /dev/null +++ b/libc/shared/fp_bits.h @@ -0,0 +1,23 @@ +//===-- Floating point number utils -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_FP_BITS_H +#define LLVM_LIBC_SHARED_FP_BITS_H + +#include "libc_common.h" +#include "src/__support/FPUtil/FPBits.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using fputil::FPBits; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_FP_BITS_H diff --git a/libc/shared/libc_common.h b/libc/shared/libc_common.h new file mode 100644 index 000000000000..c4560bbb0276 --- /dev/null +++ b/libc/shared/libc_common.h @@ -0,0 +1,26 @@ +//===-- Common defines for sharing LLVM libc with LLVM projects -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_LIBC_COMMON_H +#define LLVM_LIBC_SHARED_LIBC_COMMON_H + +// Use system errno. +#ifdef LIBC_ERRNO_MODE +#if LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_SYSTEM_INLINE +#error \ + "LIBC_ERRNO_MODE was set to something different from LIBC_ERRNO_MODE_SYSTEM_INLINE." +#endif // LIBC_ERRNO_MODE != LIBC_ERRNO_MODE_SYSTEM_INLINE +#else +#define LIBC_ERRNO_MODE LIBC_ERRNO_MODE_SYSTEM_INLINE +#endif // LIBC_ERRNO_MODE + +#ifndef LIBC_NAMESPACE +#define LIBC_NAMESPACE __llvm_libc +#endif // LIBC_NAMESPACE + +#endif // LLVM_LIBC_SHARED_LIBC_COMMON_H diff --git a/libc/shared/math.h b/libc/shared/math.h new file mode 100644 index 000000000000..b2f1a03e0940 --- /dev/null +++ b/libc/shared/math.h @@ -0,0 +1,23 @@ +//===-- Floating point math functions ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_H +#define LLVM_LIBC_SHARED_MATH_H + +#include "libc_common.h" + +#include "math/expf.h" +#include "math/expf16.h" +#include "math/frexpf.h" +#include "math/frexpf128.h" +#include "math/frexpf16.h" +#include "math/ldexpf.h" +#include "math/ldexpf128.h" +#include "math/ldexpf16.h" + +#endif // LLVM_LIBC_SHARED_MATH_H diff --git a/libc/shared/math/expf.h b/libc/shared/math/expf.h new file mode 100644 index 000000000000..a4e8b0751bb4 --- /dev/null +++ b/libc/shared/math/expf.h @@ -0,0 +1,23 @@ +//===-- Shared expf function ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXPF_H +#define LLVM_LIBC_SHARED_MATH_EXPF_H + +#include "shared/libc_common.h" +#include "src/__support/math/expf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::expf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_EXPF_H diff --git a/libc/shared/math/expf16.h b/libc/shared/math/expf16.h new file mode 100644 index 000000000000..a6a3e89e680d --- /dev/null +++ b/libc/shared/math/expf16.h @@ -0,0 +1,29 @@ +//===-- Shared expf16 function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXPF16_H +#define LLVM_LIBC_SHARED_MATH_EXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/expf16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::expf16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_EXPF16_H diff --git a/libc/shared/math/frexpf.h b/libc/shared/math/frexpf.h new file mode 100644 index 000000000000..35f23a70eb25 --- /dev/null +++ b/libc/shared/math/frexpf.h @@ -0,0 +1,24 @@ +//===-- Shared frexpf function ------------------------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_FREXPF_H +#define LLVM_LIBC_SHARED_MATH_FREXPF_H + +#include "shared/libc_common.h" +#include "src/__support/math/frexpf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::frexpf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_FREXPF_H diff --git a/libc/shared/math/frexpf128.h b/libc/shared/math/frexpf128.h new file mode 100644 index 000000000000..6b922bd73129 --- /dev/null +++ b/libc/shared/math/frexpf128.h @@ -0,0 +1,29 @@ +//===-- Shared frexpf128 function -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_FREXPF128_H +#define LLVM_LIBC_SHARED_MATH_FREXPF128_H + +#include "include/llvm-libc-types/float128.h" + +#ifdef LIBC_TYPES_HAS_FLOAT128 + +#include "shared/libc_common.h" +#include "src/__support/math/frexpf128.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::frexpf128; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT128 + +#endif // LLVM_LIBC_SHARED_MATH_FREXPF128_H diff --git a/libc/shared/math/frexpf16.h b/libc/shared/math/frexpf16.h new file mode 100644 index 000000000000..24b2883a6f91 --- /dev/null +++ b/libc/shared/math/frexpf16.h @@ -0,0 +1,29 @@ +//===-- Shared frexpf16 function --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_FREXPF16_H +#define LLVM_LIBC_SHARED_MATH_FREXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" +#include "shared/libc_common.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/math/frexpf16.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::frexpf16; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_FREXPF16_H diff --git a/libc/shared/math/ldexpf.h b/libc/shared/math/ldexpf.h new file mode 100644 index 000000000000..497933c47321 --- /dev/null +++ b/libc/shared/math/ldexpf.h @@ -0,0 +1,23 @@ +//===-- Shared ldexpf function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF_H +#define LLVM_LIBC_SHARED_MATH_LDEXPF_H + +#include "shared/libc_common.h" +#include "src/__support/math/ldexpf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::ldexpf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_LDEXPF_H diff --git a/libc/shared/math/ldexpf128.h b/libc/shared/math/ldexpf128.h new file mode 100644 index 000000000000..d4066beb809c --- /dev/null +++ b/libc/shared/math/ldexpf128.h @@ -0,0 +1,29 @@ +//===-- Shared ldexpf128 function -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF128_H +#define LLVM_LIBC_SHARED_MATH_LDEXPF128_H + +#include "include/llvm-libc-types/float128.h" + +#ifdef LIBC_TYPES_HAS_FLOAT128 + +#include "shared/libc_common.h" +#include "src/__support/math/ldexpf128.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::ldexpf128; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT128 + +#endif // LLVM_LIBC_SHARED_MATH_LDEXPF128_H diff --git a/libc/shared/math/ldexpf16.h b/libc/shared/math/ldexpf16.h new file mode 100644 index 000000000000..4c98c4c78d46 --- /dev/null +++ b/libc/shared/math/ldexpf16.h @@ -0,0 +1,31 @@ +//===-- Shared ldexpf16 function --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF16_H +#define LLVM_LIBC_SHARED_MATH_LDEXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "shared/libc_common.h" +#include "src/__support/math/ldexpf16.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace shared { + +using math::ldexpf16; + +} // namespace shared + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_LDEXPF16_H diff --git a/libc/shared/rpc.h b/libc/shared/rpc.h new file mode 100644 index 000000000000..7295efd4eee3 --- /dev/null +++ b/libc/shared/rpc.h @@ -0,0 +1,603 @@ +//===-- Shared memory RPC client / server interface -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a remote procedure call mechanism to communicate between +// heterogeneous devices that can share an address space atomically. We provide +// a client and a server to facilitate the remote call. The client makes request +// to the server using a shared communication channel. We use separate atomic +// signals to indicate which side, the client or the server is in ownership of +// the buffer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_RPC_H +#define LLVM_LIBC_SHARED_RPC_H + +#include "rpc_util.h" + +namespace rpc { + +/// Use scoped atomic variants if they are available for the target. +#if !__has_builtin(__scoped_atomic_load_n) +#define __scoped_atomic_load_n(src, ord, scp) __atomic_load_n(src, ord) +#define __scoped_atomic_store_n(dst, src, ord, scp) \ + __atomic_store_n(dst, src, ord) +#define __scoped_atomic_fetch_or(src, val, ord, scp) \ + __atomic_fetch_or(src, val, ord) +#define __scoped_atomic_fetch_and(src, val, ord, scp) \ + __atomic_fetch_and(src, val, ord) +#endif +#if !__has_builtin(__scoped_atomic_thread_fence) +#define __scoped_atomic_thread_fence(ord, scp) __atomic_thread_fence(ord) +#endif + +/// Generic codes that can be used whem implementing the server. +enum Status { + RPC_SUCCESS = 0x0, + RPC_ERROR = 0x1000, + RPC_UNHANDLED_OPCODE = 0x1001, +}; + +/// A fixed size channel used to communicate between the RPC client and server. +struct Buffer { + uint64_t data[8]; +}; +static_assert(sizeof(Buffer) == 64, "Buffer size mismatch"); + +/// The information associated with a packet. This indicates which operations to +/// perform and which threads are active in the slots. +struct Header { + uint64_t mask; + uint32_t opcode; +}; + +/// The maximum number of parallel ports that the RPC interface can support. +constexpr static uint64_t MAX_PORT_COUNT = 4096; + +/// A common process used to synchronize communication between a client and a +/// server. The process contains a read-only inbox and a write-only outbox used +/// for signaling ownership of the shared buffer between both sides. We assign +/// ownership of the buffer to the client if the inbox and outbox bits match, +/// otherwise it is owned by the server. +/// +/// This process is designed to allow the client and the server to exchange data +/// using a fixed size packet in a mostly arbitrary order using the 'send' and +/// 'recv' operations. The following restrictions to this scheme apply: +/// - The client will always start with a 'send' operation. +/// - The server will always start with a 'recv' operation. +/// - Every 'send' or 'recv' call is mirrored by the other process. +template struct Process { + RPC_ATTRS Process() = default; + RPC_ATTRS Process(const Process &) = delete; + RPC_ATTRS Process &operator=(const Process &) = delete; + RPC_ATTRS Process(Process &&) = default; + RPC_ATTRS Process &operator=(Process &&) = default; + RPC_ATTRS ~Process() = default; + + const uint32_t port_count = 0; + const uint32_t *const inbox = nullptr; + uint32_t *const outbox = nullptr; + Header *const header = nullptr; + Buffer *const packet = nullptr; + + static constexpr uint64_t NUM_BITS_IN_WORD = sizeof(uint32_t) * 8; + uint32_t lock[MAX_PORT_COUNT / NUM_BITS_IN_WORD] = {0}; + + RPC_ATTRS Process(uint32_t port_count, void *buffer) + : port_count(port_count), inbox(reinterpret_cast( + advance(buffer, inbox_offset(port_count)))), + outbox(reinterpret_cast( + advance(buffer, outbox_offset(port_count)))), + header(reinterpret_cast
( + advance(buffer, header_offset(port_count)))), + packet(reinterpret_cast( + advance(buffer, buffer_offset(port_count)))) {} + + /// Allocate a memory buffer sufficient to store the following equivalent + /// representation in memory. + /// + /// struct Equivalent { + /// Atomic primary[port_count]; + /// Atomic secondary[port_count]; + /// Header header[port_count]; + /// Buffer packet[port_count][lane_size]; + /// }; + RPC_ATTRS static constexpr uint64_t allocation_size(uint32_t port_count, + uint32_t lane_size) { + return buffer_offset(port_count) + buffer_bytes(port_count, lane_size); + } + + /// Retrieve the inbox state from memory shared between processes. + RPC_ATTRS uint32_t load_inbox(uint64_t lane_mask, uint32_t index) const { + return rpc::broadcast_value( + lane_mask, __scoped_atomic_load_n(&inbox[index], __ATOMIC_RELAXED, + __MEMORY_SCOPE_SYSTEM)); + } + + /// Retrieve the outbox state from memory shared between processes. + RPC_ATTRS uint32_t load_outbox(uint64_t lane_mask, uint32_t index) const { + return rpc::broadcast_value( + lane_mask, __scoped_atomic_load_n(&outbox[index], __ATOMIC_RELAXED, + __MEMORY_SCOPE_SYSTEM)); + } + + /// Signal to the other process that this one is finished with the buffer. + /// Equivalent to loading outbox followed by store of the inverted value + /// The outbox is write only by this warp and tracking the value locally is + /// cheaper than calling load_outbox to get the value to store. + RPC_ATTRS uint32_t invert_outbox(uint32_t index, uint32_t current_outbox) { + uint32_t inverted_outbox = !current_outbox; + __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM); + __scoped_atomic_store_n(&outbox[index], inverted_outbox, __ATOMIC_RELAXED, + __MEMORY_SCOPE_SYSTEM); + return inverted_outbox; + } + + // Given the current outbox and inbox values, wait until the inbox changes + // to indicate that this thread owns the buffer element. + RPC_ATTRS void wait_for_ownership(uint64_t lane_mask, uint32_t index, + uint32_t outbox, uint32_t in) { + while (buffer_unavailable(in, outbox)) { + sleep_briefly(); + in = load_inbox(lane_mask, index); + } + __scoped_atomic_thread_fence(__ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM); + } + + /// The packet is a linearly allocated array of buffers used to communicate + /// with the other process. This function returns the appropriate slot in this + /// array such that the process can operate on an entire warp or wavefront. + RPC_ATTRS Buffer *get_packet(uint32_t index, uint32_t lane_size) { + return &packet[index * lane_size]; + } + + /// Determines if this process needs to wait for ownership of the buffer. We + /// invert the condition on one of the processes to indicate that if one + /// process owns the buffer then the other does not. + RPC_ATTRS static bool buffer_unavailable(uint32_t in, uint32_t out) { + bool cond = in != out; + return Invert ? !cond : cond; + } + + /// Attempt to claim the lock at index. Return true on lock taken. + /// lane_mask is a bitmap of the threads in the warp that would hold the + /// single lock on success, e.g. the result of rpc::get_lane_mask() + /// The lock is held when the n-th bit of the lock bitfield is set. + RPC_ATTRS bool try_lock(uint64_t lane_mask, uint32_t index) { + // On amdgpu, test and set to the nth lock bit and a sync_lane would suffice + // On volta, need to handle differences between the threads running and + // the threads that were detected in the previous call to get_lane_mask() + // + // All threads in lane_mask try to claim the lock. At most one can succeed. + // There may be threads active which are not in lane mask which must not + // succeed in taking the lock, as otherwise it will leak. This is handled + // by making threads which are not in lane_mask or with 0, a no-op. + uint32_t id = rpc::get_lane_id(); + bool id_in_lane_mask = lane_mask & (1ul << id); + + // All threads in the warp call fetch_or. Possibly at the same time. + bool before = set_nth(lock, index, id_in_lane_mask); + uint64_t packed = rpc::ballot(lane_mask, before); + + // If every bit set in lane_mask is also set in packed, every single thread + // in the warp failed to get the lock. Ballot returns unset for threads not + // in the lane mask. + // + // Cases, per thread: + // mask==0 -> unspecified before, discarded by ballot -> 0 + // mask==1 and before==0 (success), set zero by ballot -> 0 + // mask==1 and before==1 (failure), set one by ballot -> 1 + // + // mask != packed implies at least one of the threads got the lock + // atomic semantics of fetch_or mean at most one of the threads for the lock + + // If holding the lock then the caller can load values knowing said loads + // won't move past the lock. No such guarantee is needed if the lock acquire + // failed. This conditional branch is expected to fold in the caller after + // inlining the current function. + bool holding_lock = lane_mask != packed; + if (holding_lock) + __scoped_atomic_thread_fence(__ATOMIC_ACQUIRE, __MEMORY_SCOPE_DEVICE); + return holding_lock; + } + + /// Unlock the lock at index. We need a lane sync to keep this function + /// convergent, otherwise the compiler will sink the store and deadlock. + RPC_ATTRS void unlock(uint64_t lane_mask, uint32_t index) { + // Do not move any writes past the unlock. + __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_DEVICE); + + // Use exactly one thread to clear the nth bit in the lock array Must + // restrict to a single thread to avoid one thread dropping the lock, then + // an unrelated warp claiming the lock, then a second thread in this warp + // dropping the lock again. + clear_nth(lock, index, rpc::is_first_lane(lane_mask)); + rpc::sync_lane(lane_mask); + } + + /// Number of bytes to allocate for an inbox or outbox. + RPC_ATTRS static constexpr uint64_t mailbox_bytes(uint32_t port_count) { + return port_count * sizeof(uint32_t); + } + + /// Number of bytes to allocate for the buffer containing the packets. + RPC_ATTRS static constexpr uint64_t buffer_bytes(uint32_t port_count, + uint32_t lane_size) { + return port_count * lane_size * sizeof(Buffer); + } + + /// Offset of the inbox in memory. This is the same as the outbox if inverted. + RPC_ATTRS static constexpr uint64_t inbox_offset(uint32_t port_count) { + return Invert ? mailbox_bytes(port_count) : 0; + } + + /// Offset of the outbox in memory. This is the same as the inbox if inverted. + RPC_ATTRS static constexpr uint64_t outbox_offset(uint32_t port_count) { + return Invert ? 0 : mailbox_bytes(port_count); + } + + /// Offset of the buffer containing the packets after the inbox and outbox. + RPC_ATTRS static constexpr uint64_t header_offset(uint32_t port_count) { + return align_up(2 * mailbox_bytes(port_count), alignof(Header)); + } + + /// Offset of the buffer containing the packets after the inbox and outbox. + RPC_ATTRS static constexpr uint64_t buffer_offset(uint32_t port_count) { + return align_up(header_offset(port_count) + port_count * sizeof(Header), + alignof(Buffer)); + } + + /// Conditionally set the n-th bit in the atomic bitfield. + RPC_ATTRS static constexpr uint32_t set_nth(uint32_t *bits, uint32_t index, + bool cond) { + uint32_t slot = index / NUM_BITS_IN_WORD; + uint32_t bit = index % NUM_BITS_IN_WORD; + return __scoped_atomic_fetch_or(&bits[slot], + static_cast(cond) << bit, + __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE) & + (1u << bit); + } + + /// Conditionally clear the n-th bit in the atomic bitfield. + RPC_ATTRS static constexpr uint32_t clear_nth(uint32_t *bits, uint32_t index, + bool cond) { + uint32_t slot = index / NUM_BITS_IN_WORD; + uint32_t bit = index % NUM_BITS_IN_WORD; + return __scoped_atomic_fetch_and(&bits[slot], + ~0u ^ (static_cast(cond) << bit), + __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE) & + (1u << bit); + } +}; + +/// Invokes a function across every active buffer across the total lane size. +template +RPC_ATTRS static void invoke_rpc(F &&fn, uint32_t lane_size, uint64_t lane_mask, + Buffer *slot) { + if constexpr (is_process_gpu()) { + fn(&slot[rpc::get_lane_id()], rpc::get_lane_id()); + } else { + for (uint32_t i = 0; i < lane_size; i += rpc::get_num_lanes()) + if (lane_mask & (1ul << i)) + fn(&slot[i], i); + } +} + +/// The port provides the interface to communicate between the multiple +/// processes. A port is conceptually an index into the memory provided by the +/// underlying process that is guarded by a lock bit. +template struct Port { + RPC_ATTRS Port(Process &process, uint64_t lane_mask, uint32_t lane_size, + uint32_t index, uint32_t out) + : process(process), lane_mask(lane_mask), lane_size(lane_size), + index(index), out(out), receive(false), owns_buffer(true) {} + RPC_ATTRS ~Port() = default; + +private: + RPC_ATTRS Port(const Port &) = delete; + RPC_ATTRS Port &operator=(const Port &) = delete; + RPC_ATTRS Port(Port &&) = default; + RPC_ATTRS Port &operator=(Port &&) = default; + + friend struct Client; + friend struct Server; + friend class rpc::optional>; + +public: + template RPC_ATTRS void recv(U use); + template RPC_ATTRS void send(F fill); + template RPC_ATTRS void send_and_recv(F fill, U use); + template RPC_ATTRS void recv_and_send(W work); + RPC_ATTRS void send_n(const void *const *src, uint64_t *size); + RPC_ATTRS void send_n(const void *src, uint64_t size); + template + RPC_ATTRS void recv_n(void **dst, uint64_t *size, A &&alloc); + + RPC_ATTRS uint32_t get_opcode() const { return process.header[index].opcode; } + + RPC_ATTRS uint32_t get_index() const { return index; } + + RPC_ATTRS void close() { + // Wait for all lanes to finish using the port. + rpc::sync_lane(lane_mask); + + // The server is passive, if it own the buffer when it closes we need to + // give ownership back to the client. + if (owns_buffer && T) + out = process.invert_outbox(index, out); + process.unlock(lane_mask, index); + } + +private: + Process &process; + uint64_t lane_mask; + uint32_t lane_size; + uint32_t index; + uint32_t out; + bool receive; + bool owns_buffer; +}; + +/// The RPC client used to make requests to the server. +struct Client { + RPC_ATTRS Client() = default; + RPC_ATTRS Client(const Client &) = delete; + RPC_ATTRS Client &operator=(const Client &) = delete; + RPC_ATTRS ~Client() = default; + + RPC_ATTRS Client(uint32_t port_count, void *buffer) + : process(port_count, buffer) {} + + using Port = rpc::Port; + template RPC_ATTRS Port open(); + +private: + Process process; +}; + +/// The RPC server used to respond to the client. +struct Server { + RPC_ATTRS Server() = default; + RPC_ATTRS Server(const Server &) = delete; + RPC_ATTRS Server &operator=(const Server &) = delete; + RPC_ATTRS ~Server() = default; + + RPC_ATTRS Server(uint32_t port_count, void *buffer) + : process(port_count, buffer) {} + + using Port = rpc::Port; + RPC_ATTRS rpc::optional try_open(uint32_t lane_size, + uint32_t start = 0); + RPC_ATTRS Port open(uint32_t lane_size); + + RPC_ATTRS static constexpr uint64_t allocation_size(uint32_t lane_size, + uint32_t port_count) { + return Process::allocation_size(port_count, lane_size); + } + +private: + Process process; +}; + +/// Applies \p fill to the shared buffer and initiates a send operation. +template template RPC_ATTRS void Port::send(F fill) { + uint32_t in = owns_buffer ? out ^ T : process.load_inbox(lane_mask, index); + + // We need to wait until we own the buffer before sending. + process.wait_for_ownership(lane_mask, index, out, in); + + // Apply the \p fill function to initialize the buffer and release the memory. + invoke_rpc(fill, lane_size, process.header[index].mask, + process.get_packet(index, lane_size)); + out = process.invert_outbox(index, out); + owns_buffer = false; + receive = false; +} + +/// Applies \p use to the shared buffer and acknowledges the send. +template template RPC_ATTRS void Port::recv(U use) { + // We only exchange ownership of the buffer during a receive if we are waiting + // for a previous receive to finish. + if (receive) { + out = process.invert_outbox(index, out); + owns_buffer = false; + } + + uint32_t in = owns_buffer ? out ^ T : process.load_inbox(lane_mask, index); + + // We need to wait until we own the buffer before receiving. + process.wait_for_ownership(lane_mask, index, out, in); + + // Apply the \p use function to read the memory out of the buffer. + invoke_rpc(use, lane_size, process.header[index].mask, + process.get_packet(index, lane_size)); + receive = true; + owns_buffer = true; +} + +/// Combines a send and receive into a single function. +template +template +RPC_ATTRS void Port::send_and_recv(F fill, U use) { + send(fill); + recv(use); +} + +/// Combines a receive and send operation into a single function. The \p work +/// function modifies the buffer in-place and the send is only used to initiate +/// the copy back. +template +template +RPC_ATTRS void Port::recv_and_send(W work) { + recv(work); + send([](Buffer *, uint32_t) { /* no-op */ }); +} + +/// Helper routine to simplify the interface when sending from the GPU using +/// thread private pointers to the underlying value. +template +RPC_ATTRS void Port::send_n(const void *src, uint64_t size) { + const void **src_ptr = &src; + uint64_t *size_ptr = &size; + send_n(src_ptr, size_ptr); +} + +/// Sends an arbitrarily sized data buffer \p src across the shared channel in +/// multiples of the packet length. +template +RPC_ATTRS void Port::send_n(const void *const *src, uint64_t *size) { + uint64_t num_sends = 0; + send([&](Buffer *buffer, uint32_t id) { + reinterpret_cast(buffer->data)[0] = lane_value(size, id); + num_sends = is_process_gpu() ? lane_value(size, id) + : rpc::max(lane_value(size, id), num_sends); + uint64_t len = + lane_value(size, id) > sizeof(Buffer::data) - sizeof(uint64_t) + ? sizeof(Buffer::data) - sizeof(uint64_t) + : lane_value(size, id); + rpc_memcpy(&buffer->data[1], lane_value(src, id), len); + }); + uint64_t idx = sizeof(Buffer::data) - sizeof(uint64_t); + uint64_t mask = process.header[index].mask; + while (rpc::ballot(mask, idx < num_sends)) { + send([=](Buffer *buffer, uint32_t id) { + uint64_t len = lane_value(size, id) - idx > sizeof(Buffer::data) + ? sizeof(Buffer::data) + : lane_value(size, id) - idx; + if (idx < lane_value(size, id)) + rpc_memcpy(buffer->data, advance(lane_value(src, id), idx), len); + }); + idx += sizeof(Buffer::data); + } +} + +/// Receives an arbitrarily sized data buffer across the shared channel in +/// multiples of the packet length. The \p alloc function is called with the +/// size of the data so that we can initialize the size of the \p dst buffer. +template +template +RPC_ATTRS void Port::recv_n(void **dst, uint64_t *size, A &&alloc) { + uint64_t num_recvs = 0; + recv([&](Buffer *buffer, uint32_t id) { + lane_value(size, id) = reinterpret_cast(buffer->data)[0]; + lane_value(dst, id) = + reinterpret_cast(alloc(lane_value(size, id))); + num_recvs = is_process_gpu() ? lane_value(size, id) + : rpc::max(lane_value(size, id), num_recvs); + uint64_t len = + lane_value(size, id) > sizeof(Buffer::data) - sizeof(uint64_t) + ? sizeof(Buffer::data) - sizeof(uint64_t) + : lane_value(size, id); + rpc_memcpy(lane_value(dst, id), &buffer->data[1], len); + }); + uint64_t idx = sizeof(Buffer::data) - sizeof(uint64_t); + uint64_t mask = process.header[index].mask; + while (rpc::ballot(mask, idx < num_recvs)) { + recv([=](Buffer *buffer, uint32_t id) { + uint64_t len = lane_value(size, id) - idx > sizeof(Buffer::data) + ? sizeof(Buffer::data) + : lane_value(size, id) - idx; + if (idx < lane_value(size, id)) + rpc_memcpy(advance(lane_value(dst, id), idx), buffer->data, len); + }); + idx += sizeof(Buffer::data); + } +} + +/// Continually attempts to open a port to use as the client. The client can +/// only open a port if we find an index that is in a valid sending state. That +/// is, there are send operations pending that haven't been serviced on this +/// port. Each port instance uses an associated \p opcode to tell the server +/// what to do. The Client interface provides the appropriate lane size to the +/// port using the platform's returned value. +template RPC_ATTRS Client::Port Client::open() { + // Repeatedly perform a naive linear scan for a port that can be opened to + // send data. + for (uint32_t index = 0;; ++index) { + // Start from the beginning if we run out of ports to check. + if (index >= process.port_count) + index = 0; + + // Attempt to acquire the lock on this index. + uint64_t lane_mask = rpc::get_lane_mask(); + if (!process.try_lock(lane_mask, index)) + continue; + + uint32_t in = process.load_inbox(lane_mask, index); + uint32_t out = process.load_outbox(lane_mask, index); + + // Once we acquire the index we need to check if we are in a valid sending + // state. + if (process.buffer_unavailable(in, out)) { + process.unlock(lane_mask, index); + continue; + } + + if (rpc::is_first_lane(lane_mask)) { + process.header[index].opcode = opcode; + process.header[index].mask = lane_mask; + } + rpc::sync_lane(lane_mask); + return Port(process, lane_mask, rpc::get_num_lanes(), index, out); + } +} + +/// Attempts to open a port to use as the server. The server can only open a +/// port if it has a pending receive operation +RPC_ATTRS rpc::optional +Server::try_open(uint32_t lane_size, uint32_t start) { + // Perform a naive linear scan for a port that has a pending request. + for (uint32_t index = start; index < process.port_count; ++index) { + uint64_t lane_mask = rpc::get_lane_mask(); + uint32_t in = process.load_inbox(lane_mask, index); + uint32_t out = process.load_outbox(lane_mask, index); + + // The server is passive, if there is no work pending don't bother + // opening a port. + if (process.buffer_unavailable(in, out)) + continue; + + // Attempt to acquire the lock on this index. + if (!process.try_lock(lane_mask, index)) + continue; + + in = process.load_inbox(lane_mask, index); + out = process.load_outbox(lane_mask, index); + + if (process.buffer_unavailable(in, out)) { + process.unlock(lane_mask, index); + continue; + } + + return Port(process, lane_mask, lane_size, index, out); + } + return rpc::nullopt; +} + +RPC_ATTRS Server::Port Server::open(uint32_t lane_size) { + for (;;) { + if (rpc::optional p = try_open(lane_size)) + return rpc::move(p.value()); + sleep_briefly(); + } +} + +#undef RPC_ATTRS +#if !__has_builtin(__scoped_atomic_load_n) +#undef __scoped_atomic_load_n +#undef __scoped_atomic_store_n +#undef __scoped_atomic_fetch_or +#undef __scoped_atomic_fetch_and +#endif +#if !__has_builtin(__scoped_atomic_thread_fence) +#undef __scoped_atomic_thread_fence +#endif + +} // namespace rpc + +#endif // LLVM_LIBC_SHARED_RPC_H diff --git a/libc/shared/rpc_opcodes.h b/libc/shared/rpc_opcodes.h new file mode 100644 index 000000000000..6de41cd1899e --- /dev/null +++ b/libc/shared/rpc_opcodes.h @@ -0,0 +1,53 @@ +//===-- Definition of RPC opcodes -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_RPC_OPCODES_H +#define LLVM_LIBC_SHARED_RPC_OPCODES_H + +#include "rpc.h" + +#define LLVM_LIBC_RPC_BASE 'c' +#define LLVM_LIBC_OPCODE(n) (LLVM_LIBC_RPC_BASE << 24 | n) + +typedef enum { + LIBC_NOOP = LLVM_LIBC_OPCODE(0), + LIBC_EXIT = LLVM_LIBC_OPCODE(1), + LIBC_WRITE_TO_STDOUT = LLVM_LIBC_OPCODE(2), + LIBC_WRITE_TO_STDERR = LLVM_LIBC_OPCODE(3), + LIBC_WRITE_TO_STREAM = LLVM_LIBC_OPCODE(4), + LIBC_WRITE_TO_STDOUT_NEWLINE = LLVM_LIBC_OPCODE(5), + LIBC_READ_FROM_STREAM = LLVM_LIBC_OPCODE(6), + LIBC_READ_FGETS = LLVM_LIBC_OPCODE(7), + LIBC_OPEN_FILE = LLVM_LIBC_OPCODE(8), + LIBC_CLOSE_FILE = LLVM_LIBC_OPCODE(9), + LIBC_MALLOC = LLVM_LIBC_OPCODE(10), + LIBC_FREE = LLVM_LIBC_OPCODE(11), + LIBC_HOST_CALL = LLVM_LIBC_OPCODE(12), + LIBC_ABORT = LLVM_LIBC_OPCODE(13), + LIBC_FEOF = LLVM_LIBC_OPCODE(14), + LIBC_FERROR = LLVM_LIBC_OPCODE(15), + LIBC_CLEARERR = LLVM_LIBC_OPCODE(16), + LIBC_FSEEK = LLVM_LIBC_OPCODE(17), + LIBC_FTELL = LLVM_LIBC_OPCODE(18), + LIBC_FFLUSH = LLVM_LIBC_OPCODE(19), + LIBC_UNGETC = LLVM_LIBC_OPCODE(20), + LIBC_PRINTF_TO_STDOUT = LLVM_LIBC_OPCODE(21), + LIBC_PRINTF_TO_STDERR = LLVM_LIBC_OPCODE(22), + LIBC_PRINTF_TO_STREAM = LLVM_LIBC_OPCODE(23), + LIBC_PRINTF_TO_STDOUT_PACKED = LLVM_LIBC_OPCODE(24), + LIBC_PRINTF_TO_STDERR_PACKED = LLVM_LIBC_OPCODE(25), + LIBC_PRINTF_TO_STREAM_PACKED = LLVM_LIBC_OPCODE(26), + LIBC_REMOVE = LLVM_LIBC_OPCODE(27), + LIBC_RENAME = LLVM_LIBC_OPCODE(28), + LIBC_SYSTEM = LLVM_LIBC_OPCODE(29), + LIBC_LAST = 0xFFFFFFFF, +} rpc_opcode_t; + +#undef LLVM_LIBC_OPCODE + +#endif // LLVM_LIBC_SHARED_RPC_OPCODES_H diff --git a/libc/shared/rpc_server.h b/libc/shared/rpc_server.h new file mode 100644 index 000000000000..46e35f13f0ea --- /dev/null +++ b/libc/shared/rpc_server.h @@ -0,0 +1,23 @@ +//===-- Shared RPC server interface -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_RPC_SERVER_H +#define LLVM_LIBC_SHARED_RPC_SERVER_H + +#include "libc_common.h" +#include "src/__support/RPC/rpc_server.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using LIBC_NAMESPACE::rpc::handle_libc_opcodes; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_RPC_SERVER_H diff --git a/libc/shared/rpc_util.h b/libc/shared/rpc_util.h new file mode 100644 index 000000000000..687814b7ff2a --- /dev/null +++ b/libc/shared/rpc_util.h @@ -0,0 +1,276 @@ +//===-- Shared memory RPC client / server utilities -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_RPC_UTIL_H +#define LLVM_LIBC_SHARED_RPC_UTIL_H + +#include +#include + +#if (defined(__NVPTX__) || defined(__AMDGPU__)) && \ + !((defined(__CUDA__) && !defined(__CUDA_ARCH__)) || \ + (defined(__HIP__) && !defined(__HIP_DEVICE_COMPILE__))) +#include +#define RPC_TARGET_IS_GPU +#endif + +// Workaround for missing __has_builtin in < GCC 10. +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#ifndef RPC_ATTRS +#if defined(__CUDA__) || defined(__HIP__) +#define RPC_ATTRS __attribute__((host, device)) inline +#else +#define RPC_ATTRS inline +#endif +#endif + +namespace rpc { + +template struct type_identity { + using type = T; +}; + +template struct type_constant { + static inline constexpr T value = v; +}; + +template struct remove_reference : type_identity {}; +template struct remove_reference : type_identity {}; +template struct remove_reference : type_identity {}; + +template struct is_const : type_constant {}; +template struct is_const : type_constant {}; + +/// Freestanding implementation of std::move. +template +RPC_ATTRS constexpr typename remove_reference::type &&move(T &&t) { + return static_cast::type &&>(t); +} + +/// Freestanding implementation of std::forward. +template +RPC_ATTRS constexpr T &&forward(typename remove_reference::type &value) { + return static_cast(value); +} +template +RPC_ATTRS constexpr T &&forward(typename remove_reference::type &&value) { + return static_cast(value); +} + +struct in_place_t { + RPC_ATTRS explicit in_place_t() = default; +}; + +struct nullopt_t { + RPC_ATTRS constexpr explicit nullopt_t() = default; +}; + +constexpr inline in_place_t in_place{}; +constexpr inline nullopt_t nullopt{}; + +/// Freestanding and minimal implementation of std::optional. +template class optional { + template struct OptionalStorage { + union { + char empty; + U stored_value; + }; + + bool in_use = false; + + RPC_ATTRS ~OptionalStorage() { reset(); } + + RPC_ATTRS constexpr OptionalStorage() : empty() {} + + template + RPC_ATTRS constexpr explicit OptionalStorage(in_place_t, Args &&...args) + : stored_value(forward(args)...) {} + + RPC_ATTRS constexpr void reset() { + if (in_use) + stored_value.~U(); + in_use = false; + } + }; + + OptionalStorage storage; + +public: + RPC_ATTRS constexpr optional() = default; + RPC_ATTRS constexpr optional(nullopt_t) {} + + RPC_ATTRS constexpr optional(const T &t) : storage(in_place, t) { + storage.in_use = true; + } + RPC_ATTRS constexpr optional(const optional &) = default; + + RPC_ATTRS constexpr optional(T &&t) : storage(in_place, move(t)) { + storage.in_use = true; + } + RPC_ATTRS constexpr optional(optional &&O) = default; + + RPC_ATTRS constexpr optional &operator=(T &&t) { + storage = move(t); + return *this; + } + RPC_ATTRS constexpr optional &operator=(optional &&) = default; + + RPC_ATTRS constexpr optional &operator=(const T &t) { + storage = t; + return *this; + } + RPC_ATTRS constexpr optional &operator=(const optional &) = default; + + RPC_ATTRS constexpr void reset() { storage.reset(); } + + RPC_ATTRS constexpr const T &value() const & { return storage.stored_value; } + + RPC_ATTRS constexpr T &value() & { return storage.stored_value; } + + RPC_ATTRS constexpr explicit operator bool() const { return storage.in_use; } + RPC_ATTRS constexpr bool has_value() const { return storage.in_use; } + RPC_ATTRS constexpr const T *operator->() const { + return &storage.stored_value; + } + RPC_ATTRS constexpr T *operator->() { return &storage.stored_value; } + RPC_ATTRS constexpr const T &operator*() const & { + return storage.stored_value; + } + RPC_ATTRS constexpr T &operator*() & { return storage.stored_value; } + + RPC_ATTRS constexpr T &&value() && { return move(storage.stored_value); } + RPC_ATTRS constexpr T &&operator*() && { return move(storage.stored_value); } +}; + +/// Suspend the thread briefly to assist the thread scheduler during busy loops. +RPC_ATTRS void sleep_briefly() { +#if __has_builtin(__nvvm_reflect) + if (__nvvm_reflect("__CUDA_ARCH") >= 700) + asm("nanosleep.u32 64;" ::: "memory"); +#elif __has_builtin(__builtin_amdgcn_s_sleep) + __builtin_amdgcn_s_sleep(2); +#elif __has_builtin(__builtin_ia32_pause) + __builtin_ia32_pause(); +#elif __has_builtin(__builtin_arm_isb) + __builtin_arm_isb(0xf); +#else + // Simply do nothing if sleeping isn't supported on this platform. +#endif +} + +/// Conditional to indicate if this process is running on the GPU. +RPC_ATTRS constexpr bool is_process_gpu() { +#ifdef RPC_TARGET_IS_GPU + return true; +#else + return false; +#endif +} + +/// Wait for all lanes in the group to complete. +RPC_ATTRS void sync_lane([[maybe_unused]] uint64_t lane_mask) { +#ifdef RPC_TARGET_IS_GPU + return __gpu_sync_lane(lane_mask); +#endif +} + +/// Copies the value from the first active thread to the rest. +RPC_ATTRS uint32_t broadcast_value([[maybe_unused]] uint64_t lane_mask, + uint32_t x) { +#ifdef RPC_TARGET_IS_GPU + return __gpu_read_first_lane_u32(lane_mask, x); +#else + return x; +#endif +} + +/// Returns the number lanes that participate in the RPC interface. +RPC_ATTRS uint32_t get_num_lanes() { +#ifdef RPC_TARGET_IS_GPU + return __gpu_num_lanes(); +#else + return 1; +#endif +} + +/// Returns the id of the thread inside of an AMD wavefront executing together. +RPC_ATTRS uint64_t get_lane_mask() { +#ifdef RPC_TARGET_IS_GPU + return __gpu_lane_mask(); +#else + return 1; +#endif +} + +/// Returns the id of the thread inside of an AMD wavefront executing together. +RPC_ATTRS uint32_t get_lane_id() { +#ifdef RPC_TARGET_IS_GPU + return __gpu_lane_id(); +#else + return 0; +#endif +} + +/// Conditional that is only true for a single thread in a lane. +RPC_ATTRS bool is_first_lane([[maybe_unused]] uint64_t lane_mask) { +#ifdef RPC_TARGET_IS_GPU + return __gpu_is_first_in_lane(lane_mask); +#else + return true; +#endif +} + +/// Returns a bitmask of threads in the current lane for which \p x is true. +RPC_ATTRS uint64_t ballot([[maybe_unused]] uint64_t lane_mask, bool x) { +#ifdef RPC_TARGET_IS_GPU + return __gpu_ballot(lane_mask, x); +#else + return x; +#endif +} + +/// Return \p val aligned "upwards" according to \p align. +template +RPC_ATTRS constexpr V align_up(V val, A align) { + return ((val + V(align) - 1) / V(align)) * V(align); +} + +/// Utility to provide a unified interface between the CPU and GPU's memory +/// model. On the GPU stack variables are always private to a lane so we can +/// simply use the variable passed in. On the CPU we need to allocate enough +/// space for the whole lane and index into it. +template RPC_ATTRS V &lane_value(V *val, uint32_t id) { + if constexpr (is_process_gpu()) + return *val; + return val[id]; +} + +/// Advance the \p p by \p bytes. +template RPC_ATTRS T *advance(T *ptr, U bytes) { + if constexpr (is_const::value) + return reinterpret_cast(reinterpret_cast(ptr) + + bytes); + else + return reinterpret_cast(reinterpret_cast(ptr) + bytes); +} + +/// Wrapper around the optimal memory copy implementation for the target. +RPC_ATTRS void rpc_memcpy(void *dst, const void *src, size_t count) { + __builtin_memcpy(dst, src, count); +} + +template RPC_ATTRS constexpr const T &max(const T &a, const T &b) { + return (a < b) ? b : a; +} + +} // namespace rpc + +#endif // LLVM_LIBC_SHARED_RPC_UTIL_H diff --git a/libc/shared/str_to_float.h b/libc/shared/str_to_float.h new file mode 100644 index 000000000000..dcc6027d6c77 --- /dev/null +++ b/libc/shared/str_to_float.h @@ -0,0 +1,28 @@ +//===-- String to float conversion utils ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_STR_TO_FLOAT_H +#define LLVM_LIBC_SHARED_STR_TO_FLOAT_H + +#include "libc_common.h" +#include "src/__support/str_to_float.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using internal::ExpandedFloat; +using internal::FloatConvertReturn; +using internal::RoundDirection; + +using internal::binary_exp_to_float; +using internal::decimal_exp_to_float; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_STR_TO_FLOAT_H diff --git a/libc/shared/str_to_integer.h b/libc/shared/str_to_integer.h new file mode 100644 index 000000000000..6ed38c932662 --- /dev/null +++ b/libc/shared/str_to_integer.h @@ -0,0 +1,25 @@ +//===-- String to int conversion utils --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_STR_TO_INTEGER_H +#define LLVM_LIBC_SHARED_STR_TO_INTEGER_H + +#include "libc_common.h" +#include "src/__support/str_to_integer.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using LIBC_NAMESPACE::StrToNumResult; + +using internal::strtointeger; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_STR_TO_INTEGER_H