D3148.diff

Index: head/sys/compat/cloudabi/cloudabi_clock.c
===================================================================
--- head/sys/compat/cloudabi/cloudabi_clock.c
+++ head/sys/compat/cloudabi/cloudabi_clock.c
@@ -80,31 +80,27 @@
return (0);
}
+/* Fetches the time value of a clock. */
int
-cloudabi_sys_clock_res_get(struct thread *td,
- struct cloudabi_sys_clock_res_get_args *uap)
+cloudabi_clock_time_get(struct thread *td, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t *ret)
{
struct timespec ts;
- cloudabi_timestamp_t cts;
int error;
clockid_t clockid;
- error = cloudabi_convert_clockid(uap->clock_id, &clockid);
- if (error != 0)
- return (error);
- error = kern_clock_getres(td, clockid, &ts);
+ error = cloudabi_convert_clockid(clock_id, &clockid);
if (error != 0)
return (error);
- error = cloudabi_convert_timespec(&ts, &cts);
+ error = kern_clock_gettime(td, clockid, &ts);
if (error != 0)
return (error);
- td->td_retval[0] = cts;
- return (0);
+ return (cloudabi_convert_timespec(&ts, ret));
}
int
-cloudabi_sys_clock_time_get(struct thread *td,
- struct cloudabi_sys_clock_time_get_args *uap)
+cloudabi_sys_clock_res_get(struct thread *td,
+ struct cloudabi_sys_clock_res_get_args *uap)
{
struct timespec ts;
cloudabi_timestamp_t cts;
@@ -114,7 +110,7 @@
error = cloudabi_convert_clockid(uap->clock_id, &clockid);
if (error != 0)
return (error);
- error = kern_clock_gettime(td, clockid, &ts);
+ error = kern_clock_getres(td, clockid, &ts);
if (error != 0)
return (error);
error = cloudabi_convert_timespec(&ts, &cts);
@@ -123,3 +119,15 @@
td->td_retval[0] = cts;
return (0);
}
+
+int
+cloudabi_sys_clock_time_get(struct thread *td,
+ struct cloudabi_sys_clock_time_get_args *uap)
+{
+ cloudabi_timestamp_t ts;
+ int error;
+
+ error = cloudabi_clock_time_get(td, uap->clock_id, &ts);
+ td->td_retval[0] = ts;
+ return (error);
+}
Index: head/sys/compat/cloudabi/cloudabi_futex.c
===================================================================
--- head/sys/compat/cloudabi/cloudabi_futex.c
+++ head/sys/compat/cloudabi/cloudabi_futex.c
@@ -26,22 +26,1197 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+
#include <compat/cloudabi/cloudabi_proto.h>
+#include <compat/cloudabi/cloudabi_syscalldefs.h>
+#include <compat/cloudabi/cloudabi_util.h>
+
+/*
+ * Futexes for CloudABI.
+ *
+ * On most systems, futexes are implemented as objects of a single type
+ * on which a set of operations can be performed. CloudABI makes a clear
+ * distinction between locks and condition variables. A lock may have
+ * zero or more associated condition variables. A condition variable is
+ * always associated with exactly one lock. There is a strict topology.
+ * This approach has two advantages:
+ *
+ * - This topology is guaranteed to be acyclic. Requeueing of threads
+ * only happens in one direction (from condition variables to locks).
+ * This eases locking.
+ * - It means that a futex object for a lock exists when it is unlocked,
+ * but has threads waiting on associated condition variables. Threads
+ * can be requeued to a lock even if the thread performing the wakeup
+ * does not have the lock mapped in its address space.
+ *
+ * This futex implementation only implements a single lock type, namely
+ * a read-write lock. A regular mutex type would not be necessary, as
+ * the read-write lock is as efficient as a mutex if used as such.
+ * Userspace futex locks are 32 bits in size:
+ *
+ * - 1 bit: has threads waiting in kernel-space.
+ * - 1 bit: is write-locked.
+ * - 30 bits:
+ * - if write-locked: thread ID of owner.
+ * - if not write-locked: number of read locks held.
+ *
+ * Condition variables are also 32 bits in size. Their value is
+ * modified exclusively by kernel-space. Zero indicates that a
+ * condition variable has no waiting threads; non-zero indicates the
+ * opposite.
+ *
+ * This implementation is optimal, in the sense that it only wakes up
+ * threads if they can actually continue execution. It does not suffer
+ * from the thundering herd problem. If multiple threads waiting on a
+ * condition variable need to be woken up, only a single thread is
+ * scheduled. All other threads are 'donated' to this thread. After the
+ * thread manages to reacquire the lock, it requeues its donated threads
+ * to the lock.
+ *
+ * TODO(ed): Integrate this functionality into kern_umtx.c instead.
+ * TODO(ed): Store futex objects in a hash table.
+ * TODO(ed): Add actual priority inheritance.
+ * TODO(ed): Let futex_queue also take priorities into account.
+ * TODO(ed): Make locking fine-grained.
+ * TODO(ed): Perform sleeps until an actual absolute point in time,
+ * instead of converting the timestamp to a relative value.
+ */
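+
+/*
+ * As a worked illustration of the lock word layout described above
+ * (a hypothetical helper, not part of this interface): masking off
+ * the two flag bits yields the 30-bit value field, which holds the
+ * owner's thread ID when write-locked and the read-lock count
+ * otherwise.
+ */
+static inline uint32_t
+futex_lock_word_value(cloudabi_lock_t lock)
+{
+
+ return (lock & ~(CLOUDABI_LOCK_WRLOCKED |
+ CLOUDABI_LOCK_KERNEL_MANAGED));
+}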
+
+struct futex_address;
+struct futex_condvar;
+struct futex_lock;
+struct futex_queue;
+struct futex_waiter;
+
+/* Identifier of a location in memory. */
+struct futex_address {
+ /* For process-private objects: address space of the process. */
+ struct vmspace * fa_vmspace;
+ /* For process-shared objects: VM object containing the object. */
+ struct vm_object * fa_vmobject;
+
+ /* Memory address within address space or offset within VM object. */
+ uintptr_t fa_offset;
+};
+
+/* A set of waiting threads. */
+struct futex_queue {
+ STAILQ_HEAD(, futex_waiter) fq_list;
+ unsigned int fq_count;
+};
+
+/* Condition variables. */
+struct futex_condvar {
+ /* Address of the condition variable. */
+ struct futex_address fc_address;
+
+ /* The lock the waiters should be moved to when signalled. */
+ struct futex_lock * fc_lock;
+
+ /* Threads waiting on the condition variable. */
+ struct futex_queue fc_waiters;
+ /*
+ * Number of threads blocked on this condition variable, or
+ * being blocked on the lock after being requeued.
+ */
+ unsigned int fc_waitcount;
+
+ /* Global list pointers. */
+ LIST_ENTRY(futex_condvar) fc_next;
+};
+
+/* Read-write locks. */
+struct futex_lock {
+ /* Address of the lock. */
+ struct futex_address fl_address;
+
+ /*
+ * Current owner of the lock. LOCK_UNMANAGED if the lock is
+ * currently not owned by the kernel. LOCK_OWNER_UNKNOWN in case
+ * the owner is not known (e.g., when the lock is read-locked).
+ */
+ cloudabi_tid_t fl_owner;
+#define LOCK_UNMANAGED 0x0
+#define LOCK_OWNER_UNKNOWN 0x1
+
+ /* Writers blocked on the lock. */
+ struct futex_queue fl_writers;
+ /* Readers blocked on the lock. */
+ struct futex_queue fl_readers;
+ /* Number of threads blocked on this lock + condition variables. */
+ unsigned int fl_waitcount;
+
+ /* Global list pointers. */
+ LIST_ENTRY(futex_lock) fl_next;
+};
+
+/* Information associated with a thread blocked on an object. */
+struct futex_waiter {
+ /* Thread ID. */
+ cloudabi_tid_t fw_tid;
+ /* Condition variable used for waiting. */
+ struct cv fw_wait;
+
+ /* Queue this waiter is currently placed in. */
+ struct futex_queue * fw_queue;
+ /* List pointers of fw_queue. */
+ STAILQ_ENTRY(futex_waiter) fw_next;
+
+ /* Lock has been acquired. */
+ bool fw_locked;
+ /* If not locked, threads that should block after acquiring. */
+ struct futex_queue fw_donated;
+};
+
+/* Global data structures. */
+static MALLOC_DEFINE(M_FUTEX, "futex", "CloudABI futex");
+
+static struct sx futex_global_lock;
+SX_SYSINIT(futex_global_lock, &futex_global_lock, "CloudABI futex global lock");
+
+static LIST_HEAD(, futex_lock) futex_lock_list =
+ LIST_HEAD_INITIALIZER(&futex_lock_list);
+static LIST_HEAD(, futex_condvar) futex_condvar_list =
+ LIST_HEAD_INITIALIZER(&futex_condvar_list);
+
+/* Utility functions. */
+static void futex_lock_assert(const struct futex_lock *);
+static struct futex_lock *futex_lock_lookup_locked(struct futex_address *);
+static void futex_lock_release(struct futex_lock *);
+static int futex_lock_tryrdlock(struct futex_lock *, cloudabi_lock_t *);
+static int futex_lock_unmanage(struct futex_lock *, cloudabi_lock_t *);
+static int futex_lock_update_owner(struct futex_lock *, cloudabi_lock_t *);
+static int futex_lock_wake_up_next(struct futex_lock *, cloudabi_lock_t *);
+static unsigned int futex_queue_count(const struct futex_queue *);
+static void futex_queue_init(struct futex_queue *);
+static void futex_queue_requeue(struct futex_queue *, struct futex_queue *,
+ unsigned int);
+static int futex_queue_sleep(struct futex_queue *, struct futex_lock *,
+ struct futex_waiter *, struct thread *, cloudabi_clockid_t,
+ cloudabi_timestamp_t, cloudabi_timestamp_t);
+static cloudabi_tid_t futex_queue_tid_best(const struct futex_queue *);
+static void futex_queue_wake_up_all(struct futex_queue *);
+static void futex_queue_wake_up_best(struct futex_queue *);
+static void futex_queue_wake_up_donate(struct futex_queue *, unsigned int);
+static int futex_user_load(uint32_t *, uint32_t *);
+static int futex_user_store(uint32_t *, uint32_t);
+static int futex_user_cmpxchg(uint32_t *, uint32_t, uint32_t *, uint32_t);
+
+/*
+ * futex_address operations.
+ */
+
+static int
+futex_address_create(struct futex_address *fa, struct thread *td,
+ const void *object, cloudabi_mflags_t scope)
+{
+ struct vmspace *vs;
+ struct vm_object *vo;
+ vm_map_t map;
+ vm_map_entry_t entry;
+ vm_pindex_t pindex;
+ vm_prot_t prot;
+ boolean_t wired;
+
+ /*
+ * Most of the time objects are stored in privately mapped
+ * anonymous memory. For these objects we wouldn't need to look
+ * up the corresponding VM object. The scope hint provided by
+ * userspace allows us to skip the VM map lookup for the common
+ * case.
+ *
+ * POSIX does permit enabling PTHREAD_PROCESS_SHARED on a lock
+ * stored in a private mapping, at the cost of additional
+ * performance overhead. Fall back to identifying the object by
+ * virtual memory address if the mapping isn't shared.
+ */
+ vs = td->td_proc->p_vmspace;
+ switch (scope) {
+ case CLOUDABI_MAP_SHARED:
+ map = &vs->vm_map;
+ if (vm_map_lookup(&map, (vm_offset_t)object,
+ VM_PROT_COPY | VM_PROT_WRITE, &entry, &vo, &pindex, &prot,
+ &wired) != KERN_SUCCESS)
+ return (EFAULT);
+
+ if (entry->inheritance == VM_INHERIT_SHARE) {
+ /*
+ * Address corresponds to a shared mapping.
+ * Identify the address by its VM object.
+ */
+ fa->fa_vmspace = NULL;
+ fa->fa_vmobject = vo;
+ vm_object_reference(vo);
+ fa->fa_offset = entry->offset - entry->start +
+ (vm_offset_t)object;
+ vm_map_lookup_done(map, entry);
+ return (0);
+ }
+ vm_map_lookup_done(map, entry);
+ /* FALLTHROUGH */
+ case CLOUDABI_MAP_PRIVATE:
+ /*
+ * Address corresponds to a private mapping. Never
+ * identify the address by its VM object, as shadow
+ * objects may get inserted if another thread forks.
+ * Simply use the VM space instead.
+ */
+ fa->fa_vmspace = vs;
+ fa->fa_vmobject = NULL;
+ fa->fa_offset = (uintptr_t)object;
+ return (0);
+ default:
+ return (EINVAL);
+ }
+}
+
+static void
+futex_address_free(struct futex_address *fa)
+{
+
+ if (fa->fa_vmobject != NULL)
+ vm_object_deallocate(fa->fa_vmobject);
+}
+
+static bool
+futex_address_match(const struct futex_address *fa1,
+ const struct futex_address *fa2)
+{
+
+ /* Either fa_vmspace or fa_vmobject is NULL. */
+ return (fa1->fa_vmspace == fa2->fa_vmspace &&
+ fa1->fa_vmobject == fa2->fa_vmobject &&
+ fa1->fa_offset == fa2->fa_offset);
+}
+
+/*
+ * futex_condvar operations.
+ */
+
+static void
+futex_condvar_assert(const struct futex_condvar *fc)
+{
+
+ KASSERT(fc->fc_waitcount >= futex_queue_count(&fc->fc_waiters),
+ ("Total number of waiters cannot be smaller than the wait queue"));
+ futex_lock_assert(fc->fc_lock);
+}
+
+static int
+futex_condvar_lookup(struct thread *td, const cloudabi_condvar_t *address,
+ cloudabi_mflags_t scope, struct futex_condvar **fcret)
+{
+ struct futex_address fa_condvar;
+ struct futex_condvar *fc;
+ int error;
+
+ error = futex_address_create(&fa_condvar, td, address, scope);
+ if (error != 0)
+ return (error);
+
+ sx_xlock(&futex_global_lock);
+ LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
+ if (futex_address_match(&fc->fc_address, &fa_condvar)) {
+ /* Found matching lock object. */
+ futex_address_free(&fa_condvar);
+ futex_condvar_assert(fc);
+ *fcret = fc;
+ return (0);
+ }
+ }
+ sx_xunlock(&futex_global_lock);
+ futex_address_free(&fa_condvar);
+ return (ENOENT);
+}
+
+static int
+futex_condvar_lookup_or_create(struct thread *td,
+ const cloudabi_condvar_t *condvar, cloudabi_mflags_t condvar_scope,
+ const cloudabi_lock_t *lock, cloudabi_mflags_t lock_scope,
+ struct futex_condvar **fcret)
+{
+ struct futex_address fa_condvar, fa_lock;
+ struct futex_condvar *fc;
+ struct futex_lock *fl;
+ int error;
+
+ error = futex_address_create(&fa_condvar, td, condvar, condvar_scope);
+ if (error != 0)
+ return (error);
+ error = futex_address_create(&fa_lock, td, lock, lock_scope);
+ if (error != 0) {
+ futex_address_free(&fa_condvar);
+ return (error);
+ }
+
+ sx_xlock(&futex_global_lock);
+ LIST_FOREACH(fc, &futex_condvar_list, fc_next) {
+ if (!futex_address_match(&fc->fc_address, &fa_condvar))
+ continue;
+ fl = fc->fc_lock;
+ if (!futex_address_match(&fl->fl_address, &fa_lock)) {
+ /* Condition variable is owned by a different lock. */
+ futex_address_free(&fa_condvar);
+ futex_address_free(&fa_lock);
+ sx_xunlock(&futex_global_lock);
+ return (EINVAL);
+ }
+
+ /* Found fully matching condition variable. */
+ futex_address_free(&fa_condvar);
+ futex_address_free(&fa_lock);
+ futex_condvar_assert(fc);
+ *fcret = fc;
+ return (0);
+ }
+
+ /* None found. Create new condition variable object. */
+ fc = malloc(sizeof(*fc), M_FUTEX, M_WAITOK);
+ fc->fc_address = fa_condvar;
+ fc->fc_lock = futex_lock_lookup_locked(&fa_lock);
+ futex_queue_init(&fc->fc_waiters);
+ fc->fc_waitcount = 0;
+ LIST_INSERT_HEAD(&futex_condvar_list, fc, fc_next);
+ *fcret = fc;
+ return (0);
+}
+
+static void
+futex_condvar_release(struct futex_condvar *fc)
+{
+ struct futex_lock *fl;
+
+ futex_condvar_assert(fc);
+ fl = fc->fc_lock;
+ if (fc->fc_waitcount == 0) {
+ /* Condition variable has no waiters. Deallocate it. */
+ futex_address_free(&fc->fc_address);
+ LIST_REMOVE(fc, fc_next);
+ free(fc, M_FUTEX);
+ }
+ futex_lock_release(fl);
+}
+
+static int
+futex_condvar_unmanage(struct futex_condvar *fc,
+ cloudabi_condvar_t *condvar)
+{
+
+ if (futex_queue_count(&fc->fc_waiters) != 0)
+ return (0);
+ return (futex_user_store(condvar, CLOUDABI_CONDVAR_HAS_NO_WAITERS));
+}
+
+/*
+ * futex_lock operations.
+ */
+
+static void
+futex_lock_assert(const struct futex_lock *fl)
+{
+
+ /*
+ * A futex lock can only be kernel-managed if it has waiters.
+ * Vice versa: if a futex lock has waiters, it must be
+ * kernel-managed.
+ */
+ KASSERT((fl->fl_owner == LOCK_UNMANAGED) ==
+ (futex_queue_count(&fl->fl_readers) == 0 &&
+ futex_queue_count(&fl->fl_writers) == 0),
+ ("Managed locks must have waiting threads"));
+ KASSERT(fl->fl_waitcount != 0 || fl->fl_owner == LOCK_UNMANAGED,
+ ("Lock with no waiters must be unmanaged"));
+}
+
+static int
+futex_lock_lookup(struct thread *td, const cloudabi_lock_t *address,
+ cloudabi_mflags_t scope, struct futex_lock **flret)
+{
+ struct futex_address fa;
+ int error;
+
+ error = futex_address_create(&fa, td, address, scope);
+ if (error != 0)
+ return (error);
+
+ sx_xlock(&futex_global_lock);
+ *flret = futex_lock_lookup_locked(&fa);
+ return (0);
+}
+
+static struct futex_lock *
+futex_lock_lookup_locked(struct futex_address *fa)
+{
+ struct futex_lock *fl;
+
+ LIST_FOREACH(fl, &futex_lock_list, fl_next) {
+ if (futex_address_match(&fl->fl_address, fa)) {
+ /* Found matching lock object. */
+ futex_address_free(fa);
+ futex_lock_assert(fl);
+ return (fl);
+ }
+ }
+
+ /* None found. Create new lock object. */
+ fl = malloc(sizeof(*fl), M_FUTEX, M_WAITOK);
+ fl->fl_address = *fa;
+ fl->fl_owner = LOCK_UNMANAGED;
+ futex_queue_init(&fl->fl_readers);
+ futex_queue_init(&fl->fl_writers);
+ fl->fl_waitcount = 0;
+ LIST_INSERT_HEAD(&futex_lock_list, fl, fl_next);
+ return (fl);
+}
+
+static int
+futex_lock_rdlock(struct futex_lock *fl, struct thread *td,
+ cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision)
+{
+ struct futex_waiter fw;
+ int error;
+
+ error = futex_lock_tryrdlock(fl, lock);
+ if (error == EBUSY) {
+ /* Suspend execution. */
+ KASSERT(fl->fl_owner != LOCK_UNMANAGED,
+ ("Attempted to sleep on an unmanaged lock"));
+ error = futex_queue_sleep(&fl->fl_readers, fl, &fw, td,
+ clock_id, timeout, precision);
+ KASSERT((error == 0) == fw.fw_locked,
+ ("Should have locked write lock on success"));
+ KASSERT(futex_queue_count(&fw.fw_donated) == 0,
+ ("Lock functions cannot receive threads"));
+ }
+ if (error != 0)
+ futex_lock_unmanage(fl, lock);
+ return (error);
+}
+
+static void
+futex_lock_release(struct futex_lock *fl)
+{
+
+ futex_lock_assert(fl);
+ if (fl->fl_waitcount == 0) {
+ /* Lock object is unreferenced. Deallocate it. */
+ KASSERT(fl->fl_owner == LOCK_UNMANAGED,
+ ("Attempted to free a managed lock"));
+ futex_address_free(&fl->fl_address);
+ LIST_REMOVE(fl, fl_next);
+ free(fl, M_FUTEX);
+ }
+ sx_xunlock(&futex_global_lock);
+}
+
+static int
+futex_lock_unmanage(struct futex_lock *fl, cloudabi_lock_t *lock)
+{
+ cloudabi_lock_t cmp, old;
+ int error;
+
+ if (futex_queue_count(&fl->fl_readers) == 0 &&
+ futex_queue_count(&fl->fl_writers) == 0) {
+ /* Lock should be unmanaged. */
+ fl->fl_owner = LOCK_UNMANAGED;
+
+ /* Clear kernel-managed bit. */
+ error = futex_user_load(lock, &old);
+ if (error != 0)
+ return (error);
+ for (;;) {
+ cmp = old;
+ error = futex_user_cmpxchg(lock, cmp, &old,
+ cmp & ~CLOUDABI_LOCK_KERNEL_MANAGED);
+ if (error != 0)
+ return (error);
+ if (old == cmp)
+ break;
+ }
+ }
+ return (0);
+}
+
+/* Sets the owner of a lock based on a userspace lock value. */
+static void
+futex_lock_set_owner(struct futex_lock *fl, cloudabi_lock_t lock)
+{
+
+ /* Lock has no explicit owner. */
+ if ((lock & ~CLOUDABI_LOCK_WRLOCKED) == 0) {
+ fl->fl_owner = LOCK_OWNER_UNKNOWN;
+ return;
+ }
+ lock &= ~(CLOUDABI_LOCK_WRLOCKED | CLOUDABI_LOCK_KERNEL_MANAGED);
+
+ /* Don't allow userspace to silently unlock. */
+ if (lock == LOCK_UNMANAGED) {
+ fl->fl_owner = LOCK_OWNER_UNKNOWN;
+ return;
+ }
+ fl->fl_owner = lock;
+}
+
+static int
+futex_lock_unlock(struct futex_lock *fl, struct thread *td,
+ cloudabi_lock_t *lock)
+{
+ int error;
+
+ /* Validate that this thread is allowed to unlock. */
+ error = futex_lock_update_owner(fl, lock);
+ if (error != 0)
+ return (error);
+ if (fl->fl_owner != LOCK_UNMANAGED && fl->fl_owner != td->td_tid)
+ return (EPERM);
+ return (futex_lock_wake_up_next(fl, lock));
+}
+
+/* Synchronizes the owner of the lock from userspace if needed. */
+static int
+futex_lock_update_owner(struct futex_lock *fl, cloudabi_lock_t *address)
+{
+ cloudabi_lock_t lock;
+ int error;
+
+ if (fl->fl_owner == LOCK_OWNER_UNKNOWN) {
+ error = futex_user_load(address, &lock);
+ if (error != 0)
+ return (error);
+ futex_lock_set_owner(fl, lock);
+ }
+ return (0);
+}
+
+static int
+futex_lock_tryrdlock(struct futex_lock *fl, cloudabi_lock_t *address)
+{
+ cloudabi_lock_t old, cmp;
+ int error;
+
+ if (fl->fl_owner != LOCK_UNMANAGED) {
+ /* Lock is already acquired. */
+ return (EBUSY);
+ }
+
+ old = CLOUDABI_LOCK_UNLOCKED;
+ for (;;) {
+ if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
+ /*
+ * Userspace lock is kernel-managed, even though
+ * the kernel disagrees.
+ */
+ return (EINVAL);
+ }
+
+ if ((old & CLOUDABI_LOCK_WRLOCKED) == 0) {
+ /*
+ * Lock is not write-locked. Attempt to acquire
+ * it by increasing the read count.
+ */
+ cmp = old;
+ error = futex_user_cmpxchg(address, cmp, &old, cmp + 1);
+ if (error != 0)
+ return (error);
+ if (old == cmp) {
+ /* Success. */
+ return (0);
+ }
+ } else {
+ /* Lock is write-locked. Make it kernel-managed. */
+ cmp = old;
+ error = futex_user_cmpxchg(address, cmp, &old,
+ cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
+ if (error != 0)
+ return (error);
+ if (old == cmp) {
+ /* Success. */
+ futex_lock_set_owner(fl, cmp);
+ return (EBUSY);
+ }
+ }
+ }
+}
+
+static int
+futex_lock_trywrlock(struct futex_lock *fl, cloudabi_lock_t *address,
+ cloudabi_tid_t tid, bool force_kernel_managed)
+{
+ cloudabi_lock_t old, new, cmp;
+ int error;
+
+ if (fl->fl_owner == tid) {
+ /* Attempted to acquire lock recursively. */
+ return (EDEADLK);
+ }
+ if (fl->fl_owner != LOCK_UNMANAGED) {
+ /* Lock is already acquired. */
+ return (EBUSY);
+ }
+
+ old = CLOUDABI_LOCK_UNLOCKED;
+ for (;;) {
+ if ((old & CLOUDABI_LOCK_KERNEL_MANAGED) != 0) {
+ /*
+ * Userspace lock is kernel-managed, even though
+ * the kernel disagrees.
+ */
+ return (EINVAL);
+ }
+ if (old == (tid | CLOUDABI_LOCK_WRLOCKED)) {
+ /* Attempted to acquire lock recursively. */
+ return (EDEADLK);
+ }
+
+ if (old == CLOUDABI_LOCK_UNLOCKED) {
+ /* Lock is unlocked. Attempt to acquire it. */
+ new = tid | CLOUDABI_LOCK_WRLOCKED;
+ if (force_kernel_managed)
+ new |= CLOUDABI_LOCK_KERNEL_MANAGED;
+ error = futex_user_cmpxchg(address,
+ CLOUDABI_LOCK_UNLOCKED, &old, new);
+ if (error != 0)
+ return (error);
+ if (old == CLOUDABI_LOCK_UNLOCKED) {
+ /* Success. */
+ if (force_kernel_managed)
+ fl->fl_owner = tid;
+ return (0);
+ }
+ } else {
+ /* Lock is still locked. Make it kernel-managed. */
+ cmp = old;
+ error = futex_user_cmpxchg(address, cmp, &old,
+ cmp | CLOUDABI_LOCK_KERNEL_MANAGED);
+ if (error != 0)
+ return (error);
+ if (old == cmp) {
+ /* Success. */
+ futex_lock_set_owner(fl, cmp);
+ return (EBUSY);
+ }
+ }
+ }
+}
+
+static int
+futex_lock_wake_up_next(struct futex_lock *fl, cloudabi_lock_t *lock)
+{
+ cloudabi_tid_t tid;
+ int error;
+
+ /*
+ * Determine which thread(s) to wake up. Prefer waking up
+ * writers over readers to prevent write starvation.
+ */
+ if (futex_queue_count(&fl->fl_writers) > 0) {
+ /* Transfer ownership to a single write-locker. */
+ if (futex_queue_count(&fl->fl_writers) > 1 ||
+ futex_queue_count(&fl->fl_readers) > 0) {
+ /* Lock should remain managed afterwards. */
+ tid = futex_queue_tid_best(&fl->fl_writers);
+ error = futex_user_store(lock,
+ tid | CLOUDABI_LOCK_WRLOCKED |
+ CLOUDABI_LOCK_KERNEL_MANAGED);
+ if (error != 0)
+ return (error);
+
+ futex_queue_wake_up_best(&fl->fl_writers);
+ fl->fl_owner = tid;
+ } else {
+ /* Lock can become unmanaged afterwards. */
+ error = futex_user_store(lock,
+ futex_queue_tid_best(&fl->fl_writers) |
+ CLOUDABI_LOCK_WRLOCKED);
+ if (error != 0)
+ return (error);
+
+ futex_queue_wake_up_best(&fl->fl_writers);
+ fl->fl_owner = LOCK_UNMANAGED;
+ }
+ } else {
+ /* Transfer ownership to all read-lockers (if any). */
+ error = futex_user_store(lock,
+ futex_queue_count(&fl->fl_readers));
+ if (error != 0)
+ return (error);
+
+ /* Wake up all threads. */
+ futex_queue_wake_up_all(&fl->fl_readers);
+ fl->fl_owner = LOCK_UNMANAGED;
+ }
+ return (0);
+}
+
+static int
+futex_lock_wrlock(struct futex_lock *fl, struct thread *td,
+ cloudabi_lock_t *lock, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision,
+ struct futex_queue *donated)
+{
+ struct futex_waiter fw;
+ int error;
+
+ error = futex_lock_trywrlock(fl, lock, td->td_tid,
+ futex_queue_count(donated) > 0);
+
+ if (error == 0 || error == EBUSY) {
+ /* Put donated threads in queue before suspending. */
+ KASSERT(futex_queue_count(donated) == 0 ||
+ fl->fl_owner != LOCK_UNMANAGED,
+ ("Lock should be managed if we are going to donate"));
+ futex_queue_requeue(donated, &fl->fl_writers, UINT_MAX);
+ } else {
+ /*
+ * This thread cannot deal with the donated threads.
+ * Wake up the next thread and let it try it by itself.
+ */
+ futex_queue_wake_up_donate(donated, UINT_MAX);
+ }
+
+ if (error == EBUSY) {
+ /* Suspend execution if the lock was busy. */
+ KASSERT(fl->fl_owner != LOCK_UNMANAGED,
+ ("Attempted to sleep on an unmanaged lock"));
+ error = futex_queue_sleep(&fl->fl_writers, fl, &fw, td,
+ clock_id, timeout, precision);
+ KASSERT((error == 0) == fw.fw_locked,
+ ("Should have locked write lock on success"));
+ KASSERT(futex_queue_count(&fw.fw_donated) == 0,
+ ("Lock functions cannot receive threads"));
+ }
+ if (error != 0)
+ futex_lock_unmanage(fl, lock);
+ return (error);
+}
+
+/*
+ * futex_queue operations.
+ */
+
+static cloudabi_tid_t
+futex_queue_tid_best(const struct futex_queue *fq)
+{
+
+ return (STAILQ_FIRST(&fq->fq_list)->fw_tid);
+}
+
+static unsigned int
+futex_queue_count(const struct futex_queue *fq)
+{
+
+ return (fq->fq_count);
+}
+
+static void
+futex_queue_init(struct futex_queue *fq)
+{
+
+ STAILQ_INIT(&fq->fq_list);
+ fq->fq_count = 0;
+}
+
+/* Converts a relative timestamp to an sbintime. */
+static sbintime_t
+futex_queue_convert_timestamp_relative(cloudabi_timestamp_t ts)
+{
+ cloudabi_timestamp_t s, ns;
+
+ s = ts / 1000000000;
+ ns = ts % 1000000000;
+ if (s > INT32_MAX)
+ return (INT64_MAX);
+ return ((s << 32) + (ns << 32) / 1000000000);
+}
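+
+/*
+ * For example, ts = 1500000000 (1.5 s) splits into s = 1 and
+ * ns = 500000000, giving (1 << 32) + (500000000 << 32) / 1000000000,
+ * i.e. 1.5 in the Q32.32 fixed-point format used by sbintime_t.
+ */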
+
+/* Converts an absolute timestamp and precision to a pair of sbintime values. */
+static int
+futex_queue_convert_timestamp(struct thread *td, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision,
+ sbintime_t *sbttimeout, sbintime_t *sbtprecision)
+{
+ cloudabi_timestamp_t now;
+ int error;
+
+ /* Make the time relative. */
+ error = cloudabi_clock_time_get(td, clock_id, &now);
+ if (error != 0)
+ return (error);
+ timeout = timeout < now ? 0 : timeout - now;
+
+ *sbttimeout = futex_queue_convert_timestamp_relative(timeout);
+ *sbtprecision = futex_queue_convert_timestamp_relative(precision);
+ return (0);
+}
+
+static int
+futex_queue_sleep(struct futex_queue *fq, struct futex_lock *fl,
+ struct futex_waiter *fw, struct thread *td, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision)
+{
+ sbintime_t sbttimeout, sbtprecision;
+ int error;
+
+ /* Initialize futex_waiter object. */
+ fw->fw_tid = td->td_tid;
+ fw->fw_locked = false;
+ futex_queue_init(&fw->fw_donated);
+
+ if (timeout != UINT64_MAX) {
+ /* Convert timeout duration. */
+ error = futex_queue_convert_timestamp(td, clock_id, timeout,
+ precision, &sbttimeout, &sbtprecision);
+ if (error != 0)
+ return (error);
+ }
+
+ /* Place object in the queue. */
+ fw->fw_queue = fq;
+ STAILQ_INSERT_TAIL(&fq->fq_list, fw, fw_next);
+ ++fq->fq_count;
+
+ cv_init(&fw->fw_wait, "futex");
+ ++fl->fl_waitcount;
+
+ futex_lock_assert(fl);
+ if (timeout == UINT64_MAX) {
+ /* Wait without a timeout. */
+ error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
+ } else {
+ /* Wait respecting the timeout. */
+ error = cv_timedwait_sig_sbt(&fw->fw_wait, &futex_global_lock,
+ sbttimeout, sbtprecision, 0);
+ futex_lock_assert(fl);
+ if (error == EWOULDBLOCK &&
+ fw->fw_queue != NULL && fw->fw_queue != fq) {
+ /*
+ * We got signalled on a condition variable, but
+ * observed a timeout while waiting to reacquire
+ * the lock. In other words, we didn't actually
+ * time out. Go back to sleep and wait for the
+ * lock to be reacquired.
+ */
+ error = cv_wait_sig(&fw->fw_wait, &futex_global_lock);
+ }
+ }
+ futex_lock_assert(fl);
+
+ --fl->fl_waitcount;
+ cv_destroy(&fw->fw_wait);
+
+ fq = fw->fw_queue;
+ if (fq == NULL) {
+ /* Thread got dequeued, so we've slept successfully. */
+ return (0);
+ }
+
+ /* Thread is still enqueued. Remove it. */
+ KASSERT(error != 0, ("Woken up thread is still enqueued"));
+ STAILQ_REMOVE(&fq->fq_list, fw, futex_waiter, fw_next);
+ --fq->fq_count;
+ return (error == EWOULDBLOCK ? ETIMEDOUT : error);
+}
+
+/* Moves up to nwaiters waiters from one queue to another. */
+static void
+futex_queue_requeue(struct futex_queue *fqfrom, struct futex_queue *fqto,
+ unsigned int nwaiters)
+{
+ struct futex_waiter *fw;
+
+ /* Move waiters to the target queue. */
+ while (nwaiters-- > 0 && !STAILQ_EMPTY(&fqfrom->fq_list)) {
+ fw = STAILQ_FIRST(&fqfrom->fq_list);
+ STAILQ_REMOVE_HEAD(&fqfrom->fq_list, fw_next);
+ --fqfrom->fq_count;
+
+ fw->fw_queue = fqto;
+ STAILQ_INSERT_TAIL(&fqto->fq_list, fw, fw_next);
+ ++fqto->fq_count;
+ }
+}
+
+/* Wakes up all waiters in a queue. */
+static void
+futex_queue_wake_up_all(struct futex_queue *fq)
+{
+ struct futex_waiter *fw;
+
+ STAILQ_FOREACH(fw, &fq->fq_list, fw_next) {
+ fw->fw_locked = true;
+ fw->fw_queue = NULL;
+ cv_signal(&fw->fw_wait);
+ }
+
+ STAILQ_INIT(&fq->fq_list);
+ fq->fq_count = 0;
+}
+
+/*
+ * Wakes up the best waiter (i.e., the waiter having the highest
+ * priority) in a queue.
+ */
+static void
+futex_queue_wake_up_best(struct futex_queue *fq)
+{
+ struct futex_waiter *fw;
+
+ fw = STAILQ_FIRST(&fq->fq_list);
+ fw->fw_locked = true;
+ fw->fw_queue = NULL;
+ cv_signal(&fw->fw_wait);
+
+ STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
+ --fq->fq_count;
+}
+
+static void
+futex_queue_wake_up_donate(struct futex_queue *fq, unsigned int nwaiters)
+{
+ struct futex_waiter *fw;
+
+ fw = STAILQ_FIRST(&fq->fq_list);
+ if (fw == NULL)
+ return;
+ fw->fw_locked = false;
+ fw->fw_queue = NULL;
+ cv_signal(&fw->fw_wait);
+
+ STAILQ_REMOVE_HEAD(&fq->fq_list, fw_next);
+ --fq->fq_count;
+ futex_queue_requeue(fq, &fw->fw_donated, nwaiters);
+}
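+
+/*
+ * For example, signalling a condition variable that has three waiters
+ * while the associated lock is unmanaged wakes up only the first
+ * waiter (with fw_locked == false) and moves the other two into its
+ * fw_donated queue; that thread then reacquires the lock through
+ * futex_lock_wrlock(), which requeues the donated waiters onto
+ * fl_writers (see cloudabi_futex_condvar_wait() below).
+ */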
+
+/*
+ * futex_user operations. Used to adjust values in userspace.
+ */
+
+static int
+futex_user_load(uint32_t *obj, uint32_t *val)
+{
+
+ return (fueword32(obj, val) != 0 ? EFAULT : 0);
+}
+
+static int
+futex_user_store(uint32_t *obj, uint32_t val)
+{
+
+ return (suword32(obj, val) != 0 ? EFAULT : 0);
+}
+
+static int
+futex_user_cmpxchg(uint32_t *obj, uint32_t cmp, uint32_t *old, uint32_t new)
+{
+
+ return (casueword32(obj, cmp, old, new) != 0 ? EFAULT : 0);
+}
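+
+/*
+ * Sketch of the canonical update idiom built on the wrappers above
+ * (a hypothetical helper; the same pattern appears in
+ * futex_lock_unmanage()): reload and retry the compare-and-exchange
+ * until it observes the value it compared against, since userspace
+ * may modify the word concurrently.
+ */
+static inline int
+futex_user_clear_bits(uint32_t *obj, uint32_t bits)
+{
+ uint32_t cmp, old;
+ int error;
+
+ error = futex_user_load(obj, &old);
+ if (error != 0)
+ return (error);
+ for (;;) {
+ cmp = old;
+ error = futex_user_cmpxchg(obj, cmp, &old, cmp & ~bits);
+ if (error != 0)
+ return (error);
+ if (old == cmp)
+ return (0);
+ }
+}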
+
+/*
+ * Blocking calls: acquiring locks, waiting on condition variables.
+ */
+
+int
+cloudabi_futex_condvar_wait(struct thread *td, cloudabi_condvar_t *condvar,
+ cloudabi_mflags_t condvar_scope, cloudabi_lock_t *lock,
+ cloudabi_mflags_t lock_scope, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision)
+{
+ struct futex_condvar *fc;
+ struct futex_lock *fl;
+ struct futex_waiter fw;
+ int error, error2;
+
+ /* Lookup condition variable object. */
+ error = futex_condvar_lookup_or_create(td, condvar, condvar_scope, lock,
+ lock_scope, &fc);
+ if (error != 0)
+ return (error);
+ fl = fc->fc_lock;
+
+ /*
+ * Set the condition variable to something other than
+ * CLOUDABI_CONDVAR_HAS_NO_WAITERS to make userspace threads
+ * call into the kernel to perform wakeups.
+ */
+ error = futex_user_store(condvar, ~CLOUDABI_CONDVAR_HAS_NO_WAITERS);
+ if (error != 0) {
+ futex_condvar_release(fc);
+ return (error);
+ }
+
+ /* Drop the lock. */
+ error = futex_lock_unlock(fl, td, lock);
+ if (error != 0) {
+ futex_condvar_unmanage(fc, condvar);
+ futex_condvar_release(fc);
+ return (error);
+ }
+
+ /* Go to sleep. */
+ ++fc->fc_waitcount;
+ error = futex_queue_sleep(&fc->fc_waiters, fc->fc_lock, &fw, td,
+ clock_id, timeout, precision);
+ if (fw.fw_locked) {
+ /* Waited and got the lock assigned to us. */
+ KASSERT(futex_queue_count(&fw.fw_donated) == 0,
+ ("Received threads while being locked"));
+ } else if (error == 0 || error == ETIMEDOUT) {
+ if (error != 0)
+ futex_condvar_unmanage(fc, condvar);
+ /*
+ * Got woken up without having the lock assigned to us.
+ * This can happen in two cases:
+ *
+ * 1. We observed a timeout on a condition variable.
+ * 2. We got signalled on a condition variable while the
+ * associated lock is unlocked. We are the first
+ * thread that gets woken up. This thread is
+ * responsible for reacquiring the userspace lock.
+ */
+ error2 = futex_lock_wrlock(fl, td, lock,
+ CLOUDABI_CLOCK_MONOTONIC, UINT64_MAX, 0, &fw.fw_donated);
+ if (error2 != 0)
+ error = error2;
+ } else {
+ KASSERT(futex_queue_count(&fw.fw_donated) == 0,
+ ("Received threads on error"));
+ futex_condvar_unmanage(fc, condvar);
+ futex_lock_unmanage(fl, lock);
+ }
+ --fc->fc_waitcount;
+ futex_condvar_release(fc);
+ return (error);
+}
+
+int
+cloudabi_futex_lock_rdlock(struct thread *td, cloudabi_lock_t *lock,
+ cloudabi_mflags_t scope, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision)
+{
+ struct futex_lock *fl;
+ int error;
+
+ /* Look up lock object. */
+ error = futex_lock_lookup(td, lock, scope, &fl);
+ if (error != 0)
+ return (error);
+
+ error = futex_lock_rdlock(fl, td, lock, clock_id, timeout,
+ precision);
+ futex_lock_release(fl);
+ return (error);
+}
+
+int
+cloudabi_futex_lock_wrlock(struct thread *td, cloudabi_lock_t *lock,
+ cloudabi_mflags_t scope, cloudabi_clockid_t clock_id,
+ cloudabi_timestamp_t timeout, cloudabi_timestamp_t precision)
+{
+ struct futex_lock *fl;
+ struct futex_queue fq;
+ int error;
+
+ /* Look up lock object. */
+ error = futex_lock_lookup(td, lock, scope, &fl);
+ if (error != 0)
+ return (error);
+
+ futex_queue_init(&fq);
+ error = futex_lock_wrlock(fl, td, lock, clock_id, timeout,
+ precision, &fq);
+ futex_lock_release(fl);
+ return (error);
+}
+
+/*
+ * Non-blocking calls: releasing locks, signalling condition variables.
+ */
int
cloudabi_sys_condvar_signal(struct thread *td,
struct cloudabi_sys_condvar_signal_args *uap)
{
+ struct futex_condvar *fc;
+ struct futex_lock *fl;
+ cloudabi_nthreads_t nwaiters;
+ int error;
+
+ nwaiters = uap->nwaiters;
+ if (nwaiters == 0) {
+ /* No threads to wake up. */
+ return (0);
+ }
+
+ /* Look up futex object. */
+ error = futex_condvar_lookup(td, uap->condvar, uap->scope, &fc);
+ if (error != 0) {
+ /* Race condition: condition variable with no waiters. */
+ return (error == ENOENT ? 0 : error);
+ }
+ fl = fc->fc_lock;
+
+ if (fl->fl_owner == LOCK_UNMANAGED) {
+ /*
+ * The lock is currently not managed by the kernel,
+ * meaning we must attempt to acquire the userspace lock
+ * first. We cannot requeue threads to an unmanaged lock,
+ * as these threads will then never be scheduled.
+ *
+ * Unfortunately, the memory address of the lock is
+ * unknown from this context, meaning that we cannot
+ * acquire the lock on behalf of the first thread to be
+ * scheduled. The lock may not even be mapped within the
+ * address space of the current thread.
+ *
+ * To solve this, wake up a single waiter that will
+ * attempt to acquire the lock. Donate all of the other
+ * waiters that need to be woken up to this waiter, so
+ * it can requeue them after acquiring the lock.
+ */
+ futex_queue_wake_up_donate(&fc->fc_waiters, nwaiters - 1);
+ } else {
+ /*
+ * Lock is already managed by the kernel. This makes it
+ * easy, as we can requeue the threads from the
+ * condition variable directly to the associated lock.
+ */
+ futex_queue_requeue(&fc->fc_waiters, &fl->fl_writers, nwaiters);
+ }
- /* Not implemented. */
- return (ENOSYS);
+ /* Clear userspace condition variable if all waiters are gone. */
+ error = futex_condvar_unmanage(fc, uap->condvar);
+ futex_condvar_release(fc);
+ return (error);
}
int
cloudabi_sys_lock_unlock(struct thread *td,
struct cloudabi_sys_lock_unlock_args *uap)
{
+ struct futex_lock *fl;
+ int error;
- /* Not implemented. */
- return (ENOSYS);
+ error = futex_lock_lookup(td, uap->lock, uap->scope, &fl);
+ if (error != 0)
+ return (error);
+ error = futex_lock_unlock(fl, td, uap->lock);
+ futex_lock_release(fl);
+ return (error);
}
Index: head/sys/compat/cloudabi/cloudabi_thread.c
===================================================================
--- head/sys/compat/cloudabi/cloudabi_thread.c
+++ head/sys/compat/cloudabi/cloudabi_thread.c
@@ -29,16 +29,30 @@
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/sched.h>
+#include <sys/syscallsubr.h>
#include <compat/cloudabi/cloudabi_proto.h>
+#include <compat/cloudabi/cloudabi_syscalldefs.h>
int
cloudabi_sys_thread_exit(struct thread *td,
struct cloudabi_sys_thread_exit_args *uap)
{
-
- /* Not implemented. */
- return (ENOSYS);
+ struct cloudabi_sys_lock_unlock_args cloudabi_sys_lock_unlock_args = {
+ .lock = uap->lock,
+ .scope = uap->scope,
+ };
+
+ /* Wake up joining thread. */
+ cloudabi_sys_lock_unlock(td, &cloudabi_sys_lock_unlock_args);
+
+ /*
+ * Attempt to terminate the thread. Terminate the process if
+ * it's the last thread.
+ */
+ kern_thr_exit(td);
+ exit1(td, 0, 0);
+ /* NOTREACHED */
}
int
Index: head/sys/compat/cloudabi/cloudabi_util.h
===================================================================
--- head/sys/compat/cloudabi/cloudabi_util.h
+++ head/sys/compat/cloudabi/cloudabi_util.h
@@ -30,12 +30,33 @@
#include <compat/cloudabi/cloudabi_syscalldefs.h>
+struct thread;
struct timespec;
+/* Fetches the time value of a clock. */
+int cloudabi_clock_time_get(struct thread *, cloudabi_clockid_t,
+ cloudabi_timestamp_t *);
+
/* Converts a FreeBSD errno to a CloudABI errno. */
cloudabi_errno_t cloudabi_convert_errno(int);
/* Converts a struct timespec to a CloudABI timestamp. */
int cloudabi_convert_timespec(const struct timespec *, cloudabi_timestamp_t *);
+/*
+ * Blocking futex functions.
+ *
+ * These functions are called by CloudABI's polling system calls to
+ * sleep on a lock or condition variable.
+ */
+int cloudabi_futex_condvar_wait(struct thread *, cloudabi_condvar_t *,
+ cloudabi_mflags_t, cloudabi_lock_t *, cloudabi_mflags_t, cloudabi_clockid_t,
+ cloudabi_timestamp_t, cloudabi_timestamp_t);
+int cloudabi_futex_lock_rdlock(struct thread *, cloudabi_lock_t *,
+ cloudabi_mflags_t, cloudabi_clockid_t, cloudabi_timestamp_t,
+ cloudabi_timestamp_t);
+int cloudabi_futex_lock_wrlock(struct thread *, cloudabi_lock_t *,
+ cloudabi_mflags_t, cloudabi_clockid_t, cloudabi_timestamp_t,
+ cloudabi_timestamp_t);
+
#endif
