diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c index 62d7ac68a56a..5b69952fa86e 100644 --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -1,781 +1,779 @@ /* * Copyright (c) 1995 John Birrell . * Copyright (c) 2006 David Xu . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by John Birrell. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include #include #include "un-namespace.h" #include "thr_private.h" #if defined(_PTHREADS_INVARIANTS) #define MUTEX_INIT_LINK(m) do { \ (m)->m_qe.tqe_prev = NULL; \ (m)->m_qe.tqe_next = NULL; \ } while (0) #define MUTEX_ASSERT_IS_OWNED(m) do { \ if (__predict_false((m)->m_qe.tqe_prev == NULL))\ PANIC("mutex is not on list"); \ } while (0) #define MUTEX_ASSERT_NOT_OWNED(m) do { \ if (__predict_false((m)->m_qe.tqe_prev != NULL || \ (m)->m_qe.tqe_next != NULL)) \ PANIC("mutex is on list"); \ } while (0) #else #define MUTEX_INIT_LINK(m) #define MUTEX_ASSERT_IS_OWNED(m) #define MUTEX_ASSERT_NOT_OWNED(m) #endif /* * For adaptive mutexes, how many times to spin doing trylock2 * before entering the kernel to block */ #define MUTEX_ADAPTIVE_SPINS 2000 /* * Prototypes */ int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutex_attr); int __pthread_mutex_trylock(pthread_mutex_t *mutex); int __pthread_mutex_lock(pthread_mutex_t *mutex); int __pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime); int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); int _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count); int _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count); int __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count); int _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); int _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count); int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); static int mutex_self_trylock(pthread_mutex_t); static int mutex_self_lock(pthread_mutex_t, const struct timespec *abstime); static int mutex_unlock_common(pthread_mutex_t *); +static int mutex_lock_sleep(struct pthread *, pthread_mutex_t, + const struct timespec *); __weak_reference(__pthread_mutex_init, pthread_mutex_init); 
__strong_reference(__pthread_mutex_init, _pthread_mutex_init);
__weak_reference(__pthread_mutex_lock, pthread_mutex_lock);
__strong_reference(__pthread_mutex_lock, _pthread_mutex_lock);
__weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock);
__strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock);
__weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock);
__strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock);

/* Single underscore versions provided for libc internal usage: */
/* No difference between libc and application usage of these: */
__weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy);
__weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock);

__weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling);
__weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling);

__weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np);
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np);
__weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np);

__weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np);
__strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np);
__weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np);
__weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np);

/*
 * Allocate and initialize a mutex object.
 *
 *	mutex		receives the pointer to the new mutex on success.
 *	mutex_attr	attributes to apply; NULL selects
 *			_pthread_mutexattr_default.
 *	calloc_cb	allocator, called as
 *			calloc_cb(1, sizeof(struct pthread_mutex)).
 *
 * Returns 0 on success, EINVAL when a caller-supplied attribute has a type
 * or protocol outside the accepted range, and ENOMEM when calloc_cb
 * returns NULL.
 */
static int
mutex_init(pthread_mutex_t *mutex,
    const pthread_mutexattr_t *mutex_attr,
    void *(calloc_cb)(size_t, size_t))
{
	const struct pthread_mutex_attr *attr;
	struct pthread_mutex *pmutex;

	if (mutex_attr == NULL) {
		attr = &_pthread_mutexattr_default;
	} else {
		/* pthread_mutexattr_t is itself a pointer to the attribute. */
		attr = *mutex_attr;
		if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK ||
		    attr->m_type >= PTHREAD_MUTEX_TYPE_MAX)
			return (EINVAL);
		if (attr->m_protocol < PTHREAD_PRIO_NONE ||
		    attr->m_protocol > PTHREAD_PRIO_PROTECT)
			return (EINVAL);
	}
	if ((pmutex = (pthread_mutex_t)
	    calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
		return (ENOMEM);

	pmutex->m_type = attr->m_type;
	pmutex->m_owner = NULL;
	pmutex->m_count = 0;
	pmutex->m_refcount = 0;
	pmutex->m_spinloops = 0;
	pmutex->m_yieldloops = 0;
	MUTEX_INIT_LINK(pmutex);
	/*
	 * Set the initial lock word and flags for the protocol.  A
	 * PTHREAD_PRIO_PROTECT mutex starts with the lock word set to
	 * UMUTEX_CONTESTED and records the ceiling from the attribute.
	 * There is no default case: the protocol is only range-checked
	 * above when the caller supplied an attribute.
	 */
	switch(attr->m_protocol) {
	case PTHREAD_PRIO_INHERIT:
		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
		pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT;
		break;
	case PTHREAD_PRIO_PROTECT:
		pmutex->m_lock.m_owner = UMUTEX_CONTESTED;
		pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT;
		pmutex->m_lock.m_ceilings[0] = attr->m_ceiling;
		break;
	case PTHREAD_PRIO_NONE:
		pmutex->m_lock.m_owner = UMUTEX_UNOWNED;
		pmutex->m_lock.m_flags = 0;
	}

	/*
	 * Adaptive mutexes get non-zero spin/yield budgets: the global
	 * _thr_spinloops when it is set, MUTEX_ADAPTIVE_SPINS otherwise.
	 */
	if (pmutex->m_type == PTHREAD_MUTEX_ADAPTIVE_NP) {
		pmutex->m_spinloops =
		    _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS;
		pmutex->m_yieldloops = _thr_yieldloops;
	}

	*mutex = pmutex;
	return (0);
}

/*
 * Perform the deferred dynamic initialization of a statically initialized
 * mutex (one whose pointer is still NULL).
 *
 * *mutex is re-checked with _mutex_static_lock held, so a caller that finds
 * the mutex already initialized returns 0 without allocating.  Otherwise
 * the result of mutex_init() with default attributes is returned.
 */
static int
init_static(struct pthread *thread, pthread_mutex_t *mutex)
{
	int ret;

	THR_LOCK_ACQUIRE(thread, &_mutex_static_lock);

	if (*mutex == NULL)
		ret = mutex_init(mutex, NULL, calloc);
	else
		ret = 0;

	THR_LOCK_RELEASE(thread, &_mutex_static_lock);

	return (ret);
}

/*
 * Store in m->m_lock.m_ceilings[1] the ceiling (m_ceilings[0]) of the last
 * mutex on curthread's pp_mutexq, or -1 when that queue is empty.
 * NOTE(review): m_ceilings[1] is presumably read by the kernel when the
 * priority-protect mutex is unlocked -- confirm against kern_umtx.c.
 */
static void
set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m)
{
	struct pthread_mutex *m2;

	m2 = TAILQ_LAST(&curthread->pp_mutexq, mutex_queue);
	if (m2 != NULL)
		m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0];
	else
		m->m_lock.m_ceilings[1] = -1;
}

/*
 * pthread_mutex_init(): initialize a mutex with storage from calloc().
 * Returns the result of mutex_init().
 */
int
__pthread_mutex_init(pthread_mutex_t *mutex,
    const pthread_mutexattr_t *mutex_attr)
{
	return mutex_init(mutex, mutex_attr, calloc);
}

/*
 * This function is used internally by malloc.  It creates a
 * PTHREAD_MUTEX_NORMAL, PTHREAD_PRIO_NONE mutex whose storage comes from
 * the supplied calloc_cb rather than from calloc().
 */
int
_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{
	static const struct pthread_mutex_attr attr = {
		.m_type = PTHREAD_MUTEX_NORMAL,
		.m_protocol = PTHREAD_PRIO_NONE,
		.m_ceiling = 0
	};
	/* mutex_init() dereferences its attribute argument once. */
	static const struct pthread_mutex_attr *pattr = &attr;

	return mutex_init(mutex, (pthread_mutexattr_t *)&pattr, calloc_cb);
}

/*
 * Rewrite the owner word of every mutex on curthread's mutexq and
 * pp_mutexq with TID(curthread); mutexes on pp_mutexq keep the
 * UMUTEX_CONTESTED bit set.
 */
void
_mutex_fork(struct pthread *curthread)
{
	struct pthread_mutex *m;

	/*
	 * Fix mutex ownership for the child process.
	 * Note that a process-shared mutex should not be inherited,
	 * because its owner is the forking thread, which lives in the
	 * parent process; such mutexes should be removed from the
	 * owned-mutex list.  Process-shared mutexes are currently not
	 * supported, so that case is not handled here.
	 */

	TAILQ_FOREACH(m, &curthread->mutexq, m_qe)
		m->m_lock.m_owner = TID(curthread);
	TAILQ_FOREACH(m, &curthread->pp_mutexq, m_qe)
		m->m_lock.m_owner = TID(curthread) | UMUTEX_CONTESTED;
}

/*
 * Destroy a mutex.
 *
 * Returns EINVAL when *mutex is NULL, the error from _thr_umutex_trylock()
 * when the underlying lock cannot be taken, and EBUSY when the mutex still
 * has an owner or a non-zero m_refcount.  Otherwise the caller's pointer is
 * cleared, the mutex is released and freed, and 0 is returned.
 */
int
_pthread_mutex_destroy(pthread_mutex_t *mutex)
{
	struct pthread *curthread = _get_curthread();
	pthread_mutex_t m;
	uint32_t id;
	int ret = 0;

	if (__predict_false(*mutex == NULL))
		ret = EINVAL;
	else {
		id = TID(curthread);

		/*
		 * Try to lock the mutex structure.  One attempt is
		 * enough: if it fails, the mutex is in use.
		 */
		ret = _thr_umutex_trylock(&(*mutex)->m_lock, id);
		if (ret)
			return (ret);
		m = *mutex;
		/*
		 * Check the other mutex fields to see whether this mutex
		 * is in use.  This matters mostly for priority mutex
		 * types, or when condition variables still reference it.
		 */
		if (m->m_owner != NULL || m->m_refcount != 0) {
			if (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT)
				set_inherited_priority(curthread, m);
			_thr_umutex_unlock(&m->m_lock, id);
			ret = EBUSY;
		} else {
			/*
			 * Save a pointer to the mutex so it can be free'd
			 * and set the caller's pointer to NULL.
*/ *mutex = NULL; if (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) set_inherited_priority(curthread, m); _thr_umutex_unlock(&m->m_lock, id); MUTEX_ASSERT_NOT_OWNED(m); free(m); } } return (ret); } #define ENQUEUE_MUTEX(curthread, m) \ do { \ (m)->m_owner = curthread; \ /* Add to the list of owned mutexes: */ \ MUTEX_ASSERT_NOT_OWNED((m)); \ if (((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) \ TAILQ_INSERT_TAIL(&curthread->mutexq, (m), m_qe);\ else \ TAILQ_INSERT_TAIL(&curthread->pp_mutexq, (m), m_qe);\ } while (0) static int mutex_trylock_common(struct pthread *curthread, pthread_mutex_t *mutex) { struct pthread_mutex *m; uint32_t id; int ret; id = TID(curthread); m = *mutex; ret = _thr_umutex_trylock(&m->m_lock, id); if (ret == 0) { ENQUEUE_MUTEX(curthread, m); } else if (m->m_owner == curthread) { ret = mutex_self_trylock(m); } /* else {} */ return (ret); } int __pthread_mutex_trylock(pthread_mutex_t *mutex) { struct pthread *curthread = _get_curthread(); int ret; /* * If the mutex is statically initialized, perform the dynamic * initialization: */ if (__predict_false(*mutex == NULL)) { ret = init_static(curthread, mutex); if (__predict_false(ret)) return (ret); } return (mutex_trylock_common(curthread, mutex)); } static int -mutex_lock_sleep(struct pthread *curthread, pthread_mutex_t m, - const struct timespec * abstime) +mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m, + const struct timespec *abstime) { - struct timespec ts, ts2; - uint32_t id; - int ret; + uint32_t id, owner; int count; + int ret; - id = TID(curthread); - if (__predict_false(m->m_owner == curthread)) - return mutex_self_lock(m, abstime); + if (m->m_owner == curthread) + return mutex_self_lock(m, abstime); + id = TID(curthread); /* * For adaptive mutexes, spin for a bit in the expectation * that if the application requests this mutex type then * the lock is likely to be released quickly and it is * faster than entering the kernel */ - if (m->m_lock.m_flags & 
UMUTEX_PRIO_PROTECT) + if (m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) goto sleep_in_kernel; if (!_thr_is_smp) goto yield_loop; count = m->m_spinloops; while (count--) { - if (m->m_lock.m_owner == UMUTEX_UNOWNED) { - ret = _thr_umutex_trylock2(&m->m_lock, id); - if (ret == 0) + owner = m->m_lock.m_owner; + if ((owner & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { + ret = 0; goto done; + } } CPU_SPINWAIT; } yield_loop: count = m->m_yieldloops; while (count--) { _sched_yield(); - ret = _thr_umutex_trylock2(&m->m_lock, id); - if (ret == 0) - goto done; + owner = m->m_lock.m_owner; + if ((owner & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { + ret = 0; + goto done; + } + } } sleep_in_kernel: if (abstime == NULL) { - ret = __thr_umutex_lock(&m->m_lock); + ret = __thr_umutex_lock(&m->m_lock, id); } else if (__predict_false( - abstime->tv_sec < 0 || abstime->tv_nsec < 0 || + abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)) { ret = EINVAL; } else { - clock_gettime(CLOCK_REALTIME, &ts); - TIMESPEC_SUB(&ts2, abstime, &ts); - ret = __thr_umutex_timedlock(&m->m_lock, &ts2); - /* - * Timed out wait is not restarted if - * it was interrupted, not worth to do it. 
- */ - if (ret == EINTR) - ret = ETIMEDOUT; + ret = __thr_umutex_timedlock(&m->m_lock, id, abstime); } done: if (ret == 0) ENQUEUE_MUTEX(curthread, m); + return (ret); } static inline int mutex_lock_common(struct pthread *curthread, struct pthread_mutex *m, - const struct timespec * abstime) + const struct timespec *abstime) { - uint32_t id; - int ret; - id = TID(curthread); - ret = _thr_umutex_trylock2(&m->m_lock, id); - if (ret == 0) + if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) { ENQUEUE_MUTEX(curthread, m); - else - ret = mutex_lock_sleep(curthread, m, abstime); - return (ret); + return (0); + } + + return (mutex_lock_sleep(curthread, m, abstime)); } int __pthread_mutex_lock(pthread_mutex_t *mutex) { struct pthread *curthread; struct pthread_mutex *m; int ret; _thr_check_init(); curthread = _get_curthread(); /* * If the mutex is statically initialized, perform the dynamic * initialization: */ if (__predict_false((m = *mutex) == NULL)) { ret = init_static(curthread, mutex); if (__predict_false(ret)) return (ret); m = *mutex; } + return (mutex_lock_common(curthread, m, NULL)); } int __pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime) { struct pthread *curthread; struct pthread_mutex *m; int ret; _thr_check_init(); curthread = _get_curthread(); /* * If the mutex is statically initialized, perform the dynamic * initialization: */ if (__predict_false((m = *mutex) == NULL)) { ret = init_static(curthread, mutex); if (__predict_false(ret)) return (ret); m = *mutex; } return (mutex_lock_common(curthread, m, abstime)); } int _pthread_mutex_unlock(pthread_mutex_t *m) { return (mutex_unlock_common(m)); } int _mutex_cv_lock(pthread_mutex_t *m, int count) { int ret; ret = mutex_lock_common(_get_curthread(), *m, NULL); if (ret == 0) { (*m)->m_refcount--; (*m)->m_count += count; } return (ret); } static int mutex_self_trylock(pthread_mutex_t m) { int ret; switch (m->m_type) { case PTHREAD_MUTEX_ERRORCHECK: case PTHREAD_MUTEX_NORMAL: 
ret = EBUSY;	/* non-recursive types: the lock is already held */
		break;

	case PTHREAD_MUTEX_RECURSIVE:
		/*
		 * Increment the lock count.
		 * NOTE(review): this guard relies on m_count + 1 turning
		 * non-positive at the limit; if m_count is a signed int
		 * that is undefined behavior -- confirm the field type.
		 */
		if (m->m_count + 1 > 0) {
			m->m_count++;
			ret = 0;
		} else
			ret = EAGAIN;
		break;

	default:
		/* Trap invalid mutex types. */
		ret = EINVAL;
	}

	return (ret);
}

/*
 * Handle a blocking lock request on a mutex whose m_owner is already the
 * calling thread.
 *
 *	m		the mutex; m_type selects the behavior.
 *	abstime		absolute CLOCK_REALTIME deadline, or NULL to wait
 *			without a timeout.
 *
 * ERRORCHECK and ADAPTIVE_NP: EDEADLK without a deadline.
 * NORMAL: without a deadline the call never returns (it sleeps in a loop).
 * For those three types, a deadline with a negative field or with
 * tv_nsec >= 1000000000 yields EINVAL; otherwise the thread sleeps for
 * the interval up to the deadline and ETIMEDOUT is returned.
 * RECURSIVE: the lock count is incremented (0), or EAGAIN at the limit.
 * Any other type: EINVAL.
 */
static int
mutex_self_lock(pthread_mutex_t m, const struct timespec *abstime)
{
	struct timespec	ts1, ts2;
	int	ret;

	switch (m->m_type) {
	case PTHREAD_MUTEX_ERRORCHECK:
	case PTHREAD_MUTEX_ADAPTIVE_NP:
		if (abstime) {
			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
			    abstime->tv_nsec >= 1000000000) {
				ret = EINVAL;
			} else {
				/*
				 * Sleep for the remaining interval; the
				 * result of the sleep itself is ignored.
				 */
				clock_gettime(CLOCK_REALTIME, &ts1);
				TIMESPEC_SUB(&ts2, abstime, &ts1);
				__sys_nanosleep(&ts2, NULL);
				ret = ETIMEDOUT;
			}
		} else {
			/*
			 * POSIX specifies that mutexes should return
			 * EDEADLK if a recursive lock is detected.
			 */
			ret = EDEADLK;
		}
		break;

	case PTHREAD_MUTEX_NORMAL:
		/*
		 * What SS2 defines as a 'normal' mutex.  Intentionally
		 * deadlock on attempts to get a lock you already own.
		 */
		ret = 0;
		if (abstime) {
			if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
			    abstime->tv_nsec >= 1000000000) {
				ret = EINVAL;
			} else {
				clock_gettime(CLOCK_REALTIME, &ts1);
				TIMESPEC_SUB(&ts2, abstime, &ts1);
				__sys_nanosleep(&ts2, NULL);
				ret = ETIMEDOUT;
			}
		} else {
			/* Sleep forever, 30 seconds at a time. */
			ts1.tv_sec = 30;
			ts1.tv_nsec = 0;
			for (;;)
				__sys_nanosleep(&ts1, NULL);
		}
		break;

	case PTHREAD_MUTEX_RECURSIVE:
		/* Increment the lock count (same guard as the trylock path). */
		if (m->m_count + 1 > 0) {
			m->m_count++;
			ret = 0;
		} else
			ret = EAGAIN;
		break;

	default:
		/* Trap invalid mutex types. */
		ret = EINVAL;
	}

	return (ret);
}

/*
 * Release one level of ownership of *mutex.
 *
 * Returns EINVAL when *mutex is NULL and EPERM when the calling thread is
 * not the owner.  A recursive mutex with a non-zero count is only
 * decremented; otherwise the owner is cleared, the mutex is unlinked from
 * the thread's queue and the underlying umutex is unlocked.  Returns 0.
 */
static int
mutex_unlock_common(pthread_mutex_t *mutex)
{
	struct pthread *curthread = _get_curthread();
	struct pthread_mutex *m;
	uint32_t id;

	if (__predict_false((m = *mutex) == NULL))
		return (EINVAL);

	/*
	 * Only the owning thread may unlock the mutex.
	 */
	if (__predict_false(m->m_owner != curthread))
		return (EPERM);

	id = TID(curthread);
	if (__predict_false(
		m->m_type == PTHREAD_MUTEX_RECURSIVE &&
		m->m_count > 0)) {
		m->m_count--;
	} else {
		m->m_owner = NULL;
		/* Remove the mutex from the thread's queue. */
		MUTEX_ASSERT_IS_OWNED(m);
		if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
			TAILQ_REMOVE(&curthread->mutexq, m, m_qe);
		else {
			TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe);
			/* Refresh m_ceilings[1] from what is still held. */
			set_inherited_priority(curthread, m);
		}
		MUTEX_INIT_LINK(m);
		_thr_umutex_unlock(&m->m_lock, id);
	}
	return (0);
}

/*
 * Release *mutex completely, regardless of its recursion count.
 *
 * The current m_count is stored in *count and reset to 0, and m_refcount is
 * incremented; _mutex_cv_lock() performs the reverse adjustments when the
 * mutex is taken again.  Returns EINVAL when *mutex is NULL, EPERM when the
 * calling thread is not the owner, and 0 otherwise.
 */
int
_mutex_cv_unlock(pthread_mutex_t *mutex, int *count)
{
	struct pthread *curthread = _get_curthread();
	struct pthread_mutex *m;

	if (__predict_false((m = *mutex) == NULL))
		return (EINVAL);

	/*
	 * Only the owning thread may unlock the mutex.
	 */
	if (__predict_false(m->m_owner != curthread))
		return (EPERM);

	/*
	 * Clear the count in case this is a recursive mutex.
	 */
	*count = m->m_count;
	m->m_refcount++;
	m->m_count = 0;
	m->m_owner = NULL;
	/* Remove the mutex from the thread's queue. */
	MUTEX_ASSERT_IS_OWNED(m);
	if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		TAILQ_REMOVE(&curthread->mutexq, m, m_qe);
	else {
		TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe);
		set_inherited_priority(curthread, m);
	}
	MUTEX_INIT_LINK(m);
	_thr_umutex_unlock(&m->m_lock, TID(curthread));
	return (0);
}

/*
 * Store the priority ceiling (m_ceilings[0]) of *mutex in *prioceiling.
 * Returns EINVAL when *mutex is NULL or the mutex does not have
 * UMUTEX_PRIO_PROTECT set, and 0 otherwise.
 */
int
_pthread_mutex_getprioceiling(pthread_mutex_t *mutex,
			      int *prioceiling)
{
	int ret;

	if (*mutex == NULL)
		ret = EINVAL;
	else if (((*mutex)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		ret = EINVAL;
	else {
		*prioceiling = (*mutex)->m_lock.m_ceilings[0];
		ret = 0;
	}

	return(ret);
}

/*
 * Change the priority ceiling of *mutex through __thr_umutex_set_ceiling().
 *
 * Returns EINVAL when *mutex is NULL or the mutex does not have
 * UMUTEX_PRIO_PROTECT set, the error from __thr_umutex_set_ceiling() when
 * that call fails, and 0 otherwise.  When the calling thread owns the
 * mutex and the new ceiling is out of order with respect to its neighbors
 * on pp_mutexq, the mutex is moved in front of the first entry with a
 * greater ceiling (or to the tail).
 */
int
_pthread_mutex_setprioceiling(pthread_mutex_t *mutex,
			      int ceiling, int *old_ceiling)
{
	struct pthread *curthread = _get_curthread();
	struct pthread_mutex *m, *m1, *m2;
	int ret;

	m = *mutex;
	if (m == NULL || (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);

	/*
	 * NOTE(review): old_ceiling is an int * here, while the
	 * __thr_umutex_set_ceiling() prototype takes a uint32_t *.
	 */
	ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling);
	if (ret != 0)
		return (ret);

	if (m->m_owner == curthread) {
		MUTEX_ASSERT_IS_OWNED(m);
		m1 = TAILQ_PREV(m, mutex_queue, m_qe);
		m2 = TAILQ_NEXT(m, m_qe);
		/* Re-queue only if a neighbor is now out of order. */
		if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) ||
		    (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) {
			TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe);
			TAILQ_FOREACH(m2, &curthread->pp_mutexq, m_qe) {
				if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) {
					TAILQ_INSERT_BEFORE(m2, m, m_qe);
					return (0);
				}
			}
			TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m, m_qe);
		}
	}
	return (0);
}

/*
 * Store the m_spinloops value of *mutex in *count.
 * Returns EINVAL when *mutex is NULL, 0 otherwise.
 */
int
_pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count)
{
	if (*mutex == NULL)
		return (EINVAL);
	*count = (*mutex)->m_spinloops;
	return (0);
}

/*
 * Set m_spinloops of *mutex to count.  A statically initialized (NULL)
 * mutex is dynamically initialized first; the init_static() error is
 * returned if that fails.  Returns 0 otherwise.
 */
int
__pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count)
{
	struct pthread *curthread = _get_curthread();
	int ret;

	if (__predict_false(*mutex == NULL)) {
		ret = init_static(curthread, mutex);
		if (__predict_false(ret))
			return (ret);
	}
	(*mutex)->m_spinloops = count;
	return (0);
}

/*
 * Store the m_yieldloops value of *mutex in *count.
 * Returns EINVAL when *mutex is NULL, 0 otherwise.
 */
int
_pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count)
{
	if (*mutex == NULL)
return (EINVAL); *count = (*mutex)->m_yieldloops; return (0); } int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count) { struct pthread *curthread = _get_curthread(); int ret; if (__predict_false(*mutex == NULL)) { ret = init_static(curthread, mutex); if (__predict_false(ret)) return (ret); } (*mutex)->m_yieldloops = count; return (0); } int _pthread_mutex_isowned_np(pthread_mutex_t *mutex) { struct pthread *curthread = _get_curthread(); int ret; if (__predict_false(*mutex == NULL)) { ret = init_static(curthread, mutex); if (__predict_false(ret)) return (ret); } return ((*mutex)->m_owner == curthread); } diff --git a/lib/libthr/thread/thr_umtx.c b/lib/libthr/thread/thr_umtx.c index 3efe8847a1a8..7c59bb78ccb8 100644 --- a/lib/libthr/thread/thr_umtx.c +++ b/lib/libthr/thread/thr_umtx.c @@ -1,166 +1,216 @@ /* * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #include "thr_private.h" #include "thr_umtx.h" #ifndef HAS__UMTX_OP_ERR int _umtx_op_err(void *obj, int op, u_long val, void *uaddr, void *uaddr2) { if (_umtx_op(obj, op, val, uaddr, uaddr2) == -1) return (errno); return (0); } #endif void _thr_umutex_init(struct umutex *mtx) { static struct umutex default_mtx = DEFAULT_UMUTEX; *mtx = default_mtx; } int -__thr_umutex_lock(struct umutex *mtx) +__thr_umutex_lock(struct umutex *mtx, uint32_t id) { - return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); + uint32_t owner; + + if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { + for (;;) { + /* wait in kernel */ + _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); + + owner = mtx->m_owner; + if ((owner & ~UMUTEX_CONTESTED) == 0 && + atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) + return (0); + } + } + + return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); } int -__thr_umutex_timedlock(struct umutex *mtx, - const struct timespec *timeout) +__thr_umutex_timedlock(struct umutex *mtx, uint32_t id, + const struct timespec *ets) { - if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && - timeout->tv_nsec <= 0))) + struct timespec timo, cts; + uint32_t owner; + int ret; + + clock_gettime(CLOCK_REALTIME, &cts); + TIMESPEC_SUB(&timo, ets, &cts); + + if (timo.tv_sec < 0) return (ETIMEDOUT); - return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, - __DECONST(void *, timeout)); + + for (;;) { + if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT 
| UMUTEX_PRIO_INHERIT)) == 0) { + + /* wait in kernel */ + ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, &timo); + + /* now try to lock it */ + owner = mtx->m_owner; + if ((owner & ~UMUTEX_CONTESTED) == 0 && + atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) + return (0); + } else { + ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, &timo); + if (ret == 0) + break; + } + if (ret == ETIMEDOUT) + break; + clock_gettime(CLOCK_REALTIME, &cts); + TIMESPEC_SUB(&timo, ets, &cts); + if (timo.tv_sec < 0 || (timo.tv_sec == 0 && timo.tv_nsec == 0)) { + ret = ETIMEDOUT; + break; + } + } + return (ret); } int -__thr_umutex_unlock(struct umutex *mtx) +__thr_umutex_unlock(struct umutex *mtx, uint32_t id) { + if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { + atomic_cmpset_rel_32(&mtx->m_owner, id | UMUTEX_CONTESTED, UMUTEX_CONTESTED); + return _umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE, 0, 0, 0); + } return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0); } int __thr_umutex_trylock(struct umutex *mtx) { return _umtx_op_err(mtx, UMTX_OP_MUTEX_TRYLOCK, 0, 0, 0); } int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) { return _umtx_op_err(mtx, UMTX_OP_SET_CEILING, ceiling, oldceiling, 0); } int _thr_umtx_wait(volatile long *mtx, long id, const struct timespec *timeout) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), UMTX_OP_WAIT, id, 0, __DECONST(void*, timeout)); } int _thr_umtx_wait_uint(volatile u_int *mtx, u_int id, const struct timespec *timeout, int shared) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? 
UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, 0, __DECONST(void*, timeout)); } int _thr_umtx_wake(volatile void *mtx, int nr_wakeup, int shared) { return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAKE : UMTX_OP_WAKE_PRIVATE, nr_wakeup, 0, 0); } void _thr_ucond_init(struct ucond *cv) { bzero(cv, sizeof(struct ucond)); } int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int check_unparking) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) { - __thr_umutex_unlock(m); + struct pthread *curthread = _get_curthread(); + _thr_umutex_unlock(m, TID(curthread)); return (ETIMEDOUT); } return _umtx_op_err(cv, UMTX_OP_CV_WAIT, check_unparking ? UMTX_CHECK_UNPARKING : 0, m, __DECONST(void*, timeout)); } int _thr_ucond_signal(struct ucond *cv) { if (!cv->c_has_waiters) return (0); return _umtx_op_err(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL); } int _thr_ucond_broadcast(struct ucond *cv) { if (!cv->c_has_waiters) return (0); return _umtx_op_err(cv, UMTX_OP_CV_BROADCAST, 0, NULL, NULL); } int __thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) { return _umtx_op_err(rwlock, UMTX_OP_RW_RDLOCK, flags, NULL, tsp); } int __thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) { return _umtx_op_err(rwlock, UMTX_OP_RW_WRLOCK, 0, NULL, tsp); } int __thr_rwlock_unlock(struct urwlock *rwlock) { return _umtx_op_err(rwlock, UMTX_OP_RW_UNLOCK, 0, NULL, NULL); } diff --git a/lib/libthr/thread/thr_umtx.h b/lib/libthr/thread/thr_umtx.h index 0ef75a7e49e2..41b5f9686783 100644 --- a/lib/libthr/thread/thr_umtx.h +++ b/lib/libthr/thread/thr_umtx.h @@ -1,181 +1,185 @@ /*- * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _THR_FBSD_UMTX_H_ #define _THR_FBSD_UMTX_H_ #include #include #define DEFAULT_UMUTEX {0,0, {0,0},{0,0,0,0}} -int __thr_umutex_lock(struct umutex *mtx) __hidden; -int __thr_umutex_timedlock(struct umutex *mtx, +int __thr_umutex_lock(struct umutex *mtx, uint32_t id) __hidden; +int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *timeout) __hidden; -int __thr_umutex_unlock(struct umutex *mtx) __hidden; +int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) __hidden; int __thr_umutex_trylock(struct umutex *mtx) __hidden; int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) __hidden; void _thr_umutex_init(struct umutex *mtx) __hidden; int _thr_umtx_wait(volatile long *mtx, long exp, const struct timespec *timeout) __hidden; int _thr_umtx_wait_uint(volatile u_int *mtx, u_int exp, const struct timespec *timeout, int shared) __hidden; int _thr_umtx_wake(volatile void *mtx, int count, int shared) __hidden; int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int check_unpaking) __hidden; void _thr_ucond_init(struct ucond *cv) __hidden; int _thr_ucond_signal(struct ucond *cv) __hidden; int _thr_ucond_broadcast(struct ucond *cv) __hidden; int __thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) __hidden; int __thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) __hidden; int __thr_rwlock_unlock(struct urwlock *rwlock) __hidden; static inline int _thr_umutex_trylock(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) return (0); if ((mtx->m_flags & UMUTEX_PRIO_PROTECT) == 0) return (EBUSY); return (__thr_umutex_trylock(mtx)); } static inline int _thr_umutex_trylock2(struct umutex *mtx, uint32_t id) { - if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) + if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id) != 0) return (0); + if ((uint32_t)mtx->m_owner == 
UMUTEX_CONTESTED && + __predict_true((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0)) + if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED)) + return (0); return (EBUSY); } static inline int _thr_umutex_lock(struct umutex *mtx, uint32_t id) { - if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) + if (_thr_umutex_trylock2(mtx, id) == 0) return (0); - return (__thr_umutex_lock(mtx)); + return (__thr_umutex_lock(mtx, id)); } static inline int _thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *timeout) { - if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) + if (_thr_umutex_trylock2(mtx, id) == 0) return (0); - return (__thr_umutex_timedlock(mtx, timeout)); + return (__thr_umutex_timedlock(mtx, id, timeout)); } static inline int _thr_umutex_unlock(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_rel_32(&mtx->m_owner, id, UMUTEX_UNOWNED)) return (0); - return (__thr_umutex_unlock(mtx)); + return (__thr_umutex_unlock(mtx, id)); } static inline int _thr_rwlock_tryrdlock(struct urwlock *rwlock, int flags) { int32_t state; int32_t wrflags; if (flags & URWLOCK_PREFER_READER || rwlock->rw_flags & URWLOCK_PREFER_READER) wrflags = URWLOCK_WRITE_OWNER; else wrflags = URWLOCK_WRITE_OWNER | URWLOCK_WRITE_WAITERS; state = rwlock->rw_state; while (!(state & wrflags)) { if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) return (EAGAIN); if (atomic_cmpset_acq_32(&rwlock->rw_state, state, state + 1)) return (0); state = rwlock->rw_state; } return (EBUSY); } static inline int _thr_rwlock_trywrlock(struct urwlock *rwlock) { int32_t state; state = rwlock->rw_state; while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { if (atomic_cmpset_acq_32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER)) return (0); state = rwlock->rw_state; } return (EBUSY); } static inline int _thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) 
{ if (_thr_rwlock_tryrdlock(rwlock, flags) == 0) return (0); return (__thr_rwlock_rdlock(rwlock, flags, tsp)); } static inline int _thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) { if (_thr_rwlock_trywrlock(rwlock) == 0) return (0); return (__thr_rwlock_wrlock(rwlock, tsp)); } static inline int _thr_rwlock_unlock(struct urwlock *rwlock) { int32_t state; state = rwlock->rw_state; if (state & URWLOCK_WRITE_OWNER) { if (atomic_cmpset_rel_32(&rwlock->rw_state, URWLOCK_WRITE_OWNER, 0)) return (0); } else { for (;;) { if (__predict_false(URWLOCK_READER_COUNT(state) == 0)) return (EPERM); if (!((state & URWLOCK_WRITE_WAITERS) && URWLOCK_READER_COUNT(state) == 1)) { if (atomic_cmpset_rel_32(&rwlock->rw_state, state, state-1)) return (0); state = rwlock->rw_state; } else { break; } } } return (__thr_rwlock_unlock(rwlock)); } #endif diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 8e5fcab3b867..09573ab8c368 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1,3257 +1,3364 @@ /*- * Copyright (c) 2004, David Xu * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_IA32 #include #endif #define TYPE_SIMPLE_WAIT 0 #define TYPE_CV 1 #define TYPE_SIMPLE_LOCK 2 #define TYPE_NORMAL_UMUTEX 3 #define TYPE_PI_UMUTEX 4 #define TYPE_PP_UMUTEX 5 #define TYPE_RWLOCK 6 +#define _UMUTEX_TRY 1 +#define _UMUTEX_WAIT 2 + /* Key to represent a unique userland synchronous object */ struct umtx_key { int hash; int type; int shared; union { struct { vm_object_t object; uintptr_t offset; } shared; struct { struct vmspace *vs; uintptr_t addr; } private; struct { void *a; uintptr_t b; } both; } info; }; /* Priority inheritance mutex info. */ struct umtx_pi { /* Owner thread */ struct thread *pi_owner; /* Reference count */ int pi_refcount; /* List entry to link umtx holding by thread */ TAILQ_ENTRY(umtx_pi) pi_link; /* List entry in hash */ TAILQ_ENTRY(umtx_pi) pi_hashlink; /* List for waiters */ TAILQ_HEAD(,umtx_q) pi_blocked; /* Identify a userland lock object */ struct umtx_key pi_key; }; /* A userland synchronous object user. */ struct umtx_q { /* Linked list for the hash. */ TAILQ_ENTRY(umtx_q) uq_link; /* Umtx key. */ struct umtx_key uq_key; /* Umtx flags. */ int uq_flags; #define UQF_UMTXQ 0x0001 /* The thread waits on. */ struct thread *uq_thread; /* * Blocked on PI mutex. 
read can use chain lock
	 * or umtx_lock, write must have both chain lock and
	 * umtx_lock being hold.
	 */
	struct umtx_pi		*uq_pi_blocked;

	/* Linkage on the pi_blocked list of the PI mutex being waited for. */
	TAILQ_ENTRY(umtx_q)	uq_lockq;

	/* PI mutexes owned by this thread (linked through pi_link). */
	TAILQ_HEAD(,umtx_pi)	uq_pi_contested;

	/* Inherited priority from PP mutex; umtxq_alloc() starts it at PRI_MAX. */
	u_char			uq_inherited_pri;
};

/* Head type for lists of struct umtx_q linked through uq_link. */
TAILQ_HEAD(umtxq_head, umtx_q);

/* Userland lock object's wait-queue chain */
struct umtxq_chain {
	/* Lock for this chain. */
	struct mtx		uc_lock;

	/* Sleep queues, indexed by the two UMTX_*_QUEUE constants below. */
	struct umtxq_head	uc_queue[2];
#define UMTX_SHARED_QUEUE	0
#define UMTX_EXCLUSIVE_QUEUE	1

	/* Busy flag: set by umtxq_busy(), cleared by umtxq_unbusy(). */
	char			uc_busy;

	/* Number of threads currently sleeping in umtxq_busy(). */
	int			uc_waiters;

	/* All PI mutexes hashed to this chain (linked through pi_hashlink). */
	TAILQ_HEAD(,umtx_pi)	uc_pi_list;
};

/* Assert that the caller owns the chain's uc_lock. */
#define	UMTXQ_LOCKED_ASSERT(uc)		mtx_assert(&(uc)->uc_lock, MA_OWNED)

/*
 * Do not propagate time-sharing priorities.  There is a security reason:
 * a user could create a PI mutex, let thread A lock it and let thread B
 * block on it.  Because B is sleeping its priority is boosted, which
 * boosts A through priority propagation as well, and A would never be
 * lowered again even while it uses 100% CPU, which is unfair to other
 * processes.  UPRI() therefore reports PRI_MAX_TIMESHARE for every
 * priority inside the time-sharing range and the real user priority
 * otherwise.
 */
#define	UPRI(td)	(((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
			 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
			 PRI_MAX_TIMESHARE : (td)->td_user_pri)

/* Parameters of the chain hash computed by umtxq_hash(). */
#define	GOLDEN_RATIO_PRIME	2654404609U
#define	UMTX_CHAINS		128
#define	UMTX_SHIFTS		(__WORD_BIT - 7)

/* Sharing modes accepted by umtx_key_get(). */
#define	THREAD_SHARE		0
#define	PROCESS_SHARE		1
#define	AUTO_SHARE		2

/* Choose a sharing mode from the USYNC_PROCESS_SHARED bit of the flags. */
#define	GET_SHARE(flags)	\
    (((flags) & USYNC_PROCESS_SHARED) == 0 ?
THREAD_SHARE : PROCESS_SHARE)

/* Initial spin budget used by umtxq_busy() before it falls back to sleeping. */
#define	BUSY_SPINS		200

/* UMA zone backing struct umtx_pi allocations (created in umtxq_sysinit()). */
static uma_zone_t		umtx_pi_zone;
/*
 * Two tables of wait-queue chains; umtxq_getchain() selects the table from
 * the key type and the chain from the key hash.
 */
static struct umtxq_chain	umtxq_chains[2][UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
/* Counter exported read-only (CTLFLAG_RD) by the SYSCTL_INT below. */
static int			umtx_pi_allocated;

SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
    &umtx_pi_allocated, 0, "Allocated umtx_pi");

/* Forward declarations of the file-local helpers. */
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert_queue(struct umtx_q *uq, int q);
static void umtxq_remove_queue(struct umtx_q *uq, int q);
static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(void *addr, int type, int share,
	struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
static struct umtx_pi *umtx_pi_alloc(int);
static void umtx_pi_free(struct umtx_pi *pi);
static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
static void umtx_thread_cleanup(struct thread *td);
static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
	struct image_params *imgp __unused);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);

/* Convenience wrappers that always operate on the shared queue. */
#define umtxq_signal(key, nwake)	umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
#define umtxq_insert(uq)	umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
#define umtxq_remove(uq)	umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)

/* Spin mutex (initialised with MTX_SPIN in umtxq_sysinit()). */
static struct mtx umtx_lock;

/*
 * One-time initialisation run through SYSINIT: create the umtx_pi zone,
 * initialise the lock, both sleep queues, the PI list and the busy/waiter
 * state of every chain in both tables, initialise umtx_lock and register
 * umtx_exec_hook for process_exec events.
 */
static void
umtxq_sysinit(void *arg __unused)
{
	int i, j;

	umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	for (i = 0; i < 2; ++i) {
		for (j = 0; j < UMTX_CHAINS; ++j) {
			mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
				 MTX_DEF | MTX_DUPOK);
			TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
			TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
			TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
			umtxq_chains[i][j].uc_busy = 0;
			umtxq_chains[i][j].uc_waiters = 0;
		}
	}
	mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
	EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
	    EVENTHANDLER_PRI_ANY);
}

/*
 * Allocate a zeroed umtx_q (M_WAITOK) with an empty contested list and an
 * inherited priority of PRI_MAX.  Release it with umtxq_free().
 */
struct umtx_q *
umtxq_alloc(void)
{
	struct umtx_q *uq;

	uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
	TAILQ_INIT(&uq->uq_pi_contested);
	uq->uq_inherited_pri = PRI_MAX;
	return (uq);
}

/* Free a umtx_q obtained from umtxq_alloc(). */
void
umtxq_free(struct umtx_q *uq)
{
	free(uq, M_UMTX);
}

/*
 * Compute key->hash from the two generic key words: their sum is truncated
 * to unsigned, multiplied by GOLDEN_RATIO_PRIME, shifted right by
 * UMTX_SHIFTS and reduced modulo UMTX_CHAINS.
 */
static inline void
umtxq_hash(struct umtx_key *key)
{
	unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
	key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
}

/* Two keys match when their type and both generic key words are equal. */
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
	return (k1->type == k2->type &&
		k1->info.both.a == k2->info.both.a &&
	        k1->info.both.b == k2->info.both.b);
}

/*
 * Return the chain for a key.  Keys whose type is at most TYPE_CV use
 * table 1, all other types use table 0; the index is key->hash.
 */
static inline struct umtxq_chain *
umtxq_getchain(struct umtx_key *key)
{
	if (key->type <= TYPE_CV)
		return (&umtxq_chains[1][key->hash]);
	return (&umtxq_chains[0][key->hash]);
}

/*
 * Lock a chain.
 */
static inline void
umtxq_lock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_lock(&uc->uc_lock);
}

/*
 * Unlock a chain.
 */
static inline void
umtxq_unlock(struct umtx_key *key)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(key);
	mtx_unlock(&uc->uc_lock);
}

/*
 * Set chain to busy state when following operation
 * may be blocked (kernel mutex can not be used).
*/ static inline void umtxq_busy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); if (uc->uc_busy) { #ifdef SMP if (smp_cpus > 1) { int count = BUSY_SPINS; if (count > 0) { umtxq_unlock(key); while (uc->uc_busy && --count > 0) cpu_spinwait(); umtxq_lock(key); } } #endif while (uc->uc_busy) { uc->uc_waiters++; msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); uc->uc_waiters--; } } uc->uc_busy = 1; } /* * Unbusy a chain. */ static inline void umtxq_unbusy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); KASSERT(uc->uc_busy != 0, ("not busy")); uc->uc_busy = 0; if (uc->uc_waiters) wakeup_one(uc); } static inline void umtxq_insert_queue(struct umtx_q *uq, int q) { struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link); uq->uq_flags |= UQF_UMTXQ; } static inline void umtxq_remove_queue(struct umtx_q *uq, int q) { struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (uq->uq_flags & UQF_UMTXQ) { TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link); uq->uq_flags &= ~UQF_UMTXQ; } } /* * Check if there are multiple waiters */ static int umtxq_count(struct umtx_key *key) { struct umtxq_chain *uc; struct umtx_q *uq; int count = 0; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { if (umtx_key_match(&uq->uq_key, key)) { if (++count > 1) break; } } return (count); } /* * Check if there are multiple PI waiters and returns first * waiter. 
*/ static int umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) { struct umtxq_chain *uc; struct umtx_q *uq; int count = 0; *first = NULL; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) { if (umtx_key_match(&uq->uq_key, key)) { if (++count > 1) break; *first = uq; } } return (count); } /* * Wake up threads waiting on an userland object. */ static int umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) { struct umtxq_chain *uc; struct umtx_q *uq, *next; int ret; ret = 0; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) { if (umtx_key_match(&uq->uq_key, key)) { umtxq_remove_queue(uq, q); wakeup(uq); if (++ret >= n_wake) break; } } return (ret); } /* * Wake up specified thread. */ static inline void umtxq_signal_thread(struct umtx_q *uq) { struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); umtxq_remove(uq); wakeup(uq); } /* * Put thread into sleep state, before sleeping, check if * thread was removed from umtx queue. */ static inline int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) { struct umtxq_chain *uc; int error; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (!(uq->uq_flags & UQF_UMTXQ)) return (0); error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); if (error == EWOULDBLOCK) error = ETIMEDOUT; return (error); } /* * Convert userspace address into unique logical address. 
*/ static int umtx_key_get(void *addr, int type, int share, struct umtx_key *key) { struct thread *td = curthread; vm_map_t map; vm_map_entry_t entry; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; key->type = type; if (share == THREAD_SHARE) { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } else { MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); map = &td->td_proc->p_vmspace->vm_map; if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, &entry, &key->info.shared.object, &pindex, &prot, &wired) != KERN_SUCCESS) { return EFAULT; } if ((share == PROCESS_SHARE) || (share == AUTO_SHARE && VM_INHERIT_SHARE == entry->inheritance)) { key->shared = 1; key->info.shared.offset = entry->offset + entry->start - (vm_offset_t)addr; vm_object_reference(key->info.shared.object); } else { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } vm_map_lookup_done(map, entry); } umtxq_hash(key); return (0); } /* * Release key. */ static inline void umtx_key_release(struct umtx_key *key) { if (key->shared) vm_object_deallocate(key->info.shared.object); } /* * Lock a umtx object. */ static int _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) { struct umtx_q *uq; u_long owner; u_long old; int error = 0; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMTX_UNOWNED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMTX_CONTESTED) { owner = casuword(&umtx->u_owner, UMTX_CONTESTED, id | UMTX_CONTESTED); if (owner == UMTX_CONTESTED) return (0); /* The address was invalid. 
*/ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); if (old == owner) error = umtxq_sleep(uq, "umtx", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); } return (0); } /* * Lock a umtx object. */ static int do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; if (timeout == NULL) { error = _do_lock_umtx(td, umtx, id, 0); /* Mutex locking is restarted if it is interrupted. */ if (error == EINTR) error = ERESTART; } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a umtx object. 
*/
/*
 * Returns EFAULT if the owner word cannot be accessed, EPERM if the
 * caller's id is not the current owner and EINVAL if the owner word
 * changed while the lock was being released; 0 otherwise.
 */
static int
do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
{
	struct umtx_key key;
	u_long owner;
	u_long old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMTX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMTX_CONTESTED) == 0) {
		old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The word changed under us; continue with the fresh value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/*
	 * Mark the chain busy while the chain lock is dropped around the
	 * user-memory update; it is unbusied after the wakeup below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword(&umtx->u_owner, owner,
		count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
	/* A waiter is woken regardless of the outcome of the update. */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

#ifdef COMPAT_IA32

/*
 * Lock a umtx object.
 *
 * 32-bit variant of _do_lock_umtx(): m points at the 32-bit owner word,
 * which is accessed with casuword32() and compared against the UMUTEX_*
 * constants.  Returns 0 once acquired, EFAULT on an inaccessible address,
 * or the sleep error after one more acquisition attempt.
 */
static int
_do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
{
	struct umtx_q *uq;
	uint32_t owner;
	uint32_t old;
	int error = 0;

	uq = td->td_umtxq;

	/*
	 * Care must be exercised when dealing with umtx structure. It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		owner = casuword32(m, UMUTEX_UNOWNED, id);

		/* The acquire succeeded. */
		if (owner == UMUTEX_UNOWNED)
			return (0);

		/* The address was invalid. */
		if (owner == -1)
			return (EFAULT);

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED) {
			owner = casuword32(m,
			    UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
			if (owner == UMUTEX_CONTESTED)
				return (0);

			/* The address was invalid. */
			if (owner == -1)
				return (EFAULT);

			/* If this failed the lock has changed, restart. */
			continue;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
			AUTO_SHARE, &uq->uq_key)) != 0)
			return (error);

		/* Queue ourselves before publishing the contested bit. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		old = casuword32(m, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtx", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Lock a umtx object.
 *
 * 32-bit variant of do_lock_umtx(): without a timeout EINTR is turned
 * into ERESTART; with a timeout the attempt is repeated with the time
 * remaining until the deadline and ERESTART is turned into EINTR.
 */
static int
do_lock_umtx32(struct thread *td, void *m, uint32_t id,
	struct timespec *timeout)
{
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	int error;

	if (timeout == NULL) {
		error = _do_lock_umtx32(td, m, id, 0);
		/* Mutex locking is restarted if it is interrupted. */
		if (error == EINTR)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			/* Sleep again for whatever remains until the deadline. */
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}

	return (error);
}

/*
 * Unlock a umtx object.
*/
/*
 * 32-bit variant of do_unlock_umtx(): m points at the 32-bit owner word.
 * Returns EFAULT if the word cannot be accessed, EPERM if id is not the
 * current owner and EINVAL if the word changed during the release.
 */
static int
do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t old;
	int error;
	int count;

	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(m);
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(m, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		/* The word changed under us; continue with the fresh value. */
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
		&key)) != 0)
		return (error);

	/*
	 * Mark the chain busy while the chain lock is dropped around the
	 * user-memory update; it is unbusied after the wakeup below.
	 */
	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(m, owner,
		count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	/* A waiter is woken regardless of the outcome of the update. */
	umtxq_lock(&key);
	umtxq_signal(&key,1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}
#endif

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The wait key is private to the process (THREAD_SHARE) when is_private
 * is non-zero and AUTO_SHARE otherwise.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
	struct timespec *timeout, int compat32, int is_private)
{
	struct umtx_q *uq;
	struct timespec ts, ts2, ts3;
	struct timeval tv;
	u_long tmp;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
		is_private ?
THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); if (compat32 == 0) tmp = fuword(addr); else tmp = fuword32(addr); if (tmp != id) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } else if (timeout == NULL) { umtxq_lock(&uq->uq_key); error = umtxq_sleep(uq, "uwait", 0); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); umtxq_lock(&uq->uq_key); for (;;) { error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); if (!(uq->uq_flags & UQF_UMTXQ)) break; if (error != ETIMEDOUT) break; umtxq_unlock(&uq->uq_key); getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; umtxq_lock(&uq->uq_key); break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); umtxq_lock(&uq->uq_key); } umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); if (error == ERESTART) error = EINTR; return (error); } /* * Wake up threads sleeping on the specified address. */ int kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) { struct umtx_key key; int ret; if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) return (ret); umtxq_lock(&key); ret = umtxq_signal(&key, n_wake); umtxq_unlock(&key); umtx_key_release(&key); return (0); } /* * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, - int try) + int mode) { struct umtx_q *uq; uint32_t owner, old, id; int error = 0; id = td->td_tid; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { - /* - * Try the uncontested case. This should be done in userland. - */ - owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); - - /* The acquire succeeded. 
*/ - if (owner == UMUTEX_UNOWNED) - return (0); - - /* The address was invalid. */ - if (owner == -1) - return (EFAULT); - - /* If no one owns it but it is contested try to acquire it. */ - if (owner == UMUTEX_CONTESTED) { - owner = casuword32(&m->m_owner, - UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); + if (mode == _UMUTEX_WAIT) { + if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) + return (0); + } else { + /* + * Try the uncontested case. This should be done in userland. + */ + owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); - if (owner == UMUTEX_CONTESTED) + /* The acquire succeeded. */ + if (owner == UMUTEX_UNOWNED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); - /* If this failed the lock has changed, restart. */ - continue; + /* If no one owns it but it is contested try to acquire it. */ + if (owner == UMUTEX_CONTESTED) { + owner = casuword32(&m->m_owner, + UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); + + if (owner == UMUTEX_CONTESTED) + return (0); + + /* The address was invalid. */ + if (owner == -1) + return (EFAULT); + + /* If this failed the lock has changed, restart. */ + continue; + } } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) return (EDEADLK); - if (try != 0) + if (mode == _UMUTEX_TRY) return (EBUSY); /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); - umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); /* The address was invalid. 
*/ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); + umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); + umtxq_unbusy(&uq->uq_key); if (old == owner) error = umtxq_sleep(uq, "umtxn", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); } return (0); } /* * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ /* * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; uint32_t owner, old, id; int error; int count; id = td->td_tid; /* * Make sure we own this mtx. */ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); - /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword32(&m->m_owner, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } +/* + * Check if the mutex is available and wake up a waiter, + * only for simple mutex. 
+ */ +static int +do_wake_umutex(struct thread *td, struct umutex *m) +{ + struct umtx_key key; + uint32_t owner; + uint32_t flags; + int error; + int count; + + owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); + if (owner == -1) + return (EFAULT); + + if ((owner & ~UMUTEX_CONTESTED) != 0) + return (0); + + flags = fuword32(&m->m_flags); + + /* We should only ever be in here for contested locks */ + if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), + &key)) != 0) + return (error); + + umtxq_lock(&key); + umtxq_busy(&key); + count = umtxq_count(&key); + umtxq_unlock(&key); + + if (count <= 1) + owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED); + + umtxq_lock(&key); + if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0) + umtxq_signal(&key, 1); + umtxq_unbusy(&key); + umtxq_unlock(&key); + umtx_key_release(&key); + return (0); +} + static inline struct umtx_pi * umtx_pi_alloc(int flags) { struct umtx_pi *pi; pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); TAILQ_INIT(&pi->pi_blocked); atomic_add_int(&umtx_pi_allocated, 1); return (pi); } static inline void umtx_pi_free(struct umtx_pi *pi) { uma_zfree(umtx_pi_zone, pi); atomic_add_int(&umtx_pi_allocated, -1); } /* * Adjust the thread's position on a pi_state after its priority has been * changed. */ static int umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) { struct umtx_q *uq, *uq1, *uq2; struct thread *td1; mtx_assert(&umtx_lock, MA_OWNED); if (pi == NULL) return (0); uq = td->td_umtxq; /* * Check if the thread needs to be moved on the blocked chain. * It needs to be moved if either its priority is lower than * the previous thread or higher than the next thread. */ uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); uq2 = TAILQ_NEXT(uq, uq_lockq); if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { /* * Remove thread from blocked chain and determine where * it should be moved to. 
*/
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		/* Find the first waiter with a numerically larger UPRI(). */
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		/* Re-insert in front of it, or at the tail if there is none. */
		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on POSIX
 * PI mutex.
 *
 * Starting from the PI mutex td is blocked on, lend td's UPRI() to the
 * owner of that mutex, then follow the mutex the owner is itself blocked
 * on, and so on.  The walk ends when a mutex has no owner, when an
 * owner's UPRI() is already numerically less than or equal to the value
 * being lent, or when umtx_pi_adjust_thread() returns 0.
 * umtx_lock must be held.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		/* From here on td names the owner that receives the priority. */
		td = pi->pi_owner;
		if (td == NULL)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		if (UPRI(td) <= pri)
			return;

		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by signal or resumed by others.
 *
 * For the owner of pi, recompute the priority to lend as the numerically
 * smallest UPRI() among the first waiters of all PI mutexes the owner
 * holds, capped by the owner's uq_inherited_pri, and hand it to
 * sched_unlend_user_prio().  The same is then repeated for the PI mutex
 * the owner is blocked on.  umtx_lock must be held.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		/* Only the first waiter of each owned mutex is examined. */
		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		/*
		 * NOTE(review): umtx_pi_adjust_locked() asserts with MPASS that
		 * the thread is blocked on a PI mutex, while the loop condition
		 * above allows uq_pi_blocked to be NULL -- confirm the owner is
		 * always blocked here or that the assertion is intended.
		 */
		umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}

/*
 * Insert a PI mutex into owned list.
*/ static void umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq_owner; uq_owner = owner->td_umtxq; mtx_assert(&umtx_lock, MA_OWNED); if (pi->pi_owner != NULL) panic("pi_ower != NULL"); pi->pi_owner = owner; TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); } /* * Claim ownership of a PI mutex. */ static int umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq, *uq_owner; uq_owner = owner->td_umtxq; mtx_lock_spin(&umtx_lock); if (pi->pi_owner == owner) { mtx_unlock_spin(&umtx_lock); return (0); } if (pi->pi_owner != NULL) { /* * userland may have already messed the mutex, sigh. */ mtx_unlock_spin(&umtx_lock); return (EPERM); } umtx_pi_setowner(pi, owner); uq = TAILQ_FIRST(&pi->pi_blocked); if (uq != NULL) { int pri; pri = UPRI(uq->uq_thread); thread_lock(owner); if (pri < UPRI(owner)) sched_lend_user_prio(owner, pri); thread_unlock(owner); } mtx_unlock_spin(&umtx_lock); return (0); } static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri) { struct umtx_q *uq; struct umtx_pi *pi; uq = td->td_umtxq; /* * Pick up the lock that td is blocked on. */ pi = uq->uq_pi_blocked; MPASS(pi != NULL); /* Resort the turnstile on the list. */ if (!umtx_pi_adjust_thread(pi, td)) return; /* * If our priority was lowered and we are at the head of the * turnstile, then propagate our new priority up the chain. */ if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri) umtx_propagate_priority(td); } /* * Adjust a thread's order position in its blocked PI mutex, * this may result new priority propagating process. */ void umtx_pi_adjust(struct thread *td, u_char oldpri) { struct umtx_q *uq; struct umtx_pi *pi; uq = td->td_umtxq; mtx_lock_spin(&umtx_lock); /* * Pick up the lock that td is blocked on. */ pi = uq->uq_pi_blocked; if (pi != NULL) umtx_pi_adjust_locked(td, oldpri); mtx_unlock_spin(&umtx_lock); } /* * Sleep on a PI mutex. 
*/
/*
 * Called and returns with the chain lock of uq->uq_key held; the lock is
 * dropped and retaken internally.  owner is the thread id read from the
 * mutex word; it is used to look up the owning thread when pi has no
 * owner recorded yet.  Returns 0, or the msleep() error with EWOULDBLOCK
 * reported as ETIMEDOUT.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	umtxq_insert(uq);
	if (pi->pi_owner == NULL) {
		/* XXX
		 * Currently we only support process-private PI mutexes;
		 * non-contended PI mutexes are locked in userland.
		 * Process-shared PI mutexes should always be initialized
		 * by the kernel and registered in the kernel, and locking
		 * should always be done by the kernel to avoid security
		 * problems.  For a process-private PI mutex we can find
		 * the owner thread and boost its priority safely.
		 */
		PROC_LOCK(curproc);
		td1 = thread_find(curproc, owner);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL && pi->pi_owner == NULL) {
			/*
			 * NOTE(review): the value stored in uq1 here is never
			 * read; uq1 is reassigned by the TAILQ_FOREACH below.
			 */
			uq1 = td1->td_umtxq;
			umtx_pi_setowner(pi, td1);
		}
		PROC_UNLOCK(curproc);
	} else {
		mtx_lock_spin(&umtx_lock);
	}

	/* Insert uq in front of the first waiter with a larger UPRI(). */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	/* Propagation runs under umtx_lock only, with the chain unlocked. */
	mtx_lock_spin(&umtx_lock);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);
	/* Sleep only if nobody dequeued us while the chain was unlocked. */
	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		/* Still queued after the sleep: dequeue ourselves. */
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
		}
	}
	umtxq_unlock(&uq->uq_key);

	/* Leave the blocked list and take back any priority we lent. */
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);

	umtxq_lock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
 * The chain lock of pi->pi_key must be held.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 *
 * On the last reference pi is also detached from its owner's contested
 * list (under umtx_lock) and removed from the chain's PI list.
 * The chain lock of pi->pi_key must be held.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;
	int free = 0;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		free = 1;
	}
	/* The structure is freed after umtx_lock has been released. */
	if (free)
		umtx_pi_free(pi);
}

/*
 * Find a PI mutex in hash table.
 * Returns NULL when no entry on the chain matches the key.
 * The chain lock must be held.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 * The chain lock of pi->pi_key must be held.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
*/ static int _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, int try) { struct umtx_q *uq; struct umtx_pi *pi, *new_pi; uint32_t id, owner, old; int error; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi == NULL) { new_pi = umtx_pi_alloc(M_NOWAIT); if (new_pi == NULL) { umtxq_unlock(&uq->uq_key); new_pi = umtx_pi_alloc(M_WAITOK); new_pi->pi_key = uq->uq_key; umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi != NULL) { umtx_pi_free(new_pi); new_pi = NULL; } } if (new_pi != NULL) { new_pi->pi_key = uq->uq_key; umtx_pi_insert(new_pi); pi = new_pi; } } umtx_pi_ref(pi); umtxq_unlock(&uq->uq_key); /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) { error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { umtxq_lock(&uq->uq_key); error = umtx_pi_claim(pi, td); umtxq_unlock(&uq->uq_key); break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } /* If this failed the lock has changed, restart. */ continue; } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. 
*/ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); error = EFAULT; break; } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ if (old == owner) error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, "umtxpi", timo); umtxq_unlock(&uq->uq_key); } umtxq_lock(&uq->uq_key); umtx_pi_unref(pi); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PI mutex. */ static int do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; uint32_t owner, old, id; int error; int count; int pri; id = td->td_tid; /* * Make sure we own this mtx. 
*/ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count_pi(&key, &uq_first); if (uq_first != NULL) { pi = uq_first->uq_pi_blocked; if (pi->pi_owner != curthread) { umtxq_unbusy(&key); umtxq_unlock(&key); /* userland messed the mutex */ return (EPERM); } uq_me = curthread->td_umtxq; mtx_lock_spin(&umtx_lock); pi->pi_owner = NULL; TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); uq_first = TAILQ_FIRST(&pi->pi_blocked); pri = PRI_MAX; TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); if (uq_first2 != NULL) { if (pri > UPRI(uq_first2->uq_thread)) pri = UPRI(uq_first2->uq_thread); } } thread_lock(curthread); sched_unlend_user_prio(curthread, pri); thread_unlock(curthread); mtx_unlock_spin(&umtx_lock); } umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword32(&m->m_owner, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); if (uq_first != NULL) umtxq_signal_thread(uq_first); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } /* * Lock a PP mutex. 
*/ static int _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, int try) { struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t ceiling; uint32_t owner, id; int error, pri, old_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); for (;;) { old_inherited_pri = uq->uq_inherited_pri; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); if (ceiling > RTP_PRIO_MAX) { error = EINVAL; goto out; } mtx_lock_spin(&umtx_lock); if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { mtx_unlock_spin(&umtx_lock); error = EINVAL; goto out; } if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; thread_lock(td); if (uq->uq_inherited_pri < UPRI(td)) sched_lend_user_prio(td, uq->uq_inherited_pri); thread_unlock(td); } mtx_unlock_spin(&umtx_lock); owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. 
*/ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } if (error != 0) { mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } out: umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PP mutex. */ static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t owner, id; uint32_t rceiling; int error, pri, new_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* * Make sure we own this mtx. 
*/ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); if (error != 0) return (error); if (rceiling == -1) new_inherited_pri = PRI_MAX; else { rceiling = RTP_PRIO_MAX - rceiling; if (rceiling > RTP_PRIO_MAX) return (EINVAL); new_inherited_pri = PRI_MIN_REALTIME + rceiling; } if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_unlock(&key); /* * For priority protected mutex, always set unlocked state * to UMUTEX_CONTESTED, so that userland always enters kernel * to lock the mutex, it is necessary because thread priority * has to be adjusted for such mutex. */ error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); if (error == -1) error = EFAULT; else { mtx_lock_spin(&umtx_lock); if (su != 0) uq->uq_inherited_pri = new_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } umtx_key_release(&key); return (error); } static int do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, uint32_t *old_ceiling) { struct umtx_q *uq; uint32_t save_ceiling; uint32_t owner, id; uint32_t flags; int error; flags = fuword32(&m->m_flags); if ((flags & UMUTEX_PRIO_PROTECT) == 0) return (EINVAL); if (ceiling > RTP_PRIO_MAX) return (EINVAL); id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); for (;;) { umtxq_lock(&uq->uq_key); 
umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); save_ceiling = fuword32(&m->m_ceilings[0]); owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { suword32(&m->m_ceilings[0], ceiling); suword32(__DEVOLATILE(uint32_t *, &m->m_owner), UMUTEX_CONTESTED); error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } if ((owner & ~UMUTEX_CONTESTED) == id) { suword32(&m->m_ceilings[0], ceiling); error = 0; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", 0); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtxq_lock(&uq->uq_key); if (error == 0) umtxq_signal(&uq->uq_key, INT_MAX); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); if (error == 0 && old_ceiling != NULL) suword32(old_ceiling, save_ceiling); return (error); } static int _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, - int try) + int mode) { switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: - return (_do_lock_normal(td, m, flags, timo, try)); + return (_do_lock_normal(td, m, flags, timo, mode)); case UMUTEX_PRIO_INHERIT: - return (_do_lock_pi(td, m, flags, timo, try)); + return (_do_lock_pi(td, m, flags, timo, mode)); case UMUTEX_PRIO_PROTECT: - return (_do_lock_pp(td, m, flags, timo, try)); + return (_do_lock_pp(td, m, flags, timo, mode)); } return (EINVAL); } /* * Lock a userland POSIX mutex. 
*/ static int do_lock_umutex(struct thread *td, struct umutex *m, - struct timespec *timeout, int try) + struct timespec *timeout, int mode) { struct timespec ts, ts2, ts3; struct timeval tv; uint32_t flags; int error; flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); if (timeout == NULL) { - error = _do_lock_umutex(td, m, flags, 0, try); + error = _do_lock_umutex(td, m, flags, 0, mode); /* Mutex locking is restarted if it is interrupted. */ - if (error == EINTR) + if (error == EINTR && mode != _UMUTEX_WAIT) error = ERESTART; } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { - error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try); + error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a userland POSIX mutex. 
*/ static int do_unlock_umutex(struct thread *td, struct umutex *m) { uint32_t flags; flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: return (do_unlock_normal(td, m, flags)); case UMUTEX_PRIO_INHERIT: return (do_unlock_pi(td, m, flags)); case UMUTEX_PRIO_PROTECT: return (do_unlock_pp(td, m, flags)); } return (EINVAL); } static int do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m, struct timespec *timeout, u_long wflags) { struct umtx_q *uq; struct timeval tv; struct timespec cts, ets, tts; uint32_t flags; int error; uq = td->td_umtxq; flags = fuword32(&cv->c_flags); error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); /* * The magic thing is we should set c_has_waiters to 1 before * releasing user mutex. */ suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1); umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); error = do_unlock_umutex(td, m); umtxq_lock(&uq->uq_key); if (error == 0) { if ((wflags & UMTX_CHECK_UNPARKING) && (td->td_pflags & TDP_WAKEUP)) { td->td_pflags &= ~TDP_WAKEUP; error = EINTR; } else if (timeout == NULL) { error = umtxq_sleep(uq, "ucond", 0); } else { getnanouptime(&ets); timespecadd(&ets, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = umtxq_sleep(uq, "ucond", tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&cts); if (timespeccmp(&cts, &ets, >=)) { error = ETIMEDOUT; break; } tts = ets; timespecsub(&tts, &cts); TIMESPEC_TO_TIMEVAL(&tv, &tts); } } } if (error != 0) { if ((uq->uq_flags & UQF_UMTXQ) == 0) { /* * If we concurrently got do_cv_signal()d * and we got an error or UNIX signals or a timeout, * then, perform another umtxq_signal to avoid * consuming the wakeup. 
This may cause spurious * wakeup for another thread which was just queued, * but SUSV3 explicitly allows spurious wakeup to * occur, and indeed a kernel based implementation * can not avoid it. */ if (!umtxq_signal(&uq->uq_key, 1)) error = 0; } if (error == ERESTART) error = EINTR; } umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Signal a userland condition variable. */ static int do_cv_signal(struct thread *td, struct ucond *cv) { struct umtx_key key; int error, cnt, nwake; uint32_t flags; flags = fuword32(&cv->c_flags); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); cnt = umtxq_count(&key); nwake = umtxq_signal(&key, 1); /* No waiter is left after this wakeup: clear c_has_waiters. */ if (cnt <= nwake) { umtxq_unlock(&key); error = suword32( __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); umtxq_lock(&key); } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } /* * Broadcast a userland condition variable: signal up to INT_MAX * waiters and store 0 into c_has_waiters. */ static int do_cv_broadcast(struct thread *td, struct ucond *cv) { struct umtx_key key; int error; uint32_t flags; flags = fuword32(&cv->c_flags); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_signal(&key, INT_MAX); umtxq_unlock(&key); error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static int do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo) { struct umtx_q *uq; uint32_t flags, wrflags; int32_t state, oldstate; int32_t blocked_readers; int error; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); wrflags = URWLOCK_WRITE_OWNER; if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) wrflags |= URWLOCK_WRITE_WAITERS; for (;;) { state = fuword32(__DEVOLATILE(int32_t 
*, &rwlock->rw_state)); /* try to lock it */ while (!(state & wrflags)) { if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { umtx_key_release(&uq->uq_key); return (EAGAIN); } oldstate = casuword32(&rwlock->rw_state, state, state + 1); if (oldstate == state) { umtx_key_release(&uq->uq_key); return (0); } state = oldstate; } if (error) break; /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* set read contention bit */ while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); if (oldstate == state) goto sleep; state = oldstate; } /* state is changed while setting flags, restart */ if (!(state & wrflags)) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); continue; } sleep: /* contention bit is set, before sleeping, increase read waiter count */ blocked_readers = fuword32(&rwlock->rw_blocked_readers); suword32(&rwlock->rw_blocked_readers, blocked_readers+1); while (state & wrflags) { umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "urdlck", timo); umtxq_busy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); if (error) break; state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); } /* decrease read waiter count, and may clear read contention bit */ blocked_readers = fuword32(&rwlock->rw_blocked_readers); suword32(&rwlock->rw_blocked_readers, blocked_readers-1); if (blocked_readers == 1) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_READ_WAITERS); if (oldstate == state) break; state = oldstate; } } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); return (error); } static int do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout) { struct 
timespec ts, ts2, ts3; struct timeval tv; int error; getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = do_rw_rdlock(td, obj, val, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } if (error == ERESTART) error = EINTR; return (error); } static int do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int32_t blocked_writers; int error; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); for (;;) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); if (oldstate == state) { umtx_key_release(&uq->uq_key); return (0); } state = oldstate; } if (error) break; /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && (state & URWLOCK_WRITE_WAITERS) == 0) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); if (oldstate == state) goto sleep; state = oldstate; } if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); continue; } sleep: blocked_writers = fuword32(&rwlock->rw_blocked_writers); suword32(&rwlock->rw_blocked_writers, blocked_writers+1); while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { umtxq_lock(&uq->uq_key); umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "uwrlck", timo); umtxq_busy(&uq->uq_key); 
umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unlock(&uq->uq_key); if (error) break; state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); } blocked_writers = fuword32(&rwlock->rw_blocked_writers); suword32(&rwlock->rw_blocked_writers, blocked_writers-1); if (blocked_writers == 1) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_WRITE_WAITERS); if (oldstate == state) break; state = oldstate; } } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); return (error); } static int do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = do_rw_wrlock(td, obj, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } if (error == ERESTART) error = EINTR; return (error); } static int do_rw_unlock(struct thread *td, struct urwlock *rwlock) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int error, q, count; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); if (state & URWLOCK_WRITE_OWNER) { for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_WRITE_OWNER); if (oldstate != state) { state = oldstate; if (!(oldstate & URWLOCK_WRITE_OWNER)) { error = EPERM; goto out; } } else break; } } else if (URWLOCK_READER_COUNT(state) != 0) { for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state - 1); if (oldstate != state) { state = oldstate; if (URWLOCK_READER_COUNT(oldstate) == 0) { error = EPERM; 
goto out; } } else break; } } else { error = EPERM; goto out; } count = 0; if (!(flags & URWLOCK_PREFER_READER)) { if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } else if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } } else { if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } else if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } } if (count) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_signal_queue(&uq->uq_key, count, q); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } out: umtx_key_release(&uq->uq_key); return (error); } int _umtx_lock(struct thread *td, struct _umtx_lock_args *uap) /* struct umtx *umtx */ { return _do_lock_umtx(td, uap->umtx, td->td_tid, 0); } int _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap) /* struct umtx *umtx */ { return do_unlock_umtx(td, uap->umtx, td->td_tid); } static int __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_lock_umtx(td, uap->obj, uap->val, ts)); } static int __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap) { return (do_unlock_umtx(td, uap->obj, uap->val)); } static int __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 0, 0); } static int __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 0); } static int __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 1); } static int __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) { return (kern_umtx_wake(td, uap->obj, uap->val, 0)); } static int __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) { return (kern_umtx_wake(td, uap->obj, uap->val, 1)); } static int __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a 
null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, 0); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { - return do_lock_umutex(td, uap->obj, NULL, 1); + return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); +} + +static int +__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) +{ + struct timespec *ts, timeout; + int error; + + /* Allow a null timespec (wait forever). */ + if (uap->uaddr2 == NULL) + ts = NULL; + else { + error = copyin(uap->uaddr2, &timeout, + sizeof(timeout)); + if (error != 0) + return (error); + if (timeout.tv_nsec >= 1000000000 || + timeout.tv_nsec < 0) { + return (EINVAL); + } + ts = &timeout; + } + return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); +} + +static int +__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) +{ + return do_wake_umutex(td, uap->obj); } static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_unlock_umutex(td, uap->obj); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); } static int __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) { return do_cv_signal(td, uap->obj); } static int __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) { return do_cv_broadcast(td, uap->obj); } static int __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_wrlock2(td, uap->obj, &timeout); } return (error); } static int __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) { return do_rw_unlock(td, uap->obj); } typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); static _umtx_op_func op_table[] = { __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ __umtx_op_wait, /* UMTX_OP_WAIT */ __umtx_op_wake, /* UMTX_OP_WAKE */ __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ - __umtx_op_wake_private /* UMTX_OP_WAKE_PRIVATE */ + __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ + __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ + __umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */ }; int _umtx_op(struct thread *td, struct _umtx_op_args *uap) { if ((unsigned)uap->op < UMTX_OP_MAX) return (*op_table[uap->op])(td, uap); return (EINVAL); } #ifdef COMPAT_IA32 int freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) /* struct umtx *umtx */ { return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); } int freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) /* struct umtx *umtx */ { return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, 
td->td_tid)); } struct timespec32 { u_int32_t tv_sec; u_int32_t tv_nsec; }; static inline int copyin_timeout32(void *addr, struct timespec *tsp) { struct timespec32 ts32; int error; error = copyin(addr, &ts32, sizeof(struct timespec32)); if (error == 0) { tsp->tv_sec = ts32.tv_sec; tsp->tv_nsec = ts32.tv_nsec; } return (error); } static int __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_lock_umtx32(td, uap->obj, uap->val, ts)); } static int __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) { return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val)); } static int __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 0); } static int __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, 0); } +static int +__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) +{ + struct timespec *ts, timeout; + int error; + + /* Allow a null timespec (wait forever). 
*/ + if (uap->uaddr2 == NULL) + ts = NULL; + else { + error = copyin_timeout32(uap->uaddr2, &timeout); + if (error != 0) + return (error); + if (timeout.tv_nsec >= 1000000000 || + timeout.tv_nsec < 0) + return (EINVAL); + ts = &timeout; + } + return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); +} + static int __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_wrlock2(td, uap->obj, &timeout); } return (error); } static int __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* A null uaddr2 means no timeout is passed to do_wait(). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 1); } /* compat32 handlers indexed by UMTX_OP_* op code; freebsd32_umtx_op() dispatches through this table. */ static _umtx_op_func op_table_compat32[] = { __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */ __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ __umtx_op_wake, /* UMTX_OP_WAKE */ __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */ __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT */ __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ - __umtx_op_wake_private /* UMTX_OP_WAKE_PRIVATE */ + __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ + __umtx_op_wait_umutex_compat32, /* UMTX_OP_MUTEX_WAIT */ + __umtx_op_wake_umutex /* UMTX_OP_MUTEX_WAKE */ }; /* Dispatch uap->op through op_table_compat32; an op code that is not below UMTX_OP_MAX yields EINVAL. */ int freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) { if ((unsigned)uap->op < UMTX_OP_MAX) return (*op_table_compat32[uap->op])(td, (struct _umtx_op_args *)uap); return (EINVAL); } #endif /* COMPAT_IA32 */ 
/* Allocate the thread's umtx queue and point it back at the thread. */ void umtx_thread_init(struct thread *td) { td->td_umtxq = umtxq_alloc(); td->td_umtxq->uq_thread = td; } /* Free the thread's umtx queue. */ void umtx_thread_fini(struct thread *td) { umtxq_free(td->td_umtxq); } /* * Called when a new thread is created, e.g. by fork(). Resets the * inherited priority to PRI_MAX and asserts that the umtx queue * carries no leftover state. */ void umtx_thread_alloc(struct thread *td) { struct umtx_q *uq; uq = td->td_umtxq; uq->uq_inherited_pri = PRI_MAX; KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); KASSERT(uq->uq_thread == td, ("uq_thread != td")); KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); } /* * exec() hook: cleans up the umtx data of curthread. */ static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused) { umtx_thread_cleanup(curthread); } /* * thread_exit() hook: cleans up the umtx data of td. */ void umtx_thread_exit(struct thread *td) { umtx_thread_cleanup(td); } /* * Clean up umtx data. With umtx_lock held: reset the inherited * priority to PRI_MAX, clear the owner of every umtx_pi on the * thread's contested list and unlink it, then clear TDF_UBORROWING * under the thread lock. Does nothing if the thread has no umtx queue. */ static void umtx_thread_cleanup(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; if ((uq = td->td_umtxq) == NULL) return; mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = PRI_MAX; while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { pi->pi_owner = NULL; TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); } thread_lock(td); td->td_flags &= ~TDF_UBORROWING; thread_unlock(td); mtx_unlock_spin(&umtx_lock); } diff --git a/sys/sys/umtx.h b/sys/sys/umtx.h index 2857a1a8093d..2d45677ee1ae 100644 --- a/sys/sys/umtx.h +++ b/sys/sys/umtx.h @@ -1,202 +1,204 @@ /*- * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef _SYS_UMTX_H_ #define _SYS_UMTX_H_ #include #include /* * See pthread_* */ #define UMTX_UNOWNED 0x0 #define UMTX_CONTESTED LONG_MIN struct umtx { volatile u_long u_owner; /* Owner of the mutex. 
*/ }; #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ #define UMUTEX_UNOWNED 0x0 #define UMUTEX_CONTESTED 0x80000000U #define UMUTEX_ERROR_CHECK 0x0002 /* Error-checking mutex */ #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ struct umutex { volatile __lwpid_t m_owner; /* Owner of the mutex */ uint32_t m_flags; /* Flags of the mutex */ uint32_t m_ceilings[2]; /* Priority protect ceiling */ uint32_t m_spare[4]; }; struct ucond { volatile uint32_t c_has_waiters; /* Has waiters in kernel */ uint32_t c_flags; /* Flags of the condition variable */ uint32_t c_spare[2]; /* Spare space */ }; struct urwlock { volatile int32_t rw_state; uint32_t rw_flags; uint32_t rw_blocked_readers; uint32_t rw_blocked_writers; uint32_t rw_spare[4]; }; /* urwlock flags */ #define URWLOCK_PREFER_READER 0x0002 #define URWLOCK_WRITE_OWNER 0x80000000U #define URWLOCK_WRITE_WAITERS 0x40000000U #define URWLOCK_READ_WAITERS 0x20000000U #define URWLOCK_MAX_READERS 0x1fffffffU /* Bits of c selected by the URWLOCK_MAX_READERS mask */ #define URWLOCK_READER_COUNT(c) ((c) & URWLOCK_MAX_READERS) /* op codes for _umtx_op */ #define UMTX_OP_LOCK 0 #define UMTX_OP_UNLOCK 1 #define UMTX_OP_WAIT 2 #define UMTX_OP_WAKE 3 #define UMTX_OP_MUTEX_TRYLOCK 4 #define UMTX_OP_MUTEX_LOCK 5 #define UMTX_OP_MUTEX_UNLOCK 6 #define UMTX_OP_SET_CEILING 7 #define UMTX_OP_CV_WAIT 8 #define UMTX_OP_CV_SIGNAL 9 #define UMTX_OP_CV_BROADCAST 10 #define UMTX_OP_WAIT_UINT 11 #define UMTX_OP_RW_RDLOCK 12 #define UMTX_OP_RW_WRLOCK 13 #define UMTX_OP_RW_UNLOCK 14 #define UMTX_OP_WAIT_UINT_PRIVATE 15 #define UMTX_OP_WAKE_PRIVATE 16 -#define UMTX_OP_MAX 17 +#define UMTX_OP_MUTEX_WAIT 17 +#define UMTX_OP_MUTEX_WAKE 18 +#define UMTX_OP_MAX 19 /* one past the highest op code */ /* flags for UMTX_OP_CV_WAIT */ #define UMTX_CHECK_UNPARKING 0x01 #ifndef _KERNEL int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); /* * Old (deprecated) userland mutex system calls. 
*/ int _umtx_lock(struct umtx *mtx); int _umtx_unlock(struct umtx *mtx); /* * Standard API. Each wrapper first tries an uncontested * acquire/release and asks the kernel to resolve failures. */ static __inline void umtx_init(struct umtx *umtx) { umtx->u_owner = UMTX_UNOWNED; } /* Owner word with the LONG_MIN bit masked off. */ static __inline u_long umtx_owner(struct umtx *umtx) { return (umtx->u_owner & ~LONG_MIN); } /* Returns 0 on success, or errno when the _umtx_lock() fallback fails. */ static __inline int umtx_lock(struct umtx *umtx, u_long id) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) if (_umtx_lock(umtx) == -1) return (errno); return (0); } /* Single compare-and-set attempt with no kernel fallback; EBUSY when it fails. */ static __inline int umtx_trylock(struct umtx *umtx, u_long id) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) return (EBUSY); return (0); } /* Like umtx_lock(), but falls back to UMTX_OP_LOCK with the given timeout. */ static __inline int umtx_timedlock(struct umtx *umtx, u_long id, const struct timespec *timeout) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) if (_umtx_op(umtx, UMTX_OP_LOCK, id, 0, __DECONST(void *, timeout)) == -1) return (errno); return (0); } /* Returns 0 on success, or errno when the _umtx_unlock() fallback fails. */ static __inline int umtx_unlock(struct umtx *umtx, u_long id) { if (atomic_cmpset_rel_long(&umtx->u_owner, id, UMTX_UNOWNED) == 0) if (_umtx_unlock(umtx) == -1) return (errno); return (0); } /* Issue UMTX_OP_WAIT on p with val and the given timeout; returns errno on failure. */ static __inline int umtx_wait(u_long *p, long val, const struct timespec *timeout) { if (_umtx_op(p, UMTX_OP_WAIT, val, 0, __DECONST(void *, timeout)) == -1) return (errno); return (0); } /* Wake threads waiting on a user address. */ static __inline int umtx_wake(u_long *p, int nr_wakeup) { if (_umtx_op(p, UMTX_OP_WAKE, nr_wakeup, 0, 0) == -1) return (errno); return (0); } #else struct thread; struct umtx_q *umtxq_alloc(void); void umtxq_free(struct umtx_q *); int kern_umtx_wake(struct thread *, void *, int, int); void umtx_pi_adjust(struct thread *, u_char); void umtx_thread_init(struct thread *); void umtx_thread_fini(struct thread *); void umtx_thread_alloc(struct thread *); void umtx_thread_exit(struct thread *); #endif /* !_KERNEL */ #endif /* !_SYS_UMTX_H_ */