diff --git a/lib/libthr/thread/Makefile.inc b/lib/libthr/thread/Makefile.inc index 4ce90b55dcae..6d571d4bd39d 100644 --- a/lib/libthr/thread/Makefile.inc +++ b/lib/libthr/thread/Makefile.inc @@ -1,56 +1,57 @@ # $FreeBSD$ # thr sources .PATH: ${.CURDIR}/thread SRCS+= \ thr_affinity.c \ thr_attr.c \ thr_barrier.c \ thr_barrierattr.c \ thr_cancel.c \ thr_clean.c \ thr_concurrency.c \ thr_cond.c \ thr_condattr.c \ thr_create.c \ thr_detach.c \ thr_equal.c \ thr_event.c \ thr_exit.c \ thr_fork.c \ thr_getprio.c \ thr_getcpuclockid.c \ thr_getschedparam.c \ thr_info.c \ thr_init.c \ thr_join.c \ thr_list.c \ thr_kern.c \ thr_kill.c \ thr_main_np.c \ thr_multi_np.c \ thr_mutex.c \ thr_mutexattr.c \ thr_once.c \ thr_printf.c \ thr_pspinlock.c \ thr_resume_np.c \ thr_rtld.c \ thr_rwlock.c \ thr_rwlockattr.c \ thr_self.c \ thr_sem.c \ thr_setprio.c \ thr_setschedparam.c \ thr_sig.c \ thr_single_np.c \ + thr_sleepq.c \ thr_spec.c \ thr_spinlock.c \ thr_stack.c \ thr_syscalls.c \ thr_suspend_np.c \ thr_switch_np.c \ thr_symbols.c \ thr_umtx.c \ thr_yield.c diff --git a/lib/libthr/thread/thr_cond.c b/lib/libthr/thread/thr_cond.c index 03b5cdd7c6f3..6ec6d4c6da0b 100644 --- a/lib/libthr/thread/thr_cond.c +++ b/lib/libthr/thread/thr_cond.c @@ -1,305 +1,482 @@ /* * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include "un-namespace.h" #include "thr_private.h" /* * Prototypes */ int __pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex); int __pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec * abstime); static int cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr); static int cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime, int cancel); -static int cond_signal_common(pthread_cond_t *cond, int broadcast); +static int cond_signal_common(pthread_cond_t *cond); +static int cond_broadcast_common(pthread_cond_t *cond); /* * Double underscore versions are cancellation points. Single underscore * versions are not and are provided for libc internal usage (which * shouldn't introduce cancellation points). 
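The convention described above is easiest to see in a standalone sketch. A minimal, portable illustration of the single- versus double-underscore split, using hypothetical names (__example_wait, _example_wait, do_wait_impl) rather than libthr internals:

#include <pthread.h>

static int
do_wait_impl(void)
{
        /* Hypothetical backend; a real one would block somewhere. */
        return (0);
}

int
__example_wait(void)
{
        /* Cancellation point: let any pending cancel fire first. */
        pthread_testcancel();
        return (do_wait_impl());
}

int
_example_wait(void)
{
        int oldstate, ret;

        /* libc-internal flavor: never acts as a cancellation point. */
        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
        ret = do_wait_impl();
        pthread_setcancelstate(oldstate, NULL);
        return (ret);
}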
*/ __weak_reference(__pthread_cond_wait, pthread_cond_wait); __weak_reference(__pthread_cond_timedwait, pthread_cond_timedwait); __weak_reference(_pthread_cond_init, pthread_cond_init); __weak_reference(_pthread_cond_destroy, pthread_cond_destroy); __weak_reference(_pthread_cond_signal, pthread_cond_signal); __weak_reference(_pthread_cond_broadcast, pthread_cond_broadcast); +#define CV_PSHARED(cvp) (((cvp)->__flags & USYNC_PROCESS_SHARED) != 0) + static int cond_init(pthread_cond_t *cond, const pthread_condattr_t *cond_attr) { - pthread_cond_t pcond; - int rval = 0; + struct pthread_cond *cvp; + int error = 0; - if ((pcond = (pthread_cond_t) + if ((cvp = (pthread_cond_t) calloc(1, sizeof(struct pthread_cond))) == NULL) { - rval = ENOMEM; + error = ENOMEM; } else { /* * Initialise the condition variable structure: */ if (cond_attr == NULL || *cond_attr == NULL) { - pcond->c_pshared = 0; - pcond->c_clockid = CLOCK_REALTIME; + cvp->__clock_id = CLOCK_REALTIME; } else { - pcond->c_pshared = (*cond_attr)->c_pshared; - pcond->c_clockid = (*cond_attr)->c_clockid; + if ((*cond_attr)->c_pshared) + cvp->__flags |= USYNC_PROCESS_SHARED; + cvp->__clock_id = (*cond_attr)->c_clockid; } - _thr_umutex_init(&pcond->c_lock); - *cond = pcond; + *cond = cvp; } - /* Return the completion status: */ - return (rval); + return (error); } static int init_static(struct pthread *thread, pthread_cond_t *cond) { int ret; THR_LOCK_ACQUIRE(thread, &_cond_static_lock); if (*cond == NULL) ret = cond_init(cond, NULL); else ret = 0; THR_LOCK_RELEASE(thread, &_cond_static_lock); return (ret); } #define CHECK_AND_INIT_COND \ - if (__predict_false((cv = (*cond)) <= THR_COND_DESTROYED)) { \ - if (cv == THR_COND_INITIALIZER) { \ + if (__predict_false((cvp = (*cond)) <= THR_COND_DESTROYED)) { \ + if (cvp == THR_COND_INITIALIZER) { \ int ret; \ ret = init_static(_get_curthread(), cond); \ if (ret) \ return (ret); \ - } else if (cv == THR_COND_DESTROYED) { \ + } else if (cvp == THR_COND_DESTROYED) { \ return (EINVAL); \ } \ - cv = *cond; \ + cvp = *cond; \ } int _pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *cond_attr) { *cond = NULL; return (cond_init(cond, cond_attr)); } int _pthread_cond_destroy(pthread_cond_t *cond) { - struct pthread *curthread = _get_curthread(); - struct pthread_cond *cv; - int rval = 0; - - if ((cv = *cond) == THR_COND_INITIALIZER) - rval = 0; - else if (cv == THR_COND_DESTROYED) - rval = EINVAL; + struct pthread_cond *cvp; + int error = 0; + + if ((cvp = *cond) == THR_COND_INITIALIZER) + error = 0; + else if (cvp == THR_COND_DESTROYED) + error = EINVAL; else { - cv = *cond; - THR_UMUTEX_LOCK(curthread, &cv->c_lock); + cvp = *cond; *cond = THR_COND_DESTROYED; - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); /* * Free the memory allocated for the condition * variable structure: */ - free(cv); - } - return (rval); -} - -struct cond_cancel_info -{ - pthread_mutex_t *mutex; - pthread_cond_t *cond; - int count; -}; - -static void -cond_cancel_handler(void *arg) -{ - struct pthread *curthread = _get_curthread(); - struct cond_cancel_info *info = (struct cond_cancel_info *)arg; - pthread_cond_t cv; - - if (info->cond != NULL) { - cv = *(info->cond); - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); + free(cvp); } - _mutex_cv_lock(info->mutex, info->count); + return (error); } /* * Cancellation behavior: * A thread may be canceled at the start; if it is canceled, it means it * did not get a wakeup from pthread_cond_signal(); otherwise, it is * not canceled.
* Thread cancellation never causes a wakeup from pthread_cond_signal() * to be lost. */ static int -cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex, +cond_wait_kernel(struct pthread_cond *cvp, struct pthread_mutex *mp, const struct timespec *abstime, int cancel) { struct pthread *curthread = _get_curthread(); - struct timespec ts, ts2, *tsp; - struct cond_cancel_info info; - pthread_cond_t cv; - int ret; + int recurse; + int error, error2 = 0; + + error = _mutex_cv_detach(mp, &recurse); + if (error != 0) + return (error); + + if (cancel) { + _thr_cancel_enter2(curthread, 0); + error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, + (struct umutex *)&mp->m_lock, abstime, + CVWAIT_ABSTIME|CVWAIT_CLOCKID); + _thr_cancel_leave(curthread, 0); + } else { + error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, + (struct umutex *)&mp->m_lock, abstime, + CVWAIT_ABSTIME|CVWAIT_CLOCKID); + } /* - * If the condition variable is statically initialized, - * perform the dynamic initialization: + * Note that PP mutex and ROBUST mutex may return + * interesting error codes. */ - CHECK_AND_INIT_COND - - cv = *cond; - THR_UMUTEX_LOCK(curthread, &cv->c_lock); - ret = _mutex_cv_unlock(mutex, &info.count); - if (__predict_false(ret != 0)) { - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); - return (ret); + if (error == 0) { + error2 = _mutex_cv_lock(mp, recurse); + } else if (error == EINTR || error == ETIMEDOUT) { + error2 = _mutex_cv_lock(mp, recurse); + if (error2 == 0 && cancel) + _thr_testcancel(curthread); + if (error == EINTR) + error = 0; + } else { + /* We know that it didn't unlock the mutex. */ + error2 = _mutex_cv_attach(mp, recurse); + if (error2 == 0 && cancel) + _thr_testcancel(curthread); } + return (error2 != 0 ? error2 : error); +} + +/* + * The thread waits on a userland queue whenever possible; when it + * is signaled or broadcast, it is removed from the queue and saved + * in curthread's defer_waiters[] buffer, but it is not woken up + * until the mutex is unlocked. + */ + +static int +cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp, + const struct timespec *abstime, int cancel) +{ + struct pthread *curthread = _get_curthread(); + struct sleepqueue *sq; + int recurse; + int error; - info.mutex = mutex; - info.cond = cond; + if (curthread->wchan != NULL) + PANIC("thread was already on queue."); - if (abstime != NULL) { - clock_gettime(cv->c_clockid, &ts); - TIMESPEC_SUB(&ts2, abstime, &ts); - tsp = &ts2; - } else - tsp = NULL; + if (cancel) + _thr_testcancel(curthread); - if (cancel) { - THR_CLEANUP_PUSH(curthread, cond_cancel_handler, &info); - _thr_cancel_enter2(curthread, 0); - ret = _thr_ucond_wait(&cv->c_kerncv, &cv->c_lock, tsp, 1); - info.cond = NULL; - _thr_cancel_leave(curthread, (ret != 0)); - THR_CLEANUP_POP(curthread, 0); - } else { - ret = _thr_ucond_wait(&cv->c_kerncv, &cv->c_lock, tsp, 0); + _sleepq_lock(cvp); + /* + * Set __has_user_waiters before unlocking the mutex; this allows + * us to check it without locking in pthread_cond_signal().
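For orientation, the path choice that cond_wait_common() (further below) makes between cond_wait_kernel() and cond_wait_user() can be restated as a small predicate; the helper name and its boolean parameters are hypothetical:

#include <sched.h>
#include <stdbool.h>

static bool
must_wait_in_kernel(int policy, bool mtx_prio_protocol, bool mtx_pshared,
    bool cv_pshared)
{
        /*
         * Real-time threads and process-shared or priority-protocol
         * (PP/PI) mutexes cannot use the userland sleep queue; they
         * must take the kernel ucond path.
         */
        return (policy != SCHED_OTHER || mtx_prio_protocol ||
            mtx_pshared || cv_pshared);
}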
+ */ + cvp->__has_user_waiters = 1; + curthread->will_sleep = 1; + (void)_mutex_cv_unlock(mp, &recurse); + curthread->mutex_obj = mp; + _sleepq_add(cvp, curthread); + for(;;) { + _thr_clear_wake(curthread); + _sleepq_unlock(cvp); + + if (cancel) { + _thr_cancel_enter2(curthread, 0); + error = _thr_sleep(curthread, cvp->__clock_id, abstime); + _thr_cancel_leave(curthread, 0); + } else { + error = _thr_sleep(curthread, cvp->__clock_id, abstime); + } + + if (curthread->wchan == NULL) { + error = 0; + goto out; + } + + _sleepq_lock(cvp); + if (curthread->wchan == NULL) { + error = 0; + break; + } else if (cancel && SHOULD_CANCEL(curthread)) { + sq = _sleepq_lookup(cvp); + cvp->__has_user_waiters = + _sleepq_remove(sq, curthread); + _sleepq_unlock(cvp); + curthread->mutex_obj = NULL; + _mutex_cv_lock(mp, recurse); + if (!THR_IN_CRITICAL(curthread)) + _pthread_exit(PTHREAD_CANCELED); + else /* this should not happen */ + return (0); + } else if (error == ETIMEDOUT) { + sq = _sleepq_lookup(cvp); + cvp->__has_user_waiters = + _sleepq_remove(sq, curthread); + break; + } } - if (ret == EINTR) - ret = 0; - _mutex_cv_lock(mutex, info.count); - return (ret); + _sleepq_unlock(cvp); +out: + curthread->mutex_obj = NULL; + _mutex_cv_lock(mp, recurse); + return (error); +} + +static int +cond_wait_common(pthread_cond_t *cond, pthread_mutex_t *mutex, + const struct timespec *abstime, int cancel) +{ + struct pthread *curthread = _get_curthread(); + struct pthread_cond *cvp; + struct pthread_mutex *mp; + int error; + + CHECK_AND_INIT_COND + + mp = *mutex; + + if ((error = _mutex_owned(curthread, mp)) != 0) + return (error); + + if (curthread->attr.sched_policy != SCHED_OTHER || + (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT|UMUTEX_PRIO_INHERIT| + USYNC_PROCESS_SHARED)) != 0 || + (cvp->__flags & USYNC_PROCESS_SHARED) != 0) + return cond_wait_kernel(cvp, mp, abstime, cancel); + else + return cond_wait_user(cvp, mp, abstime, cancel); } int _pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { return (cond_wait_common(cond, mutex, NULL, 0)); } int __pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) { return (cond_wait_common(cond, mutex, NULL, 1)); } int -_pthread_cond_timedwait(pthread_cond_t * cond, pthread_mutex_t * mutex, +_pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec * abstime) { if (abstime == NULL || abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) return (EINVAL); return (cond_wait_common(cond, mutex, abstime, 0)); } int __pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) { if (abstime == NULL || abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) return (EINVAL); return (cond_wait_common(cond, mutex, abstime, 1)); } static int -cond_signal_common(pthread_cond_t *cond, int broadcast) +cond_signal_common(pthread_cond_t *cond) { struct pthread *curthread = _get_curthread(); - pthread_cond_t cv; - int ret = 0; + struct pthread *td; + struct pthread_cond *cvp; + struct pthread_mutex *mp; + struct sleepqueue *sq; + int *waddr; + int pshared; /* * If the condition variable is statically initialized, perform dynamic * initialization. 
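The loop in cond_wait_user() above reduces to a per-thread wake-word handshake: clear the word, sleep until it becomes nonzero, and have wakers set it. A portable C11 sketch with hypothetical names, using sched_yield() as a stand-in for the blocking umtx wait:

#include <sched.h>
#include <stdatomic.h>

/* One word per thread: 0 means asleep, nonzero means "wakeup posted". */
struct wake_word {
        atomic_uint value;
};

static void
wake_clear(struct wake_word *w)         /* cf. _thr_clear_wake() */
{
        atomic_store_explicit(&w->value, 0, memory_order_relaxed);
}

static void
wake_post(struct wake_word *w)          /* cf. _thr_set_wake() */
{
        atomic_store_explicit(&w->value, 1, memory_order_release);
}

static void
wake_wait(struct wake_word *w)          /* cf. _thr_sleep() */
{
        /*
         * A real implementation blocks in the kernel on the word
         * (UMTX_OP_WAIT_UINT_PRIVATE); yielding keeps this portable.
         */
        while (atomic_load_explicit(&w->value, memory_order_acquire) == 0)
                sched_yield();
}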
*/ CHECK_AND_INIT_COND - THR_UMUTEX_LOCK(curthread, &cv->c_lock); - if (!broadcast) - ret = _thr_ucond_signal(&cv->c_kerncv); - else - ret = _thr_ucond_broadcast(&cv->c_kerncv); - THR_UMUTEX_UNLOCK(curthread, &cv->c_lock); - return (ret); + pshared = CV_PSHARED(cvp); + + _thr_ucond_signal((struct ucond *)&cvp->__has_kern_waiters); + + if (pshared || cvp->__has_user_waiters == 0) + return (0); + + curthread = _get_curthread(); + waddr = NULL; + _sleepq_lock(cvp); + sq = _sleepq_lookup(cvp); + if (sq == NULL) { + _sleepq_unlock(cvp); + return (0); + } + + td = _sleepq_first(sq); + mp = td->mutex_obj; + cvp->__has_user_waiters = _sleepq_remove(sq, td); + if (mp->m_owner == curthread) { + if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } + curthread->defer_waiters[curthread->nwaiter_defer++] = + &td->wake_addr->value; + mp->m_flags |= PMUTEX_FLAG_DEFERED; + } else { + waddr = &td->wake_addr->value; + } + _sleepq_unlock(cvp); + if (waddr != NULL) + _thr_set_wake(waddr); + return (0); +} + +struct broadcast_arg { + struct pthread *curthread; + unsigned int *waddrs[MAX_DEFER_WAITERS]; + int count; +}; + +static void +drop_cb(struct pthread *td, void *arg) +{ + struct broadcast_arg *ba = arg; + struct pthread_mutex *mp; + struct pthread *curthread = ba->curthread; + + mp = td->mutex_obj; + if (mp->m_owner == curthread) { + if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } + curthread->defer_waiters[curthread->nwaiter_defer++] = + &td->wake_addr->value; + mp->m_flags |= PMUTEX_FLAG_DEFERED; + } else { + if (ba->count >= MAX_DEFER_WAITERS) { + _thr_wake_all(ba->waddrs, ba->count); + ba->count = 0; + } + ba->waddrs[ba->count++] = &td->wake_addr->value; + } +} + +static int +cond_broadcast_common(pthread_cond_t *cond) +{ + int pshared; + struct pthread_cond *cvp; + struct sleepqueue *sq; + struct broadcast_arg ba; + + /* + * If the condition variable is statically initialized, perform dynamic + * initialization. + */ + CHECK_AND_INIT_COND + + pshared = CV_PSHARED(cvp); + + _thr_ucond_broadcast((struct ucond *)&cvp->__has_kern_waiters); + + if (pshared || cvp->__has_user_waiters == 0) + return (0); + + ba.curthread = _get_curthread(); + ba.count = 0; + + _sleepq_lock(cvp); + sq = _sleepq_lookup(cvp); + if (sq == NULL) { + _sleepq_unlock(cvp); + return (0); + } + _sleepq_drop(sq, drop_cb, &ba); + cvp->__has_user_waiters = 0; + _sleepq_unlock(cvp); + if (ba.count > 0) + _thr_wake_all(ba.waddrs, ba.count); + return (0); } int _pthread_cond_signal(pthread_cond_t * cond) { - return (cond_signal_common(cond, 0)); + return (cond_signal_common(cond)); } int _pthread_cond_broadcast(pthread_cond_t * cond) { - return (cond_signal_common(cond, 1)); + return (cond_broadcast_common(cond)); } diff --git a/lib/libthr/thread/thr_init.c b/lib/libthr/thread/thr_init.c index b10c2277f15f..7e0721524fb5 100644 --- a/lib/libthr/thread/thr_init.c +++ b/lib/libthr/thread/thr_init.c @@ -1,475 +1,477 @@ /* * Copyright (c) 2003 Daniel M. Eischen * Copyright (c) 1995-1998 John Birrell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by John Birrell. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "thr_private.h" char *_usrstack; struct pthread *_thr_initial; int _libthr_debug; int _thread_event_mask; struct pthread *_thread_last_event; pthreadlist _thread_list = TAILQ_HEAD_INITIALIZER(_thread_list); pthreadlist _thread_gc_list = TAILQ_HEAD_INITIALIZER(_thread_gc_list); int _thread_active_threads = 1; atfork_head _thr_atfork_list = TAILQ_HEAD_INITIALIZER(_thr_atfork_list); struct urwlock _thr_atfork_lock = DEFAULT_URWLOCK; struct pthread_prio _thr_priorities[3] = { {RTP_PRIO_MIN, RTP_PRIO_MAX, 0}, /* FIFO */ {0, 0, 63}, /* OTHER */ {RTP_PRIO_MIN, RTP_PRIO_MAX, 0} /* RR */ }; struct pthread_attr _pthread_attr_default = { .sched_policy = SCHED_OTHER, .sched_inherit = PTHREAD_INHERIT_SCHED, .prio = 0, .suspend = THR_CREATE_RUNNING, .flags = PTHREAD_SCOPE_SYSTEM, .stackaddr_attr = NULL, .stacksize_attr = THR_STACK_DEFAULT, .guardsize_attr = 0, .cpusetsize = 0, .cpuset = NULL }; struct pthread_mutex_attr _pthread_mutexattr_default = { .m_type = PTHREAD_MUTEX_DEFAULT, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0 }; struct pthread_mutex_attr _pthread_mutexattr_adaptive_default = { .m_type = PTHREAD_MUTEX_ADAPTIVE_NP, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0 }; /* Default condition variable attributes: */ struct pthread_cond_attr _pthread_condattr_default = { .c_pshared = PTHREAD_PROCESS_PRIVATE, .c_clockid = CLOCK_REALTIME }; pid_t _thr_pid; int _thr_is_smp = 0; size_t _thr_guard_default; size_t _thr_stack_default = THR_STACK_DEFAULT; size_t _thr_stack_initial = THR_STACK_INITIAL; int _thr_page_size; int _thr_spinloops; int _thr_yieldloops; int _gc_count; struct umutex _mutex_static_lock = DEFAULT_UMUTEX; struct umutex _cond_static_lock = DEFAULT_UMUTEX; struct umutex _rwlock_static_lock = DEFAULT_UMUTEX; struct umutex _keytable_lock = DEFAULT_UMUTEX; struct urwlock _thr_list_lock = DEFAULT_URWLOCK; struct umutex _thr_event_lock = DEFAULT_UMUTEX; int __pthread_cond_wait(pthread_cond_t *, pthread_mutex_t *); int 
__pthread_mutex_lock(pthread_mutex_t *); int __pthread_mutex_trylock(pthread_mutex_t *); void _thread_init_hack(void) __attribute__ ((constructor)); static void init_private(void); static void init_main_thread(struct pthread *thread); /* * All weak references used within libc should be in this table. * This is so that static libraries will work. */ STATIC_LIB_REQUIRE(_fork); STATIC_LIB_REQUIRE(_pthread_getspecific); STATIC_LIB_REQUIRE(_pthread_key_create); STATIC_LIB_REQUIRE(_pthread_key_delete); STATIC_LIB_REQUIRE(_pthread_mutex_destroy); STATIC_LIB_REQUIRE(_pthread_mutex_init); STATIC_LIB_REQUIRE(_pthread_mutex_lock); STATIC_LIB_REQUIRE(_pthread_mutex_trylock); STATIC_LIB_REQUIRE(_pthread_mutex_unlock); STATIC_LIB_REQUIRE(_pthread_mutexattr_init); STATIC_LIB_REQUIRE(_pthread_mutexattr_destroy); STATIC_LIB_REQUIRE(_pthread_mutexattr_settype); STATIC_LIB_REQUIRE(_pthread_once); STATIC_LIB_REQUIRE(_pthread_setspecific); STATIC_LIB_REQUIRE(_raise); STATIC_LIB_REQUIRE(_sem_destroy); STATIC_LIB_REQUIRE(_sem_getvalue); STATIC_LIB_REQUIRE(_sem_init); STATIC_LIB_REQUIRE(_sem_post); STATIC_LIB_REQUIRE(_sem_timedwait); STATIC_LIB_REQUIRE(_sem_trywait); STATIC_LIB_REQUIRE(_sem_wait); STATIC_LIB_REQUIRE(_sigaction); STATIC_LIB_REQUIRE(_sigprocmask); STATIC_LIB_REQUIRE(_sigsuspend); STATIC_LIB_REQUIRE(_sigtimedwait); STATIC_LIB_REQUIRE(_sigwait); STATIC_LIB_REQUIRE(_sigwaitinfo); STATIC_LIB_REQUIRE(_spinlock); STATIC_LIB_REQUIRE(_spinlock_debug); STATIC_LIB_REQUIRE(_spinunlock); STATIC_LIB_REQUIRE(_thread_init_hack); /* * These are needed when linking statically. All references within * libgcc (and in the future libc) to these routines are weak, but * if they are not (strongly) referenced by the application or other * libraries, then the actual functions will not be loaded. 
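STATIC_LIB_REQUIRE() exists so that the static-archive build links these modules even though all internal references to the symbols are weak. Its usual definition is a single assembler directive (quoted from memory from libc_private.h, so verify against the tree before relying on it):

#define STATIC_LIB_REQUIRE(name)        __asm(".globl " #name)

Declaring the symbol .globl without defining it leaves an undefined strong reference in the object that uses the macro, which forces the linker to pull the defining member out of the archive.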
*/ STATIC_LIB_REQUIRE(_pthread_once); STATIC_LIB_REQUIRE(_pthread_key_create); STATIC_LIB_REQUIRE(_pthread_key_delete); STATIC_LIB_REQUIRE(_pthread_getspecific); STATIC_LIB_REQUIRE(_pthread_setspecific); STATIC_LIB_REQUIRE(_pthread_mutex_init); STATIC_LIB_REQUIRE(_pthread_mutex_destroy); STATIC_LIB_REQUIRE(_pthread_mutex_lock); STATIC_LIB_REQUIRE(_pthread_mutex_trylock); STATIC_LIB_REQUIRE(_pthread_mutex_unlock); STATIC_LIB_REQUIRE(_pthread_create); /* Pull in all symbols required by libthread_db */ STATIC_LIB_REQUIRE(_thread_state_running); #define DUAL_ENTRY(entry) \ (pthread_func_t)entry, (pthread_func_t)entry static pthread_func_t jmp_table[][2] = { {DUAL_ENTRY(_pthread_atfork)}, /* PJT_ATFORK */ {DUAL_ENTRY(_pthread_attr_destroy)}, /* PJT_ATTR_DESTROY */ {DUAL_ENTRY(_pthread_attr_getdetachstate)}, /* PJT_ATTR_GETDETACHSTATE */ {DUAL_ENTRY(_pthread_attr_getguardsize)}, /* PJT_ATTR_GETGUARDSIZE */ {DUAL_ENTRY(_pthread_attr_getinheritsched)}, /* PJT_ATTR_GETINHERITSCHED */ {DUAL_ENTRY(_pthread_attr_getschedparam)}, /* PJT_ATTR_GETSCHEDPARAM */ {DUAL_ENTRY(_pthread_attr_getschedpolicy)}, /* PJT_ATTR_GETSCHEDPOLICY */ {DUAL_ENTRY(_pthread_attr_getscope)}, /* PJT_ATTR_GETSCOPE */ {DUAL_ENTRY(_pthread_attr_getstackaddr)}, /* PJT_ATTR_GETSTACKADDR */ {DUAL_ENTRY(_pthread_attr_getstacksize)}, /* PJT_ATTR_GETSTACKSIZE */ {DUAL_ENTRY(_pthread_attr_init)}, /* PJT_ATTR_INIT */ {DUAL_ENTRY(_pthread_attr_setdetachstate)}, /* PJT_ATTR_SETDETACHSTATE */ {DUAL_ENTRY(_pthread_attr_setguardsize)}, /* PJT_ATTR_SETGUARDSIZE */ {DUAL_ENTRY(_pthread_attr_setinheritsched)}, /* PJT_ATTR_SETINHERITSCHED */ {DUAL_ENTRY(_pthread_attr_setschedparam)}, /* PJT_ATTR_SETSCHEDPARAM */ {DUAL_ENTRY(_pthread_attr_setschedpolicy)}, /* PJT_ATTR_SETSCHEDPOLICY */ {DUAL_ENTRY(_pthread_attr_setscope)}, /* PJT_ATTR_SETSCOPE */ {DUAL_ENTRY(_pthread_attr_setstackaddr)}, /* PJT_ATTR_SETSTACKADDR */ {DUAL_ENTRY(_pthread_attr_setstacksize)}, /* PJT_ATTR_SETSTACKSIZE */ {DUAL_ENTRY(_pthread_cancel)}, /* PJT_CANCEL */ {DUAL_ENTRY(_pthread_cleanup_pop)}, /* PJT_CLEANUP_POP */ {DUAL_ENTRY(_pthread_cleanup_push)}, /* PJT_CLEANUP_PUSH */ {DUAL_ENTRY(_pthread_cond_broadcast)}, /* PJT_COND_BROADCAST */ {DUAL_ENTRY(_pthread_cond_destroy)}, /* PJT_COND_DESTROY */ {DUAL_ENTRY(_pthread_cond_init)}, /* PJT_COND_INIT */ {DUAL_ENTRY(_pthread_cond_signal)}, /* PJT_COND_SIGNAL */ {DUAL_ENTRY(_pthread_cond_timedwait)}, /* PJT_COND_TIMEDWAIT */ {(pthread_func_t)__pthread_cond_wait, (pthread_func_t)_pthread_cond_wait}, /* PJT_COND_WAIT */ {DUAL_ENTRY(_pthread_detach)}, /* PJT_DETACH */ {DUAL_ENTRY(_pthread_equal)}, /* PJT_EQUAL */ {DUAL_ENTRY(_pthread_exit)}, /* PJT_EXIT */ {DUAL_ENTRY(_pthread_getspecific)}, /* PJT_GETSPECIFIC */ {DUAL_ENTRY(_pthread_join)}, /* PJT_JOIN */ {DUAL_ENTRY(_pthread_key_create)}, /* PJT_KEY_CREATE */ {DUAL_ENTRY(_pthread_key_delete)}, /* PJT_KEY_DELETE*/ {DUAL_ENTRY(_pthread_kill)}, /* PJT_KILL */ {DUAL_ENTRY(_pthread_main_np)}, /* PJT_MAIN_NP */ {DUAL_ENTRY(_pthread_mutexattr_destroy)}, /* PJT_MUTEXATTR_DESTROY */ {DUAL_ENTRY(_pthread_mutexattr_init)}, /* PJT_MUTEXATTR_INIT */ {DUAL_ENTRY(_pthread_mutexattr_settype)}, /* PJT_MUTEXATTR_SETTYPE */ {DUAL_ENTRY(_pthread_mutex_destroy)}, /* PJT_MUTEX_DESTROY */ {DUAL_ENTRY(_pthread_mutex_init)}, /* PJT_MUTEX_INIT */ {(pthread_func_t)__pthread_mutex_lock, (pthread_func_t)_pthread_mutex_lock}, /* PJT_MUTEX_LOCK */ {(pthread_func_t)__pthread_mutex_trylock, (pthread_func_t)_pthread_mutex_trylock},/* PJT_MUTEX_TRYLOCK */ {DUAL_ENTRY(_pthread_mutex_unlock)}, /* PJT_MUTEX_UNLOCK */ 
{DUAL_ENTRY(_pthread_once)}, /* PJT_ONCE */ {DUAL_ENTRY(_pthread_rwlock_destroy)}, /* PJT_RWLOCK_DESTROY */ {DUAL_ENTRY(_pthread_rwlock_init)}, /* PJT_RWLOCK_INIT */ {DUAL_ENTRY(_pthread_rwlock_rdlock)}, /* PJT_RWLOCK_RDLOCK */ {DUAL_ENTRY(_pthread_rwlock_tryrdlock)},/* PJT_RWLOCK_TRYRDLOCK */ {DUAL_ENTRY(_pthread_rwlock_trywrlock)},/* PJT_RWLOCK_TRYWRLOCK */ {DUAL_ENTRY(_pthread_rwlock_unlock)}, /* PJT_RWLOCK_UNLOCK */ {DUAL_ENTRY(_pthread_rwlock_wrlock)}, /* PJT_RWLOCK_WRLOCK */ {DUAL_ENTRY(_pthread_self)}, /* PJT_SELF */ {DUAL_ENTRY(_pthread_setcancelstate)}, /* PJT_SETCANCELSTATE */ {DUAL_ENTRY(_pthread_setcanceltype)}, /* PJT_SETCANCELTYPE */ {DUAL_ENTRY(_pthread_setspecific)}, /* PJT_SETSPECIFIC */ {DUAL_ENTRY(_pthread_sigmask)}, /* PJT_SIGMASK */ {DUAL_ENTRY(_pthread_testcancel)}, /* PJT_TESTCANCEL */ {DUAL_ENTRY(__pthread_cleanup_pop_imp)},/* PJT_CLEANUP_POP_IMP */ {DUAL_ENTRY(__pthread_cleanup_push_imp)},/* PJT_CLEANUP_PUSH_IMP */ {DUAL_ENTRY(_pthread_cancel_enter)}, /* PJT_CANCEL_ENTER */ {DUAL_ENTRY(_pthread_cancel_leave)} /* PJT_CANCEL_LEAVE */ }; static int init_once = 0; /* * For the shared version of the threads library, the above is sufficient. * But for the archive version of the library, we need a little bit more. * Namely, we must arrange for this particular module to be pulled in from * the archive library at link time. To accomplish that, we define and * initialize a variable, "_thread_autoinit_dummy_decl". This variable is * referenced (as an extern) from libc/stdlib/exit.c. This will always * create a need for this module, ensuring that it is present in the * executable. */ extern int _thread_autoinit_dummy_decl; int _thread_autoinit_dummy_decl = 0; void _thread_init_hack(void) { _libpthread_init(NULL); } /* * Threaded process initialization. * * This is only called under two conditions: * * 1) Some thread routines have detected that the library hasn't yet * been initialized (_thr_initial == NULL && curthread == NULL), or * * 2) An explicit call to reinitialize after a fork (indicated * by curthread != NULL) */ void _libpthread_init(struct pthread *curthread) { int fd, first = 0; /* Check if this function has already been called: */ if ((_thr_initial != NULL) && (curthread == NULL)) /* Only initialize the threaded application once. */ return; /* * Check the size of the jump table to make sure it is preset * with the correct number of entries. */ if (sizeof(jmp_table) != (sizeof(pthread_func_t) * PJT_MAX * 2)) PANIC("Thread jump table not properly initialized"); memcpy(__thr_jtable, jmp_table, sizeof(jmp_table)); /* * Check for the special case of this process running as * or in place of init as pid = 1: */ if ((_thr_pid = getpid()) == 1) { /* * Setup a new session for this process which is * assumed to be running as root. */ if (setsid() == -1) PANIC("Can't set session ID"); if (revoke(_PATH_CONSOLE) != 0) PANIC("Can't revoke console"); if ((fd = __sys_open(_PATH_CONSOLE, O_RDWR)) < 0) PANIC("Can't open console"); if (setlogin("root") == -1) PANIC("Can't set login to root"); if (_ioctl(fd, TIOCSCTTY, (char *) NULL) == -1) PANIC("Can't set controlling terminal"); } /* Initialize pthread private data. */ init_private(); /* Set the initial thread. */ if (curthread == NULL) { first = 1; /* Create and initialize the initial thread. */ curthread = _thr_alloc(NULL); if (curthread == NULL) PANIC("Can't allocate initial thread"); init_main_thread(curthread); } /* * Add the thread to the thread list queue. 
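_libpthread_init() below checks sizeof(jmp_table) against PJT_MAX at run time and PANICs on mismatch. The same invariant can be stated at compile time; a C11 sketch with an illustrative (not the actual) PJT_MAX:

typedef void (*pthread_func_t)(void);
enum { PJT_MAX = 57 };          /* illustrative count, not the real one */
static pthread_func_t jmp_table[PJT_MAX][2];

/* Same invariant as the PANIC() check, but enforced at compile time: */
_Static_assert(sizeof(jmp_table) == sizeof(pthread_func_t) * PJT_MAX * 2,
    "thread jump table not properly initialized");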
*/ THR_LIST_ADD(curthread); _thread_active_threads = 1; /* Setup the thread specific data */ _tcb_set(curthread->tcb); if (first) { _thr_initial = curthread; _thr_signal_init(); if (_thread_event_mask & TD_CREATE) _thr_report_creation(curthread, curthread); } } /* * This function and pthread_create() do a lot of the same things. * It'd be nice to consolidate the common stuff in one place. */ static void init_main_thread(struct pthread *thread) { struct sched_param sched_param; /* Setup the thread attributes. */ thr_self(&thread->tid); thread->attr = _pthread_attr_default; /* * Set up the thread stack. * * Create a red zone below the main stack. All other stacks * are constrained to a maximum size by the parameters * passed to mmap(), but this stack is only limited by * resource limits, so this stack needs an explicitly mapped * red zone to protect the thread stack that is just beyond. */ if (mmap(_usrstack - _thr_stack_initial - _thr_guard_default, _thr_guard_default, 0, MAP_ANON, -1, 0) == MAP_FAILED) PANIC("Cannot allocate red zone for initial thread"); /* * Mark the stack as an application supplied stack so that it * isn't deallocated. * * XXX - I'm not sure it would hurt anything to deallocate * the main thread stack because deallocation doesn't * actually free() it; it just puts it in the free * stack queue for later reuse. */ thread->attr.stackaddr_attr = _usrstack - _thr_stack_initial; thread->attr.stacksize_attr = _thr_stack_initial; thread->attr.guardsize_attr = _thr_guard_default; thread->attr.flags |= THR_STACK_USER; /* * Write a magic value to the thread structure * to help identify valid ones: */ thread->magic = THR_MAGIC; thread->cancel_enable = 1; thread->cancel_async = 0; thr_set_name(thread->tid, "initial thread"); /* Initialize the mutex queue: */ TAILQ_INIT(&thread->mutexq); TAILQ_INIT(&thread->pp_mutexq); thread->state = PS_RUNNING; _thr_getscheduler(thread->tid, &thread->attr.sched_policy, &sched_param); thread->attr.prio = sched_param.sched_priority; #ifdef _PTHREAD_FORCED_UNWIND thread->unwind_stackend = _usrstack; #endif /* Others cleared to zero by thr_alloc() */ } static void init_private(void) { size_t len; int mib[2]; char *env; _thr_umutex_init(&_mutex_static_lock); _thr_umutex_init(&_cond_static_lock); _thr_umutex_init(&_rwlock_static_lock); _thr_umutex_init(&_keytable_lock); _thr_urwlock_init(&_thr_atfork_lock); _thr_umutex_init(&_thr_event_lock); _thr_once_init(); _thr_spinlock_init(); _thr_list_init(); + _thr_wake_addr_init(); + _sleepq_init(); /* * Avoid reinitializing some things if they don't need to be, * e.g. after a fork(). 
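The red-zone comment in init_main_thread() above describes mapping an inaccessible page below the main stack so that overflow faults instead of silently corrupting the adjacent mapping. A standalone POSIX illustration of the same idea (here via PROT_NONE and mprotect(); the patch itself maps with prot 0 at a fixed address below _usrstack):

#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        size_t guard = 4096, stack = 64 * 1024;
        char *base;

        base = mmap(NULL, guard + stack, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (base == MAP_FAILED)
                return (1);
        /* Revoke access to the lowest page: the red zone. */
        if (mprotect(base, guard, PROT_NONE) == -1)
                return (1);
        printf("stack: [%p, %p), guard page below at %p\n",
            (void *)(base + guard), (void *)(base + guard + stack),
            (void *)base);
        return (0);
}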
*/ if (init_once == 0) { /* Find the stack top */ mib[0] = CTL_KERN; mib[1] = KERN_USRSTACK; len = sizeof (_usrstack); if (sysctl(mib, 2, &_usrstack, &len, NULL, 0) == -1) PANIC("Cannot get kern.usrstack from sysctl"); len = sizeof(_thr_is_smp); sysctlbyname("kern.smp.cpus", &_thr_is_smp, &len, NULL, 0); _thr_is_smp = (_thr_is_smp > 1); _thr_page_size = getpagesize(); _thr_guard_default = _thr_page_size; _pthread_attr_default.guardsize_attr = _thr_guard_default; _pthread_attr_default.stacksize_attr = _thr_stack_default; env = getenv("LIBPTHREAD_SPINLOOPS"); if (env) _thr_spinloops = atoi(env); env = getenv("LIBPTHREAD_YIELDLOOPS"); if (env) _thr_yieldloops = atoi(env); TAILQ_INIT(&_thr_atfork_list); } init_once = 1; } diff --git a/lib/libthr/thread/thr_kern.c b/lib/libthr/thread/thr_kern.c index 3ad33ad995a2..48f7c6506d7e 100644 --- a/lib/libthr/thread/thr_kern.c +++ b/lib/libthr/thread/thr_kern.c @@ -1,132 +1,224 @@ /* * Copyright (c) 2005 David Xu * Copyright (C) 2003 Daniel M. Eischen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include +#include #include #include "thr_private.h" /*#define DEBUG_THREAD_KERN */ #ifdef DEBUG_THREAD_KERN #define DBG_MSG stdout_debug #else #define DBG_MSG(x...) #endif +static struct umutex addr_lock; +static struct wake_addr *wake_addr_head; +static struct wake_addr default_wake_addr; + /* * This is called when the first thread (other than the initial * thread) is created. 
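init_private() above probes kern.smp.cpus once and treats any value above one as SMP, which later enables adaptive spinning before blocking. A standalone FreeBSD example of the same probe:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int ncpus = 0;
        size_t len = sizeof(ncpus);

        if (sysctlbyname("kern.smp.cpus", &ncpus, &len, NULL, 0) == -1)
                return (1);
        printf("kern.smp.cpus = %d -> _thr_is_smp = %d\n", ncpus, ncpus > 1);
        return (0);
}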
*/ int _thr_setthreaded(int threaded) { if (((threaded == 0) ^ (__isthreaded == 0)) == 0) return (0); __isthreaded = threaded; if (threaded != 0) { _thr_rtld_init(); } else { _thr_rtld_fini(); } return (0); } void _thr_assert_lock_level() { PANIC("locklevel <= 0"); } int _rtp_to_schedparam(const struct rtprio *rtp, int *policy, struct sched_param *param) { switch(rtp->type) { case RTP_PRIO_REALTIME: *policy = SCHED_RR; param->sched_priority = RTP_PRIO_MAX - rtp->prio; break; case RTP_PRIO_FIFO: *policy = SCHED_FIFO; param->sched_priority = RTP_PRIO_MAX - rtp->prio; break; default: *policy = SCHED_OTHER; param->sched_priority = 0; break; } return (0); } int _schedparam_to_rtp(int policy, const struct sched_param *param, struct rtprio *rtp) { switch(policy) { case SCHED_RR: rtp->type = RTP_PRIO_REALTIME; rtp->prio = RTP_PRIO_MAX - param->sched_priority; break; case SCHED_FIFO: rtp->type = RTP_PRIO_FIFO; rtp->prio = RTP_PRIO_MAX - param->sched_priority; break; case SCHED_OTHER: default: rtp->type = RTP_PRIO_NORMAL; rtp->prio = 0; break; } return (0); } int _thr_getscheduler(lwpid_t lwpid, int *policy, struct sched_param *param) { struct rtprio rtp; int ret; ret = rtprio_thread(RTP_LOOKUP, lwpid, &rtp); if (ret == -1) return (ret); _rtp_to_schedparam(&rtp, policy, param); return (0); } int _thr_setscheduler(lwpid_t lwpid, int policy, const struct sched_param *param) { struct rtprio rtp; _schedparam_to_rtp(policy, param, &rtp); return (rtprio_thread(RTP_SET, lwpid, &rtp)); } + +void +_thr_wake_addr_init(void) +{ + _thr_umutex_init(&addr_lock); + wake_addr_head = NULL; +} + +/* + * Allocate a wake address. The memory area is never freed once + * allocated, because threads may still be referencing it. + */ +struct wake_addr * +_thr_alloc_wake_addr(void) +{ + struct pthread *curthread; + struct wake_addr *p; + + if (_thr_initial == NULL) { + return &default_wake_addr; + } + + curthread = _get_curthread(); + + THR_LOCK_ACQUIRE(curthread, &addr_lock); + if (wake_addr_head == NULL) { + unsigned i; + unsigned pagesize = getpagesize(); + struct wake_addr *pp = (struct wake_addr *) + mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0); + for (i = 1; i < pagesize/sizeof(struct wake_addr); ++i) + pp[i].link = &pp[i+1]; + pp[i-1].link = NULL; + wake_addr_head = &pp[1]; + p = &pp[0]; + } else { + p = wake_addr_head; + wake_addr_head = p->link; + } + THR_LOCK_RELEASE(curthread, &addr_lock); + p->value = 0; + return (p); +} + +void +_thr_release_wake_addr(struct wake_addr *wa) +{ + struct pthread *curthread = _get_curthread(); + + if (wa == &default_wake_addr) + return; + THR_LOCK_ACQUIRE(curthread, &addr_lock); + wa->link = wake_addr_head; + wake_addr_head = wa; + THR_LOCK_RELEASE(curthread, &addr_lock); +} + +/* Sleep on thread wakeup address */ +int +_thr_sleep(struct pthread *curthread, int clockid, + const struct timespec *abstime) +{ + + curthread->will_sleep = 0; + if (curthread->nwaiter_defer > 0) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } + + if (curthread->wake_addr->value != 0) + return (0); + + return _thr_umtx_timedwait_uint(&curthread->wake_addr->value, 0, + clockid, abstime, 0); +} + +void +_thr_wake_all(unsigned int *waddrs[], int count) +{ + int i; + + for (i = 0; i < count; ++i) + *waddrs[i] = 1; + _umtx_op(waddrs, UMTX_OP_NWAKE_PRIVATE, count, NULL, NULL); +} diff --git a/lib/libthr/thread/thr_list.c b/lib/libthr/thread/thr_list.c index 7541fd3f48a0..249501cc5293 100644 ---
a/lib/libthr/thread/thr_list.c +++ b/lib/libthr/thread/thr_list.c @@ -1,349 +1,353 @@ /* * Copyright (c) 2005 David Xu * Copyright (C) 2003 Daniel M. Eischen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include "thr_private.h" #include "libc_private.h" /*#define DEBUG_THREAD_LIST */ #ifdef DEBUG_THREAD_LIST #define DBG_MSG stdout_debug #else #define DBG_MSG(x...) #endif #define MAX_THREADS 100000 /* * Define a high water mark for the maximum number of threads that * will be cached. Once this level is reached, any extra threads * will be free()'d. */ #define MAX_CACHED_THREADS 100 /* * We've got to keep track of everything that is allocated, not only * to have a speedy free list, but also so they can be deallocated * after a fork(). */ static TAILQ_HEAD(, pthread) free_threadq; static struct umutex free_thread_lock = DEFAULT_UMUTEX; static struct umutex tcb_lock = DEFAULT_UMUTEX; static int free_thread_count = 0; static int inited = 0; static int total_threads; LIST_HEAD(thread_hash_head, pthread); #define HASH_QUEUES 128 static struct thread_hash_head thr_hashtable[HASH_QUEUES]; #define THREAD_HASH(thrd) (((unsigned long)thrd >> 8) % HASH_QUEUES) static void thr_destroy(struct pthread *curthread, struct pthread *thread); void _thr_list_init(void) { int i; _gc_count = 0; total_threads = 1; _thr_urwlock_init(&_thr_list_lock); TAILQ_INIT(&_thread_list); TAILQ_INIT(&free_threadq); _thr_umutex_init(&free_thread_lock); _thr_umutex_init(&tcb_lock); if (inited) { for (i = 0; i < HASH_QUEUES; ++i) LIST_INIT(&thr_hashtable[i]); } inited = 1; } void _thr_gc(struct pthread *curthread) { struct pthread *td, *td_next; TAILQ_HEAD(, pthread) worklist; TAILQ_INIT(&worklist); THREAD_LIST_WRLOCK(curthread); /* Check the threads waiting for GC. */ TAILQ_FOREACH_SAFE(td, &_thread_gc_list, gcle, td_next) { if (td->tid != TID_TERMINATED) { /* make sure we are not still in userland */ continue; } _thr_stack_free(&td->attr); THR_GCLIST_REMOVE(td); TAILQ_INSERT_HEAD(&worklist, td, gcle); } THREAD_LIST_UNLOCK(curthread); while ((td = TAILQ_FIRST(&worklist)) != NULL) { TAILQ_REMOVE(&worklist, td, gcle); /* * XXX we don't free initial thread, because there might * have some code referencing initial thread. 
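_thr_alloc() and _thr_free() below cache thread structures on a free list; _thr_alloc_wake_addr() in the thr_kern.c hunk above does the same with records carved out of whole pages. A self-contained sketch of that page-carving pattern (hypothetical struct node, no locking, and with the MAP_FAILED check that the original omits):

#include <sys/mman.h>
#include <stddef.h>
#include <unistd.h>

struct node {                           /* hypothetical fixed-size record */
        unsigned int value;
        struct node *link;
};

static struct node *free_head;          /* single-threaded sketch: no lock */

static struct node *
node_alloc(void)
{
        struct node *p, *pp;
        size_t i, n;

        if (free_head == NULL) {
                /* Carve a fresh page into nodes and chain the spares. */
                n = (size_t)getpagesize() / sizeof(struct node);
                pp = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                    MAP_ANON | MAP_PRIVATE, -1, 0);
                if (pp == MAP_FAILED)
                        return (NULL);
                for (i = 1; i < n - 1; i++)
                        pp[i].link = &pp[i + 1];
                pp[n - 1].link = NULL;
                free_head = &pp[1];
                p = &pp[0];
        } else {
                p = free_head;
                free_head = p->link;
        }
        p->value = 0;
        return (p);
}

static void
node_free(struct node *p)
{
        /* Pages are never unmapped; nodes just go back on the list. */
        p->link = free_head;
        free_head = p;
}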
*/ if (td == _thr_initial) { DBG_MSG("Initial thread won't be freed\n"); continue; } _thr_free(curthread, td); } } struct pthread * _thr_alloc(struct pthread *curthread) { struct pthread *thread = NULL; struct tcb *tcb; if (curthread != NULL) { if (GC_NEEDED()) _thr_gc(curthread); if (free_thread_count > 0) { THR_LOCK_ACQUIRE(curthread, &free_thread_lock); if ((thread = TAILQ_FIRST(&free_threadq)) != NULL) { TAILQ_REMOVE(&free_threadq, thread, tle); free_thread_count--; } THR_LOCK_RELEASE(curthread, &free_thread_lock); } } if (thread == NULL) { if (total_threads > MAX_THREADS) return (NULL); atomic_fetchadd_int(&total_threads, 1); thread = malloc(sizeof(struct pthread)); if (thread == NULL) { atomic_fetchadd_int(&total_threads, -1); return (NULL); } } if (curthread != NULL) { THR_LOCK_ACQUIRE(curthread, &tcb_lock); tcb = _tcb_ctor(thread, 0 /* not initial tls */); THR_LOCK_RELEASE(curthread, &tcb_lock); } else { tcb = _tcb_ctor(thread, 1 /* initial tls */); } if (tcb != NULL) { memset(thread, 0, sizeof(*thread)); thread->tcb = tcb; + thread->sleepqueue = _sleepq_alloc(); + thread->wake_addr = _thr_alloc_wake_addr(); } else { thr_destroy(curthread, thread); atomic_fetchadd_int(&total_threads, -1); thread = NULL; } return (thread); } void _thr_free(struct pthread *curthread, struct pthread *thread) { DBG_MSG("Freeing thread %p\n", thread); /* * Always free tcb, as we only know it is part of RTLD TLS * block, but don't know its detail and can not assume how * it works, so better to avoid caching it here. */ if (curthread != NULL) { THR_LOCK_ACQUIRE(curthread, &tcb_lock); _tcb_dtor(thread->tcb); THR_LOCK_RELEASE(curthread, &tcb_lock); } else { _tcb_dtor(thread->tcb); } thread->tcb = NULL; if ((curthread == NULL) || (free_thread_count >= MAX_CACHED_THREADS)) { + _sleepq_free(thread->sleepqueue); + _thr_release_wake_addr(thread->wake_addr); thr_destroy(curthread, thread); atomic_fetchadd_int(&total_threads, -1); } else { /* * Add the thread to the free thread list, this also avoids * pthread id is reused too quickly, may help some buggy apps. */ THR_LOCK_ACQUIRE(curthread, &free_thread_lock); TAILQ_INSERT_TAIL(&free_threadq, thread, tle); free_thread_count++; THR_LOCK_RELEASE(curthread, &free_thread_lock); } } static void thr_destroy(struct pthread *curthread __unused, struct pthread *thread) { free(thread); } /* * Add the thread to the list of all threads and increment * number of active threads. */ void _thr_link(struct pthread *curthread, struct pthread *thread) { THREAD_LIST_WRLOCK(curthread); THR_LIST_ADD(thread); THREAD_LIST_UNLOCK(curthread); atomic_add_int(&_thread_active_threads, 1); } /* * Remove an active thread. */ void _thr_unlink(struct pthread *curthread, struct pthread *thread) { THREAD_LIST_WRLOCK(curthread); THR_LIST_REMOVE(thread); THREAD_LIST_UNLOCK(curthread); atomic_add_int(&_thread_active_threads, -1); } void _thr_hash_add(struct pthread *thread) { struct thread_hash_head *head; head = &thr_hashtable[THREAD_HASH(thread)]; LIST_INSERT_HEAD(head, thread, hle); } void _thr_hash_remove(struct pthread *thread) { LIST_REMOVE(thread, hle); } struct pthread * _thr_hash_find(struct pthread *thread) { struct pthread *td; struct thread_hash_head *head; head = &thr_hashtable[THREAD_HASH(thread)]; LIST_FOREACH(td, head, hle) { if (td == thread) return (thread); } return (NULL); } /* * Find a thread in the linked list of active threads and add a reference * to it. Threads with positive reference counts will not be deallocated * until all references are released. 
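THREAD_HASH() (defined in the hunk above) indexes thr_hashtable by discarding the low eight bits of the thread pointer: heap-allocated struct pthreads tend to share their low address bits because of allocator alignment, so shifting them out spreads threads across buckets. A quick standalone demonstration:

#include <stdio.h>

#define HASH_QUEUES     128
#define THREAD_HASH(thrd)       (((unsigned long)(thrd) >> 8) % HASH_QUEUES)

int
main(void)
{
        static char objs[4][1024];      /* stand-ins for struct pthread */
        int i;

        for (i = 0; i < 4; i++)
                printf("%p -> bucket %lu\n", (void *)objs[i],
                    THREAD_HASH(objs[i]));
        return (0);
}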
*/ int _thr_ref_add(struct pthread *curthread, struct pthread *thread, int include_dead) { int ret; if (thread == NULL) /* Invalid thread: */ return (EINVAL); if ((ret = _thr_find_thread(curthread, thread, include_dead)) == 0) { thread->refcount++; THR_CRITICAL_ENTER(curthread); THR_THREAD_UNLOCK(curthread, thread); } /* Return zero if the thread exists: */ return (ret); } void _thr_ref_delete(struct pthread *curthread, struct pthread *thread) { THR_THREAD_LOCK(curthread, thread); thread->refcount--; _thr_try_gc(curthread, thread); THR_CRITICAL_LEAVE(curthread); } /* entered with thread lock held, exit with thread lock released */ void _thr_try_gc(struct pthread *curthread, struct pthread *thread) { if (THR_SHOULD_GC(thread)) { THR_REF_ADD(curthread, thread); THR_THREAD_UNLOCK(curthread, thread); THREAD_LIST_WRLOCK(curthread); THR_THREAD_LOCK(curthread, thread); THR_REF_DEL(curthread, thread); if (THR_SHOULD_GC(thread)) { THR_LIST_REMOVE(thread); THR_GCLIST_ADD(thread); } THR_THREAD_UNLOCK(curthread, thread); THREAD_LIST_UNLOCK(curthread); } else { THR_THREAD_UNLOCK(curthread, thread); } } /* return with thread lock held if thread is found */ int _thr_find_thread(struct pthread *curthread, struct pthread *thread, int include_dead) { struct pthread *pthread; int ret; if (thread == NULL) return (EINVAL); ret = 0; THREAD_LIST_RDLOCK(curthread); pthread = _thr_hash_find(thread); if (pthread) { THR_THREAD_LOCK(curthread, pthread); if (include_dead == 0 && pthread->state == PS_DEAD) { THR_THREAD_UNLOCK(curthread, pthread); ret = ESRCH; } } else { ret = ESRCH; } THREAD_LIST_UNLOCK(curthread); return (ret); } diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c index 29f91ec6fbfe..bd1fc2b550e3 100644 --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -1,759 +1,797 @@ /* * Copyright (c) 1995 John Birrell . * Copyright (c) 2006 David Xu . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by John Birrell. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "namespace.h" #include #include #include #include #include #include #include #include "un-namespace.h" #include "thr_private.h" #if defined(_PTHREADS_INVARIANTS) #define MUTEX_INIT_LINK(m) do { \ (m)->m_qe.tqe_prev = NULL; \ (m)->m_qe.tqe_next = NULL; \ } while (0) #define MUTEX_ASSERT_IS_OWNED(m) do { \ if (__predict_false((m)->m_qe.tqe_prev == NULL))\ PANIC("mutex is not on list"); \ } while (0) #define MUTEX_ASSERT_NOT_OWNED(m) do { \ if (__predict_false((m)->m_qe.tqe_prev != NULL || \ (m)->m_qe.tqe_next != NULL)) \ PANIC("mutex is on list"); \ } while (0) #else #define MUTEX_INIT_LINK(m) #define MUTEX_ASSERT_IS_OWNED(m) #define MUTEX_ASSERT_NOT_OWNED(m) #endif /* * For adaptive mutexes, how many times to spin doing trylock2 * before entering the kernel to block */ #define MUTEX_ADAPTIVE_SPINS 2000 /* * Prototypes */ int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutex_attr); int __pthread_mutex_trylock(pthread_mutex_t *mutex); int __pthread_mutex_lock(pthread_mutex_t *mutex); int __pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime); int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); int _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count); int _pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count); int __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count); int _pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); int _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count); int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count); static int mutex_self_trylock(pthread_mutex_t); static int mutex_self_lock(pthread_mutex_t, const struct timespec *abstime); -static int mutex_unlock_common(pthread_mutex_t *); +static int mutex_unlock_common(struct pthread_mutex *, int); static int mutex_lock_sleep(struct pthread *, pthread_mutex_t, const struct timespec *); __weak_reference(__pthread_mutex_init, pthread_mutex_init); __strong_reference(__pthread_mutex_init, _pthread_mutex_init); __weak_reference(__pthread_mutex_lock, pthread_mutex_lock); __strong_reference(__pthread_mutex_lock, _pthread_mutex_lock); __weak_reference(__pthread_mutex_timedlock, pthread_mutex_timedlock); __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock); __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock); __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock); /* Single underscore versions provided for libc internal usage: */ /* No difference between libc and application usage of these: */ __weak_reference(_pthread_mutex_destroy, pthread_mutex_destroy); __weak_reference(_pthread_mutex_unlock, pthread_mutex_unlock); __weak_reference(_pthread_mutex_getprioceiling, pthread_mutex_getprioceiling); __weak_reference(_pthread_mutex_setprioceiling, pthread_mutex_setprioceiling); __weak_reference(__pthread_mutex_setspinloops_np, pthread_mutex_setspinloops_np); 
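The __weak_reference()/__strong_reference() pairs above export one implementation under several linkage names. A rough portable equivalent using GCC/Clang alias attributes (hypothetical symbol names; FreeBSD's macros in <sys/cdefs.h> emit comparable assembler directives):

int
_example_lock(void)                     /* the real implementation */
{
        return (0);
}

/* Exported, interposable name (what __weak_reference() arranges): */
int example_lock(void) __attribute__((weak, alias("_example_lock")));

/* Always-bound internal name (what __strong_reference() arranges): */
int example_lock_np(void) __attribute__((alias("_example_lock")));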
__strong_reference(__pthread_mutex_setspinloops_np, _pthread_mutex_setspinloops_np); __weak_reference(_pthread_mutex_getspinloops_np, pthread_mutex_getspinloops_np); __weak_reference(__pthread_mutex_setyieldloops_np, pthread_mutex_setyieldloops_np); __strong_reference(__pthread_mutex_setyieldloops_np, _pthread_mutex_setyieldloops_np); __weak_reference(_pthread_mutex_getyieldloops_np, pthread_mutex_getyieldloops_np); __weak_reference(_pthread_mutex_isowned_np, pthread_mutex_isowned_np); static int mutex_init(pthread_mutex_t *mutex, const struct pthread_mutex_attr *mutex_attr, void *(calloc_cb)(size_t, size_t)) { const struct pthread_mutex_attr *attr; struct pthread_mutex *pmutex; if (mutex_attr == NULL) { attr = &_pthread_mutexattr_default; } else { attr = mutex_attr; if (attr->m_type < PTHREAD_MUTEX_ERRORCHECK || attr->m_type >= PTHREAD_MUTEX_TYPE_MAX) return (EINVAL); if (attr->m_protocol < PTHREAD_PRIO_NONE || attr->m_protocol > PTHREAD_PRIO_PROTECT) return (EINVAL); } if ((pmutex = (pthread_mutex_t) calloc_cb(1, sizeof(struct pthread_mutex))) == NULL) return (ENOMEM); - pmutex->m_type = attr->m_type; + pmutex->m_flags = attr->m_type; pmutex->m_owner = NULL; pmutex->m_count = 0; - pmutex->m_refcount = 0; pmutex->m_spinloops = 0; pmutex->m_yieldloops = 0; MUTEX_INIT_LINK(pmutex); switch(attr->m_protocol) { case PTHREAD_PRIO_NONE: pmutex->m_lock.m_owner = UMUTEX_UNOWNED; pmutex->m_lock.m_flags = 0; break; case PTHREAD_PRIO_INHERIT: pmutex->m_lock.m_owner = UMUTEX_UNOWNED; pmutex->m_lock.m_flags = UMUTEX_PRIO_INHERIT; break; case PTHREAD_PRIO_PROTECT: pmutex->m_lock.m_owner = UMUTEX_CONTESTED; pmutex->m_lock.m_flags = UMUTEX_PRIO_PROTECT; pmutex->m_lock.m_ceilings[0] = attr->m_ceiling; break; } - if (pmutex->m_type == PTHREAD_MUTEX_ADAPTIVE_NP) { + if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) { pmutex->m_spinloops = _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS; pmutex->m_yieldloops = _thr_yieldloops; } *mutex = pmutex; return (0); } static int init_static(struct pthread *thread, pthread_mutex_t *mutex) { int ret; THR_LOCK_ACQUIRE(thread, &_mutex_static_lock); if (*mutex == THR_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_default, calloc); else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default, calloc); else ret = 0; THR_LOCK_RELEASE(thread, &_mutex_static_lock); return (ret); } static void set_inherited_priority(struct pthread *curthread, struct pthread_mutex *m) { struct pthread_mutex *m2; m2 = TAILQ_LAST(&curthread->pp_mutexq, mutex_queue); if (m2 != NULL) m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0]; else m->m_lock.m_ceilings[1] = -1; } int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutex_attr) { return mutex_init(mutex, mutex_attr ? *mutex_attr : NULL, calloc); } /* This function is used internally by malloc. */ int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)) { static const struct pthread_mutex_attr attr = { .m_type = PTHREAD_MUTEX_NORMAL, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0 }; int ret; ret = mutex_init(mutex, &attr, calloc_cb); if (ret == 0) - (*mutex)->m_private = 1; + (*mutex)->m_flags |= PMUTEX_FLAG_PRIVATE; return (ret); } void _mutex_fork(struct pthread *curthread) { struct pthread_mutex *m; /* * Fix mutex ownership for child process. 
* note that process shared mutex should not * be inherited because owner is forking thread * which is in parent process, they should be * removed from the owned mutex list, current, * process shared mutex is not supported, so I * am not worried. */ TAILQ_FOREACH(m, &curthread->mutexq, m_qe) m->m_lock.m_owner = TID(curthread); TAILQ_FOREACH(m, &curthread->pp_mutexq, m_qe) m->m_lock.m_owner = TID(curthread) | UMUTEX_CONTESTED; } int _pthread_mutex_destroy(pthread_mutex_t *mutex) { pthread_mutex_t m; int ret; m = *mutex; if (m < THR_MUTEX_DESTROYED) { ret = 0; } else if (m == THR_MUTEX_DESTROYED) { ret = EINVAL; } else { - if (m->m_owner != NULL || m->m_refcount != 0) { + if (m->m_owner != NULL) { ret = EBUSY; } else { *mutex = THR_MUTEX_DESTROYED; MUTEX_ASSERT_NOT_OWNED(m); free(m); ret = 0; } } return (ret); } #define ENQUEUE_MUTEX(curthread, m) \ do { \ (m)->m_owner = curthread; \ /* Add to the list of owned mutexes: */ \ MUTEX_ASSERT_NOT_OWNED((m)); \ if (((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) \ TAILQ_INSERT_TAIL(&curthread->mutexq, (m), m_qe);\ else \ TAILQ_INSERT_TAIL(&curthread->pp_mutexq, (m), m_qe);\ } while (0) +#define DEQUEUE_MUTEX(curthread, m) \ + (m)->m_owner = NULL; \ + MUTEX_ASSERT_IS_OWNED(m); \ + if (__predict_true(((m)->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)) \ + TAILQ_REMOVE(&curthread->mutexq, (m), m_qe); \ + else { \ + TAILQ_REMOVE(&curthread->pp_mutexq, (m), m_qe); \ + set_inherited_priority(curthread, m); \ + } \ + MUTEX_INIT_LINK(m); + #define CHECK_AND_INIT_MUTEX \ if (__predict_false((m = *mutex) <= THR_MUTEX_DESTROYED)) { \ if (m == THR_MUTEX_DESTROYED) \ return (EINVAL); \ int ret; \ ret = init_static(_get_curthread(), mutex); \ if (ret) \ return (ret); \ m = *mutex; \ } static int mutex_trylock_common(pthread_mutex_t *mutex) { struct pthread *curthread = _get_curthread(); struct pthread_mutex *m = *mutex; uint32_t id; int ret; id = TID(curthread); - if (m->m_private) + if (m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); ret = _thr_umutex_trylock(&m->m_lock, id); if (__predict_true(ret == 0)) { ENQUEUE_MUTEX(curthread, m); } else if (m->m_owner == curthread) { ret = mutex_self_trylock(m); } /* else {} */ - if (ret && m->m_private) + if (ret && (m->m_flags & PMUTEX_FLAG_PRIVATE)) THR_CRITICAL_LEAVE(curthread); return (ret); } int __pthread_mutex_trylock(pthread_mutex_t *mutex) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX return (mutex_trylock_common(mutex)); } static int mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m, const struct timespec *abstime) { uint32_t id, owner; int count; int ret; if (m->m_owner == curthread) return mutex_self_lock(m, abstime); id = TID(curthread); /* * For adaptive mutexes, spin for a bit in the expectation * that if the application requests this mutex type then * the lock is likely to be released quickly and it is * faster than entering the kernel */ if (__predict_false( (m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) != 0)) goto sleep_in_kernel; if (!_thr_is_smp) goto yield_loop; count = m->m_spinloops; while (count--) { owner = m->m_lock.m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0) { if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { ret = 0; goto done; } } CPU_SPINWAIT; } yield_loop: count = m->m_yieldloops; while (count--) { _sched_yield(); owner = m->m_lock.m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0) { if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { ret = 0; goto done; } } } sleep_in_kernel: if (abstime == NULL) { ret = 
__thr_umutex_lock(&m->m_lock, id); } else if (__predict_false( abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)) { ret = EINVAL; } else { ret = __thr_umutex_timedlock(&m->m_lock, id, abstime); } done: if (ret == 0) ENQUEUE_MUTEX(curthread, m); return (ret); } static inline int mutex_lock_common(struct pthread_mutex *m, - const struct timespec *abstime) + const struct timespec *abstime, int cvattach) { struct pthread *curthread = _get_curthread(); int ret; - if (m->m_private) + if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) { ENQUEUE_MUTEX(curthread, m); ret = 0; } else { ret = mutex_lock_sleep(curthread, m, abstime); } - if (ret && m->m_private) + if (ret && (m->m_flags & PMUTEX_FLAG_PRIVATE) && !cvattach) THR_CRITICAL_LEAVE(curthread); return (ret); } int __pthread_mutex_lock(pthread_mutex_t *mutex) { struct pthread_mutex *m; _thr_check_init(); CHECK_AND_INIT_MUTEX - return (mutex_lock_common(m, NULL)); + return (mutex_lock_common(m, NULL, 0)); } int __pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime) { struct pthread_mutex *m; _thr_check_init(); CHECK_AND_INIT_MUTEX - return (mutex_lock_common(m, abstime)); + return (mutex_lock_common(m, abstime, 0)); } int -_pthread_mutex_unlock(pthread_mutex_t *m) +_pthread_mutex_unlock(pthread_mutex_t *mutex) { - return (mutex_unlock_common(m)); + struct pthread_mutex *mp; + + mp = *mutex; + return (mutex_unlock_common(mp, 0)); } int -_mutex_cv_lock(pthread_mutex_t *mutex, int count) +_mutex_cv_lock(struct pthread_mutex *m, int count) { - struct pthread_mutex *m; - int ret; + int error; - m = *mutex; - ret = mutex_lock_common(m, NULL); - if (ret == 0) { - m->m_refcount--; - m->m_count += count; + error = mutex_lock_common(m, NULL, 1); + if (error == 0) + m->m_count = count; + return (error); +} + +int +_mutex_cv_unlock(struct pthread_mutex *m, int *count) +{ + + /* + * Clear the count in case this is a recursive mutex. + */ + *count = m->m_count; + m->m_count = 0; + (void)mutex_unlock_common(m, 1); + return (0); +} + +int +_mutex_cv_attach(struct pthread_mutex *m, int count) +{ + struct pthread *curthread = _get_curthread(); + int error; + + ENQUEUE_MUTEX(curthread, m); + m->m_count = count; + return (error); +} + +int +_mutex_cv_detach(struct pthread_mutex *mp, int *recurse) +{ + struct pthread *curthread = _get_curthread(); + int defered; + int error; + + if ((error = _mutex_owned(curthread, mp)) != 0) + return (error); + + /* + * Clear the count in case this is a recursive mutex. + */ + *recurse = mp->m_count; + mp->m_count = 0; + DEQUEUE_MUTEX(curthread, mp); + + /* Will this happen in real-world ? 
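+	 * A mutex can be detached while a wakeup recorded by
+	 * pthread_cond_signal() is still deferred on it, so flush the
+	 * deferred waiters here exactly as the unlock path does.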
*/ + if ((mp->m_flags & PMUTEX_FLAG_DEFERED) != 0) { + defered = 1; + mp->m_flags &= ~PMUTEX_FLAG_DEFERED; + } else + defered = 0; + + if (defered) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; } - return (ret); + return (0); } static int mutex_self_trylock(struct pthread_mutex *m) { int ret; - switch (m->m_type) { + switch (PMUTEX_TYPE(m->m_flags)) { case PTHREAD_MUTEX_ERRORCHECK: case PTHREAD_MUTEX_NORMAL: ret = EBUSY; break; case PTHREAD_MUTEX_RECURSIVE: /* Increment the lock count: */ if (m->m_count + 1 > 0) { m->m_count++; ret = 0; } else ret = EAGAIN; break; default: /* Trap invalid mutex types; */ ret = EINVAL; } return (ret); } static int mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime) { struct timespec ts1, ts2; int ret; - switch (m->m_type) { + switch (PMUTEX_TYPE(m->m_flags)) { case PTHREAD_MUTEX_ERRORCHECK: case PTHREAD_MUTEX_ADAPTIVE_NP: if (abstime) { if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { ret = EINVAL; } else { clock_gettime(CLOCK_REALTIME, &ts1); TIMESPEC_SUB(&ts2, abstime, &ts1); __sys_nanosleep(&ts2, NULL); ret = ETIMEDOUT; } } else { /* * POSIX specifies that mutexes should return * EDEADLK if a recursive lock is detected. */ ret = EDEADLK; } break; case PTHREAD_MUTEX_NORMAL: /* * What SS2 define as a 'normal' mutex. Intentionally * deadlock on attempts to get a lock you already own. */ ret = 0; if (abstime) { if (abstime->tv_sec < 0 || abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { ret = EINVAL; } else { clock_gettime(CLOCK_REALTIME, &ts1); TIMESPEC_SUB(&ts2, abstime, &ts1); __sys_nanosleep(&ts2, NULL); ret = ETIMEDOUT; } } else { ts1.tv_sec = 30; ts1.tv_nsec = 0; for (;;) __sys_nanosleep(&ts1, NULL); } break; case PTHREAD_MUTEX_RECURSIVE: /* Increment the lock count: */ if (m->m_count + 1 > 0) { m->m_count++; ret = 0; } else ret = EAGAIN; break; default: /* Trap invalid mutex types; */ ret = EINVAL; } return (ret); } static int -mutex_unlock_common(pthread_mutex_t *mutex) +mutex_unlock_common(struct pthread_mutex *m, int cv) { struct pthread *curthread = _get_curthread(); - struct pthread_mutex *m; uint32_t id; + int defered; - m = *mutex; if (__predict_false(m <= THR_MUTEX_DESTROYED)) { if (m == THR_MUTEX_DESTROYED) return (EINVAL); return (EPERM); } /* * Check if the running thread is not the owner of the mutex. */ if (__predict_false(m->m_owner != curthread)) return (EPERM); id = TID(curthread); if (__predict_false( - m->m_type == PTHREAD_MUTEX_RECURSIVE && + PMUTEX_TYPE(m->m_flags) == PTHREAD_MUTEX_RECURSIVE && m->m_count > 0)) { m->m_count--; } else { - m->m_owner = NULL; - /* Remove the mutex from the threads queue. 
*/ - MUTEX_ASSERT_IS_OWNED(m); - if (__predict_true((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)) - TAILQ_REMOVE(&curthread->mutexq, m, m_qe); - else { - TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe); - set_inherited_priority(curthread, m); - } - MUTEX_INIT_LINK(m); - _thr_umutex_unlock(&m->m_lock, id); - } - if (m->m_private) - THR_CRITICAL_LEAVE(curthread); - return (0); -} - -int -_mutex_cv_unlock(pthread_mutex_t *mutex, int *count) -{ - struct pthread *curthread = _get_curthread(); - struct pthread_mutex *m; - - m = *mutex; - if (__predict_false(m <= THR_MUTEX_DESTROYED)) { - if (m == THR_MUTEX_DESTROYED) - return (EINVAL); - return (EPERM); - } + if (curthread->will_sleep == 0 && (m->m_flags & PMUTEX_FLAG_DEFERED) != 0) { + defered = 1; + m->m_flags &= ~PMUTEX_FLAG_DEFERED; + } else + defered = 0; - /* - * Check if the running thread is not the owner of the mutex. - */ - if (__predict_false(m->m_owner != curthread)) - return (EPERM); + DEQUEUE_MUTEX(curthread, m); + _thr_umutex_unlock(&m->m_lock, id); - /* - * Clear the count in case this is a recursive mutex. - */ - *count = m->m_count; - m->m_refcount++; - m->m_count = 0; - m->m_owner = NULL; - /* Remove the mutex from the threads queue. */ - MUTEX_ASSERT_IS_OWNED(m); - if (__predict_true((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0)) - TAILQ_REMOVE(&curthread->mutexq, m, m_qe); - else { - TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe); - set_inherited_priority(curthread, m); + if (defered) { + _thr_wake_all(curthread->defer_waiters, + curthread->nwaiter_defer); + curthread->nwaiter_defer = 0; + } } - MUTEX_INIT_LINK(m); - _thr_umutex_unlock(&m->m_lock, TID(curthread)); - - if (m->m_private) + if (!cv && m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_LEAVE(curthread); return (0); } int _pthread_mutex_getprioceiling(pthread_mutex_t *mutex, int *prioceiling) { struct pthread_mutex *m; int ret; m = *mutex; if ((m <= THR_MUTEX_DESTROYED) || (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) ret = EINVAL; else { *prioceiling = m->m_lock.m_ceilings[0]; ret = 0; } return (ret); } int _pthread_mutex_setprioceiling(pthread_mutex_t *mutex, int ceiling, int *old_ceiling) { struct pthread *curthread = _get_curthread(); struct pthread_mutex *m, *m1, *m2; int ret; m = *mutex; if ((m <= THR_MUTEX_DESTROYED) || (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) return (EINVAL); ret = __thr_umutex_set_ceiling(&m->m_lock, ceiling, old_ceiling); if (ret != 0) return (ret); if (m->m_owner == curthread) { MUTEX_ASSERT_IS_OWNED(m); m1 = TAILQ_PREV(m, mutex_queue, m_qe); m2 = TAILQ_NEXT(m, m_qe); if ((m1 != NULL && m1->m_lock.m_ceilings[0] > (u_int)ceiling) || (m2 != NULL && m2->m_lock.m_ceilings[0] < (u_int)ceiling)) { TAILQ_REMOVE(&curthread->pp_mutexq, m, m_qe); TAILQ_FOREACH(m2, &curthread->pp_mutexq, m_qe) { if (m2->m_lock.m_ceilings[0] > (u_int)ceiling) { TAILQ_INSERT_BEFORE(m2, m, m_qe); return (0); } } TAILQ_INSERT_TAIL(&curthread->pp_mutexq, m, m_qe); } } return (0); } int _pthread_mutex_getspinloops_np(pthread_mutex_t *mutex, int *count) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX *count = m->m_spinloops; return (0); } int __pthread_mutex_setspinloops_np(pthread_mutex_t *mutex, int count) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX m->m_spinloops = count; return (0); } int _pthread_mutex_getyieldloops_np(pthread_mutex_t *mutex, int *count) { struct pthread_mutex *m; CHECK_AND_INIT_MUTEX *count = m->m_yieldloops; return (0); } int __pthread_mutex_setyieldloops_np(pthread_mutex_t *mutex, int count) { struct pthread_mutex *m; 
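	/*
	 * Illustrative usage of these non-portable knobs (not part of
	 * the patched code): an application with short critical sections
	 * on an SMP machine might raise the spin bound and keep a small
	 * yield bound, e.g.
	 *
	 *	pthread_mutex_t mtx;
	 *
	 *	pthread_mutex_init(&mtx, NULL);
	 *	pthread_mutex_setspinloops_np(&mtx, 1000);
	 *	pthread_mutex_setyieldloops_np(&mtx, 10);
	 *
	 * "mtx" is a hypothetical mutex; both calls return 0 on success.
	 */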
CHECK_AND_INIT_MUTEX m->m_yieldloops = count; return (0); } int _pthread_mutex_isowned_np(pthread_mutex_t *mutex) { struct pthread_mutex *m; m = *mutex; if (m <= THR_MUTEX_DESTROYED) return (0); return (m->m_owner == _get_curthread()); } + +int +_mutex_owned(struct pthread *curthread, const struct pthread_mutex *mp) +{ + if (__predict_false(mp <= THR_MUTEX_DESTROYED)) { + if (mp == THR_MUTEX_DESTROYED) + return (EINVAL); + return (EPERM); + } + if (mp->m_owner != curthread) + return (EPERM); + return (0); +} diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h index 7180d123cae9..9df97aa43118 100644 --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -1,807 +1,904 @@ /* * Copyright (C) 2005 Daniel M. Eischen * Copyright (c) 2005 David Xu * Copyright (c) 1995-1998 John Birrell . * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _THR_PRIVATE_H #define _THR_PRIVATE_H /* * Include files. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SYM_FB10(sym) __CONCAT(sym, _fb10) #define SYM_FBP10(sym) __CONCAT(sym, _fbp10) #define WEAK_REF(sym, alias) __weak_reference(sym, alias) #define SYM_COMPAT(sym, impl, ver) __sym_compat(sym, impl, ver) #define SYM_DEFAULT(sym, impl, ver) __sym_default(sym, impl, ver) #define FB10_COMPAT(func, sym) \ WEAK_REF(func, SYM_FB10(sym)); \ SYM_COMPAT(sym, SYM_FB10(sym), FBSD_1.0) #define FB10_COMPAT_PRIVATE(func, sym) \ WEAK_REF(func, SYM_FBP10(sym)); \ SYM_DEFAULT(sym, SYM_FBP10(sym), FBSDprivate_1.0) #include "pthread_md.h" #include "thr_umtx.h" #include "thread_db.h" #ifdef _PTHREAD_FORCED_UNWIND #define _BSD_SOURCE #include #endif typedef TAILQ_HEAD(pthreadlist, pthread) pthreadlist; typedef TAILQ_HEAD(atfork_head, pthread_atfork) atfork_head; TAILQ_HEAD(mutex_queue, pthread_mutex); /* Signal to do cancellation */ #define SIGCANCEL 32 /* * Kernel fatal error handler macro. */ #define PANIC(string) _thread_exit(__FILE__,__LINE__,string) /* Output debug messages like this: */ #define stdout_debug(args...) _thread_printf(STDOUT_FILENO, ##args) #define stderr_debug(args...) 
_thread_printf(STDERR_FILENO, ##args) #ifdef _PTHREADS_INVARIANTS #define THR_ASSERT(cond, msg) do { \ if (__predict_false(!(cond))) \ PANIC(msg); \ } while (0) #else #define THR_ASSERT(cond, msg) #endif #ifdef PIC # define STATIC_LIB_REQUIRE(name) #else # define STATIC_LIB_REQUIRE(name) __asm (".globl " #name) #endif #define TIMESPEC_ADD(dst, src, val) \ do { \ (dst)->tv_sec = (src)->tv_sec + (val)->tv_sec; \ (dst)->tv_nsec = (src)->tv_nsec + (val)->tv_nsec; \ if ((dst)->tv_nsec >= 1000000000) { \ (dst)->tv_sec++; \ (dst)->tv_nsec -= 1000000000; \ } \ } while (0) #define TIMESPEC_SUB(dst, src, val) \ do { \ (dst)->tv_sec = (src)->tv_sec - (val)->tv_sec; \ (dst)->tv_nsec = (src)->tv_nsec - (val)->tv_nsec; \ if ((dst)->tv_nsec < 0) { \ (dst)->tv_sec--; \ (dst)->tv_nsec += 1000000000; \ } \ } while (0) /* XXX These values should be same as those defined in pthread.h */ #define THR_MUTEX_INITIALIZER ((struct pthread_mutex *)NULL) #define THR_ADAPTIVE_MUTEX_INITIALIZER ((struct pthread_mutex *)1) #define THR_MUTEX_DESTROYED ((struct pthread_mutex *)2) #define THR_COND_INITIALIZER ((struct pthread_cond *)NULL) #define THR_COND_DESTROYED ((struct pthread_cond *)1) #define THR_RWLOCK_INITIALIZER ((struct pthread_rwlock *)NULL) #define THR_RWLOCK_DESTROYED ((struct pthread_rwlock *)1) +#define PMUTEX_FLAG_TYPE_MASK 0x0ff +#define PMUTEX_FLAG_PRIVATE 0x100 +#define PMUTEX_FLAG_DEFERED 0x200 +#define PMUTEX_TYPE(mtxflags) ((mtxflags) & PMUTEX_FLAG_TYPE_MASK) + +#define MAX_DEFER_WAITERS 50 + struct pthread_mutex { /* * Lock for accesses to this structure. */ struct umutex m_lock; - enum pthread_mutextype m_type; + int m_flags; struct pthread *m_owner; int m_count; - int m_refcount; int m_spinloops; int m_yieldloops; - int m_private; /* * Link for all mutexes a thread currently owns. */ TAILQ_ENTRY(pthread_mutex) m_qe; }; struct pthread_mutex_attr { enum pthread_mutextype m_type; int m_protocol; int m_ceiling; }; #define PTHREAD_MUTEXATTR_STATIC_INITIALIZER \ { PTHREAD_MUTEX_DEFAULT, PTHREAD_PRIO_NONE, 0, MUTEX_FLAGS_PRIVATE } struct pthread_cond { - struct umutex c_lock; - struct ucond c_kerncv; - int c_pshared; - int c_clockid; + __uint32_t __has_user_waiters; + __uint32_t __has_kern_waiters; + __uint32_t __flags; + __uint32_t __clock_id; }; struct pthread_cond_attr { int c_pshared; int c_clockid; }; struct pthread_barrier { struct umutex b_lock; struct ucond b_cv; volatile int64_t b_cycle; volatile int b_count; volatile int b_waiters; }; struct pthread_barrierattr { int pshared; }; struct pthread_spinlock { struct umutex s_lock; }; /* * Flags for condition variables. */ #define COND_FLAGS_PRIVATE 0x01 #define COND_FLAGS_INITED 0x02 #define COND_FLAGS_BUSY 0x04 /* * Cleanup definitions. 
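 * THR_CLEANUP_PUSH() opens a C block and THR_CLEANUP_POP() closes
 * it, so each handler record lives in the pushing frame's stack;
 * the records form a LIFO chain through "prev" rooted at
 * (td)->cleanup.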
*/ struct pthread_cleanup { struct pthread_cleanup *prev; void (*routine)(void *); void *routine_arg; int onheap; }; #define THR_CLEANUP_PUSH(td, func, arg) { \ struct pthread_cleanup __cup; \ \ __cup.routine = func; \ __cup.routine_arg = arg; \ __cup.onheap = 0; \ __cup.prev = (td)->cleanup; \ (td)->cleanup = &__cup; #define THR_CLEANUP_POP(td, exec) \ (td)->cleanup = __cup.prev; \ if ((exec) != 0) \ __cup.routine(__cup.routine_arg); \ } struct pthread_atfork { TAILQ_ENTRY(pthread_atfork) qe; void (*prepare)(void); void (*parent)(void); void (*child)(void); }; struct pthread_attr { #define pthread_attr_start_copy sched_policy int sched_policy; int sched_inherit; int prio; int suspend; #define THR_STACK_USER 0x100 /* 0xFF reserved for */ int flags; void *stackaddr_attr; size_t stacksize_attr; size_t guardsize_attr; #define pthread_attr_end_copy cpuset cpuset_t *cpuset; size_t cpusetsize; }; +struct wake_addr { + struct wake_addr *link; + unsigned int value; + char pad[12]; +}; + +struct sleepqueue { + TAILQ_HEAD(, pthread) sq_blocked; + SLIST_HEAD(, sleepqueue) sq_freeq; + LIST_ENTRY(sleepqueue) sq_hash; + SLIST_ENTRY(sleepqueue) sq_flink; + void *sq_wchan; + int sq_type; +}; + /* * Thread creation state attributes. */ #define THR_CREATE_RUNNING 0 #define THR_CREATE_SUSPENDED 1 /* * Miscellaneous definitions. */ #define THR_STACK_DEFAULT (sizeof(void *) / 4 * 1024 * 1024) /* * Maximum size of initial thread's stack. This perhaps deserves to be larger * than the stacks of other threads, since many applications are likely to run * almost entirely on this stack. */ #define THR_STACK_INITIAL (THR_STACK_DEFAULT * 2) /* * Define priorities returned by kernel. */ #define THR_MIN_PRIORITY (_thr_priorities[SCHED_OTHER-1].pri_min) #define THR_MAX_PRIORITY (_thr_priorities[SCHED_OTHER-1].pri_max) #define THR_DEF_PRIORITY (_thr_priorities[SCHED_OTHER-1].pri_default) #define THR_MIN_RR_PRIORITY (_thr_priorities[SCHED_RR-1].pri_min) #define THR_MAX_RR_PRIORITY (_thr_priorities[SCHED_RR-1].pri_max) #define THR_DEF_RR_PRIORITY (_thr_priorities[SCHED_RR-1].pri_default) /* XXX The SCHED_FIFO should have same priority range as SCHED_RR */ #define THR_MIN_FIFO_PRIORITY (_thr_priorities[SCHED_FIFO_1].pri_min) #define THR_MAX_FIFO_PRIORITY (_thr_priorities[SCHED_FIFO-1].pri_max) #define THR_DEF_FIFO_PRIORITY (_thr_priorities[SCHED_FIFO-1].pri_default) struct pthread_prio { int pri_min; int pri_max; int pri_default; }; struct pthread_rwlockattr { int pshared; }; struct pthread_rwlock { struct urwlock lock; struct pthread *owner; }; /* * Thread states. */ enum pthread_state { PS_RUNNING, PS_DEAD }; struct pthread_specific_elem { const void *data; int seqno; }; struct pthread_key { volatile int allocated; int seqno; void (*destructor)(void *); }; /* * lwpid_t is 32bit but kernel thr API exports tid as long type * in very earily date. */ #define TID(thread) ((uint32_t) ((thread)->tid)) /* * Thread structure. */ struct pthread { /* Kernel thread id. */ long tid; #define TID_TERMINATED 1 /* * Lock for accesses to this thread structure. */ struct umutex lock; /* Internal condition variable cycle number. */ uint32_t cycle; /* How many low level locks the thread held. */ int locklevel; /* * Set to non-zero when this thread has entered a critical * region. We allow for recursive entries into critical regions. */ int critical_count; /* Signal blocked counter. */ int sigblock; /* Queue entry for list of all threads. */ TAILQ_ENTRY(pthread) tle; /* link for all threads in process */ /* Queue entry for GC lists. 
*/ TAILQ_ENTRY(pthread) gcle; /* Hash queue entry. */ LIST_ENTRY(pthread) hle; + /* Sleep queue entry */ + TAILQ_ENTRY(pthread) wle; + /* Threads reference count. */ int refcount; /* * Thread start routine, argument, stack pointer and thread * attributes. */ void *(*start_routine)(void *); void *arg; struct pthread_attr attr; #define SHOULD_CANCEL(thr) \ ((thr)->cancel_pending && (thr)->cancel_enable && \ (thr)->no_cancel == 0) /* Cancellation is enabled */ int cancel_enable; /* Cancellation request is pending */ int cancel_pending; /* Thread is at cancellation point */ int cancel_point; /* Cancellation is temporarily disabled */ int no_cancel; /* Asynchronouse cancellation is enabled */ int cancel_async; /* Cancellation is in progress */ int cancelling; /* Thread temporary signal mask. */ sigset_t sigmask; /* Thread should unblock SIGCANCEL. */ int unblock_sigcancel; /* In sigsuspend state */ int in_sigsuspend; /* deferred signal info */ siginfo_t deferred_siginfo; /* signal mask to restore. */ sigset_t deferred_sigmask; /* the sigaction should be used for deferred signal. */ struct sigaction deferred_sigact; /* Force new thread to exit. */ int force_exit; /* Thread state: */ enum pthread_state state; /* * Error variable used instead of errno. The function __error() * returns a pointer to this. */ int error; /* * The joiner is the thread that is joining to this thread. The * join status keeps track of a join operation to another thread. */ struct pthread *joiner; /* Miscellaneous flags; only set with scheduling lock held. */ int flags; #define THR_FLAGS_PRIVATE 0x0001 #define THR_FLAGS_NEED_SUSPEND 0x0002 /* thread should be suspended */ #define THR_FLAGS_SUSPENDED 0x0004 /* thread is suspended */ #define THR_FLAGS_DETACHED 0x0008 /* thread is detached */ /* Thread list flags; only set with thread list lock held. */ int tlflags; #define TLFLAGS_GC_SAFE 0x0001 /* thread safe for cleaning */ #define TLFLAGS_IN_TDLIST 0x0002 /* thread in all thread list */ #define TLFLAGS_IN_GCLIST 0x0004 /* thread in gc list */ /* Queue of currently owned NORMAL or PRIO_INHERIT type mutexes. */ struct mutex_queue mutexq; /* Queue of all owned PRIO_PROTECT mutexes. */ struct mutex_queue pp_mutexq; void *ret; struct pthread_specific_elem *specific; int specific_data_count; /* Number rwlocks rdlocks held. */ int rdlock_count; /* * Current locks bitmap for rtld. */ int rtld_bits; /* Thread control block */ struct tcb *tcb; /* Cleanup handlers Link List */ struct pthread_cleanup *cleanup; #ifdef _PTHREAD_FORCED_UNWIND struct _Unwind_Exception ex; void *unwind_stackend; int unwind_disabled; #endif /* * Magic value to help recognize a valid thread structure * from an invalid one: */ #define THR_MAGIC ((u_int32_t) 0xd09ba115) u_int32_t magic; /* Enable event reporting */ int report_events; /* Event mask */ int event_mask; /* Event */ td_event_msg_t event_buf; + + struct wake_addr *wake_addr; +#define WAKE_ADDR(td) ((td)->wake_addr) + + /* Sleep queue */ + struct sleepqueue *sleepqueue; + + /* Wait channel */ + void *wchan; + + /* Referenced mutex. */ + struct pthread_mutex *mutex_obj; + + /* Thread will sleep. */ + int will_sleep; + + /* Number of threads deferred. */ + int nwaiter_defer; + + /* Deferred threads from pthread_cond_signal. 
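+	 * Up to MAX_DEFER_WAITERS wake addresses are batched here and
+	 * flushed with _thr_wake_all() once the signalling thread drops
+	 * the mutex, so waiters are not woken only to block again on a
+	 * still-held lock.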
*/ + unsigned int *defer_waiters[MAX_DEFER_WAITERS]; }; #define THR_SHOULD_GC(thrd) \ ((thrd)->refcount == 0 && (thrd)->state == PS_DEAD && \ ((thrd)->flags & THR_FLAGS_DETACHED) != 0) #define THR_IN_CRITICAL(thrd) \ (((thrd)->locklevel > 0) || \ ((thrd)->critical_count > 0)) #define THR_CRITICAL_ENTER(thrd) \ (thrd)->critical_count++ #define THR_CRITICAL_LEAVE(thrd) \ do { \ (thrd)->critical_count--; \ _thr_ast(thrd); \ } while (0) #define THR_UMUTEX_TRYLOCK(thrd, lck) \ _thr_umutex_trylock((lck), TID(thrd)) #define THR_UMUTEX_LOCK(thrd, lck) \ _thr_umutex_lock((lck), TID(thrd)) #define THR_UMUTEX_TIMEDLOCK(thrd, lck, timo) \ _thr_umutex_timedlock((lck), TID(thrd), (timo)) #define THR_UMUTEX_UNLOCK(thrd, lck) \ _thr_umutex_unlock((lck), TID(thrd)) #define THR_LOCK_ACQUIRE(thrd, lck) \ do { \ (thrd)->locklevel++; \ _thr_umutex_lock(lck, TID(thrd)); \ } while (0) +#define THR_LOCK_ACQUIRE_SPIN(thrd, lck) \ +do { \ + (thrd)->locklevel++; \ + _thr_umutex_lock_spin(lck, TID(thrd)); \ +} while (0) + #ifdef _PTHREADS_INVARIANTS #define THR_ASSERT_LOCKLEVEL(thrd) \ do { \ if (__predict_false((thrd)->locklevel <= 0)) \ _thr_assert_lock_level(); \ } while (0) #else #define THR_ASSERT_LOCKLEVEL(thrd) #endif #define THR_LOCK_RELEASE(thrd, lck) \ do { \ THR_ASSERT_LOCKLEVEL(thrd); \ _thr_umutex_unlock((lck), TID(thrd)); \ (thrd)->locklevel--; \ _thr_ast(thrd); \ } while (0) #define THR_LOCK(curthrd) THR_LOCK_ACQUIRE(curthrd, &(curthrd)->lock) #define THR_UNLOCK(curthrd) THR_LOCK_RELEASE(curthrd, &(curthrd)->lock) #define THR_THREAD_LOCK(curthrd, thr) THR_LOCK_ACQUIRE(curthrd, &(thr)->lock) #define THR_THREAD_UNLOCK(curthrd, thr) THR_LOCK_RELEASE(curthrd, &(thr)->lock) #define THREAD_LIST_RDLOCK(curthrd) \ do { \ (curthrd)->locklevel++; \ _thr_rwl_rdlock(&_thr_list_lock); \ } while (0) #define THREAD_LIST_WRLOCK(curthrd) \ do { \ (curthrd)->locklevel++; \ _thr_rwl_wrlock(&_thr_list_lock); \ } while (0) #define THREAD_LIST_UNLOCK(curthrd) \ do { \ _thr_rwl_unlock(&_thr_list_lock); \ (curthrd)->locklevel--; \ _thr_ast(curthrd); \ } while (0) /* * Macros to insert/remove threads to the all thread list and * the gc list. */ #define THR_LIST_ADD(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) == 0) { \ TAILQ_INSERT_HEAD(&_thread_list, thrd, tle); \ _thr_hash_add(thrd); \ (thrd)->tlflags |= TLFLAGS_IN_TDLIST; \ } \ } while (0) #define THR_LIST_REMOVE(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) != 0) { \ TAILQ_REMOVE(&_thread_list, thrd, tle); \ _thr_hash_remove(thrd); \ (thrd)->tlflags &= ~TLFLAGS_IN_TDLIST; \ } \ } while (0) #define THR_GCLIST_ADD(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) == 0) { \ TAILQ_INSERT_HEAD(&_thread_gc_list, thrd, gcle);\ (thrd)->tlflags |= TLFLAGS_IN_GCLIST; \ _gc_count++; \ } \ } while (0) #define THR_GCLIST_REMOVE(thrd) do { \ if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) != 0) { \ TAILQ_REMOVE(&_thread_gc_list, thrd, gcle); \ (thrd)->tlflags &= ~TLFLAGS_IN_GCLIST; \ _gc_count--; \ } \ } while (0) #define THR_REF_ADD(curthread, pthread) { \ THR_CRITICAL_ENTER(curthread); \ pthread->refcount++; \ } while (0) #define THR_REF_DEL(curthread, pthread) { \ pthread->refcount--; \ THR_CRITICAL_LEAVE(curthread); \ } while (0) #define GC_NEEDED() (_gc_count >= 5) #define SHOULD_REPORT_EVENT(curthr, e) \ (curthr->report_events && \ (((curthr)->event_mask | _thread_event_mask ) & e) != 0) extern int __isthreaded; /* * Global variables for the pthread kernel. 
*/ extern char *_usrstack __hidden; extern struct pthread *_thr_initial __hidden; /* For debugger */ extern int _libthr_debug; extern int _thread_event_mask; extern struct pthread *_thread_last_event; /* List of all threads: */ extern pthreadlist _thread_list; /* List of threads needing GC: */ extern pthreadlist _thread_gc_list __hidden; extern int _thread_active_threads; extern atfork_head _thr_atfork_list __hidden; extern struct urwlock _thr_atfork_lock __hidden; /* Default thread attributes: */ extern struct pthread_attr _pthread_attr_default __hidden; /* Default mutex attributes: */ extern struct pthread_mutex_attr _pthread_mutexattr_default __hidden; extern struct pthread_mutex_attr _pthread_mutexattr_adaptive_default __hidden; /* Default condition variable attributes: */ extern struct pthread_cond_attr _pthread_condattr_default __hidden; extern struct pthread_prio _thr_priorities[] __hidden; extern pid_t _thr_pid __hidden; extern int _thr_is_smp __hidden; extern size_t _thr_guard_default __hidden; extern size_t _thr_stack_default __hidden; extern size_t _thr_stack_initial __hidden; extern int _thr_page_size __hidden; extern int _thr_spinloops __hidden; extern int _thr_yieldloops __hidden; /* Garbage thread count. */ extern int _gc_count __hidden; extern struct umutex _mutex_static_lock __hidden; extern struct umutex _cond_static_lock __hidden; extern struct umutex _rwlock_static_lock __hidden; extern struct umutex _keytable_lock __hidden; extern struct urwlock _thr_list_lock __hidden; extern struct umutex _thr_event_lock __hidden; /* * Function prototype definitions. */ __BEGIN_DECLS int _thr_setthreaded(int) __hidden; -int _mutex_cv_lock(pthread_mutex_t *, int count) __hidden; -int _mutex_cv_unlock(pthread_mutex_t *, int *count) __hidden; +int _mutex_cv_lock(struct pthread_mutex *, int count) __hidden; +int _mutex_cv_unlock(struct pthread_mutex *, int *count) __hidden; +int _mutex_cv_attach(struct pthread_mutex *, int count) __hidden; +int _mutex_cv_detach(struct pthread_mutex *, int *count) __hidden; +int _mutex_owned(struct pthread *, const struct pthread_mutex *) __hidden; int _mutex_reinit(pthread_mutex_t *) __hidden; void _mutex_fork(struct pthread *curthread) __hidden; void _libpthread_init(struct pthread *) __hidden; struct pthread *_thr_alloc(struct pthread *) __hidden; void _thread_exit(const char *, int, const char *) __hidden __dead2; int _thr_ref_add(struct pthread *, struct pthread *, int) __hidden; void _thr_ref_delete(struct pthread *, struct pthread *) __hidden; void _thr_ref_delete_unlocked(struct pthread *, struct pthread *) __hidden; int _thr_find_thread(struct pthread *, struct pthread *, int) __hidden; void _thr_rtld_init(void) __hidden; void _thr_rtld_fini(void) __hidden; void _thr_rtld_postfork_child(void) __hidden; int _thr_stack_alloc(struct pthread_attr *) __hidden; void _thr_stack_free(struct pthread_attr *) __hidden; void _thr_free(struct pthread *, struct pthread *) __hidden; void _thr_gc(struct pthread *) __hidden; void _thread_cleanupspecific(void) __hidden; void _thread_printf(int, const char *, ...) 
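	/* minimal formatter behind the stdout_debug()/stderr_debug() macros */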
__hidden; void _thr_spinlock_init(void) __hidden; void _thr_cancel_enter(struct pthread *) __hidden; void _thr_cancel_enter2(struct pthread *, int) __hidden; void _thr_cancel_leave(struct pthread *, int) __hidden; void _thr_testcancel(struct pthread *) __hidden; void _thr_signal_block(struct pthread *) __hidden; void _thr_signal_unblock(struct pthread *) __hidden; void _thr_signal_init(void) __hidden; void _thr_signal_deinit(void) __hidden; int _thr_send_sig(struct pthread *, int sig) __hidden; void _thr_list_init(void) __hidden; void _thr_hash_add(struct pthread *) __hidden; void _thr_hash_remove(struct pthread *) __hidden; struct pthread *_thr_hash_find(struct pthread *) __hidden; void _thr_link(struct pthread *, struct pthread *) __hidden; void _thr_unlink(struct pthread *, struct pthread *) __hidden; void _thr_assert_lock_level(void) __hidden __dead2; void _thr_ast(struct pthread *) __hidden; void _thr_once_init(void) __hidden; void _thr_report_creation(struct pthread *curthread, struct pthread *newthread) __hidden; void _thr_report_death(struct pthread *curthread) __hidden; int _thr_getscheduler(lwpid_t, int *, struct sched_param *) __hidden; int _thr_setscheduler(lwpid_t, int, const struct sched_param *) __hidden; void _thr_signal_prefork(void) __hidden; void _thr_signal_postfork(void) __hidden; void _thr_signal_postfork_child(void) __hidden; void _thr_try_gc(struct pthread *, struct pthread *) __hidden; int _rtp_to_schedparam(const struct rtprio *rtp, int *policy, struct sched_param *param) __hidden; int _schedparam_to_rtp(int policy, const struct sched_param *param, struct rtprio *rtp) __hidden; void _thread_bp_create(void); void _thread_bp_death(void); int _sched_yield(void); void _pthread_cleanup_push(void (*)(void *), void *); void _pthread_cleanup_pop(int); void _pthread_exit_mask(void *status, sigset_t *mask) __dead2 __hidden; void _pthread_cancel_enter(int maycancel); void _pthread_cancel_leave(int maycancel); /* #include */ #ifdef _SYS_FCNTL_H_ int __sys_fcntl(int, int, ...); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); #endif /* #include */ #ifdef _SIGNAL_H_ int __sys_kill(pid_t, int); int __sys_sigaction(int, const struct sigaction *, struct sigaction *); int __sys_sigpending(sigset_t *); int __sys_sigprocmask(int, const sigset_t *, sigset_t *); int __sys_sigsuspend(const sigset_t *); int __sys_sigreturn(const ucontext_t *); int __sys_sigaltstack(const struct sigaltstack *, struct sigaltstack *); int __sys_sigwait(const sigset_t *, int *); int __sys_sigtimedwait(const sigset_t *, siginfo_t *, const struct timespec *); int __sys_sigwaitinfo(const sigset_t *set, siginfo_t *info); #endif /* #include */ #ifdef _TIME_H_ int __sys_nanosleep(const struct timespec *, struct timespec *); #endif /* #include */ #ifdef _SYS_UCONTEXT_H_ int __sys_setcontext(const ucontext_t *ucp); int __sys_swapcontext(ucontext_t *oucp, const ucontext_t *ucp); #endif /* #include */ #ifdef _UNISTD_H_ int __sys_close(int); int __sys_fork(void); pid_t __sys_getpid(void); ssize_t __sys_read(int, void *, size_t); ssize_t __sys_write(int, const void *, size_t); void __sys_exit(int); #endif int _umtx_op_err(void *, int op, u_long, void *, void *) __hidden; static inline int _thr_isthreaded(void) { return (__isthreaded != 0); } static inline int _thr_is_inited(void) { return (_thr_initial != NULL); } static inline void _thr_check_init(void) { if (_thr_initial == NULL) _libpthread_init(NULL); } +struct wake_addr *_thr_alloc_wake_addr(void); +void 
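+/*
+ * A rough sketch of the wait protocol these primitives support
+ * (the real sequence lives in thr_cond.c and thr_sleepq.c; "cvp"
+ * and "abstime" are placeholders): a waiter clears its wake word,
+ * enqueues itself on the wait channel's sleepqueue and blocks in
+ * _thr_sleep() until the word becomes non-zero:
+ *
+ *	_thr_clear_wake(curthread);
+ *	_sleepq_lock(cvp);
+ *	_sleepq_add(cvp, curthread);
+ *	_sleepq_unlock(cvp);
+ *	error = _thr_sleep(curthread, cvp->__clock_id, abstime);
+ *
+ * A signaller dequeues one thread and calls _thr_set_wake() on
+ * that thread's wake address.
+ */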
_thr_release_wake_addr(struct wake_addr *); +int _thr_sleep(struct pthread *, int, const struct timespec *); + +void _thr_wake_addr_init(void) __hidden; + +static inline void +_thr_clear_wake(struct pthread *td) +{ + td->wake_addr->value = 0; +} + +static inline int +_thr_is_woken(struct pthread *td) +{ + return td->wake_addr->value != 0; +} + +static inline void +_thr_set_wake(unsigned int *waddr) +{ + *waddr = 1; + _thr_umtx_wake(waddr, INT_MAX, 0); +} + +void _thr_wake_all(unsigned int *waddrs[], int) __hidden; + +static inline struct pthread * +_sleepq_first(struct sleepqueue *sq) +{ + return TAILQ_FIRST(&sq->sq_blocked); +} + +void _sleepq_init(void) __hidden; +struct sleepqueue *_sleepq_alloc(void) __hidden; +void _sleepq_free(struct sleepqueue *) __hidden; +void _sleepq_lock(void *) __hidden; +void _sleepq_unlock(void *) __hidden; +struct sleepqueue *_sleepq_lookup(void *) __hidden; +void _sleepq_add(void *, struct pthread *) __hidden; +int _sleepq_remove(struct sleepqueue *, struct pthread *) __hidden; +void _sleepq_drop(struct sleepqueue *, + void (*cb)(struct pthread *, void *arg), void *) __hidden; + struct dl_phdr_info; void __pthread_cxa_finalize(struct dl_phdr_info *phdr_info); void _thr_tsd_unload(struct dl_phdr_info *phdr_info) __hidden; void _thr_sigact_unload(struct dl_phdr_info *phdr_info) __hidden; __END_DECLS #endif /* !_THR_PRIVATE_H */ diff --git a/lib/libthr/thread/thr_umtx.c b/lib/libthr/thread/thr_umtx.c index dabfa3597921..33c363761981 100644 --- a/lib/libthr/thread/thr_umtx.c +++ b/lib/libthr/thread/thr_umtx.c @@ -1,265 +1,318 @@ /* * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ * */ #include "thr_private.h" #include "thr_umtx.h" #ifndef HAS__UMTX_OP_ERR int _umtx_op_err(void *obj, int op, u_long val, void *uaddr, void *uaddr2) { if (_umtx_op(obj, op, val, uaddr, uaddr2) == -1) return (errno); return (0); } #endif void _thr_umutex_init(struct umutex *mtx) { static struct umutex default_mtx = DEFAULT_UMUTEX; *mtx = default_mtx; } void _thr_urwlock_init(struct urwlock *rwl) { static struct urwlock default_rwl = DEFAULT_URWLOCK; *rwl = default_rwl; } int __thr_umutex_lock(struct umutex *mtx, uint32_t id) { uint32_t owner; if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { for (;;) { /* wait in kernel */ _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); owner = mtx->m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0 && atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) return (0); } } return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); } +#define SPINLOOPS 1000 + +int +__thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) +{ + uint32_t owner; + + if (!_thr_is_smp) + return __thr_umutex_lock(mtx, id); + + if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { + for (;;) { + int count = SPINLOOPS; + while (count--) { + owner = mtx->m_owner; + if ((owner & ~UMUTEX_CONTESTED) == 0) { + if (atomic_cmpset_acq_32( + &mtx->m_owner, + owner, id|owner)) { + return (0); + } + } + CPU_SPINWAIT; + } + + /* wait in kernel */ + _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0); + } + } + + return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0); +} + int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *ets) { struct timespec timo, cts; uint32_t owner; int ret; clock_gettime(CLOCK_REALTIME, &cts); TIMESPEC_SUB(&timo, ets, &cts); if (timo.tv_sec < 0) return (ETIMEDOUT); for (;;) { if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { /* wait in kernel */ ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, &timo); /* now try to lock it */ owner = mtx->m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0 && atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner)) return (0); } else { ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, &timo); if (ret == 0) break; } if (ret == ETIMEDOUT) break; clock_gettime(CLOCK_REALTIME, &cts); TIMESPEC_SUB(&timo, ets, &cts); if (timo.tv_sec < 0 || (timo.tv_sec == 0 && timo.tv_nsec == 0)) { ret = ETIMEDOUT; break; } } return (ret); } int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) { #ifndef __ia64__ /* XXX this logic has a race-condition on ia64. 
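	 * For plain mutexes, release in user mode by swapping the owner
	 * word from "id | UMUTEX_CONTESTED" down to bare UMUTEX_CONTESTED,
	 * then ask the kernel to wake a waiter; priority-aware mutexes
	 * always take the full UMTX_OP_MUTEX_UNLOCK syscall below.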
*/ if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) { atomic_cmpset_rel_32(&mtx->m_owner, id | UMUTEX_CONTESTED, UMUTEX_CONTESTED); return _umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE, 0, 0, 0); } #endif /* __ia64__ */ return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0); } int __thr_umutex_trylock(struct umutex *mtx) { return _umtx_op_err(mtx, UMTX_OP_MUTEX_TRYLOCK, 0, 0, 0); } int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) { return _umtx_op_err(mtx, UMTX_OP_SET_CEILING, ceiling, oldceiling, 0); } int _thr_umtx_wait(volatile long *mtx, long id, const struct timespec *timeout) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), UMTX_OP_WAIT, id, 0, __DECONST(void*, timeout)); } int _thr_umtx_wait_uint(volatile u_int *mtx, u_int id, const struct timespec *timeout, int shared) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) return (ETIMEDOUT); return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, 0, __DECONST(void*, timeout)); } +int +_thr_umtx_timedwait_uint(volatile u_int *mtx, u_int id, int clockid, + const struct timespec *abstime, int shared) +{ + struct timespec ts, ts2, *tsp; + + if (abstime != NULL) { + clock_gettime(clockid, &ts); + TIMESPEC_SUB(&ts2, abstime, &ts); + if (ts2.tv_sec < 0 || ts2.tv_nsec <= 0) + return (ETIMEDOUT); + tsp = &ts2; + } else { + tsp = NULL; + } + return _umtx_op_err(__DEVOLATILE(void *, mtx), + shared ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT_UINT_PRIVATE, id, NULL, + tsp); +} + int _thr_umtx_wake(volatile void *mtx, int nr_wakeup, int shared) { return _umtx_op_err(__DEVOLATILE(void *, mtx), shared ? UMTX_OP_WAKE : UMTX_OP_WAKE_PRIVATE, nr_wakeup, 0, 0); } void _thr_ucond_init(struct ucond *cv) { bzero(cv, sizeof(struct ucond)); } int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int check_unparking) { if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))) { struct pthread *curthread = _get_curthread(); _thr_umutex_unlock(m, TID(curthread)); return (ETIMEDOUT); } return _umtx_op_err(cv, UMTX_OP_CV_WAIT, check_unparking ? 
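	    /*
	     * UMTX_OP_CV_WAIT unlocks "m" inside the kernel before going
	     * to sleep on "cv"; that is why the already-expired-timeout
	     * path above must drop "m" by hand before returning ETIMEDOUT.
	     */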
UMTX_CHECK_UNPARKING : 0, m, __DECONST(void*, timeout)); } int _thr_ucond_signal(struct ucond *cv) { if (!cv->c_has_waiters) return (0); return _umtx_op_err(cv, UMTX_OP_CV_SIGNAL, 0, NULL, NULL); } int _thr_ucond_broadcast(struct ucond *cv) { if (!cv->c_has_waiters) return (0); return _umtx_op_err(cv, UMTX_OP_CV_BROADCAST, 0, NULL, NULL); } int __thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) { return _umtx_op_err(rwlock, UMTX_OP_RW_RDLOCK, flags, NULL, tsp); } int __thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) { return _umtx_op_err(rwlock, UMTX_OP_RW_WRLOCK, 0, NULL, tsp); } int __thr_rwlock_unlock(struct urwlock *rwlock) { return _umtx_op_err(rwlock, UMTX_OP_RW_UNLOCK, 0, NULL, NULL); } void _thr_rwl_rdlock(struct urwlock *rwlock) { int ret; for (;;) { if (_thr_rwlock_tryrdlock(rwlock, URWLOCK_PREFER_READER) == 0) return; ret = __thr_rwlock_rdlock(rwlock, URWLOCK_PREFER_READER, NULL); if (ret == 0) return; if (ret != EINTR) PANIC("rdlock error"); } } void _thr_rwl_wrlock(struct urwlock *rwlock) { int ret; for (;;) { if (_thr_rwlock_trywrlock(rwlock) == 0) return; ret = __thr_rwlock_wrlock(rwlock, NULL); if (ret == 0) return; if (ret != EINTR) PANIC("wrlock error"); } } void _thr_rwl_unlock(struct urwlock *rwlock) { if (_thr_rwlock_unlock(rwlock)) PANIC("unlock error"); } diff --git a/lib/libthr/thread/thr_umtx.h b/lib/libthr/thread/thr_umtx.h index 3f53fafcf7de..0a8034b364e2 100644 --- a/lib/libthr/thread/thr_umtx.h +++ b/lib/libthr/thread/thr_umtx.h @@ -1,196 +1,207 @@ /*- * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _THR_FBSD_UMTX_H_ #define _THR_FBSD_UMTX_H_ #include #include #define DEFAULT_UMUTEX {0,0,{0,0},{0,0,0,0}} #define DEFAULT_URWLOCK {0,0,0,0,{0,0,0,0}} int __thr_umutex_lock(struct umutex *mtx, uint32_t id) __hidden; +int __thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) __hidden; int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *timeout) __hidden; int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) __hidden; int __thr_umutex_trylock(struct umutex *mtx) __hidden; int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling, uint32_t *oldceiling) __hidden; void _thr_umutex_init(struct umutex *mtx) __hidden; void _thr_urwlock_init(struct urwlock *rwl) __hidden; int _thr_umtx_wait(volatile long *mtx, long exp, const struct timespec *timeout) __hidden; int _thr_umtx_wait_uint(volatile u_int *mtx, u_int exp, const struct timespec *timeout, int shared) __hidden; +int _thr_umtx_timedwait_uint(volatile u_int *mtx, u_int exp, int clockid, + const struct timespec *timeout, int shared) __hidden; int _thr_umtx_wake(volatile void *mtx, int count, int shared) __hidden; int _thr_ucond_wait(struct ucond *cv, struct umutex *m, const struct timespec *timeout, int check_unpaking) __hidden; void _thr_ucond_init(struct ucond *cv) __hidden; int _thr_ucond_signal(struct ucond *cv) __hidden; int _thr_ucond_broadcast(struct ucond *cv) __hidden; int __thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) __hidden; int __thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) __hidden; int __thr_rwlock_unlock(struct urwlock *rwlock) __hidden; /* Internal used only */ void _thr_rwl_rdlock(struct urwlock *rwlock) __hidden; void _thr_rwl_wrlock(struct urwlock *rwlock) __hidden; void _thr_rwl_unlock(struct urwlock *rwlock) __hidden; static inline int _thr_umutex_trylock(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id)) return (0); if ((mtx->m_flags & UMUTEX_PRIO_PROTECT) == 0) return (EBUSY); return (__thr_umutex_trylock(mtx)); } static inline int _thr_umutex_trylock2(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id) != 0) return (0); if ((uint32_t)mtx->m_owner == UMUTEX_CONTESTED && __predict_true((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0)) if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED)) return (0); return (EBUSY); } static inline int _thr_umutex_lock(struct umutex *mtx, uint32_t id) { if (_thr_umutex_trylock2(mtx, id) == 0) return (0); return (__thr_umutex_lock(mtx, id)); } +static inline int +_thr_umutex_lock_spin(struct umutex *mtx, uint32_t id) +{ + if (_thr_umutex_trylock2(mtx, id) == 0) + return (0); + return (__thr_umutex_lock_spin(mtx, id)); +} + static inline int _thr_umutex_timedlock(struct umutex *mtx, uint32_t id, const struct timespec *timeout) { if (_thr_umutex_trylock2(mtx, id) == 0) return (0); return (__thr_umutex_timedlock(mtx, id, timeout)); } static inline int _thr_umutex_unlock(struct umutex *mtx, uint32_t id) { if (atomic_cmpset_rel_32(&mtx->m_owner, id, UMUTEX_UNOWNED)) return (0); return (__thr_umutex_unlock(mtx, id)); } static inline int _thr_rwlock_tryrdlock(struct urwlock *rwlock, int flags) { int32_t state; int32_t wrflags; if (flags & URWLOCK_PREFER_READER || rwlock->rw_flags & URWLOCK_PREFER_READER) wrflags = URWLOCK_WRITE_OWNER; else wrflags = URWLOCK_WRITE_OWNER | URWLOCK_WRITE_WAITERS; state = rwlock->rw_state; while (!(state & wrflags)) { 
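		/*
		 * No blocking writer bits are set: try to take a read slot
		 * by atomically bumping the reader count.  A failed cmpset
		 * means the state changed underneath us, so reload and
		 * retry; falling out of the loop returns EBUSY and sends
		 * the caller to the kernel slow path.
		 */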
if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) return (EAGAIN); if (atomic_cmpset_acq_32(&rwlock->rw_state, state, state + 1)) return (0); state = rwlock->rw_state; } return (EBUSY); } static inline int _thr_rwlock_trywrlock(struct urwlock *rwlock) { int32_t state; state = rwlock->rw_state; while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { if (atomic_cmpset_acq_32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER)) return (0); state = rwlock->rw_state; } return (EBUSY); } static inline int _thr_rwlock_rdlock(struct urwlock *rwlock, int flags, struct timespec *tsp) { if (_thr_rwlock_tryrdlock(rwlock, flags) == 0) return (0); return (__thr_rwlock_rdlock(rwlock, flags, tsp)); } static inline int _thr_rwlock_wrlock(struct urwlock *rwlock, struct timespec *tsp) { if (_thr_rwlock_trywrlock(rwlock) == 0) return (0); return (__thr_rwlock_wrlock(rwlock, tsp)); } static inline int _thr_rwlock_unlock(struct urwlock *rwlock) { int32_t state; state = rwlock->rw_state; if (state & URWLOCK_WRITE_OWNER) { if (atomic_cmpset_rel_32(&rwlock->rw_state, URWLOCK_WRITE_OWNER, 0)) return (0); } else { for (;;) { if (__predict_false(URWLOCK_READER_COUNT(state) == 0)) return (EPERM); if (!((state & (URWLOCK_WRITE_WAITERS | URWLOCK_READ_WAITERS)) && URWLOCK_READER_COUNT(state) == 1)) { if (atomic_cmpset_rel_32(&rwlock->rw_state, state, state-1)) return (0); state = rwlock->rw_state; } else { break; } } } return (__thr_rwlock_unlock(rwlock)); } #endif diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 77e47e56b872..580d6303e382 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1,3595 +1,3685 @@ /*- * Copyright (c) 2004, David Xu * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #endif enum { TYPE_SIMPLE_WAIT, TYPE_CV, TYPE_SEM, TYPE_SIMPLE_LOCK, TYPE_NORMAL_UMUTEX, TYPE_PI_UMUTEX, TYPE_PP_UMUTEX, TYPE_RWLOCK }; #define _UMUTEX_TRY 1 #define _UMUTEX_WAIT 2 /* Key to represent a unique userland synchronous object */ struct umtx_key { int hash; int type; int shared; union { struct { vm_object_t object; uintptr_t offset; } shared; struct { struct vmspace *vs; uintptr_t addr; } private; struct { void *a; uintptr_t b; } both; } info; }; /* Priority inheritance mutex info. */ struct umtx_pi { /* Owner thread */ struct thread *pi_owner; /* Reference count */ int pi_refcount; /* List entry to link umtx holding by thread */ TAILQ_ENTRY(umtx_pi) pi_link; /* List entry in hash */ TAILQ_ENTRY(umtx_pi) pi_hashlink; /* List for waiters */ TAILQ_HEAD(,umtx_q) pi_blocked; /* Identify a userland lock object */ struct umtx_key pi_key; }; /* A userland synchronous object user. */ struct umtx_q { /* Linked list for the hash. */ TAILQ_ENTRY(umtx_q) uq_link; /* Umtx key. */ struct umtx_key uq_key; /* Umtx flags. */ int uq_flags; #define UQF_UMTXQ 0x0001 /* The thread waits on. */ struct thread *uq_thread; /* * Blocked on PI mutex. read can use chain lock * or umtx_lock, write must have both chain lock and * umtx_lock being hold. */ struct umtx_pi *uq_pi_blocked; /* On blocked list */ TAILQ_ENTRY(umtx_q) uq_lockq; /* Thread contending with us */ TAILQ_HEAD(,umtx_pi) uq_pi_contested; /* Inherited priority from PP mutex */ u_char uq_inherited_pri; /* Spare queue ready to be reused */ struct umtxq_queue *uq_spare_queue; /* The queue we on */ struct umtxq_queue *uq_cur_queue; }; TAILQ_HEAD(umtxq_head, umtx_q); /* Per-key wait-queue */ struct umtxq_queue { struct umtxq_head head; struct umtx_key key; LIST_ENTRY(umtxq_queue) link; int length; }; LIST_HEAD(umtxq_list, umtxq_queue); /* Userland lock object's wait-queue chain */ struct umtxq_chain { /* Lock for this chain. */ struct mtx uc_lock; /* List of sleep queues. */ struct umtxq_list uc_queue[2]; #define UMTX_SHARED_QUEUE 0 #define UMTX_EXCLUSIVE_QUEUE 1 LIST_HEAD(, umtxq_queue) uc_spare_queue; /* Busy flag */ char uc_busy; /* Chain lock waiters */ int uc_waiters; /* All PI in the list */ TAILQ_HEAD(,umtx_pi) uc_pi_list; }; #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) #define UMTXQ_BUSY_ASSERT(uc) KASSERT(&(uc)->uc_busy, ("umtx chain is not busy")) /* * Don't propagate time-sharing priority, there is a security reason, * a user can simply introduce PI-mutex, let thread A lock the mutex, * and let another thread B block on the mutex, because B is * sleeping, its priority will be boosted, this causes A's priority to * be boosted via priority propagating too and will never be lowered even * if it is using 100%CPU, this is unfair to other processes. */ #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ PRI_MAX_TIMESHARE : (td)->td_user_pri) #define GOLDEN_RATIO_PRIME 2654404609U #define UMTX_CHAINS 128 #define UMTX_SHIFTS (__WORD_BIT - 7) #define THREAD_SHARE 0 #define PROCESS_SHARE 1 #define AUTO_SHARE 2 #define GET_SHARE(flags) \ (((flags) & USYNC_PROCESS_SHARED) == 0 ? 
THREAD_SHARE : PROCESS_SHARE) #define BUSY_SPINS 200 static uma_zone_t umtx_pi_zone; static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); static int umtx_pi_allocated; SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug"); SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, &umtx_pi_allocated, 0, "Allocated umtx_pi"); static void umtxq_sysinit(void *); static void umtxq_hash(struct umtx_key *key); static struct umtxq_chain *umtxq_getchain(struct umtx_key *key); static void umtxq_lock(struct umtx_key *key); static void umtxq_unlock(struct umtx_key *key); static void umtxq_busy(struct umtx_key *key); static void umtxq_unbusy(struct umtx_key *key); static void umtxq_insert_queue(struct umtx_q *uq, int q); static void umtxq_remove_queue(struct umtx_q *uq, int q); static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo); static int umtxq_count(struct umtx_key *key); static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2); static int umtx_key_get(void *addr, int type, int share, struct umtx_key *key); static void umtx_key_release(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri); static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused); SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) static struct mtx umtx_lock; static void umtxq_sysinit(void *arg __unused) { int i, j; umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (i = 0; i < 2; ++i) { for (j = 0; j < UMTX_CHAINS; ++j) { mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, MTX_DEF | MTX_DUPOK); LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); umtxq_chains[i][j].uc_busy = 0; umtxq_chains[i][j].uc_waiters = 0; } } mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN); EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, EVENTHANDLER_PRI_ANY); } struct umtx_q * umtxq_alloc(void) { struct umtx_q *uq; uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); TAILQ_INIT(&uq->uq_spare_queue->head); TAILQ_INIT(&uq->uq_pi_contested); uq->uq_inherited_pri = PRI_MAX; return (uq); } void umtxq_free(struct umtx_q *uq) { MPASS(uq->uq_spare_queue != NULL); free(uq->uq_spare_queue, M_UMTX); free(uq, M_UMTX); } static inline void umtxq_hash(struct umtx_key *key) { unsigned n = (uintptr_t)key->info.both.a + key->info.both.b; key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; } static inline int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) { return (k1->type == k2->type && k1->info.both.a == k2->info.both.a && k1->info.both.b == k2->info.both.b); } static inline struct umtxq_chain * umtxq_getchain(struct umtx_key *key) { if (key->type <= TYPE_SEM) return 
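		/*
		 * Wait-style objects (TYPE_SIMPLE_WAIT, TYPE_CV and
		 * TYPE_SEM) hash into their own chain array, keeping their
		 * queues apart from the lock-type chains that may share
		 * the same hash bucket.
		 */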
(&umtxq_chains[1][key->hash]); return (&umtxq_chains[0][key->hash]); } /* * Lock a chain. */ static inline void umtxq_lock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_lock(&uc->uc_lock); } /* * Unlock a chain. */ static inline void umtxq_unlock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_unlock(&uc->uc_lock); } /* * Set chain to busy state when following operation * may be blocked (kernel mutex can not be used). */ static inline void umtxq_busy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); if (uc->uc_busy) { #ifdef SMP if (smp_cpus > 1) { int count = BUSY_SPINS; if (count > 0) { umtxq_unlock(key); while (uc->uc_busy && --count > 0) cpu_spinwait(); umtxq_lock(key); } } #endif while (uc->uc_busy) { uc->uc_waiters++; msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); uc->uc_waiters--; } } uc->uc_busy = 1; } /* * Unbusy a chain. */ static inline void umtxq_unbusy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); KASSERT(uc->uc_busy != 0, ("not busy")); uc->uc_busy = 0; if (uc->uc_waiters) wakeup_one(uc); } static struct umtxq_queue * umtxq_queue_lookup(struct umtx_key *key, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); LIST_FOREACH(uh, &uc->uc_queue[q], link) { if (umtx_key_match(&uh->key, key)) return (uh); } return (NULL); } static inline void umtxq_insert_queue(struct umtx_q *uq, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); uh = umtxq_queue_lookup(&uq->uq_key, q); if (uh != NULL) { LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); } else { uh = uq->uq_spare_queue; uh->key = uq->uq_key; LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); } uq->uq_spare_queue = NULL; TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); uh->length++; uq->uq_flags |= UQF_UMTXQ; uq->uq_cur_queue = uh; return; } static inline void umtxq_remove_queue(struct umtx_q *uq, int q) { struct umtxq_chain *uc; struct umtxq_queue *uh; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (uq->uq_flags & UQF_UMTXQ) { uh = uq->uq_cur_queue; TAILQ_REMOVE(&uh->head, uq, uq_link); uh->length--; uq->uq_flags &= ~UQF_UMTXQ; if (TAILQ_EMPTY(&uh->head)) { KASSERT(uh->length == 0, ("inconsistent umtxq_queue length")); LIST_REMOVE(uh, link); } else { uh = LIST_FIRST(&uc->uc_spare_queue); KASSERT(uh != NULL, ("uc_spare_queue is empty")); LIST_REMOVE(uh, link); } uq->uq_spare_queue = uh; uq->uq_cur_queue = NULL; } } /* * Check if there are multiple waiters */ static int umtxq_count(struct umtx_key *key) { struct umtxq_chain *uc; struct umtxq_queue *uh; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) return (uh->length); return (0); } /* * Check if there are multiple PI waiters and returns first * waiter. */ static int umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) { struct umtxq_chain *uc; struct umtxq_queue *uh; *first = NULL; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) { *first = TAILQ_FIRST(&uh->head); return (uh->length); } return (0); } /* * Wake up threads waiting on an userland object. 
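 * At most n_wake sleepers are removed from the matching queue and
 * woken in FIFO order; the return value is the count actually
 * woken.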
*/ static int umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) { struct umtxq_chain *uc; struct umtxq_queue *uh; struct umtx_q *uq; int ret; ret = 0; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); uh = umtxq_queue_lookup(key, q); if (uh != NULL) { while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { umtxq_remove_queue(uq, q); wakeup(uq); if (++ret >= n_wake) return (ret); } } return (ret); } /* * Wake up specified thread. */ static inline void umtxq_signal_thread(struct umtx_q *uq) { struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); umtxq_remove(uq); wakeup(uq); } /* * Put thread into sleep state, before sleeping, check if * thread was removed from umtx queue. */ static inline int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo) { struct umtxq_chain *uc; int error; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (!(uq->uq_flags & UQF_UMTXQ)) return (0); error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo); if (error == EWOULDBLOCK) error = ETIMEDOUT; return (error); } /* * Convert userspace address into unique logical address. */ static int umtx_key_get(void *addr, int type, int share, struct umtx_key *key) { struct thread *td = curthread; vm_map_t map; vm_map_entry_t entry; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; key->type = type; if (share == THREAD_SHARE) { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } else { MPASS(share == PROCESS_SHARE || share == AUTO_SHARE); map = &td->td_proc->p_vmspace->vm_map; if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE, &entry, &key->info.shared.object, &pindex, &prot, &wired) != KERN_SUCCESS) { return EFAULT; } if ((share == PROCESS_SHARE) || (share == AUTO_SHARE && VM_INHERIT_SHARE == entry->inheritance)) { key->shared = 1; key->info.shared.offset = entry->offset + entry->start - (vm_offset_t)addr; vm_object_reference(key->info.shared.object); } else { key->shared = 0; key->info.private.vs = td->td_proc->p_vmspace; key->info.private.addr = (uintptr_t)addr; } vm_map_lookup_done(map, entry); } umtxq_hash(key); return (0); } /* * Release key. */ static inline void umtx_key_release(struct umtx_key *key) { if (key->shared) vm_object_deallocate(key->info.shared.object); } /* * Lock a umtx object. */ static int _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo) { struct umtx_q *uq; u_long owner; u_long old; int error = 0; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMTX_UNOWNED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMTX_CONTESTED) { owner = casuword(&umtx->u_owner, UMTX_CONTESTED, id | UMTX_CONTESTED); if (owner == UMTX_CONTESTED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } /* * If we caught a signal, we have retried and now * exit immediately. 
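		 *
		 * For context, the userland fast path that this kernel
		 * slow path backs up looks roughly like the following
		 * (a sketch, not the actual libthr code):
		 *
		 *	if (atomic_cmpset_acq_long(&umtx->u_owner,
		 *	    UMTX_UNOWNED, id))
		 *		return (0);	/- uncontested, no syscall -/
		 *	return (_umtx_lock(umtx));	/- use the syscall -/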
*/ if (error != 0) return (error); if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); if (old == owner) error = umtxq_sleep(uq, "umtx", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); } return (0); } /* * Lock a umtx object. */ static int do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; if (timeout == NULL) { error = _do_lock_umtx(td, umtx, id, 0); /* Mutex locking is restarted if it is interrupted. */ if (error == EINTR) error = ERESTART; } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = _do_lock_umtx(td, umtx, id, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a umtx object. */ static int do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id) { struct umtx_key key; u_long owner; u_long old; int error; int count; /* * Make sure we own this mtx. */ owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMTX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMTX_CONTESTED) == 0) { old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE, &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword(&umtx->u_owner, owner, count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } #ifdef COMPAT_FREEBSD32 /* * Lock a umtx object. */ static int _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo) { struct umtx_q *uq; uint32_t owner; uint32_t old; int error = 0; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(m, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) return (0); /* The address was invalid. 
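		 * casuword32() reports a fault by returning -1, so an
		 * owner word that legitimately held 0xffffffff would be
		 * indistinguishable from a bad address here; thread ids
		 * never take that value in practice.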
*/ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(m, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword32(m, owner, owner | UMUTEX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (EFAULT); } /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); if (old == owner) error = umtxq_sleep(uq, "umtx", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); } return (0); } /* * Lock a umtx object. */ static int do_lock_umtx32(struct thread *td, void *m, uint32_t id, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; if (timeout == NULL) { error = _do_lock_umtx32(td, m, id, 0); /* Mutex locking is restarted if it is interrupted. */ if (error == EINTR) error = ERESTART; } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = _do_lock_umtx32(td, m, id, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } /* Timed-locking is not restarted. */ if (error == ERESTART) error = EINTR; } return (error); } /* * Unlock a umtx object. */ static int do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id) { struct umtx_key key; uint32_t owner; uint32_t old; int error; int count; /* * Make sure we own this mtx. */ owner = fuword32(m); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { old = casuword32(m, owner, UMUTEX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE, &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword32(m, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_signal(&key,1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } #endif /* * Fetch and compare value, sleep on the address if value is not changed. 
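 *
 * This is the futex-style primitive behind UMTX_OP_WAIT and its
 * variants. A hypothetical userland caller, for illustration only:
 *
 *	while (atomic_load_acq_int(&flag) == 0)
 *		_umtx_op(&flag, UMTX_OP_WAIT_UINT, 0, NULL, NULL);
 *
 * Because do_wait() re-reads *addr only after the thread is already on
 * the sleep queue, a wakeup racing with the userland check is not lost.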
*/ static int do_wait(struct thread *td, void *addr, u_long id, struct timespec *timeout, int compat32, int is_private) { struct umtx_q *uq; struct timespec ts, ts2, ts3; struct timeval tv; u_long tmp; int error = 0; uq = td->td_umtxq; if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT, is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); if (compat32 == 0) tmp = fuword(addr); else tmp = (unsigned int)fuword32(addr); if (tmp != id) { umtxq_lock(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } else if (timeout == NULL) { umtxq_lock(&uq->uq_key); error = umtxq_sleep(uq, "uwait", 0); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } else { getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); umtxq_lock(&uq->uq_key); for (;;) { error = umtxq_sleep(uq, "uwait", tvtohz(&tv)); if (!(uq->uq_flags & UQF_UMTXQ)) { error = 0; break; } if (error != ETIMEDOUT) break; umtxq_unlock(&uq->uq_key); getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; umtxq_lock(&uq->uq_key); break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); umtxq_lock(&uq->uq_key); } umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); if (error == ERESTART) error = EINTR; return (error); } /* * Wake up threads sleeping on the specified address. */ int kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private) { struct umtx_key key; int ret; if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT, is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0) return (ret); umtxq_lock(&key); ret = umtxq_signal(&key, n_wake); umtxq_unlock(&key); umtx_key_release(&key); return (0); } /* * Lock PTHREAD_PRIO_NONE protocol POSIX mutex. */ static int _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo, int mode) { struct umtx_q *uq; uint32_t owner, old, id; int error = 0; id = td->td_tid; uq = td->td_umtxq; /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { owner = fuword32(__DEVOLATILE(void *, &m->m_owner)); if (mode == _UMUTEX_WAIT) { if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED) return (0); } else { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) return (0); /* The address was invalid. */ if (owner == -1) return (EFAULT); /* If this failed the lock has changed, restart. */ continue; } } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) return (EDEADLK); if (mode == _UMUTEX_TRY) return (EBUSY); /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) return (error); if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. 
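		 *
		 * The owner-word transitions involved, as seen by the
		 * CASes (a summary of the protocol above, not new state):
		 *
		 *	UMUTEX_UNOWNED   -> id                        fast lock
		 *	UMUTEX_CONTESTED -> id | UMUTEX_CONTESTED     lock, waiters queued
		 *	owner            -> owner | UMUTEX_CONTESTED  mark before sleep
		 *
		 * The mark forces the eventual unlocker into
		 * do_unlock_normal() so the sleepers queued above are woken.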
		 */
		old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (old == -1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		if (old == owner)
			error = umtxq_sleep(uq, "umtxn", timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
{
	struct umtx_key key;
	uint32_t owner, old, id;
	int error;
	int count;

	id = td->td_tid;
	/*
	 * Make sure we own this mtx.
	 */
	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	if ((owner & UMUTEX_CONTESTED) == 0) {
		old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
		if (old == -1)
			return (EFAULT);
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * zero or one thread is waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	old = casuword32(&m->m_owner, owner,
	    count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (old == -1)
		return (EFAULT);
	if (old != owner)
		return (EINVAL);
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for a simple mutex.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

	owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
	if (owner == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0)
		return (0);

	flags = fuword32(&m->m_flags);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	if (count <= 1)
		owner = casuword32(&m->m_owner, UMUTEX_CONTESTED,
		    UMUTEX_UNOWNED);

	umtxq_lock(&key);
	if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
	struct umtx_pi *pi;

	pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
	TAILQ_INIT(&pi->pi_blocked);
	atomic_add_int(&umtx_pi_allocated, 1);
	return (pi);
}

static inline void
umtx_pi_free(struct umtx_pi *pi)
{
	uma_zfree(umtx_pi_zone, pi);
	atomic_add_int(&umtx_pi_allocated, -1);
}

/*
 * Adjust the thread's position on a pi_state after its priority has been
 * changed.
 */
static int
umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
{
	struct umtx_q *uq, *uq1, *uq2;
	struct thread *td1;

	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi == NULL)
		return (0);

	uq = td->td_umtxq;

	/*
	 * Check if the thread needs to be moved on the blocked chain.
	 * It needs to be moved if either its priority is lower than
	 * the previous thread's or higher than the next thread's.
	 */
	uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
	uq2 = TAILQ_NEXT(uq, uq_lockq);
	if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
	    (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
		/*
		 * Remove thread from blocked chain and determine where
		 * it should be moved to.
		 */
		TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
		TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
			td1 = uq1->uq_thread;
			MPASS(td1->td_proc->p_magic == P_MAGIC);
			if (UPRI(td1) > UPRI(td))
				break;
		}

		if (uq1 == NULL)
			TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
		else
			TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	}
	return (1);
}

/*
 * Propagate priority when a thread is blocked on a POSIX
 * PI mutex.
 */
static void
umtx_propagate_priority(struct thread *td)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;
	int pri;

	mtx_assert(&umtx_lock, MA_OWNED);
	pri = UPRI(td);
	uq = td->td_umtxq;
	pi = uq->uq_pi_blocked;
	if (pi == NULL)
		return;

	for (;;) {
		td = pi->pi_owner;
		if (td == NULL || td == curthread)
			return;

		MPASS(td->td_proc != NULL);
		MPASS(td->td_proc->p_magic == P_MAGIC);

		thread_lock(td);
		if (td->td_lend_user_pri > pri)
			sched_lend_user_prio(td, pri);
		else {
			thread_unlock(td);
			break;
		}
		thread_unlock(td);

		/*
		 * Pick up the lock that td is blocked on.
		 */
		uq = td->td_umtxq;
		pi = uq->uq_pi_blocked;
		/* Resort td on the list if needed. */
		if (!umtx_pi_adjust_thread(pi, td))
			break;
	}
}

/*
 * Unpropagate priority for a PI mutex when a thread blocked on
 * it is interrupted by a signal or resumed by others.
 */
static void
umtx_unpropagate_priority(struct umtx_pi *pi)
{
	struct umtx_q *uq, *uq_owner;
	struct umtx_pi *pi2;
	int pri, oldpri;

	mtx_assert(&umtx_lock, MA_OWNED);

	while (pi != NULL && pi->pi_owner != NULL) {
		pri = PRI_MAX;
		uq_owner = pi->pi_owner->td_umtxq;

		TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
			uq = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq != NULL) {
				if (pri > UPRI(uq->uq_thread))
					pri = UPRI(uq->uq_thread);
			}
		}

		if (pri > uq_owner->uq_inherited_pri)
			pri = uq_owner->uq_inherited_pri;
		thread_lock(pi->pi_owner);
		oldpri = pi->pi_owner->td_user_pri;
		sched_unlend_user_prio(pi->pi_owner, pri);
		thread_unlock(pi->pi_owner);
		if (uq_owner->uq_pi_blocked != NULL)
			umtx_pi_adjust_locked(pi->pi_owner, oldpri);
		pi = uq_owner->uq_pi_blocked;
	}
}

/*
 * Insert a PI mutex into the owned list.
 */
static void
umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_assert(&umtx_lock, MA_OWNED);
	if (pi->pi_owner != NULL)
		panic("pi_owner != NULL");
	pi->pi_owner = owner;
	TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
}

/*
 * Claim ownership of a PI mutex.
 */
static int
umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
{
	struct umtx_q *uq, *uq_owner;

	uq_owner = owner->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == owner) {
		mtx_unlock_spin(&umtx_lock);
		return (0);
	}

	if (pi->pi_owner != NULL) {
		/*
		 * userland may have already messed the mutex, sigh.
		 */
		mtx_unlock_spin(&umtx_lock);
		return (EPERM);
	}
	umtx_pi_setowner(pi, owner);
	uq = TAILQ_FIRST(&pi->pi_blocked);
	if (uq != NULL) {
		int pri;

		pri = UPRI(uq->uq_thread);
		thread_lock(owner);
		if (pri < UPRI(owner))
			sched_lend_user_prio(owner, pri);
		thread_unlock(owner);
	}
	mtx_unlock_spin(&umtx_lock);
	return (0);
}

static void
umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	/*
	 * Pick up the lock that td is blocked on.
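	 *
	 * Example of the chain this re-sorting feeds (hypothetical
	 * priorities; smaller is better):
	 *
	 *	T1 (prio 100) blocked on M1, owned by T2
	 *	T2 (prio 120) blocked on M2, owned by T3
	 *
	 * Improving T1's priority re-sorts it to the head of M1's
	 * turnstile, and umtx_propagate_priority() below then lends
	 * priority 100 to T2, and through T2's blocked lock, to T3.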
	 */
	pi = uq->uq_pi_blocked;
	MPASS(pi != NULL);

	/* Resort the turnstile on the list. */
	if (!umtx_pi_adjust_thread(pi, td))
		return;

	/*
	 * If our priority was lowered and we are at the head of the
	 * turnstile, then propagate our new priority up the chain.
	 */
	if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
		umtx_propagate_priority(td);
}

/*
 * Adjust a thread's position in the PI mutex it is blocked on;
 * this may start a new priority-propagation pass.
 */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock_spin(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL)
		umtx_pi_adjust_locked(td, oldpri);
	mtx_unlock_spin(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
	uint32_t owner, const char *wmesg, int timo)
{
	struct umtxq_chain *uc;
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int pri;
	int error = 0;

	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	uc = umtxq_getchain(&uq->uq_key);
	UMTXQ_LOCKED_ASSERT(uc);
	UMTXQ_BUSY_ASSERT(uc);
	umtxq_insert(uq);
	mtx_lock_spin(&umtx_lock);
	if (pi->pi_owner == NULL) {
		mtx_unlock_spin(&umtx_lock);
		/* XXX Only look up thread in current process. */
		td1 = tdfind(owner, curproc->p_pid);
		mtx_lock_spin(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			PROC_UNLOCK(td1->td_proc);
		}
	}

	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	if (uq->uq_flags & UQF_UMTXQ) {
		error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
		if (error == EWOULDBLOCK)
			error = ETIMEDOUT;
		if (uq->uq_flags & UQF_UMTXQ) {
			umtxq_remove(uq);
		}
	}
	mtx_lock_spin(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_unpropagate_priority(pi);
	mtx_unlock_spin(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Increment the reference count for a PI mutex.
 */
static void
umtx_pi_ref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	pi->pi_refcount++;
}

/*
 * Decrement the reference count for a PI mutex; if the count
 * drops to zero, its memory is freed.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock_spin(&umtx_lock);
		if (pi->pi_owner != NULL) {
			TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
				pi, pi_link);
			pi->pi_owner = NULL;
		}
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
			("blocked queue not empty"));
		mtx_unlock_spin(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in the hash table.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into the hash table.
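 *
 * Lookup and insert are paired under the chain lock; _do_lock_pi()
 * below uses the usual double-checked pattern to allocate:
 *
 *	pi = umtx_pi_lookup(&key);
 *	if (pi == NULL) {
 *		new_pi = umtx_pi_alloc(M_NOWAIT);
 *		if (new_pi == NULL) {
 *			(drop the chain lock, umtx_pi_alloc(M_WAITOK),
 *			 relock, and re-do umtx_pi_lookup())
 *		}
 *	}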
*/ static inline void umtx_pi_insert(struct umtx_pi *pi) { struct umtxq_chain *uc; uc = umtxq_getchain(&pi->pi_key); UMTXQ_LOCKED_ASSERT(uc); TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink); } /* * Lock a PI mutex. */ static int _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo, int try) { struct umtx_q *uq; struct umtx_pi *pi, *new_pi; uint32_t id, owner, old; int error; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi == NULL) { new_pi = umtx_pi_alloc(M_NOWAIT); if (new_pi == NULL) { umtxq_unlock(&uq->uq_key); new_pi = umtx_pi_alloc(M_WAITOK); umtxq_lock(&uq->uq_key); pi = umtx_pi_lookup(&uq->uq_key); if (pi != NULL) { umtx_pi_free(new_pi); new_pi = NULL; } } if (new_pi != NULL) { new_pi->pi_key = uq->uq_key; umtx_pi_insert(new_pi); pi = new_pi; } } umtx_pi_ref(pi); umtxq_unlock(&uq->uq_key); /* * Care must be exercised when dealing with umtx structure. It * can fault on any access. */ for (;;) { /* * Try the uncontested case. This should be done in userland. */ owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id); /* The acquire succeeded. */ if (owner == UMUTEX_UNOWNED) { error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } /* If no one owns it but it is contested try to acquire it. */ if (owner == UMUTEX_CONTESTED) { owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); error = umtx_pi_claim(pi, td); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } /* If this failed the lock has changed, restart. */ continue; } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * Set the contested bit so that a release in user space * knows to use the system call for unlock. If this fails * either some one else has acquired the lock or it has been * released. */ old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED); /* The address was invalid. */ if (old == -1) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); error = EFAULT; break; } umtxq_lock(&uq->uq_key); /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ if (old == owner) error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED, "umtxpi", timo); else { umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } } umtxq_lock(&uq->uq_key); umtx_pi_unref(pi); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PI mutex. */ static int do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; struct umtx_q *uq_first, *uq_first2, *uq_me; struct umtx_pi *pi, *pi2; uint32_t owner, old, id; int error; int count; int pri; id = td->td_tid; /* * Make sure we own this mtx. 
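	 *
	 * Ownership is judged purely from the user-visible owner word:
	 * the caller's tid must equal m_owner with UMUTEX_CONTESTED
	 * masked off, otherwise EPERM is returned, the POSIX error for
	 * unlocking a mutex the caller does not hold.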
*/ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); /* This should be done in userland */ if ((owner & UMUTEX_CONTESTED) == 0) { old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED); if (old == -1) return (EFAULT); if (old == owner) return (0); owner = old; } /* We should only ever be in here for contested locks */ if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count_pi(&key, &uq_first); if (uq_first != NULL) { mtx_lock_spin(&umtx_lock); pi = uq_first->uq_pi_blocked; KASSERT(pi != NULL, ("pi == NULL?")); if (pi->pi_owner != curthread) { mtx_unlock_spin(&umtx_lock); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); /* userland messed the mutex */ return (EPERM); } uq_me = curthread->td_umtxq; pi->pi_owner = NULL; TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link); /* get highest priority thread which is still sleeping. */ uq_first = TAILQ_FIRST(&pi->pi_blocked); while (uq_first != NULL && (uq_first->uq_flags & UQF_UMTXQ) == 0) { uq_first = TAILQ_NEXT(uq_first, uq_lockq); } pri = PRI_MAX; TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) { uq_first2 = TAILQ_FIRST(&pi2->pi_blocked); if (uq_first2 != NULL) { if (pri > UPRI(uq_first2->uq_thread)) pri = UPRI(uq_first2->uq_thread); } } thread_lock(curthread); sched_unlend_user_prio(curthread, pri); thread_unlock(curthread); mtx_unlock_spin(&umtx_lock); if (uq_first) umtxq_signal_thread(uq_first); } umtxq_unlock(&key); /* * When unlocking the umtx, it must be marked as unowned if * there is zero or one thread only waiting for it. * Otherwise, it must be marked as contested. */ old = casuword32(&m->m_owner, owner, count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); if (old == -1) return (EFAULT); if (old != owner) return (EINVAL); return (0); } /* * Lock a PP mutex. */ static int _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo, int try) { struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t ceiling; uint32_t owner, id; int error, pri, old_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); for (;;) { old_inherited_pri = uq->uq_inherited_pri; umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]); if (ceiling > RTP_PRIO_MAX) { error = EINVAL; goto out; } mtx_lock_spin(&umtx_lock); if (UPRI(td) < PRI_MIN_REALTIME + ceiling) { mtx_unlock_spin(&umtx_lock); error = EINVAL; goto out; } if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) { uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling; thread_lock(td); if (uq->uq_inherited_pri < UPRI(td)) sched_lend_user_prio(td, uq->uq_inherited_pri); thread_unlock(td); } mtx_unlock_spin(&umtx_lock); owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } if ((flags & UMUTEX_ERROR_CHECK) != 0 && (owner & ~UMUTEX_CONTESTED) == id) { error = EDEADLK; break; } if (try != 0) { error = EBUSY; break; } /* * If we caught a signal, we have retried and now * exit immediately. 
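		 *
		 * Ceiling arithmetic used in this loop (kernel priority
		 * values grow downward, so smaller means better):
		 *
		 *	ceiling = RTP_PRIO_MAX - m_ceilings[0];
		 *	lent priority = PRI_MIN_REALTIME + ceiling;
		 *
		 * A locker already running better than the ceiling gets
		 * EINVAL; an entitled locker (su) is lent the ceiling
		 * priority while it waits for and holds the mutex.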
*/ if (error != 0) break; umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", timo); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } if (error != 0) { mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = old_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } out: umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Unlock a PP mutex. */ static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; struct umtx_q *uq, *uq2; struct umtx_pi *pi; uint32_t owner, id; uint32_t rceiling; int error, pri, new_inherited_pri, su; id = td->td_tid; uq = td->td_umtxq; su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0); /* * Make sure we own this mtx. */ owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner)); if (owner == -1) return (EFAULT); if ((owner & ~UMUTEX_CONTESTED) != id) return (EPERM); error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t)); if (error != 0) return (error); if (rceiling == -1) new_inherited_pri = PRI_MAX; else { rceiling = RTP_PRIO_MAX - rceiling; if (rceiling > RTP_PRIO_MAX) return (EINVAL); new_inherited_pri = PRI_MIN_REALTIME + rceiling; } if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_unlock(&key); /* * For priority protected mutex, always set unlocked state * to UMUTEX_CONTESTED, so that userland always enters kernel * to lock the mutex, it is necessary because thread priority * has to be adjusted for such mutex. 
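	 *
	 * In other words, a PP mutex has no userland fast path for
	 * unlock: the owner word goes back to UMUTEX_CONTESTED rather
	 * than UMUTEX_UNOWNED, so the next locker must trap into the
	 * kernel, where the lent priority can be recomputed.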
*/ error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner), UMUTEX_CONTESTED); umtxq_lock(&key); if (error == 0) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); if (error == -1) error = EFAULT; else { mtx_lock_spin(&umtx_lock); if (su != 0) uq->uq_inherited_pri = new_inherited_pri; pri = PRI_MAX; TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) { uq2 = TAILQ_FIRST(&pi->pi_blocked); if (uq2 != NULL) { if (pri > UPRI(uq2->uq_thread)) pri = UPRI(uq2->uq_thread); } } if (pri > uq->uq_inherited_pri) pri = uq->uq_inherited_pri; thread_lock(td); sched_unlend_user_prio(td, pri); thread_unlock(td); mtx_unlock_spin(&umtx_lock); } umtx_key_release(&key); return (error); } static int do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling, uint32_t *old_ceiling) { struct umtx_q *uq; uint32_t save_ceiling; uint32_t owner, id; uint32_t flags; int error; flags = fuword32(&m->m_flags); if ((flags & UMUTEX_PRIO_PROTECT) == 0) return (EINVAL); if (ceiling > RTP_PRIO_MAX) return (EINVAL); id = td->td_tid; uq = td->td_umtxq; if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags), &uq->uq_key)) != 0) return (error); for (;;) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); save_ceiling = fuword32(&m->m_ceilings[0]); owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED); if (owner == UMUTEX_CONTESTED) { suword32(&m->m_ceilings[0], ceiling); suword32(__DEVOLATILE(uint32_t *, &m->m_owner), UMUTEX_CONTESTED); error = 0; break; } /* The address was invalid. */ if (owner == -1) { error = EFAULT; break; } if ((owner & ~UMUTEX_CONTESTED) == id) { suword32(&m->m_ceilings[0], ceiling); error = 0; break; } /* * If we caught a signal, we have retried and now * exit immediately. */ if (error != 0) break; /* * We set the contested bit, sleep. Otherwise the lock changed * and we need to retry or we lost a race to the thread * unlocking the umtx. */ umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "umtxpp", 0); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); } umtxq_lock(&uq->uq_key); if (error == 0) umtxq_signal(&uq->uq_key, INT_MAX); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); if (error == 0 && old_ceiling != NULL) suword32(old_ceiling, save_ceiling); return (error); } static int _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo, int mode) { switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) { case 0: return (_do_lock_normal(td, m, flags, timo, mode)); case UMUTEX_PRIO_INHERIT: return (_do_lock_pi(td, m, flags, timo, mode)); case UMUTEX_PRIO_PROTECT: return (_do_lock_pp(td, m, flags, timo, mode)); } return (EINVAL); } /* * Lock a userland POSIX mutex. */ static int do_lock_umutex(struct thread *td, struct umutex *m, struct timespec *timeout, int mode) { struct timespec ts, ts2, ts3; struct timeval tv; uint32_t flags; int error; flags = fuword32(&m->m_flags); if (flags == -1) return (EFAULT); if (timeout == NULL) { error = _do_lock_umutex(td, m, flags, 0, mode); /* Mutex locking is restarted if it is interrupted. 
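		 *
		 * That is, an untimed lock converts EINTR to ERESTART so
		 * the syscall is transparently retried after the signal
		 * handler runs, while the timed branch below does the
		 * opposite (ERESTART -> EINTR), since restarting would
		 * re-arm the full relative timeout.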
		 */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		getnanouptime(&ts);
		timespecadd(&ts, timeout);
		TIMESPEC_TO_TIMEVAL(&tv, timeout);
		for (;;) {
			error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
			if (error != ETIMEDOUT)
				break;
			getnanouptime(&ts2);
			if (timespeccmp(&ts2, &ts, >=)) {
				error = ETIMEDOUT;
				break;
			}
			ts3 = ts;
			timespecsub(&ts3, &ts2);
			TIMESPEC_TO_TIMEVAL(&tv, &ts3);
		}
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m)
{
	uint32_t flags;

	flags = fuword32(&m->m_flags);
	if (flags == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags));
	}

	return (EINVAL);
}

static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
	struct timespec *timeout, u_long wflags)
{
	struct umtx_q *uq;
	struct timeval tv;
	struct timespec cts, ets, tts;
	uint32_t flags;
+	uint32_t clockid;
	int error;

	uq = td->td_umtxq;
	flags = fuword32(&cv->c_flags);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
+
+	if ((wflags & CVWAIT_CLOCKID) != 0) {
+		clockid = fuword32(&cv->c_clockid);
+		if (clockid < CLOCK_REALTIME ||
+		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
+			/* hmm, only HW clock id will work. */
+			return (EINVAL);
+		}
+	} else {
+		clockid = CLOCK_REALTIME;
+	}
+
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
-	 * The magic thing is we should set c_has_waiters to 1 before
-	 * releasing user mutex.
+	 * Set c_has_waiters to 1 before releasing the user mutex, and
+	 * avoid dirtying the cache line when that is unnecessary.
	 */
-	suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
+	if (fuword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters)) == 0)
+		suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);

	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);

	error = do_unlock_umutex(td, m);

	umtxq_lock(&uq->uq_key);
	if (error == 0) {
-		if ((wflags & UMTX_CHECK_UNPARKING) &&
-		    (td->td_pflags & TDP_WAKEUP)) {
-			td->td_pflags &= ~TDP_WAKEUP;
-			error = EINTR;
-		} else if (timeout == NULL) {
+		if (timeout == NULL) {
			error = umtxq_sleep(uq, "ucond", 0);
		} else {
-			getnanouptime(&ets);
-			timespecadd(&ets, timeout);
-			TIMESPEC_TO_TIMEVAL(&tv, timeout);
+			if ((wflags & CVWAIT_ABSTIME) == 0) {
+				kern_clock_gettime(td, clockid, &ets);
+				timespecadd(&ets, timeout);
+				tts = *timeout;
+			} else { /* absolute time */
+				ets = *timeout;
+				tts = *timeout;
+				kern_clock_gettime(td, clockid, &cts);
+				timespecsub(&tts, &cts);
+			}
+			TIMESPEC_TO_TIMEVAL(&tv, &tts);
			for (;;) {
				error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
				if (error != ETIMEDOUT)
					break;
-				getnanouptime(&cts);
+				kern_clock_gettime(td, clockid, &cts);
				if (timespeccmp(&cts, &ets, >=)) {
					error = ETIMEDOUT;
					break;
				}
				tts = ets;
				timespecsub(&tts, &cts);
				TIMESPEC_TO_TIMEVAL(&tv, &tts);
			}
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
-		umtxq_remove(uq);
+		/*
+		 * This must be a timeout, an interruption by a signal,
+		 * or a spurious wakeup; clear the c_has_waiters flag
+		 * when necessary.
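+		 *
+		 * When this waiter was the only one queued (oldlen == 1
+		 * below), the c_has_waiters hint is also cleared, so a
+		 * subsequent pthread_cond_signal() can skip the kernel
+		 * entirely.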
+ */ + umtxq_busy(&uq->uq_key); + if ((uq->uq_flags & UQF_UMTXQ) != 0) { + int oldlen = uq->uq_cur_queue->length; + umtxq_remove(uq); + if (oldlen == 1) { + umtxq_unlock(&uq->uq_key); + suword32( + __DEVOLATILE(uint32_t *, + &cv->c_has_waiters), 0); + umtxq_lock(&uq->uq_key); + } + } + umtxq_unbusy(&uq->uq_key); if (error == ERESTART) error = EINTR; } umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (error); } /* * Signal a userland condition variable. */ static int do_cv_signal(struct thread *td, struct ucond *cv) { struct umtx_key key; int error, cnt, nwake; uint32_t flags; flags = fuword32(&cv->c_flags); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); cnt = umtxq_count(&key); nwake = umtxq_signal(&key, 1); if (cnt <= nwake) { umtxq_unlock(&key); error = suword32( __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); umtxq_lock(&key); } umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static int do_cv_broadcast(struct thread *td, struct ucond *cv) { struct umtx_key key; int error; uint32_t flags; flags = fuword32(&cv->c_flags); if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0) return (error); umtxq_lock(&key); umtxq_busy(&key); umtxq_signal(&key, INT_MAX); umtxq_unlock(&key); error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0); umtxq_lock(&key); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static int do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo) { struct umtx_q *uq; uint32_t flags, wrflags; int32_t state, oldstate; int32_t blocked_readers; int error; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); wrflags = URWLOCK_WRITE_OWNER; if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER)) wrflags |= URWLOCK_WRITE_WAITERS; for (;;) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); /* try to lock it */ while (!(state & wrflags)) { if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) { umtx_key_release(&uq->uq_key); return (EAGAIN); } oldstate = casuword32(&rwlock->rw_state, state, state + 1); if (oldstate == state) { umtx_key_release(&uq->uq_key); return (0); } state = oldstate; } if (error) break; /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * re-read the state, in case it changed between the try-lock above * and the check below */ state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); /* set read contention bit */ while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS); if (oldstate == state) goto sleep; state = oldstate; } /* state is changed while setting flags, restart */ if (!(state & wrflags)) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); continue; } sleep: /* contention bit is set, before sleeping, increase read waiter count */ blocked_readers = fuword32(&rwlock->rw_blocked_readers); suword32(&rwlock->rw_blocked_readers, blocked_readers+1); while (state & wrflags) { umtxq_lock(&uq->uq_key); umtxq_insert(uq); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "urdlck", timo); umtxq_busy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); if (error) break; state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); } 
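		/*
		 * Note that rw_blocked_readers is maintained with plain
		 * fuword32()/suword32() pairs; this is safe only because
		 * the chain is held busy (umtxq_busy) across each
		 * read-modify-write.
		 */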
/* decrease read waiter count, and may clear read contention bit */ blocked_readers = fuword32(&rwlock->rw_blocked_readers); suword32(&rwlock->rw_blocked_readers, blocked_readers-1); if (blocked_readers == 1) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_READ_WAITERS); if (oldstate == state) break; state = oldstate; } } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); return (error); } static int do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = do_rw_rdlock(td, obj, val, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } if (error == ERESTART) error = EINTR; return (error); } static int do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int32_t blocked_writers; int32_t blocked_readers; int error; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); blocked_readers = 0; for (;;) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER); if (oldstate == state) { umtx_key_release(&uq->uq_key); return (0); } state = oldstate; } if (error) { if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) && blocked_readers != 0) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } break; } /* grab monitor lock */ umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_unlock(&uq->uq_key); /* * re-read the state, in case it changed between the try-lock above * and the check below */ state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) && (state & URWLOCK_WRITE_WAITERS) == 0) { oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS); if (oldstate == state) goto sleep; state = oldstate; } if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); continue; } sleep: blocked_writers = fuword32(&rwlock->rw_blocked_writers); suword32(&rwlock->rw_blocked_writers, blocked_writers+1); while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) { umtxq_lock(&uq->uq_key); umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unbusy(&uq->uq_key); error = umtxq_sleep(uq, "uwrlck", timo); umtxq_busy(&uq->uq_key); umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE); umtxq_unlock(&uq->uq_key); if (error) break; state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); } blocked_writers = fuword32(&rwlock->rw_blocked_writers); suword32(&rwlock->rw_blocked_writers, blocked_writers-1); if (blocked_writers == 1) { state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_WRITE_WAITERS); if 
(oldstate == state) break; state = oldstate; } blocked_readers = fuword32(&rwlock->rw_blocked_readers); } else blocked_readers = 0; umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } umtx_key_release(&uq->uq_key); return (error); } static int do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout) { struct timespec ts, ts2, ts3; struct timeval tv; int error; getnanouptime(&ts); timespecadd(&ts, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = do_rw_wrlock(td, obj, tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&ts2); if (timespeccmp(&ts2, &ts, >=)) { error = ETIMEDOUT; break; } ts3 = ts; timespecsub(&ts3, &ts2); TIMESPEC_TO_TIMEVAL(&tv, &ts3); } if (error == ERESTART) error = EINTR; return (error); } static int do_rw_unlock(struct thread *td, struct urwlock *rwlock) { struct umtx_q *uq; uint32_t flags; int32_t state, oldstate; int error, q, count; uq = td->td_umtxq; flags = fuword32(&rwlock->rw_flags); error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state)); if (state & URWLOCK_WRITE_OWNER) { for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state & ~URWLOCK_WRITE_OWNER); if (oldstate != state) { state = oldstate; if (!(oldstate & URWLOCK_WRITE_OWNER)) { error = EPERM; goto out; } } else break; } } else if (URWLOCK_READER_COUNT(state) != 0) { for (;;) { oldstate = casuword32(&rwlock->rw_state, state, state - 1); if (oldstate != state) { state = oldstate; if (URWLOCK_READER_COUNT(oldstate) == 0) { error = EPERM; goto out; } } else break; } } else { error = EPERM; goto out; } count = 0; if (!(flags & URWLOCK_PREFER_READER)) { if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } else if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } } else { if (state & URWLOCK_READ_WAITERS) { count = INT_MAX; q = UMTX_SHARED_QUEUE; } else if (state & URWLOCK_WRITE_WAITERS) { count = 1; q = UMTX_EXCLUSIVE_QUEUE; } } if (count) { umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_signal_queue(&uq->uq_key, count, q); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); } out: umtx_key_release(&uq->uq_key); return (error); } static int do_sem_wait(struct thread *td, struct _usem *sem, struct timespec *timeout) { struct umtx_q *uq; struct timeval tv; struct timespec cts, ets, tts; uint32_t flags, count; int error; uq = td->td_umtxq; flags = fuword32(&sem->_flags); error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key); if (error != 0) return (error); umtxq_lock(&uq->uq_key); umtxq_busy(&uq->uq_key); umtxq_insert(uq); umtxq_unlock(&uq->uq_key); if (fuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters)) == 0) casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1); count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count)); if (count != 0) { umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_remove(uq); umtxq_unlock(&uq->uq_key); umtx_key_release(&uq->uq_key); return (0); } umtxq_lock(&uq->uq_key); umtxq_unbusy(&uq->uq_key); umtxq_unlock(&uq->uq_key); umtxq_lock(&uq->uq_key); if (timeout == NULL) { error = umtxq_sleep(uq, "usem", 0); } else { getnanouptime(&ets); timespecadd(&ets, timeout); TIMESPEC_TO_TIMEVAL(&tv, timeout); for (;;) { error = umtxq_sleep(uq, "usem", tvtohz(&tv)); if (error != ETIMEDOUT) break; getnanouptime(&cts); if (timespeccmp(&cts, &ets, >=)) { error = ETIMEDOUT; break; } tts = ets; timespecsub(&tts, &cts); 
			TIMESPEC_TO_TIMEVAL(&tv, &tts);
		}
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (error == ERESTART)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Wake up a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	flags = fuword32(&sem->_flags);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		umtxq_unlock(&key);
		error = suword32(
		    __DEVOLATILE(uint32_t *, &sem->_has_waiters), 0);
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
    /* struct umtx *umtx */
{
	return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
}

int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
    /* struct umtx *umtx */
{
	return do_unlock_umtx(td, uap->umtx, td->td_tid);
}

static int
__umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	/* Allow a null timespec (wait forever). */
	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0) {
			return (EINVAL);
		}
		ts = &timeout;
	}
	return (do_lock_umtx(td, uap->obj, uap->val, ts));
}

static int
__umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
{
	return (do_unlock_umtx(td, uap->obj, uap->val));
}

static int
__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 0, 0);
}

static int
__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 0);
}

static int
__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
{
	struct timespec *ts, timeout;
	int error;

	if (uap->uaddr2 == NULL)
		ts = NULL;
	else {
		error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
		if (error != 0)
			return (error);
		if (timeout.tv_nsec >= 1000000000 ||
		    timeout.tv_nsec < 0)
			return (EINVAL);
		ts = &timeout;
	}
	return do_wait(td, uap->obj, uap->val, ts, 1, 1);
}

static int
__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
{
	return (kern_umtx_wake(td, uap->obj, uap->val, 0));
}

+#define BATCH_SIZE	128
+static int
+__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap)
+{
+	int count = uap->val;
+	void *uaddrs[BATCH_SIZE];
+	char **upp = (char **)uap->obj;
+	int tocopy;
+	int error = 0;
+	int i, pos = 0;
+
+	while (count > 0) {
+		tocopy = count;
+		if (tocopy > BATCH_SIZE)
+			tocopy = BATCH_SIZE;
+		error = copyin(upp+pos, uaddrs, tocopy * sizeof(char *));
+		if (error != 0)
+			break;
+		for (i = 0; i < tocopy; ++i)
+			kern_umtx_wake(td, uaddrs[i], INT_MAX, 1);
+		count -= tocopy;
+		pos += tocopy;
+	}
+	return (error);
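+	/*
+	 * (Addresses already woken when a later copyin() fails stay
+	 * woken; only the error for the offending batch is returned.
+	 * BATCH_SIZE bounds the on-stack uaddrs[] array to 128
+	 * pointers, i.e. 1KB on LP64.)
+	 */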
+} + static int __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) { return (kern_umtx_wake(td, uap->obj, uap->val, 1)); } static int __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, 0); } static int __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY); } static int __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); } static int __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_wake_umutex(td, uap->obj); } static int __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) { return do_unlock_umutex(td, uap->obj); } static int __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) { return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1); } static int __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) { return do_cv_signal(td, uap->obj); } static int __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) { return do_cv_broadcast(td, uap->obj); } static int __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_wrlock2(td, uap->obj, &timeout); } return (error); } static int __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) { return do_rw_unlock(td, uap->obj); } static int __umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
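	 *
	 * Every timed entry point repeats the same validation; the
	 * invariant amounts to (a hypothetical helper, for illustration
	 * only):
	 *
	 *	static int
	 *	timespec_valid(const struct timespec *ts)
	 *	{
	 *		return (ts->tv_nsec >= 0 &&
	 *		    ts->tv_nsec < 1000000000);
	 *	}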
*/ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin(uap->uaddr2, &timeout, sizeof(timeout)); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_sem_wait(td, uap->obj, ts)); } static int __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) { return do_sem_wake(td, uap->obj); } typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); static _umtx_op_func op_table[] = { __umtx_op_lock_umtx, /* UMTX_OP_LOCK */ __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */ __umtx_op_wait, /* UMTX_OP_WAIT */ __umtx_op_wake, /* UMTX_OP_WAKE */ __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */ __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT*/ __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */ __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */ __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */ __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */ __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */ __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ __umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */ - __umtx_op_sem_wake /* UMTX_OP_SEM_WAKE */ + __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ + __umtx_op_nwake_private /* UMTX_OP_NWAKE_PRIVATE */ }; int _umtx_op(struct thread *td, struct _umtx_op_args *uap) { if ((unsigned)uap->op < UMTX_OP_MAX) return (*op_table[uap->op])(td, uap); return (EINVAL); } #ifdef COMPAT_FREEBSD32 int freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap) /* struct umtx *umtx */ { return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL)); } int freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap) /* struct umtx *umtx */ { return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid)); } struct timespec32 { uint32_t tv_sec; uint32_t tv_nsec; }; static inline int copyin_timeout32(void *addr, struct timespec *tsp) { struct timespec32 ts32; int error; error = copyin(addr, &ts32, sizeof(struct timespec32)); if (error == 0) { tsp->tv_sec = ts32.tv_sec; tsp->tv_nsec = ts32.tv_nsec; } return (error); } static int __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
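 The compat32 entry points reuse copyin_timeout32() above to widen the 32-bit timespec, then apply the same validation as the native ops.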
*/ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } ts = &timeout; } return (do_lock_umtx32(td, uap->obj, uap->val, ts)); } static int __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap) { return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val)); } static int __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 0); } static int __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, 0); } static int __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT); } static int __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout); } return (error); } static int __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec timeout; int error; /* Allow a null timespec (wait forever). 
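 The rwlock ops branch on a NULL timeout up front, calling do_rw_wrlock() for an untimed wait and do_rw_wrlock2() for a timed one, rather than passing a possibly-NULL timespec pointer down.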
*/ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) { return (EINVAL); } error = do_rw_wrlock2(td, uap->obj, &timeout); } return (error); } static int __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return do_wait(td, uap->obj, uap->val, ts, 1, 1); } static int __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) ts = NULL; else { error = copyin_timeout32(uap->uaddr2, &timeout); if (error != 0) return (error); if (timeout.tv_nsec >= 1000000000 || timeout.tv_nsec < 0) return (EINVAL); ts = &timeout; } return (do_sem_wait(td, uap->obj, ts)); } +/* + * 32-bit compat version of __umtx_op_nwake_private(): the userland + * array holds 32-bit pointers, which are widened before each wakeup. + */ +static int +__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) +{ + int count = uap->val; + uint32_t uaddrs[BATCH_SIZE]; + uint32_t **upp = (uint32_t **)uap->obj; + int tocopy; + int error = 0; + int i, pos = 0; + + while (count > 0) { + tocopy = count; + if (tocopy > BATCH_SIZE) + tocopy = BATCH_SIZE; + error = copyin(upp+pos, uaddrs, tocopy * sizeof(uint32_t)); + if (error != 0) + break; + for (i = 0; i < tocopy; ++i) + kern_umtx_wake(td, (void *)(intptr_t)uaddrs[i], + INT_MAX, 1); + count -= tocopy; + pos += tocopy; + } + return (error); +} + static _umtx_op_func op_table_compat32[] = { __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */ __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */ __umtx_op_wait_compat32, /* UMTX_OP_WAIT */ __umtx_op_wake, /* UMTX_OP_WAKE */ __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */ __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */ __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */ __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */ __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT*/ __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */ __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */ __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */ __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */ __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */ __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */ __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */ __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */ __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */ __umtx_op_wake_umutex, /* UMTX_OP_UMUTEX_WAKE */ __umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */ - __umtx_op_sem_wake /* UMTX_OP_SEM_WAKE */ + __umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */ + __umtx_op_nwake_private32 /* UMTX_OP_NWAKE_PRIVATE */ }; int freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap) { if ((unsigned)uap->op < UMTX_OP_MAX) return (*op_table_compat32[uap->op])(td, (struct _umtx_op_args *)uap); return (EINVAL); } #endif void umtx_thread_init(struct thread *td) { td->td_umtxq = umtxq_alloc(); td->td_umtxq->uq_thread = td; } void umtx_thread_fini(struct thread *td) { umtxq_free(td->td_umtxq); } /* * Called when a new thread is created, e.g. by fork().
*/ void umtx_thread_alloc(struct thread *td) { struct umtx_q *uq; uq = td->td_umtxq; uq->uq_inherited_pri = PRI_MAX; KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); KASSERT(uq->uq_thread == td, ("uq_thread != td")); KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); } /* * exec() hook. */ static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused) { umtx_thread_cleanup(curthread); } /* * thread_exit() hook. */ void umtx_thread_exit(struct thread *td) { umtx_thread_cleanup(td); } /* * Clean up umtx data: drop ownership of any contested PI mutexes and * give back any lent priority. */ static void umtx_thread_cleanup(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; if ((uq = td->td_umtxq) == NULL) return; mtx_lock_spin(&umtx_lock); uq->uq_inherited_pri = PRI_MAX; while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { pi->pi_owner = NULL; TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); } mtx_unlock_spin(&umtx_lock); thread_lock(td); sched_unlend_user_prio(td, PRI_MAX); thread_unlock(td); } diff --git a/sys/sys/_umtx.h b/sys/sys/_umtx.h index fcda9744c35e..fa0ed99280b7 100644 --- a/sys/sys/_umtx.h +++ b/sys/sys/_umtx.h @@ -1,66 +1,67 @@ /*- * Copyright (c) 2010, David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef _SYS__UMTX_H_ #define _SYS__UMTX_H_ #include <sys/_types.h> struct umtx { volatile unsigned long u_owner; /* Owner of the mutex.
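 The low bits hold the owning thread id; the sign bit, UMTX_CONTESTED, is set once waiters exist, and umtx_owner() in sys/umtx.h masks it off.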
*/ }; struct umutex { volatile __lwpid_t m_owner; /* Owner of the mutex */ __uint32_t m_flags; /* Flags of the mutex */ __uint32_t m_ceilings[2]; /* Priority protect ceiling */ __uint32_t m_spare[4]; }; struct ucond { volatile __uint32_t c_has_waiters; /* Has waiters in kernel */ __uint32_t c_flags; /* Flags of the condition variable */ - __uint32_t c_spare[2]; /* Spare space */ + __uint32_t c_clockid; /* Clock id */ + __uint32_t c_spare[1]; /* Spare space */ }; struct urwlock { volatile __int32_t rw_state; __uint32_t rw_flags; __uint32_t rw_blocked_readers; __uint32_t rw_blocked_writers; __uint32_t rw_spare[4]; }; struct _usem { volatile __uint32_t _has_waiters; volatile __uint32_t _count; __uint32_t _flags; }; #endif /* !_SYS__UMTX_H_ */ diff --git a/sys/sys/umtx.h b/sys/sys/umtx.h index dab862ef1b9f..c7d69450dea4 100644 --- a/sys/sys/umtx.h +++ b/sys/sys/umtx.h @@ -1,180 +1,185 @@ /*- * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ * */ #ifndef _SYS_UMTX_H_ #define _SYS_UMTX_H_ #include <sys/_umtx.h> #include <sys/limits.h> #define UMTX_UNOWNED 0x0 #define UMTX_CONTESTED LONG_MIN #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ #define UMUTEX_UNOWNED 0x0 #define UMUTEX_CONTESTED 0x80000000U #define UMUTEX_ERROR_CHECK 0x0002 /* Error-checking mutex */ #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ /* urwlock flags */ #define URWLOCK_PREFER_READER 0x0002 #define URWLOCK_WRITE_OWNER 0x80000000U #define URWLOCK_WRITE_WAITERS 0x40000000U #define URWLOCK_READ_WAITERS 0x20000000U #define URWLOCK_MAX_READERS 0x1fffffffU #define URWLOCK_READER_COUNT(c) ((c) & URWLOCK_MAX_READERS) /* _usem flags */ #define SEM_NAMED 0x0002 /* op code for _umtx_op */ #define UMTX_OP_LOCK 0 #define UMTX_OP_UNLOCK 1 #define UMTX_OP_WAIT 2 #define UMTX_OP_WAKE 3 #define UMTX_OP_MUTEX_TRYLOCK 4 #define UMTX_OP_MUTEX_LOCK 5 #define UMTX_OP_MUTEX_UNLOCK 6 #define UMTX_OP_SET_CEILING 7 #define UMTX_OP_CV_WAIT 8 #define UMTX_OP_CV_SIGNAL 9 #define UMTX_OP_CV_BROADCAST 10 #define UMTX_OP_WAIT_UINT 11 #define UMTX_OP_RW_RDLOCK 12 #define UMTX_OP_RW_WRLOCK 13 #define UMTX_OP_RW_UNLOCK 14 #define UMTX_OP_WAIT_UINT_PRIVATE 15 #define UMTX_OP_WAKE_PRIVATE 16 #define UMTX_OP_MUTEX_WAIT 17 #define UMTX_OP_MUTEX_WAKE 18 #define UMTX_OP_SEM_WAIT 19 #define UMTX_OP_SEM_WAKE 20 -#define UMTX_OP_MAX 21 +#define UMTX_OP_NWAKE_PRIVATE 21 +#define UMTX_OP_MAX 22 -/* flags for UMTX_OP_CV_WAIT */ -#define UMTX_CHECK_UNPARKING 0x01 +/* Flags for UMTX_OP_CV_WAIT */ +#define CVWAIT_CHECK_UNPARKING 0x01 +#define CVWAIT_ABSTIME 0x02 +#define CVWAIT_CLOCKID 0x04 + +#define UMTX_CHECK_UNPARKING CVWAIT_CHECK_UNPARKING #ifndef _KERNEL int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); /* * Old (deprecated) userland mutex system calls. */ int _umtx_lock(struct umtx *mtx); int _umtx_unlock(struct umtx *mtx); /* * Standard API: try the uncontested acquire/release in userland and ask * the kernel to resolve failures. */ static __inline void umtx_init(struct umtx *umtx) { umtx->u_owner = UMTX_UNOWNED; } static __inline u_long umtx_owner(struct umtx *umtx) { return (umtx->u_owner & ~LONG_MIN); } static __inline int umtx_lock(struct umtx *umtx, u_long id) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) if (_umtx_lock(umtx) == -1) return (errno); return (0); } static __inline int umtx_trylock(struct umtx *umtx, u_long id) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) return (EBUSY); return (0); } static __inline int umtx_timedlock(struct umtx *umtx, u_long id, const struct timespec *timeout) { if (atomic_cmpset_acq_long(&umtx->u_owner, UMTX_UNOWNED, id) == 0) if (_umtx_op(umtx, UMTX_OP_LOCK, id, 0, __DECONST(void *, timeout)) == -1) return (errno); return (0); } static __inline int umtx_unlock(struct umtx *umtx, u_long id) { if (atomic_cmpset_rel_long(&umtx->u_owner, id, UMTX_UNOWNED) == 0) if (_umtx_unlock(umtx) == -1) return (errno); return (0); } static __inline int umtx_wait(u_long *p, long val, const struct timespec *timeout) { if (_umtx_op(p, UMTX_OP_WAIT, val, 0, __DECONST(void *, timeout)) == -1) return (errno); return (0); } /* Wake threads waiting on a user address.
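 */

/*
 * Editor's sketch, not part of the original header: a minimal wrapper
 * for the new UMTX_OP_NWAKE_PRIVATE operation.  A single system call
 * wakes every waiter on each of `count' private umtx addresses; the
 * kernel copies the pointer array in from userland in BATCH_SIZE
 * chunks.  The wrapper name is hypothetical.
 */
static __inline int
umtx_nwake_private(void **uaddrs, int count)
{

	if (_umtx_op(uaddrs, UMTX_OP_NWAKE_PRIVATE, (u_long)count,
	    0, 0) == -1)
		return (errno);
	return (0);
}

/* Wake threads waiting on a user address.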
*/ static __inline int umtx_wake(u_long *p, int nr_wakeup) { if (_umtx_op(p, UMTX_OP_WAKE, nr_wakeup, 0, 0) == -1) return (errno); return (0); } #else struct thread; struct umtx_q *umtxq_alloc(void); void umtxq_free(struct umtx_q *); int kern_umtx_wake(struct thread *, void *, int, int); void umtx_pi_adjust(struct thread *, u_char); void umtx_thread_init(struct thread *); void umtx_thread_fini(struct thread *); void umtx_thread_alloc(struct thread *); void umtx_thread_exit(struct thread *); #endif /* !_KERNEL */ #endif /* !_SYS_UMTX_H_ */
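
Editor's note, not part of the commit: taken together, the new c_clockid field in struct ucond and the CVWAIT_ABSTIME/CVWAIT_CLOCKID flags let userland wait on a condition variable with an absolute deadline measured against a clock of its choosing. Below is a minimal sketch, assuming the kernel-side do_cv_wait() honors the flags the way the libthr half of this change expects; the function and variable names are illustrative only.

#include <errno.h>
#include <time.h>
#include <sys/umtx.h>

/*
 * Wait on `cv' until an absolute CLOCK_MONOTONIC deadline `secs' seconds
 * from now.  The caller holds `m'; the cv_wait op unlocks it in the
 * kernel once the thread is queued.
 */
static int
example_cv_timedwait(struct ucond *cv, struct umutex *m, time_t secs)
{
	struct timespec abstime;

	cv->c_clockid = CLOCK_MONOTONIC;	/* consulted via CVWAIT_CLOCKID */
	clock_gettime(CLOCK_MONOTONIC, &abstime);
	abstime.tv_sec += secs;
	if (_umtx_op(cv, UMTX_OP_CV_WAIT, CVWAIT_ABSTIME | CVWAIT_CLOCKID,
	    m, &abstime) == -1)
		return (errno);	/* e.g. ETIMEDOUT once the deadline passes */
	return (0);
}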