diff --git a/share/man/man9/condvar.9 b/share/man/man9/condvar.9 index 2ac8906ebd86..db3eab1225b4 100644 --- a/share/man/man9/condvar.9 +++ b/share/man/man9/condvar.9 @@ -1,215 +1,233 @@ .\" .\" Copyright (C) 2000 Jason Evans . All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice(s), this list of conditions and the following disclaimer as .\" the first lines of this file unmodified other than the possible .\" addition of one or more copyright notices. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice(s), this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY .\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED .\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE .\" DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY .\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES .\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR .\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER .\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH .\" DAMAGE. .\" .\" $FreeBSD$ .\" .Dd June 5, 2007 .Dt CONDVAR 9 .Os .Sh NAME .Nm condvar , .Nm cv_init , .Nm cv_destroy , .Nm cv_wait , .Nm cv_wait_sig , .Nm cv_wait_unlock , .Nm cv_timedwait , .Nm cv_timedwait_sig , .Nm cv_signal , .Nm cv_broadcast , .Nm cv_broadcastpri , .Nm cv_wmesg .Nd kernel condition variable .Sh SYNOPSIS .In sys/param.h .In sys/proc.h .In sys/condvar.h .Ft void .Fn cv_init "struct cv *cvp" "const char *desc" .Ft void .Fn cv_destroy "struct cv *cvp" .Ft void .Fn cv_wait "struct cv *cvp" "lock" .Ft int .Fn cv_wait_sig "struct cv *cvp" "lock" .Ft void .Fn cv_wait_unlock "struct cv *cvp" "lock" .Ft int .Fn cv_timedwait "struct cv *cvp" "lock" "int timo" .Ft int .Fn cv_timedwait_sig "struct cv *cvp" "lock" "int timo" .Ft void .Fn cv_signal "struct cv *cvp" .Ft void .Fn cv_broadcast "struct cv *cvp" .Ft void .Fn cv_broadcastpri "struct cv *cvp" "int pri" .Ft const char * .Fn cv_wmesg "struct cv *cvp" .Sh DESCRIPTION Condition variables are used in conjunction with mutexes to wait for conditions to occur. Condition variables are created with .Fn cv_init , where .Fa cvp is a pointer to space for a .Vt struct cv , and .Fa desc is a pointer to a null-terminated character string that describes the condition variable. Condition variables are destroyed with .Fn cv_destroy . Threads wait on condition variables by calling .Fn cv_wait , .Fn cv_wait_sig , .Fn cv_wait_unlock , .Fn cv_timedwait , or .Fn cv_timedwait_sig . Threads unblock waiters by calling .Fn cv_signal to unblock one waiter, or .Fn cv_broadcast or .Fn cv_broadcastpri to unblock all waiters. In addition to waking waiters, .Fn cv_broadcastpri ensures that all of the waiters have a priority of at least .Fa pri by raising the priority of any threads that do not. .Fn cv_wmesg returns the description string of .Fa cvp , as set by the initial call to .Fn cv_init . 
.Pp The .Fa lock argument is a pointer to either a .Xr mutex 9 , .Xr rwlock 9 , or .Xr sx 9 lock. A .Xr mutex 9 argument must be initialized with .Dv MTX_DEF and not .Dv MTX_SPIN . A thread must hold .Fa lock before calling .Fn cv_wait , .Fn cv_wait_sig , .Fn cv_wait_unlock , .Fn cv_timedwait , or .Fn cv_timedwait_sig . When a thread waits on a condition, .Fa lock is atomically released before the thread is blocked, then reacquired before the function call returns. +In addition, the thread will fully drop the +.Va Giant +mutex +(even if recursed) +while the thread is suspended and will reacquire the +.Va Giant +mutex before the function returns. The .Fn cv_wait_unlock function does not reacquire the lock before returning. +Note that the +.Va Giant +mutex may be specified as +.Fa lock . +However, +.Va Giant +may not be used as +.Fa lock +for the +.Fn cv_wait_unlock +function. All waiters must pass the same .Fa lock in conjunction with .Fa cvp . .Pp When .Fn cv_wait , .Fn cv_wait_sig , .Fn cv_wait_unlock , .Fn cv_timedwait , and .Fn cv_timedwait_sig unblock, their calling threads are made runnable. .Fn cv_timedwait and .Fn cv_timedwait_sig wait for at most .Fa timo / .Dv HZ seconds before being unblocked and returning .Er EWOULDBLOCK ; otherwise, they return 0. .Fn cv_wait_sig and .Fn cv_timedwait_sig return prematurely with a value of .Er EINTR or .Er ERESTART if a signal is caught, or 0 if signaled via .Fn cv_signal or .Fn cv_broadcast . .Sh RETURN VALUES If successful, .Fn cv_wait_sig , .Fn cv_timedwait , and .Fn cv_timedwait_sig return 0. Otherwise, a non-zero error code is returned. .Pp .Fn cv_wmesg returns the description string that was passed to .Fn cv_init . .Sh ERRORS .Fn cv_wait_sig and .Fn cv_timedwait_sig will fail if: .Bl -tag -width Er .It Bq Er EINTR A signal was caught and the system call should be interrupted. .It Bq Er ERESTART A signal was caught and the system call should be restarted. .El .Pp .Fn cv_timedwait and .Fn cv_timedwait_sig will fail if: .Bl -tag -width Er .It Bq Er EWOULDBLOCK Timeout expired. .El .Sh SEE ALSO .Xr locking 9 , .Xr mtx_pool 9 , .Xr mutex 9 , .Xr rwlock 9 , .Xr sema 9 , .Xr sleep 9 , .Xr sx 9 diff --git a/share/man/man9/sleep.9 b/share/man/man9/sleep.9 index aa87642d5b63..a9f087351510 100644 --- a/share/man/man9/sleep.9 +++ b/share/man/man9/sleep.9 @@ -1,320 +1,326 @@ .\" .\" Copyright (c) 1996 Joerg Wunsch .\" .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
.\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\" $FreeBSD$ .\" .Dd April 4, 2008 .Os .Dt SLEEP 9 .Sh NAME .Nm msleep , .Nm msleep_spin , .Nm pause , .Nm tsleep , .Nm wakeup .Nd wait for events .Sh SYNOPSIS .In sys/param.h .In sys/systm.h .In sys/proc.h .Ft int .Fn msleep "void *chan" "struct mtx *mtx" "int priority" "const char *wmesg" "int timo" .Ft int .Fn msleep_spin "void *chan" "struct mtx *mtx" "const char *wmesg" "int timo" .Ft void .Fn pause "const char *wmesg" "int timo" .Ft int .Fn tsleep "void *chan" "int priority" "const char *wmesg" "int timo" .Ft void .Fn wakeup "void *chan" .Ft void .Fn wakeup_one "void *chan" .Sh DESCRIPTION The functions .Fn tsleep , .Fn msleep , .Fn msleep_spin , .Fn pause , .Fn wakeup , and .Fn wakeup_one handle event-based thread blocking. If a thread must wait for an external event, it is put to sleep by .Fn tsleep , .Fn msleep , .Fn msleep_spin , or .Fn pause . Threads may also wait using one of the locking primitive sleep routines .Xr mtx_sleep 9 , .Xr rw_sleep 9 , or .Xr sx_sleep 9 . .Pp The parameter .Fa chan is an arbitrary address that uniquely identifies the event on which the thread is being put to sleep. All threads sleeping on a single .Fa chan are woken up later by .Fn wakeup , often called from inside an interrupt routine, to indicate that the resource the thread was blocking on is available now. .Pp The parameter .Fa priority specifies a new priority for the thread as well as some optional flags. If the new priority is not 0, then the thread will be made runnable with the specified .Fa priority when it resumes. .Dv PZERO should never be used, as it is for compatibility only. A new priority of 0 means to use the thread's current priority when it is made runnable again. If .Fa priority includes the .Dv PCATCH flag, signals are checked before and after sleeping, otherwise signals are not checked. If .Dv PCATCH is set and a signal needs to be delivered, .Er ERESTART is returned if the current system call should be restarted if possible, and .Er EINTR is returned if the system call should be interrupted by the signal (return .Er EINTR ) . .Pp The parameter .Fa wmesg is a string describing the sleep condition for tools like .Xr ps 1 . Due to the limited space of those programs to display arbitrary strings, this message should not be longer than 6 characters. .Pp The parameter .Fa timo specifies a timeout for the sleep. If .Fa timo is not 0, then the thread will sleep for at most .Fa timo No / Va hz seconds. If the timeout expires, then the sleep function will return .Er EWOULDBLOCK . .Pp Several of the sleep functions including .Fn msleep , .Fn msleep_spin , and the locking primitive sleep routines specify an additional lock parameter. The lock will be released before sleeping and reacquired before the sleep routine returns. If .Fa priority includes the .Dv PDROP flag, then the lock will not be reacquired before returning. 
The lock is used to ensure that a condition can be checked atomically and that the current thread can be suspended without missing a change to the condition or an associated wakeup. In addition, all of the sleep routines will fully drop the .Va Giant mutex (even if recursed) while the thread is suspended and will reacquire the .Va Giant mutex before the function returns. +Note that the +.Va Giant +mutex may be specified as the lock to drop. +In that case, however, the +.Dv PDROP +flag is not allowed. .Pp To avoid lost wakeups, either a lock should be used to protect against races, or a timeout should be specified to place an upper bound on the delay due to a lost wakeup. As a result, the .Fn tsleep function should only be invoked with a timeout of 0 when the .Va Giant mutex is held. .Pp The .Fn msleep function requires that .Fa mtx reference a default, i.e., non-spin, mutex. Its use is deprecated in favor of .Xr mtx_sleep 9 , which provides identical behavior. .Pp The .Fn msleep_spin function requires that .Fa mtx reference a spin mutex. The .Fn msleep_spin function does not accept a .Fa priority parameter and thus does not support changing the current thread's priority, the .Dv PDROP flag, or catching signals via the .Dv PCATCH flag. .Pp The .Fn pause function is a wrapper around .Fn tsleep that suspends execution of the current thread for the indicated timeout. The thread cannot be awakened early by signals or calls to .Fn wakeup or .Fn wakeup_one . .Pp The .Fn wakeup_one function makes the first thread in the queue that is sleeping on the parameter .Fa chan runnable. This reduces the load when a large number of threads are sleeping on the same address, but only one of them can actually do any useful work when made runnable. .Pp Due to the way it works, the .Fn wakeup_one function requires that only related threads sleep on a specific .Fa chan address. It is the programmer's responsibility to choose a unique .Fa chan value. The older .Fn wakeup function did not require this, though it was never good practice for threads to share a .Fa chan value. When converting from .Fn wakeup to .Fn wakeup_one , pay particular attention to ensure that no other threads wait on the same .Fa chan . .Sh RETURN VALUES If the thread is awakened by a call to .Fn wakeup or .Fn wakeup_one , the .Fn msleep , .Fn msleep_spin , .Fn tsleep , and locking primitive sleep functions return 0. Otherwise, a non-zero error code is returned. .Sh ERRORS .Fn msleep , .Fn msleep_spin , .Fn tsleep , and the locking primitive sleep functions will fail if: .Bl -tag -width Er .It Bq Er EINTR The .Dv PCATCH flag was specified, a signal was caught, and the system call should be interrupted. .It Bq Er ERESTART The .Dv PCATCH flag was specified, a signal was caught, and the system call should be restarted. .It Bq Er EWOULDBLOCK A non-zero timeout was specified and the timeout expired. .El .Sh SEE ALSO .Xr ps 1 , .Xr locking 9 , .Xr malloc 9 , .Xr mi_switch 9 , .Xr mtx_sleep 9 , .Xr rw_sleep 9 , .Xr sx_sleep 9 .Sh HISTORY The functions .Fn sleep and .Fn wakeup were present in .At v1 . They were probably also present in the preceding PDP-7 version of .Ux . They were the basic process synchronization model. .Pp The .Fn tsleep function appeared in .Bx 4.4 and added the parameters .Fa wmesg and .Fa timo . The .Fn sleep function was removed in .Fx 2.2 . The .Fn wakeup_one function appeared in .Fx 2.2 . The .Fn msleep function appeared in .Fx 5.0 , and the .Fn msleep_spin function appeared in .Fx 6.2 .
The .Fn pause function appeared in .Fx 7.0 . .Sh AUTHORS .An -nosplit This manual page was written by .An J\(:org Wunsch Aq joerg@FreeBSD.org . diff --git a/sys/kern/kern_condvar.c b/sys/kern/kern_condvar.c index 5ee40a32bec6..8b002b1998fd 100644 --- a/sys/kern/kern_condvar.c +++ b/sys/kern/kern_condvar.c @@ -1,429 +1,451 @@ /*- * Copyright (c) 2000 Jake Burkholder . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/condvar.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/signalvar.h> #include <sys/sleepqueue.h> #ifdef KTRACE #include <sys/uio.h> #include <sys/ktrace.h> #endif /* * Common sanity checks for cv_wait* functions. */ #define CV_ASSERT(cvp, lock, td) do { \ KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \ KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__)); \ KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ KASSERT((lock) != NULL, ("%s: lock NULL", __func__)); \ } while (0) /* * Initialize a condition variable. Must be called before use. */ void cv_init(struct cv *cvp, const char *desc) { cvp->cv_description = desc; cvp->cv_waiters = 0; } /* * Destroy a condition variable. The condition variable must be re-initialized * in order to be re-used. */ void cv_destroy(struct cv *cvp) { #ifdef INVARIANTS struct sleepqueue *sq; sleepq_lock(cvp); sq = sleepq_lookup(cvp); sleepq_release(cvp); KASSERT(sq == NULL, ("%s: associated sleep queue non-empty", __func__)); #endif } /* * Wait on a condition variable. The current thread is placed on the condition * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same * condition variable will resume the thread. The mutex is released before * sleeping and will be held on return. It is recommended that the mutex be * held when cv_signal or cv_broadcast are called.
*/ void _cv_wait(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; int lock_state; td = curthread; + lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); WITNESS_SAVE(lock, lock_witness); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * During autoconfiguration, just give interrupts * a chance, then just return. Don't run any other * thread or panic below, in case this is the idle * process and already asleep. */ return; } sleepq_lock(cvp); cvp->cv_waiters++; DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); - lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); + if (lock != &Giant.lock_object) { + if (class->lc_flags & LC_SLEEPABLE) + sleepq_release(cvp); + lock_state = class->lc_unlock(lock); + if (class->lc_flags & LC_SLEEPABLE) + sleepq_lock(cvp); + } sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); - class->lc_lock(lock, lock_state); - WITNESS_RESTORE(lock, lock_witness); + if (lock != &Giant.lock_object) { + class->lc_lock(lock, lock_state); + WITNESS_RESTORE(lock, lock_witness); + } } /* * Wait on a condition variable. This function differs from cv_wait by * not acquiring the mutex after the condition variable was signaled. */ void _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) { struct lock_class *class; struct thread *td; td = curthread; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); + KASSERT(lock != &Giant.lock_object, + ("cv_wait_unlock cannot be used with Giant")); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * During autoconfiguration, just give interrupts * a chance, then just return. Don't run any other * thread or panic below, in case this is the idle * process and already asleep. */ class->lc_unlock(lock); return; } sleepq_lock(cvp); cvp->cv_waiters++; DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); if (class->lc_flags & LC_SLEEPABLE) sleepq_release(cvp); class->lc_unlock(lock); if (class->lc_flags & LC_SLEEPABLE) sleepq_lock(cvp); sleepq_wait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); } /* * Wait on a condition variable, allowing interruption by signals. Return 0 if * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if * a signal was caught. If ERESTART is returned the system call should be * restarted if possible. */ int _cv_wait_sig(struct cv *cvp, struct lock_object *lock) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; struct proc *p; int lock_state, rval; td = curthread; p = td->td_proc; + lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); WITNESS_SAVE(lock, lock_witness); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * procs or panic below, in case this is the idle process and * already asleep.
*/ return (0); } sleepq_lock(cvp); cvp->cv_waiters++; DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); - lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); + if (lock != &Giant.lock_object) { + if (class->lc_flags & LC_SLEEPABLE) + sleepq_release(cvp); + lock_state = class->lc_unlock(lock); + if (class->lc_flags & LC_SLEEPABLE) + sleepq_lock(cvp); + } rval = sleepq_wait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); - class->lc_lock(lock, lock_state); - WITNESS_RESTORE(lock, lock_witness); + if (lock != &Giant.lock_object) { + class->lc_lock(lock, lock_state); + WITNESS_RESTORE(lock, lock_witness); + } return (rval); } /* * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout * expires. */ int _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; int lock_state, rval; td = curthread; rval = 0; + lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); WITNESS_SAVE(lock, lock_witness); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * thread or panic below, in case this is the idle process and * already asleep. */ return 0; } sleepq_lock(cvp); cvp->cv_waiters++; DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); sleepq_set_timeout(cvp, timo); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); - lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); + if (lock != &Giant.lock_object) { + if (class->lc_flags & LC_SLEEPABLE) + sleepq_release(cvp); + lock_state = class->lc_unlock(lock); + if (class->lc_flags & LC_SLEEPABLE) + sleepq_lock(cvp); + } rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); - class->lc_lock(lock, lock_state); - WITNESS_RESTORE(lock, lock_witness); + if (lock != &Giant.lock_object) { + class->lc_lock(lock, lock_state); + WITNESS_RESTORE(lock, lock_witness); + } return (rval); } /* * Wait on a condition variable for at most timo/hz seconds, allowing * interruption by signals. Returns 0 if the thread was resumed by cv_signal * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if * a signal was caught. */ int _cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo) { WITNESS_SAVE_DECL(lock_witness); struct lock_class *class; struct thread *td; struct proc *p; int lock_state, rval; td = curthread; p = td->td_proc; rval = 0; + lock_state = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif CV_ASSERT(cvp, lock, td); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Waiting on \"%s\"", cvp->cv_description); WITNESS_SAVE(lock, lock_witness); class = LOCK_CLASS(lock); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, just give * interrupts a chance, then just return; don't run any other * thread or panic below, in case this is the idle process and * already asleep. 
*/ return 0; } sleepq_lock(cvp); cvp->cv_waiters++; DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); sleepq_set_timeout(cvp, timo); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); - lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); + if (lock != &Giant.lock_object) { + if (class->lc_flags & LC_SLEEPABLE) + sleepq_release(cvp); + lock_state = class->lc_unlock(lock); + if (class->lc_flags & LC_SLEEPABLE) + sleepq_lock(cvp); + } rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); - class->lc_lock(lock, lock_state); - WITNESS_RESTORE(lock, lock_witness); + if (lock != &Giant.lock_object) { + class->lc_lock(lock, lock_state); + WITNESS_RESTORE(lock, lock_witness); + } return (rval); } /* * Signal a condition variable, wakes up one waiting thread. Will also wakeup * the swapper if the process is not in memory, so that it can bring the * sleeping process in. Note that this may also result in additional threads * being made runnable. Should be called with the same mutex as was passed to * cv_wait held. */ void cv_signal(struct cv *cvp) { int wakeup_swapper; wakeup_swapper = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { cvp->cv_waiters--; wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0); } sleepq_release(cvp); if (wakeup_swapper) kick_proc0(); } /* * Broadcast a signal to a condition variable. Wakes up all waiting threads. * Should be called with the same mutex as was passed to cv_wait held. */ void cv_broadcastpri(struct cv *cvp, int pri) { int wakeup_swapper; /* * XXX sleepq_broadcast pri argument changed from -1 meaning * no pri to 0 meaning no pri. */ wakeup_swapper = 0; if (pri == -1) pri = 0; sleepq_lock(cvp); if (cvp->cv_waiters > 0) { cvp->cv_waiters = 0; wakeup_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0); } sleepq_release(cvp); if (wakeup_swapper) kick_proc0(); } diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 6c10ba4c1ff2..ef682f488463 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,546 +1,550 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_sched.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/condvar.h> #include <sys/kdb.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/signalvar.h> #include <sys/sleepqueue.h> #include <sys/smp.h> #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/sysproto.h> #include <sys/vmmeter.h> #ifdef KTRACE #include <sys/uio.h> #include <sys/ktrace.h> #endif #include <machine/cpu.h> static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); int hogticks; int lbolt; static int pause_wchan; static struct callout loadav_callout; static struct callout lbolt_callout; struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ static int fscale __unused = FSCALE; SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); static void loadav(void *arg); static void lboltcb(void *arg); void sleepinit(void) { hogticks = (hz / 10) * 2; /* Default only. */ init_sleepqueues(); } /* * General sleep call. Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds * (0 means no timeout). If pri includes PCATCH flag, signals are checked * before and after sleeping, else signals are not checked. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. */ int _sleep(void *ident, struct lock_object *lock, int priority, const char *wmesg, int timo) { struct thread *td; struct proc *p; struct lock_class *class; int catch, flags, lock_state, pri, rval; WITNESS_SAVE_DECL(lock_witness); td = curthread; p = td->td_proc; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); KASSERT(timo != 0 || mtx_owned(&Giant) || lock != NULL || ident == &lbolt, ("sleeping without a lock")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); + if (priority & PDROP) + KASSERT(lock != NULL && lock != &Giant.lock_object, + ("PDROP requires a non-Giant lock")); if (lock != NULL) class = LOCK_CLASS(lock); else class = NULL; if (cold) { /* * During autoconfiguration, just return; * don't run any other threads or panic below, * in case this is the idle thread and already asleep.
* XXX: this used to do "s = splhigh(); splx(safepri); * splx(s);" to give interrupts a chance, but there is * no way to give interrupts a chance now. */ if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); } catch = priority & PCATCH; pri = priority & PRIMASK; rval = 0; /* * If we are already on a sleep queue, then remove us from that * sleep queue first. We have to do this to handle recursive * sleeps. */ if (TD_ON_SLEEPQ(td)) sleepq_remove(td, td->td_wchan); if (ident == &pause_wchan) flags = SLEEPQ_PAUSE; else flags = SLEEPQ_SLEEP; if (catch) flags |= SLEEPQ_INTERRUPTIBLE; sleepq_lock(ident); CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, p->p_pid, td->td_name, wmesg, ident); DROP_GIANT(); - if (lock != NULL && !(class->lc_flags & LC_SLEEPABLE)) { + if (lock != NULL && lock != &Giant.lock_object && + !(class->lc_flags & LC_SLEEPABLE)) { WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); } else /* GCC needs to follow the Yellow Brick Road */ lock_state = -1; /* * We put ourselves on the sleep queue and start our timeout * before calling thread_suspend_check, as we could stop there, * and a wakeup or a SIGCONT (or both) could occur while we were * stopped without resuming us. Thus, we must be ready for sleep * when cursig() is called. If the wakeup happens while we're * stopped, then td will no longer be on a sleep queue upon * return from cursig(). */ sleepq_add(ident, ident == &lbolt ? NULL : lock, wmesg, flags, 0); if (timo) sleepq_set_timeout(ident, timo); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); sleepq_lock(ident); } if (timo && catch) rval = sleepq_timedwait_sig(ident, pri); else if (timo) rval = sleepq_timedwait(ident, pri); else if (catch) rval = sleepq_wait_sig(ident, pri); else { sleepq_wait(ident, pri); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); - if (lock != NULL && !(priority & PDROP)) { + if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } int msleep_spin(void *ident, struct mtx *mtx, const char *wmesg, int timo) { struct thread *td; struct proc *p; int rval; WITNESS_SAVE_DECL(mtx); td = curthread; p = td->td_proc; KASSERT(mtx != NULL, ("sleeping without a mutex")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); if (cold) { /* * During autoconfiguration, just return; * don't run any other threads or panic below, * in case this is the idle thread and already asleep. * XXX: this used to do "s = splhigh(); splx(safepri); * splx(s);" to give interrupts a chance, but there is * no way to give interrupts a chance now. */ return (0); } sleepq_lock(ident); CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, p->p_pid, td->td_name, wmesg, ident); DROP_GIANT(); mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); WITNESS_SAVE(&mtx->lock_object, mtx); mtx_unlock_spin(mtx); /* * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); if (timo) sleepq_set_timeout(ident, timo); /* * Can't call ktrace with any spin locks held so it can lock the * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold * any spin lock. Thus, we have to drop the sleepq spin lock while * we handle those requests. 
This is safe since we have placed our * thread on the sleep queue already. */ #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { sleepq_release(ident); ktrcsw(1, 0); sleepq_lock(ident); } #endif #ifdef WITNESS sleepq_release(ident); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"", wmesg); sleepq_lock(ident); #endif if (timo) rval = sleepq_timedwait(ident, 0); else { sleepq_wait(ident, 0); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0); #endif PICKUP_GIANT(); mtx_lock_spin(mtx); WITNESS_RESTORE(&mtx->lock_object, mtx); return (rval); } /* * pause() is like tsleep() except that the intention is to not be * explicitly woken up by another thread. Instead, the current thread * simply wishes to sleep until the timeout expires. It is * implemented using a dummy wait channel. */ int pause(const char *wmesg, int timo) { KASSERT(timo != 0, ("pause: timeout required")); return (tsleep(&pause_wchan, 0, wmesg, timo)); } /* * Make all threads sleeping on the specified identifier runnable. */ void wakeup(void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) kick_proc0(); } /* * Make a thread sleeping on the specified identifier runnable. * May wake more than one thread if a target thread is currently * swapped out. */ void wakeup_one(void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) kick_proc0(); } static void kdb_switch(void) { thread_unlock(curthread); kdb_backtrace(); kdb_reenter(); panic("%s: did not reenter debugger", __func__); } /* * The machine independent parts of context switching. */ void mi_switch(int flags, struct thread *newtd) { uint64_t runtime, new_switchtime; struct thread *td; struct proc *p; td = curthread; /* XXX */ THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); p = td->td_proc; /* XXX */ KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); #ifdef INVARIANTS if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td)) mtx_assert(&Giant, MA_NOTOWNED); #endif KASSERT(td->td_critnest == 1 || (td->td_critnest == 2 && (td->td_owepreempt) && (flags & SW_INVOL) != 0 && newtd == NULL) || panicstr, ("mi_switch: switch in a critical section")); KASSERT((flags & (SW_INVOL | SW_VOL)) != 0, ("mi_switch: switch must be voluntary or involuntary")); KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself")); /* * Don't perform context switches from the debugger. */ if (kdb_active) kdb_switch(); if (flags & SW_VOL) td->td_ru.ru_nvcsw++; else td->td_ru.ru_nivcsw++; #ifdef SCHED_STATS SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); #endif /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. 
*/ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); td->td_generation++; /* bump preempt-detect counter */ PCPU_INC(cnt.v_swtch); PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); #if (KTR_COMPILE & KTR_SCHED) != 0 if (TD_IS_IDLETHREAD(td)) CTR3(KTR_SCHED, "mi_switch: %p(%s) prio %d idle", td, td->td_name, td->td_priority); else if (newtd != NULL) CTR5(KTR_SCHED, "mi_switch: %p(%s) prio %d preempted by %p(%s)", td, td->td_name, td->td_priority, newtd, newtd->td_name); else CTR6(KTR_SCHED, "mi_switch: %p(%s) prio %d inhibit %d wmesg %s lock %s", td, td->td_name, td->td_priority, td->td_inhibitors, td->td_wmesg, td->td_lockname); #endif sched_switch(td, newtd, flags); CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d", td, td->td_name, td->td_priority); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); /* * If the last thread was exiting, finish cleaning it up. */ if ((td = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(td); } } /* * Change thread state to be runnable, placing it on the run queue if * it is in memory. If it is swapped out, return true so our caller * will know to awaken the swapper. */ int setrunnable(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td->td_proc->p_state != PRS_ZOMBIE, ("setrunnable: pid %d is a zombie", td->td_proc->p_pid)); switch (td->td_state) { case TDS_RUNNING: case TDS_RUNQ: return (0); case TDS_INHIBITED: /* * If we are only inhibited because we are swapped out, * then arrange to swap in this process. Otherwise just return. */ if (td->td_inhibitors != TDI_SWAPPED) return (0); /* FALLTHROUGH */ case TDS_CAN_RUN: break; default: printf("state is 0x%x", td->td_state); panic("setrunnable(2)"); } if ((td->td_flags & TDF_INMEM) == 0) { if ((td->td_flags & TDF_SWAPINREQ) == 0) { td->td_flags |= TDF_SWAPINREQ; return (1); } } else sched_wakeup(td); return (0); } /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ static void loadav(void *arg) { int i, nrun; struct loadavg *avg; nrun = sched_load(); avg = &averunnable; for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; /* * Schedule the next update to occur after 5 seconds, but add a * random variation to avoid synchronisation with processes that * run at regular intervals. */ callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)), loadav, NULL); } static void lboltcb(void *arg) { wakeup(&lbolt); callout_reset(&lbolt_callout, hz, lboltcb, NULL); } /* ARGSUSED */ static void synch_setup(void *dummy) { callout_init(&loadav_callout, CALLOUT_MPSAFE); callout_init(&lbolt_callout, CALLOUT_MPSAFE); /* Kick off timeout driven events by calling first time. */ loadav(NULL); lboltcb(NULL); } /* * General purpose yield system call. */ int yield(struct thread *td, struct yield_args *uap) { thread_lock(td); sched_prio(td, PRI_MAX_TIMESHARE); mi_switch(SW_VOL | SWT_RELINQUISH, NULL); thread_unlock(td); td->td_retval[0] = 0; return (0); }
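
The following sketch illustrates the consumer-side usage this patch enables. It is not part of the change itself, and every name in it (example_cv, example_ready, and so on) is hypothetical: a Giant-locked subsystem may now pass Giant itself as the interlock for a condition variable wait.

/*
 * Hypothetical illustration only: cv_wait() with Giant as the lock.
 * While the thread sleeps, Giant is dropped fully (even if recursed)
 * via DROP_GIANT() and reacquired via PICKUP_GIANT() before return.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>

static struct cv example_cv;	/* cv_init(&example_cv, "excv") at attach */
static int example_ready;	/* protected by Giant in this sketch */

static void
example_consumer(void)
{
	mtx_lock(&Giant);
	while (example_ready == 0)
		cv_wait(&example_cv, &Giant);	/* permitted by this change */
	example_ready = 0;
	mtx_unlock(&Giant);
}

static void
example_producer(void)
{
	mtx_lock(&Giant);
	example_ready = 1;
	cv_signal(&example_cv);	/* hold the same lock the waiters pass */
	mtx_unlock(&Giant);
}

cv_wait_unlock() remains the exception: as the new KASSERT in _cv_wait_unlock() enforces, it may not be given Giant, because it has no way to hand the fully dropped lock back to the caller while PICKUP_GIANT() unconditionally reacquires Giant.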
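
The same idiom applies to the sleep(9) family: Giant may be named as the msleep() interlock, but combining it with PDROP trips the new KASSERT in _sleep(), since Giant is always reacquired on wakeup. A minimal sketch, reusing the hypothetical names and headers from the previous example:

/*
 * Hypothetical illustration only: msleep() with Giant as the lock.
 * PDROP must not be used here; _sleep() asserts against it because
 * PICKUP_GIANT() reacquires Giant before the function returns.
 */
static int example_done;	/* hypothetical flag, Giant-protected */

static int
example_wait_done(void)
{
	int error;

	error = 0;
	mtx_lock(&Giant);
	while (example_done == 0 && error == 0)
		error = msleep(&example_done, &Giant, PCATCH, "exdone", 0);
	mtx_unlock(&Giant);	/* caller unlocks; PDROP is disallowed */
	return (error);
}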