Index: projects/hps_head/share/man/man9/Makefile =================================================================== --- projects/hps_head/share/man/man9/Makefile +++ projects/hps_head/share/man/man9/Makefile @@ -1573,6 +1573,7 @@ timeout.9 callout_active.9 \ timeout.9 callout_deactivate.9 \ timeout.9 callout_drain.9 \ + timeout.9 callout_drain_async.9 \ timeout.9 callout_handle_init.9 \ timeout.9 callout_init.9 \ timeout.9 callout_init_mtx.9 \ Index: projects/hps_head/share/man/man9/timeout.9 =================================================================== --- projects/hps_head/share/man/man9/timeout.9 +++ projects/hps_head/share/man/man9/timeout.9 @@ -29,13 +29,14 @@ .\" .\" $FreeBSD$ .\" -.Dd October 8, 2014 +.Dd January 24, 2015 .Dt TIMEOUT 9 .Os .Sh NAME .Nm callout_active , .Nm callout_deactivate , .Nm callout_drain , +.Nm callout_drain_async , .Nm callout_handle_init , .Nm callout_init , .Nm callout_init_mtx , @@ -63,256 +64,234 @@ .In sys/systm.h .Bd -literal typedef void timeout_t (void *); +typedef void callout_func_t (void *); .Ed -.Ft int -.Fn callout_active "struct callout *c" -.Ft void -.Fn callout_deactivate "struct callout *c" -.Ft int -.Fn callout_drain "struct callout *c" -.Ft void -.Fn callout_handle_init "struct callout_handle *handle" -.Bd -literal -struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle); -.Ed -.Ft void -.Fn callout_init "struct callout *c" "int mpsafe" -.Ft void -.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" -.Ft void -.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" -.Ft void -.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" -.Ft int -.Fn callout_pending "struct callout *c" -.Ft int -.Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg" -.Ft int -.Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \ -"void *arg" -.Ft int -.Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \ -"void *arg" "int cpu" -.Ft int -.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" -.Ft int -.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" -.Ft int -.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags" -.Ft int -.Fn callout_schedule "struct callout *c" "int ticks" -.Ft int -.Fn callout_schedule_curcpu "struct callout *c" "int ticks" -.Ft int -.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" -.Ft int -.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -.Ft int -.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -.Ft int -.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int cpu" "int flags" -.Ft int -.Fn callout_stop "struct callout *c" -.Ft struct callout_handle -.Fn timeout "timeout_t *func" "void *arg" "int ticks" -.Ft void -.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" .Sh DESCRIPTION The .Nm callout -API is used to schedule a call to an arbitrary function at a specific -time in the future. -Consumers of this API are required to allocate a callout structure -.Pq struct callout +API is used to schedule a one-time call to an arbitrary function at a +specific time in the future. 
+Consumers of this API are required to allocate a +.Ft struct callout for each pending function invocation. -This structure stores state about the pending function invocation including -the function to be called and the time at which the function should be invoked. -Pending function calls can be cancelled or rescheduled to a different time. -In addition, -a callout structure may be reused to schedule a new function call after a -scheduled call is completed. -.Pp -Callouts only provide a single-shot mode. -If a consumer requires a periodic timer, -it must explicitly reschedule each function call. -This is normally done by rescheduling the subsequent call within the called -function. -.Pp -Callout functions must not sleep. -They may not acquire sleepable locks, -wait on condition variables, -perform blocking allocation requests, -or invoke any other action that might sleep. -.Pp -Each callout structure must be initialized by -.Fn callout_init , -.Fn callout_init_mtx , -.Fn callout_init_rm , -or -.Fn callout_init_rw -before it is passed to any of the other callout functions. The +.Ft struct callout +stores the full state about any pending function call and +must be drained by a call to +.Fn callout_drain +or +.Fn callout_drain_async +before freeing. +.Sh INITIALIZATION +.Ft void +.Fn callout_handle_init "struct callout_handle *handle" +This function is deprecated. +Please use .Fn callout_init -function initializes a callout structure in -.Fa c -that is not associated with a specific lock. +instead. +This function is used to prepare a +.Ft struct callout_handle +before it can be used the first time. +If this function is called on a pending timeout, the pending timeout +cannot be cancelled and the +.Fn untimeout +function will return as if no timeout was pending. +.Pp +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +This macro is deprecated. +This macro is used to statically initialize a +.Ft struct callout_handle . +Please use +.Fn callout_init +instead. +.Pp +.Ft void +.Fn callout_init "struct callout *c" "int mpsafe" +This function prepares a +.Ft struct callout +before it can be used. +This function should not be used when the callout is pending a timeout. If the .Fa mpsafe -argument is zero, -the callout structure is not considered to be -.Dq multi-processor safe ; -and the Giant lock will be acquired before calling the callout function -and released when the callout function returns. -.Pp -The -.Fn callout_init_mtx , -.Fn callout_init_rm , -and -.Fn callout_init_rw -functions initialize a callout structure in -.Fa c -that is associated with a specific lock. -The lock is specified by the -.Fa mtx , -.Fa rm , -or -.Fa rw -parameter. -The associated lock must be held while stopping or rescheduling the -callout. -The callout subsystem acquires the associated lock before calling the -callout function and releases it after the function returns. -If the callout was cancelled while the callout subsystem waited for the -associated lock, -the callout function is not called, -and the associated lock is released. -This ensures that stopping or rescheduling the callout will abort any -previously scheduled invocation. -.Pp -Only regular mutexes may be used with -.Fn callout_init_mtx ; -spin mutexes are not supported. -A sleepable read-mostly lock -.Po -one initialized with the -.Dv RM_SLEEPABLE -flag -.Pc -may not be used with -.Fn callout_init_rm . 
-Similarly, other sleepable lock types such as -.Xr sx 9 -and -.Xr lockmgr 9 -cannot be used with callouts because sleeping is not permitted in -the callout subsystem. +argument is non-zero, the callback function will be running unlocked +and the callback is so-called "mpsafe". +.Bf Sy +It is the application's entire responsibility to not call any +.Fn callout_xxx +functions, including the +.Fn callout_drain +function, simultaneously on the same callout when the +.Fa mpsafe +argument is non-zero. +Otherwise, undefined behavior can happen. +Avoid simultaneous calls by obtaining an exclusive lock before calling +any +.Fn callout_xxx +functions other than the +.Fn callout_drain +function. +.Ef +If the +.Fa mpsafe +argument is zero, the Giant mutex will be locked before the callback +function is called. +If the +.Fa mpsafe +argument is zero, the Giant mutex is expected to be locked when calling +any +.Fn callout_xxx +functions which start and stop a callout other than the +.Fn callout_drain +function. .Pp -These +.Ft void +.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" +This function prepares a +.Ft struct callout +before it can be used. +This function should not be used when the callout is pending a timeout. +The +.Fa mtx +argument is a pointer to a valid spinlock type of mutex or a valid +regular non-sleepable mutex which the callback subsystem will lock +before calling the callback function. +The specified mutex is expected to be locked when calling any +.Fn callout_xxx +functions which start and stop a callout other than the +.Fn callout_drain +function. +Valid .Fa flags -may be specified for -.Fn callout_init_mtx , -.Fn callout_init_rm , -or -.Fn callout_init_rw : +are: .Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED" .It Dv CALLOUT_RETURNUNLOCKED -The callout function will release the associated lock itself, -so the callout subsystem should not attempt to unlock it -after the callout function returns. -.It Dv CALLOUT_SHAREDLOCK -The lock is only acquired in read mode when running the callout handler. -This flag is ignored by -.Fn callout_init_mtx . +The callout function is assumed to have released the specified mutex +before returning. +.It Dv 0 +The callout subsystem will release the specified mutex after the +callout function has returned. .El .Pp -The function -.Fn callout_stop -cancels a callout -.Fa c -if it is currently pending. -If the callout is pending, then -.Fn callout_stop -returns a non-zero value. -If the callout is not set, -has already been serviced, -or is currently being serviced, -then zero will be returned. -If the callout has an associated lock, -then that lock must be held when this function is called. -.Pp -The function -.Fn callout_drain -is identical to -.Fn callout_stop -except that it will wait for the callout -.Fa c -to complete if it is already in progress. -This function MUST NOT be called while holding any -locks on which the callout might block, or deadlock will result. -Note that if the callout subsystem has already begun processing this -callout, then the callout function may be invoked before -.Fn callout_drain -returns. -However, the callout subsystem does guarantee that the callout will be -fully stopped before -.Fn callout_drain -returns. +.Ft void +.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" +This function is similar to +.Fn callout_init_mtx , +but it accepts a read-mostly type of lock. +The read-mostly lock must not be initialized with the +.Dv RM_SLEEPABLE +flag. 
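A minimal initialization sketch, assuming a hypothetical driver softc named "struct foo_softc" with its own mutex; associating the callout with the driver mutex via callout_init_mtx() is the usual way to obtain the serialization that the plain "mpsafe" variant otherwise leaves entirely to the caller:
.Bd -literal -offset indent
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

/* "struct foo_softc" and the function names are illustrative only */
struct foo_softc {
	struct mtx	sc_mtx;
	struct callout	sc_callout;
};

static void
foo_attach(struct foo_softc *sc)
{

	mtx_init(&sc->sc_mtx, "foo", NULL, MTX_DEF);
	/* the callout subsystem locks sc_mtx around the callback */
	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
}
.Ed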
.Pp -The +.Ft void +.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" +This function is similar to +.Fn callout_init_mtx , +but it accepts a read/write type of lock. +.Sh SCHEDULING CALLOUTS +.Ft struct callout_handle +.Fn timeout "timeout_t *func" "void *arg" "int ticks" +This function is deprecated. +Please use .Fn callout_reset -and -.Fn callout_schedule -function families schedule a future function invocation for callout -.Fa c . -If -.Fa c -already has a pending callout, -it is cancelled before the new invocation is scheduled. -These functions return a non-zero value if a pending callout was cancelled -and zero if there was no pending callout. -If the callout has an associated lock, -then that lock must be held when any of these functions are called. -.Pp -The time at which the callout function will be invoked is determined by -either the -.Fa ticks -argument or the -.Fa sbt , -.Fa pr , -and -.Fa flags -arguments. -When -.Fa ticks -is used, -the callout is scheduled to execute after +instead. +This function schedules a call to +.Fa func +to take place after .Fa ticks Ns No /hz seconds. Non-positive values of .Fa ticks are silently converted to the value .Sq 1 . -.Pp The -.Fa sbt , -.Fa pr , -and -.Fa flags -arguments provide more control over the scheduled time including -support for higher resolution times, -specifying the precision of the scheduled time, -and setting an absolute deadline instead of a relative timeout. -The callout is scheduled to execute in a time window which begins at -the time specified in +.Fa func +argument is a valid pointer to a function that takes a single +.Fa void * +argument. +Upon invocation, the +.Fa func +function will receive +.Fa arg +as its only argument. +The Giant lock is locked when the +.Fa arg +function is invoked and should not be unlocked by this function. +The returned value from +.Fn timeout +is a +.Ft struct callout_handle +structure which can be used in conjunction with the +.Fn untimeout +function to request that a scheduled timeout be cancelled. +As handles are recycled by the system, it is possible, although unlikely, +that a handle from one invocation of +.Fn timeout +may match the handle of another invocation of +.Fn timeout +if both calls used the same function pointer and argument, and the first +timeout is expired or cancelled before the second call. +Please ensure that the function and argument pointers are unique when using this function. +.Pp +.Ft int +.Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg" +This function is used to schedule or re-schedule a callout. +This function at first stops the callout given by the +.Fa c +argument, if any. +Then it will start the callout given by the +.Fa c +argument. +The relative time until the timeout callback happens is given by the +.Fa ticks +argument. +The number of ticks in a second is defined by +.Dv hz +and can vary from system to system. +This function returns a non-zero value if the given callout was pending and +the callback function was prevented from being called. +Otherwise, a value of zero is returned. +If a lock is associated with the callout given by the +.Fa c +argument and it is exclusivly locked when this function is called, this +function will always ensure that previous callback function, if any, +is never reached. +In other words, the callout will be atomically restarted. +Otherwise, there is no such guarantee. +The callback function is given by +.Fa func +and its function argument is given by +.Fa arg . 
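As a sketch of how a periodic timer is built on top of the one-shot callout_reset(), reusing the hypothetical softc and names from the previous example, the callback re-arms itself while the associated mutex is held:
.Bd -literal -offset indent
static void
foo_timeout(void *arg)
{
	struct foo_softc *sc = arg;

	mtx_assert(&sc->sc_mtx, MA_OWNED);

	/* ... perform the periodic work ... */

	/* callouts are one-shot; re-arm for roughly one second */
	callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
}

static void
foo_start(struct foo_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
	mtx_unlock(&sc->sc_mtx);
}
.Ed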
+.Pp +.Ft int +.Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \ +"void *arg" +This function works the same like the +.Fn callout_reset +function except the callback function given by the +.Fa func +argument will be executed on the same CPU which called this function. +.Pp +.Ft int +.Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \ +"void *arg" "int cpu" +This function works the same like the +.Fn callout_reset +function except the callback function given by the +.Fa func +argument will be executed on the CPU given by the +.Fa cpu +argument. +.Pp +.Ft int +.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" +This function works the same like the +.Fn callout_reset +function except the relative or absolute time after which the timeout +callback should happen is given by the .Fa sbt -and extends for the amount of time specified in +argument and extends for the amount of time specified in .Fa pr . +This function is used when high precision timeouts are needed. If .Fa sbt specifies a time in the past, @@ -322,12 +301,13 @@ allows the callout subsystem to coalesce callouts scheduled close to each other into fewer timer interrupts, reducing processing overhead and power consumption. -These +The .Fa flags -may be specified to adjust the interpretation of +argument may be non-zero to adjust the interpretation of the .Fa sbt and -.Fa pr : +.Fa pr +arguments: .Bl -tag -width ".Dv C_DIRECT_EXEC" .It Dv C_ABSOLUTE Handle the @@ -347,7 +327,7 @@ and should be as small as possible because they run with absolute priority. .It Fn C_PREL Specifies relative event time precision as binary logarithm of time interval -divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, etc. +divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, and so on. Note that the larger of .Fa pr or this value is used as the length of the time window. @@ -360,65 +340,207 @@ calls if possible. .El .Pp -The -.Fn callout_reset -functions accept a +.Ft int +.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" +This function works like +.Fn callout_reset_sbt , +except the callback function given by the .Fa func -argument which identifies the function to be called when the time expires. -It must be a pointer to a function that takes a single -.Fa void * -argument. -Upon invocation, +argument will be executed on the CPU which called this function. +.Pp +.Ft int +.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags" +This function works like +.Fn callout_reset_sbt , +except the callback function given by .Fa func -will receive -.Fa arg -as its only argument. -The -.Fn callout_schedule -functions reuse the +will be executed on the CPU given by +.Fa cpu . +.Pp +.Ft int +.Fn callout_schedule "struct callout *c" "int ticks" +This function works the same like the +.Fn callout_reset +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_curcpu "struct callout *c" "int ticks" +This function works the same like the +.Fn callout_reset_curcpu +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. 
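A short sketch of the sub-tick interface described above, again assuming the hypothetical softc: the callout is armed 50 milliseconds from now and allowed to slip by up to half of that interval so the subsystem can aggregate it with nearby events:
.Bd -literal -offset indent
mtx_lock(&sc->sc_mtx);
/*
 * 50 ms relative timeout; C_PREL(1) permits a deviation of up
 * to 1/2 of the interval so nearby events can be coalesced.
 */
callout_reset_sbt(&sc->sc_callout, 50 * SBT_1MS, 0,
    foo_timeout, sc, C_PREL(1));
mtx_unlock(&sc->sc_mtx);
.Ed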
+.Pp +.Ft int +.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" +This function works the same like the +.Fn callout_reset_on +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int flags" +This function works the same like the +.Fn callout_reset_sbt +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int flags" +This function works the same like the +.Fn callout_reset_sbt_curcpu +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Pp +.Ft int +.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "int cpu" "int flags" +This function works the same like the +.Fn callout_reset_sbt_on +function except it re-uses the callback function and the callback argument +already stored in the +.Pq struct callout +structure. +.Sh CHECKING THE STATE OF CALLOUTS +.Ft int +.Fn callout_pending "struct callout *c" +This function returns non-zero if the callout pointed to by the +.Fa c +argument is pending for callback. +Else this function returns zero. +This function returns zero when inside the callout function if the +callout is not re-scheduled. +.Pp +.Ft int +.Fn callout_active "struct callout *c" +This function is deprecated and returns non-zero if the callout +pointed to by the +.Fa c +argument was scheduled in the past. +Else this function returns zero. +This function also returns zero after the +.Fn callout_deactivate +or the +.Fn callout_stop +or the +.Fn callout_drain +or the +.Fn callout_drain_async +function is called on the same callout as given by the +.Fa c +argument. +.Pp +.Ft void +.Fn callout_deactivate "struct callout *c" +This function is deprecated and ensures that subsequent calls to the +.Fn callout_activate +function returns zero until the callout is scheduled again. +.Sh STOPPING CALLOUTS +.Ft void +.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" +This function is deprecated and cancels the timeout associated with the +.Fa handle +argument using the function pointed to by the .Fa func -and +argument and having the .Fa arg -arguments from the previous callout. -Note that one of the -.Fn callout_reset -functions must always be called to initialize +arguments to validate the handle. +If the handle does not correspond to a timeout with +the function .Fa func -and +taking the argument .Fa arg -before one of the -.Fn callout_schedule -functions can be used. +no action is taken. The +.Fa handle +must be initialized by a previous call to +.Fn timeout , +.Fn callout_handle_init +or assigned the value of +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +before being passed to +.Fn untimeout . +The behavior of calling +.Fn untimeout +with an uninitialized handle +is undefined. .Pp -The callout subsystem provides a softclock thread for each CPU in the system. -Callouts are assigned to a single CPU and are executed by the softclock thread -for that CPU. -Initially, -callouts are assigned to CPU 0. -The -.Fn callout_reset_on , -.Fn callout_reset_sbt_on , -.Fn callout_schedule_on -and -.Fn callout_schedule_sbt_on -functions assign the callout to CPU -.Fa cpu . 
-The -.Fn callout_reset_curcpu , -.Fn callout_reset_sbt_curpu , -.Fn callout_schedule_curcpu -and -.Fn callout_schedule_sbt_curcpu -functions assign the callout to the current CPU. -The -.Fn callout_reset , -.Fn callout_reset_sbt , -.Fn callout_schedule -and -.Fn callout_schedule_sbt -functions schedule the callout to execute in the softclock thread of the CPU -to which it is currently assigned. +.Ft int +.Fn callout_stop "struct callout *c" +This function is used to stop a timeout function invocation associated with the callout pointed to by the +.Fa c +argument, in a non-blocking fashion. +This function can be called multiple times in a row with no side effects, even if the callout is already stopped. This function however should not be called before the callout has been initialized. +This function returns a non-zero value if the given callout was pending and +the callback function was prevented from being called. +Else a value of zero is returned. +If a lock is associated with the callout given by the +.Fa c +argument and it is exclusivly locked when this function is called, the +.Fn callout_stop +function will always ensure that the callback function is never reached. +In other words the callout will be atomically stopped. +Else there is no such guarantee. +.Sh DRAINING CALLOUTS +.Ft int +.Fn callout_drain "struct callout *c" +This function works the same like the +.Fn callout_stop +function except it ensures that all callback functions have returned and there are no more references to the callout pointed to by the +.Fa c +argument inside the callout subsystem before it returns. +Also this function ensures that the lock, if any, associated with the +callout is no longer being used. +When this function returns, it is safe to free the callout structure pointed to by the +.Fa c +argument. .Pp +.Ft int +.Fn callout_drain_async "struct callout *c" "callout_func_t *fn" "void *arg" +This function is non-blocking and works the same like the +.Fn callout_stop +function except if it returns non-zero it means the callback function pointed to by the +.Fa fn +argument will be called back with the +.Fa arg +argument when all references to the callout pointed to by the +.Fa c +argument are gone. +If this function returns non-zero it should not be called again until the callback function has been called. +If the +.Fn callout_drain +or +.Fn callout_drain_async +functions are called while an asynchronous drain is pending, +previously pending asynchronous drains might get cancelled. +If this function returns zero, it is safe to free the callout structure pointed to by the +.Fa c +argument right away. +.Sh CALLOUT FUNCTION RESTRICTIONS +Callout functions must not sleep. +They may not acquire sleepable locks, wait on condition variables, +perform blocking allocation requests, or invoke any other action that +might sleep. +.Sh CALLOUT SUBSYSTEM INTERNALS +The callout subsystem has its own set of spinlocks to protect its internal state. +The callout subsystem provides a softclock thread for each CPU in the +system. +Callouts are assigned to a single CPU and are executed by the +softclock thread for that CPU. +Initially, callouts are assigned to CPU 0. Softclock threads are not pinned to their respective CPUs by default. The softclock thread for CPU 0 can be pinned to CPU 0 by setting the .Va kern.pin_default_swi @@ -427,50 +549,7 @@ respective CPUs by setting the .Va kern.pin_pcpu_swi loader tunable to a non-zero value. 
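A teardown sketch using the asynchronous drain added by this patch, with the same hypothetical softc; if callout_drain_async() returns zero there are no outstanding references and the state may be freed immediately, otherwise the supplied callback performs the final free once the callout subsystem is done with the structure:
.Bd -literal -offset indent
static void
foo_free(void *arg)
{
	struct foo_softc *sc = arg;

	/* assumes the callout no longer references sc_mtx here */
	mtx_destroy(&sc->sc_mtx);
	free(sc, M_DEVBUF);	/* M_DEVBUF is illustrative */
}

static void
foo_detach(struct foo_softc *sc)
{

	if (callout_drain_async(&sc->sc_callout, foo_free, sc) == 0)
		foo_free(sc);	/* no callback pending; free now */
}
.Ed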
-.Pp -The macros -.Fn callout_pending , -.Fn callout_active -and -.Fn callout_deactivate -provide access to the current state of the callout. -The -.Fn callout_pending -macro checks whether a callout is -.Em pending ; -a callout is considered -.Em pending -when a timeout has been set but the time has not yet arrived. -Note that once the timeout time arrives and the callout subsystem -starts to process this callout, -.Fn callout_pending -will return -.Dv FALSE -even though the callout function may not have finished -.Pq or even begun -executing. -The -.Fn callout_active -macro checks whether a callout is marked as -.Em active , -and the -.Fn callout_deactivate -macro clears the callout's -.Em active -flag. -The callout subsystem marks a callout as -.Em active -when a timeout is set and it clears the -.Em active -flag in -.Fn callout_stop -and -.Fn callout_drain , -but it -.Em does not -clear it when a callout expires normally via the execution of the -callout function. -.Ss "Avoiding Race Conditions" +.Sh "AVOIDING RACE CONDITIONS" The callout subsystem invokes callout functions from its own thread context. Without some kind of synchronization, @@ -531,9 +610,8 @@ .Pc indicates whether or not the callout was removed. If it is known that the callout was set and the callout function has -not yet executed, then a return value of -.Dv FALSE -indicates that the callout function is about to be called. +not yet executed, then a return value of zero indicates that the +callout function is about to be called. For example: .Bd -literal -offset indent if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) { @@ -589,16 +667,14 @@ .Em pending flag and return without action if .Fn callout_pending -returns -.Dv TRUE . +returns non-zero. This indicates that the callout was rescheduled using .Fn callout_reset just before the callout function was invoked. If .Fn callout_active -returns -.Dv FALSE -then the callout function should also return without action. +returns zero then the callout function should also return without +action. This indicates that the callout has been stopped. Finally, the callout function should call .Fn callout_deactivate @@ -668,129 +744,13 @@ or releasing the storage for the callout structure. .Sh LEGACY API .Bf Sy -The functions below are a legacy API that will be removed in a future release. -New code should not use these routines. -.Ef -.Pp -The function -.Fn timeout -schedules a call to the function given by the argument -.Fa func -to take place after -.Fa ticks Ns No /hz -seconds. -Non-positive values of -.Fa ticks -are silently converted to the value -.Sq 1 . -.Fa func -should be a pointer to a function that takes a -.Fa void * -argument. -Upon invocation, -.Fa func -will receive -.Fa arg -as its only argument. -The return value from +The .Fn timeout -is a -.Ft struct callout_handle -which can be used in conjunction with the -.Fn untimeout -function to request that a scheduled timeout be canceled. -.Pp -The function -.Fn callout_handle_init -can be used to initialize a handle to a state which will cause -any calls to -.Fn untimeout -with that handle to return with no side -effects. -.Pp -Assigning a callout handle the value of -.Fn CALLOUT_HANDLE_INITIALIZER -performs the same function as -.Fn callout_handle_init -and is provided for use on statically declared or global callout handles. -.Pp -The function -.Fn untimeout -cancels the timeout associated with -.Fa handle -using the -.Fa func and -.Fa arg -arguments to validate the handle. 
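The in-handler checks recommended in the race-condition discussion above can be sketched as follows; the function and field names are hypothetical and follow the softc used in the earlier examples:
.Bd -literal -offset indent
static void
foo_checked_timeout(void *arg)
{
	struct foo_softc *sc = arg;

	mtx_assert(&sc->sc_mtx, MA_OWNED);
	if (callout_pending(&sc->sc_callout)) {
		/* rescheduled by callout_reset(); run later */
		return;
	}
	if (!callout_active(&sc->sc_callout)) {
		/* stopped while the handler waited for the lock */
		return;
	}
	callout_deactivate(&sc->sc_callout);

	/* ... perform the work ... */
}
.Ed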
-If the handle does not correspond to a timeout with -the function -.Fa func -taking the argument -.Fa arg -no action is taken. -.Fa handle -must be initialized by a previous call to -.Fn timeout , -.Fn callout_handle_init , -or assigned the value of -.Fn CALLOUT_HANDLE_INITIALIZER "&handle" -before being passed to -.Fn untimeout . -The behavior of calling .Fn untimeout -with an uninitialized handle -is undefined. -.Pp -As handles are recycled by the system, it is possible (although unlikely) -that a handle from one invocation of -.Fn timeout -may match the handle of another invocation of -.Fn timeout -if both calls used the same function pointer and argument, and the first -timeout is expired or canceled before the second call. -The timeout facility offers O(1) running time for -.Fn timeout -and -.Fn untimeout . -Timeouts are executed from -.Fn softclock -with the -.Va Giant -lock held. -Thus they are protected from re-entrancy. -.Sh RETURN VALUES -The -.Fn callout_active -macro returns the state of a callout's -.Em active -flag. -.Pp -The -.Fn callout_pending -macro returns the state of a callout's -.Em pending -flag. -.Pp -The -.Fn callout_reset -and -.Fn callout_schedule -function families return non-zero if the callout was pending before the new -function invocation was scheduled. -.Pp -The -.Fn callout_stop -and -.Fn callout_drain -functions return non-zero if the callout was still pending when it was -called or zero otherwise. -The -.Fn timeout -function returns a -.Ft struct callout_handle -that can be passed to -.Fn untimeout . +functions are a legacy API that will be removed in a future release. +New code should not use these routines. +.Ef .Sh HISTORY The current timeout and untimeout routines are based on the work of .An Adam M. Costello @@ -815,4 +775,4 @@ .Bx linked list callout mechanism which offered O(n) insertion and removal running time -but did not generate or require handles for untimeout operations. +and did not generate or require handles for untimeout operations. Index: projects/hps_head/sys/kern/init_main.c =================================================================== --- projects/hps_head/sys/kern/init_main.c +++ projects/hps_head/sys/kern/init_main.c @@ -506,7 +506,8 @@ callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); - callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); + mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); + callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); /* Create credentials. 
*/ newcred = crget(); Index: projects/hps_head/sys/kern/kern_clocksource.c =================================================================== --- projects/hps_head/sys/kern/kern_clocksource.c +++ projects/hps_head/sys/kern/kern_clocksource.c @@ -160,6 +160,9 @@ int usermode; int done, runs; + KASSERT(curthread->td_critnest != 0, + ("Must be in a critical section")); + CTR3(KTR_SPARE2, "handle at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); done = 0; Index: projects/hps_head/sys/kern/kern_condvar.c =================================================================== --- projects/hps_head/sys/kern/kern_condvar.c +++ projects/hps_head/sys/kern/kern_condvar.c @@ -313,15 +313,13 @@ DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); + sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); } + sleepq_lock(cvp); rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE @@ -383,15 +381,13 @@ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); + sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); } + sleepq_lock(cvp); rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE Index: projects/hps_head/sys/kern/kern_lock.c =================================================================== --- projects/hps_head/sys/kern/kern_lock.c +++ projects/hps_head/sys/kern/kern_lock.c @@ -210,9 +210,11 @@ GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ? SLEEPQ_INTERRUPTIBLE : 0), queue); - if ((flags & LK_TIMELOCK) && timo) + if ((flags & LK_TIMELOCK) && timo) { + sleepq_release(&lk->lock_object); sleepq_set_timeout(&lk->lock_object, timo); - + sleepq_lock(&lk->lock_object); + } /* * Decisional switch for real sleeping. */ Index: projects/hps_head/sys/kern/kern_switch.c =================================================================== --- projects/hps_head/sys/kern/kern_switch.c +++ projects/hps_head/sys/kern/kern_switch.c @@ -93,8 +93,6 @@ &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), ""); SCHED_STAT_DEFINE_VAR(sleepq, &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), ""); -SCHED_STAT_DEFINE_VAR(sleepqtimo, - &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), ""); SCHED_STAT_DEFINE_VAR(relinquish, &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), ""); SCHED_STAT_DEFINE_VAR(needresched, Index: projects/hps_head/sys/kern/kern_synch.c =================================================================== --- projects/hps_head/sys/kern/kern_synch.c +++ projects/hps_head/sys/kern/kern_synch.c @@ -219,12 +219,16 @@ * return from cursig(). 
*/ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); - if (sbt != 0) - sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); + if (sbt != 0) + sleepq_set_timeout_sbt(ident, sbt, pr, flags); + sleepq_lock(ident); + } else if (sbt != 0) { + sleepq_release(ident); + sleepq_set_timeout_sbt(ident, sbt, pr, flags); sleepq_lock(ident); } if (sbt != 0 && catch) @@ -289,8 +293,11 @@ * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); - if (sbt != 0) + if (sbt != 0) { + sleepq_release(ident); sleepq_set_timeout_sbt(ident, sbt, pr, flags); + sleepq_lock(ident); + } /* * Can't call ktrace with any spin locks held so it can lock the Index: projects/hps_head/sys/kern/kern_thread.c =================================================================== --- projects/hps_head/sys/kern/kern_thread.c +++ projects/hps_head/sys/kern/kern_thread.c @@ -149,6 +149,9 @@ audit_thread_alloc(td); #endif umtx_thread_alloc(td); + + mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); + callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); return (0); } @@ -162,6 +165,10 @@ td = (struct thread *)mem; + /* make sure to drain any use of the "td->td_slpcallout" */ + callout_drain(&td->td_slpcallout); + mtx_destroy(&td->td_slpmutex); + #ifdef INVARIANTS /* Verify that this thread is in a safe state to free. */ switch (td->td_state) { @@ -544,7 +551,6 @@ LIST_INIT(&td->td_lprof[0]); LIST_INIT(&td->td_lprof[1]); sigqueue_init(&td->td_sigqueue, p); - callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist); p->p_numthreads++; } Index: projects/hps_head/sys/kern/kern_timeout.c =================================================================== --- projects/hps_head/sys/kern/kern_timeout.c +++ projects/hps_head/sys/kern/kern_timeout.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include #include @@ -75,28 +77,25 @@ "struct callout *"); #ifdef CALLOUT_PROFILING -static int avg_depth; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, +static int avg_depth[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0, "Average number of items examined per softclock call. Units = 1/1000"); -static int avg_gcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, +static int avg_gcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); -static int avg_lockcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, +static int avg_lockcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); -static int avg_mpcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, +static int avg_mpcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); -static int avg_depth_dir; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0, "Average number of direct callouts examined per callout_process call. 
" "Units = 1/1000"); -static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, - &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " + &avg_lockcalls[1], 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); -static int avg_mpcalls_dir; -SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1], 0, "Average number of MP direct callouts made per callout_process call. " "Units = 1/1000"); #endif @@ -124,64 +123,230 @@ */ u_int callwheelsize, callwheelmask; +#define CALLOUT_RET_NORMAL 0 +#define CALLOUT_RET_CANCELLED 1 +#define CALLOUT_RET_DRAINING 2 + +struct callout_args { + sbintime_t time; /* absolute time for the event */ + sbintime_t precision; /* delta allowed wrt opt */ + void *arg; /* function argument */ + callout_func_t *func; /* function to call */ + int flags; /* flags passed to callout_reset() */ + int cpu; /* CPU we're scheduled on */ +}; + +typedef void callout_mutex_op_t(struct lock_object *); + +struct callout_mutex_ops { + callout_mutex_op_t *lock; + callout_mutex_op_t *unlock; +}; + +enum { + CALLOUT_LC_UNUSED_0, + CALLOUT_LC_UNUSED_1, + CALLOUT_LC_UNUSED_2, + CALLOUT_LC_UNUSED_3, + CALLOUT_LC_SPIN, + CALLOUT_LC_MUTEX, + CALLOUT_LC_RW, + CALLOUT_LC_RM, +}; + +static void +callout_mutex_op_none(struct lock_object *lock) +{ +} + +static void +callout_mutex_lock(struct lock_object *lock) +{ + + mtx_lock((struct mtx *)lock); +} + +static void +callout_mutex_unlock(struct lock_object *lock) +{ + + mtx_unlock((struct mtx *)lock); +} + +static void +callout_mutex_lock_spin(struct lock_object *lock) +{ + + mtx_lock_spin((struct mtx *)lock); +} + +static void +callout_mutex_unlock_spin(struct lock_object *lock) +{ + + mtx_unlock_spin((struct mtx *)lock); +} + +static void +callout_rm_wlock(struct lock_object *lock) +{ + + rm_wlock((struct rmlock *)lock); +} + +static void +callout_rm_wunlock(struct lock_object *lock) +{ + + rm_wunlock((struct rmlock *)lock); +} + +static void +callout_rw_wlock(struct lock_object *lock) +{ + + rw_wlock((struct rwlock *)lock); +} + +static void +callout_rw_wunlock(struct lock_object *lock) +{ + + rw_wunlock((struct rwlock *)lock); +} + +static const struct callout_mutex_ops callout_mutex_ops[8] = { + [CALLOUT_LC_UNUSED_0] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_UNUSED_1] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_UNUSED_2] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_UNUSED_3] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + }, + [CALLOUT_LC_SPIN] = { + .lock = callout_mutex_lock_spin, + .unlock = callout_mutex_unlock_spin, + }, + [CALLOUT_LC_MUTEX] = { + .lock = callout_mutex_lock, + .unlock = callout_mutex_unlock, + }, + [CALLOUT_LC_RW] = { + .lock = callout_rw_wlock, + .unlock = callout_rw_wunlock, + }, + [CALLOUT_LC_RM] = { + .lock = callout_rm_wlock, + .unlock = callout_rm_wunlock, + }, +}; + +static inline void +callout_lock_client(int c_flags, struct lock_object *c_lock) +{ + + callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock); +} + +static inline void +callout_unlock_client(int c_flags, struct lock_object *c_lock) +{ + + callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock); +} + /* - * The callout cpu exec entities represent informations necessary for - * describing the 
state of callouts currently running on the CPU and the ones - * necessary for migrating callouts to the new callout cpu. In particular, - * the first entry of the array cc_exec_entity holds informations for callout - * running in SWI thread context, while the second one holds informations - * for callout running directly from hardware interrupt context. - * The cached informations are very important for deferring migration when - * the migrating callout is already running. + * The callout CPU exec structure represent information necessary for + * describing the state of callouts currently running on the CPU and + * for handling deferred callout restarts. + * + * In particular, the first entry of the array cc_exec_entity holds + * information for callouts running from the SWI thread context, while + * the second one holds information for callouts running directly from + * the hardware interrupt context. */ struct cc_exec { + /* + * The "cc_curr" points to the currently executing callout and + * is protected by the "cc_lock" spinlock. If no callback is + * currently executing it is equal to "NULL". + */ struct callout *cc_curr; -#ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - sbintime_t ce_migration_time; - sbintime_t ce_migration_prec; + /* + * The "cc_restart_args" structure holds the argument for a + * deferred callback restart and is protected by the "cc_lock" + * spinlock. The structure is only valid if "cc_restart" is + * "true". If "cc_restart" is "false" the information in the + * "cc_restart_args" structure shall be ignored. + */ + struct callout_args cc_restart_args; + bool cc_restart; + /* + * The "cc_cancel" variable allows the currently pending + * callback to be atomically cancelled. This field is write + * protected by the "cc_lock" spinlock. + */ + bool cc_cancel; + /* + * The "cc_drain_fn" points to a function which shall be + * called with the argument stored in "cc_drain_arg" when an + * asynchronous drain is performed. This field is write + * protected by the "cc_lock" spinlock. + */ + callout_func_t *cc_drain_fn; + void *cc_drain_arg; + /* + * The following fields are used for callout profiling only: + */ +#ifdef CALLOUT_PROFILING + int cc_depth; + int cc_mpcalls; + int cc_lockcalls; + int cc_gcalls; #endif - bool cc_cancel; - bool cc_waiting; }; /* - * There is one struct callout_cpu per cpu, holding all relevant + * There is one "struct callout_cpu" per CPU, holding all relevant * state for the callout processing thread on the individual CPU. 
*/ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; - struct callout *cc_next; struct callout *cc_callout; struct callout_list *cc_callwheel; + struct callout_list cc_tmplist; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; - u_int cc_bucket; - u_int cc_inited; char cc_ktr_event_name[20]; }; -#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) +#define cc_exec_curr(cc, dir) (cc)->cc_exec_entity[(dir)].cc_curr +#define cc_exec_restart_args(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart_args +#define cc_exec_restart(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart +#define cc_exec_cancel(cc, dir) (cc)->cc_exec_entity[(dir)].cc_cancel +#define cc_exec_drain_fn(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_fn +#define cc_exec_drain_arg(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_arg +#define cc_exec_depth(cc, dir) (cc)->cc_exec_entity[(dir)].cc_depth +#define cc_exec_mpcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_mpcalls +#define cc_exec_lockcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_lockcalls +#define cc_exec_gcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_gcalls -#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr -#define cc_exec_next(cc) cc->cc_next -#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel -#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP -#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func -#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg -#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu -#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time -#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec - struct callout_cpu cc_cpu[MAXCPU]; -#define CPUBLOCK MAXCPU +#define CPUBLOCK -1 #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else @@ -196,67 +361,13 @@ static int timeout_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); -static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, -#ifdef CALLOUT_PROFILING - int *mpcalls, int *lockcalls, int *gcalls, -#endif - int direct); +static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); -/** - * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is cc_curr. - * If cc_curr is non-NULL, threads waiting in - * callout_drain() will be woken up as soon as the - * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and cc_lock held - * guarantees that the current callout will not run. - * The softclock() function sets this to 0 before it - * drops callout_lock to acquire c_lock, and it calls - * the handler only if curr_cancelled is still 0 after - * cc_lock is successfully acquired. - * cc_waiting - If a thread is waiting in callout_drain(), then - * callout_wait is nonzero. Set only when - * cc_curr is non-NULL. - */ - -/* - * Resets the execution entity tied to a specific callout cpu. 
- */ -static void -cc_cce_cleanup(struct callout_cpu *cc, int direct) -{ - - cc_exec_curr(cc, direct) = NULL; - cc_exec_cancel(cc, direct) = false; - cc_exec_waiting(cc, direct) = false; -#ifdef SMP - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif -} - -/* - * Checks if migration is requested by a specific callout cpu. - */ -static int -cc_cce_migrating(struct callout_cpu *cc, int direct) -{ - -#ifdef SMP - return (cc_migration_cpu(cc, direct) != CPUBLOCK); -#else - return (0); -#endif -} - /* - * Kernel low level callwheel initialization - * called on cpu0 during kernel startup. + * Kernel low level callwheel initialization called from cpu0 during + * kernel startup: */ static void callout_callwheel_init(void *dummy) @@ -311,15 +422,13 @@ mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); - cc->cc_inited = 1; cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, M_WAITOK); for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); + LIST_INIT(&cc->cc_tmplist); cc->cc_firstevent = SBT_MAX; - for (i = 0; i < 2; i++) - cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ @@ -327,38 +436,38 @@ for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_iflags = CALLOUT_LOCAL_ALLOC; + c->c_flags |= CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } -#ifdef SMP -/* - * Switches the cpu tied to a specific callout. - * The function expects a locked incoming callout cpu and returns with - * locked outcoming callout cpu. - */ -static struct callout_cpu * -callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) +#ifdef CALLOUT_PROFILING +static inline void +callout_clear_stats(struct callout_cpu *cc, const int direct) { - struct callout_cpu *new_cc; - - MPASS(c != NULL && cc != NULL); - CC_LOCK_ASSERT(cc); + cc_exec_depth(cc, direct) = 0; + cc_exec_mpcalls(cc, direct) = 0; + cc_exec_lockcalls(cc, direct) = 0; + cc_exec_gcalls(cc, direct) = 0; +} +#endif - /* - * Avoid interrupts and preemption firing after the callout cpu - * is blocked in order to avoid deadlocks as the new thread - * may be willing to acquire the callout cpu lock. 
- */ - c->c_cpu = CPUBLOCK; - spinlock_enter(); - CC_UNLOCK(cc); - new_cc = CC_CPU(new_cpu); - CC_LOCK(new_cc); - spinlock_exit(); - c->c_cpu = new_cpu; - return (new_cc); +#ifdef CALLOUT_PROFILING +static inline void +callout_update_stats(struct callout_cpu *cc, const int direct) +{ + avg_depth[direct] += + (cc_exec_depth(cc, direct) * 1000 - + avg_depth[direct]) >> 8; + avg_mpcalls[direct] += + (cc_exec_mpcalls(cc, direct) * 1000 - + avg_mpcalls[direct]) >> 8; + avg_lockcalls[direct] += + (cc_exec_lockcalls(cc, direct) * 1000 - + avg_lockcalls[direct]) >> 8; + avg_gcalls[direct] += + (cc_exec_gcalls(cc, direct) * 1000 - + avg_gcalls[direct]) >> 8; } #endif @@ -429,19 +538,19 @@ void callout_process(sbintime_t now) { - struct callout *tmp, *tmpn; + struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; -#ifdef CALLOUT_PROFILING - int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; -#endif cc = CC_SELF(); - mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); + CC_LOCK(cc); +#ifdef CALLOUT_PROFILING + callout_clear_stats(cc, 1); +#endif /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; @@ -474,49 +583,44 @@ /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { sc = &cc->cc_callwheel[firstb & callwheelmask]; - tmp = LIST_FIRST(sc); - while (tmp != NULL) { + while (1) { + tmp = LIST_FIRST(sc); + if (tmp == NULL) + break; + + LIST_REMOVE(tmp, c_links.le); + /* Run the callout if present time within allowed. */ if (tmp->c_time <= now) { /* - * Consumer told us the callout may be run - * directly from hardware interrupt context. + * Consumer told us the callout may be + * run directly from the hardware + * interrupt context: */ - if (tmp->c_iflags & CALLOUT_DIRECT) { -#ifdef CALLOUT_PROFILING - ++depth_dir; -#endif - cc_exec_next(cc) = - LIST_NEXT(tmp, c_links.le); - cc->cc_bucket = firstb & callwheelmask; - LIST_REMOVE(tmp, c_links.le); - softclock_call_cc(tmp, cc, -#ifdef CALLOUT_PROFILING - &mpcalls_dir, &lockcalls_dir, NULL, -#endif - 1); - tmp = cc_exec_next(cc); - cc_exec_next(cc) = NULL; + if (tmp->c_flags & CALLOUT_DIRECT) { + softclock_call_cc(tmp, cc, 1); } else { - tmpn = LIST_NEXT(tmp, c_links.le); - LIST_REMOVE(tmp, c_links.le); TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); - tmp->c_iflags |= CALLOUT_PROCESSED; - tmp = tmpn; + tmp->c_flags |= CALLOUT_PROCESSED; } continue; } + + /* insert callout into temporary list */ + LIST_INSERT_HEAD(&cc->cc_tmplist, tmp, c_links.le); + /* Skip events from distant future. */ if (tmp->c_time >= max) - goto next; + continue; + /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; - goto next; + continue; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) @@ -524,11 +628,14 @@ tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; -next: - tmp = LIST_NEXT(tmp, c_links.le); } + + /* Put temporary list back into the main bucket */ + LIST_SWAP(sc, &cc->cc_tmplist, callout, c_links.le); + /* Proceed with the next bucket. */ firstb++; + /* * Stop if we looked after present time and found * some event we can't execute at now. 
@@ -540,14 +647,13 @@ cpu_new_callout(curcpu, last, first); #endif #ifdef CALLOUT_PROFILING - avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; - avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; - avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; + callout_update_stats(cc, 1); #endif - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); + CC_UNLOCK(cc); /* - * swi_sched acquires the thread lock, so we don't want to call it - * with cc_lock held; incorrect locking order. + * "swi_sched()" acquires the thread lock and we don't want to + * call it having cc_lock held because it leads to a locking + * order reversal issue. */ if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); @@ -563,8 +669,7 @@ cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { - while (c->c_cpu == CPUBLOCK) - cpu_spinwait(); + cpu_spinwait(); continue; } #endif @@ -577,32 +682,56 @@ return (cc); } -static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, - sbintime_t sbt, sbintime_t precision, void (*func)(void *), - void *arg, int cpu, int flags) +static struct callout_cpu * +callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, + struct callout_args *coa) { - int bucket; +#ifndef NO_EVENTTIMERS + sbintime_t sbt; +#endif + u_int bucket; CC_LOCK_ASSERT(cc); - if (sbt < cc->cc_lastscan) - sbt = cc->cc_lastscan; - c->c_arg = arg; - c->c_iflags |= CALLOUT_PENDING; - c->c_iflags &= ~CALLOUT_PROCESSED; - c->c_flags |= CALLOUT_ACTIVE; - if (flags & C_DIRECT_EXEC) - c->c_iflags |= CALLOUT_DIRECT; - c->c_func = func; - c->c_time = sbt; - c->c_precision = precision; + + /* update flags before swapping locks, if any */ + c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART); + if (coa->flags & C_DIRECT_EXEC) + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); + else + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + +#ifdef SMP + /* only set the "c_cpu" if the CPU number changed and is valid */ + if (c->c_cpu != coa->cpu && coa->cpu > CPUBLOCK && + coa->cpu <= mp_maxid && !CPU_ABSENT(coa->cpu)) { + /* + * Avoid interrupts and preemption firing after the + * callout CPU is blocked in order to avoid deadlocks + * as the new thread may be willing to acquire the + * callout CPU lock: + */ + c->c_cpu = CPUBLOCK; + spinlock_enter(); + CC_UNLOCK(cc); + cc = CC_CPU(coa->cpu); + CC_LOCK(cc); + spinlock_exit(); + c->c_cpu = coa->cpu; + } +#endif + if (coa->time < cc->cc_lastscan) + coa->time = cc->cc_lastscan; + c->c_arg = coa->arg; + c->c_func = coa->func; + c->c_time = coa->time; + c->c_precision = coa->precision; + bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); - if (cc->cc_bucket == bucket) - cc_exec_next(cc) = c; + #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout @@ -613,42 +742,28 @@ sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; - cpu_new_callout(cpu, sbt, c->c_time); + cpu_new_callout(c->c_cpu, sbt, c->c_time); } #endif + return (cc); } -static void +static inline void callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0) - return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } -static void +static inline void softclock_call_cc(struct callout *c, struct callout_cpu *cc, -#ifdef CALLOUT_PROFILING 
- int *mpcalls, int *lockcalls, int *gcalls, -#endif - int direct) + const int direct) { - struct rm_priotracker tracker; - void (*c_func)(void *); + callout_func_t *c_func; void *c_arg; - struct lock_class *class; struct lock_object *c_lock; - uintptr_t lock_status; - int c_iflags; -#ifdef SMP - struct callout_cpu *new_cc; - void (*new_func)(void *); - void *new_arg; - int flags, new_cpu; - sbintime_t new_prec, new_time; -#endif + int c_flags; #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; @@ -656,62 +771,68 @@ static timeout_t *lastfunc; #endif - KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, - ("softclock_call_cc: pend %p %x", c, c->c_iflags)); - KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, - ("softclock_call_cc: act %p %x", c, c->c_flags)); - class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; - lock_status = 0; - if (c->c_flags & CALLOUT_SHAREDLOCK) { - if (class == &lock_class_rm) - lock_status = (uintptr_t)&tracker; - else - lock_status = 1; - } + KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == + (CALLOUT_PENDING | CALLOUT_ACTIVE), + ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); + c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; - c_iflags = c->c_iflags; - if (c->c_iflags & CALLOUT_LOCAL_ALLOC) - c->c_iflags = CALLOUT_LOCAL_ALLOC; - else - c->c_iflags &= ~CALLOUT_PENDING; - + c_flags = c->c_flags; + + /* remove pending bit */ + c->c_flags &= ~CALLOUT_PENDING; + + /* reset our local state */ cc_exec_curr(cc, direct) = c; - cc_exec_cancel(cc, direct) = false; - CC_UNLOCK(cc); + cc_exec_restart(cc, direct) = false; + cc_exec_drain_fn(cc, direct) = NULL; + cc_exec_drain_arg(cc, direct) = NULL; + if (c_lock != NULL) { - class->lc_lock(c_lock, lock_status); + cc_exec_cancel(cc, direct) = false; + CC_UNLOCK(cc); + + /* unlocked region for switching locks */ + + callout_lock_client(c_flags, c_lock); + /* - * The callout may have been cancelled - * while we switched locks. + * Check if the callout may have been cancelled while + * we were switching locks. Even though the callout is + * specifying a lock, it might not be certain this + * lock is locked when starting and stopping callouts. */ + CC_LOCK(cc); if (cc_exec_cancel(cc, direct)) { - class->lc_unlock(c_lock); - goto skip; + callout_unlock_client(c_flags, c_lock); + goto skip_cc_locked; } - /* The callout cannot be stopped now. */ - cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING - (*gcalls)++; + cc_exec_gcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING - (*lockcalls)++; + cc_exec_lockcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING - (*mpcalls)++; + cc_exec_mpcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } + /* The callout cannot be stopped now! 
*/ + cc_exec_cancel(cc, direct) = true; + CC_UNLOCK(cc); + + /* unlocked region */ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) @@ -738,85 +859,46 @@ #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) - class->lc_unlock(c_lock); -skip: + + /* + * At this point the callback structure might have been freed, + * so we need to check the previously copied value of + * "c->c_flags": + */ + if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) + callout_unlock_client(c_flags, c_lock); + CC_LOCK(cc); + +skip_cc_locked: KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); cc_exec_curr(cc, direct) = NULL; - if (cc_exec_waiting(cc, direct)) { + + /* Check if there is anything which needs draining */ + if (cc_exec_drain_fn(cc, direct) != NULL) { /* - * There is someone waiting for the - * callout to complete. - * If the callout was scheduled for - * migration just cancel it. + * Unlock the CPU callout last, so that any use of + * structures belonging to the callout are complete: */ - if (cc_cce_migrating(cc, direct)) { - cc_cce_cleanup(cc, direct); - - /* - * It should be assert here that the callout is not - * destroyed but that is not easy. - */ - c->c_iflags &= ~CALLOUT_DFRMIGRATION; - } - cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); - wakeup(&cc_exec_waiting(cc, direct)); + /* call drain function unlocked */ + cc_exec_drain_fn(cc, direct)( + cc_exec_drain_arg(cc, direct)); CC_LOCK(cc); - } else if (cc_cce_migrating(cc, direct)) { - KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0, - ("Migrating legacy callout %p", c)); -#ifdef SMP - /* - * If the callout was scheduled for - * migration just perform it now. - */ - new_cpu = cc_migration_cpu(cc, direct); - new_time = cc_migration_time(cc, direct); - new_prec = cc_migration_prec(cc, direct); - new_func = cc_migration_func(cc, direct); - new_arg = cc_migration_arg(cc, direct); - cc_cce_cleanup(cc, direct); - - /* - * It should be assert here that the callout is not destroyed - * but that is not easy. - * - * As first thing, handle deferred callout stops. - */ - if (!callout_migrating(c)) { - CTR3(KTR_CALLOUT, - "deferred cancelled %p func %p arg %p", - c, new_func, new_arg); - callout_cc_del(c, cc); - return; + } else if (c_flags & CALLOUT_LOCAL_ALLOC) { + /* return callout back to freelist */ + callout_cc_del(c, cc); + } else if (cc_exec_restart(cc, direct)) { + struct callout_cpu *new_cc; + /* [re-]schedule callout, if any */ + new_cc = callout_cc_add_locked(c, cc, + &cc_exec_restart_args(cc, direct)); + if (new_cc != cc) { + /* switch locks back again */ + CC_UNLOCK(new_cc); + CC_LOCK(cc); } - c->c_iflags &= ~CALLOUT_DFRMIGRATION; - - new_cc = callout_cpu_switch(c, cc, new_cpu); - flags = (direct) ? C_DIRECT_EXEC : 0; - callout_cc_add(c, new_cc, new_time, new_prec, new_func, - new_arg, new_cpu, flags); - CC_UNLOCK(new_cc); - CC_LOCK(cc); -#else - panic("migration should not happen"); -#endif } - /* - * If the current callout is locally allocated (from - * timeout(9)) then put it on the freelist. - * - * Note: we need to check the cached copy of c_iflags because - * if it was not local, then it's not safe to deref the - * callout pointer. 
- */ - KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 || - c->c_iflags == CALLOUT_LOCAL_ALLOC, - ("corrupted callout")); - if (c_iflags & CALLOUT_LOCAL_ALLOC) - callout_cc_del(c, cc); } /* @@ -840,28 +922,18 @@ { struct callout_cpu *cc; struct callout *c; -#ifdef CALLOUT_PROFILING - int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; -#endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - softclock_call_cc(c, cc, #ifdef CALLOUT_PROFILING - &mpcalls, &lockcalls, &gcalls, -#endif - 0); -#ifdef CALLOUT_PROFILING - ++depth; + callout_clear_stats(cc, 0); #endif + while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + softclock_call_cc(c, cc, 0); } #ifdef CALLOUT_PROFILING - avg_depth += (depth * 1000 - avg_depth) >> 8; - avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; - avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; - avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; + callout_update_stats(cc, 0); #endif CC_UNLOCK(cc); } @@ -897,10 +969,11 @@ /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); - callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); + callout_reset(new, to_ticks, ftn, arg); + return (handle); } @@ -908,6 +981,7 @@ untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; + bool match; /* * Check for a handle that was initialized @@ -918,9 +992,11 @@ return; cc = callout_lock(handle.callout); - if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) - callout_stop(handle.callout); + match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); CC_UNLOCK(cc); + + if (match) + callout_stop(handle.callout); } void @@ -929,6 +1005,118 @@ handle->callout = NULL; } +static int +callout_restart_async(struct callout *c, struct callout_args *coa, + callout_func_t *drain_fn, void *drain_arg) +{ + struct callout_cpu *cc; + int cancelled; + int direct; + + cc = callout_lock(c); + + /* Figure out if the callout is direct or not */ + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + + /* + * Check if the callback is currently scheduled for + * completion: + */ + if (cc_exec_curr(cc, direct) == c) { + /* + * Try to prevent the callback from running by setting + * the "cc_cancel" variable to "true". Also check if + * the callout was previously subject to a deferred + * callout restart: + */ + if (cc_exec_cancel(cc, direct) == false || + (c->c_flags & CALLOUT_DEFRESTART) != 0) { + cc_exec_cancel(cc, direct) = true; + cancelled = CALLOUT_RET_CANCELLED; + } else { + cancelled = CALLOUT_RET_NORMAL; + } + + /* + * Prevent callback restart if "callout_drain_xxx()" + * is being called or we are stopping the callout or + * the callback was preallocated by us: + */ + if (cc_exec_drain_fn(cc, direct) != NULL || + coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? 
"cancelled and draining" : "draining", + c, c->c_func, c->c_arg); + + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* clear restart flag, if any */ + cc_exec_restart(cc, direct) = false; + + /* set drain function, if any */ + if (drain_fn != NULL) { + cc_exec_drain_fn(cc, direct) = drain_fn; + cc_exec_drain_arg(cc, direct) = drain_arg; + cancelled |= CALLOUT_RET_DRAINING; + } + } else { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and restarting" : "restarting", + c, c->c_func, c->c_arg); + + /* get us back into the game */ + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART); + c->c_flags &= ~CALLOUT_PROCESSED; + + /* enable deferred restart */ + cc_exec_restart(cc, direct) = true; + + /* store arguments for the deferred restart, if any */ + cc_exec_restart_args(cc, direct) = *coa; + } + } else { + /* stop callout */ + if (c->c_flags & CALLOUT_PENDING) { + /* + * The callback has not yet been executed, and + * we simply just need to unlink it: + */ + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + LIST_REMOVE(c, c_links.le); + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } + cancelled = CALLOUT_RET_CANCELLED; + } else { + cancelled = CALLOUT_RET_NORMAL; + } + + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "rescheduled" : "scheduled", + c, c->c_func, c->c_arg); + + /* [re-]schedule callout, if any */ + if (coa != NULL) { + cc = callout_cc_add_locked(c, cc, coa); + } else { + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* return callback to pre-allocated list, if any */ + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && + cancelled != CALLOUT_RET_NORMAL) { + callout_cc_del(c, cc); + } + } + } + CC_UNLOCK(cc); + return (cancelled); +} + /* * New interface; clients allocate their own callout structures. * @@ -947,33 +1135,32 @@ */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, - void (*ftn)(void *), void *arg, int cpu, int flags) + callout_func_t *ftn, void *arg, int cpu, int flags) { - sbintime_t to_sbt, pr; - struct callout_cpu *cc; - int cancelled, direct; - int ignore_cpu=0; + struct callout_args coa; - cancelled = 0; - if (cpu == -1) { - ignore_cpu = 1; - } else if ((cpu >= MAXCPU) || - ((CC_CPU(cpu))->cc_inited == 0)) { - /* Invalid CPU spec */ - panic("Invalid CPU in callout %d", cpu); - } - if (flags & C_ABSOLUTE) { - to_sbt = sbt; + /* store arguments for callout add function */ + coa.func = ftn; + coa.arg = arg; + coa.precision = precision; + coa.flags = flags; + coa.cpu = cpu; + + /* compute the rest of the arguments needed */ + if (coa.flags & C_ABSOLUTE) { + coa.time = sbt; } else { - if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) + sbintime_t pr; + + if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; - if ((flags & C_HARDCLOCK) || + if ((coa.flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { - to_sbt = getsbinuptime(); + coa.time = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ - to_sbt += tc_tick_sbt - tick_sbt; + coa.time += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* @@ -983,150 +1170,29 @@ * active ones. 
*/ #ifdef __LP64__ - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); #else spinlock_enter(); - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif - if ((flags & C_HARDCLOCK) == 0) - to_sbt += tick_sbt; + if ((coa.flags & C_HARDCLOCK) == 0) + coa.time += tick_sbt; } else - to_sbt = sbinuptime(); - if (SBT_MAX - to_sbt < sbt) - to_sbt = SBT_MAX; + coa.time = sbinuptime(); + if (SBT_MAX - coa.time < sbt) + coa.time = SBT_MAX; else - to_sbt += sbt; - pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : - sbt >> C_PRELGET(flags)); - if (pr > precision) - precision = pr; - } - /* - * This flag used to be added by callout_cc_add, but the - * first time you call this we could end up with the - * wrong direct flag if we don't do it before we add. - */ - if (flags & C_DIRECT_EXEC) { - direct = 1; - } else { - direct = 0; - } - KASSERT(!direct || c->c_lock == NULL, - ("%s: direct callout %p has lock", __func__, c)); - cc = callout_lock(c); - /* - * Don't allow migration of pre-allocated callouts lest they - * become unbalanced or handle the case where the user does - * not care. - */ - if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) || - ignore_cpu) { - cpu = c->c_cpu; + coa.time += sbt; + pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(coa.flags)); + if (pr > coa.precision) + coa.precision = pr; } - if (cc_exec_curr(cc, direct) == c) { - /* - * We're being asked to reschedule a callout which is - * currently in progress. If there is a lock then we - * can cancel the callout if it has not really started. - */ - if (c->c_lock != NULL && cc_exec_cancel(cc, direct)) - cancelled = cc_exec_cancel(cc, direct) = true; - if (cc_exec_waiting(cc, direct)) { - /* - * Someone has called callout_drain to kill this - * callout. Don't reschedule. - */ - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled" : "failed to cancel", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (cancelled); - } -#ifdef SMP - if (callout_migrating(c)) { - /* - * This only occurs when a second callout_reset_sbt_on - * is made after a previous one moved it into - * deferred migration (below). Note we do *not* change - * the prev_cpu even though the previous target may - * be different. - */ - cc_migration_cpu(cc, direct) = cpu; - cc_migration_time(cc, direct) = to_sbt; - cc_migration_prec(cc, direct) = precision; - cc_migration_func(cc, direct) = ftn; - cc_migration_arg(cc, direct) = arg; - cancelled = 1; - CC_UNLOCK(cc); - return (cancelled); - } -#endif - } - if (c->c_iflags & CALLOUT_PENDING) { - if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { - if (cc_exec_next(cc) == c) - cc_exec_next(cc) = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - cancelled = 1; - c->c_iflags &= ~ CALLOUT_PENDING; - c->c_flags &= ~ CALLOUT_ACTIVE; - } - -#ifdef SMP - /* - * If the callout must migrate try to perform it immediately. - * If the callout is currently running, just defer the migration - * to a more appropriate moment. - */ - if (c->c_cpu != cpu) { - if (cc_exec_curr(cc, direct) == c) { - /* - * Pending will have been removed since we are - * actually executing the callout on another - * CPU. That callout should be waiting on the - * lock the caller holds. If we set both - * active/and/pending after we return and the - * lock on the executing callout proceeds, it - * will then see pending is true and return. 
- * At the return from the actual callout execution - * the migration will occur in softclock_call_cc - * and this new callout will be placed on the - * new CPU via a call to callout_cpu_switch() which - * will get the lock on the right CPU followed - * by a call callout_cc_add() which will add it there. - * (see above in softclock_call_cc()). - */ - cc_migration_cpu(cc, direct) = cpu; - cc_migration_time(cc, direct) = to_sbt; - cc_migration_prec(cc, direct) = precision; - cc_migration_func(cc, direct) = ftn; - cc_migration_arg(cc, direct) = arg; - c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); - c->c_flags |= CALLOUT_ACTIVE; - CTR6(KTR_CALLOUT, - "migration of %p func %p arg %p in %d.%08x to %u deferred", - c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff), cpu); - CC_UNLOCK(cc); - return (cancelled); - } - cc = callout_cpu_switch(c, cc, cpu); - } -#endif - - callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); - CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", - cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff)); - CC_UNLOCK(cc); - - return (cancelled); + /* get callback started, if any */ + return (callout_restart_async(c, &coa, NULL, NULL)); } /* @@ -1145,270 +1211,106 @@ } int -_callout_stop_safe(struct callout *c, int safe) +callout_stop(struct callout *c) { - struct callout_cpu *cc, *old_cc; - struct lock_class *class; - int direct, sq_locked, use_lock; - int not_on_a_list; - - if (safe) - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, - "calling %s", __func__); + /* get callback stopped, if any */ + return (callout_restart_async(c, NULL, NULL, NULL)); +} - /* - * Some old subsystems don't hold Giant while running a callout_stop(), - * so just discard this check for the moment. - */ - if (!safe && c->c_lock != NULL) { - if (c->c_lock == &Giant.lock_object) - use_lock = mtx_owned(&Giant); - else { - use_lock = 1; - class = LOCK_CLASS(c->c_lock); - class->lc_assert(c->c_lock, LA_XLOCKED); - } - } else - use_lock = 0; - if (c->c_iflags & CALLOUT_DIRECT) { - direct = 1; - } else { - direct = 0; - } - sq_locked = 0; - old_cc = NULL; -again: - cc = callout_lock(c); +static void +callout_drain_function(void *arg) +{ + wakeup(arg); +} - if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == - (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && - ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { - /* - * Special case where this slipped in while we - * were migrating *as* the callout is about to - * execute. The caller probably holds the lock - * the callout wants. - * - * Get rid of the migration first. Then set - * the flag that tells this code *not* to - * try to remove it from any lists (its not - * on one yet). When the callout wheel runs, - * it will ignore this callout. - */ - c->c_iflags &= ~CALLOUT_PENDING; - c->c_flags &= ~CALLOUT_ACTIVE; - not_on_a_list = 1; - } else { - not_on_a_list = 0; - } +int +callout_drain_async(struct callout *c, callout_func_t *fn, void *arg) +{ + /* get callback stopped, if any */ + return (callout_restart_async( + c, NULL, fn, arg) & CALLOUT_RET_DRAINING); +} - /* - * If the callout was migrating while the callout cpu lock was - * dropped, just drop the sleepqueue lock and check the states - * again. 
- */ - if (sq_locked != 0 && cc != old_cc) { -#ifdef SMP - CC_UNLOCK(cc); - sleepq_release(&cc_exec_waiting(old_cc, direct)); - sq_locked = 0; - old_cc = NULL; - goto again; -#else - panic("migration should not happen"); -#endif - } +int +callout_drain(struct callout *c) +{ + int cancelled; - /* - * If the callout isn't pending, it's not on the queue, so - * don't attempt to remove it from the queue. We can try to - * stop it by other means however. - */ - if (!(c->c_iflags & CALLOUT_PENDING)) { - c->c_flags &= ~CALLOUT_ACTIVE; + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Draining callout"); + + callout_lock_client(c->c_flags, c->c_lock); + + /* at this point the "c->c_cpu" field is not changing */ + + cancelled = callout_drain_async(c, &callout_drain_function, c); + + if (cancelled != CALLOUT_RET_NORMAL) { + struct callout_cpu *cc; + int direct; + + CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p", + c, c->c_func, c->c_arg); + + cc = callout_lock(c); + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); /* - * If it wasn't on the queue and it isn't the current - * callout, then we can't stop it, so just bail. + * We've gotten our callout CPU lock, it is safe to + * drop the initial lock: */ - if (cc_exec_curr(cc, direct) != c) { - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - if (sq_locked) - sleepq_release(&cc_exec_waiting(cc, direct)); - return (0); - } + callout_unlock_client(c->c_flags, c->c_lock); - if (safe) { - /* - * The current callout is running (or just - * about to run) and blocking is allowed, so - * just wait for the current invocation to - * finish. - */ - while (cc_exec_curr(cc, direct) == c) { - /* - * Use direct calls to sleepqueue interface - * instead of cv/msleep in order to avoid - * a LOR between cc_lock and sleepqueue - * chain spinlocks. This piece of code - * emulates a msleep_spin() call actually. - * - * If we already have the sleepqueue chain - * locked, then we can safely block. If we - * don't already have it locked, however, - * we have to drop the cc_lock to lock - * it. This opens several races, so we - * restart at the beginning once we have - * both locks. If nothing has changed, then - * we will end up back here with sq_locked - * set. - */ - if (!sq_locked) { - CC_UNLOCK(cc); - sleepq_lock( - &cc_exec_waiting(cc, direct)); - sq_locked = 1; - old_cc = cc; - goto again; - } + /* Wait for drain to complete */ + + while (cc_exec_curr(cc, direct) == c) + msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0); - /* - * Migration could be cancelled here, but - * as long as it is still not sure when it - * will be packed up, just let softclock() - * take care of it. - */ - cc_exec_waiting(cc, direct) = true; - DROP_GIANT(); - CC_UNLOCK(cc); - sleepq_add( - &cc_exec_waiting(cc, direct), - &cc->cc_lock.lock_object, "codrain", - SLEEPQ_SLEEP, 0); - sleepq_wait( - &cc_exec_waiting(cc, direct), - 0); - sq_locked = 0; - old_cc = NULL; - - /* Reacquire locks previously released. */ - PICKUP_GIANT(); - CC_LOCK(cc); - } - } else if (use_lock && - !cc_exec_cancel(cc, direct)) { - - /* - * The current callout is waiting for its - * lock which we hold. Cancel the callout - * and return. After our caller drops the - * lock, the callout will be skipped in - * softclock(). 
- */ - cc_exec_cancel(cc, direct) = true; - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", - c, c->c_func, c->c_arg); - KASSERT(!cc_cce_migrating(cc, direct), - ("callout wrongly scheduled for migration")); - if (callout_migrating(c)) { - c->c_iflags &= ~CALLOUT_DFRMIGRATION; -#ifdef SMP - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif - } - CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain locked")); - return (1); - } else if (callout_migrating(c)) { - /* - * The callout is currently being serviced - * and the "next" callout is scheduled at - * its completion with a migration. We remove - * the migration flag so it *won't* get rescheduled, - * but we can't stop the one thats running so - * we return 0. - */ - c->c_iflags &= ~CALLOUT_DFRMIGRATION; -#ifdef SMP - /* - * We can't call cc_cce_cleanup here since - * if we do it will remove .ce_curr and - * its still running. This will prevent a - * reschedule of the callout when the - * execution completes. - */ - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif - CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (0); - } - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain still locked")); - return (0); + } else { + callout_unlock_client(c->c_flags, c->c_lock); } - if (sq_locked) - sleepq_release(&cc_exec_waiting(cc, direct)); - - c->c_iflags &= ~CALLOUT_PENDING; - c->c_flags &= ~CALLOUT_ACTIVE; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if (not_on_a_list == 0) { - if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { - if (cc_exec_next(cc) == c) - cc_exec_next(cc) = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - } - callout_cc_del(c, cc); - CC_UNLOCK(cc); - return (1); + + return (cancelled & CALLOUT_RET_CANCELLED); } void callout_init(struct callout *c, int mpsafe) { - bzero(c, sizeof *c); if (mpsafe) { - c->c_lock = NULL; - c->c_iflags = CALLOUT_RETURNUNLOCKED; + _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED); } else { - c->c_lock = &Giant.lock_object; - c->c_iflags = 0; + _callout_init_lock(c, &Giant.lock_object, 0); } - c->c_cpu = timeout_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); + KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0, + ("callout_init_lock: bad flags 0x%08x", flags)); + flags &= CALLOUT_RETURNUNLOCKED; + if (lock != NULL) { + struct lock_class *class = LOCK_CLASS(lock); + if (class == &lock_class_mtx_sleep) + flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX); + else if (class == &lock_class_mtx_spin) + flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN); + else if (class == &lock_class_rm) + flags |= CALLOUT_SET_LC(CALLOUT_LC_RM); + else if (class == &lock_class_rw) + flags |= CALLOUT_SET_LC(CALLOUT_LC_RW); + else + panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name); + } else { + flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0); + } c->c_lock = lock; - KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, - ("callout_init_lock: bad flags %d", flags)); - KASSERT(lock != NULL || (flags & 
CALLOUT_RETURNUNLOCKED) == 0, - ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); - KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & - (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", - __func__)); - c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); + c->c_flags = flags; c->c_cpu = timeout_cpu; } Index: projects/hps_head/sys/kern/subr_sleepqueue.c =================================================================== --- projects/hps_head/sys/kern/subr_sleepqueue.c +++ projects/hps_head/sys/kern/subr_sleepqueue.c @@ -152,7 +152,8 @@ */ static int sleepq_catch_signals(void *wchan, int pri); static int sleepq_check_signals(void); -static int sleepq_check_timeout(void); +static int sleepq_check_timeout(struct thread *); +static void sleepq_stop_timeout(struct thread *); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif @@ -373,17 +374,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { - struct sleepqueue_chain *sc; struct thread *td; td = curthread; - sc = SC_LOOKUP(wchan); - mtx_assert(&sc->sc_lock, MA_OWNED); - MPASS(TD_ON_SLEEPQ(td)); - MPASS(td->td_sleepqueue == NULL); - MPASS(wchan != NULL); + + mtx_lock_spin(&td->td_slpmutex); callout_reset_sbt_on(&td->td_slpcallout, sbt, pr, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC); + mtx_unlock_spin(&td->td_slpmutex); } /* @@ -559,11 +557,8 @@ * Check to see if we timed out. */ static int -sleepq_check_timeout(void) +sleepq_check_timeout(struct thread *td) { - struct thread *td; - - td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* @@ -573,28 +568,21 @@ td->td_flags &= ~TDF_TIMEOUT; return (EWOULDBLOCK); } - - /* - * If TDF_TIMOFAIL is set, the timeout ran after we had - * already been woken up. - */ - if (td->td_flags & TDF_TIMOFAIL) - td->td_flags &= ~TDF_TIMOFAIL; - - /* - * If callout_stop() fails, then the timeout is running on - * another CPU, so synchronize with it to avoid having it - * accidentally wake up a subsequent sleep. - */ - else if (callout_stop(&td->td_slpcallout) == 0) { - td->td_flags |= TDF_TIMEOUT; - TD_SET_SLEEPING(td); - mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL); - } return (0); } /* + * Atomically stop the timeout by using a mutex. + */ +static void +sleepq_stop_timeout(struct thread *td) +{ + mtx_lock_spin(&td->td_slpmutex); + callout_stop(&td->td_slpcallout); + mtx_unlock_spin(&td->td_slpmutex); +} + +/* * Check to see if we were awoken by a signal. 
*/ static int @@ -664,9 +652,11 @@ MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); - rval = sleepq_check_timeout(); + rval = sleepq_check_timeout(td); thread_unlock(td); + sleepq_stop_timeout(td); + return (rval); } @@ -677,12 +667,18 @@ int sleepq_timedwait_sig(void *wchan, int pri) { + struct thread *td; int rcatch, rvalt, rvals; + td = curthread; + rcatch = sleepq_catch_signals(wchan, pri); - rvalt = sleepq_check_timeout(); + rvalt = sleepq_check_timeout(td); rvals = sleepq_check_signals(); - thread_unlock(curthread); + thread_unlock(td); + + sleepq_stop_timeout(td); + if (rcatch) return (rcatch); if (rvals) @@ -889,64 +885,49 @@ static void sleepq_timeout(void *arg) { - struct sleepqueue_chain *sc; - struct sleepqueue *sq; - struct thread *td; - void *wchan; - int wakeup_swapper; + struct thread *td = arg; + int wakeup_swapper = 0; - td = arg; - wakeup_swapper = 0; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); - /* - * First, see if the thread is asleep and get the wait channel if - * it is. - */ - thread_lock(td); - if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { - wchan = td->td_wchan; - sc = SC_LOOKUP(wchan); - THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); - sq = sleepq_lookup(wchan); - MPASS(sq != NULL); - td->td_flags |= TDF_TIMEOUT; - wakeup_swapper = sleepq_resume_thread(sq, td, 0); - thread_unlock(td); - if (wakeup_swapper) - kick_proc0(); - return; - } + /* Handle the three cases which can happen */ - /* - * If the thread is on the SLEEPQ but isn't sleeping yet, it - * can either be on another CPU in between sleepq_add() and - * one of the sleepq_*wait*() routines or it can be in - * sleepq_catch_signals(). - */ + thread_lock(td); if (TD_ON_SLEEPQ(td)) { - td->td_flags |= TDF_TIMEOUT; - thread_unlock(td); - return; - } + if (TD_IS_SLEEPING(td)) { + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + void *wchan; - /* - * Now check for the edge cases. First, if TDF_TIMEOUT is set, - * then the other thread has already yielded to us, so clear - * the flag and resume it. If TDF_TIMEOUT is not set, then the - * we know that the other thread is not on a sleep queue, but it - * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL - * to let it know that the timeout has already run and doesn't - * need to be canceled. - */ - if (td->td_flags & TDF_TIMEOUT) { - MPASS(TD_IS_SLEEPING(td)); - td->td_flags &= ~TDF_TIMEOUT; - TD_CLR_SLEEPING(td); - wakeup_swapper = setrunnable(td); - } else - td->td_flags |= TDF_TIMOFAIL; + /* + * Case I - thread is asleep and needs to be + * awoken: + */ + wchan = td->td_wchan; + sc = SC_LOOKUP(wchan); + THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); + sq = sleepq_lookup(wchan); + MPASS(sq != NULL); + td->td_flags |= TDF_TIMEOUT; + wakeup_swapper = sleepq_resume_thread(sq, td, 0); + } else { + /* + * Case II - cancel going to sleep by setting + * the timeout flag because the target thread + * is not asleep yet. It can be on another CPU + * in between sleepq_add() and one of the + * sleepq_*wait*() routines or it can be in + * sleepq_catch_signals(). + */ + td->td_flags |= TDF_TIMEOUT; + } + } else { + /* + * Case III - thread is already woken up by a wakeup + * call and should not timeout. Nothing to do! 
+ */ + } thread_unlock(td); if (wakeup_swapper) kick_proc0(); Index: projects/hps_head/sys/ofed/include/linux/completion.h =================================================================== --- projects/hps_head/sys/ofed/include/linux/completion.h +++ projects/hps_head/sys/ofed/include/linux/completion.h @@ -64,3 +64,4 @@ extern int linux_completion_done(struct completion *); #endif /* _LINUX_COMPLETION_H_ */ + Index: projects/hps_head/sys/ofed/include/linux/linux_compat.c =================================================================== --- projects/hps_head/sys/ofed/include/linux/linux_compat.c +++ projects/hps_head/sys/ofed/include/linux/linux_compat.c @@ -846,7 +846,9 @@ if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); + sleepq_release(c); sleepq_set_timeout(c, linux_timer_jiffies_until(end)); + sleepq_lock(c); if (flags & SLEEPQ_INTERRUPTIBLE) ret = sleepq_timedwait_sig(c, 0); else Index: projects/hps_head/sys/sys/_callout.h =================================================================== --- projects/hps_head/sys/sys/_callout.h +++ projects/hps_head/sys/sys/_callout.h @@ -46,6 +46,8 @@ SLIST_HEAD(callout_slist, callout); TAILQ_HEAD(callout_tailq, callout); +typedef void callout_func_t(void *); + struct callout { union { LIST_ENTRY(callout) le; @@ -55,10 +57,9 @@ sbintime_t c_time; /* ticks to the event */ sbintime_t c_precision; /* delta allowed wrt opt */ void *c_arg; /* function argument */ - void (*c_func)(void *); /* function to call */ + callout_func_t *c_func; /* function to call */ struct lock_object *c_lock; /* lock to handle */ - short c_flags; /* User State */ - short c_iflags; /* Internal State */ + int c_flags; /* state of this entry */ volatile int c_cpu; /* CPU we're scheduled on */ }; Index: projects/hps_head/sys/sys/callout.h =================================================================== --- projects/hps_head/sys/sys/callout.h +++ projects/hps_head/sys/sys/callout.h @@ -45,10 +45,12 @@ #define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ -#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ -#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ +#define CALLOUT_UNUSED_5 0x0020 /* --available-- */ +#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */ #define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */ #define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */ +#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */ +#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */ #define C_DIRECT_EXEC 0x0001 /* direct execution of callout */ #define C_PRELBITS 7 @@ -63,25 +65,10 @@ }; #ifdef _KERNEL -/* - * Note the flags field is actually *two* fields. The c_flags - * field is the one that caller operations that may, or may not have - * a lock touches i.e. callout_deactivate(). The other, the c_iflags, - * is the internal flags that *must* be kept correct on which the - * callout system depend on e.g. callout_pending(). - * The c_iflag is used internally by the callout system to determine which - * list the callout is on and track internal state. Callers *should not* - * use the c_flags field directly but should use the macros provided. - * - * The c_iflags field holds internal flags that are protected by internal - * locks of the callout subsystem. The c_flags field holds external flags. 
- * The caller must hold its own lock while manipulating or reading external - * flags via callout_active(), callout_deactivate(), callout_reset*(), or - * callout_stop() to avoid races. - */ #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) -#define callout_drain(c) _callout_stop_safe(c, 1) +int callout_drain(struct callout *); +int callout_drain_async(struct callout *, callout_func_t *, void *); void callout_init(struct callout *, int); void _callout_init_lock(struct callout *, struct lock_object *, int); #define callout_init_mtx(c, mtx, flags) \ @@ -93,9 +80,9 @@ #define callout_init_rw(c, rw, flags) \ _callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \ NULL, (flags)) -#define callout_pending(c) ((c)->c_iflags & CALLOUT_PENDING) +#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t, - void (*)(void *), void *, int, int); + callout_func_t *, void *, int, int); #define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags)) #define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \ @@ -119,8 +106,7 @@ int callout_schedule_on(struct callout *, int, int); #define callout_schedule_curcpu(c, on_tick) \ callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) -#define callout_stop(c) _callout_stop_safe(c, 0) -int _callout_stop_safe(struct callout *, int); +int callout_stop(struct callout *); void callout_process(sbintime_t now); #endif Index: projects/hps_head/sys/sys/proc.h =================================================================== --- projects/hps_head/sys/sys/proc.h +++ projects/hps_head/sys/sys/proc.h @@ -308,6 +308,7 @@ } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval struct callout td_slpcallout; /* (h) Callout for sleep. */ + struct mtx td_slpmutex; /* (h) Mutex for sleep callout */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ @@ -364,7 +365,7 @@ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ -#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ +#define TDF_UNUSED12 0x00001000 /* --available-- */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ @@ -706,7 +707,7 @@ #define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ -#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ +#define SWT_UNUSED5 5 /* --available-- */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. 
*/ Index: share/man/man9/Makefile =================================================================== --- share/man/man9/Makefile +++ share/man/man9/Makefile @@ -1558,6 +1558,7 @@ timeout.9 callout_active.9 \ timeout.9 callout_deactivate.9 \ timeout.9 callout_drain.9 \ + timeout.9 callout_drain_async.9 \ timeout.9 callout_handle_init.9 \ timeout.9 callout_init.9 \ timeout.9 callout_init_mtx.9 \ Index: share/man/man9/timeout.9 =================================================================== --- share/man/man9/timeout.9 +++ share/man/man9/timeout.9 @@ -29,13 +29,14 @@ .\" .\" $FreeBSD$ .\" -.Dd October 8, 2014 +.Dd January 24, 2015 .Dt TIMEOUT 9 .Os .Sh NAME .Nm callout_active , .Nm callout_deactivate , .Nm callout_drain , +.Nm callout_drain_async , .Nm callout_handle_init , .Nm callout_init , .Nm callout_init_mtx , @@ -63,256 +64,248 @@ .In sys/systm.h .Bd -literal typedef void timeout_t (void *); +typedef void callout_func_t (void *); .Ed -.Ft int -.Fn callout_active "struct callout *c" -.Ft void -.Fn callout_deactivate "struct callout *c" -.Ft int -.Fn callout_drain "struct callout *c" -.Ft void -.Fn callout_handle_init "struct callout_handle *handle" -.Bd -literal -struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle); -.Ed -.Ft void -.Fn callout_init "struct callout *c" "int mpsafe" -.Ft void -.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" -.Ft void -.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" -.Ft void -.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" -.Ft int -.Fn callout_pending "struct callout *c" -.Ft int -.Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg" -.Ft int -.Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \ -"void *arg" -.Ft int -.Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \ -"void *arg" "int cpu" -.Ft int -.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" -.Ft int -.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int flags" -.Ft int -.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags" -.Ft int -.Fn callout_schedule "struct callout *c" "int ticks" -.Ft int -.Fn callout_schedule_curcpu "struct callout *c" "int ticks" -.Ft int -.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" -.Ft int -.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -.Ft int -.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int flags" -.Ft int -.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ -"sbintime_t pr" "int cpu" "int flags" -.Ft int -.Fn callout_stop "struct callout *c" -.Ft struct callout_handle -.Fn timeout "timeout_t *func" "void *arg" "int ticks" -.Ft void -.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" .Sh DESCRIPTION The .Nm callout -API is used to schedule a call to an arbitrary function at a specific -time in the future. -Consumers of this API are required to allocate a callout structure -.Pq struct callout +API is used to schedule a one-time call to an arbitrary function at a +specific time in the future. +Consumers of this API are required to allocate a +.Ft struct callout for each pending function invocation. 
-This structure stores state about the pending function invocation including -the function to be called and the time at which the function should be invoked. -Pending function calls can be cancelled or rescheduled to a different time. -In addition, -a callout structure may be reused to schedule a new function call after a -scheduled call is completed. -.Pp -Callouts only provide a single-shot mode. -If a consumer requires a periodic timer, -it must explicitly reschedule each function call. -This is normally done by rescheduling the subsequent call within the called -function. -.Pp -Callout functions must not sleep. -They may not acquire sleepable locks, -wait on condition variables, -perform blocking allocation requests, -or invoke any other action that might sleep. -.Pp -Each callout structure must be initialized by -.Fn callout_init , -.Fn callout_init_mtx , -.Fn callout_init_rm , -or -.Fn callout_init_rw -before it is passed to any of the other callout functions. The +.Ft struct callout +stores the full state about any pending function call and +must be drained by a call to +.Fn callout_drain +or +.Fn callout_drain_async +before freeing. +.Sh INITIALIZATION +.Ft void +.Fn callout_handle_init "struct callout_handle *handle" +This function is deprecated. +Please use .Fn callout_init -function initializes a callout structure in -.Fa c -that is not associated with a specific lock. +instead. +This function is used to prepare a +.Ft struct callout_handle +before it can be used the first time. +If this function is called on a pending timeout, the pending timeout +cannot be cancelled and the +.Fn untimeout +function will return as if no timeout was pending. +.Pp +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +This macro is deprecated. +This macro is used to statically initialize a +.Ft struct callout_handle . +Please use +.Fn callout_init +instead. +.Pp +.Ft void +.Fn callout_init "struct callout *c" "int mpsafe" +This function prepares a +.Ft struct callout +before it can be used. +This function should not be used when the callout is pending a timeout. If the .Fa mpsafe -argument is zero, -the callout structure is not considered to be -.Dq multi-processor safe ; -and the Giant lock will be acquired before calling the callout function -and released when the callout function returns. -.Pp -The -.Fn callout_init_mtx , -.Fn callout_init_rm , -and -.Fn callout_init_rw -functions initialize a callout structure in -.Fa c -that is associated with a specific lock. -The lock is specified by the -.Fa mtx , -.Fa rm , -or -.Fa rw -parameter. -The associated lock must be held while stopping or rescheduling the -callout. -The callout subsystem acquires the associated lock before calling the -callout function and releases it after the function returns. -If the callout was cancelled while the callout subsystem waited for the -associated lock, -the callout function is not called, -and the associated lock is released. -This ensures that stopping or rescheduling the callout will abort any -previously scheduled invocation. -.Pp -Only regular mutexes may be used with -.Fn callout_init_mtx ; -spin mutexes are not supported. -A sleepable read-mostly lock -.Po -one initialized with the -.Dv RM_SLEEPABLE -flag -.Pc -may not be used with -.Fn callout_init_rm . -Similarly, other sleepable lock types such as -.Xr sx 9 -and -.Xr lockmgr 9 -cannot be used with callouts because sleeping is not permitted in -the callout subsystem. 
+argument is non-zero, the callback function will be running unlocked +and the callback is so-called "mpsafe". +.Bf Sy +It is the application's entire responsibility to not call any +.Fn callout_xxx +functions, including the +.Fn callout_drain +function, simultaneously on the same callout when the +.Fa mpsafe +argument is non-zero. +Otherwise, undefined behavior can happen. +Avoid simultaneous calls by obtaining an exclusive lock before calling +any +.Fn callout_xxx +functions other than the +.Fn callout_drain +function. +.Ef +If the +.Fa mpsafe +argument is zero, the Giant mutex will be locked before the callback +function is called. +If the +.Fa mpsafe +argument is zero, the Giant mutex is expected to be locked when calling +any +.Fn callout_xxx +functions which start and stop a callout other than the +.Fn callout_drain +function. .Pp -These +.Ft void +.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" +This function prepares a +.Ft struct callout +before it can be used. +This function should not be used when the callout is pending a timeout. +The +.Fa mtx +argument is a pointer to a valid spinlock type of mutex or a valid +regular non-sleepable mutex which the callback subsystem will lock +before calling the callback function. +The specified mutex is expected to be locked when calling any +.Fn callout_xxx +functions which start and stop a callout other than the +.Fn callout_drain +function. +Valid .Fa flags -may be specified for -.Fn callout_init_mtx , -.Fn callout_init_rm , -or -.Fn callout_init_rw : +are: .Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED" .It Dv CALLOUT_RETURNUNLOCKED -The callout function will release the associated lock itself, -so the callout subsystem should not attempt to unlock it -after the callout function returns. -.It Dv CALLOUT_SHAREDLOCK -The lock is only acquired in read mode when running the callout handler. -This flag is ignored by -.Fn callout_init_mtx . +The callout function is assumed to have released the specified mutex +before returning. +.It Dv 0 +The callout subsystem will release the specified mutex after the +callout function has returned. .El .Pp -The function -.Fn callout_stop -cancels a callout -.Fa c -if it is currently pending. -If the callout is pending, then -.Fn callout_stop -returns a non-zero value. -If the callout is not set, -has already been serviced, -or is currently being serviced, -then zero will be returned. -If the callout has an associated lock, -then that lock must be held when this function is called. -.Pp -The function -.Fn callout_drain -is identical to -.Fn callout_stop -except that it will wait for the callout -.Fa c -to complete if it is already in progress. -This function MUST NOT be called while holding any -locks on which the callout might block, or deadlock will result. -Note that if the callout subsystem has already begun processing this -callout, then the callout function may be invoked before -.Fn callout_drain -returns. -However, the callout subsystem does guarantee that the callout will be -fully stopped before -.Fn callout_drain -returns. +.Ft void +.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" +This function is similar to +.Fn callout_init_mtx , +but it accepts a read-mostly type of lock. +The read-mostly lock must not be initialized with the +.Dv RM_SLEEPABLE +flag. .Pp -The +.Ft void +.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" +This function is similar to +.Fn callout_init_mtx , +but it accepts a read/write type of lock. 
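+.Pp
+The following sketch shows one way a driver could tie a callout to its
+own mutex at attach time.
+The structure layout and the
+.Dq foo
+names are placeholders for this example only and do not refer to any
+existing driver:
+.Bd -literal -offset indent
+struct foo_softc {
+        struct mtx sc_mtx;
+        struct callout sc_callout;
+};
+
+static void
+foo_attach_timer(struct foo_softc *sc)
+{
+        mtx_init(&sc->sc_mtx, "foo", NULL, MTX_DEF);
+
+        /* let the callout subsystem take sc_mtx around the callback */
+        callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
+}
+.Ed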
+.Sh SCHEDULING CALLOUTS
+.Ft struct callout_handle
+.Fn timeout "timeout_t *func" "void *arg" "int ticks"
+This function is deprecated.
+Please use
 .Fn callout_reset
-and
-.Fn callout_schedule
-function families schedule a future function invocation for callout
-.Fa c .
-If
-.Fa c
-already has a pending callout,
-it is cancelled before the new invocation is scheduled.
-These functions return a non-zero value if a pending callout was cancelled
-and zero if there was no pending callout.
-If the callout has an associated lock,
-then that lock must be held when any of these functions are called.
-.Pp
-The time at which the callout function will be invoked is determined by
-either the
-.Fa ticks
-argument or the
-.Fa sbt ,
-.Fa pr ,
-and
-.Fa flags
-arguments.
-When
-.Fa ticks
-is used,
-the callout is scheduled to execute after
+instead.
+This function schedules a call to
+.Fa func
+to take place after
 .Fa ticks Ns No /hz
 seconds.
 Non-positive values of
 .Fa ticks
 are silently converted to the value
 .Sq 1 .
-.Pp
 The
-.Fa sbt ,
-.Fa pr ,
-and
-.Fa flags
-arguments provide more control over the scheduled time including
-support for higher resolution times,
-specifying the precision of the scheduled time,
-and setting an absolute deadline instead of a relative timeout.
-The callout is scheduled to execute in a time window which begins at
-the time specified in
+.Fa func
+argument is a valid pointer to a function that takes a single
+.Fa void *
+argument.
+Upon invocation, the
+.Fa func
+function will receive
+.Fa arg
+as its only argument.
+The Giant lock is locked when the
+.Fa func
+function is invoked and should not be unlocked by this function.
+The value returned by
+.Fn timeout
+is a
+.Ft struct callout_handle
+structure which can be used in conjunction with the
+.Fn untimeout
+function to request that a scheduled timeout be cancelled.
+As handles are recycled by the system, it is possible, although unlikely,
+that a handle from one invocation of
+.Fn timeout
+may match the handle of another invocation of
+.Fn timeout
+if both calls used the same function pointer and argument, and the first
+timeout is expired or cancelled before the second call.
+Please ensure that the function and argument pointers are unique when
+using this function.
+.Pp
+.Ft int
+.Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg"
+This function is used to schedule or re-schedule a callout.
+This function first stops the callout given by the
+.Fa c
+argument, if it is pending, and then schedules it again.
+The relative time until the timeout callback happens is given by the
+.Fa ticks
+argument.
+The number of ticks in a second is defined by
+.Dv hz
+and can vary from system to system.
+This function returns a non-zero value if the given callout was pending and
+the callback function was prevented from being called.
+Otherwise, a value of zero is returned.
+If a lock is associated with the callout given by the
+.Fa c
+argument and it is exclusively locked when this function is called, this
+function will always ensure that the previous callback function, if any,
+is never reached.
+In other words, the callout will be atomically restarted.
+Otherwise, there is no such guarantee.
+The callback function is given by
+.Fa func
+and its function argument is given by
+.Fa arg .
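+.Pp
+As a sketch of typical usage, building on the hypothetical
+.Dq foo
+driver from the initialization example above, the following code arms a
+one-second timer and re-arms it from within the callback to emulate a
+periodic timer:
+.Bd -literal -offset indent
+static void
+foo_timeout(void *arg)
+{
+        struct foo_softc *sc = arg;
+
+        /* sc_mtx is held here because of callout_init_mtx() */
+        mtx_assert(&sc->sc_mtx, MA_OWNED);
+
+        /* ... periodic work goes here ... */
+
+        /* re-arm for another second */
+        callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
+}
+
+static void
+foo_start_timer(struct foo_softc *sc)
+{
+        mtx_lock(&sc->sc_mtx);
+        callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
+        mtx_unlock(&sc->sc_mtx);
+}
+.Ed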
+.Pp +.Ft int +.Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \ +"void *arg" +This function works the same like the +.Fn callout_reset +function except the callback function given by the +.Fa func +argument will be executed on the same CPU which called this function. +A change in the CPU selection can only happen if the callout has a +lock associated with it and this lock is locked when this function is +called or the callout is marked "mpsafe". +See +.Fn callout_init . +The CPU selection cannot be changed while the callout subsystem is +processing the callback for completion. +.Pp +.Ft int +.Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \ +"void *arg" "int cpu" +This function works the same like the +.Fn callout_reset +function except the callback function given by the +.Fa func +argument will be executed on the CPU given by the +.Fa cpu +argument. +A change in the CPU selection can only happen if the callout has a +lock associated with it and this lock is locked when this function is +called or the callout is marked "mpsafe". +See +.Fn callout_init . +The CPU selection cannot be changed while the callout subsystem is +processing the callback for completion. +.Pp +.Ft int +.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" +This function works the same like the +.Fn callout_reset +function except the relative or absolute time after which the timeout +callback should happen is given by the .Fa sbt -and extends for the amount of time specified in +argument and extends for the amount of time specified in .Fa pr . +This function is used when high precision timeouts are needed. If .Fa sbt specifies a time in the past, @@ -322,12 +315,13 @@ allows the callout subsystem to coalesce callouts scheduled close to each other into fewer timer interrupts, reducing processing overhead and power consumption. -These +The .Fa flags -may be specified to adjust the interpretation of +argument may be non-zero to adjust the interpretation of the .Fa sbt and -.Fa pr : +.Fa pr +arguments: .Bl -tag -width ".Dv C_DIRECT_EXEC" .It Dv C_ABSOLUTE Handle the @@ -347,7 +341,7 @@ and should be as small as possible because they run with absolute priority. .It Fn C_PREL Specifies relative event time precision as binary logarithm of time interval -divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, etc. +divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, and so on. Note that the larger of .Fa pr or this value is used as the length of the time window. @@ -360,65 +354,221 @@ calls if possible. .El .Pp -The -.Fn callout_reset -functions accept a +.Ft int +.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ +"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" +This function works like +.Fn callout_reset_sbt , +except the callback function given by the .Fa func -argument which identifies the function to be called when the time expires. -It must be a pointer to a function that takes a single -.Fa void * -argument. -Upon invocation, +argument will be executed on the CPU which called this function. +A change in the CPU selection can only happen if the callout has a +lock associated with it and this lock is locked when this function is +called or the callout is marked "mpsafe". +See +.Fn callout_init . +The CPU selection cannot be changed while the callout subsystem is +processing the callback for completion. 
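+.Pp
+For example, a consumer that wants to be called back roughly 100
+milliseconds from now, and that can tolerate the callback firing up to
+10 milliseconds late so that it may be coalesced with other events,
+could use
+.Fn callout_reset_sbt
+as sketched below, re-using the hypothetical
+.Dq foo
+callout and callback from the earlier examples:
+.Bd -literal -offset indent
+        /* sc_mtx should be held, as for callout_reset() */
+        mtx_assert(&sc->sc_mtx, MA_OWNED);
+
+        /* fire in about 100ms, allow 10ms of slack for coalescing */
+        callout_reset_sbt(&sc->sc_callout, 100 * SBT_1MS, 10 * SBT_1MS,
+            foo_timeout, sc, 0);
+.Ed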
+.Pp
+.Ft int
+.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags"
+This function works like
+.Fn callout_reset_sbt ,
+except the callback function given by
 .Fa func
-will receive
-.Fa arg
-as its only argument.
-The
-.Fn callout_schedule
-functions reuse the
+will be executed on the CPU given by
+.Fa cpu .
+A change in the CPU selection can only happen if the callout has a
+lock associated with it and this lock is locked when this function is
+called or the callout is marked "mpsafe".
+See
+.Fn callout_init .
+The CPU selection cannot be changed while the callout subsystem is
+processing the callback for completion.
+.Pp
+.Ft int
+.Fn callout_schedule "struct callout *c" "int ticks"
+This function works like the
+.Fn callout_reset
+function except it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_curcpu "struct callout *c" "int ticks"
+This function works like the
+.Fn callout_reset_curcpu
+function except it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu"
+This function works like the
+.Fn callout_reset_on
+function except it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int flags"
+This function works like the
+.Fn callout_reset_sbt
+function except it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int flags"
+This function works like the
+.Fn callout_reset_sbt_curcpu
+function except it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int cpu" "int flags"
+This function works like the
+.Fn callout_reset_sbt_on
+function except it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Sh CHECKING THE STATE OF CALLOUTS
+.Ft int
+.Fn callout_pending "struct callout *c"
+This function returns non-zero if the callout pointed to by the
+.Fa c
+argument is pending and waiting for its callback to be executed.
+Otherwise, this function returns zero.
+This function returns zero when inside the callout function if the
+callout is not re-scheduled.
+.Pp
+.Ft int
+.Fn callout_active "struct callout *c"
+This function is deprecated and returns non-zero if the callout
+pointed to by the
+.Fa c
+argument was scheduled in the past.
+Otherwise, this function returns zero.
+This function also returns zero after the
+.Fn callout_deactivate
+or the
+.Fn callout_stop
+or the
+.Fn callout_drain
+or the
+.Fn callout_drain_async
+function is called on the same callout as given by the
+.Fa c
+argument.
+.Pp
+.Ft void
+.Fn callout_deactivate "struct callout *c"
+This function is deprecated and ensures that subsequent calls to the
+.Fn callout_active
+function return zero until the callout is scheduled again.
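+.Pp
+For example, a driver that only wants to arm its timer when no timeout
+is currently outstanding could check the state as sketched below, again
+using the hypothetical
+.Dq foo
+callout and assuming its associated mutex is held:
+.Bd -literal -offset indent
+        mtx_assert(&sc->sc_mtx, MA_OWNED);
+
+        if (!callout_pending(&sc->sc_callout)) {
+                /* no timeout outstanding; arm a new one */
+                callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
+        }
+.Ed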
+.Sh STOPPING CALLOUTS +.Ft void +.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" +This function is deprecated and cancels the timeout associated with the +.Fa handle +argument using the function pointed to by the .Fa func -and +argument and having the .Fa arg -arguments from the previous callout. -Note that one of the -.Fn callout_reset -functions must always be called to initialize +arguments to validate the handle. +If the handle does not correspond to a timeout with +the function .Fa func -and +taking the argument .Fa arg -before one of the -.Fn callout_schedule -functions can be used. +no action is taken. The +.Fa handle +must be initialized by a previous call to +.Fn timeout , +.Fn callout_handle_init +or assigned the value of +.Fn CALLOUT_HANDLE_INITIALIZER "&handle" +before being passed to +.Fn untimeout . +The behavior of calling +.Fn untimeout +with an uninitialized handle +is undefined. .Pp -The callout subsystem provides a softclock thread for each CPU in the system. -Callouts are assigned to a single CPU and are executed by the softclock thread -for that CPU. -Initially, -callouts are assigned to CPU 0. -The -.Fn callout_reset_on , -.Fn callout_reset_sbt_on , -.Fn callout_schedule_on -and -.Fn callout_schedule_sbt_on -functions assign the callout to CPU -.Fa cpu . -The -.Fn callout_reset_curcpu , -.Fn callout_reset_sbt_curpu , -.Fn callout_schedule_curcpu -and -.Fn callout_schedule_sbt_curcpu -functions assign the callout to the current CPU. -The -.Fn callout_reset , -.Fn callout_reset_sbt , -.Fn callout_schedule -and -.Fn callout_schedule_sbt -functions schedule the callout to execute in the softclock thread of the CPU -to which it is currently assigned. +.Ft int +.Fn callout_stop "struct callout *c" +This function is used to stop a timeout function invocation associated with the callout pointed to by the +.Fa c +argument, in a non-blocking fashion. +This function can be called multiple times in a row with no side effects, even if the callout is already stopped. This function however should not be called before the callout has been initialized. +This function returns a non-zero value if the given callout was pending and +the callback function was prevented from being called. +Else a value of zero is returned. +If a lock is associated with the callout given by the +.Fa c +argument and it is exclusivly locked when this function is called, the +.Fn callout_stop +function will always ensure that the callback function is never reached. +In other words the callout will be atomically stopped. +Else there is no such guarantee. +.Sh DRAINING CALLOUTS +.Ft int +.Fn callout_drain "struct callout *c" +This function works the same like the +.Fn callout_stop +function except it ensures that all callback functions have returned and there are no more references to the callout pointed to by the +.Fa c +argument inside the callout subsystem before it returns. +Also this function ensures that the lock, if any, associated with the +callout is no longer being used. +When this function returns, it is safe to free the callout structure pointed to by the +.Fa c +argument. .Pp +.Ft int +.Fn callout_drain_async "struct callout *c" "callout_func_t *fn" "void *arg" +This function is non-blocking and works the same like the +.Fn callout_stop +function except if it returns non-zero it means the callback function pointed to by the +.Fa fn +argument will be called back with the +.Fa arg +argument when all references to the callout pointed to by the +.Fa c +argument are gone. 
+If this function returns non-zero it should not be called again until the callback function has been called. +If the +.Fn callout_drain +or +.Fn callout_drain_async +functions are called while an asynchronous drain is pending, +previously pending asynchronous drains might get cancelled. +If this function returns zero, it is safe to free the callout structure pointed to by the +.Fa c +argument right away. +.Sh CALLOUT FUNCTION RESTRICTIONS +Callout functions must not sleep. +They may not acquire sleepable locks, wait on condition variables, +perform blocking allocation requests, or invoke any other action that +might sleep. +.Sh CALLOUT SUBSYSTEM INTERNALS +The callout subsystem has its own set of spinlocks to protect its internal state. +The callout subsystem provides a softclock thread for each CPU in the +system. +Callouts are assigned to a single CPU and are executed by the +softclock thread for that CPU. +Initially, callouts are assigned to CPU 0. Softclock threads are not pinned to their respective CPUs by default. The softclock thread for CPU 0 can be pinned to CPU 0 by setting the .Va kern.pin_default_swi @@ -427,50 +577,7 @@ respective CPUs by setting the .Va kern.pin_pcpu_swi loader tunable to a non-zero value. -.Pp -The macros -.Fn callout_pending , -.Fn callout_active -and -.Fn callout_deactivate -provide access to the current state of the callout. -The -.Fn callout_pending -macro checks whether a callout is -.Em pending ; -a callout is considered -.Em pending -when a timeout has been set but the time has not yet arrived. -Note that once the timeout time arrives and the callout subsystem -starts to process this callout, -.Fn callout_pending -will return -.Dv FALSE -even though the callout function may not have finished -.Pq or even begun -executing. -The -.Fn callout_active -macro checks whether a callout is marked as -.Em active , -and the -.Fn callout_deactivate -macro clears the callout's -.Em active -flag. -The callout subsystem marks a callout as -.Em active -when a timeout is set and it clears the -.Em active -flag in -.Fn callout_stop -and -.Fn callout_drain , -but it -.Em does not -clear it when a callout expires normally via the execution of the -callout function. -.Ss "Avoiding Race Conditions" +.Sh "AVOIDING RACE CONDITIONS" The callout subsystem invokes callout functions from its own thread context. Without some kind of synchronization, @@ -531,9 +638,8 @@ .Pc indicates whether or not the callout was removed. If it is known that the callout was set and the callout function has -not yet executed, then a return value of -.Dv FALSE -indicates that the callout function is about to be called. +not yet executed, then a return value of zero indicates that the +callout function is about to be called. For example: .Bd -literal -offset indent if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) { @@ -589,16 +695,14 @@ .Em pending flag and return without action if .Fn callout_pending -returns -.Dv TRUE . +returns non-zero. This indicates that the callout was rescheduled using .Fn callout_reset just before the callout function was invoked. If .Fn callout_active -returns -.Dv FALSE -then the callout function should also return without action. +returns zero then the callout function should also return without +action. This indicates that the callout has been stopped. Finally, the callout function should call .Fn callout_deactivate @@ -668,129 +772,13 @@ or releasing the storage for the callout structure. 
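.Pp
As an illustrative sketch, a teardown path that is not allowed to sleep can use
.Fn callout_drain_async
and defer freeing its state until the callout subsystem has dropped all
references to the callout.
The foo_softc, foo_drained and foo_detach names below are hypothetical, and the
softc is assumed to have been allocated with
.Fn malloc :
.Bd -literal -offset indent
static void
foo_drained(void *arg)
{
	struct foo_softc *sc = arg;

	/* no references to sc->sc_callout remain; safe to free */
	free(sc, M_DEVBUF);
}

static void
foo_detach(struct foo_softc *sc)
{

	if (callout_drain_async(&sc->sc_callout, &foo_drained, sc) == 0) {
		/* no callback was pending; free right away */
		free(sc, M_DEVBUF);
	} else {
		/* foo_drained() will be called when the drain completes */
	}
}
.Ed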
.Sh LEGACY API .Bf Sy -The functions below are a legacy API that will be removed in a future release. -New code should not use these routines. -.Ef -.Pp -The function -.Fn timeout -schedules a call to the function given by the argument -.Fa func -to take place after -.Fa ticks Ns No /hz -seconds. -Non-positive values of -.Fa ticks -are silently converted to the value -.Sq 1 . -.Fa func -should be a pointer to a function that takes a -.Fa void * -argument. -Upon invocation, -.Fa func -will receive -.Fa arg -as its only argument. -The return value from +The .Fn timeout -is a -.Ft struct callout_handle -which can be used in conjunction with the -.Fn untimeout -function to request that a scheduled timeout be canceled. -.Pp -The function -.Fn callout_handle_init -can be used to initialize a handle to a state which will cause -any calls to -.Fn untimeout -with that handle to return with no side -effects. -.Pp -Assigning a callout handle the value of -.Fn CALLOUT_HANDLE_INITIALIZER -performs the same function as -.Fn callout_handle_init -and is provided for use on statically declared or global callout handles. -.Pp -The function -.Fn untimeout -cancels the timeout associated with -.Fa handle -using the -.Fa func and -.Fa arg -arguments to validate the handle. -If the handle does not correspond to a timeout with -the function -.Fa func -taking the argument -.Fa arg -no action is taken. -.Fa handle -must be initialized by a previous call to -.Fn timeout , -.Fn callout_handle_init , -or assigned the value of -.Fn CALLOUT_HANDLE_INITIALIZER "&handle" -before being passed to -.Fn untimeout . -The behavior of calling .Fn untimeout -with an uninitialized handle -is undefined. -.Pp -As handles are recycled by the system, it is possible (although unlikely) -that a handle from one invocation of -.Fn timeout -may match the handle of another invocation of -.Fn timeout -if both calls used the same function pointer and argument, and the first -timeout is expired or canceled before the second call. -The timeout facility offers O(1) running time for -.Fn timeout -and -.Fn untimeout . -Timeouts are executed from -.Fn softclock -with the -.Va Giant -lock held. -Thus they are protected from re-entrancy. -.Sh RETURN VALUES -The -.Fn callout_active -macro returns the state of a callout's -.Em active -flag. -.Pp -The -.Fn callout_pending -macro returns the state of a callout's -.Em pending -flag. -.Pp -The -.Fn callout_reset -and -.Fn callout_schedule -function families return non-zero if the callout was pending before the new -function invocation was scheduled. -.Pp -The -.Fn callout_stop -and -.Fn callout_drain -functions return non-zero if the callout was still pending when it was -called or zero otherwise. -The -.Fn timeout -function returns a -.Ft struct callout_handle -that can be passed to -.Fn untimeout . +functions are a legacy API that will be removed in a future release. +New code should not use these routines. +.Ef .Sh HISTORY The current timeout and untimeout routines are based on the work of .An Adam M. Costello @@ -815,4 +803,4 @@ .Bx linked list callout mechanism which offered O(n) insertion and removal running time -but did not generate or require handles for untimeout operations. +and did not generate or require handles for untimeout operations. 
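The kernel changes that follow repeatedly apply one conversion pattern: a callout that used to be initialized with callout_init(c, CALLOUT_MPSAFE) is instead tied to a private spin mutex with callout_init_mtx(), so that callout_stop() can cancel the callback atomically while that mutex is held. A minimal sketch of the pattern, with the foo_* names being hypothetical:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct foo_state {
	struct mtx	foo_mtx;	/* protects foo_callout */
	struct callout	foo_callout;
};

static void
foo_setup(struct foo_state *fs)
{
	mtx_init(&fs->foo_mtx, "foo_mtx", NULL, MTX_SPIN);
	callout_init_mtx(&fs->foo_callout, &fs->foo_mtx, 0);
}

static void
foo_stop(struct foo_state *fs)
{
	/* with the spin mutex held the stop is atomic */
	mtx_lock_spin(&fs->foo_mtx);
	callout_stop(&fs->foo_callout);
	mtx_unlock_spin(&fs->foo_mtx);
}

The matching teardown, as in the kern_thread.c hunk below, is a callout_drain() followed by mtx_destroy().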
Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -505,7 +505,8 @@ callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); - callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); + mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); + callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); /* Create credentials. */ p->p_ucred = crget(); Index: sys/kern/kern_clocksource.c =================================================================== --- sys/kern/kern_clocksource.c +++ sys/kern/kern_clocksource.c @@ -160,6 +160,9 @@ int usermode; int done, runs; + KASSERT(curthread->td_critnest != 0, + ("Must be in a critical section")); + CTR3(KTR_SPARE2, "handle at %d: now %d.%08x", curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff)); done = 0; Index: sys/kern/kern_condvar.c =================================================================== --- sys/kern/kern_condvar.c +++ sys/kern/kern_condvar.c @@ -313,15 +313,13 @@ DROP_GIANT(); sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0); + sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); } + sleepq_lock(cvp); rval = sleepq_timedwait(cvp, 0); #ifdef KTRACE @@ -383,15 +381,13 @@ sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR | SLEEPQ_INTERRUPTIBLE, 0); + sleepq_release(cvp); sleepq_set_timeout_sbt(cvp, sbt, pr, flags); if (lock != &Giant.lock_object) { - if (class->lc_flags & LC_SLEEPABLE) - sleepq_release(cvp); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); - if (class->lc_flags & LC_SLEEPABLE) - sleepq_lock(cvp); } + sleepq_lock(cvp); rval = sleepq_timedwait_sig(cvp, 0); #ifdef KTRACE Index: sys/kern/kern_lock.c =================================================================== --- sys/kern/kern_lock.c +++ sys/kern/kern_lock.c @@ -210,9 +210,11 @@ GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ? SLEEPQ_INTERRUPTIBLE : 0), queue); - if ((flags & LK_TIMELOCK) && timo) + if ((flags & LK_TIMELOCK) && timo) { + sleepq_release(&lk->lock_object); sleepq_set_timeout(&lk->lock_object, timo); - + sleepq_lock(&lk->lock_object); + } /* * Decisional switch for real sleeping. */ Index: sys/kern/kern_switch.c =================================================================== --- sys/kern/kern_switch.c +++ sys/kern/kern_switch.c @@ -93,8 +93,6 @@ &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), ""); SCHED_STAT_DEFINE_VAR(sleepq, &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), ""); -SCHED_STAT_DEFINE_VAR(sleepqtimo, - &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), ""); SCHED_STAT_DEFINE_VAR(relinquish, &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), ""); SCHED_STAT_DEFINE_VAR(needresched, Index: sys/kern/kern_synch.c =================================================================== --- sys/kern/kern_synch.c +++ sys/kern/kern_synch.c @@ -236,12 +236,16 @@ * return from cursig(). 
*/ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); - if (sbt != 0) - sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); + if (sbt != 0) + sleepq_set_timeout_sbt(ident, sbt, pr, flags); + sleepq_lock(ident); + } else if (sbt != 0) { + sleepq_release(ident); + sleepq_set_timeout_sbt(ident, sbt, pr, flags); sleepq_lock(ident); } if (sbt != 0 && catch) @@ -306,8 +310,11 @@ * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); - if (sbt != 0) + if (sbt != 0) { + sleepq_release(ident); sleepq_set_timeout_sbt(ident, sbt, pr, flags); + sleepq_lock(ident); + } /* * Can't call ktrace with any spin locks held so it can lock the Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c +++ sys/kern/kern_thread.c @@ -149,6 +149,9 @@ audit_thread_alloc(td); #endif umtx_thread_alloc(td); + + mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN); + callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0); return (0); } @@ -162,6 +165,10 @@ td = (struct thread *)mem; + /* make sure to drain any use of the "td->td_slpcallout" */ + callout_drain(&td->td_slpcallout); + mtx_destroy(&td->td_slpmutex); + #ifdef INVARIANTS /* Verify that this thread is in a safe state to free. */ switch (td->td_state) { @@ -544,7 +551,6 @@ LIST_INIT(&td->td_lprof[0]); LIST_INIT(&td->td_lprof[1]); sigqueue_init(&td->td_sigqueue, p); - callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist); p->p_numthreads++; } Index: sys/kern/kern_timeout.c =================================================================== --- sys/kern/kern_timeout.c +++ sys/kern/kern_timeout.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include #include @@ -75,28 +77,25 @@ "struct callout *"); #ifdef CALLOUT_PROFILING -static int avg_depth; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, +static int avg_depth[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0, "Average number of items examined per softclock call. Units = 1/1000"); -static int avg_gcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, +static int avg_gcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); -static int avg_lockcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, +static int avg_lockcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); -static int avg_mpcalls; -SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, +static int avg_mpcalls[2]; +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); -static int avg_depth_dir; -SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0, "Average number of direct callouts examined per callout_process call. 
" "Units = 1/1000"); -static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, - &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " + &avg_lockcalls[1], 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); -static int avg_mpcalls_dir; -SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1], 0, "Average number of MP direct callouts made per callout_process call. " "Units = 1/1000"); #endif @@ -124,59 +123,274 @@ */ u_int callwheelsize, callwheelmask; +#define CALLOUT_RET_NORMAL 0 +#define CALLOUT_RET_CANCELLED 1 +#define CALLOUT_RET_DRAINING 2 + +struct callout_args { + sbintime_t time; /* absolute time for the event */ + sbintime_t precision; /* delta allowed wrt opt */ + void *arg; /* function argument */ + callout_func_t *func; /* function to call */ + int flags; /* flags passed to callout_reset() */ + int cpu; /* CPU we're scheduled on */ +}; + +typedef void callout_mutex_op_t(struct lock_object *); +typedef int callout_owned_op_t(struct lock_object *); + +struct callout_mutex_ops { + callout_mutex_op_t *lock; + callout_mutex_op_t *unlock; + callout_owned_op_t *owned; +}; + +enum { + CALLOUT_LC_UNUSED_0, + CALLOUT_LC_UNUSED_1, + CALLOUT_LC_UNUSED_2, + CALLOUT_LC_UNUSED_3, + CALLOUT_LC_SPIN, + CALLOUT_LC_MUTEX, + CALLOUT_LC_RW, + CALLOUT_LC_RM, +}; + +static void +callout_mutex_op_none(struct lock_object *lock) +{ +} + +static int +callout_owned_op_none(struct lock_object *lock) +{ + return (0); +} + +static void +callout_mutex_lock(struct lock_object *lock) +{ + + mtx_lock((struct mtx *)lock); +} + +static void +callout_mutex_unlock(struct lock_object *lock) +{ + + mtx_unlock((struct mtx *)lock); +} + +static void +callout_mutex_lock_spin(struct lock_object *lock) +{ + + mtx_lock_spin((struct mtx *)lock); +} + +static void +callout_mutex_unlock_spin(struct lock_object *lock) +{ + + mtx_unlock_spin((struct mtx *)lock); +} + +static int +callout_mutex_owned(struct lock_object *lock) +{ + + return (mtx_owned((struct mtx *)lock)); +} + +static void +callout_rm_wlock(struct lock_object *lock) +{ + + rm_wlock((struct rmlock *)lock); +} + +static void +callout_rm_wunlock(struct lock_object *lock) +{ + + rm_wunlock((struct rmlock *)lock); +} + +static int +callout_rm_owned(struct lock_object *lock) +{ + + return (rm_wowned((struct rmlock *)lock)); +} + +static void +callout_rw_wlock(struct lock_object *lock) +{ + + rw_wlock((struct rwlock *)lock); +} + +static void +callout_rw_wunlock(struct lock_object *lock) +{ + + rw_wunlock((struct rwlock *)lock); +} + +static int +callout_rw_owned(struct lock_object *lock) +{ + + return (rw_wowned((struct rwlock *)lock)); +} + +static const struct callout_mutex_ops callout_mutex_ops[8] = { + [CALLOUT_LC_UNUSED_0] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + .owned = callout_owned_op_none, + }, + [CALLOUT_LC_UNUSED_1] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + .owned = callout_owned_op_none, + }, + [CALLOUT_LC_UNUSED_2] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + .owned = callout_owned_op_none, + }, + [CALLOUT_LC_UNUSED_3] = { + .lock = callout_mutex_op_none, + .unlock = callout_mutex_op_none, + .owned = callout_owned_op_none, + }, + [CALLOUT_LC_SPIN] = { + .lock = callout_mutex_lock_spin, + .unlock = callout_mutex_unlock_spin, + .owned = callout_mutex_owned, + }, + 
[CALLOUT_LC_MUTEX] = { + .lock = callout_mutex_lock, + .unlock = callout_mutex_unlock, + .owned = callout_mutex_owned, + }, + [CALLOUT_LC_RW] = { + .lock = callout_rw_wlock, + .unlock = callout_rw_wunlock, + .owned = callout_rw_owned, + }, + [CALLOUT_LC_RM] = { + .lock = callout_rm_wlock, + .unlock = callout_rm_wunlock, + .owned = callout_rm_owned, + }, +}; + +static inline void +callout_lock_client(int c_flags, struct lock_object *c_lock) +{ + + callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock); +} + +static inline void +callout_unlock_client(int c_flags, struct lock_object *c_lock) +{ + + callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock); +} + +#ifdef SMP +static inline int +callout_lock_owned_client(int c_flags, struct lock_object *c_lock) +{ + + return (callout_mutex_ops[CALLOUT_GET_LC(c_flags)].owned(c_lock)); +} +#endif + /* - * The callout cpu exec entities represent informations necessary for - * describing the state of callouts currently running on the CPU and the ones - * necessary for migrating callouts to the new callout cpu. In particular, - * the first entry of the array cc_exec_entity holds informations for callout - * running in SWI thread context, while the second one holds informations - * for callout running directly from hardware interrupt context. - * The cached informations are very important for deferring migration when - * the migrating callout is already running. + * The callout CPU exec structure represent information necessary for + * describing the state of callouts currently running on the CPU and + * for handling deferred callout restarts. + * + * In particular, the first entry of the array cc_exec_entity holds + * information for callouts running from the SWI thread context, while + * the second one holds information for callouts running directly from + * the hardware interrupt context. */ struct cc_exec { + /* + * The "cc_curr" points to the currently executing callout and + * is protected by the "cc_lock" spinlock. If no callback is + * currently executing it is equal to "NULL". + */ struct callout *cc_curr; -#ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - sbintime_t ce_migration_time; - sbintime_t ce_migration_prec; + /* + * The "cc_restart_args" structure holds the argument for a + * deferred callback restart and is protected by the "cc_lock" + * spinlock. The structure is only valid if "cc_restart" is + * "true". If "cc_restart" is "false" the information in the + * "cc_restart_args" structure shall be ignored. + */ + struct callout_args cc_restart_args; + bool cc_restart; + /* + * The "cc_cancel" variable allows the currently pending + * callback to be atomically cancelled. This field is write + * protected by the "cc_lock" spinlock. + */ + bool cc_cancel; + /* + * The "cc_drain_fn" points to a function which shall be + * called with the argument stored in "cc_drain_arg" when an + * asynchronous drain is performed. This field is write + * protected by the "cc_lock" spinlock. + */ + callout_func_t *cc_drain_fn; + void *cc_drain_arg; + /* + * The following fields are used for callout profiling only: + */ +#ifdef CALLOUT_PROFILING + int cc_depth; + int cc_mpcalls; + int cc_lockcalls; + int cc_gcalls; #endif - bool cc_cancel; - bool cc_waiting; }; /* - * There is one struct callout_cpu per cpu, holding all relevant + * There is one "struct callout_cpu" per CPU, holding all relevant * state for the callout processing thread on the individual CPU. 
*/ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; - struct callout *cc_next; struct callout *cc_callout; struct callout_list *cc_callwheel; + struct callout_list cc_tmplist; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; - u_int cc_bucket; char cc_ktr_event_name[20]; }; -#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr -#define cc_exec_next(cc) cc->cc_next -#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel -#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting -#ifdef SMP -#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func -#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg -#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu -#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time -#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec +#define cc_exec_curr(cc, dir) (cc)->cc_exec_entity[(dir)].cc_curr +#define cc_exec_restart_args(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart_args +#define cc_exec_restart(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart +#define cc_exec_cancel(cc, dir) (cc)->cc_exec_entity[(dir)].cc_cancel +#define cc_exec_drain_fn(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_fn +#define cc_exec_drain_arg(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_arg +#define cc_exec_depth(cc, dir) (cc)->cc_exec_entity[(dir)].cc_depth +#define cc_exec_mpcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_mpcalls +#define cc_exec_lockcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_lockcalls +#define cc_exec_gcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_gcalls +#ifdef SMP struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) @@ -193,67 +407,13 @@ static int timeout_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); -static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, -#ifdef CALLOUT_PROFILING - int *mpcalls, int *lockcalls, int *gcalls, -#endif - int direct); +static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); -/** - * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is cc_curr. - * If cc_curr is non-NULL, threads waiting in - * callout_drain() will be woken up as soon as the - * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and cc_lock held - * guarantees that the current callout will not run. - * The softclock() function sets this to 0 before it - * drops callout_lock to acquire c_lock, and it calls - * the handler only if curr_cancelled is still 0 after - * cc_lock is successfully acquired. - * cc_waiting - If a thread is waiting in callout_drain(), then - * callout_wait is nonzero. Set only when - * cc_curr is non-NULL. - */ - -/* - * Resets the execution entity tied to a specific callout cpu. - */ -static void -cc_cce_cleanup(struct callout_cpu *cc, int direct) -{ - - cc_exec_curr(cc, direct) = NULL; - cc_exec_cancel(cc, direct) = false; - cc_exec_waiting(cc, direct) = false; -#ifdef SMP - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif -} - -/* - * Checks if migration is requested by a specific callout cpu. 
- */ -static int -cc_cce_migrating(struct callout_cpu *cc, int direct) -{ - -#ifdef SMP - return (cc_migration_cpu(cc, direct) != CPUBLOCK); -#else - return (0); -#endif -} - /* - * Kernel low level callwheel initialization - * called on cpu0 during kernel startup. + * Kernel low level callwheel initialization called from cpu0 during + * kernel startup: */ static void callout_callwheel_init(void *dummy) @@ -313,8 +473,6 @@ LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = SBT_MAX; - for (i = 0; i < 2; i++) - cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ @@ -322,38 +480,38 @@ for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_flags = CALLOUT_LOCAL_ALLOC; + c->c_flags |= CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } -#ifdef SMP -/* - * Switches the cpu tied to a specific callout. - * The function expects a locked incoming callout cpu and returns with - * locked outcoming callout cpu. - */ -static struct callout_cpu * -callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) +#ifdef CALLOUT_PROFILING +static inline void +callout_clear_stats(struct callout_cpu *cc, const int direct) { - struct callout_cpu *new_cc; - - MPASS(c != NULL && cc != NULL); - CC_LOCK_ASSERT(cc); + cc_exec_depth(cc, direct) = 0; + cc_exec_mpcalls(cc, direct) = 0; + cc_exec_lockcalls(cc, direct) = 0; + cc_exec_gcalls(cc, direct) = 0; +} +#endif - /* - * Avoid interrupts and preemption firing after the callout cpu - * is blocked in order to avoid deadlocks as the new thread - * may be willing to acquire the callout cpu lock. - */ - c->c_cpu = CPUBLOCK; - spinlock_enter(); - CC_UNLOCK(cc); - new_cc = CC_CPU(new_cpu); - CC_LOCK(new_cc); - spinlock_exit(); - c->c_cpu = new_cpu; - return (new_cc); +#ifdef CALLOUT_PROFILING +static inline void +callout_update_stats(struct callout_cpu *cc, const int direct) +{ + avg_depth[direct] += + (cc_exec_depth(cc, direct) * 1000 - + avg_depth[direct]) >> 8; + avg_mpcalls[direct] += + (cc_exec_mpcalls(cc, direct) * 1000 - + avg_mpcalls[direct]) >> 8; + avg_lockcalls[direct] += + (cc_exec_lockcalls(cc, direct) * 1000 - + avg_lockcalls[direct]) >> 8; + avg_gcalls[direct] += + (cc_exec_gcalls(cc, direct) * 1000 - + avg_gcalls[direct]) >> 8; } #endif @@ -424,19 +582,19 @@ void callout_process(sbintime_t now) { - struct callout *tmp, *tmpn; + struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; -#ifdef CALLOUT_PROFILING - int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; -#endif cc = CC_SELF(); - mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); + CC_LOCK(cc); +#ifdef CALLOUT_PROFILING + callout_clear_stats(cc, 1); +#endif /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; @@ -468,50 +626,47 @@ /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { + LIST_INIT(&cc->cc_tmplist); + sc = &cc->cc_callwheel[firstb & callwheelmask]; - tmp = LIST_FIRST(sc); - while (tmp != NULL) { + while (1) { + tmp = LIST_FIRST(sc); + if (tmp == NULL) + break; + + LIST_REMOVE(tmp, c_links.le); + /* Run the callout if present time within allowed. */ if (tmp->c_time <= now) { /* - * Consumer told us the callout may be run - * directly from hardware interrupt context. 
+ * Consumer told us the callout may be + * run directly from the hardware + * interrupt context: */ if (tmp->c_flags & CALLOUT_DIRECT) { -#ifdef CALLOUT_PROFILING - ++depth_dir; -#endif - cc_exec_next(cc) = - LIST_NEXT(tmp, c_links.le); - cc->cc_bucket = firstb & callwheelmask; - LIST_REMOVE(tmp, c_links.le); - softclock_call_cc(tmp, cc, -#ifdef CALLOUT_PROFILING - &mpcalls_dir, &lockcalls_dir, NULL, -#endif - 1); - tmp = cc_exec_next(cc); - cc_exec_next(cc) = NULL; + softclock_call_cc(tmp, cc, 1); } else { - tmpn = LIST_NEXT(tmp, c_links.le); - LIST_REMOVE(tmp, c_links.le); TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); tmp->c_flags |= CALLOUT_PROCESSED; - tmp = tmpn; } continue; } + + /* insert callout into temporary list */ + LIST_INSERT_HEAD(&cc->cc_tmplist, tmp, c_links.le); + /* Skip events from distant future. */ if (tmp->c_time >= max) - goto next; + continue; + /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; - goto next; + continue; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) @@ -519,11 +674,14 @@ tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; -next: - tmp = LIST_NEXT(tmp, c_links.le); } + + /* Put temporary list back into the main bucket */ + LIST_SWAP(sc, &cc->cc_tmplist, callout, c_links.le); + /* Proceed with the next bucket. */ firstb++; + /* * Stop if we looked after present time and found * some event we can't execute at now. @@ -535,66 +693,70 @@ cpu_new_callout(curcpu, last, first); #endif #ifdef CALLOUT_PROFILING - avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; - avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; - avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; + callout_update_stats(cc, 1); #endif - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); + CC_UNLOCK(cc); /* - * swi_sched acquires the thread lock, so we don't want to call it - * with cc_lock held; incorrect locking order. + * "swi_sched()" acquires the thread lock and we don't want to + * call it having cc_lock held because it leads to a locking + * order reversal issue. 
*/ if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); } -static struct callout_cpu * +static inline struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; - int cpu; - - for (;;) { - cpu = c->c_cpu; -#ifdef SMP - if (cpu == CPUBLOCK) { - while (c->c_cpu == CPUBLOCK) - cpu_spinwait(); - continue; - } -#endif - cc = CC_CPU(cpu); - CC_LOCK(cc); - if (cpu == c->c_cpu) - break; - CC_UNLOCK(cc); - } + cc = CC_CPU(c->c_cpu); + CC_LOCK(cc); return (cc); } -static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, - sbintime_t sbt, sbintime_t precision, void (*func)(void *), - void *arg, int cpu, int flags) +static struct callout_cpu * +callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, + struct callout_args *coa, bool can_swap_cpu) { +#ifndef NO_EVENTTIMERS + sbintime_t sbt; +#endif int bucket; CC_LOCK_ASSERT(cc); - if (sbt < cc->cc_lastscan) - sbt = cc->cc_lastscan; - c->c_arg = arg; - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); - c->c_flags &= ~CALLOUT_PROCESSED; - c->c_func = func; - c->c_time = sbt; - c->c_precision = precision; + + /* update flags before swapping locks, if any */ + c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART); + if (coa->flags & C_DIRECT_EXEC) + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); + else + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + +#ifdef SMP + /* + * Check if we are changing the CPU on which the callback + * should be executed and if we have a lock protecting us: + */ + if (can_swap_cpu != false && coa->cpu != c->c_cpu && + callout_lock_owned_client(c->c_flags, c->c_lock) != 0) { + CC_UNLOCK(cc); + c->c_cpu = coa->cpu; + cc = callout_lock(c); + } +#endif + if (coa->time < cc->cc_lastscan) + coa->time = cc->cc_lastscan; + c->c_arg = coa->arg; + c->c_func = coa->func; + c->c_time = coa->time; + c->c_precision = coa->precision; + bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); - if (cc->cc_bucket == bucket) - cc_exec_next(cc) = c; + #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout @@ -605,42 +767,28 @@ sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; - cpu_new_callout(cpu, sbt, c->c_time); + cpu_new_callout(c->c_cpu, sbt, c->c_time); } #endif + return (cc); } -static void +static inline void callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) - return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } -static void +static inline void softclock_call_cc(struct callout *c, struct callout_cpu *cc, -#ifdef CALLOUT_PROFILING - int *mpcalls, int *lockcalls, int *gcalls, -#endif - int direct) + const int direct) { - struct rm_priotracker tracker; - void (*c_func)(void *); + callout_func_t *c_func; void *c_arg; - struct lock_class *class; struct lock_object *c_lock; - uintptr_t lock_status; int c_flags; -#ifdef SMP - struct callout_cpu *new_cc; - void (*new_func)(void *); - void *new_arg; - int flags, new_cpu; - sbintime_t new_prec, new_time; -#endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; @@ -651,58 +799,65 @@ KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == (CALLOUT_PENDING | CALLOUT_ACTIVE), ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); - class = 
(c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; - lock_status = 0; - if (c->c_flags & CALLOUT_SHAREDLOCK) { - if (class == &lock_class_rm) - lock_status = (uintptr_t)&tracker; - else - lock_status = 1; - } + c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) - c->c_flags = CALLOUT_LOCAL_ALLOC; - else - c->c_flags &= ~CALLOUT_PENDING; - + + /* remove pending bit */ + c->c_flags &= ~CALLOUT_PENDING; + + /* reset our local state */ cc_exec_curr(cc, direct) = c; - cc_exec_cancel(cc, direct) = false; - CC_UNLOCK(cc); + cc_exec_restart(cc, direct) = false; + cc_exec_drain_fn(cc, direct) = NULL; + cc_exec_drain_arg(cc, direct) = NULL; + if (c_lock != NULL) { - class->lc_lock(c_lock, lock_status); + cc_exec_cancel(cc, direct) = false; + CC_UNLOCK(cc); + + /* unlocked region for switching locks */ + + callout_lock_client(c_flags, c_lock); + /* - * The callout may have been cancelled - * while we switched locks. + * Check if the callout may have been cancelled while + * we were switching locks. Even though the callout is + * specifying a lock, it might not be certain this + * lock is locked when starting and stopping callouts. */ + CC_LOCK(cc); if (cc_exec_cancel(cc, direct)) { - class->lc_unlock(c_lock); - goto skip; + callout_unlock_client(c_flags, c_lock); + goto skip_cc_locked; } - /* The callout cannot be stopped now. */ - cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING - (*gcalls)++; + cc_exec_gcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING - (*lockcalls)++; + cc_exec_lockcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING - (*mpcalls)++; + cc_exec_mpcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } + /* The callout cannot be stopped now! */ + cc_exec_cancel(cc, direct) = true; + CC_UNLOCK(cc); + + /* unlocked region */ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) @@ -729,85 +884,40 @@ #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); + + /* + * At this point the callback structure might have been freed, + * so we need to check the previously copied value of + * "c->c_flags": + */ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) - class->lc_unlock(c_lock); -skip: + callout_unlock_client(c_flags, c_lock); + CC_LOCK(cc); + +skip_cc_locked: KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); cc_exec_curr(cc, direct) = NULL; - if (cc_exec_waiting(cc, direct)) { - /* - * There is someone waiting for the - * callout to complete. - * If the callout was scheduled for - * migration just cancel it. - */ - if (cc_cce_migrating(cc, direct)) { - cc_cce_cleanup(cc, direct); - - /* - * It should be assert here that the callout is not - * destroyed but that is not easy. - */ - c->c_flags &= ~CALLOUT_DFRMIGRATION; - } - cc_exec_waiting(cc, direct) = false; - CC_UNLOCK(cc); - wakeup(&cc_exec_waiting(cc, direct)); - CC_LOCK(cc); - } else if (cc_cce_migrating(cc, direct)) { - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, - ("Migrating legacy callout %p", c)); -#ifdef SMP - /* - * If the callout was scheduled for - * migration just perform it now. 
- */ - new_cpu = cc_migration_cpu(cc, direct); - new_time = cc_migration_time(cc, direct); - new_prec = cc_migration_prec(cc, direct); - new_func = cc_migration_func(cc, direct); - new_arg = cc_migration_arg(cc, direct); - cc_cce_cleanup(cc, direct); + /* Check if there is anything which needs draining */ + if (cc_exec_drain_fn(cc, direct) != NULL) { /* - * It should be assert here that the callout is not destroyed - * but that is not easy. - * - * As first thing, handle deferred callout stops. + * Unlock the CPU callout last, so that any use of + * structures belonging to the callout are complete: */ - if (!callout_migrating(c)) { - CTR3(KTR_CALLOUT, - "deferred cancelled %p func %p arg %p", - c, new_func, new_arg); - callout_cc_del(c, cc); - return; - } - c->c_flags &= ~CALLOUT_DFRMIGRATION; - - new_cc = callout_cpu_switch(c, cc, new_cpu); - flags = (direct) ? C_DIRECT_EXEC : 0; - callout_cc_add(c, new_cc, new_time, new_prec, new_func, - new_arg, new_cpu, flags); - CC_UNLOCK(new_cc); + CC_UNLOCK(cc); + /* call drain function unlocked */ + cc_exec_drain_fn(cc, direct)( + cc_exec_drain_arg(cc, direct)); CC_LOCK(cc); -#else - panic("migration should not happen"); -#endif - } - /* - * If the current callout is locally allocated (from - * timeout(9)) then put it on the freelist. - * - * Note: we need to check the cached copy of c_flags because - * if it was not local, then it's not safe to deref the - * callout pointer. - */ - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || - c->c_flags == CALLOUT_LOCAL_ALLOC, - ("corrupted callout")); - if (c_flags & CALLOUT_LOCAL_ALLOC) + } else if (c_flags & CALLOUT_LOCAL_ALLOC) { + /* return callout back to freelist */ callout_cc_del(c, cc); + } else if (cc_exec_restart(cc, direct)) { + /* [re-]schedule callout, if any */ + (void) callout_cc_add_locked(c, cc, + &cc_exec_restart_args(cc, direct), false); + } } /* @@ -831,28 +941,18 @@ { struct callout_cpu *cc; struct callout *c; -#ifdef CALLOUT_PROFILING - int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; -#endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - softclock_call_cc(c, cc, -#ifdef CALLOUT_PROFILING - &mpcalls, &lockcalls, &gcalls, -#endif - 0); #ifdef CALLOUT_PROFILING - ++depth; + callout_clear_stats(cc, 0); #endif + while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + softclock_call_cc(c, cc, 0); } #ifdef CALLOUT_PROFILING - avg_depth += (depth * 1000 - avg_depth) >> 8; - avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; - avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; - avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; + callout_update_stats(cc, 0); #endif CC_UNLOCK(cc); } @@ -888,10 +988,11 @@ /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); - callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); + callout_reset(new, to_ticks, ftn, arg); + return (handle); } @@ -899,6 +1000,7 @@ untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; + bool match; /* * Check for a handle that was initialized @@ -909,9 +1011,11 @@ return; cc = callout_lock(handle.callout); - if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) - callout_stop(handle.callout); + match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); CC_UNLOCK(cc); + + if (match) + callout_stop(handle.callout); } void @@ 
-920,6 +1024,118 @@ handle->callout = NULL; } +static int +callout_restart_async(struct callout *c, struct callout_args *coa, + callout_func_t *drain_fn, void *drain_arg) +{ + struct callout_cpu *cc; + int cancelled; + int direct; + + cc = callout_lock(c); + + /* Figure out if the callout is direct or not */ + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + + /* + * Check if the callback is currently scheduled for + * completion: + */ + if (cc_exec_curr(cc, direct) == c) { + /* + * Try to prevent the callback from running by setting + * the "cc_cancel" variable to "true". Also check if + * the callout was previously subject to a deferred + * callout restart: + */ + if (cc_exec_cancel(cc, direct) == false || + (c->c_flags & CALLOUT_DEFRESTART) != 0) { + cc_exec_cancel(cc, direct) = true; + cancelled = CALLOUT_RET_CANCELLED; + } else { + cancelled = CALLOUT_RET_NORMAL; + } + + /* + * Prevent callback restart if "callout_drain_xxx()" + * is being called or we are stopping the callout or + * the callback was preallocated by us: + */ + if (cc_exec_drain_fn(cc, direct) != NULL || + coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and draining" : "draining", + c, c->c_func, c->c_arg); + + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* clear restart flag, if any */ + cc_exec_restart(cc, direct) = false; + + /* set drain function, if any */ + if (drain_fn != NULL) { + cc_exec_drain_fn(cc, direct) = drain_fn; + cc_exec_drain_arg(cc, direct) = drain_arg; + cancelled |= CALLOUT_RET_DRAINING; + } + } else { + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled and restarting" : "restarting", + c, c->c_func, c->c_arg); + + /* get us back into the game */ + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART); + c->c_flags &= ~CALLOUT_PROCESSED; + + /* enable deferred restart */ + cc_exec_restart(cc, direct) = true; + + /* store arguments for the deferred restart, if any */ + cc_exec_restart_args(cc, direct) = *coa; + } + } else { + /* stop callout */ + if (c->c_flags & CALLOUT_PENDING) { + /* + * The callback has not yet been executed, and + * we simply just need to unlink it: + */ + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + LIST_REMOVE(c, c_links.le); + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } + cancelled = CALLOUT_RET_CANCELLED; + } else { + cancelled = CALLOUT_RET_NORMAL; + } + + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "rescheduled" : "scheduled", + c, c->c_func, c->c_arg); + + /* [re-]schedule callout, if any */ + if (coa != NULL) { + cc = callout_cc_add_locked(c, cc, coa, true); + } else { + /* clear old flags, if any */ + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | + CALLOUT_DEFRESTART | CALLOUT_PROCESSED); + + /* return callback to pre-allocated list, if any */ + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && + cancelled != CALLOUT_RET_NORMAL) { + callout_cc_del(c, cc); + } + } + } + CC_UNLOCK(cc); + return (cancelled); +} + /* * New interface; clients allocate their own callout structures. 
* @@ -938,25 +1154,32 @@ */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, - void (*ftn)(void *), void *arg, int cpu, int flags) + callout_func_t *ftn, void *arg, int cpu, int flags) { - sbintime_t to_sbt, pr; - struct callout_cpu *cc; - int cancelled, direct; + struct callout_args coa; - cancelled = 0; - if (flags & C_ABSOLUTE) { - to_sbt = sbt; + /* store arguments for callout add function */ + coa.func = ftn; + coa.arg = arg; + coa.precision = precision; + coa.flags = flags; + coa.cpu = cpu; + + /* compute the rest of the arguments needed */ + if (coa.flags & C_ABSOLUTE) { + coa.time = sbt; } else { - if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) + sbintime_t pr; + + if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; - if ((flags & C_HARDCLOCK) || + if ((coa.flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { - to_sbt = getsbinuptime(); + coa.time = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ - to_sbt += tc_tick_sbt - tick_sbt; + coa.time += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* @@ -966,142 +1189,29 @@ * active ones. */ #ifdef __LP64__ - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); #else spinlock_enter(); - to_sbt = DPCPU_GET(hardclocktime); + coa.time = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif - if ((flags & C_HARDCLOCK) == 0) - to_sbt += tick_sbt; + if ((coa.flags & C_HARDCLOCK) == 0) + coa.time += tick_sbt; } else - to_sbt = sbinuptime(); - if (SBT_MAX - to_sbt < sbt) - to_sbt = SBT_MAX; + coa.time = sbinuptime(); + if (SBT_MAX - coa.time < sbt) + coa.time = SBT_MAX; else - to_sbt += sbt; - pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : - sbt >> C_PRELGET(flags)); - if (pr > precision) - precision = pr; + coa.time += sbt; + pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(coa.flags)); + if (pr > coa.precision) + coa.precision = pr; } - /* - * Don't allow migration of pre-allocated callouts lest they - * become unbalanced. - */ - if (c->c_flags & CALLOUT_LOCAL_ALLOC) - cpu = c->c_cpu; - /* - * This flag used to be added by callout_cc_add, but the - * first time you call this we could end up with the - * wrong direct flag if we don't do it before we add. - */ - if (flags & C_DIRECT_EXEC) { - c->c_flags |= CALLOUT_DIRECT; - } - direct = (c->c_flags & CALLOUT_DIRECT) != 0; - KASSERT(!direct || c->c_lock == NULL, - ("%s: direct callout %p has lock", __func__, c)); - cc = callout_lock(c); - if (cc_exec_curr(cc, direct) == c) { - /* - * We're being asked to reschedule a callout which is - * currently in progress. If there is a lock then we - * can cancel the callout if it has not really started. - */ - if (c->c_lock != NULL && cc_exec_cancel(cc, direct)) - cancelled = cc_exec_cancel(cc, direct) = true; - if (cc_exec_waiting(cc, direct)) { - /* - * Someone has called callout_drain to kill this - * callout. Don't reschedule. - */ - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled" : "failed to cancel", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (cancelled); - } -#ifdef SMP - if (callout_migrating(c)) { - /* - * This only occurs when a second callout_reset_sbt_on - * is made after a previous one moved it into - * deferred migration (below). Note we do *not* change - * the prev_cpu even though the previous target may - * be different. 
- */ - cc_migration_cpu(cc, direct) = cpu; - cc_migration_time(cc, direct) = to_sbt; - cc_migration_prec(cc, direct) = precision; - cc_migration_func(cc, direct) = ftn; - cc_migration_arg(cc, direct) = arg; - cancelled = 1; - CC_UNLOCK(cc); - return (cancelled); - } -#endif - } - if (c->c_flags & CALLOUT_PENDING) { - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc_exec_next(cc) == c) - cc_exec_next(cc) = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - cancelled = 1; - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); - } - -#ifdef SMP - /* - * If the callout must migrate try to perform it immediately. - * If the callout is currently running, just defer the migration - * to a more appropriate moment. - */ - if (c->c_cpu != cpu) { - if (cc_exec_curr(cc, direct) == c) { - /* - * Pending will have been removed since we are - * actually executing the callout on another - * CPU. That callout should be waiting on the - * lock the caller holds. If we set both - * active/and/pending after we return and the - * lock on the executing callout proceeds, it - * will then see pending is true and return. - * At the return from the actual callout execution - * the migration will occur in softclock_call_cc - * and this new callout will be placed on the - * new CPU via a call to callout_cpu_switch() which - * will get the lock on the right CPU followed - * by a call callout_cc_add() which will add it there. - * (see above in softclock_call_cc()). - */ - cc_migration_cpu(cc, direct) = cpu; - cc_migration_time(cc, direct) = to_sbt; - cc_migration_prec(cc, direct) = precision; - cc_migration_func(cc, direct) = ftn; - cc_migration_arg(cc, direct) = arg; - c->c_flags |= (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING); - CTR6(KTR_CALLOUT, - "migration of %p func %p arg %p in %d.%08x to %u deferred", - c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff), cpu); - CC_UNLOCK(cc); - return (cancelled); - } - cc = callout_cpu_switch(c, cc, cpu); - } -#endif - callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); - CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", - cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), - (u_int)(to_sbt & 0xffffffff)); - CC_UNLOCK(cc); - - return (cancelled); + /* get callback started, if any */ + return (callout_restart_async(c, &coa, NULL, NULL)); } /* @@ -1120,252 +1230,106 @@ } int -_callout_stop_safe(struct callout *c, int safe) +callout_stop(struct callout *c) { - struct callout_cpu *cc, *old_cc; - struct lock_class *class; - int direct, sq_locked, use_lock; - int not_on_a_list; - - if (safe) - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, - "calling %s", __func__); + /* get callback stopped, if any */ + return (callout_restart_async(c, NULL, NULL, NULL)); +} - /* - * Some old subsystems don't hold Giant while running a callout_stop(), - * so just discard this check for the moment. 
- */ - if (!safe && c->c_lock != NULL) { - if (c->c_lock == &Giant.lock_object) - use_lock = mtx_owned(&Giant); - else { - use_lock = 1; - class = LOCK_CLASS(c->c_lock); - class->lc_assert(c->c_lock, LA_XLOCKED); - } - } else - use_lock = 0; - direct = (c->c_flags & CALLOUT_DIRECT) != 0; - sq_locked = 0; - old_cc = NULL; -again: - cc = callout_lock(c); +static void +callout_drain_function(void *arg) +{ + wakeup(arg); +} - if ((c->c_flags & (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) == - (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) { - /* - * Special case where this slipped in while we - * were migrating *as* the callout is about to - * execute. The caller probably holds the lock - * the callout wants. - * - * Get rid of the migration first. Then set - * the flag that tells this code *not* to - * try to remove it from any lists (its not - * on one yet). When the callout wheel runs, - * it will ignore this callout. - */ - c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_ACTIVE); - not_on_a_list = 1; - } else { - not_on_a_list = 0; - } +int +callout_drain_async(struct callout *c, callout_func_t *fn, void *arg) +{ + /* get callback stopped, if any */ + return (callout_restart_async( + c, NULL, fn, arg) & CALLOUT_RET_DRAINING); +} - /* - * If the callout was migrating while the callout cpu lock was - * dropped, just drop the sleepqueue lock and check the states - * again. - */ - if (sq_locked != 0 && cc != old_cc) { -#ifdef SMP - CC_UNLOCK(cc); - sleepq_release(&cc_exec_waiting(old_cc, direct)); - sq_locked = 0; - old_cc = NULL; - goto again; -#else - panic("migration should not happen"); -#endif - } +int +callout_drain(struct callout *c) +{ + int cancelled; - /* - * If the callout isn't pending, it's not on the queue, so - * don't attempt to remove it from the queue. We can try to - * stop it by other means however. - */ - if (!(c->c_flags & CALLOUT_PENDING)) { - c->c_flags &= ~CALLOUT_ACTIVE; + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Draining callout"); + + callout_lock_client(c->c_flags, c->c_lock); + + /* at this point the "c->c_cpu" field is not changing */ + + cancelled = callout_drain_async(c, &callout_drain_function, c); + + if (cancelled != CALLOUT_RET_NORMAL) { + struct callout_cpu *cc; + int direct; + + CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p", + c, c->c_func, c->c_arg); + + cc = callout_lock(c); + direct = ((c->c_flags & CALLOUT_DIRECT) != 0); /* - * If it wasn't on the queue and it isn't the current - * callout, then we can't stop it, so just bail. + * We've gotten our callout CPU lock, it is safe to + * drop the initial lock: */ - if (cc_exec_curr(cc, direct) != c) { - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - if (sq_locked) - sleepq_release(&cc_exec_waiting(cc, direct)); - return (0); - } + callout_unlock_client(c->c_flags, c->c_lock); - if (safe) { - /* - * The current callout is running (or just - * about to run) and blocking is allowed, so - * just wait for the current invocation to - * finish. - */ - while (cc_exec_curr(cc, direct) == c) { - /* - * Use direct calls to sleepqueue interface - * instead of cv/msleep in order to avoid - * a LOR between cc_lock and sleepqueue - * chain spinlocks. This piece of code - * emulates a msleep_spin() call actually. - * - * If we already have the sleepqueue chain - * locked, then we can safely block. If we - * don't already have it locked, however, - * we have to drop the cc_lock to lock - * it. 
This opens several races, so we - * restart at the beginning once we have - * both locks. If nothing has changed, then - * we will end up back here with sq_locked - * set. - */ - if (!sq_locked) { - CC_UNLOCK(cc); - sleepq_lock( - &cc_exec_waiting(cc, direct)); - sq_locked = 1; - old_cc = cc; - goto again; - } + /* Wait for drain to complete */ + + while (cc_exec_curr(cc, direct) == c) + msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0); - /* - * Migration could be cancelled here, but - * as long as it is still not sure when it - * will be packed up, just let softclock() - * take care of it. - */ - cc_exec_waiting(cc, direct) = true; - DROP_GIANT(); - CC_UNLOCK(cc); - sleepq_add( - &cc_exec_waiting(cc, direct), - &cc->cc_lock.lock_object, "codrain", - SLEEPQ_SLEEP, 0); - sleepq_wait( - &cc_exec_waiting(cc, direct), - 0); - sq_locked = 0; - old_cc = NULL; - - /* Reacquire locks previously released. */ - PICKUP_GIANT(); - CC_LOCK(cc); - } - } else if (use_lock && - !cc_exec_cancel(cc, direct)) { - - /* - * The current callout is waiting for its - * lock which we hold. Cancel the callout - * and return. After our caller drops the - * lock, the callout will be skipped in - * softclock(). - */ - cc_exec_cancel(cc, direct) = true; - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", - c, c->c_func, c->c_arg); - KASSERT(!cc_cce_migrating(cc, direct), - ("callout wrongly scheduled for migration")); - CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain locked")); - return (1); - } else if (callout_migrating(c)) { - /* - * The callout is currently being serviced - * and the "next" callout is scheduled at - * its completion with a migration. We remove - * the migration flag so it *won't* get rescheduled, - * but we can't stop the one thats running so - * we return 0. - */ - c->c_flags &= ~CALLOUT_DFRMIGRATION; -#ifdef SMP - /* - * We can't call cc_cce_cleanup here since - * if we do it will remove .ce_curr and - * its still running. This will prevent a - * reschedule of the callout when the - * execution completes. 
- */ - cc_migration_cpu(cc, direct) = CPUBLOCK; - cc_migration_time(cc, direct) = 0; - cc_migration_prec(cc, direct) = 0; - cc_migration_func(cc, direct) = NULL; - cc_migration_arg(cc, direct) = NULL; -#endif - CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); - return (0); - } - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); CC_UNLOCK(cc); - KASSERT(!sq_locked, ("sleepqueue chain still locked")); - return (0); + } else { + callout_unlock_client(c->c_flags, c->c_lock); } - if (sq_locked) - sleepq_release(&cc_exec_waiting(cc, direct)); - - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if (not_on_a_list == 0) { - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc_exec_next(cc) == c) - cc_exec_next(cc) = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - callout_cc_del(c, cc); - CC_UNLOCK(cc); - return (1); + + return (cancelled & CALLOUT_RET_CANCELLED); } void callout_init(struct callout *c, int mpsafe) { - bzero(c, sizeof *c); if (mpsafe) { - c->c_lock = NULL; - c->c_flags = CALLOUT_RETURNUNLOCKED; + _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED); } else { - c->c_lock = &Giant.lock_object; - c->c_flags = 0; + _callout_init_lock(c, &Giant.lock_object, 0); } - c->c_cpu = timeout_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); + KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0, + ("callout_init_lock: bad flags 0x%08x", flags)); + flags &= CALLOUT_RETURNUNLOCKED; + if (lock != NULL) { + struct lock_class *class = LOCK_CLASS(lock); + if (class == &lock_class_mtx_sleep) + flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX); + else if (class == &lock_class_mtx_spin) + flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN); + else if (class == &lock_class_rm) + flags |= CALLOUT_SET_LC(CALLOUT_LC_RM); + else if (class == &lock_class_rw) + flags |= CALLOUT_SET_LC(CALLOUT_LC_RW); + else + panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name); + } else { + flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0); + } c->c_lock = lock; - KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, - ("callout_init_lock: bad flags %d", flags)); - KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, - ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); - KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & - (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", - __func__)); - c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); + c->c_flags = flags; c->c_cpu = timeout_cpu; } Index: sys/kern/subr_sleepqueue.c =================================================================== --- sys/kern/subr_sleepqueue.c +++ sys/kern/subr_sleepqueue.c @@ -152,7 +152,8 @@ */ static int sleepq_catch_signals(void *wchan, int pri); static int sleepq_check_signals(void); -static int sleepq_check_timeout(void); +static int sleepq_check_timeout(struct thread *); +static void sleepq_stop_timeout(struct thread *); #ifdef INVARIANTS static void sleepq_dtor(void *mem, int size, void *arg); #endif @@ -373,17 +374,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr, int flags) { - struct sleepqueue_chain *sc; struct thread *td; td = curthread; - sc = SC_LOOKUP(wchan); - mtx_assert(&sc->sc_lock, MA_OWNED); - MPASS(TD_ON_SLEEPQ(td)); - MPASS(td->td_sleepqueue == NULL); - MPASS(wchan != 
NULL); + + mtx_lock_spin(&td->td_slpmutex); callout_reset_sbt_on(&td->td_slpcallout, sbt, pr, sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC); + mtx_unlock_spin(&td->td_slpmutex); } /* @@ -559,11 +557,8 @@ * Check to see if we timed out. */ static int -sleepq_check_timeout(void) +sleepq_check_timeout(struct thread *td) { - struct thread *td; - - td = curthread; THREAD_LOCK_ASSERT(td, MA_OWNED); /* @@ -573,28 +568,21 @@ td->td_flags &= ~TDF_TIMEOUT; return (EWOULDBLOCK); } - - /* - * If TDF_TIMOFAIL is set, the timeout ran after we had - * already been woken up. - */ - if (td->td_flags & TDF_TIMOFAIL) - td->td_flags &= ~TDF_TIMOFAIL; - - /* - * If callout_stop() fails, then the timeout is running on - * another CPU, so synchronize with it to avoid having it - * accidentally wake up a subsequent sleep. - */ - else if (callout_stop(&td->td_slpcallout) == 0) { - td->td_flags |= TDF_TIMEOUT; - TD_SET_SLEEPING(td); - mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL); - } return (0); } /* + * Atomically stop the timeout by using a mutex. + */ +static void +sleepq_stop_timeout(struct thread *td) +{ + mtx_lock_spin(&td->td_slpmutex); + callout_stop(&td->td_slpcallout); + mtx_unlock_spin(&td->td_slpmutex); +} + +/* * Check to see if we were awoken by a signal. */ static int @@ -664,9 +652,11 @@ MPASS(!(td->td_flags & TDF_SINTR)); thread_lock(td); sleepq_switch(wchan, pri); - rval = sleepq_check_timeout(); + rval = sleepq_check_timeout(td); thread_unlock(td); + sleepq_stop_timeout(td); + return (rval); } @@ -677,12 +667,18 @@ int sleepq_timedwait_sig(void *wchan, int pri) { + struct thread *td; int rcatch, rvalt, rvals; + td = curthread; + rcatch = sleepq_catch_signals(wchan, pri); - rvalt = sleepq_check_timeout(); + rvalt = sleepq_check_timeout(td); rvals = sleepq_check_signals(); - thread_unlock(curthread); + thread_unlock(td); + + sleepq_stop_timeout(td); + if (rcatch) return (rcatch); if (rvals) @@ -889,64 +885,49 @@ static void sleepq_timeout(void *arg) { - struct sleepqueue_chain *sc; - struct sleepqueue *sq; - struct thread *td; - void *wchan; - int wakeup_swapper; + struct thread *td = arg; + int wakeup_swapper = 0; - td = arg; - wakeup_swapper = 0; CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); - /* - * First, see if the thread is asleep and get the wait channel if - * it is. - */ - thread_lock(td); - if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) { - wchan = td->td_wchan; - sc = SC_LOOKUP(wchan); - THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); - sq = sleepq_lookup(wchan); - MPASS(sq != NULL); - td->td_flags |= TDF_TIMEOUT; - wakeup_swapper = sleepq_resume_thread(sq, td, 0); - thread_unlock(td); - if (wakeup_swapper) - kick_proc0(); - return; - } + /* Handle the three cases which can happen */ - /* - * If the thread is on the SLEEPQ but isn't sleeping yet, it - * can either be on another CPU in between sleepq_add() and - * one of the sleepq_*wait*() routines or it can be in - * sleepq_catch_signals(). - */ + thread_lock(td); if (TD_ON_SLEEPQ(td)) { - td->td_flags |= TDF_TIMEOUT; - thread_unlock(td); - return; - } + if (TD_IS_SLEEPING(td)) { + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + void *wchan; - /* - * Now check for the edge cases. First, if TDF_TIMEOUT is set, - * then the other thread has already yielded to us, so clear - * the flag and resume it. If TDF_TIMEOUT is not set, then the - * we know that the other thread is not on a sleep queue, but it - * hasn't resumed execution yet. 
In that case, set TDF_TIMOFAIL - * to let it know that the timeout has already run and doesn't - * need to be canceled. - */ - if (td->td_flags & TDF_TIMEOUT) { - MPASS(TD_IS_SLEEPING(td)); - td->td_flags &= ~TDF_TIMEOUT; - TD_CLR_SLEEPING(td); - wakeup_swapper = setrunnable(td); - } else - td->td_flags |= TDF_TIMOFAIL; + /* + * Case I - thread is asleep and needs to be + * awoken: + */ + wchan = td->td_wchan; + sc = SC_LOOKUP(wchan); + THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); + sq = sleepq_lookup(wchan); + MPASS(sq != NULL); + td->td_flags |= TDF_TIMEOUT; + wakeup_swapper = sleepq_resume_thread(sq, td, 0); + } else { + /* + * Case II - cancel going to sleep by setting + * the timeout flag because the target thread + * is not asleep yet. It can be on another CPU + * in between sleepq_add() and one of the + * sleepq_*wait*() routines or it can be in + * sleepq_catch_signals(). + */ + td->td_flags |= TDF_TIMEOUT; + } + } else { + /* + * Case III - thread is already woken up by a wakeup + * call and should not timeout. Nothing to do! + */ + } thread_unlock(td); if (wakeup_swapper) kick_proc0(); Index: sys/ofed/include/linux/completion.h =================================================================== --- sys/ofed/include/linux/completion.h +++ sys/ofed/include/linux/completion.h @@ -105,7 +105,9 @@ if (c->done) break; sleepq_add(c, NULL, "completion", flags, 0); + sleepq_release(c); sleepq_set_timeout(c, end - ticks); + sleepq_lock(c); if (flags & SLEEPQ_INTERRUPTIBLE) { if (sleepq_timedwait_sig(c, 0) != 0) return (-ERESTARTSYS); Index: sys/sys/_callout.h =================================================================== --- sys/sys/_callout.h +++ sys/sys/_callout.h @@ -46,6 +46,8 @@ SLIST_HEAD(callout_slist, callout); TAILQ_HEAD(callout_tailq, callout); +typedef void callout_func_t(void *); + struct callout { union { LIST_ENTRY(callout) le; @@ -55,7 +57,7 @@ sbintime_t c_time; /* ticks to the event */ sbintime_t c_precision; /* delta allowed wrt opt */ void *c_arg; /* function argument */ - void (*c_func)(void *); /* function to call */ + callout_func_t *c_func; /* function to call */ struct lock_object *c_lock; /* lock to handle */ int c_flags; /* state of this entry */ volatile int c_cpu; /* CPU we're scheduled on */ Index: sys/sys/callout.h =================================================================== --- sys/sys/callout.h +++ sys/sys/callout.h @@ -45,10 +45,12 @@ #define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */ #define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */ #define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */ -#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */ -#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */ +#define CALLOUT_UNUSED_5 0x0020 /* --available-- */ +#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */ #define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? 
*/ #define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */ +#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */ +#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */ #define C_DIRECT_EXEC 0x0001 /* direct execution of callout */ #define C_PRELBITS 7 @@ -64,9 +66,9 @@ #ifdef _KERNEL #define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE) -#define callout_migrating(c) ((c)->c_flags & CALLOUT_DFRMIGRATION) #define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE) -#define callout_drain(c) _callout_stop_safe(c, 1) +int callout_drain(struct callout *); +int callout_drain_async(struct callout *, callout_func_t *, void *); void callout_init(struct callout *, int); void _callout_init_lock(struct callout *, struct lock_object *, int); #define callout_init_mtx(c, mtx, flags) \ @@ -80,7 +82,7 @@ NULL, (flags)) #define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING) int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t, - void (*)(void *), void *, int, int); + callout_func_t *, void *, int, int); #define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, (flags)) #define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \ @@ -104,8 +106,7 @@ int callout_schedule_on(struct callout *, int, int); #define callout_schedule_curcpu(c, on_tick) \ callout_schedule_on((c), (on_tick), PCPU_GET(cpuid)) -#define callout_stop(c) _callout_stop_safe(c, 0) -int _callout_stop_safe(struct callout *, int); +int callout_stop(struct callout *); void callout_process(sbintime_t now); #endif Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h +++ sys/sys/proc.h @@ -308,6 +308,7 @@ } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval struct callout td_slpcallout; /* (h) Callout for sleep. */ + struct mtx td_slpmutex; /* (h) Mutex for sleep callout */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ @@ -364,7 +365,7 @@ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ -#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ +#define TDF_UNUSED12 0x00001000 /* --available-- */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ @@ -706,7 +707,7 @@ #define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ -#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ +#define SWT_UNUSED5 5 /* --available-- */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */
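
Example of the expected consumer usage (illustration only, not part of the diffs above): a driver initializes its callout together with a mutex, stops the callout under that mutex, and drains it before the state is freed. The "xxx" driver, its softc layout and the M_XXX malloc type are hypothetical names used only for this sketch; a consumer that cannot sleep during teardown would use the new callout_drain_async() with a completion callback instead of the blocking callout_drain() shown here (see the updated timeout.9 in this patch for its exact return convention).

/*
 * Illustration only -- not part of the patch.  Hypothetical "xxx"
 * driver showing the stop-then-drain discipline before freeing.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/callout.h>

static MALLOC_DEFINE(M_XXX, "xxx", "xxx driver state");

struct xxx_softc {
	struct mtx	sc_mtx;		/* lock associated with the callout */
	struct callout	sc_watchdog;	/* periodic watchdog timer */
};

static void
xxx_watchdog(void *arg)
{
	struct xxx_softc *sc = arg;

	/* Invoked with sc_mtx held because of callout_init_mtx(). */
	mtx_assert(&sc->sc_mtx, MA_OWNED);

	/* Callouts are one-shot; reschedule for periodic operation. */
	callout_reset(&sc->sc_watchdog, hz, &xxx_watchdog, sc);
}

static struct xxx_softc *
xxx_start(void)
{
	struct xxx_softc *sc;

	sc = malloc(sizeof(*sc), M_XXX, M_WAITOK | M_ZERO);
	mtx_init(&sc->sc_mtx, "xxx watchdog", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);

	mtx_lock(&sc->sc_mtx);
	callout_reset(&sc->sc_watchdog, hz, &xxx_watchdog, sc);
	mtx_unlock(&sc->sc_mtx);

	return (sc);
}

static void
xxx_stop(struct xxx_softc *sc)
{
	/* Stop under the associated lock ... */
	mtx_lock(&sc->sc_mtx);
	callout_stop(&sc->sc_watchdog);
	mtx_unlock(&sc->sc_mtx);

	/* ... and drain before the callout structure is freed. */
	callout_drain(&sc->sc_watchdog);

	mtx_destroy(&sc->sc_mtx);
	free(sc, M_XXX);
}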