D1438.vs3951.amp.diff

Index: projects/hps_head/share/man/man9/Makefile
===================================================================
--- projects/hps_head/share/man/man9/Makefile
+++ projects/hps_head/share/man/man9/Makefile
@@ -1573,6 +1573,7 @@
timeout.9 callout_active.9 \
timeout.9 callout_deactivate.9 \
timeout.9 callout_drain.9 \
+ timeout.9 callout_drain_async.9 \
timeout.9 callout_handle_init.9 \
timeout.9 callout_init.9 \
timeout.9 callout_init_mtx.9 \
Index: projects/hps_head/share/man/man9/timeout.9
===================================================================
--- projects/hps_head/share/man/man9/timeout.9
+++ projects/hps_head/share/man/man9/timeout.9
@@ -29,13 +29,14 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 8, 2014
+.Dd January 24, 2015
.Dt TIMEOUT 9
.Os
.Sh NAME
.Nm callout_active ,
.Nm callout_deactivate ,
.Nm callout_drain ,
+.Nm callout_drain_async ,
.Nm callout_handle_init ,
.Nm callout_init ,
.Nm callout_init_mtx ,
@@ -63,256 +64,234 @@
.In sys/systm.h
.Bd -literal
typedef void timeout_t (void *);
+typedef void callout_func_t (void *);
.Ed
-.Ft int
-.Fn callout_active "struct callout *c"
-.Ft void
-.Fn callout_deactivate "struct callout *c"
-.Ft int
-.Fn callout_drain "struct callout *c"
-.Ft void
-.Fn callout_handle_init "struct callout_handle *handle"
-.Bd -literal
-struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle);
-.Ed
-.Ft void
-.Fn callout_init "struct callout *c" "int mpsafe"
-.Ft void
-.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags"
-.Ft void
-.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags"
-.Ft void
-.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags"
-.Ft int
-.Fn callout_pending "struct callout *c"
-.Ft int
-.Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg"
-.Ft int
-.Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \
-"void *arg"
-.Ft int
-.Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \
-"void *arg" "int cpu"
-.Ft int
-.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "timeout_t *func" "void *arg" "int flags"
-.Ft int
-.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "timeout_t *func" "void *arg" "int flags"
-.Ft int
-.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags"
-.Ft int
-.Fn callout_schedule "struct callout *c" "int ticks"
-.Ft int
-.Fn callout_schedule_curcpu "struct callout *c" "int ticks"
-.Ft int
-.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu"
-.Ft int
-.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "int flags"
-.Ft int
-.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "int flags"
-.Ft int
-.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "int cpu" "int flags"
-.Ft int
-.Fn callout_stop "struct callout *c"
-.Ft struct callout_handle
-.Fn timeout "timeout_t *func" "void *arg" "int ticks"
-.Ft void
-.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle"
.Sh DESCRIPTION
The
.Nm callout
-API is used to schedule a call to an arbitrary function at a specific
-time in the future.
-Consumers of this API are required to allocate a callout structure
-.Pq struct callout
+API is used to schedule a one-time call to an arbitrary function at a
+specific time in the future.
+Consumers of this API are required to allocate a
+.Ft struct callout
for each pending function invocation.
-This structure stores state about the pending function invocation including
-the function to be called and the time at which the function should be invoked.
-Pending function calls can be cancelled or rescheduled to a different time.
-In addition,
-a callout structure may be reused to schedule a new function call after a
-scheduled call is completed.
-.Pp
-Callouts only provide a single-shot mode.
-If a consumer requires a periodic timer,
-it must explicitly reschedule each function call.
-This is normally done by rescheduling the subsequent call within the called
-function.
-.Pp
-Callout functions must not sleep.
-They may not acquire sleepable locks,
-wait on condition variables,
-perform blocking allocation requests,
-or invoke any other action that might sleep.
-.Pp
-Each callout structure must be initialized by
-.Fn callout_init ,
-.Fn callout_init_mtx ,
-.Fn callout_init_rm ,
-or
-.Fn callout_init_rw
-before it is passed to any of the other callout functions.
The
+.Ft struct callout
+stores the full state about any pending function call and
+must be drained by a call to
+.Fn callout_drain
+or
+.Fn callout_drain_async
+before freeing.
+.Sh INITIALIZATION
+.Ft void
+.Fn callout_handle_init "struct callout_handle *handle"
+This function is deprecated.
+Please use
.Fn callout_init
-function initializes a callout structure in
-.Fa c
-that is not associated with a specific lock.
+instead.
+This function is used to prepare a
+.Ft struct callout_handle
+before it can be used the first time.
+If this function is called on a pending timeout, the pending timeout
+cannot be cancelled and the
+.Fn untimeout
+function will return as if no timeout was pending.
+.Pp
+.Fn CALLOUT_HANDLE_INITIALIZER "&handle"
+This macro is deprecated.
+It is used to statically initialize a
+.Ft struct callout_handle .
+Please use
+.Fn callout_init
+instead.
+.Pp
+.Ft void
+.Fn callout_init "struct callout *c" "int mpsafe"
+This function prepares a
+.Ft struct callout
+before it can be used.
+This function should not be called while a timeout is pending on the callout.
If the
.Fa mpsafe
-argument is zero,
-the callout structure is not considered to be
-.Dq multi-processor safe ;
-and the Giant lock will be acquired before calling the callout function
-and released when the callout function returns.
-.Pp
-The
-.Fn callout_init_mtx ,
-.Fn callout_init_rm ,
-and
-.Fn callout_init_rw
-functions initialize a callout structure in
-.Fa c
-that is associated with a specific lock.
-The lock is specified by the
-.Fa mtx ,
-.Fa rm ,
-or
-.Fa rw
-parameter.
-The associated lock must be held while stopping or rescheduling the
-callout.
-The callout subsystem acquires the associated lock before calling the
-callout function and releases it after the function returns.
-If the callout was cancelled while the callout subsystem waited for the
-associated lock,
-the callout function is not called,
-and the associated lock is released.
-This ensures that stopping or rescheduling the callout will abort any
-previously scheduled invocation.
-.Pp
-Only regular mutexes may be used with
-.Fn callout_init_mtx ;
-spin mutexes are not supported.
-A sleepable read-mostly lock
-.Po
-one initialized with the
-.Dv RM_SLEEPABLE
-flag
-.Pc
-may not be used with
-.Fn callout_init_rm .
-Similarly, other sleepable lock types such as
-.Xr sx 9
-and
-.Xr lockmgr 9
-cannot be used with callouts because sleeping is not permitted in
-the callout subsystem.
+argument is non-zero, the callback function will run without any lock held
+and the callback is considered to be
+.Dq multi-processor safe .
+.Bf Sy
+It is entirely the application's responsibility not to call any
+.Fn callout_xxx
+functions, including the
+.Fn callout_drain
+function, simultaneously on the same callout when the
+.Fa mpsafe
+argument is non-zero.
+Otherwise, the behavior is undefined.
+Avoid simultaneous calls by obtaining an exclusive lock before calling
+any
+.Fn callout_xxx
+functions other than the
+.Fn callout_drain
+function.
+.Ef
+If the
+.Fa mpsafe
+argument is zero, the Giant mutex will be locked before the callback
+function is called.
+In that case, the Giant mutex is also expected to be held when calling
+any
+.Fn callout_xxx
+functions which start and stop a callout other than the
+.Fn callout_drain
+function.
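
To illustrate the serialization rule above, here is a minimal hypothetical sketch (the foo_* names and softc layout are invented and not part of this change): the driver's own mutex is taken around every callout_xxx call except callout_drain().

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct foo_softc {
	struct mtx	sc_mtx;
	struct callout	sc_callout;
};

static void
foo_timeout(void *arg)
{
	struct foo_softc *sc = arg;

	/* runs without any lock held because mpsafe was non-zero */
	(void)sc;
}

static void
foo_attach(struct foo_softc *sc)
{

	mtx_init(&sc->sc_mtx, "foo", NULL, MTX_DEF);
	callout_init(&sc->sc_callout, 1);	/* MP-safe */
}

static void
foo_start(struct foo_softc *sc)
{

	/* serialize all callout_xxx calls except callout_drain() */
	mtx_lock(&sc->sc_mtx);
	callout_reset(&sc->sc_callout, hz, foo_timeout, sc);
	mtx_unlock(&sc->sc_mtx);
}
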
.Pp
-These
+.Ft void
+.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags"
+This function prepares a
+.Ft struct callout
+before it can be used.
+This function should not be called while a timeout is pending on the callout.
+The
+.Fa mtx
+argument is a pointer to a valid spin mutex or a valid regular
+non-sleepable mutex which the callout subsystem will lock
+before calling the callback function.
+The specified mutex is expected to be locked when calling any
+.Fn callout_xxx
+functions which start and stop a callout other than the
+.Fn callout_drain
+function.
+Valid
.Fa flags
-may be specified for
-.Fn callout_init_mtx ,
-.Fn callout_init_rm ,
-or
-.Fn callout_init_rw :
+are:
.Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED"
.It Dv CALLOUT_RETURNUNLOCKED
-The callout function will release the associated lock itself,
-so the callout subsystem should not attempt to unlock it
-after the callout function returns.
-.It Dv CALLOUT_SHAREDLOCK
-The lock is only acquired in read mode when running the callout handler.
-This flag is ignored by
-.Fn callout_init_mtx .
+The callout function is assumed to have released the specified mutex
+before returning.
+.It Dv 0
+The callout subsystem will release the specified mutex after the
+callout function has returned.
.El
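
A hedged sketch of the lock-backed pattern described here, using hypothetical bar_* names (the same includes as the previous sketch apply): with a flags value of 0 the callout subsystem acquires sc_mtx around the callback and releases it afterwards, while CALLOUT_RETURNUNLOCKED would leave the unlock to the callback itself.

struct bar_softc {
	struct mtx	sc_mtx;
	struct callout	sc_callout;
};

static void
bar_timeout(void *arg)
{
	struct bar_softc *sc = arg;

	/*
	 * sc_mtx is held here and is dropped by the callout
	 * subsystem on return, because flags was 0.
	 */
	(void)sc;
}

static void
bar_attach(struct bar_softc *sc)
{

	mtx_init(&sc->sc_mtx, "bar", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
}

static void
bar_start(struct bar_softc *sc)
{

	/* the associated mutex must be held when arming the callout */
	mtx_lock(&sc->sc_mtx);
	callout_reset(&sc->sc_callout, hz, bar_timeout, sc);
	mtx_unlock(&sc->sc_mtx);
}
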
.Pp
-The function
-.Fn callout_stop
-cancels a callout
-.Fa c
-if it is currently pending.
-If the callout is pending, then
-.Fn callout_stop
-returns a non-zero value.
-If the callout is not set,
-has already been serviced,
-or is currently being serviced,
-then zero will be returned.
-If the callout has an associated lock,
-then that lock must be held when this function is called.
-.Pp
-The function
-.Fn callout_drain
-is identical to
-.Fn callout_stop
-except that it will wait for the callout
-.Fa c
-to complete if it is already in progress.
-This function MUST NOT be called while holding any
-locks on which the callout might block, or deadlock will result.
-Note that if the callout subsystem has already begun processing this
-callout, then the callout function may be invoked before
-.Fn callout_drain
-returns.
-However, the callout subsystem does guarantee that the callout will be
-fully stopped before
-.Fn callout_drain
-returns.
+.Ft void
+.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags"
+This function is similar to
+.Fn callout_init_mtx ,
+but it accepts a read-mostly lock.
+The read-mostly lock must not be initialized with the
+.Dv RM_SLEEPABLE
+flag.
.Pp
-The
+.Ft void
+.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags"
+This function is similar to
+.Fn callout_init_mtx ,
+but it accepts a read/write lock.
+.Sh SCHEDULING CALLOUTS
+.Ft struct callout_handle
+.Fn timeout "timeout_t *func" "void *arg" "int ticks"
+This function is deprecated.
+Please use
.Fn callout_reset
-and
-.Fn callout_schedule
-function families schedule a future function invocation for callout
-.Fa c .
-If
-.Fa c
-already has a pending callout,
-it is cancelled before the new invocation is scheduled.
-These functions return a non-zero value if a pending callout was cancelled
-and zero if there was no pending callout.
-If the callout has an associated lock,
-then that lock must be held when any of these functions are called.
-.Pp
-The time at which the callout function will be invoked is determined by
-either the
-.Fa ticks
-argument or the
-.Fa sbt ,
-.Fa pr ,
-and
-.Fa flags
-arguments.
-When
-.Fa ticks
-is used,
-the callout is scheduled to execute after
+instead.
+This function schedules a call to
+.Fa func
+to take place after
.Fa ticks Ns No /hz
seconds.
Non-positive values of
.Fa ticks
are silently converted to the value
.Sq 1 .
-.Pp
The
-.Fa sbt ,
-.Fa pr ,
-and
-.Fa flags
-arguments provide more control over the scheduled time including
-support for higher resolution times,
-specifying the precision of the scheduled time,
-and setting an absolute deadline instead of a relative timeout.
-The callout is scheduled to execute in a time window which begins at
-the time specified in
+.Fa func
+argument must be a valid pointer to a function that takes a single
+.Fa void *
+argument.
+Upon invocation, the
+.Fa func
+function will receive
+.Fa arg
+as its only argument.
+The Giant lock is held when the
+.Fa func
+function is invoked and should not be unlocked by the callback.
+The return value from
+.Fn timeout
+is a
+.Ft struct callout_handle
+structure which can be used in conjunction with the
+.Fn untimeout
+function to request that a scheduled timeout be cancelled.
+As handles are recycled by the system, it is possible, although unlikely,
+that a handle from one invocation of
+.Fn timeout
+may match the handle of another invocation of
+.Fn timeout
+if both calls used the same function pointer and argument, and the first
+timeout is expired or cancelled before the second call.
+Please ensure that the function and argument pointers are unique when using this function.
+.Pp
+.Ft int
+.Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg"
+This function is used to schedule or reschedule a callout.
+It first stops the callout given by the
+.Fa c
+argument, if it is pending, and then starts it again.
+The relative time until the timeout callback happens is given by the
+.Fa ticks
+argument.
+The number of ticks in a second is defined by
+.Dv hz
+and can vary from system to system.
+This function returns a non-zero value if the given callout was pending and
+the callback function was prevented from being called.
+Otherwise, a value of zero is returned.
+If a lock is associated with the callout given by the
+.Fa c
+argument and it is exclusively locked when this function is called, this
+function will always ensure that the previously scheduled callback
+function, if any, is never called.
+In other words, the callout will be atomically restarted.
+Otherwise, there is no such guarantee.
+The callback function is given by
+.Fa func
+and its function argument is given by
+.Fa arg .
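
Since a callout only fires once, a periodic timer is usually built by rescheduling from inside the callback, as in this hypothetical extension of the bar_softc sketch above; with the associated mutex held, the restart is atomic as described.

static void
bar_periodic_timeout(void *arg)
{
	struct bar_softc *sc = arg;

	/* sc_mtx is held here (callout_init_mtx with flags == 0) */

	/* ... do the periodic work ... */

	/* re-arm for one second from now to obtain a periodic timer */
	callout_reset(&sc->sc_callout, hz, bar_periodic_timeout, sc);
}
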
+.Pp
+.Ft int
+.Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \
+"void *arg"
+This function works like the
+.Fn callout_reset
+function, except that the callback function given by the
+.Fa func
+argument will be executed on the CPU that called this function.
+.Pp
+.Ft int
+.Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \
+"void *arg" "int cpu"
+This function works like the
+.Fn callout_reset
+function, except that the callback function given by the
+.Fa func
+argument will be executed on the CPU given by the
+.Fa cpu
+argument.
+.Pp
+.Ft int
+.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags"
+This function works like the
+.Fn callout_reset
+function, except that the relative or absolute time after which the
+timeout callback should happen is given by the
.Fa sbt
-and extends for the amount of time specified in
+argument and extends for the amount of time specified in
.Fa pr .
+This function is used when high precision timeouts are needed.
If
.Fa sbt
specifies a time in the past,
@@ -322,12 +301,13 @@
allows the callout subsystem to coalesce callouts scheduled close to each
other into fewer timer interrupts,
reducing processing overhead and power consumption.
-These
+The
.Fa flags
-may be specified to adjust the interpretation of
+argument may be non-zero to adjust the interpretation of the
.Fa sbt
and
-.Fa pr :
+.Fa pr
+arguments:
.Bl -tag -width ".Dv C_DIRECT_EXEC"
.It Dv C_ABSOLUTE
Handle the
@@ -347,7 +327,7 @@
and should be as small as possible because they run with absolute priority.
.It Fn C_PREL
Specifies relative event time precision as binary logarithm of time interval
-divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, etc.
+divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, and so on.
Note that the larger of
.Fa pr
or this value is used as the length of the time window.
@@ -360,65 +340,207 @@
calls if possible.
.El
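
A hedged example of a high-resolution timeout using the flags above; SBT_1MS is assumed to come from sys/time.h, and the bar_* names are the hypothetical ones introduced earlier.

static void
bar_start_hires(struct bar_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	/*
	 * Fire roughly 100ms from now; C_PREL(2) allows up to 1/4 of
	 * the interval as slack so nearby events can be coalesced.
	 */
	callout_reset_sbt(&sc->sc_callout, 100 * SBT_1MS, 0,
	    bar_timeout, sc, C_PREL(2));
	mtx_unlock(&sc->sc_mtx);
}
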
.Pp
-The
-.Fn callout_reset
-functions accept a
+.Ft int
+.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags"
+This function works like
+.Fn callout_reset_sbt ,
+except that the callback function given by the
.Fa func
-argument which identifies the function to be called when the time expires.
-It must be a pointer to a function that takes a single
-.Fa void *
-argument.
-Upon invocation,
+argument will be executed on the CPU that called this function.
+.Pp
+.Ft int
+.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags"
+This function works like
+.Fn callout_reset_sbt ,
+except that the callback function given by
.Fa func
-will receive
-.Fa arg
-as its only argument.
-The
-.Fn callout_schedule
-functions reuse the
+will be executed on the CPU given by
+.Fa cpu .
+.Pp
+.Ft int
+.Fn callout_schedule "struct callout *c" "int ticks"
+This function works like the
+.Fn callout_reset
+function, except that it reuses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_curcpu "struct callout *c" "int ticks"
+This function works like the
+.Fn callout_reset_curcpu
+function, except that it reuses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu"
+This function works like the
+.Fn callout_reset_on
+function, except that it reuses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int flags"
+This function works like the
+.Fn callout_reset_sbt
+function, except that it reuses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int flags"
+This function works like the
+.Fn callout_reset_sbt_curcpu
+function, except that it reuses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int cpu" "int flags"
+This function works like the
+.Fn callout_reset_sbt_on
+function, except that it reuses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Sh CHECKING THE STATE OF CALLOUTS
+.Ft int
+.Fn callout_pending "struct callout *c"
+This function returns non-zero if the callout pointed to by the
+.Fa c
+argument has a callback pending.
+Otherwise, it returns zero.
+When called from within the callout function, it returns zero unless the
+callout has been rescheduled.
+.Pp
+.Ft int
+.Fn callout_active "struct callout *c"
+This function is deprecated and returns non-zero if the callout
+pointed to by the
+.Fa c
+argument was scheduled in the past.
+Otherwise, it returns zero.
+This function also returns zero after
+.Fn callout_deactivate ,
+.Fn callout_stop ,
+.Fn callout_drain ,
+or
+.Fn callout_drain_async
+has been called on the same callout as given by the
+.Fa c
+argument.
+.Pp
+.Ft void
+.Fn callout_deactivate "struct callout *c"
+This function is deprecated and ensures that subsequent calls to the
+.Fn callout_active
+function return zero until the callout is scheduled again.
+.Sh STOPPING CALLOUTS
+.Ft void
+.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle"
+This function is deprecated and cancels the timeout associated with the
+.Fa handle
+argument, using the
.Fa func
-and
+and
.Fa arg
-arguments from the previous callout.
-Note that one of the
-.Fn callout_reset
-functions must always be called to initialize
+arguments to validate the handle.
+If the handle does not correspond to a timeout with
+the function
.Fa func
-and
+taking the argument
.Fa arg
-before one of the
-.Fn callout_schedule
-functions can be used.
+no action is taken.
+The
+.Fa handle
+must be initialized by a previous call to
+.Fn timeout ,
+.Fn callout_handle_init ,
+or assigned the value of
+.Fn CALLOUT_HANDLE_INITIALIZER "&handle"
+before being passed to
+.Fn untimeout .
+The behavior of calling
+.Fn untimeout
+with an uninitialized handle
+is undefined.
.Pp
-The callout subsystem provides a softclock thread for each CPU in the system.
-Callouts are assigned to a single CPU and are executed by the softclock thread
-for that CPU.
-Initially,
-callouts are assigned to CPU 0.
-The
-.Fn callout_reset_on ,
-.Fn callout_reset_sbt_on ,
-.Fn callout_schedule_on
-and
-.Fn callout_schedule_sbt_on
-functions assign the callout to CPU
-.Fa cpu .
-The
-.Fn callout_reset_curcpu ,
-.Fn callout_reset_sbt_curpu ,
-.Fn callout_schedule_curcpu
-and
-.Fn callout_schedule_sbt_curcpu
-functions assign the callout to the current CPU.
-The
-.Fn callout_reset ,
-.Fn callout_reset_sbt ,
-.Fn callout_schedule
-and
-.Fn callout_schedule_sbt
-functions schedule the callout to execute in the softclock thread of the CPU
-to which it is currently assigned.
+.Ft int
+.Fn callout_stop "struct callout *c"
+This function is used to stop the timeout function invocation associated
+with the callout pointed to by the
+.Fa c
+argument, in a non-blocking fashion.
+This function can be called multiple times in a row with no side effects,
+even if the callout is already stopped.
+However, this function should not be called before the callout has been
+initialized.
+This function returns a non-zero value if the given callout was pending and
+the callback function was prevented from being called.
+Otherwise, a value of zero is returned.
+If a lock is associated with the callout given by the
+.Fa c
+argument and it is exclusively locked when this function is called, the
+.Fn callout_stop
+function will always ensure that the callback function is never called.
+In other words, the callout will be atomically stopped.
+Otherwise, there is no such guarantee.
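
For example (continuing the hypothetical bar_softc sketch), holding the associated mutex across callout_stop() gives the atomic-stop guarantee described above.

static void
bar_cancel(struct bar_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	if (callout_stop(&sc->sc_callout) != 0) {
		/* the pending callback was cancelled and will not run */
	}
	mtx_unlock(&sc->sc_mtx);
}
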
+.Sh DRAINING CALLOUTS
+.Ft int
+.Fn callout_drain "struct callout *c"
+This function works like the
+.Fn callout_stop
+function, except that it ensures that all callback functions have returned
+and that there are no more references to the callout pointed to by the
+.Fa c
+argument inside the callout subsystem before it returns.
+This function also ensures that the lock, if any, associated with the
+callout is no longer being used.
+When this function returns, it is safe to free the callout structure pointed to by the
+.Fa c
+argument.
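
A typical detach path, sketched with the hypothetical bar_softc from above; the mutex must not be held here because callout_drain() may block waiting for a running callback.

static void
bar_detach(struct bar_softc *sc)
{

	callout_drain(&sc->sc_callout);
	/* the callout and its lock are no longer referenced */
	mtx_destroy(&sc->sc_mtx);
}
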
.Pp
+.Ft int
+.Fn callout_drain_async "struct callout *c" "callout_func_t *fn" "void *arg"
+This function is non-blocking and works like the
+.Fn callout_stop
+function, except that if it returns non-zero, the callback function
+pointed to by the
+.Fa fn
+argument will be called back with the
+.Fa arg
+argument when all references to the callout pointed to by the
+.Fa c
+argument are gone.
+If this function returns non-zero, it should not be called again until the
+callback function has been called.
+If the
+.Fn callout_drain
+or
+.Fn callout_drain_async
+functions are called while an asynchronous drain is pending,
+previously pending asynchronous drains may be cancelled.
+If this function returns zero, it is safe to free the callout structure pointed to by the
+.Fa c
+argument right away.
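
A sketch of the non-blocking teardown described above, reusing the hypothetical foo_softc and assuming the softc was allocated with malloc(9) under M_DEVBUF: if callout_drain_async() returns zero the structure can be freed at once, otherwise the supplied callback frees it later.

static void
foo_drain_complete(void *arg)
{
	struct foo_softc *sc = arg;

	/* all callout references are gone; it is now safe to free */
	mtx_destroy(&sc->sc_mtx);
	free(sc, M_DEVBUF);
}

static void
foo_detach_async(struct foo_softc *sc)
{

	if (callout_drain_async(&sc->sc_callout, foo_drain_complete, sc) == 0) {
		/* no callback pending; free immediately */
		mtx_destroy(&sc->sc_mtx);
		free(sc, M_DEVBUF);
	}
	/* otherwise foo_drain_complete() will run later */
}
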
+.Sh CALLOUT FUNCTION RESTRICTIONS
+Callout functions must not sleep.
+They may not acquire sleepable locks, wait on condition variables,
+perform blocking allocation requests, or invoke any other action that
+might sleep.
+.Sh CALLOUT SUBSYSTEM INTERNALS
+The callout subsystem has its own set of spinlocks to protect its internal state.
+The callout subsystem provides a softclock thread for each CPU in the
+system.
+Callouts are assigned to a single CPU and are executed by the
+softclock thread for that CPU.
+Initially, callouts are assigned to CPU 0.
Softclock threads are not pinned to their respective CPUs by default.
The softclock thread for CPU 0 can be pinned to CPU 0 by setting the
.Va kern.pin_default_swi
@@ -427,50 +549,7 @@
respective CPUs by setting the
.Va kern.pin_pcpu_swi
loader tunable to a non-zero value.
-.Pp
-The macros
-.Fn callout_pending ,
-.Fn callout_active
-and
-.Fn callout_deactivate
-provide access to the current state of the callout.
-The
-.Fn callout_pending
-macro checks whether a callout is
-.Em pending ;
-a callout is considered
-.Em pending
-when a timeout has been set but the time has not yet arrived.
-Note that once the timeout time arrives and the callout subsystem
-starts to process this callout,
-.Fn callout_pending
-will return
-.Dv FALSE
-even though the callout function may not have finished
-.Pq or even begun
-executing.
-The
-.Fn callout_active
-macro checks whether a callout is marked as
-.Em active ,
-and the
-.Fn callout_deactivate
-macro clears the callout's
-.Em active
-flag.
-The callout subsystem marks a callout as
-.Em active
-when a timeout is set and it clears the
-.Em active
-flag in
-.Fn callout_stop
-and
-.Fn callout_drain ,
-but it
-.Em does not
-clear it when a callout expires normally via the execution of the
-callout function.
-.Ss "Avoiding Race Conditions"
+.Sh "AVOIDING RACE CONDITIONS"
The callout subsystem invokes callout functions from its own thread
context.
Without some kind of synchronization,
@@ -531,9 +610,8 @@
.Pc
indicates whether or not the callout was removed.
If it is known that the callout was set and the callout function has
-not yet executed, then a return value of
-.Dv FALSE
-indicates that the callout function is about to be called.
+not yet executed, then a return value of zero indicates that the
+callout function is about to be called.
For example:
.Bd -literal -offset indent
if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) {
@@ -589,16 +667,14 @@
.Em pending
flag and return without action if
.Fn callout_pending
-returns
-.Dv TRUE .
+returns non-zero.
This indicates that the callout was rescheduled using
.Fn callout_reset
just before the callout function was invoked.
If
.Fn callout_active
-returns
-.Dv FALSE
-then the callout function should also return without action.
+returns zero then the callout function should also return without
+action.
This indicates that the callout has been stopped.
Finally, the callout function should call
.Fn callout_deactivate
@@ -668,129 +744,13 @@
or releasing the storage for the callout structure.
.Sh LEGACY API
.Bf Sy
-The functions below are a legacy API that will be removed in a future release.
-New code should not use these routines.
-.Ef
-.Pp
-The function
-.Fn timeout
-schedules a call to the function given by the argument
-.Fa func
-to take place after
-.Fa ticks Ns No /hz
-seconds.
-Non-positive values of
-.Fa ticks
-are silently converted to the value
-.Sq 1 .
-.Fa func
-should be a pointer to a function that takes a
-.Fa void *
-argument.
-Upon invocation,
-.Fa func
-will receive
-.Fa arg
-as its only argument.
-The return value from
+The
.Fn timeout
-is a
-.Ft struct callout_handle
-which can be used in conjunction with the
-.Fn untimeout
-function to request that a scheduled timeout be canceled.
-.Pp
-The function
-.Fn callout_handle_init
-can be used to initialize a handle to a state which will cause
-any calls to
-.Fn untimeout
-with that handle to return with no side
-effects.
-.Pp
-Assigning a callout handle the value of
-.Fn CALLOUT_HANDLE_INITIALIZER
-performs the same function as
-.Fn callout_handle_init
-and is provided for use on statically declared or global callout handles.
-.Pp
-The function
-.Fn untimeout
-cancels the timeout associated with
-.Fa handle
-using the
-.Fa func
and
-.Fa arg
-arguments to validate the handle.
-If the handle does not correspond to a timeout with
-the function
-.Fa func
-taking the argument
-.Fa arg
-no action is taken.
-.Fa handle
-must be initialized by a previous call to
-.Fn timeout ,
-.Fn callout_handle_init ,
-or assigned the value of
-.Fn CALLOUT_HANDLE_INITIALIZER "&handle"
-before being passed to
-.Fn untimeout .
-The behavior of calling
.Fn untimeout
-with an uninitialized handle
-is undefined.
-.Pp
-As handles are recycled by the system, it is possible (although unlikely)
-that a handle from one invocation of
-.Fn timeout
-may match the handle of another invocation of
-.Fn timeout
-if both calls used the same function pointer and argument, and the first
-timeout is expired or canceled before the second call.
-The timeout facility offers O(1) running time for
-.Fn timeout
-and
-.Fn untimeout .
-Timeouts are executed from
-.Fn softclock
-with the
-.Va Giant
-lock held.
-Thus they are protected from re-entrancy.
-.Sh RETURN VALUES
-The
-.Fn callout_active
-macro returns the state of a callout's
-.Em active
-flag.
-.Pp
-The
-.Fn callout_pending
-macro returns the state of a callout's
-.Em pending
-flag.
-.Pp
-The
-.Fn callout_reset
-and
-.Fn callout_schedule
-function families return non-zero if the callout was pending before the new
-function invocation was scheduled.
-.Pp
-The
-.Fn callout_stop
-and
-.Fn callout_drain
-functions return non-zero if the callout was still pending when it was
-called or zero otherwise.
-The
-.Fn timeout
-function returns a
-.Ft struct callout_handle
-that can be passed to
-.Fn untimeout .
+functions are a legacy API that will be removed in a future release.
+New code should not use these routines.
+.Ef
.Sh HISTORY
The current timeout and untimeout routines are based on the work of
.An Adam M. Costello
@@ -815,4 +775,4 @@
.Bx
linked list
callout mechanism which offered O(n) insertion and removal running time
-but did not generate or require handles for untimeout operations.
+and did not generate or require handles for untimeout operations.
Index: projects/hps_head/sys/kern/init_main.c
===================================================================
--- projects/hps_head/sys/kern/init_main.c
+++ projects/hps_head/sys/kern/init_main.c
@@ -506,7 +506,8 @@
callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
+ mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
+ callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
/* Create credentials. */
newcred = crget();
Index: projects/hps_head/sys/kern/kern_clocksource.c
===================================================================
--- projects/hps_head/sys/kern/kern_clocksource.c
+++ projects/hps_head/sys/kern/kern_clocksource.c
@@ -160,6 +160,9 @@
int usermode;
int done, runs;
+ KASSERT(curthread->td_critnest != 0,
+ ("Must be in a critical section"));
+
CTR3(KTR_SPARE2, "handle at %d: now %d.%08x",
curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
done = 0;
Index: projects/hps_head/sys/kern/kern_condvar.c
===================================================================
--- projects/hps_head/sys/kern/kern_condvar.c
+++ projects/hps_head/sys/kern/kern_condvar.c
@@ -313,15 +313,13 @@
DROP_GIANT();
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+ sleepq_release(cvp);
sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_release(cvp);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_lock(cvp);
}
+ sleepq_lock(cvp);
rval = sleepq_timedwait(cvp, 0);
#ifdef KTRACE
@@ -383,15 +381,13 @@
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
SLEEPQ_INTERRUPTIBLE, 0);
+ sleepq_release(cvp);
sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_release(cvp);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_lock(cvp);
}
+ sleepq_lock(cvp);
rval = sleepq_timedwait_sig(cvp, 0);
#ifdef KTRACE
Index: projects/hps_head/sys/kern/kern_lock.c
===================================================================
--- projects/hps_head/sys/kern/kern_lock.c
+++ projects/hps_head/sys/kern/kern_lock.c
@@ -210,9 +210,11 @@
GIANT_SAVE();
sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
SLEEPQ_INTERRUPTIBLE : 0), queue);
- if ((flags & LK_TIMELOCK) && timo)
+ if ((flags & LK_TIMELOCK) && timo) {
+ sleepq_release(&lk->lock_object);
sleepq_set_timeout(&lk->lock_object, timo);
-
+ sleepq_lock(&lk->lock_object);
+ }
/*
* Decisional switch for real sleeping.
*/
Index: projects/hps_head/sys/kern/kern_switch.c
===================================================================
--- projects/hps_head/sys/kern/kern_switch.c
+++ projects/hps_head/sys/kern/kern_switch.c
@@ -93,8 +93,6 @@
&DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
&DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
-SCHED_STAT_DEFINE_VAR(sleepqtimo,
- &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
&DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
Index: projects/hps_head/sys/kern/kern_synch.c
===================================================================
--- projects/hps_head/sys/kern/kern_synch.c
+++ projects/hps_head/sys/kern/kern_synch.c
@@ -219,12 +219,16 @@
* return from cursig().
*/
sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
- if (sbt != 0)
- sleepq_set_timeout_sbt(ident, sbt, pr, flags);
if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
sleepq_release(ident);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
+ if (sbt != 0)
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
+ sleepq_lock(ident);
+ } else if (sbt != 0) {
+ sleepq_release(ident);
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
sleepq_lock(ident);
}
if (sbt != 0 && catch)
@@ -289,8 +293,11 @@
* We put ourselves on the sleep queue and start our timeout.
*/
sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
- if (sbt != 0)
+ if (sbt != 0) {
+ sleepq_release(ident);
sleepq_set_timeout_sbt(ident, sbt, pr, flags);
+ sleepq_lock(ident);
+ }
/*
* Can't call ktrace with any spin locks held so it can lock the
Index: projects/hps_head/sys/kern/kern_thread.c
===================================================================
--- projects/hps_head/sys/kern/kern_thread.c
+++ projects/hps_head/sys/kern/kern_thread.c
@@ -149,6 +149,9 @@
audit_thread_alloc(td);
#endif
umtx_thread_alloc(td);
+
+ mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
+ callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
return (0);
}
@@ -162,6 +165,10 @@
td = (struct thread *)mem;
+ /* make sure to drain any use of the "td->td_slpcallout" */
+ callout_drain(&td->td_slpcallout);
+ mtx_destroy(&td->td_slpmutex);
+
#ifdef INVARIANTS
/* Verify that this thread is in a safe state to free. */
switch (td->td_state) {
@@ -544,7 +551,6 @@
LIST_INIT(&td->td_lprof[0]);
LIST_INIT(&td->td_lprof[1]);
sigqueue_init(&td->td_sigqueue, p);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
p->p_numthreads++;
}
Index: projects/hps_head/sys/kern/kern_timeout.c
===================================================================
--- projects/hps_head/sys/kern/kern_timeout.c
+++ projects/hps_head/sys/kern/kern_timeout.c
@@ -54,6 +54,8 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
@@ -75,28 +77,25 @@
"struct callout *");
#ifdef CALLOUT_PROFILING
-static int avg_depth;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
+static int avg_depth[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0,
"Average number of items examined per softclock call. Units = 1/1000");
-static int avg_gcalls;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
+static int avg_gcalls[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0,
"Average number of Giant callouts made per softclock call. Units = 1/1000");
-static int avg_lockcalls;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
+static int avg_lockcalls[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0,
"Average number of lock callouts made per softclock call. Units = 1/1000");
-static int avg_mpcalls;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
+static int avg_mpcalls[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0,
"Average number of MP callouts made per softclock call. Units = 1/1000");
-static int avg_depth_dir;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0,
"Average number of direct callouts examined per callout_process call. "
"Units = 1/1000");
-static int avg_lockcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
- &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
+ &avg_lockcalls[1], 0, "Average number of lock direct callouts made per "
"callout_process call. Units = 1/1000");
-static int avg_mpcalls_dir;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1],
0, "Average number of MP direct callouts made per callout_process call. "
"Units = 1/1000");
#endif
@@ -124,64 +123,230 @@
*/
u_int callwheelsize, callwheelmask;
+#define CALLOUT_RET_NORMAL 0
+#define CALLOUT_RET_CANCELLED 1
+#define CALLOUT_RET_DRAINING 2
+
+struct callout_args {
+ sbintime_t time; /* absolute time for the event */
+ sbintime_t precision; /* delta allowed wrt opt */
+ void *arg; /* function argument */
+ callout_func_t *func; /* function to call */
+ int flags; /* flags passed to callout_reset() */
+ int cpu; /* CPU we're scheduled on */
+};
+
+typedef void callout_mutex_op_t(struct lock_object *);
+
+struct callout_mutex_ops {
+ callout_mutex_op_t *lock;
+ callout_mutex_op_t *unlock;
+};
+
+enum {
+ CALLOUT_LC_UNUSED_0,
+ CALLOUT_LC_UNUSED_1,
+ CALLOUT_LC_UNUSED_2,
+ CALLOUT_LC_UNUSED_3,
+ CALLOUT_LC_SPIN,
+ CALLOUT_LC_MUTEX,
+ CALLOUT_LC_RW,
+ CALLOUT_LC_RM,
+};
+
+static void
+callout_mutex_op_none(struct lock_object *lock)
+{
+}
+
+static void
+callout_mutex_lock(struct lock_object *lock)
+{
+
+ mtx_lock((struct mtx *)lock);
+}
+
+static void
+callout_mutex_unlock(struct lock_object *lock)
+{
+
+ mtx_unlock((struct mtx *)lock);
+}
+
+static void
+callout_mutex_lock_spin(struct lock_object *lock)
+{
+
+ mtx_lock_spin((struct mtx *)lock);
+}
+
+static void
+callout_mutex_unlock_spin(struct lock_object *lock)
+{
+
+ mtx_unlock_spin((struct mtx *)lock);
+}
+
+static void
+callout_rm_wlock(struct lock_object *lock)
+{
+
+ rm_wlock((struct rmlock *)lock);
+}
+
+static void
+callout_rm_wunlock(struct lock_object *lock)
+{
+
+ rm_wunlock((struct rmlock *)lock);
+}
+
+static void
+callout_rw_wlock(struct lock_object *lock)
+{
+
+ rw_wlock((struct rwlock *)lock);
+}
+
+static void
+callout_rw_wunlock(struct lock_object *lock)
+{
+
+ rw_wunlock((struct rwlock *)lock);
+}
+
+static const struct callout_mutex_ops callout_mutex_ops[8] = {
+ [CALLOUT_LC_UNUSED_0] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ },
+ [CALLOUT_LC_UNUSED_1] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ },
+ [CALLOUT_LC_UNUSED_2] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ },
+ [CALLOUT_LC_UNUSED_3] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ },
+ [CALLOUT_LC_SPIN] = {
+ .lock = callout_mutex_lock_spin,
+ .unlock = callout_mutex_unlock_spin,
+ },
+ [CALLOUT_LC_MUTEX] = {
+ .lock = callout_mutex_lock,
+ .unlock = callout_mutex_unlock,
+ },
+ [CALLOUT_LC_RW] = {
+ .lock = callout_rw_wlock,
+ .unlock = callout_rw_wunlock,
+ },
+ [CALLOUT_LC_RM] = {
+ .lock = callout_rm_wlock,
+ .unlock = callout_rm_wunlock,
+ },
+};
+
+static inline void
+callout_lock_client(int c_flags, struct lock_object *c_lock)
+{
+
+ callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock);
+}
+
+static inline void
+callout_unlock_client(int c_flags, struct lock_object *c_lock)
+{
+
+ callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock);
+}
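
The helpers above dispatch through a small table indexed by the callout's lock class; CALLOUT_GET_LC() is used to extract that index from c_flags, but its definition is not part of the hunks shown here. The following self-contained userspace sketch, with invented names, illustrates the same dispatch pattern.

#include <stdio.h>

struct lock_object {
	const char *lo_name;
};

typedef void lock_op_t(struct lock_object *);

struct lock_ops {
	lock_op_t *lock;
	lock_op_t *unlock;
};

static void
demo_lock(struct lock_object *lo)
{
	printf("lock %s\n", lo->lo_name);
}

static void
demo_unlock(struct lock_object *lo)
{
	printf("unlock %s\n", lo->lo_name);
}

/* one entry per lock class, selected by an index kept in the flags */
static const struct lock_ops demo_ops[2] = {
	[0] = { .lock = demo_lock, .unlock = demo_unlock },
	[1] = { .lock = demo_lock, .unlock = demo_unlock },
};

#define	DEMO_GET_LC(flags)	((flags) & 1)	/* stand-in for CALLOUT_GET_LC() */

int
main(void)
{
	struct lock_object lo = { "example" };
	int flags = 1;

	demo_ops[DEMO_GET_LC(flags)].lock(&lo);		/* like callout_lock_client() */
	demo_ops[DEMO_GET_LC(flags)].unlock(&lo);	/* like callout_unlock_client() */
	return (0);
}
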
+
/*
- * The callout cpu exec entities represent informations necessary for
- * describing the state of callouts currently running on the CPU and the ones
- * necessary for migrating callouts to the new callout cpu. In particular,
- * the first entry of the array cc_exec_entity holds informations for callout
- * running in SWI thread context, while the second one holds informations
- * for callout running directly from hardware interrupt context.
- * The cached informations are very important for deferring migration when
- * the migrating callout is already running.
+ * The callout CPU exec structure represents information necessary for
+ * describing the state of callouts currently running on the CPU and
+ * for handling deferred callout restarts.
+ *
+ * In particular, the first entry of the array cc_exec_entity holds
+ * information for callouts running from the SWI thread context, while
+ * the second one holds information for callouts running directly from
+ * the hardware interrupt context.
*/
struct cc_exec {
+ /*
+ * The "cc_curr" points to the currently executing callout and
+ * is protected by the "cc_lock" spinlock. If no callback is
+ * currently executing it is equal to "NULL".
+ */
struct callout *cc_curr;
-#ifdef SMP
- void (*ce_migration_func)(void *);
- void *ce_migration_arg;
- int ce_migration_cpu;
- sbintime_t ce_migration_time;
- sbintime_t ce_migration_prec;
+ /*
+ * The "cc_restart_args" structure holds the argument for a
+ * deferred callback restart and is protected by the "cc_lock"
+ * spinlock. The structure is only valid if "cc_restart" is
+ * "true". If "cc_restart" is "false" the information in the
+ * "cc_restart_args" structure shall be ignored.
+ */
+ struct callout_args cc_restart_args;
+ bool cc_restart;
+ /*
+ * The "cc_cancel" variable allows the currently pending
+ * callback to be atomically cancelled. This field is write
+ * protected by the "cc_lock" spinlock.
+ */
+ bool cc_cancel;
+ /*
+ * The "cc_drain_fn" points to a function which shall be
+ * called with the argument stored in "cc_drain_arg" when an
+ * asynchronous drain is performed. This field is write
+ * protected by the "cc_lock" spinlock.
+ */
+ callout_func_t *cc_drain_fn;
+ void *cc_drain_arg;
+ /*
+ * The following fields are used for callout profiling only:
+ */
+#ifdef CALLOUT_PROFILING
+ int cc_depth;
+ int cc_mpcalls;
+ int cc_lockcalls;
+ int cc_gcalls;
#endif
- bool cc_cancel;
- bool cc_waiting;
};
/*
- * There is one struct callout_cpu per cpu, holding all relevant
+ * There is one "struct callout_cpu" per CPU, holding all relevant
* state for the callout processing thread on the individual CPU.
*/
struct callout_cpu {
struct mtx_padalign cc_lock;
struct cc_exec cc_exec_entity[2];
- struct callout *cc_next;
struct callout *cc_callout;
struct callout_list *cc_callwheel;
+ struct callout_list cc_tmplist;
struct callout_tailq cc_expireq;
struct callout_slist cc_callfree;
sbintime_t cc_firstevent;
sbintime_t cc_lastscan;
void *cc_cookie;
- u_int cc_bucket;
- u_int cc_inited;
char cc_ktr_event_name[20];
};
-#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION)
+#define cc_exec_curr(cc, dir) (cc)->cc_exec_entity[(dir)].cc_curr
+#define cc_exec_restart_args(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart_args
+#define cc_exec_restart(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart
+#define cc_exec_cancel(cc, dir) (cc)->cc_exec_entity[(dir)].cc_cancel
+#define cc_exec_drain_fn(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_fn
+#define cc_exec_drain_arg(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_arg
+#define cc_exec_depth(cc, dir) (cc)->cc_exec_entity[(dir)].cc_depth
+#define cc_exec_mpcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_mpcalls
+#define cc_exec_lockcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_lockcalls
+#define cc_exec_gcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_gcalls
-#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
-#define cc_exec_next(cc) cc->cc_next
-#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel
-#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting
#ifdef SMP
-#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func
-#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg
-#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu
-#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time
-#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec
-
struct callout_cpu cc_cpu[MAXCPU];
-#define CPUBLOCK MAXCPU
+#define CPUBLOCK -1
#define CC_CPU(cpu) (&cc_cpu[(cpu)])
#define CC_SELF() CC_CPU(PCPU_GET(cpuid))
#else
@@ -196,67 +361,13 @@
static int timeout_cpu;
static void callout_cpu_init(struct callout_cpu *cc, int cpu);
-static void softclock_call_cc(struct callout *c, struct callout_cpu *cc,
-#ifdef CALLOUT_PROFILING
- int *mpcalls, int *lockcalls, int *gcalls,
-#endif
- int direct);
+static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct);
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
-/**
- * Locked by cc_lock:
- * cc_curr - If a callout is in progress, it is cc_curr.
- * If cc_curr is non-NULL, threads waiting in
- * callout_drain() will be woken up as soon as the
- * relevant callout completes.
- * cc_cancel - Changing to 1 with both callout_lock and cc_lock held
- * guarantees that the current callout will not run.
- * The softclock() function sets this to 0 before it
- * drops callout_lock to acquire c_lock, and it calls
- * the handler only if curr_cancelled is still 0 after
- * cc_lock is successfully acquired.
- * cc_waiting - If a thread is waiting in callout_drain(), then
- * callout_wait is nonzero. Set only when
- * cc_curr is non-NULL.
- */
-
-/*
- * Resets the execution entity tied to a specific callout cpu.
- */
-static void
-cc_cce_cleanup(struct callout_cpu *cc, int direct)
-{
-
- cc_exec_curr(cc, direct) = NULL;
- cc_exec_cancel(cc, direct) = false;
- cc_exec_waiting(cc, direct) = false;
-#ifdef SMP
- cc_migration_cpu(cc, direct) = CPUBLOCK;
- cc_migration_time(cc, direct) = 0;
- cc_migration_prec(cc, direct) = 0;
- cc_migration_func(cc, direct) = NULL;
- cc_migration_arg(cc, direct) = NULL;
-#endif
-}
-
-/*
- * Checks if migration is requested by a specific callout cpu.
- */
-static int
-cc_cce_migrating(struct callout_cpu *cc, int direct)
-{
-
-#ifdef SMP
- return (cc_migration_cpu(cc, direct) != CPUBLOCK);
-#else
- return (0);
-#endif
-}
-
/*
- * Kernel low level callwheel initialization
- * called on cpu0 during kernel startup.
+ * Kernel low level callwheel initialization called from cpu0 during
+ * kernel startup:
*/
static void
callout_callwheel_init(void *dummy)
@@ -311,15 +422,13 @@
mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
SLIST_INIT(&cc->cc_callfree);
- cc->cc_inited = 1;
cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
M_CALLOUT, M_WAITOK);
for (i = 0; i < callwheelsize; i++)
LIST_INIT(&cc->cc_callwheel[i]);
TAILQ_INIT(&cc->cc_expireq);
+ LIST_INIT(&cc->cc_tmplist);
cc->cc_firstevent = SBT_MAX;
- for (i = 0; i < 2; i++)
- cc_cce_cleanup(cc, i);
snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
"callwheel cpu %d", cpu);
if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */
@@ -327,38 +436,38 @@
for (i = 0; i < ncallout; i++) {
c = &cc->cc_callout[i];
callout_init(c, 0);
- c->c_iflags = CALLOUT_LOCAL_ALLOC;
+ c->c_flags |= CALLOUT_LOCAL_ALLOC;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
}
-#ifdef SMP
-/*
- * Switches the cpu tied to a specific callout.
- * The function expects a locked incoming callout cpu and returns with
- * locked outcoming callout cpu.
- */
-static struct callout_cpu *
-callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
+#ifdef CALLOUT_PROFILING
+static inline void
+callout_clear_stats(struct callout_cpu *cc, const int direct)
{
- struct callout_cpu *new_cc;
-
- MPASS(c != NULL && cc != NULL);
- CC_LOCK_ASSERT(cc);
+ cc_exec_depth(cc, direct) = 0;
+ cc_exec_mpcalls(cc, direct) = 0;
+ cc_exec_lockcalls(cc, direct) = 0;
+ cc_exec_gcalls(cc, direct) = 0;
+}
+#endif
- /*
- * Avoid interrupts and preemption firing after the callout cpu
- * is blocked in order to avoid deadlocks as the new thread
- * may be willing to acquire the callout cpu lock.
- */
- c->c_cpu = CPUBLOCK;
- spinlock_enter();
- CC_UNLOCK(cc);
- new_cc = CC_CPU(new_cpu);
- CC_LOCK(new_cc);
- spinlock_exit();
- c->c_cpu = new_cpu;
- return (new_cc);
+#ifdef CALLOUT_PROFILING
+static inline void
+callout_update_stats(struct callout_cpu *cc, const int direct)
+{
+ avg_depth[direct] +=
+ (cc_exec_depth(cc, direct) * 1000 -
+ avg_depth[direct]) >> 8;
+ avg_mpcalls[direct] +=
+ (cc_exec_mpcalls(cc, direct) * 1000 -
+ avg_mpcalls[direct]) >> 8;
+ avg_lockcalls[direct] +=
+ (cc_exec_lockcalls(cc, direct) * 1000 -
+ avg_lockcalls[direct]) >> 8;
+ avg_gcalls[direct] +=
+ (cc_exec_gcalls(cc, direct) * 1000 -
+ avg_gcalls[direct]) >> 8;
}
#endif
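
The per-call profiling counters above are folded into the debug.to_avg_* sysctls as an exponentially weighted moving average kept in units of 1/1000: each call effectively computes avg += (sample * 1000 - avg) / 256, so a new sample contributes with weight 1/256 and, for example, a steady per-call depth of 3 converges toward an avg_depth reading of about 3000.
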
@@ -429,19 +538,19 @@
void
callout_process(sbintime_t now)
{
- struct callout *tmp, *tmpn;
+ struct callout *tmp;
struct callout_cpu *cc;
struct callout_list *sc;
sbintime_t first, last, max, tmp_max;
uint32_t lookahead;
u_int firstb, lastb, nowb;
-#ifdef CALLOUT_PROFILING
- int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
-#endif
cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ CC_LOCK(cc);
+#ifdef CALLOUT_PROFILING
+ callout_clear_stats(cc, 1);
+#endif
/* Compute the buckets of the last scan and present times. */
firstb = callout_hash(cc->cc_lastscan);
cc->cc_lastscan = now;
@@ -474,49 +583,44 @@
/* Iterate callwheel from firstb to nowb and then up to lastb. */
do {
sc = &cc->cc_callwheel[firstb & callwheelmask];
- tmp = LIST_FIRST(sc);
- while (tmp != NULL) {
+ while (1) {
+ tmp = LIST_FIRST(sc);
+ if (tmp == NULL)
+ break;
+
+ LIST_REMOVE(tmp, c_links.le);
+
/* Run the callout if present time within allowed. */
if (tmp->c_time <= now) {
/*
- * Consumer told us the callout may be run
- * directly from hardware interrupt context.
+ * Consumer told us the callout may be
+ * run directly from the hardware
+ * interrupt context:
*/
- if (tmp->c_iflags & CALLOUT_DIRECT) {
-#ifdef CALLOUT_PROFILING
- ++depth_dir;
-#endif
- cc_exec_next(cc) =
- LIST_NEXT(tmp, c_links.le);
- cc->cc_bucket = firstb & callwheelmask;
- LIST_REMOVE(tmp, c_links.le);
- softclock_call_cc(tmp, cc,
-#ifdef CALLOUT_PROFILING
- &mpcalls_dir, &lockcalls_dir, NULL,
-#endif
- 1);
- tmp = cc_exec_next(cc);
- cc_exec_next(cc) = NULL;
+ if (tmp->c_flags & CALLOUT_DIRECT) {
+ softclock_call_cc(tmp, cc, 1);
} else {
- tmpn = LIST_NEXT(tmp, c_links.le);
- LIST_REMOVE(tmp, c_links.le);
TAILQ_INSERT_TAIL(&cc->cc_expireq,
tmp, c_links.tqe);
- tmp->c_iflags |= CALLOUT_PROCESSED;
- tmp = tmpn;
+ tmp->c_flags |= CALLOUT_PROCESSED;
}
continue;
}
+
+ /* insert callout into temporary list */
+ LIST_INSERT_HEAD(&cc->cc_tmplist, tmp, c_links.le);
+
/* Skip events from distant future. */
if (tmp->c_time >= max)
- goto next;
+ continue;
+
/*
* Event minimal time is bigger than present maximal
* time, so it cannot be aggregated.
*/
if (tmp->c_time > last) {
lastb = nowb;
- goto next;
+ continue;
}
/* Update first and last time, respecting this event. */
if (tmp->c_time < first)
@@ -524,11 +628,14 @@
tmp_max = tmp->c_time + tmp->c_precision;
if (tmp_max < last)
last = tmp_max;
-next:
- tmp = LIST_NEXT(tmp, c_links.le);
}
+
+ /* Put temporary list back into the main bucket */
+ LIST_SWAP(sc, &cc->cc_tmplist, callout, c_links.le);
+
/* Proceed with the next bucket. */
firstb++;
+
/*
* Stop if we looked after present time and found
* some event we can't execute at now.
@@ -540,14 +647,13 @@
cpu_new_callout(curcpu, last, first);
#endif
#ifdef CALLOUT_PROFILING
- avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
- avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
- avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
+ callout_update_stats(cc, 1);
#endif
- mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ CC_UNLOCK(cc);
/*
- * swi_sched acquires the thread lock, so we don't want to call it
- * with cc_lock held; incorrect locking order.
+ * "swi_sched()" acquires the thread lock and we don't want to
+ * call it with cc_lock held because it leads to a locking
+ * order reversal issue.
*/
if (!TAILQ_EMPTY(&cc->cc_expireq))
swi_sched(cc->cc_cookie, 0);
@@ -563,8 +669,7 @@
cpu = c->c_cpu;
#ifdef SMP
if (cpu == CPUBLOCK) {
- while (c->c_cpu == CPUBLOCK)
- cpu_spinwait();
+ cpu_spinwait();
continue;
}
#endif
@@ -577,32 +682,56 @@
return (cc);
}
-static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc,
- sbintime_t sbt, sbintime_t precision, void (*func)(void *),
- void *arg, int cpu, int flags)
+static struct callout_cpu *
+callout_cc_add_locked(struct callout *c, struct callout_cpu *cc,
+ struct callout_args *coa)
{
- int bucket;
+#ifndef NO_EVENTTIMERS
+ sbintime_t sbt;
+#endif
+ u_int bucket;
CC_LOCK_ASSERT(cc);
- if (sbt < cc->cc_lastscan)
- sbt = cc->cc_lastscan;
- c->c_arg = arg;
- c->c_iflags |= CALLOUT_PENDING;
- c->c_iflags &= ~CALLOUT_PROCESSED;
- c->c_flags |= CALLOUT_ACTIVE;
- if (flags & C_DIRECT_EXEC)
- c->c_iflags |= CALLOUT_DIRECT;
- c->c_func = func;
- c->c_time = sbt;
- c->c_precision = precision;
+
+ /* update flags before swapping locks, if any */
+ c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART);
+ if (coa->flags & C_DIRECT_EXEC)
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT);
+ else
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+
+#ifdef SMP
+ /* only set the "c_cpu" if the CPU number changed and is valid */
+ if (c->c_cpu != coa->cpu && coa->cpu > CPUBLOCK &&
+ coa->cpu <= mp_maxid && !CPU_ABSENT(coa->cpu)) {
+ /*
+ * Avoid interrupts and preemption firing after the
+ * callout CPU is blocked in order to avoid deadlocks
+ * as the new thread may be willing to acquire the
+ * callout CPU lock:
+ */
+ c->c_cpu = CPUBLOCK;
+ spinlock_enter();
+ CC_UNLOCK(cc);
+ cc = CC_CPU(coa->cpu);
+ CC_LOCK(cc);
+ spinlock_exit();
+ c->c_cpu = coa->cpu;
+ }
+#endif
+ if (coa->time < cc->cc_lastscan)
+ coa->time = cc->cc_lastscan;
+ c->c_arg = coa->arg;
+ c->c_func = coa->func;
+ c->c_time = coa->time;
+ c->c_precision = coa->precision;
+
bucket = callout_get_bucket(c->c_time);
CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
c, (int)(c->c_precision >> 32),
(u_int)(c->c_precision & 0xffffffff));
LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
- if (cc->cc_bucket == bucket)
- cc_exec_next(cc) = c;
+
#ifndef NO_EVENTTIMERS
/*
* Inform the eventtimers(4) subsystem there's a new callout
@@ -613,42 +742,28 @@
sbt = c->c_time + c->c_precision;
if (sbt < cc->cc_firstevent) {
cc->cc_firstevent = sbt;
- cpu_new_callout(cpu, sbt, c->c_time);
+ cpu_new_callout(c->c_cpu, sbt, c->c_time);
}
#endif
+ return (cc);
}
-static void
+static inline void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
- return;
c->c_func = NULL;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
-static void
+static inline void
softclock_call_cc(struct callout *c, struct callout_cpu *cc,
-#ifdef CALLOUT_PROFILING
- int *mpcalls, int *lockcalls, int *gcalls,
-#endif
- int direct)
+ const int direct)
{
- struct rm_priotracker tracker;
- void (*c_func)(void *);
+ callout_func_t *c_func;
void *c_arg;
- struct lock_class *class;
struct lock_object *c_lock;
- uintptr_t lock_status;
- int c_iflags;
-#ifdef SMP
- struct callout_cpu *new_cc;
- void (*new_func)(void *);
- void *new_arg;
- int flags, new_cpu;
- sbintime_t new_prec, new_time;
-#endif
+ int c_flags;
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
sbintime_t sbt1, sbt2;
struct timespec ts2;
@@ -656,62 +771,68 @@
static timeout_t *lastfunc;
#endif
- KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
- ("softclock_call_cc: pend %p %x", c, c->c_iflags));
- KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
- ("softclock_call_cc: act %p %x", c, c->c_flags));
- class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
- lock_status = 0;
- if (c->c_flags & CALLOUT_SHAREDLOCK) {
- if (class == &lock_class_rm)
- lock_status = (uintptr_t)&tracker;
- else
- lock_status = 1;
- }
+ KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
+ (CALLOUT_PENDING | CALLOUT_ACTIVE),
+ ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
+
c_lock = c->c_lock;
c_func = c->c_func;
c_arg = c->c_arg;
- c_iflags = c->c_iflags;
- if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
- c->c_iflags = CALLOUT_LOCAL_ALLOC;
- else
- c->c_iflags &= ~CALLOUT_PENDING;
-
+ c_flags = c->c_flags;
+
+ /* remove pending bit */
+ c->c_flags &= ~CALLOUT_PENDING;
+
+ /* reset our local state */
cc_exec_curr(cc, direct) = c;
- cc_exec_cancel(cc, direct) = false;
- CC_UNLOCK(cc);
+ cc_exec_restart(cc, direct) = false;
+ cc_exec_drain_fn(cc, direct) = NULL;
+ cc_exec_drain_arg(cc, direct) = NULL;
+
if (c_lock != NULL) {
- class->lc_lock(c_lock, lock_status);
+ cc_exec_cancel(cc, direct) = false;
+ CC_UNLOCK(cc);
+
+ /* unlocked region for switching locks */
+
+ callout_lock_client(c_flags, c_lock);
+
/*
- * The callout may have been cancelled
- * while we switched locks.
+ * Check if the callout may have been cancelled while
+	 * we were switching locks.
+	 * Even though the callout specifies a lock, that lock
+	 * is not necessarily held when callouts are started
+	 * and stopped.
*/
+ CC_LOCK(cc);
if (cc_exec_cancel(cc, direct)) {
- class->lc_unlock(c_lock);
- goto skip;
+ callout_unlock_client(c_flags, c_lock);
+ goto skip_cc_locked;
}
- /* The callout cannot be stopped now. */
- cc_exec_cancel(cc, direct) = true;
if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
- (*gcalls)++;
+ cc_exec_gcalls(cc, direct)++;
#endif
CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
c, c_func, c_arg);
} else {
#ifdef CALLOUT_PROFILING
- (*lockcalls)++;
+ cc_exec_lockcalls(cc, direct)++;
#endif
CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
c, c_func, c_arg);
}
} else {
#ifdef CALLOUT_PROFILING
- (*mpcalls)++;
+ cc_exec_mpcalls(cc, direct)++;
#endif
CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
c, c_func, c_arg);
}
+ /* The callout cannot be stopped now! */
+ cc_exec_cancel(cc, direct) = true;
+ CC_UNLOCK(cc);
+
+ /* unlocked region */
KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
"func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
@@ -738,85 +859,46 @@
#endif
KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
CTR1(KTR_CALLOUT, "callout %p finished", c);
- if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
- class->lc_unlock(c_lock);
-skip:
+
+ /*
+ * At this point the callback structure might have been freed,
+ * so we need to check the previously copied value of
+ * "c->c_flags":
+ */
+ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+ callout_unlock_client(c_flags, c_lock);
+
CC_LOCK(cc);
+
+skip_cc_locked:
KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
cc_exec_curr(cc, direct) = NULL;
- if (cc_exec_waiting(cc, direct)) {
+
+ /* Check if there is anything which needs draining */
+ if (cc_exec_drain_fn(cc, direct) != NULL) {
/*
- * There is someone waiting for the
- * callout to complete.
- * If the callout was scheduled for
- * migration just cancel it.
+ * Unlock the CPU callout last, so that any use of
+	 * structures belonging to the callout is complete:
*/
- if (cc_cce_migrating(cc, direct)) {
- cc_cce_cleanup(cc, direct);
-
- /*
- * It should be assert here that the callout is not
- * destroyed but that is not easy.
- */
- c->c_iflags &= ~CALLOUT_DFRMIGRATION;
- }
- cc_exec_waiting(cc, direct) = false;
CC_UNLOCK(cc);
- wakeup(&cc_exec_waiting(cc, direct));
+ /* call drain function unlocked */
+ cc_exec_drain_fn(cc, direct)(
+ cc_exec_drain_arg(cc, direct));
CC_LOCK(cc);
- } else if (cc_cce_migrating(cc, direct)) {
- KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
- ("Migrating legacy callout %p", c));
-#ifdef SMP
- /*
- * If the callout was scheduled for
- * migration just perform it now.
- */
- new_cpu = cc_migration_cpu(cc, direct);
- new_time = cc_migration_time(cc, direct);
- new_prec = cc_migration_prec(cc, direct);
- new_func = cc_migration_func(cc, direct);
- new_arg = cc_migration_arg(cc, direct);
- cc_cce_cleanup(cc, direct);
-
- /*
- * It should be assert here that the callout is not destroyed
- * but that is not easy.
- *
- * As first thing, handle deferred callout stops.
- */
- if (!callout_migrating(c)) {
- CTR3(KTR_CALLOUT,
- "deferred cancelled %p func %p arg %p",
- c, new_func, new_arg);
- callout_cc_del(c, cc);
- return;
+ } else if (c_flags & CALLOUT_LOCAL_ALLOC) {
+ /* return callout back to freelist */
+ callout_cc_del(c, cc);
+ } else if (cc_exec_restart(cc, direct)) {
+ struct callout_cpu *new_cc;
+ /* [re-]schedule callout, if any */
+ new_cc = callout_cc_add_locked(c, cc,
+ &cc_exec_restart_args(cc, direct));
+ if (new_cc != cc) {
+ /* switch locks back again */
+ CC_UNLOCK(new_cc);
+ CC_LOCK(cc);
}
- c->c_iflags &= ~CALLOUT_DFRMIGRATION;
-
- new_cc = callout_cpu_switch(c, cc, new_cpu);
- flags = (direct) ? C_DIRECT_EXEC : 0;
- callout_cc_add(c, new_cc, new_time, new_prec, new_func,
- new_arg, new_cpu, flags);
- CC_UNLOCK(new_cc);
- CC_LOCK(cc);
-#else
- panic("migration should not happen");
-#endif
}
- /*
- * If the current callout is locally allocated (from
- * timeout(9)) then put it on the freelist.
- *
- * Note: we need to check the cached copy of c_iflags because
- * if it was not local, then it's not safe to deref the
- * callout pointer.
- */
- KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
- c->c_iflags == CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- if (c_iflags & CALLOUT_LOCAL_ALLOC)
- callout_cc_del(c, cc);
}
/*
@@ -840,28 +922,18 @@
{
struct callout_cpu *cc;
struct callout *c;
-#ifdef CALLOUT_PROFILING
- int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
-#endif
cc = (struct callout_cpu *)arg;
CC_LOCK(cc);
- while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- softclock_call_cc(c, cc,
#ifdef CALLOUT_PROFILING
- &mpcalls, &lockcalls, &gcalls,
-#endif
- 0);
-#ifdef CALLOUT_PROFILING
- ++depth;
+ callout_clear_stats(cc, 0);
#endif
+ while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ softclock_call_cc(c, cc, 0);
}
#ifdef CALLOUT_PROFILING
- avg_depth += (depth * 1000 - avg_depth) >> 8;
- avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
- avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
- avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+ callout_update_stats(cc, 0);
#endif
CC_UNLOCK(cc);
}
@@ -897,10 +969,11 @@
/* XXX Attempt to malloc first */
panic("timeout table full");
SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
- callout_reset(new, to_ticks, ftn, arg);
handle.callout = new;
CC_UNLOCK(cc);
+ callout_reset(new, to_ticks, ftn, arg);
+
return (handle);
}
@@ -908,6 +981,7 @@
untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
{
struct callout_cpu *cc;
+ bool match;
/*
* Check for a handle that was initialized
@@ -918,9 +992,11 @@
return;
cc = callout_lock(handle.callout);
- if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
- callout_stop(handle.callout);
+ match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg);
CC_UNLOCK(cc);
+
+ if (match)
+ callout_stop(handle.callout);
}
void
@@ -929,6 +1005,118 @@
handle->callout = NULL;
}
+static int
+callout_restart_async(struct callout *c, struct callout_args *coa,
+ callout_func_t *drain_fn, void *drain_arg)
+{
+ struct callout_cpu *cc;
+ int cancelled;
+ int direct;
+
+ cc = callout_lock(c);
+
+ /* Figure out if the callout is direct or not */
+ direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
+
+ /*
+ * Check if the callback is currently scheduled for
+ * completion:
+ */
+ if (cc_exec_curr(cc, direct) == c) {
+ /*
+ * Try to prevent the callback from running by setting
+ * the "cc_cancel" variable to "true". Also check if
+ * the callout was previously subject to a deferred
+ * callout restart:
+ */
+ if (cc_exec_cancel(cc, direct) == false ||
+ (c->c_flags & CALLOUT_DEFRESTART) != 0) {
+ cc_exec_cancel(cc, direct) = true;
+ cancelled = CALLOUT_RET_CANCELLED;
+ } else {
+ cancelled = CALLOUT_RET_NORMAL;
+ }
+
+ /*
+ * Prevent callback restart if "callout_drain_xxx()"
+ * is being called or we are stopping the callout or
+ * the callback was preallocated by us:
+ */
+ if (cc_exec_drain_fn(cc, direct) != NULL ||
+ coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) {
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "cancelled and draining" : "draining",
+ c, c->c_func, c->c_arg);
+
+ /* clear old flags, if any */
+ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART | CALLOUT_PROCESSED);
+
+ /* clear restart flag, if any */
+ cc_exec_restart(cc, direct) = false;
+
+ /* set drain function, if any */
+ if (drain_fn != NULL) {
+ cc_exec_drain_fn(cc, direct) = drain_fn;
+ cc_exec_drain_arg(cc, direct) = drain_arg;
+ cancelled |= CALLOUT_RET_DRAINING;
+ }
+ } else {
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "cancelled and restarting" : "restarting",
+ c, c->c_func, c->c_arg);
+
+ /* get us back into the game */
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART);
+ c->c_flags &= ~CALLOUT_PROCESSED;
+
+ /* enable deferred restart */
+ cc_exec_restart(cc, direct) = true;
+
+ /* store arguments for the deferred restart, if any */
+ cc_exec_restart_args(cc, direct) = *coa;
+ }
+ } else {
+ /* stop callout */
+ if (c->c_flags & CALLOUT_PENDING) {
+ /*
+ * The callback has not yet been executed, and
+ * we simply just need to unlink it:
+ */
+ if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+ LIST_REMOVE(c, c_links.le);
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ }
+ cancelled = CALLOUT_RET_CANCELLED;
+ } else {
+ cancelled = CALLOUT_RET_NORMAL;
+ }
+
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "rescheduled" : "scheduled",
+ c, c->c_func, c->c_arg);
+
+ /* [re-]schedule callout, if any */
+ if (coa != NULL) {
+ cc = callout_cc_add_locked(c, cc, coa);
+ } else {
+ /* clear old flags, if any */
+ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART | CALLOUT_PROCESSED);
+
+ /* return callback to pre-allocated list, if any */
+ if ((c->c_flags & CALLOUT_LOCAL_ALLOC) &&
+ cancelled != CALLOUT_RET_NORMAL) {
+ callout_cc_del(c, cc);
+ }
+ }
+ }
+ CC_UNLOCK(cc);
+ return (cancelled);
+}
+
/*
* New interface; clients allocate their own callout structures.
*
@@ -947,33 +1135,32 @@
*/
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
- void (*ftn)(void *), void *arg, int cpu, int flags)
+ callout_func_t *ftn, void *arg, int cpu, int flags)
{
- sbintime_t to_sbt, pr;
- struct callout_cpu *cc;
- int cancelled, direct;
- int ignore_cpu=0;
+ struct callout_args coa;
- cancelled = 0;
- if (cpu == -1) {
- ignore_cpu = 1;
- } else if ((cpu >= MAXCPU) ||
- ((CC_CPU(cpu))->cc_inited == 0)) {
- /* Invalid CPU spec */
- panic("Invalid CPU in callout %d", cpu);
- }
- if (flags & C_ABSOLUTE) {
- to_sbt = sbt;
+ /* store arguments for callout add function */
+ coa.func = ftn;
+ coa.arg = arg;
+ coa.precision = precision;
+ coa.flags = flags;
+ coa.cpu = cpu;
+
+ /* compute the rest of the arguments needed */
+ if (coa.flags & C_ABSOLUTE) {
+ coa.time = sbt;
} else {
- if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
+ sbintime_t pr;
+
+ if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt))
sbt = tick_sbt;
- if ((flags & C_HARDCLOCK) ||
+ if ((coa.flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
sbt >= sbt_timethreshold) {
- to_sbt = getsbinuptime();
+ coa.time = getsbinuptime();
/* Add safety belt for the case of hz > 1000. */
- to_sbt += tc_tick_sbt - tick_sbt;
+ coa.time += tc_tick_sbt - tick_sbt;
#else
sbt >= sbt_tickthreshold) {
/*
@@ -983,150 +1170,29 @@
* active ones.
*/
#ifdef __LP64__
- to_sbt = DPCPU_GET(hardclocktime);
+ coa.time = DPCPU_GET(hardclocktime);
#else
spinlock_enter();
- to_sbt = DPCPU_GET(hardclocktime);
+ coa.time = DPCPU_GET(hardclocktime);
spinlock_exit();
#endif
#endif
- if ((flags & C_HARDCLOCK) == 0)
- to_sbt += tick_sbt;
+ if ((coa.flags & C_HARDCLOCK) == 0)
+ coa.time += tick_sbt;
} else
- to_sbt = sbinuptime();
- if (SBT_MAX - to_sbt < sbt)
- to_sbt = SBT_MAX;
+ coa.time = sbinuptime();
+ if (SBT_MAX - coa.time < sbt)
+ coa.time = SBT_MAX;
else
- to_sbt += sbt;
- pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
- sbt >> C_PRELGET(flags));
- if (pr > precision)
- precision = pr;
- }
- /*
- * This flag used to be added by callout_cc_add, but the
- * first time you call this we could end up with the
- * wrong direct flag if we don't do it before we add.
- */
- if (flags & C_DIRECT_EXEC) {
- direct = 1;
- } else {
- direct = 0;
- }
- KASSERT(!direct || c->c_lock == NULL,
- ("%s: direct callout %p has lock", __func__, c));
- cc = callout_lock(c);
- /*
- * Don't allow migration of pre-allocated callouts lest they
- * become unbalanced or handle the case where the user does
- * not care.
- */
- if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
- ignore_cpu) {
- cpu = c->c_cpu;
+ coa.time += sbt;
+ pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(coa.flags));
+ if (pr > coa.precision)
+ coa.precision = pr;
}
- if (cc_exec_curr(cc, direct) == c) {
- /*
- * We're being asked to reschedule a callout which is
- * currently in progress. If there is a lock then we
- * can cancel the callout if it has not really started.
- */
- if (c->c_lock != NULL && cc_exec_cancel(cc, direct))
- cancelled = cc_exec_cancel(cc, direct) = true;
- if (cc_exec_waiting(cc, direct)) {
- /*
- * Someone has called callout_drain to kill this
- * callout. Don't reschedule.
- */
- CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
- cancelled ? "cancelled" : "failed to cancel",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- return (cancelled);
- }
-#ifdef SMP
- if (callout_migrating(c)) {
- /*
- * This only occurs when a second callout_reset_sbt_on
- * is made after a previous one moved it into
- * deferred migration (below). Note we do *not* change
- * the prev_cpu even though the previous target may
- * be different.
- */
- cc_migration_cpu(cc, direct) = cpu;
- cc_migration_time(cc, direct) = to_sbt;
- cc_migration_prec(cc, direct) = precision;
- cc_migration_func(cc, direct) = ftn;
- cc_migration_arg(cc, direct) = arg;
- cancelled = 1;
- CC_UNLOCK(cc);
- return (cancelled);
- }
-#endif
- }
- if (c->c_iflags & CALLOUT_PENDING) {
- if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
- if (cc_exec_next(cc) == c)
- cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
- LIST_REMOVE(c, c_links.le);
- } else {
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- }
- cancelled = 1;
- c->c_iflags &= ~ CALLOUT_PENDING;
- c->c_flags &= ~ CALLOUT_ACTIVE;
- }
-
-#ifdef SMP
- /*
- * If the callout must migrate try to perform it immediately.
- * If the callout is currently running, just defer the migration
- * to a more appropriate moment.
- */
- if (c->c_cpu != cpu) {
- if (cc_exec_curr(cc, direct) == c) {
- /*
- * Pending will have been removed since we are
- * actually executing the callout on another
- * CPU. That callout should be waiting on the
- * lock the caller holds. If we set both
- * active/and/pending after we return and the
- * lock on the executing callout proceeds, it
- * will then see pending is true and return.
- * At the return from the actual callout execution
- * the migration will occur in softclock_call_cc
- * and this new callout will be placed on the
- * new CPU via a call to callout_cpu_switch() which
- * will get the lock on the right CPU followed
- * by a call callout_cc_add() which will add it there.
- * (see above in softclock_call_cc()).
- */
- cc_migration_cpu(cc, direct) = cpu;
- cc_migration_time(cc, direct) = to_sbt;
- cc_migration_prec(cc, direct) = precision;
- cc_migration_func(cc, direct) = ftn;
- cc_migration_arg(cc, direct) = arg;
- c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
- c->c_flags |= CALLOUT_ACTIVE;
- CTR6(KTR_CALLOUT,
- "migration of %p func %p arg %p in %d.%08x to %u deferred",
- c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
- (u_int)(to_sbt & 0xffffffff), cpu);
- CC_UNLOCK(cc);
- return (cancelled);
- }
- cc = callout_cpu_switch(c, cc, cpu);
- }
-#endif
-
- callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
- CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
- cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
- (u_int)(to_sbt & 0xffffffff));
- CC_UNLOCK(cc);
-
- return (cancelled);
+ /* get callback started, if any */
+ return (callout_restart_async(c, &coa, NULL, NULL));
}
/*
@@ -1145,270 +1211,106 @@
}
int
-_callout_stop_safe(struct callout *c, int safe)
+callout_stop(struct callout *c)
{
- struct callout_cpu *cc, *old_cc;
- struct lock_class *class;
- int direct, sq_locked, use_lock;
- int not_on_a_list;
-
- if (safe)
- WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
- "calling %s", __func__);
+ /* get callback stopped, if any */
+ return (callout_restart_async(c, NULL, NULL, NULL));
+}
- /*
- * Some old subsystems don't hold Giant while running a callout_stop(),
- * so just discard this check for the moment.
- */
- if (!safe && c->c_lock != NULL) {
- if (c->c_lock == &Giant.lock_object)
- use_lock = mtx_owned(&Giant);
- else {
- use_lock = 1;
- class = LOCK_CLASS(c->c_lock);
- class->lc_assert(c->c_lock, LA_XLOCKED);
- }
- } else
- use_lock = 0;
- if (c->c_iflags & CALLOUT_DIRECT) {
- direct = 1;
- } else {
- direct = 0;
- }
- sq_locked = 0;
- old_cc = NULL;
-again:
- cc = callout_lock(c);
+static void
+callout_drain_function(void *arg)
+{
+ wakeup(arg);
+}
- if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
- (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
- ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
- /*
- * Special case where this slipped in while we
- * were migrating *as* the callout is about to
- * execute. The caller probably holds the lock
- * the callout wants.
- *
- * Get rid of the migration first. Then set
- * the flag that tells this code *not* to
- * try to remove it from any lists (its not
- * on one yet). When the callout wheel runs,
- * it will ignore this callout.
- */
- c->c_iflags &= ~CALLOUT_PENDING;
- c->c_flags &= ~CALLOUT_ACTIVE;
- not_on_a_list = 1;
- } else {
- not_on_a_list = 0;
- }
+int
+callout_drain_async(struct callout *c, callout_func_t *fn, void *arg)
+{
+ /* get callback stopped, if any */
+ return (callout_restart_async(
+ c, NULL, fn, arg) & CALLOUT_RET_DRAINING);
+}
- /*
- * If the callout was migrating while the callout cpu lock was
- * dropped, just drop the sleepqueue lock and check the states
- * again.
- */
- if (sq_locked != 0 && cc != old_cc) {
-#ifdef SMP
- CC_UNLOCK(cc);
- sleepq_release(&cc_exec_waiting(old_cc, direct));
- sq_locked = 0;
- old_cc = NULL;
- goto again;
-#else
- panic("migration should not happen");
-#endif
- }
+int
+callout_drain(struct callout *c)
+{
+ int cancelled;
- /*
- * If the callout isn't pending, it's not on the queue, so
- * don't attempt to remove it from the queue. We can try to
- * stop it by other means however.
- */
- if (!(c->c_iflags & CALLOUT_PENDING)) {
- c->c_flags &= ~CALLOUT_ACTIVE;
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "Draining callout");
+
+ callout_lock_client(c->c_flags, c->c_lock);
+
+ /* at this point the "c->c_cpu" field is not changing */
+
+ cancelled = callout_drain_async(c, &callout_drain_function, c);
+
+ if (cancelled != CALLOUT_RET_NORMAL) {
+ struct callout_cpu *cc;
+ int direct;
+
+ CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p",
+ c, c->c_func, c->c_arg);
+
+ cc = callout_lock(c);
+ direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
/*
- * If it wasn't on the queue and it isn't the current
- * callout, then we can't stop it, so just bail.
+ * We've gotten our callout CPU lock, it is safe to
+ * drop the initial lock:
*/
- if (cc_exec_curr(cc, direct) != c) {
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- if (sq_locked)
- sleepq_release(&cc_exec_waiting(cc, direct));
- return (0);
- }
+ callout_unlock_client(c->c_flags, c->c_lock);
- if (safe) {
- /*
- * The current callout is running (or just
- * about to run) and blocking is allowed, so
- * just wait for the current invocation to
- * finish.
- */
- while (cc_exec_curr(cc, direct) == c) {
- /*
- * Use direct calls to sleepqueue interface
- * instead of cv/msleep in order to avoid
- * a LOR between cc_lock and sleepqueue
- * chain spinlocks. This piece of code
- * emulates a msleep_spin() call actually.
- *
- * If we already have the sleepqueue chain
- * locked, then we can safely block. If we
- * don't already have it locked, however,
- * we have to drop the cc_lock to lock
- * it. This opens several races, so we
- * restart at the beginning once we have
- * both locks. If nothing has changed, then
- * we will end up back here with sq_locked
- * set.
- */
- if (!sq_locked) {
- CC_UNLOCK(cc);
- sleepq_lock(
- &cc_exec_waiting(cc, direct));
- sq_locked = 1;
- old_cc = cc;
- goto again;
- }
+ /* Wait for drain to complete */
+
+ while (cc_exec_curr(cc, direct) == c)
+ msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0);
- /*
- * Migration could be cancelled here, but
- * as long as it is still not sure when it
- * will be packed up, just let softclock()
- * take care of it.
- */
- cc_exec_waiting(cc, direct) = true;
- DROP_GIANT();
- CC_UNLOCK(cc);
- sleepq_add(
- &cc_exec_waiting(cc, direct),
- &cc->cc_lock.lock_object, "codrain",
- SLEEPQ_SLEEP, 0);
- sleepq_wait(
- &cc_exec_waiting(cc, direct),
- 0);
- sq_locked = 0;
- old_cc = NULL;
-
- /* Reacquire locks previously released. */
- PICKUP_GIANT();
- CC_LOCK(cc);
- }
- } else if (use_lock &&
- !cc_exec_cancel(cc, direct)) {
-
- /*
- * The current callout is waiting for its
- * lock which we hold. Cancel the callout
- * and return. After our caller drops the
- * lock, the callout will be skipped in
- * softclock().
- */
- cc_exec_cancel(cc, direct) = true;
- CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
- c, c->c_func, c->c_arg);
- KASSERT(!cc_cce_migrating(cc, direct),
- ("callout wrongly scheduled for migration"));
- if (callout_migrating(c)) {
- c->c_iflags &= ~CALLOUT_DFRMIGRATION;
-#ifdef SMP
- cc_migration_cpu(cc, direct) = CPUBLOCK;
- cc_migration_time(cc, direct) = 0;
- cc_migration_prec(cc, direct) = 0;
- cc_migration_func(cc, direct) = NULL;
- cc_migration_arg(cc, direct) = NULL;
-#endif
- }
- CC_UNLOCK(cc);
- KASSERT(!sq_locked, ("sleepqueue chain locked"));
- return (1);
- } else if (callout_migrating(c)) {
- /*
- * The callout is currently being serviced
- * and the "next" callout is scheduled at
- * its completion with a migration. We remove
- * the migration flag so it *won't* get rescheduled,
- * but we can't stop the one thats running so
- * we return 0.
- */
- c->c_iflags &= ~CALLOUT_DFRMIGRATION;
-#ifdef SMP
- /*
- * We can't call cc_cce_cleanup here since
- * if we do it will remove .ce_curr and
- * its still running. This will prevent a
- * reschedule of the callout when the
- * execution completes.
- */
- cc_migration_cpu(cc, direct) = CPUBLOCK;
- cc_migration_time(cc, direct) = 0;
- cc_migration_prec(cc, direct) = 0;
- cc_migration_func(cc, direct) = NULL;
- cc_migration_arg(cc, direct) = NULL;
-#endif
- CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- return (0);
- }
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
- KASSERT(!sq_locked, ("sleepqueue chain still locked"));
- return (0);
+ } else {
+ callout_unlock_client(c->c_flags, c->c_lock);
}
- if (sq_locked)
- sleepq_release(&cc_exec_waiting(cc, direct));
-
- c->c_iflags &= ~CALLOUT_PENDING;
- c->c_flags &= ~CALLOUT_ACTIVE;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- if (not_on_a_list == 0) {
- if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
- if (cc_exec_next(cc) == c)
- cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
- LIST_REMOVE(c, c_links.le);
- } else {
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- }
- }
- callout_cc_del(c, cc);
- CC_UNLOCK(cc);
- return (1);
+
+ return (cancelled & CALLOUT_RET_CANCELLED);
}
void
callout_init(struct callout *c, int mpsafe)
{
- bzero(c, sizeof *c);
if (mpsafe) {
- c->c_lock = NULL;
- c->c_iflags = CALLOUT_RETURNUNLOCKED;
+ _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED);
} else {
- c->c_lock = &Giant.lock_object;
- c->c_iflags = 0;
+ _callout_init_lock(c, &Giant.lock_object, 0);
}
- c->c_cpu = timeout_cpu;
}
void
_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
bzero(c, sizeof *c);
+ KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
+ ("callout_init_lock: bad flags 0x%08x", flags));
+ flags &= CALLOUT_RETURNUNLOCKED;
+ if (lock != NULL) {
+ struct lock_class *class = LOCK_CLASS(lock);
+ if (class == &lock_class_mtx_sleep)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX);
+ else if (class == &lock_class_mtx_spin)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN);
+ else if (class == &lock_class_rm)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_RM);
+ else if (class == &lock_class_rw)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_RW);
+ else
+ panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name);
+ } else {
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0);
+ }
c->c_lock = lock;
- KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
- ("callout_init_lock: bad flags %d", flags));
- KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
- ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
- KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
- (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
- __func__));
- c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
+ c->c_flags = flags;
c->c_cpu = timeout_cpu;
}
Index: projects/hps_head/sys/kern/subr_sleepqueue.c
===================================================================
--- projects/hps_head/sys/kern/subr_sleepqueue.c
+++ projects/hps_head/sys/kern/subr_sleepqueue.c
@@ -152,7 +152,8 @@
*/
static int sleepq_catch_signals(void *wchan, int pri);
static int sleepq_check_signals(void);
-static int sleepq_check_timeout(void);
+static int sleepq_check_timeout(struct thread *);
+static void sleepq_stop_timeout(struct thread *);
#ifdef INVARIANTS
static void sleepq_dtor(void *mem, int size, void *arg);
#endif
@@ -373,17 +374,14 @@
sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
int flags)
{
- struct sleepqueue_chain *sc;
struct thread *td;
td = curthread;
- sc = SC_LOOKUP(wchan);
- mtx_assert(&sc->sc_lock, MA_OWNED);
- MPASS(TD_ON_SLEEPQ(td));
- MPASS(td->td_sleepqueue == NULL);
- MPASS(wchan != NULL);
+
+ mtx_lock_spin(&td->td_slpmutex);
callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+ mtx_unlock_spin(&td->td_slpmutex);
}
/*
@@ -559,11 +557,8 @@
* Check to see if we timed out.
*/
static int
-sleepq_check_timeout(void)
+sleepq_check_timeout(struct thread *td)
{
- struct thread *td;
-
- td = curthread;
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
@@ -573,28 +568,21 @@
td->td_flags &= ~TDF_TIMEOUT;
return (EWOULDBLOCK);
}
-
- /*
- * If TDF_TIMOFAIL is set, the timeout ran after we had
- * already been woken up.
- */
- if (td->td_flags & TDF_TIMOFAIL)
- td->td_flags &= ~TDF_TIMOFAIL;
-
- /*
- * If callout_stop() fails, then the timeout is running on
- * another CPU, so synchronize with it to avoid having it
- * accidentally wake up a subsequent sleep.
- */
- else if (callout_stop(&td->td_slpcallout) == 0) {
- td->td_flags |= TDF_TIMEOUT;
- TD_SET_SLEEPING(td);
- mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
- }
return (0);
}
/*
+ * Atomically stop the timeout by using a mutex.
+ */
+static void
+sleepq_stop_timeout(struct thread *td)
+{
+ mtx_lock_spin(&td->td_slpmutex);
+ callout_stop(&td->td_slpcallout);
+ mtx_unlock_spin(&td->td_slpmutex);
+}
+
+/*
* Check to see if we were awoken by a signal.
*/
static int
@@ -664,9 +652,11 @@
MPASS(!(td->td_flags & TDF_SINTR));
thread_lock(td);
sleepq_switch(wchan, pri);
- rval = sleepq_check_timeout();
+ rval = sleepq_check_timeout(td);
thread_unlock(td);
+ sleepq_stop_timeout(td);
+
return (rval);
}
@@ -677,12 +667,18 @@
int
sleepq_timedwait_sig(void *wchan, int pri)
{
+ struct thread *td;
int rcatch, rvalt, rvals;
+ td = curthread;
+
rcatch = sleepq_catch_signals(wchan, pri);
- rvalt = sleepq_check_timeout();
+ rvalt = sleepq_check_timeout(td);
rvals = sleepq_check_signals();
- thread_unlock(curthread);
+ thread_unlock(td);
+
+ sleepq_stop_timeout(td);
+
if (rcatch)
return (rcatch);
if (rvals)
@@ -889,64 +885,49 @@
static void
sleepq_timeout(void *arg)
{
- struct sleepqueue_chain *sc;
- struct sleepqueue *sq;
- struct thread *td;
- void *wchan;
- int wakeup_swapper;
+ struct thread *td = arg;
+ int wakeup_swapper = 0;
- td = arg;
- wakeup_swapper = 0;
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
- /*
- * First, see if the thread is asleep and get the wait channel if
- * it is.
- */
- thread_lock(td);
- if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
- wchan = td->td_wchan;
- sc = SC_LOOKUP(wchan);
- THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
- sq = sleepq_lookup(wchan);
- MPASS(sq != NULL);
- td->td_flags |= TDF_TIMEOUT;
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
- return;
- }
+ /* Handle the three cases which can happen */
- /*
- * If the thread is on the SLEEPQ but isn't sleeping yet, it
- * can either be on another CPU in between sleepq_add() and
- * one of the sleepq_*wait*() routines or it can be in
- * sleepq_catch_signals().
- */
+ thread_lock(td);
if (TD_ON_SLEEPQ(td)) {
- td->td_flags |= TDF_TIMEOUT;
- thread_unlock(td);
- return;
- }
+ if (TD_IS_SLEEPING(td)) {
+ struct sleepqueue_chain *sc;
+ struct sleepqueue *sq;
+ void *wchan;
- /*
- * Now check for the edge cases. First, if TDF_TIMEOUT is set,
- * then the other thread has already yielded to us, so clear
- * the flag and resume it. If TDF_TIMEOUT is not set, then the
- * we know that the other thread is not on a sleep queue, but it
- * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
- * to let it know that the timeout has already run and doesn't
- * need to be canceled.
- */
- if (td->td_flags & TDF_TIMEOUT) {
- MPASS(TD_IS_SLEEPING(td));
- td->td_flags &= ~TDF_TIMEOUT;
- TD_CLR_SLEEPING(td);
- wakeup_swapper = setrunnable(td);
- } else
- td->td_flags |= TDF_TIMOFAIL;
+ /*
+ * Case I - thread is asleep and needs to be
+ * awoken:
+ */
+ wchan = td->td_wchan;
+ sc = SC_LOOKUP(wchan);
+ THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
+ sq = sleepq_lookup(wchan);
+ MPASS(sq != NULL);
+ td->td_flags |= TDF_TIMEOUT;
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0);
+ } else {
+ /*
+ * Case II - cancel going to sleep by setting
+ * the timeout flag because the target thread
+ * is not asleep yet. It can be on another CPU
+ * in between sleepq_add() and one of the
+ * sleepq_*wait*() routines or it can be in
+ * sleepq_catch_signals().
+ */
+ td->td_flags |= TDF_TIMEOUT;
+ }
+ } else {
+ /*
+ * Case III - thread is already woken up by a wakeup
+		 * call and should not time out. Nothing to do!
+ */
+ }
thread_unlock(td);
if (wakeup_swapper)
kick_proc0();
Index: projects/hps_head/sys/ofed/include/linux/completion.h
===================================================================
--- projects/hps_head/sys/ofed/include/linux/completion.h
+++ projects/hps_head/sys/ofed/include/linux/completion.h
@@ -64,3 +64,4 @@
extern int linux_completion_done(struct completion *);
#endif /* _LINUX_COMPLETION_H_ */
+
Index: projects/hps_head/sys/ofed/include/linux/linux_compat.c
===================================================================
--- projects/hps_head/sys/ofed/include/linux/linux_compat.c
+++ projects/hps_head/sys/ofed/include/linux/linux_compat.c
@@ -846,7 +846,9 @@
if (c->done)
break;
sleepq_add(c, NULL, "completion", flags, 0);
+ sleepq_release(c);
sleepq_set_timeout(c, linux_timer_jiffies_until(end));
+ sleepq_lock(c);
if (flags & SLEEPQ_INTERRUPTIBLE)
ret = sleepq_timedwait_sig(c, 0);
else
Index: projects/hps_head/sys/sys/_callout.h
===================================================================
--- projects/hps_head/sys/sys/_callout.h
+++ projects/hps_head/sys/sys/_callout.h
@@ -46,6 +46,8 @@
SLIST_HEAD(callout_slist, callout);
TAILQ_HEAD(callout_tailq, callout);
+typedef void callout_func_t(void *);
+
struct callout {
union {
LIST_ENTRY(callout) le;
@@ -55,10 +57,9 @@
sbintime_t c_time; /* ticks to the event */
sbintime_t c_precision; /* delta allowed wrt opt */
void *c_arg; /* function argument */
- void (*c_func)(void *); /* function to call */
+ callout_func_t *c_func; /* function to call */
struct lock_object *c_lock; /* lock to handle */
- short c_flags; /* User State */
- short c_iflags; /* Internal State */
+ int c_flags; /* state of this entry */
volatile int c_cpu; /* CPU we're scheduled on */
};
Index: projects/hps_head/sys/sys/callout.h
===================================================================
--- projects/hps_head/sys/sys/callout.h
+++ projects/hps_head/sys/sys/callout.h
@@ -45,10 +45,12 @@
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */
-#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */
-#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */
+#define CALLOUT_UNUSED_5 0x0020 /* --available-- */
+#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */
#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */
#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */
+#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */
+#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */
#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */
#define C_PRELBITS 7
@@ -63,25 +65,10 @@
};
#ifdef _KERNEL
-/*
- * Note the flags field is actually *two* fields. The c_flags
- * field is the one that caller operations that may, or may not have
- * a lock touches i.e. callout_deactivate(). The other, the c_iflags,
- * is the internal flags that *must* be kept correct on which the
- * callout system depend on e.g. callout_pending().
- * The c_iflag is used internally by the callout system to determine which
- * list the callout is on and track internal state. Callers *should not*
- * use the c_flags field directly but should use the macros provided.
- *
- * The c_iflags field holds internal flags that are protected by internal
- * locks of the callout subsystem. The c_flags field holds external flags.
- * The caller must hold its own lock while manipulating or reading external
- * flags via callout_active(), callout_deactivate(), callout_reset*(), or
- * callout_stop() to avoid races.
- */
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
-#define callout_drain(c) _callout_stop_safe(c, 1)
+int callout_drain(struct callout *);
+int callout_drain_async(struct callout *, callout_func_t *, void *);
void callout_init(struct callout *, int);
void _callout_init_lock(struct callout *, struct lock_object *, int);
#define callout_init_mtx(c, mtx, flags) \
@@ -93,9 +80,9 @@
#define callout_init_rw(c, rw, flags) \
_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \
NULL, (flags))
-#define callout_pending(c) ((c)->c_iflags & CALLOUT_PENDING)
+#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
- void (*)(void *), void *, int, int);
+ callout_func_t *, void *, int, int);
#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags))
#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
@@ -119,8 +106,7 @@
int callout_schedule_on(struct callout *, int, int);
#define callout_schedule_curcpu(c, on_tick) \
callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
-#define callout_stop(c) _callout_stop_safe(c, 0)
-int _callout_stop_safe(struct callout *, int);
+int callout_stop(struct callout *);
void callout_process(sbintime_t now);
#endif
Index: projects/hps_head/sys/sys/proc.h
===================================================================
--- projects/hps_head/sys/sys/proc.h
+++ projects/hps_head/sys/sys/proc.h
@@ -308,6 +308,7 @@
} td_uretoff; /* (k) Syscall aux returns. */
#define td_retval td_uretoff.tdu_retval
struct callout td_slpcallout; /* (h) Callout for sleep. */
+ struct mtx td_slpmutex; /* (h) Mutex for sleep callout */
struct trapframe *td_frame; /* (k) */
struct vm_object *td_kstack_obj;/* (a) Kstack object. */
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */
@@ -364,7 +365,7 @@
#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
-#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
+#define TDF_UNUSED12 0x00001000 /* --available-- */
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
@@ -706,7 +707,7 @@
#define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */
#define SWT_TURNSTILE 3 /* Turnstile contention. */
#define SWT_SLEEPQ 4 /* Sleepq wait. */
-#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
+#define SWT_UNUSED5 5 /* --available-- */
#define SWT_RELINQUISH 6 /* yield call. */
#define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */
#define SWT_IDLE 8 /* Switching from the idle thread. */
Index: share/man/man9/Makefile
===================================================================
--- share/man/man9/Makefile
+++ share/man/man9/Makefile
@@ -1558,6 +1558,7 @@
timeout.9 callout_active.9 \
timeout.9 callout_deactivate.9 \
timeout.9 callout_drain.9 \
+ timeout.9 callout_drain_async.9 \
timeout.9 callout_handle_init.9 \
timeout.9 callout_init.9 \
timeout.9 callout_init_mtx.9 \
Index: share/man/man9/timeout.9
===================================================================
--- share/man/man9/timeout.9
+++ share/man/man9/timeout.9
@@ -29,13 +29,14 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 8, 2014
+.Dd January 24, 2015
.Dt TIMEOUT 9
.Os
.Sh NAME
.Nm callout_active ,
.Nm callout_deactivate ,
.Nm callout_drain ,
+.Nm callout_drain_async ,
.Nm callout_handle_init ,
.Nm callout_init ,
.Nm callout_init_mtx ,
@@ -63,256 +64,248 @@
.In sys/systm.h
.Bd -literal
typedef void timeout_t (void *);
+typedef void callout_func_t (void *);
.Ed
-.Ft int
-.Fn callout_active "struct callout *c"
-.Ft void
-.Fn callout_deactivate "struct callout *c"
-.Ft int
-.Fn callout_drain "struct callout *c"
-.Ft void
-.Fn callout_handle_init "struct callout_handle *handle"
-.Bd -literal
-struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle);
-.Ed
-.Ft void
-.Fn callout_init "struct callout *c" "int mpsafe"
-.Ft void
-.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags"
-.Ft void
-.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags"
-.Ft void
-.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags"
-.Ft int
-.Fn callout_pending "struct callout *c"
-.Ft int
-.Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg"
-.Ft int
-.Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \
-"void *arg"
-.Ft int
-.Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \
-"void *arg" "int cpu"
-.Ft int
-.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "timeout_t *func" "void *arg" "int flags"
-.Ft int
-.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "timeout_t *func" "void *arg" "int flags"
-.Ft int
-.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags"
-.Ft int
-.Fn callout_schedule "struct callout *c" "int ticks"
-.Ft int
-.Fn callout_schedule_curcpu "struct callout *c" "int ticks"
-.Ft int
-.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu"
-.Ft int
-.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "int flags"
-.Ft int
-.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "int flags"
-.Ft int
-.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \
-"sbintime_t pr" "int cpu" "int flags"
-.Ft int
-.Fn callout_stop "struct callout *c"
-.Ft struct callout_handle
-.Fn timeout "timeout_t *func" "void *arg" "int ticks"
-.Ft void
-.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle"
.Sh DESCRIPTION
The
.Nm callout
-API is used to schedule a call to an arbitrary function at a specific
-time in the future.
-Consumers of this API are required to allocate a callout structure
-.Pq struct callout
+API is used to schedule a one-time call to an arbitrary function at a
+specific time in the future.
+Consumers of this API are required to allocate a
+.Ft struct callout
for each pending function invocation.
-This structure stores state about the pending function invocation including
-the function to be called and the time at which the function should be invoked.
-Pending function calls can be cancelled or rescheduled to a different time.
-In addition,
-a callout structure may be reused to schedule a new function call after a
-scheduled call is completed.
-.Pp
-Callouts only provide a single-shot mode.
-If a consumer requires a periodic timer,
-it must explicitly reschedule each function call.
-This is normally done by rescheduling the subsequent call within the called
-function.
-.Pp
-Callout functions must not sleep.
-They may not acquire sleepable locks,
-wait on condition variables,
-perform blocking allocation requests,
-or invoke any other action that might sleep.
-.Pp
-Each callout structure must be initialized by
-.Fn callout_init ,
-.Fn callout_init_mtx ,
-.Fn callout_init_rm ,
-or
-.Fn callout_init_rw
-before it is passed to any of the other callout functions.
The
+.Ft struct callout
+stores the full state about any pending function call and
+must be drained by a call to
+.Fn callout_drain
+or
+.Fn callout_drain_async
+before freeing.
+.Sh INITIALIZATION
+.Ft void
+.Fn callout_handle_init "struct callout_handle *handle"
+This function is deprecated.
+Please use
.Fn callout_init
-function initializes a callout structure in
-.Fa c
-that is not associated with a specific lock.
+instead.
+This function is used to prepare a
+.Ft struct callout_handle
+before it is used for the first time.
+If this function is called on a pending timeout, the pending timeout
+cannot be cancelled and the
+.Fn untimeout
+function will return as if no timeout was pending.
+.Pp
+.Fn CALLOUT_HANDLE_INITIALIZER "&handle"
+This macro is deprecated.
+This macro is used to statically initialize a
+.Ft struct callout_handle .
+Please use
+.Fn callout_init
+instead.
+.Pp
+.Ft void
+.Fn callout_init "struct callout *c" "int mpsafe"
+This function prepares a
+.Ft struct callout
+before it can be used.
+This function must not be called while the callout has a pending timeout.
If the
.Fa mpsafe
-argument is zero,
-the callout structure is not considered to be
-.Dq multi-processor safe ;
-and the Giant lock will be acquired before calling the callout function
-and released when the callout function returns.
-.Pp
-The
-.Fn callout_init_mtx ,
-.Fn callout_init_rm ,
-and
-.Fn callout_init_rw
-functions initialize a callout structure in
-.Fa c
-that is associated with a specific lock.
-The lock is specified by the
-.Fa mtx ,
-.Fa rm ,
-or
-.Fa rw
-parameter.
-The associated lock must be held while stopping or rescheduling the
-callout.
-The callout subsystem acquires the associated lock before calling the
-callout function and releases it after the function returns.
-If the callout was cancelled while the callout subsystem waited for the
-associated lock,
-the callout function is not called,
-and the associated lock is released.
-This ensures that stopping or rescheduling the callout will abort any
-previously scheduled invocation.
-.Pp
-Only regular mutexes may be used with
-.Fn callout_init_mtx ;
-spin mutexes are not supported.
-A sleepable read-mostly lock
-.Po
-one initialized with the
-.Dv RM_SLEEPABLE
-flag
-.Pc
-may not be used with
-.Fn callout_init_rm .
-Similarly, other sleepable lock types such as
-.Xr sx 9
-and
-.Xr lockmgr 9
-cannot be used with callouts because sleeping is not permitted in
-the callout subsystem.
+argument is non-zero, the callback function will run without any lock
+held and the callback is considered "mpsafe".
+.Bf Sy
+It is entirely the application's responsibility not to call any
+.Fn callout_xxx
+functions, including the
+.Fn callout_drain
+function, simultaneously on the same callout when the
+.Fa mpsafe
+argument is non-zero.
+Otherwise, the behavior is undefined.
+Avoid simultaneous calls by obtaining an exclusive lock before calling
+any
+.Fn callout_xxx
+functions other than the
+.Fn callout_drain
+function.
+.Ef
+If the
+.Fa mpsafe
+argument is zero, the Giant mutex will be locked before the callback
+function is called.
+If the
+.Fa mpsafe
+argument is zero, the Giant mutex is expected to be locked when calling
+any
+.Fn callout_xxx
+functions which start and stop a callout other than the
+.Fn callout_drain
+function.
.Pp
-These
+.Ft void
+.Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags"
+This function prepares a
+.Ft struct callout
+before it can be used.
+This function must not be called while the callout has a pending timeout.
+The
+.Fa mtx
+argument is a pointer to a valid spin mutex or a valid regular,
+non-sleepable mutex which the callout subsystem will lock
+before calling the callback function.
+The specified mutex is expected to be locked when calling any
+.Fn callout_xxx
+functions which start and stop a callout other than the
+.Fn callout_drain
+function.
+Valid
.Fa flags
-may be specified for
-.Fn callout_init_mtx ,
-.Fn callout_init_rm ,
-or
-.Fn callout_init_rw :
+are:
.Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED"
.It Dv CALLOUT_RETURNUNLOCKED
-The callout function will release the associated lock itself,
-so the callout subsystem should not attempt to unlock it
-after the callout function returns.
-.It Dv CALLOUT_SHAREDLOCK
-The lock is only acquired in read mode when running the callout handler.
-This flag is ignored by
-.Fn callout_init_mtx .
+The callout function is assumed to have released the specified mutex
+before returning.
+.It Dv 0
+The callout subsystem will release the specified mutex after the
+callout function has returned.
.El
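+.Pp
+The following minimal sketch, which assumes a hypothetical driver
+softc containing a mutex and a callout, shows one way a consumer
+might initialize a mutex-protected callout:
+.Bd -literal -offset indent
+struct mydev_softc {
+	struct mtx	sc_mtx;
+	struct callout	sc_timer;
+};
+
+static void
+mydev_init_timer(struct mydev_softc *sc)
+{
+	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
+	/* callbacks will be invoked with sc_mtx locked */
+	callout_init_mtx(&sc->sc_timer, &sc->sc_mtx, 0);
+}
+.Ed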
.Pp
-The function
-.Fn callout_stop
-cancels a callout
-.Fa c
-if it is currently pending.
-If the callout is pending, then
-.Fn callout_stop
-returns a non-zero value.
-If the callout is not set,
-has already been serviced,
-or is currently being serviced,
-then zero will be returned.
-If the callout has an associated lock,
-then that lock must be held when this function is called.
-.Pp
-The function
-.Fn callout_drain
-is identical to
-.Fn callout_stop
-except that it will wait for the callout
-.Fa c
-to complete if it is already in progress.
-This function MUST NOT be called while holding any
-locks on which the callout might block, or deadlock will result.
-Note that if the callout subsystem has already begun processing this
-callout, then the callout function may be invoked before
-.Fn callout_drain
-returns.
-However, the callout subsystem does guarantee that the callout will be
-fully stopped before
-.Fn callout_drain
-returns.
+.Ft void
+.Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags"
+This function is similar to
+.Fn callout_init_mtx ,
+but it accepts a read-mostly type of lock.
+The read-mostly lock must not be initialized with the
+.Dv RM_SLEEPABLE
+flag.
.Pp
-The
+.Ft void
+.Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags"
+This function is similar to
+.Fn callout_init_mtx ,
+but it accepts a read/write type of lock.
+.Sh SCHEDULING CALLOUTS
+.Ft struct callout_handle
+.Fn timeout "timeout_t *func" "void *arg" "int ticks"
+This function is deprecated.
+Please use
.Fn callout_reset
-and
-.Fn callout_schedule
-function families schedule a future function invocation for callout
-.Fa c .
-If
-.Fa c
-already has a pending callout,
-it is cancelled before the new invocation is scheduled.
-These functions return a non-zero value if a pending callout was cancelled
-and zero if there was no pending callout.
-If the callout has an associated lock,
-then that lock must be held when any of these functions are called.
-.Pp
-The time at which the callout function will be invoked is determined by
-either the
-.Fa ticks
-argument or the
-.Fa sbt ,
-.Fa pr ,
-and
-.Fa flags
-arguments.
-When
-.Fa ticks
-is used,
-the callout is scheduled to execute after
+instead.
+This function schedules a call to
+.Fa func
+to take place after
.Fa ticks Ns No /hz
seconds.
Non-positive values of
.Fa ticks
are silently converted to the value
.Sq 1 .
-.Pp
The
-.Fa sbt ,
-.Fa pr ,
-and
-.Fa flags
-arguments provide more control over the scheduled time including
-support for higher resolution times,
-specifying the precision of the scheduled time,
-and setting an absolute deadline instead of a relative timeout.
-The callout is scheduled to execute in a time window which begins at
-the time specified in
+.Fa func
+argument is a valid pointer to a function that takes a single
+.Fa void *
+argument.
+Upon invocation, the
+.Fa func
+function will receive
+.Fa arg
+as its only argument.
+The Giant lock is locked when the
+.Fa func
+function is invoked and must not be unlocked by the callback.
+The returned value from
+.Fn timeout
+is a
+.Ft struct callout_handle
+structure which can be used in conjunction with the
+.Fn untimeout
+function to request that a scheduled timeout be cancelled.
+As handles are recycled by the system, it is possible, although unlikely,
+that a handle from one invocation of
+.Fn timeout
+may match the handle of another invocation of
+.Fn timeout
+if both calls used the same function pointer and argument, and the first
+timeout has expired or been cancelled before the second call.
+Ensure that the function and argument pair is unique when using this
+function.
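+.Pp
+As a rough sketch, assuming a hypothetical
+.Fn mydev_expire
+handler, code using the deprecated
+.Fn timeout
+and
+.Fn untimeout
+functions can typically be converted to the callout API as follows:
+.Bd -literal -offset indent
+static struct callout mydev_timer;
+
+static void
+mydev_start(void *arg)
+{
+	/* was: handle = timeout(mydev_expire, arg, hz); */
+	callout_init(&mydev_timer, 1);
+	callout_reset(&mydev_timer, hz, mydev_expire, arg);
+}
+
+static void
+mydev_stop(void *arg)
+{
+	/* was: untimeout(mydev_expire, arg, handle); */
+	callout_drain(&mydev_timer);
+}
+.Ed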
+.Pp
+.Ft int
+.Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg"
+This function is used to schedule or re-schedule a callout.
+This function first stops the callout given by the
+.Fa c
+argument, if it is pending.
+Then it restarts the callout given by the
+.Fa c
+argument.
+The relative time until the timeout callback happens is given by the
+.Fa ticks
+argument.
+The number of ticks in a second is defined by
+.Dv hz
+and can vary from system to system.
+This function returns a non-zero value if the given callout was pending and
+the callback function was prevented from being called.
+Otherwise, a value of zero is returned.
+If a lock is associated with the callout given by the
+.Fa c
+argument and it is exclusively locked when this function is called, this
+function will always ensure that the previous callback function, if any,
+is never called.
+In other words, the callout will be atomically restarted.
+Otherwise, there is no such guarantee.
+The callback function is given by
+.Fa func
+and its function argument is given by
+.Fa arg .
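+.Pp
+A brief sketch, re-using the
+.Va sc_mtx
+and
+.Va sc_timer
+fields of the hypothetical softc shown earlier, implements a
+self-rescheduling one-second watchdog on top of
+.Fn callout_reset :
+.Bd -literal -offset indent
+static void
+mydev_watchdog(void *arg)
+{
+	struct mydev_softc *sc = arg;
+
+	/* sc_mtx is locked here because of callout_init_mtx() */
+	mtx_assert(&sc->sc_mtx, MA_OWNED);
+
+	/* ... periodic work ... */
+
+	/* re-arm for one second from now */
+	callout_reset(&sc->sc_timer, hz, mydev_watchdog, sc);
+}
+
+static void
+mydev_watchdog_start(struct mydev_softc *sc)
+{
+	mtx_lock(&sc->sc_mtx);
+	callout_reset(&sc->sc_timer, hz, mydev_watchdog, sc);
+	mtx_unlock(&sc->sc_mtx);
+}
+.Ed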
+.Pp
+.Ft int
+.Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \
+"void *arg"
+This function works the same as the
+.Fn callout_reset
+function, except that the callback function given by the
+.Fa func
+argument will be executed on the CPU that called this function.
+A change in the CPU selection can only happen if the callout has an
+associated lock which is held when this function is called, or if the
+callout is marked "mpsafe".
+See
+.Fn callout_init .
+The CPU selection cannot be changed while the callout subsystem is
+processing the callback for completion.
+.Pp
+.Ft int
+.Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \
+"void *arg" "int cpu"
+This function works the same as the
+.Fn callout_reset
+function, except that the callback function given by the
+.Fa func
+argument will be executed on the CPU given by the
+.Fa cpu
+argument.
+A change in the CPU selection can only happen if the callout has an
+associated lock which is held when this function is called, or if the
+callout is marked "mpsafe".
+See
+.Fn callout_init .
+The CPU selection cannot be changed while the callout subsystem is
+processing the callback for completion.
+.Pp
+.Ft int
+.Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags"
+This function works the same as the
+.Fn callout_reset
+function, except that the relative or absolute time after which the timeout
+callback should happen is given by the
.Fa sbt
-and extends for the amount of time specified in
+argument and extends for the amount of time specified in
.Fa pr .
+This function is used when high precision timeouts are needed.
If
.Fa sbt
specifies a time in the past,
@@ -322,12 +315,13 @@
allows the callout subsystem to coalesce callouts scheduled close to each
other into fewer timer interrupts,
reducing processing overhead and power consumption.
-These
+The
.Fa flags
-may be specified to adjust the interpretation of
+argument may be non-zero to adjust the interpretation of the
.Fa sbt
and
-.Fa pr :
+.Fa pr
+arguments:
.Bl -tag -width ".Dv C_DIRECT_EXEC"
.It Dv C_ABSOLUTE
Handle the
@@ -347,7 +341,7 @@
and should be as small as possible because they run with absolute priority.
.It Fn C_PREL
Specifies relative event time precision as binary logarithm of time interval
-divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, etc.
+divided by acceptable time deviation: 1 -- 1/2, 2 -- 1/4, and so on.
Note that the larger of
.Fa pr
or this value is used as the length of the time window.
@@ -360,65 +354,221 @@
calls if possible.
.El
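+.Pp
+A short sketch, assuming the hypothetical
+.Va sc_timer
+callout and
+.Fn mydev_expire
+callback from the surrounding examples, shows a relative
+high-resolution timeout and an absolute one:
+.Bd -literal -offset indent
+/* the associated lock, if any, must be locked here */
+
+/* fire in about 25ms; C_PREL(3) allows 1/8 of that as slack */
+callout_reset_sbt(&sc->sc_timer, 25 * SBT_1MS, 0,
+    mydev_expire, sc, C_PREL(3));
+
+/* fire at an absolute point in time, 100ms from now */
+callout_reset_sbt(&sc->sc_timer, getsbinuptime() + 100 * SBT_1MS, 0,
+    mydev_expire, sc, C_ABSOLUTE);
+.Ed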
.Pp
-The
-.Fn callout_reset
-functions accept a
+.Ft int
+.Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int flags"
+This function works like
+.Fn callout_reset_sbt ,
+except the callback function given by the
.Fa func
-argument which identifies the function to be called when the time expires.
-It must be a pointer to a function that takes a single
-.Fa void *
-argument.
-Upon invocation,
+argument will be executed on the CPU which called this function.
+A change in the CPU selection can only happen if the callout has an
+associated lock which is held when this function is called, or if the
+callout is marked "mpsafe".
+See
+.Fn callout_init .
+The CPU selection cannot be changed while the callout subsystem is
+processing the callback for completion.
+.Pp
+.Ft int
+.Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags"
+This function works like
+.Fn callout_reset_sbt ,
+except the callback function given by
.Fa func
-will receive
-.Fa arg
-as its only argument.
-The
-.Fn callout_schedule
-functions reuse the
+will be executed on the CPU given by
+.Fa cpu .
+A change in the CPU selection can only happen if the callout has an
+associated lock which is held when this function is called, or if the
+callout is marked "mpsafe".
+See
+.Fn callout_init .
+The CPU selection cannot be changed while the callout subsystem is
+processing the callback for completion.
+.Pp
+.Ft int
+.Fn callout_schedule "struct callout *c" "int ticks"
+This function works the same as the
+.Fn callout_reset
+function, except that it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
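+.Pp
+For example, after an initial
+.Fn callout_reset ,
+a consumer holding the associated lock, if any, can simply re-arm the
+stored callback with:
+.Bd -literal -offset indent
+callout_schedule(&sc->sc_timer, hz);
+.Ed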
+.Pp
+.Ft int
+.Fn callout_schedule_curcpu "struct callout *c" "int ticks"
+This function works the same as the
+.Fn callout_reset_curcpu
+function, except that it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu"
+This function works the same as the
+.Fn callout_reset_on
+function, except that it re-uses the callback function and the callback argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int flags"
+This function works the same as the
+.Fn callout_reset_sbt
+function, except that it reuses the callback function and argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int flags"
+This function works the same as the
+.Fn callout_reset_sbt_curcpu
+function, except that it reuses the callback function and argument
+already stored in the
+.Pq struct callout
+structure.
+.Pp
+.Ft int
+.Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \
+"sbintime_t pr" "int cpu" "int flags"
+This function works the same as the
+.Fn callout_reset_sbt_on
+function, except that it reuses the callback function and argument
+already stored in the
+.Pq struct callout
+structure.
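+.Pp
+As a sketch, assuming an illustrative softc structure with an embedded
+callout, the
+.Fn callout_schedule
+function re-arms a callout with the callback function and argument
+stored by an earlier call to the
+.Fn callout_reset
+function:
+.Bd -literal -offset indent
+/* The initial reset stores the callback function and argument. */
+callout_reset(&sc->timer, hz, &my_timer_fn, sc);
+
+/* Later, re-arm for another second using the stored values. */
+callout_schedule(&sc->timer, hz);
+.Ed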
+.Sh CHECKING THE STATE OF CALLOUTS
+.Ft int
+.Fn callout_pending "struct callout *c"
+This function returns non-zero if the callout pointed to by the
+.Fa c
+argument has a pending callback.
+Otherwise, this function returns zero.
+When called from within the callout function, this function returns
+zero unless the callout has been rescheduled.
+.Pp
+.Ft int
+.Fn callout_active "struct callout *c"
+This function is deprecated and returns non-zero if the callout
+pointed to by the
+.Fa c
+argument was scheduled in the past.
+Otherwise, this function returns zero.
+This function also returns zero after one of the
+.Fn callout_deactivate ,
+.Fn callout_stop ,
+.Fn callout_drain
+or
+.Fn callout_drain_async
+functions has been called on the callout given by the
+.Fa c
+argument.
+.Pp
+.Ft void
+.Fn callout_deactivate "struct callout *c"
+This function is deprecated and ensures that subsequent calls to the
+.Fn callout_active
+function return zero until the callout is scheduled again.
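+.Pp
+A minimal sketch of these checks at the start of a callout function,
+assuming the callout is protected by an illustrative driver mutex
+which is held while the callback runs:
+.Bd -literal -offset indent
+static void
+my_timer_fn(void *arg)
+{
+        struct my_softc *sc = arg;
+
+        if (callout_pending(&sc->timer))
+                return;         /* rescheduled; a newer call follows */
+        if (callout_active(&sc->timer) == 0)
+                return;         /* the callout was stopped */
+        callout_deactivate(&sc->timer);
+        /* ... periodic work ... */
+}
+.Ed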
+.Sh STOPPING CALLOUTS
+.Ft void
+.Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle"
+This function is deprecated and cancels the timeout associated with the
+.Fa handle
+argument, using the function pointed to by the
.Fa func
-and
+argument and the
.Fa arg
-arguments from the previous callout.
-Note that one of the
-.Fn callout_reset
-functions must always be called to initialize
+argument to validate the handle.
+If the handle does not correspond to a timeout with
+the function
.Fa func
-and
+taking the argument
.Fa arg
-before one of the
-.Fn callout_schedule
-functions can be used.
+no action is taken.
+The
+.Fa handle
+must be initialized by a previous call to
+.Fn timeout ,
+.Fn callout_handle_init ,
+or assigned the value of
+.Fn CALLOUT_HANDLE_INITIALIZER "&handle"
+before being passed to
+.Fn untimeout .
+The behavior of calling
+.Fn untimeout
+with an uninitialized handle
+is undefined.
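+.Pp
+A minimal sketch of the legacy, handle-based usage, with illustrative
+function and argument names:
+.Bd -literal -offset indent
+struct callout_handle handle;
+
+callout_handle_init(&handle);
+handle = timeout(&my_timeout_fn, sc, hz);
+/* ... */
+untimeout(&my_timeout_fn, sc, handle);
+.Ed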
.Pp
-The callout subsystem provides a softclock thread for each CPU in the system.
-Callouts are assigned to a single CPU and are executed by the softclock thread
-for that CPU.
-Initially,
-callouts are assigned to CPU 0.
-The
-.Fn callout_reset_on ,
-.Fn callout_reset_sbt_on ,
-.Fn callout_schedule_on
-and
-.Fn callout_schedule_sbt_on
-functions assign the callout to CPU
-.Fa cpu .
-The
-.Fn callout_reset_curcpu ,
-.Fn callout_reset_sbt_curpu ,
-.Fn callout_schedule_curcpu
-and
-.Fn callout_schedule_sbt_curcpu
-functions assign the callout to the current CPU.
-The
-.Fn callout_reset ,
-.Fn callout_reset_sbt ,
-.Fn callout_schedule
-and
-.Fn callout_schedule_sbt
-functions schedule the callout to execute in the softclock thread of the CPU
-to which it is currently assigned.
+.Ft int
+.Fn callout_stop "struct callout *c"
+This function is used to stop a timeout function invocation
+associated with the callout pointed to by the
+.Fa c
+argument, in a non-blocking fashion.
+This function can be called multiple times in a row with no side
+effects, even if the callout is already stopped.
+However, this function should not be called before the callout has
+been initialized.
+This function returns a non-zero value if the given callout was pending and
+the callback function was prevented from being called.
+Otherwise, a value of zero is returned.
+If a lock is associated with the callout given by the
+.Fa c
+argument and it is exclusively locked when this function is called, the
+.Fn callout_stop
+function will always ensure that the callback function is never reached.
+In other words, the callout is stopped atomically.
+Otherwise, there is no such guarantee.
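+.Pp
+A sketch of an atomic stop, assuming the callout was initialized with
+the
+.Fn callout_init_mtx
+function; the mutex and softc fields are illustrative:
+.Bd -literal -offset indent
+mtx_lock(&sc->mtx);
+if (callout_stop(&sc->timer) != 0)
+        sc->timer_armed = 0;    /* the pending callback was cancelled */
+mtx_unlock(&sc->mtx);
+.Ed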
+.Sh DRAINING CALLOUTS
+.Ft int
+.Fn callout_drain "struct callout *c"
+This function works the same as the
+.Fn callout_stop
+function, except that it ensures that all callback functions have
+returned and that there are no more references to the callout pointed
+to by the
+.Fa c
+argument inside the callout subsystem before it returns.
+This function also ensures that the lock, if any, associated with the
+callout is no longer being used.
+When this function returns, it is safe to free the callout structure
+pointed to by the
+.Fa c
+argument.
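+.Pp
+A typical detach-time sketch, with illustrative names, run from a
+context that is allowed to sleep because the
+.Fn callout_drain
+function can block waiting for the callback to finish:
+.Bd -literal -offset indent
+callout_drain(&sc->timer);
+/* No callback is running or pending; the callout may be freed. */
+free(sc, M_DEVBUF);
+.Ed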
.Pp
+.Ft int
+.Fn callout_drain_async "struct callout *c" "callout_func_t *fn" "void *arg"
+This function is non-blocking and works the same as the
+.Fn callout_stop
+function, except that a non-zero return value means the callback
+function pointed to by the
+.Fa fn
+argument will be called back with the
+.Fa arg
+argument when all references to the callout pointed to by the
+.Fa c
+argument are gone.
+If this function returns non-zero, it should not be called again until
+the callback function has been called.
+If the
+.Fn callout_drain
+or
+.Fn callout_drain_async
+functions are called while an asynchronous drain is pending,
+previously pending asynchronous drains might get cancelled.
+If this function returns zero, it is safe to free the callout
+structure pointed to by the
+.Fa c
+argument right away.
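+.Pp
+A non-blocking teardown sketch, with illustrative structure and
+function names; the final free is deferred to the drain callback when
+the callout could not be stopped synchronously:
+.Bd -literal -offset indent
+static void
+my_drain_fn(void *arg)
+{
+        struct my_softc *sc = arg;
+
+        /* All callout subsystem references to the callout are gone. */
+        free(sc, M_DEVBUF);
+}
+
+static void
+my_detach(struct my_softc *sc)
+{
+
+        if (callout_drain_async(&sc->timer, &my_drain_fn, sc) == 0) {
+                /* No drain callback will be invoked; free right away. */
+                free(sc, M_DEVBUF);
+        }
+}
+.Ed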
+.Sh CALLOUT FUNCTION RESTRICTIONS
+Callout functions must not sleep.
+They may not acquire sleepable locks, wait on condition variables,
+perform blocking allocation requests, or invoke any other action that
+might sleep.
+.Sh CALLOUT SUBSYSTEM INTERNALS
+The callout subsystem has its own set of spinlocks to protect its
+internal state.
+The callout subsystem provides a softclock thread for each CPU in the
+system.
+Callouts are assigned to a single CPU and are executed by the
+softclock thread for that CPU.
+Initially, callouts are assigned to CPU 0.
Softclock threads are not pinned to their respective CPUs by default.
The softclock thread for CPU 0 can be pinned to CPU 0 by setting the
.Va kern.pin_default_swi
@@ -427,50 +577,7 @@
respective CPUs by setting the
.Va kern.pin_pcpu_swi
loader tunable to a non-zero value.
-.Pp
-The macros
-.Fn callout_pending ,
-.Fn callout_active
-and
-.Fn callout_deactivate
-provide access to the current state of the callout.
-The
-.Fn callout_pending
-macro checks whether a callout is
-.Em pending ;
-a callout is considered
-.Em pending
-when a timeout has been set but the time has not yet arrived.
-Note that once the timeout time arrives and the callout subsystem
-starts to process this callout,
-.Fn callout_pending
-will return
-.Dv FALSE
-even though the callout function may not have finished
-.Pq or even begun
-executing.
-The
-.Fn callout_active
-macro checks whether a callout is marked as
-.Em active ,
-and the
-.Fn callout_deactivate
-macro clears the callout's
-.Em active
-flag.
-The callout subsystem marks a callout as
-.Em active
-when a timeout is set and it clears the
-.Em active
-flag in
-.Fn callout_stop
-and
-.Fn callout_drain ,
-but it
-.Em does not
-clear it when a callout expires normally via the execution of the
-callout function.
-.Ss "Avoiding Race Conditions"
+.Sh "AVOIDING RACE CONDITIONS"
The callout subsystem invokes callout functions from its own thread
context.
Without some kind of synchronization,
@@ -531,9 +638,8 @@
.Pc
indicates whether or not the callout was removed.
If it is known that the callout was set and the callout function has
-not yet executed, then a return value of
-.Dv FALSE
-indicates that the callout function is about to be called.
+not yet executed, then a return value of zero indicates that the
+callout function is about to be called.
For example:
.Bd -literal -offset indent
if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) {
@@ -589,16 +695,14 @@
.Em pending
flag and return without action if
.Fn callout_pending
-returns
-.Dv TRUE .
+returns non-zero.
This indicates that the callout was rescheduled using
.Fn callout_reset
just before the callout function was invoked.
If
.Fn callout_active
-returns
-.Dv FALSE
-then the callout function should also return without action.
+returns zero then the callout function should also return without
+action.
This indicates that the callout has been stopped.
Finally, the callout function should call
.Fn callout_deactivate
@@ -668,129 +772,13 @@
or releasing the storage for the callout structure.
.Sh LEGACY API
.Bf Sy
-The functions below are a legacy API that will be removed in a future release.
-New code should not use these routines.
-.Ef
-.Pp
-The function
-.Fn timeout
-schedules a call to the function given by the argument
-.Fa func
-to take place after
-.Fa ticks Ns No /hz
-seconds.
-Non-positive values of
-.Fa ticks
-are silently converted to the value
-.Sq 1 .
-.Fa func
-should be a pointer to a function that takes a
-.Fa void *
-argument.
-Upon invocation,
-.Fa func
-will receive
-.Fa arg
-as its only argument.
-The return value from
+The
.Fn timeout
-is a
-.Ft struct callout_handle
-which can be used in conjunction with the
-.Fn untimeout
-function to request that a scheduled timeout be canceled.
-.Pp
-The function
-.Fn callout_handle_init
-can be used to initialize a handle to a state which will cause
-any calls to
-.Fn untimeout
-with that handle to return with no side
-effects.
-.Pp
-Assigning a callout handle the value of
-.Fn CALLOUT_HANDLE_INITIALIZER
-performs the same function as
-.Fn callout_handle_init
-and is provided for use on statically declared or global callout handles.
-.Pp
-The function
-.Fn untimeout
-cancels the timeout associated with
-.Fa handle
-using the
-.Fa func
and
-.Fa arg
-arguments to validate the handle.
-If the handle does not correspond to a timeout with
-the function
-.Fa func
-taking the argument
-.Fa arg
-no action is taken.
-.Fa handle
-must be initialized by a previous call to
-.Fn timeout ,
-.Fn callout_handle_init ,
-or assigned the value of
-.Fn CALLOUT_HANDLE_INITIALIZER "&handle"
-before being passed to
-.Fn untimeout .
-The behavior of calling
.Fn untimeout
-with an uninitialized handle
-is undefined.
-.Pp
-As handles are recycled by the system, it is possible (although unlikely)
-that a handle from one invocation of
-.Fn timeout
-may match the handle of another invocation of
-.Fn timeout
-if both calls used the same function pointer and argument, and the first
-timeout is expired or canceled before the second call.
-The timeout facility offers O(1) running time for
-.Fn timeout
-and
-.Fn untimeout .
-Timeouts are executed from
-.Fn softclock
-with the
-.Va Giant
-lock held.
-Thus they are protected from re-entrancy.
-.Sh RETURN VALUES
-The
-.Fn callout_active
-macro returns the state of a callout's
-.Em active
-flag.
-.Pp
-The
-.Fn callout_pending
-macro returns the state of a callout's
-.Em pending
-flag.
-.Pp
-The
-.Fn callout_reset
-and
-.Fn callout_schedule
-function families return non-zero if the callout was pending before the new
-function invocation was scheduled.
-.Pp
-The
-.Fn callout_stop
-and
-.Fn callout_drain
-functions return non-zero if the callout was still pending when it was
-called or zero otherwise.
-The
-.Fn timeout
-function returns a
-.Ft struct callout_handle
-that can be passed to
-.Fn untimeout .
+functions are a legacy API that will be removed in a future release.
+New code should not use these routines.
+.Ef
.Sh HISTORY
The current timeout and untimeout routines are based on the work of
.An Adam M. Costello
@@ -815,4 +803,4 @@
.Bx
linked list
callout mechanism which offered O(n) insertion and removal running time
-but did not generate or require handles for untimeout operations.
+and did not generate or require handles for untimeout operations.
Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -505,7 +505,8 @@
callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
+ mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
+ callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
/* Create credentials. */
p->p_ucred = crget();
Index: sys/kern/kern_clocksource.c
===================================================================
--- sys/kern/kern_clocksource.c
+++ sys/kern/kern_clocksource.c
@@ -160,6 +160,9 @@
int usermode;
int done, runs;
+ KASSERT(curthread->td_critnest != 0,
+ ("Must be in a critical section"));
+
CTR3(KTR_SPARE2, "handle at %d: now %d.%08x",
curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
done = 0;
Index: sys/kern/kern_condvar.c
===================================================================
--- sys/kern/kern_condvar.c
+++ sys/kern/kern_condvar.c
@@ -313,15 +313,13 @@
DROP_GIANT();
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+ sleepq_release(cvp);
sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_release(cvp);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_lock(cvp);
}
+ sleepq_lock(cvp);
rval = sleepq_timedwait(cvp, 0);
#ifdef KTRACE
@@ -383,15 +381,13 @@
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
SLEEPQ_INTERRUPTIBLE, 0);
+ sleepq_release(cvp);
sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_release(cvp);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_lock(cvp);
}
+ sleepq_lock(cvp);
rval = sleepq_timedwait_sig(cvp, 0);
#ifdef KTRACE
Index: sys/kern/kern_lock.c
===================================================================
--- sys/kern/kern_lock.c
+++ sys/kern/kern_lock.c
@@ -210,9 +210,11 @@
GIANT_SAVE();
sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
SLEEPQ_INTERRUPTIBLE : 0), queue);
- if ((flags & LK_TIMELOCK) && timo)
+ if ((flags & LK_TIMELOCK) && timo) {
+ sleepq_release(&lk->lock_object);
sleepq_set_timeout(&lk->lock_object, timo);
-
+ sleepq_lock(&lk->lock_object);
+ }
/*
* Decisional switch for real sleeping.
*/
Index: sys/kern/kern_switch.c
===================================================================
--- sys/kern/kern_switch.c
+++ sys/kern/kern_switch.c
@@ -93,8 +93,6 @@
&DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
&DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
-SCHED_STAT_DEFINE_VAR(sleepqtimo,
- &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
&DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -236,12 +236,16 @@
* return from cursig().
*/
sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
- if (sbt != 0)
- sleepq_set_timeout_sbt(ident, sbt, pr, flags);
if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
sleepq_release(ident);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
+ if (sbt != 0)
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
+ sleepq_lock(ident);
+ } else if (sbt != 0) {
+ sleepq_release(ident);
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
sleepq_lock(ident);
}
if (sbt != 0 && catch)
@@ -306,8 +310,11 @@
* We put ourselves on the sleep queue and start our timeout.
*/
sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
- if (sbt != 0)
+ if (sbt != 0) {
+ sleepq_release(ident);
sleepq_set_timeout_sbt(ident, sbt, pr, flags);
+ sleepq_lock(ident);
+ }
/*
* Can't call ktrace with any spin locks held so it can lock the
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -149,6 +149,9 @@
audit_thread_alloc(td);
#endif
umtx_thread_alloc(td);
+
+ mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
+ callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
return (0);
}
@@ -162,6 +165,10 @@
td = (struct thread *)mem;
+ /* make sure to drain any use of the "td->td_slpcallout" */
+ callout_drain(&td->td_slpcallout);
+ mtx_destroy(&td->td_slpmutex);
+
#ifdef INVARIANTS
/* Verify that this thread is in a safe state to free. */
switch (td->td_state) {
@@ -544,7 +551,6 @@
LIST_INIT(&td->td_lprof[0]);
LIST_INIT(&td->td_lprof[1]);
sigqueue_init(&td->td_sigqueue, p);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
p->p_numthreads++;
}
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c
+++ sys/kern/kern_timeout.c
@@ -54,6 +54,8 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
@@ -75,28 +77,25 @@
"struct callout *");
#ifdef CALLOUT_PROFILING
-static int avg_depth;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
+static int avg_depth[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0,
"Average number of items examined per softclock call. Units = 1/1000");
-static int avg_gcalls;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
+static int avg_gcalls[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0,
"Average number of Giant callouts made per softclock call. Units = 1/1000");
-static int avg_lockcalls;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
+static int avg_lockcalls[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0,
"Average number of lock callouts made per softclock call. Units = 1/1000");
-static int avg_mpcalls;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
+static int avg_mpcalls[2];
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0,
"Average number of MP callouts made per softclock call. Units = 1/1000");
-static int avg_depth_dir;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0,
"Average number of direct callouts examined per callout_process call. "
"Units = 1/1000");
-static int avg_lockcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
- &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
+ &avg_lockcalls[1], 0, "Average number of lock direct callouts made per "
"callout_process call. Units = 1/1000");
-static int avg_mpcalls_dir;
-SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1],
0, "Average number of MP direct callouts made per callout_process call. "
"Units = 1/1000");
#endif
@@ -124,59 +123,274 @@
*/
u_int callwheelsize, callwheelmask;
+#define CALLOUT_RET_NORMAL 0
+#define CALLOUT_RET_CANCELLED 1
+#define CALLOUT_RET_DRAINING 2
+
+struct callout_args {
+ sbintime_t time; /* absolute time for the event */
+ sbintime_t precision; /* delta allowed wrt opt */
+ void *arg; /* function argument */
+ callout_func_t *func; /* function to call */
+ int flags; /* flags passed to callout_reset() */
+ int cpu; /* CPU we're scheduled on */
+};
+
+typedef void callout_mutex_op_t(struct lock_object *);
+typedef int callout_owned_op_t(struct lock_object *);
+
+struct callout_mutex_ops {
+ callout_mutex_op_t *lock;
+ callout_mutex_op_t *unlock;
+ callout_owned_op_t *owned;
+};
+
+enum {
+ CALLOUT_LC_UNUSED_0,
+ CALLOUT_LC_UNUSED_1,
+ CALLOUT_LC_UNUSED_2,
+ CALLOUT_LC_UNUSED_3,
+ CALLOUT_LC_SPIN,
+ CALLOUT_LC_MUTEX,
+ CALLOUT_LC_RW,
+ CALLOUT_LC_RM,
+};
+
+static void
+callout_mutex_op_none(struct lock_object *lock)
+{
+}
+
+static int
+callout_owned_op_none(struct lock_object *lock)
+{
+ return (0);
+}
+
+static void
+callout_mutex_lock(struct lock_object *lock)
+{
+
+ mtx_lock((struct mtx *)lock);
+}
+
+static void
+callout_mutex_unlock(struct lock_object *lock)
+{
+
+ mtx_unlock((struct mtx *)lock);
+}
+
+static void
+callout_mutex_lock_spin(struct lock_object *lock)
+{
+
+ mtx_lock_spin((struct mtx *)lock);
+}
+
+static void
+callout_mutex_unlock_spin(struct lock_object *lock)
+{
+
+ mtx_unlock_spin((struct mtx *)lock);
+}
+
+static int
+callout_mutex_owned(struct lock_object *lock)
+{
+
+ return (mtx_owned((struct mtx *)lock));
+}
+
+static void
+callout_rm_wlock(struct lock_object *lock)
+{
+
+ rm_wlock((struct rmlock *)lock);
+}
+
+static void
+callout_rm_wunlock(struct lock_object *lock)
+{
+
+ rm_wunlock((struct rmlock *)lock);
+}
+
+static int
+callout_rm_owned(struct lock_object *lock)
+{
+
+ return (rm_wowned((struct rmlock *)lock));
+}
+
+static void
+callout_rw_wlock(struct lock_object *lock)
+{
+
+ rw_wlock((struct rwlock *)lock);
+}
+
+static void
+callout_rw_wunlock(struct lock_object *lock)
+{
+
+ rw_wunlock((struct rwlock *)lock);
+}
+
+static int
+callout_rw_owned(struct lock_object *lock)
+{
+
+ return (rw_wowned((struct rwlock *)lock));
+}
+
+static const struct callout_mutex_ops callout_mutex_ops[8] = {
+ [CALLOUT_LC_UNUSED_0] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_UNUSED_1] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_UNUSED_2] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_UNUSED_3] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_SPIN] = {
+ .lock = callout_mutex_lock_spin,
+ .unlock = callout_mutex_unlock_spin,
+ .owned = callout_mutex_owned,
+ },
+ [CALLOUT_LC_MUTEX] = {
+ .lock = callout_mutex_lock,
+ .unlock = callout_mutex_unlock,
+ .owned = callout_mutex_owned,
+ },
+ [CALLOUT_LC_RW] = {
+ .lock = callout_rw_wlock,
+ .unlock = callout_rw_wunlock,
+ .owned = callout_rw_owned,
+ },
+ [CALLOUT_LC_RM] = {
+ .lock = callout_rm_wlock,
+ .unlock = callout_rm_wunlock,
+ .owned = callout_rm_owned,
+ },
+};
+
+static inline void
+callout_lock_client(int c_flags, struct lock_object *c_lock)
+{
+
+ callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock);
+}
+
+static inline void
+callout_unlock_client(int c_flags, struct lock_object *c_lock)
+{
+
+ callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock);
+}
+
+#ifdef SMP
+static inline int
+callout_lock_owned_client(int c_flags, struct lock_object *c_lock)
+{
+
+ return (callout_mutex_ops[CALLOUT_GET_LC(c_flags)].owned(c_lock));
+}
+#endif
+
/*
- * The callout cpu exec entities represent informations necessary for
- * describing the state of callouts currently running on the CPU and the ones
- * necessary for migrating callouts to the new callout cpu. In particular,
- * the first entry of the array cc_exec_entity holds informations for callout
- * running in SWI thread context, while the second one holds informations
- * for callout running directly from hardware interrupt context.
- * The cached informations are very important for deferring migration when
- * the migrating callout is already running.
+ * The callout CPU exec structure represents information necessary for
+ * describing the state of callouts currently running on the CPU and
+ * for handling deferred callout restarts.
+ *
+ * In particular, the first entry of the array cc_exec_entity holds
+ * information for callouts running from the SWI thread context, while
+ * the second one holds information for callouts running directly from
+ * the hardware interrupt context.
*/
struct cc_exec {
+ /*
+ * The "cc_curr" points to the currently executing callout and
+ * is protected by the "cc_lock" spinlock. If no callback is
+ * currently executing it is equal to "NULL".
+ */
struct callout *cc_curr;
-#ifdef SMP
- void (*ce_migration_func)(void *);
- void *ce_migration_arg;
- int ce_migration_cpu;
- sbintime_t ce_migration_time;
- sbintime_t ce_migration_prec;
+ /*
+ * The "cc_restart_args" structure holds the argument for a
+ * deferred callback restart and is protected by the "cc_lock"
+ * spinlock. The structure is only valid if "cc_restart" is
+ * "true". If "cc_restart" is "false" the information in the
+ * "cc_restart_args" structure shall be ignored.
+ */
+ struct callout_args cc_restart_args;
+ bool cc_restart;
+ /*
+ * The "cc_cancel" variable allows the currently pending
+ * callback to be atomically cancelled. This field is write
+ * protected by the "cc_lock" spinlock.
+ */
+ bool cc_cancel;
+ /*
+ * The "cc_drain_fn" points to a function which shall be
+ * called with the argument stored in "cc_drain_arg" when an
+ * asynchronous drain is performed. This field is write
+ * protected by the "cc_lock" spinlock.
+ */
+ callout_func_t *cc_drain_fn;
+ void *cc_drain_arg;
+ /*
+ * The following fields are used for callout profiling only:
+ */
+#ifdef CALLOUT_PROFILING
+ int cc_depth;
+ int cc_mpcalls;
+ int cc_lockcalls;
+ int cc_gcalls;
#endif
- bool cc_cancel;
- bool cc_waiting;
};
/*
- * There is one struct callout_cpu per cpu, holding all relevant
+ * There is one "struct callout_cpu" per CPU, holding all relevant
* state for the callout processing thread on the individual CPU.
*/
struct callout_cpu {
struct mtx_padalign cc_lock;
struct cc_exec cc_exec_entity[2];
- struct callout *cc_next;
struct callout *cc_callout;
struct callout_list *cc_callwheel;
+ struct callout_list cc_tmplist;
struct callout_tailq cc_expireq;
struct callout_slist cc_callfree;
sbintime_t cc_firstevent;
sbintime_t cc_lastscan;
void *cc_cookie;
- u_int cc_bucket;
char cc_ktr_event_name[20];
};
-#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
-#define cc_exec_next(cc) cc->cc_next
-#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel
-#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting
-#ifdef SMP
-#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func
-#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg
-#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu
-#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time
-#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec
+#define cc_exec_curr(cc, dir) (cc)->cc_exec_entity[(dir)].cc_curr
+#define cc_exec_restart_args(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart_args
+#define cc_exec_restart(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart
+#define cc_exec_cancel(cc, dir) (cc)->cc_exec_entity[(dir)].cc_cancel
+#define cc_exec_drain_fn(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_fn
+#define cc_exec_drain_arg(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_arg
+#define cc_exec_depth(cc, dir) (cc)->cc_exec_entity[(dir)].cc_depth
+#define cc_exec_mpcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_mpcalls
+#define cc_exec_lockcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_lockcalls
+#define cc_exec_gcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_gcalls
+#ifdef SMP
struct callout_cpu cc_cpu[MAXCPU];
#define CPUBLOCK MAXCPU
#define CC_CPU(cpu) (&cc_cpu[(cpu)])
@@ -193,67 +407,13 @@
static int timeout_cpu;
static void callout_cpu_init(struct callout_cpu *cc, int cpu);
-static void softclock_call_cc(struct callout *c, struct callout_cpu *cc,
-#ifdef CALLOUT_PROFILING
- int *mpcalls, int *lockcalls, int *gcalls,
-#endif
- int direct);
+static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct);
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
-/**
- * Locked by cc_lock:
- * cc_curr - If a callout is in progress, it is cc_curr.
- * If cc_curr is non-NULL, threads waiting in
- * callout_drain() will be woken up as soon as the
- * relevant callout completes.
- * cc_cancel - Changing to 1 with both callout_lock and cc_lock held
- * guarantees that the current callout will not run.
- * The softclock() function sets this to 0 before it
- * drops callout_lock to acquire c_lock, and it calls
- * the handler only if curr_cancelled is still 0 after
- * cc_lock is successfully acquired.
- * cc_waiting - If a thread is waiting in callout_drain(), then
- * callout_wait is nonzero. Set only when
- * cc_curr is non-NULL.
- */
-
-/*
- * Resets the execution entity tied to a specific callout cpu.
- */
-static void
-cc_cce_cleanup(struct callout_cpu *cc, int direct)
-{
-
- cc_exec_curr(cc, direct) = NULL;
- cc_exec_cancel(cc, direct) = false;
- cc_exec_waiting(cc, direct) = false;
-#ifdef SMP
- cc_migration_cpu(cc, direct) = CPUBLOCK;
- cc_migration_time(cc, direct) = 0;
- cc_migration_prec(cc, direct) = 0;
- cc_migration_func(cc, direct) = NULL;
- cc_migration_arg(cc, direct) = NULL;
-#endif
-}
-
-/*
- * Checks if migration is requested by a specific callout cpu.
- */
-static int
-cc_cce_migrating(struct callout_cpu *cc, int direct)
-{
-
-#ifdef SMP
- return (cc_migration_cpu(cc, direct) != CPUBLOCK);
-#else
- return (0);
-#endif
-}
-
/*
- * Kernel low level callwheel initialization
- * called on cpu0 during kernel startup.
+ * Kernel low level callwheel initialization called from cpu0 during
+ * kernel startup:
*/
static void
callout_callwheel_init(void *dummy)
@@ -313,8 +473,6 @@
LIST_INIT(&cc->cc_callwheel[i]);
TAILQ_INIT(&cc->cc_expireq);
cc->cc_firstevent = SBT_MAX;
- for (i = 0; i < 2; i++)
- cc_cce_cleanup(cc, i);
snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
"callwheel cpu %d", cpu);
if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */
@@ -322,38 +480,38 @@
for (i = 0; i < ncallout; i++) {
c = &cc->cc_callout[i];
callout_init(c, 0);
- c->c_flags = CALLOUT_LOCAL_ALLOC;
+ c->c_flags |= CALLOUT_LOCAL_ALLOC;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
}
-#ifdef SMP
-/*
- * Switches the cpu tied to a specific callout.
- * The function expects a locked incoming callout cpu and returns with
- * locked outcoming callout cpu.
- */
-static struct callout_cpu *
-callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
+#ifdef CALLOUT_PROFILING
+static inline void
+callout_clear_stats(struct callout_cpu *cc, const int direct)
{
- struct callout_cpu *new_cc;
-
- MPASS(c != NULL && cc != NULL);
- CC_LOCK_ASSERT(cc);
+ cc_exec_depth(cc, direct) = 0;
+ cc_exec_mpcalls(cc, direct) = 0;
+ cc_exec_lockcalls(cc, direct) = 0;
+ cc_exec_gcalls(cc, direct) = 0;
+}
+#endif
- /*
- * Avoid interrupts and preemption firing after the callout cpu
- * is blocked in order to avoid deadlocks as the new thread
- * may be willing to acquire the callout cpu lock.
- */
- c->c_cpu = CPUBLOCK;
- spinlock_enter();
- CC_UNLOCK(cc);
- new_cc = CC_CPU(new_cpu);
- CC_LOCK(new_cc);
- spinlock_exit();
- c->c_cpu = new_cpu;
- return (new_cc);
+#ifdef CALLOUT_PROFILING
+static inline void
+callout_update_stats(struct callout_cpu *cc, const int direct)
+{
+ avg_depth[direct] +=
+ (cc_exec_depth(cc, direct) * 1000 -
+ avg_depth[direct]) >> 8;
+ avg_mpcalls[direct] +=
+ (cc_exec_mpcalls(cc, direct) * 1000 -
+ avg_mpcalls[direct]) >> 8;
+ avg_lockcalls[direct] +=
+ (cc_exec_lockcalls(cc, direct) * 1000 -
+ avg_lockcalls[direct]) >> 8;
+ avg_gcalls[direct] +=
+ (cc_exec_gcalls(cc, direct) * 1000 -
+ avg_gcalls[direct]) >> 8;
}
#endif
@@ -424,19 +582,19 @@
void
callout_process(sbintime_t now)
{
- struct callout *tmp, *tmpn;
+ struct callout *tmp;
struct callout_cpu *cc;
struct callout_list *sc;
sbintime_t first, last, max, tmp_max;
uint32_t lookahead;
u_int firstb, lastb, nowb;
-#ifdef CALLOUT_PROFILING
- int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
-#endif
cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ CC_LOCK(cc);
+#ifdef CALLOUT_PROFILING
+ callout_clear_stats(cc, 1);
+#endif
/* Compute the buckets of the last scan and present times. */
firstb = callout_hash(cc->cc_lastscan);
cc->cc_lastscan = now;
@@ -468,50 +626,47 @@
/* Iterate callwheel from firstb to nowb and then up to lastb. */
do {
+ LIST_INIT(&cc->cc_tmplist);
+
sc = &cc->cc_callwheel[firstb & callwheelmask];
- tmp = LIST_FIRST(sc);
- while (tmp != NULL) {
+ while (1) {
+ tmp = LIST_FIRST(sc);
+ if (tmp == NULL)
+ break;
+
+ LIST_REMOVE(tmp, c_links.le);
+
/* Run the callout if present time within allowed. */
if (tmp->c_time <= now) {
/*
- * Consumer told us the callout may be run
- * directly from hardware interrupt context.
+ * Consumer told us the callout may be
+ * run directly from the hardware
+ * interrupt context:
*/
if (tmp->c_flags & CALLOUT_DIRECT) {
-#ifdef CALLOUT_PROFILING
- ++depth_dir;
-#endif
- cc_exec_next(cc) =
- LIST_NEXT(tmp, c_links.le);
- cc->cc_bucket = firstb & callwheelmask;
- LIST_REMOVE(tmp, c_links.le);
- softclock_call_cc(tmp, cc,
-#ifdef CALLOUT_PROFILING
- &mpcalls_dir, &lockcalls_dir, NULL,
-#endif
- 1);
- tmp = cc_exec_next(cc);
- cc_exec_next(cc) = NULL;
+ softclock_call_cc(tmp, cc, 1);
} else {
- tmpn = LIST_NEXT(tmp, c_links.le);
- LIST_REMOVE(tmp, c_links.le);
TAILQ_INSERT_TAIL(&cc->cc_expireq,
tmp, c_links.tqe);
tmp->c_flags |= CALLOUT_PROCESSED;
- tmp = tmpn;
}
continue;
}
+
+ /* insert callout into temporary list */
+ LIST_INSERT_HEAD(&cc->cc_tmplist, tmp, c_links.le);
+
/* Skip events from distant future. */
if (tmp->c_time >= max)
- goto next;
+ continue;
+
/*
* Event minimal time is bigger than present maximal
* time, so it cannot be aggregated.
*/
if (tmp->c_time > last) {
lastb = nowb;
- goto next;
+ continue;
}
/* Update first and last time, respecting this event. */
if (tmp->c_time < first)
@@ -519,11 +674,14 @@
tmp_max = tmp->c_time + tmp->c_precision;
if (tmp_max < last)
last = tmp_max;
-next:
- tmp = LIST_NEXT(tmp, c_links.le);
}
+
+ /* Put temporary list back into the main bucket */
+ LIST_SWAP(sc, &cc->cc_tmplist, callout, c_links.le);
+
/* Proceed with the next bucket. */
firstb++;
+
/*
* Stop if we looked after present time and found
* some event we can't execute at now.
@@ -535,66 +693,70 @@
cpu_new_callout(curcpu, last, first);
#endif
#ifdef CALLOUT_PROFILING
- avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
- avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
- avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
+ callout_update_stats(cc, 1);
#endif
- mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ CC_UNLOCK(cc);
/*
- * swi_sched acquires the thread lock, so we don't want to call it
- * with cc_lock held; incorrect locking order.
+ * "swi_sched()" acquires the thread lock and we don't want to
+ * call it having cc_lock held because it leads to a locking
+ * order reversal issue.
*/
if (!TAILQ_EMPTY(&cc->cc_expireq))
swi_sched(cc->cc_cookie, 0);
}
-static struct callout_cpu *
+static inline struct callout_cpu *
callout_lock(struct callout *c)
{
struct callout_cpu *cc;
- int cpu;
-
- for (;;) {
- cpu = c->c_cpu;
-#ifdef SMP
- if (cpu == CPUBLOCK) {
- while (c->c_cpu == CPUBLOCK)
- cpu_spinwait();
- continue;
- }
-#endif
- cc = CC_CPU(cpu);
- CC_LOCK(cc);
- if (cpu == c->c_cpu)
- break;
- CC_UNLOCK(cc);
- }
+ cc = CC_CPU(c->c_cpu);
+ CC_LOCK(cc);
return (cc);
}
-static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc,
- sbintime_t sbt, sbintime_t precision, void (*func)(void *),
- void *arg, int cpu, int flags)
+static struct callout_cpu *
+callout_cc_add_locked(struct callout *c, struct callout_cpu *cc,
+ struct callout_args *coa, bool can_swap_cpu)
{
+#ifndef NO_EVENTTIMERS
+ sbintime_t sbt;
+#endif
int bucket;
CC_LOCK_ASSERT(cc);
- if (sbt < cc->cc_lastscan)
- sbt = cc->cc_lastscan;
- c->c_arg = arg;
- c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
- c->c_flags &= ~CALLOUT_PROCESSED;
- c->c_func = func;
- c->c_time = sbt;
- c->c_precision = precision;
+
+ /* update flags before swapping locks, if any */
+ c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART);
+ if (coa->flags & C_DIRECT_EXEC)
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT);
+ else
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+
+#ifdef SMP
+ /*
+ * Check if we are changing the CPU on which the callback
+ * should be executed and if we have a lock protecting us:
+ */
+ if (can_swap_cpu != false && coa->cpu != c->c_cpu &&
+ callout_lock_owned_client(c->c_flags, c->c_lock) != 0) {
+ CC_UNLOCK(cc);
+ c->c_cpu = coa->cpu;
+ cc = callout_lock(c);
+ }
+#endif
+ if (coa->time < cc->cc_lastscan)
+ coa->time = cc->cc_lastscan;
+ c->c_arg = coa->arg;
+ c->c_func = coa->func;
+ c->c_time = coa->time;
+ c->c_precision = coa->precision;
+
bucket = callout_get_bucket(c->c_time);
CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
c, (int)(c->c_precision >> 32),
(u_int)(c->c_precision & 0xffffffff));
LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
- if (cc->cc_bucket == bucket)
- cc_exec_next(cc) = c;
+
#ifndef NO_EVENTTIMERS
/*
* Inform the eventtimers(4) subsystem there's a new callout
@@ -605,42 +767,28 @@
sbt = c->c_time + c->c_precision;
if (sbt < cc->cc_firstevent) {
cc->cc_firstevent = sbt;
- cpu_new_callout(cpu, sbt, c->c_time);
+ cpu_new_callout(c->c_cpu, sbt, c->c_time);
}
#endif
+ return (cc);
}
-static void
+static inline void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
- return;
c->c_func = NULL;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
-static void
+static inline void
softclock_call_cc(struct callout *c, struct callout_cpu *cc,
-#ifdef CALLOUT_PROFILING
- int *mpcalls, int *lockcalls, int *gcalls,
-#endif
- int direct)
+ const int direct)
{
- struct rm_priotracker tracker;
- void (*c_func)(void *);
+ callout_func_t *c_func;
void *c_arg;
- struct lock_class *class;
struct lock_object *c_lock;
- uintptr_t lock_status;
int c_flags;
-#ifdef SMP
- struct callout_cpu *new_cc;
- void (*new_func)(void *);
- void *new_arg;
- int flags, new_cpu;
- sbintime_t new_prec, new_time;
-#endif
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
sbintime_t sbt1, sbt2;
struct timespec ts2;
@@ -651,58 +799,65 @@
KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
(CALLOUT_PENDING | CALLOUT_ACTIVE),
("softclock_call_cc: pend|act %p %x", c, c->c_flags));
- class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
- lock_status = 0;
- if (c->c_flags & CALLOUT_SHAREDLOCK) {
- if (class == &lock_class_rm)
- lock_status = (uintptr_t)&tracker;
- else
- lock_status = 1;
- }
+
c_lock = c->c_lock;
c_func = c->c_func;
c_arg = c->c_arg;
c_flags = c->c_flags;
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
- c->c_flags = CALLOUT_LOCAL_ALLOC;
- else
- c->c_flags &= ~CALLOUT_PENDING;
-
+
+ /* remove pending bit */
+ c->c_flags &= ~CALLOUT_PENDING;
+
+ /* reset our local state */
cc_exec_curr(cc, direct) = c;
- cc_exec_cancel(cc, direct) = false;
- CC_UNLOCK(cc);
+ cc_exec_restart(cc, direct) = false;
+ cc_exec_drain_fn(cc, direct) = NULL;
+ cc_exec_drain_arg(cc, direct) = NULL;
+
if (c_lock != NULL) {
- class->lc_lock(c_lock, lock_status);
+ cc_exec_cancel(cc, direct) = false;
+ CC_UNLOCK(cc);
+
+ /* unlocked region for switching locks */
+
+ callout_lock_client(c_flags, c_lock);
+
/*
- * The callout may have been cancelled
- * while we switched locks.
+ * Check if the callout may have been cancelled while
+ * we were switching locks.
+ * Even though the callout specifies a lock, that lock is
+ * not necessarily held when starting and stopping callouts.
*/
+ CC_LOCK(cc);
if (cc_exec_cancel(cc, direct)) {
- class->lc_unlock(c_lock);
- goto skip;
+ callout_unlock_client(c_flags, c_lock);
+ goto skip_cc_locked;
}
- /* The callout cannot be stopped now. */
- cc_exec_cancel(cc, direct) = true;
if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
- (*gcalls)++;
+ cc_exec_gcalls(cc, direct)++;
#endif
CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
c, c_func, c_arg);
} else {
#ifdef CALLOUT_PROFILING
- (*lockcalls)++;
+ cc_exec_lockcalls(cc, direct)++;
#endif
CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
c, c_func, c_arg);
}
} else {
#ifdef CALLOUT_PROFILING
- (*mpcalls)++;
+ cc_exec_mpcalls(cc, direct)++;
#endif
CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
c, c_func, c_arg);
}
+ /* The callout cannot be stopped now! */
+ cc_exec_cancel(cc, direct) = true;
+ CC_UNLOCK(cc);
+
+ /* unlocked region */
KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
"func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
@@ -729,85 +884,40 @@
#endif
KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
CTR1(KTR_CALLOUT, "callout %p finished", c);
+
+ /*
+ * At this point the callback structure might have been freed,
+ * so we need to check the previously copied value of
+ * "c->c_flags":
+ */
if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
- class->lc_unlock(c_lock);
-skip:
+ callout_unlock_client(c_flags, c_lock);
+
CC_LOCK(cc);
+
+skip_cc_locked:
KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
cc_exec_curr(cc, direct) = NULL;
- if (cc_exec_waiting(cc, direct)) {
- /*
- * There is someone waiting for the
- * callout to complete.
- * If the callout was scheduled for
- * migration just cancel it.
- */
- if (cc_cce_migrating(cc, direct)) {
- cc_cce_cleanup(cc, direct);
-
- /*
- * It should be assert here that the callout is not
- * destroyed but that is not easy.
- */
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
- }
- cc_exec_waiting(cc, direct) = false;
- CC_UNLOCK(cc);
- wakeup(&cc_exec_waiting(cc, direct));
- CC_LOCK(cc);
- } else if (cc_cce_migrating(cc, direct)) {
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
- ("Migrating legacy callout %p", c));
-#ifdef SMP
- /*
- * If the callout was scheduled for
- * migration just perform it now.
- */
- new_cpu = cc_migration_cpu(cc, direct);
- new_time = cc_migration_time(cc, direct);
- new_prec = cc_migration_prec(cc, direct);
- new_func = cc_migration_func(cc, direct);
- new_arg = cc_migration_arg(cc, direct);
- cc_cce_cleanup(cc, direct);
+ /* Check if there is anything which needs draining */
+ if (cc_exec_drain_fn(cc, direct) != NULL) {
/*
- * It should be assert here that the callout is not destroyed
- * but that is not easy.
- *
- * As first thing, handle deferred callout stops.
+ * Unlock the CPU callout last, so that any use of
+ * structures belonging to the callout are complete:
*/
- if (!callout_migrating(c)) {
- CTR3(KTR_CALLOUT,
- "deferred cancelled %p func %p arg %p",
- c, new_func, new_arg);
- callout_cc_del(c, cc);
- return;
- }
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
-
- new_cc = callout_cpu_switch(c, cc, new_cpu);
- flags = (direct) ? C_DIRECT_EXEC : 0;
- callout_cc_add(c, new_cc, new_time, new_prec, new_func,
- new_arg, new_cpu, flags);
- CC_UNLOCK(new_cc);
+ CC_UNLOCK(cc);
+ /* call drain function unlocked */
+ cc_exec_drain_fn(cc, direct)(
+ cc_exec_drain_arg(cc, direct));
CC_LOCK(cc);
-#else
- panic("migration should not happen");
-#endif
- }
- /*
- * If the current callout is locally allocated (from
- * timeout(9)) then put it on the freelist.
- *
- * Note: we need to check the cached copy of c_flags because
- * if it was not local, then it's not safe to deref the
- * callout pointer.
- */
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
- c->c_flags == CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- if (c_flags & CALLOUT_LOCAL_ALLOC)
+ } else if (c_flags & CALLOUT_LOCAL_ALLOC) {
+ /* return callout back to freelist */
callout_cc_del(c, cc);
+ } else if (cc_exec_restart(cc, direct)) {
+ /* [re-]schedule callout, if any */
+ (void) callout_cc_add_locked(c, cc,
+ &cc_exec_restart_args(cc, direct), false);
+ }
}
/*
@@ -831,28 +941,18 @@
{
struct callout_cpu *cc;
struct callout *c;
-#ifdef CALLOUT_PROFILING
- int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
-#endif
cc = (struct callout_cpu *)arg;
CC_LOCK(cc);
- while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- softclock_call_cc(c, cc,
-#ifdef CALLOUT_PROFILING
- &mpcalls, &lockcalls, &gcalls,
-#endif
- 0);
#ifdef CALLOUT_PROFILING
- ++depth;
+ callout_clear_stats(cc, 0);
#endif
+ while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ softclock_call_cc(c, cc, 0);
}
#ifdef CALLOUT_PROFILING
- avg_depth += (depth * 1000 - avg_depth) >> 8;
- avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
- avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
- avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
+ callout_update_stats(cc, 0);
#endif
CC_UNLOCK(cc);
}
@@ -888,10 +988,11 @@
/* XXX Attempt to malloc first */
panic("timeout table full");
SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
- callout_reset(new, to_ticks, ftn, arg);
handle.callout = new;
CC_UNLOCK(cc);
+ callout_reset(new, to_ticks, ftn, arg);
+
return (handle);
}
@@ -899,6 +1000,7 @@
untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
{
struct callout_cpu *cc;
+ bool match;
/*
* Check for a handle that was initialized
@@ -909,9 +1011,11 @@
return;
cc = callout_lock(handle.callout);
- if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
- callout_stop(handle.callout);
+ match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg);
CC_UNLOCK(cc);
+
+ if (match)
+ callout_stop(handle.callout);
}
void
@@ -920,6 +1024,118 @@
handle->callout = NULL;
}
+static int
+callout_restart_async(struct callout *c, struct callout_args *coa,
+ callout_func_t *drain_fn, void *drain_arg)
+{
+ struct callout_cpu *cc;
+ int cancelled;
+ int direct;
+
+ cc = callout_lock(c);
+
+ /* Figure out if the callout is direct or not */
+ direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
+
+ /*
+ * Check if the callback is currently scheduled for
+ * completion:
+ */
+ if (cc_exec_curr(cc, direct) == c) {
+ /*
+ * Try to prevent the callback from running by setting
+ * the "cc_cancel" variable to "true". Also check if
+ * the callout was previously subject to a deferred
+ * callout restart:
+ */
+ if (cc_exec_cancel(cc, direct) == false ||
+ (c->c_flags & CALLOUT_DEFRESTART) != 0) {
+ cc_exec_cancel(cc, direct) = true;
+ cancelled = CALLOUT_RET_CANCELLED;
+ } else {
+ cancelled = CALLOUT_RET_NORMAL;
+ }
+
+ /*
+ * Prevent callback restart if "callout_drain_xxx()"
+ * is being called or we are stopping the callout or
+ * the callback was preallocated by us:
+ */
+ if (cc_exec_drain_fn(cc, direct) != NULL ||
+ coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) {
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "cancelled and draining" : "draining",
+ c, c->c_func, c->c_arg);
+
+ /* clear old flags, if any */
+ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART | CALLOUT_PROCESSED);
+
+ /* clear restart flag, if any */
+ cc_exec_restart(cc, direct) = false;
+
+ /* set drain function, if any */
+ if (drain_fn != NULL) {
+ cc_exec_drain_fn(cc, direct) = drain_fn;
+ cc_exec_drain_arg(cc, direct) = drain_arg;
+ cancelled |= CALLOUT_RET_DRAINING;
+ }
+ } else {
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "cancelled and restarting" : "restarting",
+ c, c->c_func, c->c_arg);
+
+ /* get us back into the game */
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART);
+ c->c_flags &= ~CALLOUT_PROCESSED;
+
+ /* enable deferred restart */
+ cc_exec_restart(cc, direct) = true;
+
+ /* store arguments for the deferred restart, if any */
+ cc_exec_restart_args(cc, direct) = *coa;
+ }
+ } else {
+ /* stop callout */
+ if (c->c_flags & CALLOUT_PENDING) {
+ /*
+ * The callback has not yet been executed, and
+ * we simply just need to unlink it:
+ */
+ if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+ LIST_REMOVE(c, c_links.le);
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ }
+ cancelled = CALLOUT_RET_CANCELLED;
+ } else {
+ cancelled = CALLOUT_RET_NORMAL;
+ }
+
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "rescheduled" : "scheduled",
+ c, c->c_func, c->c_arg);
+
+ /* [re-]schedule callout, if any */
+ if (coa != NULL) {
+ cc = callout_cc_add_locked(c, cc, coa, true);
+ } else {
+ /* clear old flags, if any */
+ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART | CALLOUT_PROCESSED);
+
+ /* return callback to pre-allocated list, if any */
+ if ((c->c_flags & CALLOUT_LOCAL_ALLOC) &&
+ cancelled != CALLOUT_RET_NORMAL) {
+ callout_cc_del(c, cc);
+ }
+ }
+ }
+ CC_UNLOCK(cc);
+ return (cancelled);
+}
+
/*
* New interface; clients allocate their own callout structures.
*
@@ -938,25 +1154,32 @@
*/
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
- void (*ftn)(void *), void *arg, int cpu, int flags)
+ callout_func_t *ftn, void *arg, int cpu, int flags)
{
- sbintime_t to_sbt, pr;
- struct callout_cpu *cc;
- int cancelled, direct;
+ struct callout_args coa;
- cancelled = 0;
- if (flags & C_ABSOLUTE) {
- to_sbt = sbt;
+ /* store arguments for callout add function */
+ coa.func = ftn;
+ coa.arg = arg;
+ coa.precision = precision;
+ coa.flags = flags;
+ coa.cpu = cpu;
+
+ /* compute the rest of the arguments needed */
+ if (coa.flags & C_ABSOLUTE) {
+ coa.time = sbt;
} else {
- if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
+ sbintime_t pr;
+
+ if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt))
sbt = tick_sbt;
- if ((flags & C_HARDCLOCK) ||
+ if ((coa.flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
sbt >= sbt_timethreshold) {
- to_sbt = getsbinuptime();
+ coa.time = getsbinuptime();
/* Add safety belt for the case of hz > 1000. */
- to_sbt += tc_tick_sbt - tick_sbt;
+ coa.time += tc_tick_sbt - tick_sbt;
#else
sbt >= sbt_tickthreshold) {
/*
@@ -966,142 +1189,29 @@
* active ones.
*/
#ifdef __LP64__
- to_sbt = DPCPU_GET(hardclocktime);
+ coa.time = DPCPU_GET(hardclocktime);
#else
spinlock_enter();
- to_sbt = DPCPU_GET(hardclocktime);
+ coa.time = DPCPU_GET(hardclocktime);
spinlock_exit();
#endif
#endif
- if ((flags & C_HARDCLOCK) == 0)
- to_sbt += tick_sbt;
+ if ((coa.flags & C_HARDCLOCK) == 0)
+ coa.time += tick_sbt;
} else
- to_sbt = sbinuptime();
- if (SBT_MAX - to_sbt < sbt)
- to_sbt = SBT_MAX;
+ coa.time = sbinuptime();
+ if (SBT_MAX - coa.time < sbt)
+ coa.time = SBT_MAX;
else
- to_sbt += sbt;
- pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
- sbt >> C_PRELGET(flags));
- if (pr > precision)
- precision = pr;
+ coa.time += sbt;
+ pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(coa.flags));
+ if (pr > coa.precision)
+ coa.precision = pr;
}
- /*
- * Don't allow migration of pre-allocated callouts lest they
- * become unbalanced.
- */
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
- cpu = c->c_cpu;
- /*
- * This flag used to be added by callout_cc_add, but the
- * first time you call this we could end up with the
- * wrong direct flag if we don't do it before we add.
- */
- if (flags & C_DIRECT_EXEC) {
- c->c_flags |= CALLOUT_DIRECT;
- }
- direct = (c->c_flags & CALLOUT_DIRECT) != 0;
- KASSERT(!direct || c->c_lock == NULL,
- ("%s: direct callout %p has lock", __func__, c));
- cc = callout_lock(c);
- if (cc_exec_curr(cc, direct) == c) {
- /*
- * We're being asked to reschedule a callout which is
- * currently in progress. If there is a lock then we
- * can cancel the callout if it has not really started.
- */
- if (c->c_lock != NULL && cc_exec_cancel(cc, direct))
- cancelled = cc_exec_cancel(cc, direct) = true;
- if (cc_exec_waiting(cc, direct)) {
- /*
- * Someone has called callout_drain to kill this
- * callout. Don't reschedule.
- */
- CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
- cancelled ? "cancelled" : "failed to cancel",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- return (cancelled);
- }
-#ifdef SMP
- if (callout_migrating(c)) {
- /*
- * This only occurs when a second callout_reset_sbt_on
- * is made after a previous one moved it into
- * deferred migration (below). Note we do *not* change
- * the prev_cpu even though the previous target may
- * be different.
- */
- cc_migration_cpu(cc, direct) = cpu;
- cc_migration_time(cc, direct) = to_sbt;
- cc_migration_prec(cc, direct) = precision;
- cc_migration_func(cc, direct) = ftn;
- cc_migration_arg(cc, direct) = arg;
- cancelled = 1;
- CC_UNLOCK(cc);
- return (cancelled);
- }
-#endif
- }
- if (c->c_flags & CALLOUT_PENDING) {
- if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
- if (cc_exec_next(cc) == c)
- cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
- LIST_REMOVE(c, c_links.le);
- } else
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- cancelled = 1;
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
- }
-
-#ifdef SMP
- /*
- * If the callout must migrate try to perform it immediately.
- * If the callout is currently running, just defer the migration
- * to a more appropriate moment.
- */
- if (c->c_cpu != cpu) {
- if (cc_exec_curr(cc, direct) == c) {
- /*
- * Pending will have been removed since we are
- * actually executing the callout on another
- * CPU. That callout should be waiting on the
- * lock the caller holds. If we set both
- * active/and/pending after we return and the
- * lock on the executing callout proceeds, it
- * will then see pending is true and return.
- * At the return from the actual callout execution
- * the migration will occur in softclock_call_cc
- * and this new callout will be placed on the
- * new CPU via a call to callout_cpu_switch() which
- * will get the lock on the right CPU followed
- * by a call callout_cc_add() which will add it there.
- * (see above in softclock_call_cc()).
- */
- cc_migration_cpu(cc, direct) = cpu;
- cc_migration_time(cc, direct) = to_sbt;
- cc_migration_prec(cc, direct) = precision;
- cc_migration_func(cc, direct) = ftn;
- cc_migration_arg(cc, direct) = arg;
- c->c_flags |= (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING);
- CTR6(KTR_CALLOUT,
- "migration of %p func %p arg %p in %d.%08x to %u deferred",
- c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
- (u_int)(to_sbt & 0xffffffff), cpu);
- CC_UNLOCK(cc);
- return (cancelled);
- }
- cc = callout_cpu_switch(c, cc, cpu);
- }
-#endif
- callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
- CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
- cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
- (u_int)(to_sbt & 0xffffffff));
- CC_UNLOCK(cc);
-
- return (cancelled);
+ /* get callback started, if any */
+ return (callout_restart_async(c, &coa, NULL, NULL));
}
/*
@@ -1120,252 +1230,106 @@
}
int
-_callout_stop_safe(struct callout *c, int safe)
+callout_stop(struct callout *c)
{
- struct callout_cpu *cc, *old_cc;
- struct lock_class *class;
- int direct, sq_locked, use_lock;
- int not_on_a_list;
-
- if (safe)
- WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
- "calling %s", __func__);
+ /* get callback stopped, if any */
+ return (callout_restart_async(c, NULL, NULL, NULL));
+}
- /*
- * Some old subsystems don't hold Giant while running a callout_stop(),
- * so just discard this check for the moment.
- */
- if (!safe && c->c_lock != NULL) {
- if (c->c_lock == &Giant.lock_object)
- use_lock = mtx_owned(&Giant);
- else {
- use_lock = 1;
- class = LOCK_CLASS(c->c_lock);
- class->lc_assert(c->c_lock, LA_XLOCKED);
- }
- } else
- use_lock = 0;
- direct = (c->c_flags & CALLOUT_DIRECT) != 0;
- sq_locked = 0;
- old_cc = NULL;
-again:
- cc = callout_lock(c);
+static void
+callout_drain_function(void *arg)
+{
+ wakeup(arg);
+}
- if ((c->c_flags & (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) ==
- (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) {
- /*
- * Special case where this slipped in while we
- * were migrating *as* the callout is about to
- * execute. The caller probably holds the lock
- * the callout wants.
- *
- * Get rid of the migration first. Then set
- * the flag that tells this code *not* to
- * try to remove it from any lists (its not
- * on one yet). When the callout wheel runs,
- * it will ignore this callout.
- */
- c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_ACTIVE);
- not_on_a_list = 1;
- } else {
- not_on_a_list = 0;
- }
+int
+callout_drain_async(struct callout *c, callout_func_t *fn, void *arg)
+{
+ /* get callback stopped, if any */
+ return (callout_restart_async(
+ c, NULL, fn, arg) & CALLOUT_RET_DRAINING);
+}
- /*
- * If the callout was migrating while the callout cpu lock was
- * dropped, just drop the sleepqueue lock and check the states
- * again.
- */
- if (sq_locked != 0 && cc != old_cc) {
-#ifdef SMP
- CC_UNLOCK(cc);
- sleepq_release(&cc_exec_waiting(old_cc, direct));
- sq_locked = 0;
- old_cc = NULL;
- goto again;
-#else
- panic("migration should not happen");
-#endif
- }
+int
+callout_drain(struct callout *c)
+{
+ int cancelled;
- /*
- * If the callout isn't pending, it's not on the queue, so
- * don't attempt to remove it from the queue. We can try to
- * stop it by other means however.
- */
- if (!(c->c_flags & CALLOUT_PENDING)) {
- c->c_flags &= ~CALLOUT_ACTIVE;
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "Draining callout");
+
+ callout_lock_client(c->c_flags, c->c_lock);
+
+ /* at this point the "c->c_cpu" field is not changing */
+
+ cancelled = callout_drain_async(c, &callout_drain_function, c);
+
+ if (cancelled != CALLOUT_RET_NORMAL) {
+ struct callout_cpu *cc;
+ int direct;
+
+ CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p",
+ c, c->c_func, c->c_arg);
+
+ cc = callout_lock(c);
+ direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
/*
- * If it wasn't on the queue and it isn't the current
- * callout, then we can't stop it, so just bail.
+ * We've acquired the callout CPU lock; it is now safe to
+ * drop the initial lock:
*/
- if (cc_exec_curr(cc, direct) != c) {
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- if (sq_locked)
- sleepq_release(&cc_exec_waiting(cc, direct));
- return (0);
- }
+ callout_unlock_client(c->c_flags, c->c_lock);
- if (safe) {
- /*
- * The current callout is running (or just
- * about to run) and blocking is allowed, so
- * just wait for the current invocation to
- * finish.
- */
- while (cc_exec_curr(cc, direct) == c) {
- /*
- * Use direct calls to sleepqueue interface
- * instead of cv/msleep in order to avoid
- * a LOR between cc_lock and sleepqueue
- * chain spinlocks. This piece of code
- * emulates a msleep_spin() call actually.
- *
- * If we already have the sleepqueue chain
- * locked, then we can safely block. If we
- * don't already have it locked, however,
- * we have to drop the cc_lock to lock
- * it. This opens several races, so we
- * restart at the beginning once we have
- * both locks. If nothing has changed, then
- * we will end up back here with sq_locked
- * set.
- */
- if (!sq_locked) {
- CC_UNLOCK(cc);
- sleepq_lock(
- &cc_exec_waiting(cc, direct));
- sq_locked = 1;
- old_cc = cc;
- goto again;
- }
+ /* Wait for drain to complete */
+
+ while (cc_exec_curr(cc, direct) == c)
+ msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0);
- /*
- * Migration could be cancelled here, but
- * as long as it is still not sure when it
- * will be packed up, just let softclock()
- * take care of it.
- */
- cc_exec_waiting(cc, direct) = true;
- DROP_GIANT();
- CC_UNLOCK(cc);
- sleepq_add(
- &cc_exec_waiting(cc, direct),
- &cc->cc_lock.lock_object, "codrain",
- SLEEPQ_SLEEP, 0);
- sleepq_wait(
- &cc_exec_waiting(cc, direct),
- 0);
- sq_locked = 0;
- old_cc = NULL;
-
- /* Reacquire locks previously released. */
- PICKUP_GIANT();
- CC_LOCK(cc);
- }
- } else if (use_lock &&
- !cc_exec_cancel(cc, direct)) {
-
- /*
- * The current callout is waiting for its
- * lock which we hold. Cancel the callout
- * and return. After our caller drops the
- * lock, the callout will be skipped in
- * softclock().
- */
- cc_exec_cancel(cc, direct) = true;
- CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
- c, c->c_func, c->c_arg);
- KASSERT(!cc_cce_migrating(cc, direct),
- ("callout wrongly scheduled for migration"));
- CC_UNLOCK(cc);
- KASSERT(!sq_locked, ("sleepqueue chain locked"));
- return (1);
- } else if (callout_migrating(c)) {
- /*
- * The callout is currently being serviced
- * and the "next" callout is scheduled at
- * its completion with a migration. We remove
- * the migration flag so it *won't* get rescheduled,
- * but we can't stop the one thats running so
- * we return 0.
- */
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
-#ifdef SMP
- /*
- * We can't call cc_cce_cleanup here since
- * if we do it will remove .ce_curr and
- * its still running. This will prevent a
- * reschedule of the callout when the
- * execution completes.
- */
- cc_migration_cpu(cc, direct) = CPUBLOCK;
- cc_migration_time(cc, direct) = 0;
- cc_migration_prec(cc, direct) = 0;
- cc_migration_func(cc, direct) = NULL;
- cc_migration_arg(cc, direct) = NULL;
-#endif
- CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- return (0);
- }
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
- KASSERT(!sq_locked, ("sleepqueue chain still locked"));
- return (0);
+ } else {
+ callout_unlock_client(c->c_flags, c->c_lock);
}
- if (sq_locked)
- sleepq_release(&cc_exec_waiting(cc, direct));
-
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- if (not_on_a_list == 0) {
- if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
- if (cc_exec_next(cc) == c)
- cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
- LIST_REMOVE(c, c_links.le);
- } else
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- }
- callout_cc_del(c, cc);
- CC_UNLOCK(cc);
- return (1);
+
+ return (cancelled & CALLOUT_RET_CANCELLED);
}
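
For illustration only (not part of this diff): a minimal consumer of the new callout_drain_async() interface, assuming a hypothetical MPSAFE softc named foo_softc. The return convention follows the implementation above: non-zero means the handler is still draining and the supplied function will run from callout context once it has finished; zero means nothing was pending or running and no callback will follow.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/callout.h>

struct foo_softc {                      /* hypothetical driver state */
        struct callout  sc_timer;       /* set up with callout_init(&sc_timer, 1) */
};

static void
foo_drained(void *arg)
{
        struct foo_softc *sc = arg;

        /* The timer handler can no longer be running; reclaim. */
        free(sc, M_TEMP);
}

static void
foo_detach(struct foo_softc *sc)
{
        if (callout_drain_async(&sc->sc_timer, &foo_drained, sc) == 0) {
                /* Neither pending nor running; no callback follows. */
                free(sc, M_TEMP);
        }
        /* Else foo_drained() frees sc later, from callout context. */
}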
void
callout_init(struct callout *c, int mpsafe)
{
- bzero(c, sizeof *c);
if (mpsafe) {
- c->c_lock = NULL;
- c->c_flags = CALLOUT_RETURNUNLOCKED;
+ _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED);
} else {
- c->c_lock = &Giant.lock_object;
- c->c_flags = 0;
+ _callout_init_lock(c, &Giant.lock_object, 0);
}
- c->c_cpu = timeout_cpu;
}
void
_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
bzero(c, sizeof *c);
+ KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
+ ("callout_init_lock: bad flags 0x%08x", flags));
+ flags &= CALLOUT_RETURNUNLOCKED;
+ if (lock != NULL) {
+ struct lock_class *class = LOCK_CLASS(lock);
+ if (class == &lock_class_mtx_sleep)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX);
+ else if (class == &lock_class_mtx_spin)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN);
+ else if (class == &lock_class_rm)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_RM);
+ else if (class == &lock_class_rw)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_RW);
+ else
+ panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name);
+ } else {
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0);
+ }
c->c_lock = lock;
- KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
- ("callout_init_lock: bad flags %d", flags));
- KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
- ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
- KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
- (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
- __func__));
- c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
+ c->c_flags = flags;
c->c_cpu = timeout_cpu;
}
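
A short sketch (not part of the diff) of how the rewritten _callout_init_lock() is reached through the existing wrapper macros. Under the new code the backing lock's class is recorded in c_flags with CALLOUT_SET_LC(), and an unsupported lock class now panics at initialization time instead of failing later. The foo_* names are illustrative:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/callout.h>

static struct mtx foo_mtx;
static struct rwlock foo_rw;
static struct callout foo_mtx_timer;
static struct callout foo_rw_timer;

static void
foo_timers_init(void)
{
        mtx_init(&foo_mtx, "foomtx", NULL, MTX_DEF);
        rw_init(&foo_rw, "foorw");

        /* Records CALLOUT_LC_MUTEX in foo_mtx_timer.c_flags. */
        callout_init_mtx(&foo_mtx_timer, &foo_mtx, 0);

        /* Records CALLOUT_LC_RW; the handler runs with foo_rw held. */
        callout_init_rw(&foo_rw_timer, &foo_rw, 0);
}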
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c
+++ sys/kern/subr_sleepqueue.c
@@ -152,7 +152,8 @@
*/
static int sleepq_catch_signals(void *wchan, int pri);
static int sleepq_check_signals(void);
-static int sleepq_check_timeout(void);
+static int sleepq_check_timeout(struct thread *);
+static void sleepq_stop_timeout(struct thread *);
#ifdef INVARIANTS
static void sleepq_dtor(void *mem, int size, void *arg);
#endif
@@ -373,17 +374,14 @@
sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
int flags)
{
- struct sleepqueue_chain *sc;
struct thread *td;
td = curthread;
- sc = SC_LOOKUP(wchan);
- mtx_assert(&sc->sc_lock, MA_OWNED);
- MPASS(TD_ON_SLEEPQ(td));
- MPASS(td->td_sleepqueue == NULL);
- MPASS(wchan != NULL);
+
+ mtx_lock_spin(&td->td_slpmutex);
callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+ mtx_unlock_spin(&td->td_slpmutex);
}
/*
@@ -559,11 +557,8 @@
* Check to see if we timed out.
*/
static int
-sleepq_check_timeout(void)
+sleepq_check_timeout(struct thread *td)
{
- struct thread *td;
-
- td = curthread;
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
@@ -573,28 +568,21 @@
td->td_flags &= ~TDF_TIMEOUT;
return (EWOULDBLOCK);
}
-
- /*
- * If TDF_TIMOFAIL is set, the timeout ran after we had
- * already been woken up.
- */
- if (td->td_flags & TDF_TIMOFAIL)
- td->td_flags &= ~TDF_TIMOFAIL;
-
- /*
- * If callout_stop() fails, then the timeout is running on
- * another CPU, so synchronize with it to avoid having it
- * accidentally wake up a subsequent sleep.
- */
- else if (callout_stop(&td->td_slpcallout) == 0) {
- td->td_flags |= TDF_TIMEOUT;
- TD_SET_SLEEPING(td);
- mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
- }
return (0);
}
/*
+ * Atomically stop the timeout by using a mutex.
+ */
+static void
+sleepq_stop_timeout(struct thread *td)
+{
+ mtx_lock_spin(&td->td_slpmutex);
+ callout_stop(&td->td_slpcallout);
+ mtx_unlock_spin(&td->td_slpmutex);
+}
+
+/*
* Check to see if we were awoken by a signal.
*/
static int
@@ -664,9 +652,11 @@
MPASS(!(td->td_flags & TDF_SINTR));
thread_lock(td);
sleepq_switch(wchan, pri);
- rval = sleepq_check_timeout();
+ rval = sleepq_check_timeout(td);
thread_unlock(td);
+ sleepq_stop_timeout(td);
+
return (rval);
}
@@ -677,12 +667,18 @@
int
sleepq_timedwait_sig(void *wchan, int pri)
{
+ struct thread *td;
int rcatch, rvalt, rvals;
+ td = curthread;
+
rcatch = sleepq_catch_signals(wchan, pri);
- rvalt = sleepq_check_timeout();
+ rvalt = sleepq_check_timeout(td);
rvals = sleepq_check_signals();
- thread_unlock(curthread);
+ thread_unlock(td);
+
+ sleepq_stop_timeout(td);
+
if (rcatch)
return (rcatch);
if (rvals)
@@ -889,64 +885,49 @@
static void
sleepq_timeout(void *arg)
{
- struct sleepqueue_chain *sc;
- struct sleepqueue *sq;
- struct thread *td;
- void *wchan;
- int wakeup_swapper;
+ struct thread *td = arg;
+ int wakeup_swapper = 0;
- td = arg;
- wakeup_swapper = 0;
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
- /*
- * First, see if the thread is asleep and get the wait channel if
- * it is.
- */
- thread_lock(td);
- if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
- wchan = td->td_wchan;
- sc = SC_LOOKUP(wchan);
- THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
- sq = sleepq_lookup(wchan);
- MPASS(sq != NULL);
- td->td_flags |= TDF_TIMEOUT;
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
- return;
- }
+ /* Handle the three cases that can happen */
- /*
- * If the thread is on the SLEEPQ but isn't sleeping yet, it
- * can either be on another CPU in between sleepq_add() and
- * one of the sleepq_*wait*() routines or it can be in
- * sleepq_catch_signals().
- */
+ thread_lock(td);
if (TD_ON_SLEEPQ(td)) {
- td->td_flags |= TDF_TIMEOUT;
- thread_unlock(td);
- return;
- }
+ if (TD_IS_SLEEPING(td)) {
+ struct sleepqueue_chain *sc;
+ struct sleepqueue *sq;
+ void *wchan;
- /*
- * Now check for the edge cases. First, if TDF_TIMEOUT is set,
- * then the other thread has already yielded to us, so clear
- * the flag and resume it. If TDF_TIMEOUT is not set, then the
- * we know that the other thread is not on a sleep queue, but it
- * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
- * to let it know that the timeout has already run and doesn't
- * need to be canceled.
- */
- if (td->td_flags & TDF_TIMEOUT) {
- MPASS(TD_IS_SLEEPING(td));
- td->td_flags &= ~TDF_TIMEOUT;
- TD_CLR_SLEEPING(td);
- wakeup_swapper = setrunnable(td);
- } else
- td->td_flags |= TDF_TIMOFAIL;
+ /*
+ * Case I - thread is asleep and needs to be
+ * awoken:
+ */
+ wchan = td->td_wchan;
+ sc = SC_LOOKUP(wchan);
+ THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
+ sq = sleepq_lookup(wchan);
+ MPASS(sq != NULL);
+ td->td_flags |= TDF_TIMEOUT;
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0);
+ } else {
+ /*
+ * Case II - cancel going to sleep by setting
+ * the timeout flag because the target thread
+ * is not asleep yet. It can be on another CPU
+ * in between sleepq_add() and one of the
+ * sleepq_*wait*() routines or it can be in
+ * sleepq_catch_signals().
+ */
+ td->td_flags |= TDF_TIMEOUT;
+ }
+ } else {
+ /*
+ * Case III - thread is already woken up by a wakeup
+ * call and should not time out. Nothing to do!
+ */
+ }
thread_unlock(td);
if (wakeup_swapper)
kick_proc0();
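
The net effect on the sleep-timeout path is that arming and stopping the per-thread callout are now serialized by td_slpmutex instead of the old TDF_TIMOFAIL handshake. Below is a generic sketch of that pattern using hypothetical bar_* names (the real code is sleepq_set_timeout_sbt() and sleepq_stop_timeout() above); it is not part of the diff.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct bar {
        struct mtx      bar_mtx;        /* spin mutex, like td_slpmutex */
        struct callout  bar_timer;      /* like td_slpcallout */
};

static void
bar_expired(void *arg)
{
        /* Runs with bar_mtx held, per the callout_init_mtx() pairing. */
}

static void
bar_init(struct bar *b)
{
        mtx_init(&b->bar_mtx, "barmtx", NULL, MTX_SPIN);
        /* Spin mutexes are an accepted callout lock class after this change. */
        callout_init_mtx(&b->bar_timer, &b->bar_mtx, 0);
}

static void
bar_arm(struct bar *b, sbintime_t sbt)
{
        /* Mirrors sleepq_set_timeout_sbt(): reset under the spin mutex. */
        mtx_lock_spin(&b->bar_mtx);
        callout_reset_sbt(&b->bar_timer, sbt, 0, bar_expired, b, 0);
        mtx_unlock_spin(&b->bar_mtx);
}

static void
bar_disarm(struct bar *b)
{
        /* Mirrors sleepq_stop_timeout(): stop under the same mutex. */
        mtx_lock_spin(&b->bar_mtx);
        callout_stop(&b->bar_timer);
        mtx_unlock_spin(&b->bar_mtx);
}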
Index: sys/ofed/include/linux/completion.h
===================================================================
--- sys/ofed/include/linux/completion.h
+++ sys/ofed/include/linux/completion.h
@@ -105,7 +105,9 @@
if (c->done)
break;
sleepq_add(c, NULL, "completion", flags, 0);
+ sleepq_release(c);
sleepq_set_timeout(c, end - ticks);
+ sleepq_lock(c);
if (flags & SLEEPQ_INTERRUPTIBLE) {
if (sleepq_timedwait_sig(c, 0) != 0)
return (-ERESTARTSYS);
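
The reordering above is needed because sleepq_set_timeout() no longer runs under the sleepqueue chain lock; it takes the thread's td_slpmutex internally. A condensed sketch of the resulting call order for a sleepqueue consumer (illustrative only, not part of the diff):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/sleepqueue.h>

static int
wait_on_chan_timo(void *chan, const char *wmesg, int timo)
{
        sleepq_lock(chan);
        sleepq_add(chan, NULL, wmesg, 0, 0);
        /* Drop the chain lock before arming the timeout ... */
        sleepq_release(chan);
        sleepq_set_timeout(chan, timo);
        /* ... and reacquire it before going to sleep. */
        sleepq_lock(chan);
        return (sleepq_timedwait(chan, 0));
}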
Index: sys/sys/_callout.h
===================================================================
--- sys/sys/_callout.h
+++ sys/sys/_callout.h
@@ -46,6 +46,8 @@
SLIST_HEAD(callout_slist, callout);
TAILQ_HEAD(callout_tailq, callout);
+typedef void callout_func_t(void *);
+
struct callout {
union {
LIST_ENTRY(callout) le;
@@ -55,7 +57,7 @@
sbintime_t c_time; /* ticks to the event */
sbintime_t c_precision; /* delta allowed wrt opt */
void *c_arg; /* function argument */
- void (*c_func)(void *); /* function to call */
+ callout_func_t *c_func; /* function to call */
struct lock_object *c_lock; /* lock to handle */
int c_flags; /* state of this entry */
volatile int c_cpu; /* CPU we're scheduled on */
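
With the typedef in place, handlers can be forward-declared through it, matching the existing timeout_t idiom (hypothetical names, shown for illustration only):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

static callout_func_t foo_expire;       /* forward declaration via the typedef */
static struct callout foo_timer;

static void
foo_expire(void *arg)
{
        /* Periodic work, then rearm one second (hz ticks) later. */
        callout_reset(&foo_timer, hz, foo_expire, arg);
}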
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h
+++ sys/sys/callout.h
@@ -45,10 +45,12 @@
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */
-#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */
-#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */
+#define CALLOUT_UNUSED_5 0x0020 /* --available-- */
+#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */
#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */
#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */
+#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */
+#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */
#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */
#define C_PRELBITS 7
@@ -64,9 +66,9 @@
#ifdef _KERNEL
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
-#define callout_migrating(c) ((c)->c_flags & CALLOUT_DFRMIGRATION)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
-#define callout_drain(c) _callout_stop_safe(c, 1)
+int callout_drain(struct callout *);
+int callout_drain_async(struct callout *, callout_func_t *, void *);
void callout_init(struct callout *, int);
void _callout_init_lock(struct callout *, struct lock_object *, int);
#define callout_init_mtx(c, mtx, flags) \
@@ -80,7 +82,7 @@
NULL, (flags))
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
- void (*)(void *), void *, int, int);
+ callout_func_t *, void *, int, int);
#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, (flags))
#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
@@ -104,8 +106,7 @@
int callout_schedule_on(struct callout *, int, int);
#define callout_schedule_curcpu(c, on_tick) \
callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
-#define callout_stop(c) _callout_stop_safe(c, 0)
-int _callout_stop_safe(struct callout *, int);
+int callout_stop(struct callout *);
void callout_process(sbintime_t now);
#endif
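
CALLOUT_SET_LC()/CALLOUT_GET_LC() simply pack a 3-bit lock-class index into bits 16-18 of c_flags, leaving the existing flag bits untouched. A standalone illustration of the encoding (the numeric CALLOUT_LC_* class values are defined elsewhere in the full patch and are not assumed here):

#include <stdio.h>

#define CALLOUT_RETURNUNLOCKED  0x0010          /* from the hunk above */
#define CALLOUT_SET_LC(x)       (((x) & 7) << 16)
#define CALLOUT_GET_LC(x)       (((x) >> 16) & 7)

int
main(void)
{
        int c_flags = CALLOUT_RETURNUNLOCKED | CALLOUT_SET_LC(3);

        /* Prints class=3; the low flag bits are unaffected by the class field. */
        printf("class=%d low=0x%04x\n", CALLOUT_GET_LC(c_flags),
            c_flags & 0xffff);
        return (0);
}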
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -308,6 +308,7 @@
} td_uretoff; /* (k) Syscall aux returns. */
#define td_retval td_uretoff.tdu_retval
struct callout td_slpcallout; /* (h) Callout for sleep. */
+ struct mtx td_slpmutex; /* (h) Mutex for sleep callout */
struct trapframe *td_frame; /* (k) */
struct vm_object *td_kstack_obj;/* (a) Kstack object. */
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */
@@ -364,7 +365,7 @@
#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
-#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
+#define TDF_UNUSED12 0x00001000 /* --available-- */
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
@@ -706,7 +707,7 @@
#define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */
#define SWT_TURNSTILE 3 /* Turnstile contention. */
#define SWT_SLEEPQ 4 /* Sleepq wait. */
-#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
+#define SWT_UNUSED5 5 /* --available-- */
#define SWT_RELINQUISH 6 /* yield call. */
#define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */
#define SWT_IDLE 8 /* Switching from the idle thread. */
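
How the new td_slpmutex field is initialized is not visible in this excerpt; that happens in the thread setup code elsewhere in the patch. The sketch below is only an assumption of what such an init could look like, pairing the spin mutex with td_slpcallout so that sleepq_set_timeout_sbt() and sleepq_stop_timeout() above can serialize on it:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

/* Hypothetical helper; the real init site and flags may differ. */
static void
thread_sleep_timer_init(struct thread *td)
{
        mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
        callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
}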
