Index: stable/10/sys/kern/kern_lock.c =================================================================== --- stable/10/sys/kern/kern_lock.c (revision 278649) +++ stable/10/sys/kern/kern_lock.c (revision 278650) @@ -1,1532 +1,1552 @@ /*- * Copyright (c) 2008 Attilio Rao * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ #include "opt_adaptive_lockmgrs.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #ifdef DEBUG_LOCKS #include #endif #include #include #include #ifdef DDB #include #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) == (LK_ADAPTIVE | LK_NOSHARE)); CTASSERT(LK_UNLOCKED == (LK_UNLOCKED & ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS))); #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 #ifndef INVARIANTS #define _lockmgr_assert(lk, what, file, line) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #else #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) ((td)->td_locks--) #endif #define TD_SLOCKS_INC(td) ((td)->td_lk_slocks++) #define TD_SLOCKS_DEC(td) ((td)->td_lk_slocks--) #ifndef DEBUG_LOCKS #define STACK_PRINT(lk) #define STACK_SAVE(lk) #define STACK_ZERO(lk) #else #define STACK_PRINT(lk) stack_print_ddb(&(lk)->lk_stack) #define STACK_SAVE(lk) stack_save(&(lk)->lk_stack) #define STACK_ZERO(lk) stack_zero(&(lk)->lk_stack) #endif #define LOCK_LOG2(lk, string, arg1, arg2) \ if (LOCK_LOG_TEST(&(lk)->lock_object, 0)) \ CTR2(KTR_LOCK, (string), (arg1), (arg2)) #define LOCK_LOG3(lk, string, arg1, arg2, arg3) \ if (LOCK_LOG_TEST(&(lk)->lock_object, 0)) \ CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3)) #define GIANT_DECLARE \ int _i = 0; \ WITNESS_SAVE_DECL(Giant) #define GIANT_RESTORE() do { \ if (_i > 0) { \ while (_i--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _i++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define LK_CAN_SHARE(x, flags) \ (((x) & LK_SHARE) && \ (((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 || \ (curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) || \ (curthread->td_pflags & TDP_DEADLKTREAT))) #define LK_TRYOP(x) \ ((x) & LK_NOWAIT) #define LK_CAN_WITNESS(x) \ (((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x)) #define LK_TRYWIT(x) \ (LK_TRYOP(x) ? LOP_TRYLOCK : 0) #define LK_CAN_ADAPT(lk, f) \ (((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 && \ ((f) & LK_SLEEPFAIL) == 0) #define lockmgr_disowned(lk) \ (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC) #define lockmgr_xlocked(lk) \ (((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread) static void assert_lockmgr(const struct lock_object *lock, int how); #ifdef DDB static void db_show_lockmgr(const struct lock_object *lock); #endif static void lock_lockmgr(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_lockmgr(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_lockmgr(struct lock_object *lock); struct lock_class lock_class_lockmgr = { .lc_name = "lockmgr", .lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE, .lc_assert = assert_lockmgr, #ifdef DDB .lc_ddb_show = db_show_lockmgr, #endif .lc_lock = lock_lockmgr, .lc_unlock = unlock_lockmgr, #ifdef KDTRACE_HOOKS .lc_owner = owner_lockmgr, #endif }; #ifdef ADAPTIVE_LOCKMGRS static u_int alk_retries = 10; static u_int alk_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL, "lockmgr debugging"); SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, ""); SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, ""); #endif static __inline struct thread * lockmgr_xholder(const struct lock *lk) { uintptr_t x; x = lk->lk_lock; return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x)); } /* * It assumes sleepq_lock held and returns with this one unheld. * It also assumes the generic interlock is sane and previously checked. * If LK_INTERLOCK is specified the interlock is not reacquired after the * sleep. */ static __inline int sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int pri, int timo, int queue) { GIANT_DECLARE; struct lock_class *class; int catch, error; class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL; catch = pri & PCATCH; pri &= PRIMASK; error = 0; LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk, (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared"); if (flags & LK_INTERLOCK) class->lc_unlock(ilk); if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0) lk->lk_exslpfail++; GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ? SLEEPQ_INTERRUPTIBLE : 0), queue); if ((flags & LK_TIMELOCK) && timo) sleepq_set_timeout(&lk->lock_object, timo); /* * Decisional switch for real sleeping. */ if ((flags & LK_TIMELOCK) && timo && catch) error = sleepq_timedwait_sig(&lk->lock_object, pri); else if ((flags & LK_TIMELOCK) && timo) error = sleepq_timedwait(&lk->lock_object, pri); else if (catch) error = sleepq_wait_sig(&lk->lock_object, pri); else sleepq_wait(&lk->lock_object, pri); GIANT_RESTORE(); if ((flags & LK_SLEEPFAIL) && error == 0) error = ENOLCK; return (error); } static __inline int wakeupshlk(struct lock *lk, const char *file, int line) { uintptr_t v, x; u_int realexslp; int queue, wakeup_swapper; WITNESS_UNLOCK(&lk->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line); wakeup_swapper = 0; for (;;) { x = lk->lk_lock; /* * If there is more than one shared lock held, just drop one * and return. */ if (LK_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, x - LK_ONE_SHARER)) break; continue; } /* * If there are not waiters on the exclusive queue, drop the * lock quickly. */ if ((x & LK_ALL_WAITERS) == 0) { MPASS((x & ~LK_EXCLUSIVE_SPINNERS) == LK_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED)) break; continue; } /* * We should have a sharer with waiters, so enter the hard * path in order to handle wakeups correctly. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); v = LK_UNLOCKED; /* * If the lock has exclusive waiters, give them preference in * order to avoid deadlock with shared runners up. * If interruptible sleeps left the exclusive queue empty * avoid a starvation for the threads sleeping on the shared * queue by giving them precedence and cleaning up the * exclusive waiters bit anyway. * Please note that lk_exslpfail count may be lying about * the real number of waiters with the LK_SLEEPFAIL flag on * because they may be used in conjuction with interruptible * sleeps so lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) { if (lk->lk_exslpfail < realexslp) { lk->lk_exslpfail = 0; queue = SQ_EXCLUSIVE_QUEUE; v |= (x & LK_SHARED_WAITERS); } else { lk->lk_exslpfail = 0; LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); queue = SQ_SHARED_QUEUE; } } else { /* * Exclusive waiters sleeping with LK_SLEEPFAIL on * and using interruptible sleeps/timeout may have * left spourious lk_exslpfail counts on, so clean * it up anyway. */ lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; } if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x, v)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); sleepq_release(&lk->lock_object); break; } lock_profile_release_lock(&lk->lock_object); TD_LOCKS_DEC(curthread); TD_SLOCKS_DEC(curthread); return (wakeup_swapper); } static void assert_lockmgr(const struct lock_object *lock, int what) { panic("lockmgr locks do not support assertions"); } static void lock_lockmgr(struct lock_object *lock, uintptr_t how) { panic("lockmgr locks do not support sleep interlocking"); } static uintptr_t unlock_lockmgr(struct lock_object *lock) { panic("lockmgr locks do not support sleep interlocking"); } #ifdef KDTRACE_HOOKS static int owner_lockmgr(const struct lock_object *lock, struct thread **owner) { panic("lockmgr locks do not support owner inquiring"); } #endif void lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags) { int iflags; MPASS((flags & ~LK_INIT_MASK) == 0); ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock, ("%s: lockmgr not aligned for %s: %p", __func__, wmesg, &lk->lk_lock)); iflags = LO_SLEEPABLE | LO_UPGRADABLE; if (flags & LK_CANRECURSE) iflags |= LO_RECURSABLE; if ((flags & LK_NODUP) == 0) iflags |= LO_DUPOK; if (flags & LK_NOPROFILE) iflags |= LO_NOPROFILE; if ((flags & LK_NOWITNESS) == 0) iflags |= LO_WITNESS; if (flags & LK_QUIET) iflags |= LO_QUIET; if (flags & LK_IS_VNODE) iflags |= LO_IS_VNODE; iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE); lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags); lk->lk_lock = LK_UNLOCKED; lk->lk_recurse = 0; lk->lk_exslpfail = 0; lk->lk_timo = timo; lk->lk_pri = pri; STACK_ZERO(lk); } /* * XXX: Gross hacks to manipulate external lock flags after * initialization. Used for certain vnode and buf locks. */ void lockallowshare(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags &= ~LK_NOSHARE; } void lockdisableshare(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags |= LK_NOSHARE; } void lockallowrecurse(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags |= LO_RECURSABLE; } void lockdisablerecurse(struct lock *lk) { lockmgr_assert(lk, KA_XLOCKED); lk->lock_object.lo_flags &= ~LO_RECURSABLE; } void lockdestroy(struct lock *lk) { KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held")); KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed")); KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters")); lock_destroy(&lk->lock_object); } int __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk, const char *wmesg, int pri, int timo, const char *file, int line) { GIANT_DECLARE; struct lock_class *class; const char *iwmesg; uintptr_t tid, v, x; u_int op, realexslp; int error, ipri, itimo, queue, wakeup_swapper; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef ADAPTIVE_LOCKMGRS volatile struct thread *owner; u_int i, spintries = 0; #endif error = 0; tid = (uintptr_t)curthread; op = (flags & LK_TYPE_MASK); iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg; ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri; itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo; MPASS((flags & ~LK_TOTAL_MASK) == 0); KASSERT((op & (op - 1)) == 0, ("%s: Invalid requested operation @ %s:%d", __func__, file, line)); KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 || (op != LK_DOWNGRADE && op != LK_RELEASE), ("%s: Invalid flags in regard of the operation desired @ %s:%d", __func__, file, line)); KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL, ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d", __func__, file, line)); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread, lk->lock_object.lo_name, file, line)); class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL; if (panicstr != NULL) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); return (0); } if (lk->lock_object.lo_flags & LK_NOSHARE) { switch (op) { case LK_SHARED: op = LK_EXCLUSIVE; break; case LK_UPGRADE: case LK_TRYUPGRADE: case LK_DOWNGRADE: _lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line); if (flags & LK_INTERLOCK) class->lc_unlock(ilk); return (0); } } wakeup_swapper = 0; switch (op) { case LK_SHARED: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, flags & LK_INTERLOCK ? ilk : NULL); for (;;) { x = lk->lk_lock; /* * If no other thread has an exclusive lock, or * no exclusive waiter is present, bump the count of * sharers. Since we have to preserve the state of * waiters, if we fail to acquire the shared lock * loop back and retry. */ if (LK_CAN_SHARE(x, flags)) { if (atomic_cmpset_acq_ptr(&lk->lk_lock, x, x + LK_ONE_SHARER)) break; continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, &contested, &waittime); /* * If the lock is already held by curthread in * exclusive way avoid a deadlock. */ if (LK_HOLDER(x) == tid) { LOCK_LOG2(lk, "%s: %p already held in exclusive mode", __func__, lk); error = EDEADLK; break; } /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } #ifdef ADAPTIVE_LOCKMGRS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. We need a double-state handle here * because for a failed acquisition the lock can be * either held in exclusive mode or shared mode * (for the writer starvation avoidance technique). */ if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, lk, owner); + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(td), "spinning", + "lockname:\"%s\"", lk->lock_object.lo_name); /* * If we are holding also an interlock drop it * in order to avoid a deadlock if the lockmgr * owner is adaptively spinning on the * interlock itself. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); while (LK_HOLDER(lk->lk_lock) == (uintptr_t)owner && TD_IS_RUNNING(owner)) cpu_spinwait(); + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(td), "running"); GIANT_RESTORE(); continue; } else if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) != 0 && LK_SHARERS(x) && spintries < alk_retries) { + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(td), "spinning", + "lockname:\"%s\"", lk->lock_object.lo_name); if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); spintries++; for (i = 0; i < alk_loops; i++) { if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, lk, spintries, i); x = lk->lk_lock; if ((x & LK_SHARE) == 0 || LK_CAN_SHARE(x, flags) != 0) break; cpu_spinwait(); } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(td), "running"); GIANT_RESTORE(); if (i != alk_loops) continue; } #endif /* * Acquire the sleepqueue chain lock because we * probabilly will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock; /* * if the lock can be acquired in shared mode, try * again. */ if (LK_CAN_SHARE(x, flags)) { sleepq_release(&lk->lock_object); continue; } #ifdef ADAPTIVE_LOCKMGRS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owner) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&lk->lock_object); continue; } } #endif /* * Try to set the LK_SHARED_WAITERS flag. If we fail, * loop back and retry. */ if ((x & LK_SHARED_WAITERS) == 0) { if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x, x | LK_SHARED_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set shared waiters flag", __func__, lk); } /* * As far as we have been unable to acquire the * shared lock and the shared waiters flag is set, * we will sleep. */ error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo, SQ_SHARED_QUEUE); flags &= ~LK_INTERLOCK; if (error) { LOCK_LOG3(lk, "%s: interrupted sleep for %p with %d", __func__, lk, error); break; } LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime, file, line); LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line); WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); TD_SLOCKS_INC(curthread); STACK_SAVE(lk); } break; case LK_UPGRADE: case LK_TRYUPGRADE: _lockmgr_assert(lk, KA_SLOCKED, file, line); v = lk->lk_lock; x = v & LK_ALL_WAITERS; v &= LK_EXCLUSIVE_SPINNERS; /* * Try to switch from one shared lock to an exclusive one. * We need to preserve waiters flags during the operation. */ if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v, tid | x)) { LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file, line); WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_SLOCKS_DEC(curthread); break; } /* * In LK_TRYUPGRADE mode, do not drop the lock, * returning EBUSY instead. */ if (op == LK_TRYUPGRADE) { LOCK_LOG2(lk, "%s: %p failed the nowait upgrade", __func__, lk); error = EBUSY; break; } /* * We have been unable to succeed in upgrading, so just * give up the shared lock. */ wakeup_swapper |= wakeupshlk(lk, file, line); /* FALLTHROUGH */ case LK_EXCLUSIVE: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); /* * If curthread already holds the lock and this one is * allowed to recurse, simply recurse on it. */ if (lockmgr_xlocked(lk)) { if ((flags & LK_CANRECURSE) == 0 && (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) { /* * If the lock is expected to not panic just * give up and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n", __func__, iwmesg, file, line); } lk->lk_recurse++; LOCK_LOG2(lk, "%s: %p recursing", __func__, lk); LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); break; } while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, &contested, &waittime); /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } #ifdef ADAPTIVE_LOCKMGRS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ x = lk->lk_lock; if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, lk, owner); + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(td), "spinning", + "lockname:\"%s\"", lk->lock_object.lo_name); /* * If we are holding also an interlock drop it * in order to avoid a deadlock if the lockmgr * owner is adaptively spinning on the * interlock itself. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); while (LK_HOLDER(lk->lk_lock) == (uintptr_t)owner && TD_IS_RUNNING(owner)) cpu_spinwait(); + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(td), "running"); GIANT_RESTORE(); continue; } else if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) != 0 && LK_SHARERS(x) && spintries < alk_retries) { if ((x & LK_EXCLUSIVE_SPINNERS) == 0 && !atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_SPINNERS)) continue; + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(td), "spinning", + "lockname:\"%s\"", lk->lock_object.lo_name); if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); spintries++; for (i = 0; i < alk_loops; i++) { if (LOCK_LOG_TEST(&lk->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, lk, spintries, i); if ((lk->lk_lock & LK_EXCLUSIVE_SPINNERS) == 0) break; cpu_spinwait(); } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(td), "running"); GIANT_RESTORE(); if (i != alk_loops) continue; } #endif /* * Acquire the sleepqueue chain lock because we * probabilly will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock; /* * if the lock has been released while we spun on * the sleepqueue chain lock just try again. */ if (x == LK_UNLOCKED) { sleepq_release(&lk->lock_object); continue; } #ifdef ADAPTIVE_LOCKMGRS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owner) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 && LK_HOLDER(x) != LK_KERNPROC) { owner = (struct thread *)LK_HOLDER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&lk->lock_object); continue; } } #endif /* * The lock can be in the state where there is a * pending queue of waiters, but still no owner. * This happens when the lock is contested and an * owner is going to claim the lock. * If curthread is the one successfully acquiring it * claim lock ownership and return, preserving waiters * flags. */ v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); if ((x & ~v) == LK_UNLOCKED) { v &= ~LK_EXCLUSIVE_SPINNERS; if (atomic_cmpset_acq_ptr(&lk->lk_lock, x, tid | v)) { sleepq_release(&lk->lock_object); LOCK_LOG2(lk, "%s: %p claimed by a new writer", __func__, lk); break; } sleepq_release(&lk->lock_object); continue; } /* * Try to set the LK_EXCLUSIVE_WAITERS flag. If we * fail, loop back and retry. */ if ((x & LK_EXCLUSIVE_WAITERS) == 0) { if (!atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set excl waiters flag", __func__, lk); } /* * As far as we have been unable to acquire the * exclusive lock and the exclusive waiters flag * is set, we will sleep. */ error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo, SQ_EXCLUSIVE_QUEUE); flags &= ~LK_INTERLOCK; if (error) { LOCK_LOG3(lk, "%s: interrupted sleep for %p with %d", __func__, lk, error); break; } LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime, file, line); LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); STACK_SAVE(lk); } break; case LK_DOWNGRADE: _lockmgr_assert(lk, KA_XLOCKED, file, line); LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line); WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line); /* * Panic if the lock is recursed. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n", __func__, iwmesg, file, line); } TD_SLOCKS_INC(curthread); /* * In order to preserve waiters flags, just spin. */ for (;;) { x = lk->lk_lock; MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); x &= LK_ALL_WAITERS; if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x, LK_SHARERS_LOCK(1) | x)) break; cpu_spinwait(); } break; case LK_RELEASE: _lockmgr_assert(lk, KA_LOCKED, file, line); x = lk->lk_lock; if ((x & LK_SHARE) == 0) { /* * As first option, treact the lock as if it has not * any waiter. * Fix-up the tid var if the lock has been disowned. */ if (LK_HOLDER(x) == LK_KERNPROC) tid = LK_KERNPROC; else { WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); } LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line); /* * The lock is held in exclusive mode. * If the lock is recursed also, then unrecurse it. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) { LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk); lk->lk_recurse--; break; } if (tid != LK_KERNPROC) lock_profile_release_lock(&lk->lock_object); if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) break; sleepq_lock(&lk->lock_object); x = lk->lk_lock; v = LK_UNLOCKED; /* * If the lock has exclusive waiters, give them * preference in order to avoid deadlock with * shared runners up. * If interruptible sleeps left the exclusive queue * empty avoid a starvation for the threads sleeping * on the shared queue by giving them precedence * and cleaning up the exclusive waiters bit anyway. * Please note that lk_exslpfail count may be lying * about the real number of waiters with the * LK_SLEEPFAIL flag on because they may be used in * conjuction with interruptible sleeps so * lk_exslpfail might be considered an 'upper limit' * bound, including the edge cases. */ MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) { if (lk->lk_exslpfail < realexslp) { lk->lk_exslpfail = 0; queue = SQ_EXCLUSIVE_QUEUE; v |= (x & LK_SHARED_WAITERS); } else { lk->lk_exslpfail = 0; LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); queue = SQ_SHARED_QUEUE; } } else { /* * Exclusive waiters sleeping with LK_SLEEPFAIL * on and using interruptible sleeps/timeout * may have left spourious lk_exslpfail counts * on, so clean it up anyway. */ lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; } LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&lk->lk_lock, v); wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue); sleepq_release(&lk->lock_object); break; } else wakeup_swapper = wakeupshlk(lk, file, line); break; case LK_DRAIN: if (LK_CAN_WITNESS(flags)) WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ? ilk : NULL); /* * Trying to drain a lock we already own will result in a * deadlock. */ if (lockmgr_xlocked(lk)) { if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: draining %s with the lock held @ %s:%d\n", __func__, iwmesg, file, line); } while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) { #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&lk->lock_object, &contested, &waittime); /* * If the lock is expected to not sleep just give up * and return. */ if (LK_TRYOP(flags)) { LOCK_LOG2(lk, "%s: %p fails the try operation", __func__, lk); error = EBUSY; break; } /* * Acquire the sleepqueue chain lock because we * probabilly will need to manipulate waiters flags. */ sleepq_lock(&lk->lock_object); x = lk->lk_lock; /* * if the lock has been released while we spun on * the sleepqueue chain lock just try again. */ if (x == LK_UNLOCKED) { sleepq_release(&lk->lock_object); continue; } v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS); if ((x & ~v) == LK_UNLOCKED) { v = (x & ~LK_EXCLUSIVE_SPINNERS); /* * If interruptible sleeps left the exclusive * queue empty avoid a starvation for the * threads sleeping on the shared queue by * giving them precedence and cleaning up the * exclusive waiters bit anyway. * Please note that lk_exslpfail count may be * lying about the real number of waiters with * the LK_SLEEPFAIL flag on because they may * be used in conjuction with interruptible * sleeps so lk_exslpfail might be considered * an 'upper limit' bound, including the edge * cases. */ if (v & LK_EXCLUSIVE_WAITERS) { queue = SQ_EXCLUSIVE_QUEUE; v &= ~LK_EXCLUSIVE_WAITERS; } else { /* * Exclusive waiters sleeping with * LK_SLEEPFAIL on and using * interruptible sleeps/timeout may * have left spourious lk_exslpfail * counts on, so clean it up anyway. */ MPASS(v & LK_SHARED_WAITERS); lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; v &= ~LK_SHARED_WAITERS; } if (queue == SQ_EXCLUSIVE_QUEUE) { realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE); if (lk->lk_exslpfail >= realexslp) { lk->lk_exslpfail = 0; queue = SQ_SHARED_QUEUE; v &= ~LK_SHARED_WAITERS; if (realexslp != 0) { LOCK_LOG2(lk, "%s: %p has only LK_SLEEPFAIL sleepers", __func__, lk); LOCK_LOG2(lk, "%s: %p waking up threads on the exclusive queue", __func__, lk); wakeup_swapper = sleepq_broadcast( &lk->lock_object, SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE); } } else lk->lk_exslpfail = 0; } if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG3(lk, "%s: %p waking up all threads on the %s queue", __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); wakeup_swapper |= sleepq_broadcast( &lk->lock_object, SLEEPQ_LK, 0, queue); /* * If shared waiters have been woken up we need * to wait for one of them to acquire the lock * before to set the exclusive waiters in * order to avoid a deadlock. */ if (queue == SQ_SHARED_QUEUE) { for (v = lk->lk_lock; (v & LK_SHARE) && !LK_SHARERS(v); v = lk->lk_lock) cpu_spinwait(); } } /* * Try to set the LK_EXCLUSIVE_WAITERS flag. If we * fail, loop back and retry. */ if ((x & LK_EXCLUSIVE_WAITERS) == 0) { if (!atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_WAITERS)) { sleepq_release(&lk->lock_object); continue; } LOCK_LOG2(lk, "%s: %p set drain waiters flag", __func__, lk); } /* * As far as we have been unable to acquire the * exclusive lock and the exclusive waiters flag * is set, we will sleep. */ if (flags & LK_INTERLOCK) { class->lc_unlock(ilk); flags &= ~LK_INTERLOCK; } GIANT_SAVE(); sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK, SQ_EXCLUSIVE_QUEUE); sleepq_wait(&lk->lock_object, ipri & PRIMASK); GIANT_RESTORE(); LOCK_LOG2(lk, "%s: %p resuming from the sleep queue", __func__, lk); } if (error == 0) { lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime, file, line); LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0, lk->lk_recurse, file, line); WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file, line); TD_LOCKS_INC(curthread); STACK_SAVE(lk); } break; default: if (flags & LK_INTERLOCK) class->lc_unlock(ilk); panic("%s: unknown lockmgr request 0x%x\n", __func__, op); } if (flags & LK_INTERLOCK) class->lc_unlock(ilk); if (wakeup_swapper) kick_proc0(); return (error); } void _lockmgr_disown(struct lock *lk, const char *file, int line) { uintptr_t tid, x; if (SCHEDULER_STOPPED()) return; tid = (uintptr_t)curthread; _lockmgr_assert(lk, KA_XLOCKED, file, line); /* * Panic if the lock is recursed. */ if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) panic("%s: disown a recursed lockmgr @ %s:%d\n", __func__, file, line); /* * If the owner is already LK_KERNPROC just skip the whole operation. */ if (LK_HOLDER(lk->lk_lock) != tid) return; lock_profile_release_lock(&lk->lock_object); LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line); WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); STACK_SAVE(lk); /* * In order to preserve waiters flags, just spin. */ for (;;) { x = lk->lk_lock; MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0); x &= LK_ALL_WAITERS; if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x, LK_KERNPROC | x)) return; cpu_spinwait(); } } void lockmgr_printinfo(const struct lock *lk) { struct thread *td; uintptr_t x; if (lk->lk_lock == LK_UNLOCKED) printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name); else if (lk->lk_lock & LK_SHARE) printf("lock type %s: SHARED (count %ju)\n", lk->lock_object.lo_name, (uintmax_t)LK_SHARERS(lk->lk_lock)); else { td = lockmgr_xholder(lk); if (td == (struct thread *)LK_KERNPROC) printf("lock type %s: EXCL by KERNPROC\n", lk->lock_object.lo_name); else printf("lock type %s: EXCL by thread %p " "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name, td, td->td_proc->p_pid, td->td_proc->p_comm, td->td_tid); } x = lk->lk_lock; if (x & LK_EXCLUSIVE_WAITERS) printf(" with exclusive waiters pending\n"); if (x & LK_SHARED_WAITERS) printf(" with shared waiters pending\n"); if (x & LK_EXCLUSIVE_SPINNERS) printf(" with exclusive spinners pending\n"); STACK_PRINT(lk); } int lockstatus(const struct lock *lk) { uintptr_t v, x; int ret; ret = LK_SHARED; x = lk->lk_lock; v = LK_HOLDER(x); if ((x & LK_SHARE) == 0) { if (v == (uintptr_t)curthread || v == LK_KERNPROC) ret = LK_EXCLUSIVE; else ret = LK_EXCLOTHER; } else if (x == LK_UNLOCKED) ret = 0; return (ret); } #ifdef INVARIANT_SUPPORT FEATURE(invariant_support, "Support for modules compiled with INVARIANTS option"); #ifndef INVARIANTS #undef _lockmgr_assert #endif void _lockmgr_assert(const struct lock *lk, int what, const char *file, int line) { int slocked = 0; if (panicstr != NULL) return; switch (what) { case KA_SLOCKED: case KA_SLOCKED | KA_NOTRECURSED: case KA_SLOCKED | KA_RECURSED: slocked = 1; case KA_LOCKED: case KA_LOCKED | KA_NOTRECURSED: case KA_LOCKED | KA_RECURSED: #ifdef WITNESS /* * We cannot trust WITNESS if the lock is held in exclusive * mode and a call to lockmgr_disown() happened. * Workaround this skipping the check if the lock is held in * exclusive mode even for the KA_LOCKED case. */ if (slocked || (lk->lk_lock & LK_SHARE)) { witness_assert(&lk->lock_object, what, file, line); break; } #endif if (lk->lk_lock == LK_UNLOCKED || ((lk->lk_lock & LK_SHARE) == 0 && (slocked || (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))))) panic("Lock %s not %slocked @ %s:%d\n", lk->lock_object.lo_name, slocked ? "share" : "", file, line); if ((lk->lk_lock & LK_SHARE) == 0) { if (lockmgr_recursed(lk)) { if (what & KA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } else if (what & KA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } break; case KA_XLOCKED: case KA_XLOCKED | KA_NOTRECURSED: case KA_XLOCKED | KA_RECURSED: if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)) panic("Lock %s not exclusively locked @ %s:%d\n", lk->lock_object.lo_name, file, line); if (lockmgr_recursed(lk)) { if (what & KA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); } else if (what & KA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", lk->lock_object.lo_name, file, line); break; case KA_UNLOCKED: if (lockmgr_xlocked(lk) || lockmgr_disowned(lk)) panic("Lock %s exclusively locked @ %s:%d\n", lk->lock_object.lo_name, file, line); break; default: panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file, line); } } #endif #ifdef DDB int lockmgr_chain(struct thread *td, struct thread **ownerp) { struct lock *lk; lk = td->td_wchan; if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr) return (0); db_printf("blocked on lockmgr %s", lk->lock_object.lo_name); if (lk->lk_lock & LK_SHARE) db_printf("SHARED (count %ju)\n", (uintmax_t)LK_SHARERS(lk->lk_lock)); else db_printf("EXCL\n"); *ownerp = lockmgr_xholder(lk); return (1); } static void db_show_lockmgr(const struct lock_object *lock) { struct thread *td; const struct lock *lk; lk = (const struct lock *)lock; db_printf(" state: "); if (lk->lk_lock == LK_UNLOCKED) db_printf("UNLOCKED\n"); else if (lk->lk_lock & LK_SHARE) db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock)); else { td = lockmgr_xholder(lk); if (td == (struct thread *)LK_KERNPROC) db_printf("XLOCK: LK_KERNPROC\n"); else db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); if (lockmgr_recursed(lk)) db_printf(" recursed: %d\n", lk->lk_recurse); } db_printf(" waiters: "); switch (lk->lk_lock & LK_ALL_WAITERS) { case LK_SHARED_WAITERS: db_printf("shared\n"); break; case LK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case LK_ALL_WAITERS: db_printf("shared and exclusive\n"); break; default: db_printf("none\n"); } db_printf(" spinners: "); if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS) db_printf("exclusive\n"); else db_printf("none\n"); } #endif Index: stable/10/sys/kern/kern_mutex.c =================================================================== --- stable/10/sys/kern/kern_mutex.c (revision 278649) +++ stable/10/sys/kern/kern_mutex.c (revision 278650) @@ -1,1009 +1,1020 @@ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Machine independent bits of mutex implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" #include "opt_global.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) #define ADAPTIVE_MUTEXES #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , lock, failed); #endif /* * Return the mutex address when the lock cookie address is provided. * This functionality assumes that struct mtx* have a member named mtx_lock. */ #define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock)) /* * Internal utility macros. */ #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) #define mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED) #define mtx_owner(m) ((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK)) static void assert_mtx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_mtx(const struct lock_object *lock); #endif static void lock_mtx(struct lock_object *lock, uintptr_t how); static void lock_spin(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_mtx(struct lock_object *lock); static uintptr_t unlock_spin(struct lock_object *lock); /* * Lock classes for sleep and spin mutexes. */ struct lock_class lock_class_mtx_sleep = { .lc_name = "sleep mutex", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_mtx, .lc_unlock = unlock_mtx, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; struct lock_class lock_class_mtx_spin = { .lc_name = "spin mutex", .lc_flags = LC_SPINLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_spin, .lc_unlock = unlock_spin, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; /* * System-wide mutexes */ struct mtx blocked_lock; struct mtx Giant; void assert_mtx(const struct lock_object *lock, int what) { mtx_assert((const struct mtx *)lock, what); } void lock_mtx(struct lock_object *lock, uintptr_t how) { mtx_lock((struct mtx *)lock); } void lock_spin(struct lock_object *lock, uintptr_t how) { panic("spin locks can only use msleep_spin"); } uintptr_t unlock_mtx(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock(m); return (0); } uintptr_t unlock_spin(struct lock_object *lock) { panic("spin locks can only use msleep_spin"); } #ifdef KDTRACE_HOOKS int owner_mtx(const struct lock_object *lock, struct thread **owner) { const struct mtx *m = (const struct mtx *)lock; *owner = mtx_owner(m); return (mtx_unowned(m) == 0); } #endif /* * Function versions of the inlined __mtx_* macros. These are used by * modules and can also be called from assembly language if needed. */ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, file, line); curthread->td_locks++; } void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_RELEASE_LOCK(LS_MTX_UNLOCK_RELEASE, m); __mtx_unlock(m, curthread, opts, file, line); curthread->td_locks--; } void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_lock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock_spin(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock_spin(m); } /* * The important part of mtx_trylock{,_flags}() * Tries to acquire lock `m.' If this function is called on a mutex that * is already owned, it will recursively acquire the lock. */ int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int rval; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); rval = 1; } else rval = _mtx_obtain_lock(m, (uintptr_t)curthread); opts &= ~MTX_RECURSE; LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); curthread->td_locks++; if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, m, contested, waittime, file, line); } return (rval); } /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * * We call this if the lock is either contested (i.e. we need to go to * sleep waiting for it), or if we need to recurse on it. */ void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; uintptr_t v; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_owned(m)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); return; } opts &= ~MTX_RECURSE; #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); while (!_mtx_obtain_lock(m, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* * If the owner is running on another CPU, spin until the * owner stops running or the state of the lock changes. */ v = m->mtx_lock; if (v != MTX_UNOWNED) { owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&m->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, m, owner); + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname((struct thread *)tid), + "spinning", "lockname:\"%s\"", + m->lock_object.lo_name); while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname((struct thread *)tid), + "running"); continue; } } #endif ts = turnstile_trywait(&m->lock_object); v = m->mtx_lock; /* * Check if the lock has been released while spinning for * the turnstile chain lock. */ if (v == MTX_UNOWNED) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_MUTEXES /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } #endif /* * If the mutex isn't already contested and a failure occurs * setting the contested bit, the mutex was either released * or the state of the MTX_RECURSED bit changed. */ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { turnstile_cancel(ts); continue; } /* * We definitely must sleep for this lock. */ mtx_assert(m, MA_NOTOWNED); #ifdef KTR if (!cont_logged) { CTR6(KTR_CONTENTION, "contention: %p at %s:%d wants %s, taken by %s:%d", (void *)tid, file, line, m->lock_object.lo_name, WITNESS_FILE(&m->lock_object), WITNESS_LINE(&m->lock_object)); cont_logged = 1; } #endif /* * Block on the turnstile. */ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(); #endif turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(); sleep_cnt++; #endif } #ifdef KTR if (cont_logged) { CTR4(KTR_CONTENTION, "contention end: %s acquired by %p at %s:%d", m->lock_object.lo_name, (void *)tid, file, line); } #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_LOCK_ACQUIRE, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(LS_MTX_LOCK_BLOCK, m, sleep_time); /* * Only record the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt)); #endif } static void _mtx_lock_spin_failed(struct mtx *m) { struct thread *td; td = mtx_owner(m); /* If the mutex is unlocked, try again. */ if (td == NULL) return; printf( "spin lock %p (%s) held by %p (tid %d) too long\n", m, m->lock_object.lo_name, td, td->td_tid); #ifdef WITNESS witness_display_spinlock(&m->lock_object, td, printf); #endif panic("spin lock held too long"); } #ifdef SMP /* * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock. * * This is only called if we need to actually spin for the lock. Recursion * is handled inline. */ void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; int i = 0; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); + KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), + "spinning", "lockname:\"%s\"", m->lock_object.lo_name); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); while (!_mtx_obtain_lock(m, tid)) { /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) { cpu_spinwait(); continue; } if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); } spinlock_enter(); } if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); + KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), + "running"); LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m, contested, waittime, (file), (line)); LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i); } #endif /* SMP */ void thread_lock_flags_(struct thread *td, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid; int i; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; #endif i = 0; tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) return; for (;;) { retry: spinlock_enter(); m = td->td_lock; KASSERT(m->mtx_lock != MTX_DESTROYED, ("thread_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("thread_lock() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); while (!_mtx_obtain_lock(m, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif if (m->mtx_lock == tid) { m->mtx_recurse++; break; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) cpu_spinwait(); else if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); if (m != td->td_lock) goto retry; } spinlock_enter(); } if (m == td->td_lock) break; __mtx_unlock_spin(m); /* does spinlock_exit() */ #ifdef KDTRACE_HOOKS spin_cnt++; #endif } if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m, contested, waittime, (file), (line)); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt); } struct mtx * thread_lock_block(struct thread *td) { struct mtx *lock; THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = &blocked_lock; mtx_unlock_spin(lock); return (lock); } void thread_lock_unblock(struct thread *td, struct mtx *new) { mtx_assert(new, MA_OWNED); MPASS(td->td_lock == &blocked_lock); atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new); } void thread_lock_set(struct thread *td, struct mtx *new) { struct mtx *lock; mtx_assert(new, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = new; mtx_unlock_spin(lock); } /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * * We are only called here if the lock is recursed or contested (i.e. we * need to wake up a blocked thread). */ void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_recursed(m)) { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); return; } /* * We have to lock the chain before the turnstile so this turnstile * can be removed from the hash list if it is empty. */ turnstile_chain_lock(&m->lock_object); ts = turnstile_lookup(&m->lock_object); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); MPASS(ts != NULL); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); _mtx_release_lock_quick(m); /* * This turnstile is now no longer associated with the mutex. We can * unlock the chain lock so a new turnstile may take it's place. */ turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&m->lock_object); } /* * All the unlocking of MTX_SPIN locks is done inline. * See the __mtx_unlock_spin() macro for the details. */ /* * The backing function for the INVARIANTS-enabled mtx_assert() */ #ifdef INVARIANT_SUPPORT void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct mtx *m; if (panicstr != NULL || dumping) return; m = mtxlock2mtx(c); switch (what) { case MA_OWNED: case MA_OWNED | MA_RECURSED: case MA_OWNED | MA_NOTRECURSED: if (!mtx_owned(m)) panic("mutex %s not owned at %s:%d", m->lock_object.lo_name, file, line); if (mtx_recursed(m)) { if ((what & MA_NOTRECURSED) != 0) panic("mutex %s recursed at %s:%d", m->lock_object.lo_name, file, line); } else if ((what & MA_RECURSED) != 0) { panic("mutex %s unrecursed at %s:%d", m->lock_object.lo_name, file, line); } break; case MA_NOTOWNED: if (mtx_owned(m)) panic("mutex %s owned at %s:%d", m->lock_object.lo_name, file, line); break; default: panic("unknown mtx_assert at %s:%d", file, line); } } #endif /* * The MUTEX_DEBUG-enabled mtx_validate() * * Most of these checks have been moved off into the LO_INITIALIZED flag * maintained by the witness code. */ #ifdef MUTEX_DEBUG void mtx_validate(struct mtx *); void mtx_validate(struct mtx *m) { /* * XXX: When kernacc() does not require Giant we can reenable this check */ #ifdef notyet /* * Can't call kernacc() from early init386(), especially when * initializing Giant mutex, because some stuff in kernacc() * requires Giant itself. */ if (!cold) if (!kernacc((caddr_t)m, sizeof(m), VM_PROT_READ | VM_PROT_WRITE)) panic("Can't read and write to mutex %p", m); #endif } #endif /* * General init routine used by the MTX_SYSINIT() macro. */ void mtx_sysinit(void *arg) { struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); } /* * Mutex initialization routine; initialize lock `m' of type contained in * `opts' with options contained in `opts' and name `name.' The optional * lock type `type' is used as a general lock category name for use with * witness. */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts) { struct mtx *m; struct lock_class *class; int flags; m = mtxlock2mtx(c); MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE)) == 0); ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock, ("%s: mtx_lock not aligned for %s: %p", __func__, name, &m->mtx_lock)); #ifdef MUTEX_DEBUG /* Diagnostic and error correction */ mtx_validate(m); #endif /* Determine lock class and lock flags. */ if (opts & MTX_SPIN) class = &lock_class_mtx_spin; else class = &lock_class_mtx_sleep; flags = 0; if (opts & MTX_QUIET) flags |= LO_QUIET; if (opts & MTX_RECURSE) flags |= LO_RECURSABLE; if ((opts & MTX_NOWITNESS) == 0) flags |= LO_WITNESS; if (opts & MTX_DUPOK) flags |= LO_DUPOK; if (opts & MTX_NOPROFILE) flags |= LO_NOPROFILE; /* Initialize mutex. */ lock_init(&m->lock_object, class, name, type, flags); m->mtx_lock = MTX_UNOWNED; m->mtx_recurse = 0; } /* * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be * passed in as a flag here because if the corresponding mtx_init() was * called with MTX_QUIET set, then it will already be set in the mutex's * flags. */ void _mtx_destroy(volatile uintptr_t *c) { struct mtx *m; m = mtxlock2mtx(c); if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). */ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) spinlock_exit(); else curthread->td_locks--; lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); } m->mtx_lock = MTX_DESTROYED; lock_destroy(&m->lock_object); } /* * Intialize the mutex code and system mutexes. This is called from the MD * startup code prior to mi_startup(). The per-CPU data space needs to be * setup before this is called. */ void mutex_init(void) { /* Setup turnstiles so that sleep mutexes work. */ init_turnstiles(); /* * Initialize mutexes. */ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. */ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } #ifdef DDB void db_show_mtx(const struct lock_object *lock) { struct thread *td; const struct mtx *m; m = (const struct mtx *)lock; db_printf(" flags: {"); if (LOCK_CLASS(lock) == &lock_class_mtx_spin) db_printf("SPIN"); else db_printf("DEF"); if (m->lock_object.lo_flags & LO_RECURSABLE) db_printf(", RECURSE"); if (m->lock_object.lo_flags & LO_DUPOK) db_printf(", DUPOK"); db_printf("}\n"); db_printf(" state: {"); if (mtx_unowned(m)) db_printf("UNOWNED"); else if (mtx_destroyed(m)) db_printf("DESTROYED"); else { db_printf("OWNED"); if (m->mtx_lock & MTX_CONTESTED) db_printf(", CONTESTED"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } db_printf("}\n"); if (!mtx_unowned(m) && !mtx_destroyed(m)) { td = mtx_owner(m); db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (mtx_recursed(m)) db_printf(" recursed: %d\n", m->mtx_recurse); } } #endif Index: stable/10/sys/kern/kern_rwlock.c =================================================================== --- stable/10/sys/kern/kern_rwlock.c (revision 278649) +++ stable/10/sys/kern/kern_rwlock.c (revision 278650) @@ -1,1229 +1,1250 @@ /*- * Copyright (c) 2006 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include +#include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* have a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef ADAPTIVE_RWLOCKS static int rowner_retries = 10; static int rowner_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); #endif #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define rw_wowner(rw) \ ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((rw)->rw_lock)) /* * Returns if a write owner is recursed. Write ownership is not assured * here and should be previously checked. */ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread helds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner(). */ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args = arg; rw_init((struct rwlock *)args->ra_rw, args->ra_desc); } void rw_sysinit_flags(void *arg) { struct rw_args_flags *args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __rw_wlock(rw, curthread, file, line); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); curthread->td_locks++; } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; int rval; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); if (rw_wlocked(rw) && (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) { rw->rw_recurse++; rval = 1; } else rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); curthread->td_locks++; } return (rval); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); if (!rw_recursed(rw)) LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw); __rw_wunlock(rw, curthread, file, line); curthread->td_locks--; } /* * Determines whether a new reader can acquire a lock. Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ #define RW_CAN_READ(_rw) \ ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) & \ (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == \ RW_LOCK_READ) void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t v; #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != curthread, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { /* * The RW_LOCK_READ_WAITERS flag should only be set * if the lock has been unlocked and write waiters * were present. */ if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, v + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)v, (void *)(v + RW_ONE_READER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(curthread), "spinning", + "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(curthread), "running"); continue; } } else if (spintries < rowner_retries) { spintries++; + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), + "spinning", "lockname:\"%s\"", + rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { v = rw->rw_lock; if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v)) break; cpu_spinwait(); } + KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), + "running"); if (i != rowner_loops) continue; } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!RW_CAN_READ(v)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_READ_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(); #endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); } /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. */ LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested, waittime, file, line); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); curthread->td_locks++; curthread->td_rw_rlocks++; #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time); /* * Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt)); #endif } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t x; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); for (;;) { x = rw->rw_lock; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); curthread->td_locks++; curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t x, v, queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); /* TODO: drop "owner of record" here. */ for (;;) { /* * See if there is more than one read lock held. If so, * just drop one and return. */ x = rw->rw_lock; if (RW_READERS(x) > 1) { if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, x - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)x, (void *)(x - RW_ONE_READER)); break; } continue; } /* * If there aren't any waiters for a write lock, then try * to drop it quickly. */ if (!(x & RW_LOCK_WAITERS)) { MPASS((x & ~RW_LOCK_WRITE_SPINNER) == RW_READERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, RW_UNLOCKED)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, rw); break; } continue; } /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in a unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wakeup actually runs and have the new thread * "steal" the lock. For now it's a lot simpler to just * wakeup all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ x = RW_UNLOCKED; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; x |= (v & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, x)) { turnstile_chain_unlock(&rw->lock_object); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts, TS_SHARED_LOCK); turnstile_chain_unlock(&rw->lock_object); break; } LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw); curthread->td_locks--; curthread->td_rw_rlocks--; } /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. */ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif uintptr_t v, x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); while (!_rw_write_lock(rw, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. */ v = rw->rw_lock; owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), + "spinning", "lockname:\"%s\"", + rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } + KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), + "running"); continue; } if ((v & RW_LOCK_READ) && RW_READERS(v) && spintries < rowner_retries) { if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_SPINNER)) { continue; } } spintries++; + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), + "spinning", "lockname:\"%s\"", + rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0) break; cpu_spinwait(); } + KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), + "running"); #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; } #endif ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (!(v & RW_LOCK_READ)) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. */ x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~x) == RW_UNLOCKED) { x &= ~RW_LOCK_WRITE_SPINNER; if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); continue; } /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(); #endif turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif } LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time); /* * Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt)); #endif } /* * This function is called if the first try at releasing a write lock failed. * This means that one of the 2 waiter bits must be set indicating that at * least one thread is waiting on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t v; int queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw) && rw_recursed(rw)) { rw->rw_recurse--; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. */ v = RW_UNLOCKED; if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; v |= (rw->rw_lock & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; /* Wake up all waiters for the specific queue. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); turnstile_broadcast(ts, queue); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t v, x, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. */ tid = (uintptr_t)curthread; success = 0; for (;;) { v = rw->rw_lock; if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_cmpset_ptr(&rw->rw_lock, v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. */ ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ x = rw->rw_lock & RW_LOCK_WAITERS; success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x); if (success) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw); } return (success); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. */ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (panicstr != NULL) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. */ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. */ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: stable/10/sys/kern/kern_sx.c =================================================================== --- stable/10/sys/kern/kern_sx.c (revision 278649) +++ stable/10/sys/kern/kern_sx.c (revision 278650) @@ -1,1214 +1,1231 @@ /*- * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * Shared/exclusive locks. This implementation attempts to ensure * deterministic lock granting behavior, so that slocks and xlocks are * interleaved. * * Priority propagation will not generally raise the priority of lock holders, * so should not be relied upon in combination with sx locks. */ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_no_adaptive_sx.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include +#include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #include #endif #ifdef DDB #include #endif #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #define ADAPTIVE_SX #endif CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE); #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* Handy macros for sleep queues. */ #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 /* * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We * drop Giant anytime we have to sleep or if we adaptively spin. */ #define GIANT_DECLARE \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant) \ #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _giantcnt++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define GIANT_RESTORE() do { \ if (_giantcnt > 0) { \ mtx_assert(&Giant, MA_NOTOWNED); \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) /* * Returns true if an exclusive lock is recursed. It assumes * curthread currently has an exclusive lock. */ #define sx_recurse lock_object.lo_data #define sx_recursed(sx) ((sx)->sx_recurse != 0) static void assert_sx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_sx(const struct lock_object *lock); #endif static void lock_sx(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_sx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_sx(struct lock_object *lock); struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_sx, #ifdef DDB .lc_ddb_show = db_show_sx, #endif .lc_lock = lock_sx, .lc_unlock = unlock_sx, #ifdef KDTRACE_HOOKS .lc_owner = owner_sx, #endif }; #ifndef INVARIANTS #define _sx_assert(sx, what, file, line) #endif #ifdef ADAPTIVE_SX static u_int asx_retries = 10; static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); #endif void assert_sx(const struct lock_object *lock, int what) { sx_assert((const struct sx *)lock, what); } void lock_sx(struct lock_object *lock, uintptr_t how) { struct sx *sx; sx = (struct sx *)lock; if (how) sx_slock(sx); else sx_xlock(sx); } uintptr_t unlock_sx(struct lock_object *lock) { struct sx *sx; sx = (struct sx *)lock; sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); if (sx_xlocked(sx)) { sx_xunlock(sx); return (0); } else { sx_sunlock(sx); return (1); } } #ifdef KDTRACE_HOOKS int owner_sx(const struct lock_object *lock, struct thread **owner) { const struct sx *sx = (const struct sx *)lock; uintptr_t x = sx->sx_lock; *owner = (struct thread *)SX_OWNER(x); return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) : (*owner != NULL)); } #endif void sx_sysinit(void *arg) { struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } void sx_init_flags(struct sx *sx, const char *description, int opts) { int flags; MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK | SX_NOPROFILE | SX_NOADAPTIVE)) == 0); ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock, ("%s: sx_lock not aligned for %s: %p", __func__, description, &sx->sx_lock)); flags = LO_SLEEPABLE | LO_UPGRADABLE; if (opts & SX_DUPOK) flags |= LO_DUPOK; if (opts & SX_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & SX_NOWITNESS)) flags |= LO_WITNESS; if (opts & SX_RECURSE) flags |= LO_RECURSABLE; if (opts & SX_QUIET) flags |= LO_QUIET; flags |= opts & SX_NOADAPTIVE; lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags); sx->sx_lock = SX_LOCK_UNLOCKED; sx->sx_recurse = 0; } void sx_destroy(struct sx *sx) { KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held")); KASSERT(sx->sx_recurse == 0, ("sx lock still recursed")); sx->sx_lock = SX_LOCK_DESTROYED; lock_destroy(&sx->lock_object); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_slock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); error = __sx_slock(sx, opts, file, line); if (!error) { LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line); WITNESS_LOCK(&sx->lock_object, 0, file, line); curthread->td_locks++; } return (error); } int sx_try_slock_(struct sx *sx, const char *file, int line) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); for (;;) { x = sx->sx_lock; KASSERT(x != SX_LOCK_DESTROYED, ("sx_try_slock() of destroyed sx @ %s:%d", file, line)); if (!(x & SX_LOCK_SHARED)) break; if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); curthread->td_locks++; return (1); } } LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line); return (0); } int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); error = __sx_xlock(sx, curthread, opts, file, line); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); curthread->td_locks++; } return (error); } int sx_try_xlock_(struct sx *sx, const char *file, int line) { int rval; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_xlock() of destroyed sx @ %s:%d", file, line)); if (sx_xlocked(sx) && (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) { sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); rval = 1; } else rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); curthread->td_locks++; } return (rval); } void _sx_sunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_sunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); __sx_sunlock(sx, file, line); LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_SUNLOCK_RELEASE, sx); curthread->td_locks--; } void _sx_xunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); if (!sx_recursed(sx)) LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_XUNLOCK_RELEASE, sx); __sx_xunlock(sx, curthread, file, line); curthread->td_locks--; } /* * Try to do a non-blocking upgrade from a shared lock to an exclusive lock. * This will only succeed if this thread holds a single shared lock. * Return 1 if if the upgrade succeed, 0 otherwise. */ int sx_try_upgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); /* * Try to switch from one shared lock to an exclusive lock. We need * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that * we will wake up the exclusive waiters when we drop the lock. */ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS; success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x, (uintptr_t)curthread | x); LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line); if (success) { WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(LS_SX_TRYUPGRADE_UPGRADE, sx); } return (success); } /* * Downgrade an unrecursed exclusive lock into a single shared lock. */ void sx_downgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_downgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line); #ifndef INVARIANTS if (sx_recursed(sx)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line); /* * Try to switch from an exclusive lock with no shared waiters * to one sharer with no shared waiters. If there are * exclusive waiters, we don't need to lock the sleep queue so * long as we preserve the flag. We do one quick try and if * that fails we grab the sleepq lock to keep the flags from * changing and do it the slow way. * * We have to lock the sleep queue if there are shared waiters * so we can wake them up. */ x = sx->sx_lock; if (!(x & SX_LOCK_SHARED_WAITERS) && atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS))) { LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); return; } /* * Lock the sleep queue so we can read the waiters bits * without any races and wakeup any shared waiters. */ sleepq_lock(&sx->lock_object); /* * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single * shared lock. If there are any shared waiters, wake them up. */ wakeup_swapper = 0; x = sx->sx_lock; atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS)); if (x & SX_LOCK_SHARED_WAITERS) wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_SHARED_QUEUE); sleepq_release(&sx->lock_object); LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(LS_SX_DOWNGRADE_DOWNGRADE, sx); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; u_int i, spintries = 0; #endif uintptr_t x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. */ x = sx->sx_lock; if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { if ((x & SX_LOCK_SHARED) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(curthread), "spinning", + "lockname:\"%s\"", + sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(curthread), "running"); continue; } } else if (SX_SHARERS(x) && spintries < asx_retries) { + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(curthread), "spinning", + "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); spintries++; for (i = 0; i < asx_loops; i++) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, sx, spintries, i); x = sx->sx_lock; if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0) break; cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(curthread), "running"); if (i != asx_loops) continue; } } #endif sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) { if (atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS, tid | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } sleepq_release(&sx->lock_object); continue; } /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail, * than loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } GIANT_RESTORE(); if (!error) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt)); #endif return (error); } /* * This function represents the so-called 'hard case' for sx_xunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line) { uintptr_t x; int queue, wakeup_swapper; if (SCHEDULER_STOPPED()) return; MPASS(!(sx->sx_lock & SX_LOCK_SHARED)); /* If the lock is recursed, then unrecurse one level. */ if (sx_xlocked(sx) && sx_recursed(sx)) { if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx); return; } MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, sx); sleepq_lock(&sx->lock_object); x = SX_LOCK_UNLOCKED; /* * The wake up algorithm here is quite simple and probably not * ideal. It gives precedence to shared waiters if they are * present. For this condition, we have to preserve the * state of the exclusive waiters flag. * If interruptible sleeps left the shared queue empty avoid a * starvation for the threads sleeping on the exclusive queue by giving * them precedence and cleaning up the shared waiters bit anyway. */ if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 && sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) { queue = SQ_SHARED_QUEUE; x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS); } else queue = SQ_EXCLUSIVE_QUEUE; /* Wake up all the waiters for the specific queue. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue", __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&sx->sx_lock, x); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_slock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t x; int error = 0; #ifdef KDTRACE_HOOKS uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. */ for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif x = sx->sx_lock; /* * If no other thread has an exclusive lock then try to bump up * the count of sharers. Since we have to preserve the state * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the * shared lock loop back and retry. */ if (x & SX_LOCK_SHARED) { MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, sx, (void *)x, (void *)(x + SX_ONE_SHARER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); + KTR_STATE1(KTR_SCHED, "thread", + sched_tdname(curthread), "spinning", + "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif cpu_spinwait(); } + KTR_STATE0(KTR_SCHED, "thread", + sched_tdname(curthread), "running"); continue; } } #endif /* * Some other thread already has an exclusive lock, so * start the process of blocking. */ sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * The lock could have been released while we spun. * In this case loop back and retry. */ if (x & SX_LOCK_SHARED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * Try to set the SX_LOCK_SHARED_WAITERS flag. If we * fail to set it drop the sleep queue lock and loop * back. */ if (!(x & SX_LOCK_SHARED_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_SHARED_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set shared waiters flag", __func__, sx); } /* * Since we have been unable to acquire the shared lock, * we have to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } if (error == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt)); #endif GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_sunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_sunlock_hard(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; for (;;) { x = sx->sx_lock; /* * We should never have sharers while at least one thread * holds a shared lock. */ KASSERT(!(x & SX_LOCK_SHARED_WAITERS), ("%s: waiting sharers", __func__)); /* * See if there is more than one shared lock held. If * so, just drop one and return. */ if (SX_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, sx, (void *)x, (void *)(x - SX_ONE_SHARER)); break; } continue; } /* * If there aren't any waiters for an exclusive lock, * then try to drop it quickly. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { MPASS(x == SX_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, sx); break; } continue; } /* * At this point, there should just be one sharer with * exclusive waiters. */ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS)); sleepq_lock(&sx->lock_object); /* * Wake up semantic here is quite simple: * Just wake up all the exclusive waiters. * Note that the state of the lock could have changed, * so if it fails loop back and retry. */ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS, SX_LOCK_UNLOCKED)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p waking up all thread on" "exclusive queue", __func__, sx); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_EXCLUSIVE_QUEUE); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); break; } } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef _sx_assert #endif /* * In the non-WITNESS case, sx_assert() can only detect that at least * *some* thread owns an slock, but it cannot guarantee that *this* * thread owns an slock. */ void _sx_assert(const struct sx *sx, int what, const char *file, int line) { #ifndef WITNESS int slocked = 0; #endif if (panicstr != NULL) return; switch (what) { case SA_SLOCKED: case SA_SLOCKED | SA_NOTRECURSED: case SA_SLOCKED | SA_RECURSED: #ifndef WITNESS slocked = 1; /* FALLTHROUGH */ #endif case SA_LOCKED: case SA_LOCKED | SA_NOTRECURSED: case SA_LOCKED | SA_RECURSED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If some other thread has an exclusive lock or we * have one and are asserting a shared lock, fail. * Also, if no one has a lock at all, fail. */ if (sx->sx_lock == SX_LOCK_UNLOCKED || (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked || sx_xholder(sx) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", sx->lock_object.lo_name, slocked ? "share " : "", file, line); if (!(sx->sx_lock & SX_LOCK_SHARED)) { if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } #endif break; case SA_XLOCKED: case SA_XLOCKED | SA_NOTRECURSED: case SA_XLOCKED | SA_RECURSED: if (sx_xholder(sx) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); break; case SA_UNLOCKED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If we hold an exclusve lock fail. We can't * reliably check to see if we hold a shared lock or * not. */ if (sx_xholder(sx) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); #endif break; default: panic("Unknown sx lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB static void db_show_sx(const struct lock_object *lock) { struct thread *td; const struct sx *sx; sx = (const struct sx *)lock; db_printf(" state: "); if (sx->sx_lock == SX_LOCK_UNLOCKED) db_printf("UNLOCKED\n"); else if (sx->sx_lock == SX_LOCK_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else { td = sx_xholder(sx); db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (sx_recursed(sx)) db_printf(" recursed: %d\n", sx->sx_recurse); } db_printf(" waiters: "); switch(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) { case SX_LOCK_SHARED_WAITERS: db_printf("shared\n"); break; case SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive and shared\n"); break; default: db_printf("none\n"); } } /* * Check to see if a thread that is blocked on a sleep queue is actually * blocked on an sx lock. If so, output some details and return true. * If the lock has an exclusive owner, return that in *ownerp. */ int sx_chain(struct thread *td, struct thread **ownerp) { struct sx *sx; /* * Check to see if this thread is blocked on an sx lock. * First, we check the lock class. If that is ok, then we * compare the lock name against the wait message. */ sx = td->td_wchan; if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx || sx->lock_object.lo_name != td->td_wmesg) return (0); /* We think we have an sx lock, so output some details. */ db_printf("blocked on sx \"%s\" ", td->td_wmesg); *ownerp = sx_xholder(sx); if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK (count %ju)\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else db_printf("XLOCK\n"); return (1); } #endif Index: stable/10/sys/kern/kern_timeout.c =================================================================== --- stable/10/sys/kern/kern_timeout.c (revision 278649) +++ stable/10/sys/kern/kern_timeout.c (revision 278650) @@ -1,1446 +1,1452 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" #include "opt_kdtrace.h" #if defined(__arm__) #include "opt_timer.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #ifndef NO_EVENTTIMERS DPCPU_DECLARE(sbintime_t, hardclocktime); #endif SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, "struct callout *"); #ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); static int avg_gcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); static int avg_lockcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); static int avg_depth_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, "Average number of direct callouts examined per callout_process call. " "Units = 1/1000"); static int avg_lockcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); static int avg_mpcalls_dir; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, 0, "Average number of MP direct callouts made per callout_process call. " "Units = 1/1000"); #endif static int ncallout; SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0, "Number of entries in callwheel and size of timeout() preallocation"); /* * TODO: * allocate more timeout table slots when table overflows. */ u_int callwheelsize, callwheelmask; /* * The callout cpu exec entities represent informations necessary for * describing the state of callouts currently running on the CPU and the ones * necessary for migrating callouts to the new callout cpu. In particular, * the first entry of the array cc_exec_entity holds informations for callout * running in SWI thread context, while the second one holds informations * for callout running directly from hardware interrupt context. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ struct cc_exec { struct callout *cc_next; struct callout *cc_curr; #ifdef SMP void (*ce_migration_func)(void *); void *ce_migration_arg; int ce_migration_cpu; sbintime_t ce_migration_time; sbintime_t ce_migration_prec; #endif bool cc_cancel; bool cc_waiting; }; /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. */ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; u_int cc_bucket; + char cc_ktr_event_name[20]; }; #define cc_exec_curr cc_exec_entity[0].cc_curr #define cc_exec_next cc_exec_entity[0].cc_next #define cc_exec_cancel cc_exec_entity[0].cc_cancel #define cc_exec_waiting cc_exec_entity[0].cc_waiting #define cc_exec_curr_dir cc_exec_entity[1].cc_curr #define cc_exec_next_dir cc_exec_entity[1].cc_next #define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel #define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting #ifdef SMP #define cc_migration_func cc_exec_entity[0].ce_migration_func #define cc_migration_arg cc_exec_entity[0].ce_migration_arg #define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu #define cc_migration_time cc_exec_entity[0].ce_migration_time #define cc_migration_prec cc_exec_entity[0].ce_migration_prec #define cc_migration_func_dir cc_exec_entity[1].ce_migration_func #define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg #define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu #define cc_migration_time_dir cc_exec_entity[1].ce_migration_time #define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else struct callout_cpu cc_cpu; #define CC_CPU(cpu) &cc_cpu #define CC_SELF() &cc_cpu #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; -static void callout_cpu_init(struct callout_cpu *cc); +static void callout_cpu_init(struct callout_cpu *cc, int cpu); static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: * cc_curr - If a callout is in progress, it is cc_curr. * If cc_curr is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. * The softclock() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after * cc_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when * cc_curr is non-NULL. */ /* * Resets the execution entity tied to a specific callout cpu. */ static void cc_cce_cleanup(struct callout_cpu *cc, int direct) { cc->cc_exec_entity[direct].cc_curr = NULL; cc->cc_exec_entity[direct].cc_next = NULL; cc->cc_exec_entity[direct].cc_cancel = false; cc->cc_exec_entity[direct].cc_waiting = false; #ifdef SMP cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; cc->cc_exec_entity[direct].ce_migration_time = 0; cc->cc_exec_entity[direct].ce_migration_prec = 0; cc->cc_exec_entity[direct].ce_migration_func = NULL; cc->cc_exec_entity[direct].ce_migration_arg = NULL; #endif } /* * Checks if migration is requested by a specific callout cpu. */ static int cc_cce_migrating(struct callout_cpu *cc, int direct) { #ifdef SMP return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); #else return (0); #endif } /* * Kernel low level callwheel initialization * called on cpu0 during kernel startup. */ static void callout_callwheel_init(void *dummy) { struct callout_cpu *cc; /* * Calculate the size of the callout wheel and the preallocated * timeout() structures. * XXX: Clip callout to result of previous function of maxusers * maximum 384. This is still huge, but acceptable. */ ncallout = imin(16 + maxproc + maxfiles, 18508); TUNABLE_INT_FETCH("kern.ncallout", &ncallout); /* * Calculate callout wheel size, should be next power of two higher * than 'ncallout'. */ callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; /* * Only cpu0 handles timeout(9) and receives a preallocation. * * XXX: Once all timeout(9) consumers are converted this can * be removed. */ timeout_cpu = PCPU_GET(cpuid); cc = CC_CPU(timeout_cpu); cc->cc_callout = malloc(ncallout * sizeof(struct callout), M_CALLOUT, M_WAITOK); - callout_cpu_init(cc); + callout_cpu_init(cc, timeout_cpu); } SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); /* * Initialize the per-cpu callout structures. */ static void -callout_cpu_init(struct callout_cpu *cc) +callout_cpu_init(struct callout_cpu *cc, int cpu) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, M_WAITOK); for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = INT64_MAX; for (i = 0; i < 2; i++) cc_cce_cleanup(cc, i); + snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), + "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); c->c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } #ifdef SMP /* * Switches the cpu tied to a specific callout. * The function expects a locked incoming callout cpu and returns with * locked outcoming callout cpu. */ static struct callout_cpu * callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) { struct callout_cpu *new_cc; MPASS(c != NULL && cc != NULL); CC_LOCK_ASSERT(cc); /* * Avoid interrupts and preemption firing after the callout cpu * is blocked in order to avoid deadlocks as the new thread * may be willing to acquire the callout cpu lock. */ c->c_cpu = CPUBLOCK; spinlock_enter(); CC_UNLOCK(cc); new_cc = CC_CPU(new_cpu); CC_LOCK(new_cc); spinlock_exit(); c->c_cpu = new_cpu; return (new_cc); } #endif /* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; #ifdef SMP int cpu; #endif cc = CC_CPU(timeout_cpu); if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ - callout_cpu_init(cc); + callout_cpu_init(cc, cpu); if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); } #endif } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); #define CC_HASH_SHIFT 8 static inline u_int callout_hash(sbintime_t sbt) { return (sbt >> (32 - CC_HASH_SHIFT)); } static inline u_int callout_get_bucket(sbintime_t sbt) { return (callout_hash(sbt) & callwheelmask); } void callout_process(sbintime_t now) { struct callout *tmp, *tmpn; struct callout_cpu *cc; struct callout_list *sc; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; #ifdef CALLOUT_PROFILING int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; #endif cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; nowb = callout_hash(now); /* Compute the last bucket and minimum time of the bucket after it. */ if (nowb == firstb) lookahead = (SBT_1S / 16); else if (nowb - firstb == 1) lookahead = (SBT_1S / 8); else lookahead = (SBT_1S / 2); first = last = now; first += (lookahead / 2); last += lookahead; last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); lastb = callout_hash(last) - 1; max = last; /* * Check if we wrapped around the entire wheel from the last scan. * In case, we need to scan entirely the wheel for pending callouts. */ if (lastb - firstb >= callwheelsize) { lastb = firstb + callwheelsize - 1; if (nowb - firstb >= callwheelsize) nowb = lastb; } /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { sc = &cc->cc_callwheel[firstb & callwheelmask]; tmp = LIST_FIRST(sc); while (tmp != NULL) { /* Run the callout if present time within allowed. */ if (tmp->c_time <= now) { /* * Consumer told us the callout may be run * directly from hardware interrupt context. */ if (tmp->c_flags & CALLOUT_DIRECT) { #ifdef CALLOUT_PROFILING ++depth_dir; #endif cc->cc_exec_next_dir = LIST_NEXT(tmp, c_links.le); cc->cc_bucket = firstb & callwheelmask; LIST_REMOVE(tmp, c_links.le); softclock_call_cc(tmp, cc, #ifdef CALLOUT_PROFILING &mpcalls_dir, &lockcalls_dir, NULL, #endif 1); tmp = cc->cc_exec_next_dir; } else { tmpn = LIST_NEXT(tmp, c_links.le); LIST_REMOVE(tmp, c_links.le); TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); tmp->c_flags |= CALLOUT_PROCESSED; tmp = tmpn; } continue; } /* Skip events from distant future. */ if (tmp->c_time >= max) goto next; /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; goto next; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) first = tmp->c_time; tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; next: tmp = LIST_NEXT(tmp, c_links.le); } /* Proceed with the next bucket. */ firstb++; /* * Stop if we looked after present time and found * some event we can't execute at now. * Stop if we looked far enough into the future. */ } while (((int)(firstb - lastb)) <= 0); cc->cc_firstevent = last; #ifndef NO_EVENTTIMERS cpu_new_callout(curcpu, last, first); #endif #ifdef CALLOUT_PROFILING avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; #endif mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. */ if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); } static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; int cpu; for (;;) { cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { while (c->c_cpu == CPUBLOCK) cpu_spinwait(); continue; } #endif cc = CC_CPU(cpu); CC_LOCK(cc); if (cpu == c->c_cpu) break; CC_UNLOCK(cc); } return (cc); } static void callout_cc_add(struct callout *c, struct callout_cpu *cc, sbintime_t sbt, sbintime_t precision, void (*func)(void *), void *arg, int cpu, int flags) { int bucket; CC_LOCK_ASSERT(cc); if (sbt < cc->cc_lastscan) sbt = cc->cc_lastscan; c->c_arg = arg; c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); if (flags & C_DIRECT_EXEC) c->c_flags |= CALLOUT_DIRECT; c->c_flags &= ~CALLOUT_PROCESSED; c->c_func = func; c->c_time = sbt; c->c_precision = precision; bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); if (cc->cc_bucket == bucket) cc->cc_exec_next_dir = c; #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout * that has been inserted, but only if really required. */ if (INT64_MAX - c->c_time < c->c_precision) c->c_precision = INT64_MAX - c->c_time; sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; cpu_new_callout(cpu, sbt, c->c_time); } #endif } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, #ifdef CALLOUT_PROFILING int *mpcalls, int *lockcalls, int *gcalls, #endif int direct) { struct rm_priotracker tracker; void (*c_func)(void *); void *c_arg; struct lock_class *class; struct lock_object *c_lock; uintptr_t lock_status; int c_flags; #ifdef SMP struct callout_cpu *new_cc; void (*new_func)(void *); void *new_arg; int flags, new_cpu; sbintime_t new_prec, new_time; #endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static timeout_t *lastfunc; #endif KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == (CALLOUT_PENDING | CALLOUT_ACTIVE), ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; lock_status = 0; if (c->c_flags & CALLOUT_SHAREDLOCK) { if (class == &lock_class_rm) lock_status = (uintptr_t)&tracker; else lock_status = 1; } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; if (c->c_flags & CALLOUT_LOCAL_ALLOC) c->c_flags = CALLOUT_LOCAL_ALLOC; else c->c_flags &= ~CALLOUT_PENDING; cc->cc_exec_entity[direct].cc_curr = c; cc->cc_exec_entity[direct].cc_cancel = false; CC_UNLOCK(cc); if (c_lock != NULL) { class->lc_lock(c_lock, lock_status); /* * The callout may have been cancelled * while we switched locks. */ if (cc->cc_exec_entity[direct].cc_cancel) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. */ cc->cc_exec_entity[direct].cc_cancel = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING (*lockcalls)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING (*mpcalls)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } + KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", + "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt1 = sbinuptime(); #endif THREAD_NO_SLEEPING(); SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); c_func(c_arg); SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); THREAD_SLEEPING_OK(); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt2 = sbinuptime(); sbt2 -= sbt1; if (sbt2 > maxdt) { if (lastfunc != c_func || sbt2 > maxdt * 2) { ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } maxdt = sbt2; lastfunc = c_func; } #endif + KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) class->lc_unlock(c_lock); skip: CC_LOCK(cc); KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); cc->cc_exec_entity[direct].cc_curr = NULL; if (cc->cc_exec_entity[direct].cc_waiting) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. */ if (cc_cce_migrating(cc, direct)) { cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not * destroyed but that is not easy. */ c->c_flags &= ~CALLOUT_DFRMIGRATION; } cc->cc_exec_entity[direct].cc_waiting = false; CC_UNLOCK(cc); wakeup(&cc->cc_exec_entity[direct].cc_waiting); CC_LOCK(cc); } else if (cc_cce_migrating(cc, direct)) { KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, ("Migrating legacy callout %p", c)); #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. */ new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; new_time = cc->cc_exec_entity[direct].ce_migration_time; new_prec = cc->cc_exec_entity[direct].ce_migration_prec; new_func = cc->cc_exec_entity[direct].ce_migration_func; new_arg = cc->cc_exec_entity[direct].ce_migration_arg; cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not destroyed * but that is not easy. * * As first thing, handle deferred callout stops. */ if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); callout_cc_del(c, cc); return; } c->c_flags &= ~CALLOUT_DFRMIGRATION; new_cc = callout_cpu_switch(c, cc, new_cpu); flags = (direct) ? C_DIRECT_EXEC : 0; callout_cc_add(c, new_cc, new_time, new_prec, new_func, new_arg, new_cpu, flags); CC_UNLOCK(new_cc); CC_LOCK(cc); #else panic("migration should not happen"); #endif } /* * If the current callout is locally allocated (from * timeout(9)) then put it on the freelist. * * Note: we need to check the cached copy of c_flags because * if it was not local, then it's not safe to deref the * callout pointer. */ KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || c->c_flags == CALLOUT_LOCAL_ALLOC, ("corrupted callout")); if (c_flags & CALLOUT_LOCAL_ALLOC) callout_cc_del(c, cc); } /* * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures * used in this implementation was published by G. Varghese and T. Lauck in * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. */ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. */ void softclock(void *arg) { struct callout_cpu *cc; struct callout *c; #ifdef CALLOUT_PROFILING int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; #endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); softclock_call_cc(c, cc, #ifdef CALLOUT_PROFILING &mpcalls, &lockcalls, &gcalls, #endif 0); #ifdef CALLOUT_PROFILING ++depth; #endif } #ifdef CALLOUT_PROFILING avg_depth += (depth * 1000 - avg_depth) >> 8; avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; #endif CC_UNLOCK(cc); } /* * timeout -- * Execute a function after a specified length of time. * * untimeout -- * Cancel previous timeout function call. * * callout_handle_init -- * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle timeout(ftn, arg, to_ticks) timeout_t *ftn; void *arg; int to_ticks; { struct callout_cpu *cc; struct callout *new; struct callout_handle handle; cc = CC_CPU(timeout_cpu); CC_LOCK(cc); /* Fill in the next free callout structure. */ new = SLIST_FIRST(&cc->cc_callfree); if (new == NULL) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); return (handle); } void untimeout(ftn, arg, handle) timeout_t *ftn; void *arg; struct callout_handle handle; { struct callout_cpu *cc; /* * Check for a handle that was initialized * by callout_handle_init, but never used * for a real timeout. */ if (handle.callout == NULL) return; cc = callout_lock(handle.callout); if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); CC_UNLOCK(cc); } void callout_handle_init(struct callout_handle *handle) { handle->callout = NULL; } /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, void (*ftn)(void *), void *arg, int cpu, int flags) { sbintime_t to_sbt, pr; struct callout_cpu *cc; int cancelled, direct; cancelled = 0; if (flags & C_ABSOLUTE) { to_sbt = sbt; } else { if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; if ((flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { to_sbt = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ to_sbt += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* * Obtain the time of the last hardclock() call on * this CPU directly from the kern_clocksource.c. * This value is per-CPU, but it is equal for all * active ones. */ #ifdef __LP64__ to_sbt = DPCPU_GET(hardclocktime); #else spinlock_enter(); to_sbt = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif if ((flags & C_HARDCLOCK) == 0) to_sbt += tick_sbt; } else to_sbt = sbinuptime(); if (INT64_MAX - to_sbt < sbt) to_sbt = INT64_MAX; else to_sbt += sbt; pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : sbt >> C_PRELGET(flags)); if (pr > precision) precision = pr; } /* * Don't allow migration of pre-allocated callouts lest they * become unbalanced. */ if (c->c_flags & CALLOUT_LOCAL_ALLOC) cpu = c->c_cpu; direct = (c->c_flags & CALLOUT_DIRECT) != 0; KASSERT(!direct || c->c_lock == NULL, ("%s: direct callout %p has lock", __func__, c)); cc = callout_lock(c); if (cc->cc_exec_entity[direct].cc_curr == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel) cancelled = cc->cc_exec_entity[direct].cc_cancel = true; if (cc->cc_exec_entity[direct].cc_waiting) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. */ CTR4(KTR_CALLOUT, "%s %p func %p arg %p", cancelled ? "cancelled" : "failed to cancel", c, c->c_func, c->c_arg); CC_UNLOCK(cc); return (cancelled); } } if (c->c_flags & CALLOUT_PENDING) { if ((c->c_flags & CALLOUT_PROCESSED) == 0) { if (cc->cc_exec_next_dir == c) cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); cancelled = 1; c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); } #ifdef SMP /* * If the callout must migrate try to perform it immediately. * If the callout is currently running, just defer the migration * to a more appropriate moment. */ if (c->c_cpu != cpu) { if (cc->cc_exec_entity[direct].cc_curr == c) { cc->cc_exec_entity[direct].ce_migration_cpu = cpu; cc->cc_exec_entity[direct].ce_migration_time = to_sbt; cc->cc_exec_entity[direct].ce_migration_prec = precision; cc->cc_exec_entity[direct].ce_migration_func = ftn; cc->cc_exec_entity[direct].ce_migration_arg = arg; c->c_flags |= CALLOUT_DFRMIGRATION; CTR6(KTR_CALLOUT, "migration of %p func %p arg %p in %d.%08x to %u deferred", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff), cpu); CC_UNLOCK(cc); return (cancelled); } cc = callout_cpu_switch(c, cc, cpu); } #endif callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), (u_int)(to_sbt & 0xffffffff)); CC_UNLOCK(cc); return (cancelled); } /* * Common idioms that can be optimized in the future. */ int callout_schedule_on(struct callout *c, int to_ticks, int cpu) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu); } int callout_schedule(struct callout *c, int to_ticks) { return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu); } int _callout_stop_safe(c, safe) struct callout *c; int safe; { struct callout_cpu *cc, *old_cc; struct lock_class *class; int direct, sq_locked, use_lock; /* * Some old subsystems don't hold Giant while running a callout_stop(), * so just discard this check for the moment. */ if (!safe && c->c_lock != NULL) { if (c->c_lock == &Giant.lock_object) use_lock = mtx_owned(&Giant); else { use_lock = 1; class = LOCK_CLASS(c->c_lock); class->lc_assert(c->c_lock, LA_XLOCKED); } } else use_lock = 0; direct = (c->c_flags & CALLOUT_DIRECT) != 0; sq_locked = 0; old_cc = NULL; again: cc = callout_lock(c); /* * If the callout was migrating while the callout cpu lock was * dropped, just drop the sleepqueue lock and check the states * again. */ if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); sq_locked = 0; old_cc = NULL; goto again; #else panic("migration should not happen"); #endif } /* * If the callout isn't pending, it's not on the queue, so * don't attempt to remove it from the queue. We can try to * stop it by other means however. */ if (!(c->c_flags & CALLOUT_PENDING)) { c->c_flags &= ~CALLOUT_ACTIVE; /* * If it wasn't on the queue and it isn't the current * callout, then we can't stop it, so just bail. */ if (cc->cc_exec_entity[direct].cc_curr != c) { CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); if (sq_locked) sleepq_release( &cc->cc_exec_entity[direct].cc_waiting); return (0); } if (safe) { /* * The current callout is running (or just * about to run) and blocking is allowed, so * just wait for the current invocation to * finish. */ while (cc->cc_exec_entity[direct].cc_curr == c) { /* * Use direct calls to sleepqueue interface * instead of cv/msleep in order to avoid * a LOR between cc_lock and sleepqueue * chain spinlocks. This piece of code * emulates a msleep_spin() call actually. * * If we already have the sleepqueue chain * locked, then we can safely block. If we * don't already have it locked, however, * we have to drop the cc_lock to lock * it. This opens several races, so we * restart at the beginning once we have * both locks. If nothing has changed, then * we will end up back here with sq_locked * set. */ if (!sq_locked) { CC_UNLOCK(cc); sleepq_lock( &cc->cc_exec_entity[direct].cc_waiting); sq_locked = 1; old_cc = cc; goto again; } /* * Migration could be cancelled here, but * as long as it is still not sure when it * will be packed up, just let softclock() * take care of it. */ cc->cc_exec_entity[direct].cc_waiting = true; DROP_GIANT(); CC_UNLOCK(cc); sleepq_add( &cc->cc_exec_entity[direct].cc_waiting, &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); sleepq_wait( &cc->cc_exec_entity[direct].cc_waiting, 0); sq_locked = 0; old_cc = NULL; /* Reacquire locks previously released. */ PICKUP_GIANT(); CC_LOCK(cc); } } else if (use_lock && !cc->cc_exec_entity[direct].cc_cancel) { /* * The current callout is waiting for its * lock which we hold. Cancel the callout * and return. After our caller drops the * lock, the callout will be skipped in * softclock(). */ cc->cc_exec_entity[direct].cc_cancel = true; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); KASSERT(!cc_cce_migrating(cc, direct), ("callout wrongly scheduled for migration")); CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); return (1); } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { c->c_flags &= ~CALLOUT_DFRMIGRATION; CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); return (1); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain still locked")); return (0); } if (sq_locked) sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); if ((c->c_flags & CALLOUT_PROCESSED) == 0) { if (cc->cc_exec_next_dir == c) cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); LIST_REMOVE(c, c_links.le); } else TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); callout_cc_del(c, cc); CC_UNLOCK(cc); return (1); } void callout_init(c, mpsafe) struct callout *c; int mpsafe; { bzero(c, sizeof *c); if (mpsafe) { c->c_lock = NULL; c->c_flags = CALLOUT_RETURNUNLOCKED; } else { c->c_lock = &Giant.lock_object; c->c_flags = 0; } c->c_cpu = timeout_cpu; } void _callout_init_lock(c, lock, flags) struct callout *c; struct lock_object *lock; int flags; { bzero(c, sizeof *c); c->c_lock = lock; KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, ("callout_init_lock: bad flags %d", flags)); KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", __func__)); c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } #ifdef APM_FIXUP_CALLTODO /* * Adjust the kernel calltodo timeout list. This routine is used after * an APM resume to recalculate the calltodo timer list values with the * number of hz's we have been sleeping. The next hardclock() will detect * that there are fired timers and run softclock() to execute them. * * Please note, I have not done an exhaustive analysis of what code this * might break. I am motivated to have my select()'s and alarm()'s that * have expired during suspend firing upon resume so that the applications * which set the timer can do the maintanence the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers * firing, which seemed independant on whether the suspend was 2 hours or * 2 days. Your milage may vary. - Ken Key */ void adjust_timeout_calltodo(time_change) struct timeval *time_change; { register struct callout *p; unsigned long delta_ticks; /* * How many ticks were we asleep? * (stolen from tvtohz()). */ /* Don't do anything */ if (time_change->tv_sec < 0) return; else if (time_change->tv_sec <= LONG_MAX / 1000000) delta_ticks = (time_change->tv_sec * 1000000 + time_change->tv_usec + (tick - 1)) / tick + 1; else if (time_change->tv_sec <= LONG_MAX / hz) delta_ticks = time_change->tv_sec * hz + (time_change->tv_usec + (tick - 1)) / tick + 1; else delta_ticks = LONG_MAX; if (delta_ticks > INT_MAX) delta_ticks = INT_MAX; /* * Now rip through the timer calltodo list looking for timers * to expire. */ /* don't collide with softclock() */ CC_LOCK(cc); for (p = calltodo.c_next; p != NULL; p = p->c_next) { p->c_time -= delta_ticks; /* Break if the timer had more time on it than delta_ticks */ if (p->c_time > 0) break; /* take back the ticks the timer didn't use (p->c_time <= 0) */ delta_ticks = -p->c_time; } CC_UNLOCK(cc); return; } #endif /* APM_FIXUP_CALLTODO */ static int flssbt(sbintime_t sbt) { sbt += (uint64_t)sbt >> 1; if (sizeof(long) >= sizeof(sbintime_t)) return (flsl(sbt)); if (sbt >= SBT_1S) return (flsl(((uint64_t)sbt) >> 32) + 32); return (flsl(sbt)); } /* * Dump immediate statistic snapshot of the scheduled callouts. */ static int sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) { struct callout *tmp; struct callout_cpu *cc; struct callout_list *sc; sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; int ct[64], cpr[64], ccpbk[32]; int error, val, i, count, tcum, pcum, maxc, c, medc; #ifdef SMP int cpu; #endif val = 0; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); count = maxc = 0; st = spr = maxt = maxpr = 0; bzero(ccpbk, sizeof(ccpbk)); bzero(ct, sizeof(ct)); bzero(cpr, sizeof(cpr)); now = sbinuptime(); #ifdef SMP CPU_FOREACH(cpu) { cc = CC_CPU(cpu); #else cc = CC_CPU(timeout_cpu); #endif CC_LOCK(cc); for (i = 0; i < callwheelsize; i++) { sc = &cc->cc_callwheel[i]; c = 0; LIST_FOREACH(tmp, sc, c_links.le) { c++; t = tmp->c_time - now; if (t < 0) t = 0; st += t / SBT_1US; spr += tmp->c_precision / SBT_1US; if (t > maxt) maxt = t; if (tmp->c_precision > maxpr) maxpr = tmp->c_precision; ct[flssbt(t)]++; cpr[flssbt(tmp->c_precision)]++; } if (c > maxc) maxc = c; ccpbk[fls(c + c / 2)]++; count += c; } CC_UNLOCK(cc); #ifdef SMP } #endif for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) tcum += ct[i]; medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) pcum += cpr[i]; medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; for (i = 0, c = 0; i < 32 && c < count / 2; i++) c += ccpbk[i]; medc = (i >= 2) ? (1 << (i - 2)) : 0; printf("Scheduled callouts statistic snapshot:\n"); printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", medc, count / callwheelsize / mp_ncpus, (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, maxc); printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, (st / count) / 1000000, (st / count) % 1000000, maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, (spr / count) / 1000000, (spr / count) % 1000000, maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); printf(" Distribution: \tbuckets\t time\t tcum\t" " prec\t pcum\n"); for (i = 0, tcum = pcum = 0; i < 64; i++) { if (ct[i] == 0 && cpr[i] == 0) continue; t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0; tcum += ct[i]; pcum += cpr[i]; printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n", t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32, i - 1 - (32 - CC_HASH_SHIFT), ct[i], tcum, cpr[i], pcum); } return (error); } SYSCTL_PROC(_kern, OID_AUTO, callout_stat, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_callout_stat, "I", "Dump immediate statistic snapshot of the scheduled callouts"); Index: stable/10/tools/sched/schedgraph.py =================================================================== --- stable/10/tools/sched/schedgraph.py (revision 278649) +++ stable/10/tools/sched/schedgraph.py (revision 278650) @@ -1,1625 +1,1623 @@ #!/usr/local/bin/python # Copyright (c) 2002-2003, 2009, Jeffrey Roberson # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice unmodified, this list of conditions, and the following # disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # $FreeBSD$ import sys import re import random from Tkinter import * # To use: # - Install the ports/x11-toolkits/py-tkinter package; e.g. # portinstall x11-toolkits/py-tkinter package # - Add KTR_SCHED to KTR_COMPILE and KTR_MASK in your KERNCONF; e.g. # options KTR # options KTR_ENTRIES=32768 # options KTR_COMPILE=(KTR_SCHED) # options KTR_MASK=(KTR_SCHED) # - It is encouraged to increase KTR_ENTRIES size to gather enough # information for analysis; e.g. # options KTR_ENTRIES=262144 # as 32768 entries may only correspond to a second or two of profiling # data depending on your workload. # - Rebuild kernel with proper changes to KERNCONF and boot new kernel. # - Run your workload to be profiled. # - While the workload is continuing (i.e. before it finishes), disable # KTR tracing by setting 'sysctl debug.ktr.mask=0'. This is necessary # to avoid a race condition while running ktrdump, i.e. the KTR ring buffer # will cycle a bit while ktrdump runs, and this confuses schedgraph because # the timestamps appear to go backwards at some point. Stopping KTR logging # while the workload is still running is to avoid wasting log entries on # "idle" time at the end. # - Dump the trace to a file: 'ktrdump -ct > ktr.out' # - Run the python script: 'python schedgraph.py ktr.out' optionally provide # your cpu frequency in ghz: 'python schedgraph.py ktr.out 2.4' # # To do: # Add a per-source summary display # "Vertical rule" to help relate data in different rows # Mouse-over popup of full thread/event/row label (currently truncated) # More visible anchors for popup event windows # # BUGS: 1) Only 8 CPUs are supported, more CPUs require more choices of # colours to represent them ;-) eventcolors = [ ("count", "red"), ("running", "green"), ("idle", "grey"), + ("spinning", "red"), ("yielding", "yellow"), ("swapped", "violet"), ("suspended", "purple"), ("iwait", "grey"), ("sleep", "blue"), ("blocked", "dark red"), ("runq add", "yellow"), ("runq rem", "yellow"), ("thread exit", "grey"), ("proc exit", "grey"), - ("callwheel idle", "grey"), - ("callout running", "green"), ("lock acquire", "blue"), ("lock contest", "purple"), ("failed lock try", "red"), ("lock release", "grey"), ("statclock", "black"), ("prio", "black"), ("lend prio", "black"), ("wokeup", "black") ] cpucolors = [ ("CPU 0", "light grey"), ("CPU 1", "dark grey"), ("CPU 2", "light blue"), ("CPU 3", "light pink"), ("CPU 4", "blanched almond"), ("CPU 5", "slate grey"), ("CPU 6", "tan"), ("CPU 7", "thistle"), ("CPU 8", "white") ] colors = [ "white", "thistle", "blanched almond", "tan", "chartreuse", "dark red", "red", "pale violet red", "pink", "light pink", "dark orange", "orange", "coral", "light coral", "goldenrod", "gold", "yellow", "light yellow", "dark green", "green", "light green", "light sea green", "dark blue", "blue", "light blue", "steel blue", "light slate blue", "dark violet", "violet", "purple", "blue violet", "dark grey", "slate grey", "light grey", "black", ] colors.sort() ticksps = None status = None colormap = None ktrfile = None clockfreq = None sources = [] lineno = -1 Y_BORDER = 10 X_BORDER = 10 Y_COUNTER = 80 Y_EVENTSOURCE = 10 XY_POINT = 4 class Colormap: def __init__(self, table): self.table = table self.map = {} for entry in table: self.map[entry[0]] = entry[1] def lookup(self, name): try: color = self.map[name] except: color = colors[random.randrange(0, len(colors))] print "Picking random color", color, "for", name self.map[name] = color self.table.append((name, color)) return (color) def ticks2sec(ticks): ticks = float(ticks) ns = float(ticksps) / 1000000000 ticks /= ns if (ticks < 1000): return ("%.2fns" % ticks) ticks /= 1000 if (ticks < 1000): return ("%.2fus" % ticks) ticks /= 1000 if (ticks < 1000): return ("%.2fms" % ticks) ticks /= 1000 return ("%.2fs" % ticks) class Scaler(Frame): def __init__(self, master, target): Frame.__init__(self, master) self.scale = None self.target = target self.label = Label(self, text="Ticks per pixel") self.label.pack(side=LEFT) self.resolution = 100 self.setmax(10000) def scaleset(self, value): self.target.scaleset(int(value)) def set(self, value): self.scale.set(value) def setmax(self, value): # # We can't reconfigure the to_ value so we delete the old # window and make a new one when we resize. # if (self.scale != None): self.scale.pack_forget() self.scale.destroy() self.scale = Scale(self, command=self.scaleset, from_=100, to_=value, orient=HORIZONTAL, resolution=self.resolution) self.scale.pack(fill="both", expand=1) self.scale.set(self.target.scaleget()) class Status(Frame): def __init__(self, master): Frame.__init__(self, master) self.label = Label(self, bd=1, relief=SUNKEN, anchor=W) self.label.pack(fill="both", expand=1) self.clear() def set(self, str): self.label.config(text=str) def clear(self): self.label.config(text="") def startup(self, str): self.set(str) root.update() class ColorConf(Frame): def __init__(self, master, name, color): Frame.__init__(self, master) if (graph.getstate(name) == "hidden"): enabled = 0 else: enabled = 1 self.name = name self.color = StringVar() self.color_default = color self.color_current = color self.color.set(color) self.enabled = IntVar() self.enabled_default = enabled self.enabled_current = enabled self.enabled.set(enabled) self.draw() def draw(self): self.label = Label(self, text=self.name, anchor=W) self.sample = Canvas(self, width=24, height=24, bg='grey') self.rect = self.sample.create_rectangle(0, 0, 24, 24, fill=self.color.get()) self.list = OptionMenu(self, self.color, command=self.setcolor, *colors) self.checkbox = Checkbutton(self, text="enabled", variable=self.enabled) self.label.grid(row=0, column=0, sticky=E+W) self.sample.grid(row=0, column=1) self.list.grid(row=0, column=2, sticky=E+W) self.checkbox.grid(row=0, column=3) self.columnconfigure(0, weight=1) self.columnconfigure(2, minsize=150) def setcolor(self, color): self.color.set(color) self.sample.itemconfigure(self.rect, fill=color) def apply(self): cchange = 0 echange = 0 if (self.color_current != self.color.get()): cchange = 1 if (self.enabled_current != self.enabled.get()): echange = 1 self.color_current = self.color.get() self.enabled_current = self.enabled.get() if (echange != 0): if (self.enabled_current): graph.setcolor(self.name, self.color_current) else: graph.hide(self.name) return if (cchange != 0): graph.setcolor(self.name, self.color_current) def revert(self): self.setcolor(self.color_default) self.enabled.set(self.enabled_default) class ColorConfigure(Toplevel): def __init__(self, table, name): Toplevel.__init__(self) self.resizable(0, 0) self.title(name) self.items = LabelFrame(self, text="Item Type") self.buttons = Frame(self) self.drawbuttons() self.items.grid(row=0, column=0, sticky=E+W) self.columnconfigure(0, weight=1) self.buttons.grid(row=1, column=0, sticky=E+W) self.types = [] self.irow = 0 for type in table: color = graph.getcolor(type[0]) if (color != ""): self.additem(type[0], color) def additem(self, name, color): item = ColorConf(self.items, name, color) self.types.append(item) item.grid(row=self.irow, column=0, sticky=E+W) self.irow += 1 def drawbuttons(self): self.apply = Button(self.buttons, text="Apply", command=self.apress) self.default = Button(self.buttons, text="Revert", command=self.rpress) self.apply.grid(row=0, column=0, sticky=E+W) self.default.grid(row=0, column=1, sticky=E+W) self.buttons.columnconfigure(0, weight=1) self.buttons.columnconfigure(1, weight=1) def apress(self): for item in self.types: item.apply() def rpress(self): for item in self.types: item.revert() class SourceConf(Frame): def __init__(self, master, source): Frame.__init__(self, master) if (source.hidden == 1): enabled = 0 else: enabled = 1 self.source = source self.name = source.name self.enabled = IntVar() self.enabled_default = enabled self.enabled_current = enabled self.enabled.set(enabled) self.draw() def draw(self): self.label = Label(self, text=self.name, anchor=W) self.checkbox = Checkbutton(self, text="enabled", variable=self.enabled) self.label.grid(row=0, column=0, sticky=E+W) self.checkbox.grid(row=0, column=1) self.columnconfigure(0, weight=1) def changed(self): if (self.enabled_current != self.enabled.get()): return 1 return 0 def apply(self): self.enabled_current = self.enabled.get() def revert(self): self.enabled.set(self.enabled_default) def check(self): self.enabled.set(1) def uncheck(self): self.enabled.set(0) class SourceConfigure(Toplevel): def __init__(self): Toplevel.__init__(self) self.resizable(0, 0) self.title("Source Configuration") self.items = [] self.iframe = Frame(self) self.iframe.grid(row=0, column=0, sticky=E+W) f = LabelFrame(self.iframe, bd=4, text="Sources") self.items.append(f) self.buttons = Frame(self) self.items[0].grid(row=0, column=0, sticky=E+W) self.columnconfigure(0, weight=1) self.sconfig = [] self.irow = 0 self.icol = 0 for source in sources: self.addsource(source) self.drawbuttons() self.buttons.grid(row=1, column=0, sticky=W) def addsource(self, source): if (self.irow > 30): self.icol += 1 self.irow = 0 c = self.icol f = LabelFrame(self.iframe, bd=4, text="Sources") f.grid(row=0, column=c, sticky=N+E+W) self.items.append(f) item = SourceConf(self.items[self.icol], source) self.sconfig.append(item) item.grid(row=self.irow, column=0, sticky=E+W) self.irow += 1 def drawbuttons(self): self.apply = Button(self.buttons, text="Apply", command=self.apress) self.default = Button(self.buttons, text="Revert", command=self.rpress) self.checkall = Button(self.buttons, text="Check All", command=self.cpress) self.uncheckall = Button(self.buttons, text="Uncheck All", command=self.upress) self.checkall.grid(row=0, column=0, sticky=W) self.uncheckall.grid(row=0, column=1, sticky=W) self.apply.grid(row=0, column=2, sticky=W) self.default.grid(row=0, column=3, sticky=W) self.buttons.columnconfigure(0, weight=1) self.buttons.columnconfigure(1, weight=1) self.buttons.columnconfigure(2, weight=1) self.buttons.columnconfigure(3, weight=1) def apress(self): disable_sources = [] enable_sources = [] for item in self.sconfig: if (item.changed() == 0): continue if (item.enabled.get() == 1): enable_sources.append(item.source) else: disable_sources.append(item.source) if (len(disable_sources)): graph.sourcehidelist(disable_sources) if (len(enable_sources)): graph.sourceshowlist(enable_sources) for item in self.sconfig: item.apply() def rpress(self): for item in self.sconfig: item.revert() def cpress(self): for item in self.sconfig: item.check() def upress(self): for item in self.sconfig: item.uncheck() # Reverse compare of second member of the tuple def cmp_counts(x, y): return y[1] - x[1] class SourceStats(Toplevel): def __init__(self, source): self.source = source Toplevel.__init__(self) self.resizable(0, 0) self.title(source.name + " statistics") self.evframe = LabelFrame(self, text="Event Count, Duration, Avg Duration") self.evframe.grid(row=0, column=0, sticky=E+W) eventtypes={} for event in self.source.events: if (event.type == "pad"): continue duration = event.duration if (eventtypes.has_key(event.name)): (c, d) = eventtypes[event.name] c += 1 d += duration eventtypes[event.name] = (c, d) else: eventtypes[event.name] = (1, duration) events = [] for k, v in eventtypes.iteritems(): (c, d) = v events.append((k, c, d)) events.sort(cmp=cmp_counts) ypos = 0 for event in events: (name, c, d) = event Label(self.evframe, text=name, bd=1, relief=SUNKEN, anchor=W, width=30).grid( row=ypos, column=0, sticky=W+E) Label(self.evframe, text=str(c), bd=1, relief=SUNKEN, anchor=W, width=10).grid( row=ypos, column=1, sticky=W+E) Label(self.evframe, text=ticks2sec(d), bd=1, relief=SUNKEN, width=10).grid( row=ypos, column=2, sticky=W+E) if (d and c): d /= c else: d = 0 Label(self.evframe, text=ticks2sec(d), bd=1, relief=SUNKEN, width=10).grid( row=ypos, column=3, sticky=W+E) ypos += 1 class SourceContext(Menu): def __init__(self, event, source): self.source = source Menu.__init__(self, tearoff=0, takefocus=0) self.add_command(label="hide", command=self.hide) self.add_command(label="hide group", command=self.hidegroup) self.add_command(label="stats", command=self.stats) self.tk_popup(event.x_root-3, event.y_root+3) def hide(self): graph.sourcehide(self.source) def hidegroup(self): grouplist = [] for source in sources: if (source.group == self.source.group): grouplist.append(source) graph.sourcehidelist(grouplist) def show(self): graph.sourceshow(self.source) def stats(self): SourceStats(self.source) class EventView(Toplevel): def __init__(self, event, canvas): Toplevel.__init__(self) self.resizable(0, 0) self.title("Event") self.event = event self.buttons = Frame(self) self.buttons.grid(row=0, column=0, sticky=E+W) self.frame = Frame(self) self.frame.grid(row=1, column=0, sticky=N+S+E+W) self.canvas = canvas self.drawlabels() self.drawbuttons() event.displayref(canvas) self.bind("", self.destroycb) def destroycb(self, event): self.unbind("") if (self.event != None): self.event.displayunref(self.canvas) self.event = None self.destroy() def clearlabels(self): for label in self.frame.grid_slaves(): label.grid_remove() def drawlabels(self): ypos = 0 labels = self.event.labels() while (len(labels) < 7): labels.append(("", "")) for label in labels: name, value = label linked = 0 if (name == "linkedto"): linked = 1 l = Label(self.frame, text=name, bd=1, width=15, relief=SUNKEN, anchor=W) if (linked): fgcolor = "blue" else: fgcolor = "black" r = Label(self.frame, text=value, bd=1, relief=SUNKEN, anchor=W, fg=fgcolor) l.grid(row=ypos, column=0, sticky=E+W) r.grid(row=ypos, column=1, sticky=E+W) if (linked): r.bind("", self.linkpress) ypos += 1 self.frame.columnconfigure(1, minsize=80) def drawbuttons(self): self.back = Button(self.buttons, text="<", command=self.bpress) self.forw = Button(self.buttons, text=">", command=self.fpress) self.new = Button(self.buttons, text="new", command=self.npress) self.back.grid(row=0, column=0, sticky=E+W) self.forw.grid(row=0, column=1, sticky=E+W) self.new.grid(row=0, column=2, sticky=E+W) self.buttons.columnconfigure(2, weight=1) def newevent(self, event): self.event.displayunref(self.canvas) self.clearlabels() self.event = event self.event.displayref(self.canvas) self.drawlabels() def npress(self): EventView(self.event, self.canvas) def bpress(self): prev = self.event.prev() if (prev == None): return while (prev.type == "pad"): prev = prev.prev() if (prev == None): return self.newevent(prev) def fpress(self): next = self.event.next() if (next == None): return while (next.type == "pad"): next = next.next() if (next == None): return self.newevent(next) def linkpress(self, wevent): event = self.event.getlinked() if (event != None): self.newevent(event) class Event: def __init__(self, source, name, cpu, timestamp, attrs): self.source = source self.name = name self.cpu = cpu self.timestamp = int(timestamp) self.attrs = attrs self.idx = None self.item = None self.dispcnt = 0 self.duration = 0 self.recno = lineno def status(self): statstr = self.name + " " + self.source.name statstr += " on: cpu" + str(self.cpu) statstr += " at: " + str(self.timestamp) statstr += " attributes: " for i in range(0, len(self.attrs)): attr = self.attrs[i] statstr += attr[0] + ": " + str(attr[1]) if (i != len(self.attrs) - 1): statstr += ", " status.set(statstr) def labels(self): return [("Source", self.source.name), ("Event", self.name), ("CPU", self.cpu), ("Timestamp", self.timestamp), ("KTR Line ", self.recno) ] + self.attrs def mouseenter(self, canvas): self.displayref(canvas) self.status() def mouseexit(self, canvas): self.displayunref(canvas) status.clear() def mousepress(self, canvas): EventView(self, canvas) def draw(self, canvas, xpos, ypos, item): self.item = item if (item != None): canvas.items[item] = self def move(self, canvas, x, y): if (self.item == None): return; canvas.move(self.item, x, y); def next(self): return self.source.eventat(self.idx + 1) def nexttype(self, type): next = self.next() while (next != None and next.type != type): next = next.next() return (next) def prev(self): return self.source.eventat(self.idx - 1) def displayref(self, canvas): if (self.dispcnt == 0): canvas.itemconfigure(self.item, width=2) self.dispcnt += 1 def displayunref(self, canvas): self.dispcnt -= 1 if (self.dispcnt == 0): canvas.itemconfigure(self.item, width=0) canvas.tag_raise("point", "state") def getlinked(self): for attr in self.attrs: if (attr[0] != "linkedto"): continue source = ktrfile.findid(attr[1]) return source.findevent(self.timestamp) return None class PointEvent(Event): type = "point" def __init__(self, source, name, cpu, timestamp, attrs): Event.__init__(self, source, name, cpu, timestamp, attrs) def draw(self, canvas, xpos, ypos): color = colormap.lookup(self.name) l = canvas.create_oval(xpos - XY_POINT, ypos, xpos + XY_POINT, ypos - (XY_POINT * 2), fill=color, width=0, tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return xpos class StateEvent(Event): type = "state" def __init__(self, source, name, cpu, timestamp, attrs): Event.__init__(self, source, name, cpu, timestamp, attrs) def draw(self, canvas, xpos, ypos): next = self.nexttype("state") if (next == None): return (xpos) self.duration = duration = next.timestamp - self.timestamp self.attrs.insert(0, ("duration", ticks2sec(duration))) color = colormap.lookup(self.name) if (duration < 0): duration = 0 print "Unsynchronized timestamp" print self.cpu, self.timestamp print next.cpu, next.timestamp delta = duration / canvas.ratio l = canvas.create_rectangle(xpos, ypos, xpos + delta, ypos - 10, fill=color, width=0, tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return (xpos + delta) class CountEvent(Event): type = "count" def __init__(self, source, count, cpu, timestamp, attrs): count = int(count) self.count = count Event.__init__(self, source, "count", cpu, timestamp, attrs) def draw(self, canvas, xpos, ypos): next = self.nexttype("count") if (next == None): return (xpos) color = colormap.lookup("count") self.duration = duration = next.timestamp - self.timestamp if (duration < 0): duration = 0 print "Unsynchronized timestamp" print self.cpu, self.timestamp print next.cpu, next.timestamp self.attrs.insert(0, ("count", self.count)) self.attrs.insert(1, ("duration", ticks2sec(duration))) delta = duration / canvas.ratio yhight = self.source.yscale() * self.count l = canvas.create_rectangle(xpos, ypos - yhight, xpos + delta, ypos, fill=color, width=0, tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return (xpos + delta) class PadEvent(StateEvent): type = "pad" def __init__(self, source, cpu, timestamp, last=0): if (last): cpu = source.events[len(source.events) -1].cpu else: cpu = source.events[0].cpu StateEvent.__init__(self, source, "pad", cpu, timestamp, []) def draw(self, canvas, xpos, ypos): next = self.next() if (next == None): return (xpos) duration = next.timestamp - self.timestamp delta = duration / canvas.ratio Event.draw(self, canvas, xpos, ypos, None) return (xpos + delta) # Sort function for start y address def source_cmp_start(x, y): return x.y - y.y class EventSource: def __init__(self, group, id): self.name = id self.events = [] self.cpuitems = [] self.group = group self.y = 0 self.item = None self.hidden = 0 self.tag = group + id def __cmp__(self, other): if (other == None): return -1 if (self.group == other.group): return cmp(self.name, other.name) return cmp(self.group, other.group) # It is much faster to append items to a list then to insert them # at the beginning. As a result, we add events in reverse order # and then swap the list during fixup. def fixup(self): self.events.reverse() def addevent(self, event): self.events.append(event) def addlastevent(self, event): self.events.insert(0, event) def draw(self, canvas, ypos): xpos = 10 cpux = 10 cpu = self.events[1].cpu for i in range(0, len(self.events)): self.events[i].idx = i for event in self.events: if (event.cpu != cpu and event.cpu != -1): self.drawcpu(canvas, cpu, cpux, xpos, ypos) cpux = xpos cpu = event.cpu xpos = event.draw(canvas, xpos, ypos) self.drawcpu(canvas, cpu, cpux, xpos, ypos) def drawname(self, canvas, ypos): self.y = ypos ypos = ypos - (self.ysize() / 2) self.item = canvas.create_text(X_BORDER, ypos, anchor="w", text=self.name) return (self.item) def drawcpu(self, canvas, cpu, fromx, tox, ypos): cpu = "CPU " + str(cpu) color = cpucolormap.lookup(cpu) # Create the cpu background colors default to hidden l = canvas.create_rectangle(fromx, ypos - self.ysize() - canvas.bdheight, tox, ypos + canvas.bdheight, fill=color, width=0, tags=("cpubg", cpu, self.tag), state="hidden") self.cpuitems.append(l) def move(self, canvas, xpos, ypos): canvas.move(self.tag, xpos, ypos) def movename(self, canvas, xpos, ypos): self.y += ypos canvas.move(self.item, xpos, ypos) def ysize(self): return (Y_EVENTSOURCE) def eventat(self, i): - if (i >= len(self.events)): + if (i >= len(self.events) or i < 0): return (None) event = self.events[i] return (event) def findevent(self, timestamp): for event in self.events: if (event.timestamp >= timestamp and event.type != "pad"): return (event) return (None) class Counter(EventSource): # # Store a hash of counter groups that keeps the max value # for a counter in this group for scaling purposes. # groups = {} def __init__(self, group, id): try: Counter.cnt = Counter.groups[group] except: Counter.groups[group] = 0 EventSource.__init__(self, group, id) def fixup(self): for event in self.events: if (event.type != "count"): continue; count = int(event.count) if (count > Counter.groups[self.group]): Counter.groups[self.group] = count EventSource.fixup(self) def ymax(self): return (Counter.groups[self.group]) def ysize(self): return (Y_COUNTER) def yscale(self): return (self.ysize() / self.ymax()) class KTRFile: def __init__(self, file): self.timestamp_f = None self.timestamp_l = None self.locks = {} - self.callwheels = {} self.ticks = {} self.load = {} self.crit = {} self.stathz = 0 self.eventcnt = 0 self.taghash = {} self.parse(file) self.fixup() global ticksps ticksps = self.ticksps() span = self.timespan() ghz = float(ticksps) / 1000000000.0 # # Update the title with some stats from the file # titlestr = "SchedGraph: " titlestr += ticks2sec(span) + " at %.3f ghz, " % ghz titlestr += str(len(sources)) + " event sources, " titlestr += str(self.eventcnt) + " events" root.title(titlestr) def parse(self, file): try: ifp = open(file) except: print "Can't open", file sys.exit(1) # quoteexp matches a quoted string, no escaping quoteexp = "\"([^\"]*)\"" # # commaexp matches a quoted string OR the string up # to the first ',' # commaexp = "(?:" + quoteexp + "|([^,]+))" # # colonstr matches a quoted string OR the string up # to the first ':' # colonexp = "(?:" + quoteexp + "|([^:]+))" # # Match various manditory parts of the KTR string this is # fairly inflexible until you get to attributes to make # parsing faster. # hdrexp = "\s*(\d+)\s+(\d+)\s+(\d+)\s+" groupexp = "KTRGRAPH group:" + quoteexp + ", " idexp = "id:" + quoteexp + ", " typeexp = "([^:]+):" + commaexp + ", " attribexp = "attributes: (.*)" # # Matches optional attributes in the KTR string. This # tolerates more variance as the users supply these values. # attrexp = colonexp + "\s*:\s*(?:" + commaexp + ", (.*)|" attrexp += quoteexp +"|(.*))" # Precompile regexp ktrre = re.compile(hdrexp + groupexp + idexp + typeexp + attribexp) attrre = re.compile(attrexp) global lineno lineno = 0 for line in ifp.readlines(): lineno += 1 if ((lineno % 2048) == 0): status.startup("Parsing line " + str(lineno)) m = ktrre.match(line); if (m == None): print "Can't parse", lineno, line, continue; (index, cpu, timestamp, group, id, type, dat, dat1, attrstring) = m.groups(); if (dat == None): dat = dat1 if (self.checkstamp(timestamp) == 0): print "Bad timestamp at", lineno, ":", print cpu, timestamp continue # # Build the table of optional attributes # attrs = [] while (attrstring != None): m = attrre.match(attrstring.strip()) if (m == None): break; # # Name may or may not be quoted. # # For val we have four cases: # 1) quotes followed by comma and more # attributes. # 2) no quotes followed by comma and more # attributes. # 3) no more attributes or comma with quotes. # 4) no more attributes or comma without quotes. # (name, name1, val, val1, attrstring, end, end1) = m.groups(); if (name == None): name = name1 if (end == None): end = end1 if (val == None): val = val1 if (val == None): val = end if (name == "stathz"): self.setstathz(val, cpu) attrs.append((name, val)) args = (dat, cpu, timestamp, attrs) e = self.makeevent(group, id, type, args) if (e == None): print "Unknown type", type, lineno, line, def makeevent(self, group, id, type, args): e = None source = self.makeid(group, id, type) if (type == "state"): e = StateEvent(source, *args) elif (type == "counter"): e = CountEvent(source, *args) elif (type == "point"): e = PointEvent(source, *args) if (e != None): self.eventcnt += 1 source.addevent(e); return e def setstathz(self, val, cpu): self.stathz = int(val) cpu = int(cpu) try: ticks = self.ticks[cpu] except: self.ticks[cpu] = 0 self.ticks[cpu] += 1 def checkstamp(self, timestamp): timestamp = int(timestamp) if (self.timestamp_f == None): self.timestamp_f = timestamp; if (self.timestamp_l != None and timestamp -2048> self.timestamp_l): return (0) self.timestamp_l = timestamp; return (1) def makeid(self, group, id, type): tag = group + id if (self.taghash.has_key(tag)): return self.taghash[tag] if (type == "counter"): source = Counter(group, id) else: source = EventSource(group, id) sources.append(source) self.taghash[tag] = source return (source) def findid(self, id): for source in sources: if (source.name == id): return source return (None) def timespan(self): return (self.timestamp_f - self.timestamp_l); def ticksps(self): oneghz = 1000000000 # Use user supplied clock first if (clockfreq != None): return int(clockfreq * oneghz) # Check for a discovered clock if (self.stathz != 0): return (self.timespan() / self.ticks[0]) * int(self.stathz) # Pretend we have a 1ns clock print "WARNING: No clock discovered and no frequency ", print "specified via the command line." print "Using fake 1ghz clock" return (oneghz); def fixup(self): for source in sources: e = PadEvent(source, -1, self.timestamp_l) source.addevent(e) e = PadEvent(source, -1, self.timestamp_f, last=1) source.addlastevent(e) source.fixup() sources.sort() class SchedNames(Canvas): def __init__(self, master, display): self.display = display self.parent = master self.bdheight = master.bdheight self.items = {} self.ysize = 0 self.lines = [] Canvas.__init__(self, master, width=120, height=display["height"], bg='grey', scrollregion=(0, 0, 50, 100)) def moveline(self, cur_y, y): for line in self.lines: (x0, y0, x1, y1) = self.coords(line) if (cur_y != y0): continue self.move(line, 0, y) return def draw(self): status.startup("Drawing names") ypos = 0 self.configure(scrollregion=(0, 0, self["width"], self.display.ysize())) for source in sources: l = self.create_line(0, ypos, self["width"], ypos, width=1, fill="black", tags=("all","sources")) self.lines.append(l) ypos += self.bdheight ypos += source.ysize() t = source.drawname(self, ypos) self.items[t] = source ypos += self.bdheight self.ysize = ypos self.create_line(0, ypos, self["width"], ypos, width=1, fill="black", tags=("all",)) self.bind("", self.master.mousepress); self.bind("", self.master.mousepressright); self.bind("", self.master.mouserelease); self.bind("", self.master.mousemotion); def updatescroll(self): self.configure(scrollregion=(0, 0, self["width"], self.display.ysize())) class SchedDisplay(Canvas): def __init__(self, master): self.ratio = 1 self.parent = master self.bdheight = master.bdheight self.items = {} self.lines = [] Canvas.__init__(self, master, width=800, height=500, bg='grey', scrollregion=(0, 0, 800, 500)) def prepare(self): # # Compute a ratio to ensure that the file's timespan fits into # 2^31. Although python may handle larger values for X # values, the Tk internals do not. # self.ratio = (ktrfile.timespan() - 1) / 2**31 + 1 def draw(self): ypos = 0 xsize = self.xsize() for source in sources: status.startup("Drawing " + source.name) l = self.create_line(0, ypos, xsize, ypos, width=1, fill="black", tags=("all",)) self.lines.append(l) ypos += self.bdheight ypos += source.ysize() source.draw(self, ypos) ypos += self.bdheight self.tag_raise("point", "state") self.tag_lower("cpubg", ALL) self.create_line(0, ypos, xsize, ypos, width=1, fill="black", tags=("lines",)) self.tag_bind("event", "", self.mouseenter) self.tag_bind("event", "", self.mouseexit) self.bind("", self.mousepress) self.bind("", self.master.mousepressright); self.bind("", self.wheelup) self.bind("", self.wheeldown) self.bind("", self.master.mouserelease); self.bind("", self.master.mousemotion); def moveline(self, cur_y, y): for line in self.lines: (x0, y0, x1, y1) = self.coords(line) if (cur_y != y0): continue self.move(line, 0, y) return def mouseenter(self, event): item, = self.find_withtag(CURRENT) self.items[item].mouseenter(self) def mouseexit(self, event): item, = self.find_withtag(CURRENT) self.items[item].mouseexit(self) def mousepress(self, event): # Find out what's beneath us items = self.find_withtag(CURRENT) if (len(items) == 0): self.master.mousepress(event) return # Only grab mouse presses for things with event tags. item = items[0] tags = self.gettags(item) for tag in tags: if (tag == "event"): self.items[item].mousepress(self) return # Leave the rest to the master window self.master.mousepress(event) def wheeldown(self, event): self.parent.display_yview("scroll", 1, "units") def wheelup(self, event): self.parent.display_yview("scroll", -1, "units") def xsize(self): return ((ktrfile.timespan() / self.ratio) + (X_BORDER * 2)) def ysize(self): ysize = 0 for source in sources: if (source.hidden == 1): continue ysize += self.parent.sourcesize(source) return ysize def scaleset(self, ratio): if (ktrfile == None): return oldratio = self.ratio xstart, xend = self.xview() midpoint = xstart + ((xend - xstart) / 2) self.ratio = ratio self.updatescroll() self.scale(ALL, 0, 0, float(oldratio) / ratio, 1) xstart, xend = self.xview() xsize = (xend - xstart) / 2 self.xview_moveto(midpoint - xsize) def updatescroll(self): self.configure(scrollregion=(0, 0, self.xsize(), self.ysize())) def scaleget(self): return self.ratio def getcolor(self, tag): return self.itemcget(tag, "fill") def getstate(self, tag): return self.itemcget(tag, "state") def setcolor(self, tag, color): self.itemconfigure(tag, state="normal", fill=color) def hide(self, tag): self.itemconfigure(tag, state="hidden") class GraphMenu(Frame): def __init__(self, master): Frame.__init__(self, master, bd=2, relief=RAISED) self.conf = Menubutton(self, text="Configure") self.confmenu = Menu(self.conf, tearoff=0) self.confmenu.add_command(label="Event Colors", command=self.econf) self.confmenu.add_command(label="CPU Colors", command=self.cconf) self.confmenu.add_command(label="Source Configure", command=self.sconf) self.conf["menu"] = self.confmenu self.conf.pack(side=LEFT) def econf(self): ColorConfigure(eventcolors, "Event Display Configuration") def cconf(self): ColorConfigure(cpucolors, "CPU Background Colors") def sconf(self): SourceConfigure() class SchedGraph(Frame): def __init__(self, master): Frame.__init__(self, master) self.menu = None self.names = None self.display = None self.scale = None self.status = None self.bdheight = Y_BORDER self.clicksource = None self.lastsource = None self.pack(expand=1, fill="both") self.buildwidgets() self.layout() def buildwidgets(self): global status self.menu = GraphMenu(self) self.display = SchedDisplay(self) self.names = SchedNames(self, self.display) self.scale = Scaler(self, self.display) status = self.status = Status(self) self.scrollY = Scrollbar(self, orient="vertical", command=self.display_yview) self.display.scrollX = Scrollbar(self, orient="horizontal", command=self.display.xview) self.display["xscrollcommand"] = self.display.scrollX.set self.display["yscrollcommand"] = self.scrollY.set self.names["yscrollcommand"] = self.scrollY.set def layout(self): self.columnconfigure(1, weight=1) self.rowconfigure(1, weight=1) self.menu.grid(row=0, column=0, columnspan=3, sticky=E+W) self.names.grid(row=1, column=0, sticky=N+S) self.display.grid(row=1, column=1, sticky=W+E+N+S) self.scrollY.grid(row=1, column=2, sticky=N+S) self.display.scrollX.grid(row=2, column=0, columnspan=2, sticky=E+W) self.scale.grid(row=3, column=0, columnspan=3, sticky=E+W) self.status.grid(row=4, column=0, columnspan=3, sticky=E+W) def draw(self): self.master.update() self.display.prepare() self.names.draw() self.display.draw() self.status.startup("") # # Configure scale related values # scalemax = ktrfile.timespan() / int(self.display["width"]) width = int(root.geometry().split('x')[0]) self.constwidth = width - int(self.display["width"]) self.scale.setmax(scalemax) self.scale.set(scalemax) self.display.xview_moveto(0) self.bind("", self.resize) def mousepress(self, event): self.clicksource = self.sourceat(event.y) def mousepressright(self, event): source = self.sourceat(event.y) if (source == None): return SourceContext(event, source) def mouserelease(self, event): if (self.clicksource == None): return newsource = self.sourceat(event.y) if (self.clicksource != newsource): self.sourceswap(self.clicksource, newsource) self.clicksource = None self.lastsource = None def mousemotion(self, event): if (self.clicksource == None): return newsource = self.sourceat(event.y) # # If we get a None source they moved off the page. # swapsource() can't handle moving multiple items so just # pretend we never clicked on anything to begin with so the # user can't mouseover a non-contiguous area. # if (newsource == None): self.clicksource = None self.lastsource = None return if (newsource == self.lastsource): return; self.lastsource = newsource if (newsource != self.clicksource): self.sourceswap(self.clicksource, newsource) # These are here because this object controls layout def sourcestart(self, source): return source.y - self.bdheight - source.ysize() def sourceend(self, source): return source.y + self.bdheight def sourcesize(self, source): return (self.bdheight * 2) + source.ysize() def sourceswap(self, source1, source2): # Sort so we always know which one is on top. if (source2.y < source1.y): swap = source1 source1 = source2 source2 = swap # Only swap adjacent sources if (self.sourceend(source1) != self.sourcestart(source2)): return # Compute start coordinates and target coordinates y1 = self.sourcestart(source1) y2 = self.sourcestart(source2) y1targ = y1 + self.sourcesize(source2) y2targ = y1 # # If the sizes are not equal, adjust the start of the lower # source to account for the lost/gained space. # if (source1.ysize() != source2.ysize()): diff = source2.ysize() - source1.ysize() self.names.moveline(y2, diff); self.display.moveline(y2, diff) source1.move(self.display, 0, y1targ - y1) source2.move(self.display, 0, y2targ - y2) source1.movename(self.names, 0, y1targ - y1) source2.movename(self.names, 0, y2targ - y2) def sourcepicky(self, source): if (source.hidden == 0): return self.sourcestart(source) # Revert to group based sort sources.sort() prev = None for s in sources: if (s == source): break if (s.hidden == 0): prev = s if (prev == None): newy = 0 else: newy = self.sourcestart(prev) + self.sourcesize(prev) return newy def sourceshow(self, source): if (source.hidden == 0): return; newy = self.sourcepicky(source) off = newy - self.sourcestart(source) self.sourceshiftall(newy-1, self.sourcesize(source)) self.sourceshift(source, off) source.hidden = 0 # # Optimized source show of multiple entries that only moves each # existing entry once. Doing sourceshow() iteratively is too # expensive due to python's canvas.move(). # def sourceshowlist(self, srclist): srclist.sort(cmp=source_cmp_start) startsize = [] for source in srclist: if (source.hidden == 0): srclist.remove(source) startsize.append((self.sourcepicky(source), self.sourcesize(source))) sources.sort(cmp=source_cmp_start, reverse=True) self.status.startup("Updating display..."); for source in sources: if (source.hidden == 1): continue nstart = self.sourcestart(source) size = 0 for hidden in startsize: (start, sz) = hidden if (start <= nstart or start+sz <= nstart): size += sz self.sourceshift(source, size) idx = 0 size = 0 for source in srclist: (newy, sz) = startsize[idx] off = (newy + size) - self.sourcestart(source) self.sourceshift(source, off) source.hidden = 0 size += sz idx += 1 self.updatescroll() self.status.set("") # # Optimized source hide of multiple entries that only moves each # remaining entry once. Doing sourcehide() iteratively is too # expensive due to python's canvas.move(). # def sourcehidelist(self, srclist): srclist.sort(cmp=source_cmp_start) sources.sort(cmp=source_cmp_start) startsize = [] off = len(sources) * 100 self.status.startup("Updating display..."); for source in srclist: if (source.hidden == 1): srclist.remove(source) # # Remember our old position so we can sort things # below us when we're done. # startsize.append((self.sourcestart(source), self.sourcesize(source))) self.sourceshift(source, off) source.hidden = 1 idx = 0 size = 0 for hidden in startsize: (start, sz) = hidden size += sz if (idx + 1 < len(startsize)): (stop, sz) = startsize[idx+1] else: stop = self.display.ysize() idx += 1 for source in sources: nstart = self.sourcestart(source) if (nstart < start or source.hidden == 1): continue if (nstart >= stop): break; self.sourceshift(source, -size) self.updatescroll() self.status.set("") def sourcehide(self, source): if (source.hidden == 1): return; # Move it out of the visible area off = len(sources) * 100 start = self.sourcestart(source) self.sourceshift(source, off) self.sourceshiftall(start, -self.sourcesize(source)) source.hidden = 1 def sourceshift(self, source, off): start = self.sourcestart(source) source.move(self.display, 0, off) source.movename(self.names, 0, off) self.names.moveline(start, off); self.display.moveline(start, off) # # We update the idle tasks to shrink the dirtied area so # it does not always include the entire screen. # self.names.update_idletasks() self.display.update_idletasks() def sourceshiftall(self, start, off): self.status.startup("Updating display..."); for source in sources: nstart = self.sourcestart(source) if (nstart < start): continue; self.sourceshift(source, off) self.updatescroll() self.status.set("") def sourceat(self, ypos): (start, end) = self.names.yview() starty = start * float(self.names.ysize) ypos += starty for source in sources: if (source.hidden == 1): continue; yend = self.sourceend(source) ystart = self.sourcestart(source) if (ypos >= ystart and ypos <= yend): return source return None def display_yview(self, *args): self.names.yview(*args) self.display.yview(*args) def resize(self, *args): width = int(root.geometry().split('x')[0]) scalemax = ktrfile.timespan() / (width - self.constwidth) self.scale.setmax(scalemax) def updatescroll(self): self.names.updatescroll() self.display.updatescroll() def setcolor(self, tag, color): self.display.setcolor(tag, color) def hide(self, tag): self.display.hide(tag) def getcolor(self, tag): return self.display.getcolor(tag) def getstate(self, tag): return self.display.getstate(tag) if (len(sys.argv) != 2 and len(sys.argv) != 3): print "usage:", sys.argv[0], " [clock freq in ghz]" sys.exit(1) if (len(sys.argv) > 2): clockfreq = float(sys.argv[2]) root = Tk() root.title("SchedGraph") colormap = Colormap(eventcolors) cpucolormap = Colormap(cpucolors) graph = SchedGraph(root) ktrfile = KTRFile(sys.argv[1]) graph.draw() root.mainloop() Index: stable/10/usr.bin/man/man.sh =================================================================== --- stable/10/usr.bin/man/man.sh (revision 278649) +++ stable/10/usr.bin/man/man.sh (revision 278650) @@ -1,965 +1,1005 @@ #! /bin/sh # # Copyright (c) 2010 Gordon Tetlow # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # Usage: add_to_manpath path # Adds a variable to manpath while ensuring we don't have duplicates. # Returns true if we were able to add something. False otherwise. add_to_manpath() { case "$manpath" in *:$1) decho " Skipping duplicate manpath entry $1" 2 ;; $1:*) decho " Skipping duplicate manpath entry $1" 2 ;; *:$1:*) decho " Skipping duplicate manpath entry $1" 2 ;; *) if [ -d "$1" ]; then decho " Adding $1 to manpath" manpath="$manpath:$1" return 0 fi ;; esac return 1 } # Usage: build_manlocales # Builds a correct MANLOCALES variable. build_manlocales() { # If the user has set manlocales, who are we to argue. if [ -n "$MANLOCALES" ]; then return fi parse_configs # Trim leading colon MANLOCALES=${manlocales#:} decho "Available manual locales: $MANLOCALES" } # Usage: build_manpath # Builds a correct MANPATH variable. build_manpath() { local IFS # If the user has set a manpath, who are we to argue. if [ -n "$MANPATH" ]; then return fi search_path decho "Adding default manpath entries" IFS=: for path in $man_default_path; do add_to_manpath "$path" done unset IFS parse_configs # Trim leading colon MANPATH=${manpath#:} decho "Using manual path: $MANPATH" } # Usage: check_cat catglob # Checks to see if a cat glob is available. check_cat() { if exists "$1"; then use_cat=yes catpage=$found setup_cattool $catpage decho " Found catpage $catpage" return 0 else return 1 fi } # Usage: check_man manglob catglob # Given 2 globs, figures out if the manglob is available, if so, check to # see if the catglob is also available and up to date. check_man() { if exists "$1"; then # We have a match, check for a cat page manpage=$found setup_cattool $manpage decho " Found manpage $manpage" if [ -n "${use_width}" ]; then # non-standard width unset use_cat decho " Skipping catpage: non-standard page width" elif exists "$2" && is_newer $found $manpage; then # cat page found and is newer, use that use_cat=yes catpage=$found setup_cattool $catpage decho " Using catpage $catpage" else # no cat page or is older unset use_cat decho " Skipping catpage: not found or old" fi return 0 fi return 1 } # Usage: decho "string" [debuglevel] # Echoes to stderr string prefaced with -- if high enough debuglevel. decho() { if [ $debug -ge ${2:-1} ]; then echo "-- $1" >&2 fi } # Usage: exists glob # Returns true if glob resolves to a real file. exists() { local IFS # Don't accidentally inherit callers IFS (breaks perl manpages) unset IFS # Use some globbing tricks in the shell to determine if a file # exists or not. set +f set -- "$1" $1 set -f if [ "$1" != "$2" -a -r "$2" ]; then found="$2" return 0 fi return 1 } # Usage: find_file path section subdir pagename # Returns: true if something is matched and found. # Search the given path/section combo for a given page. find_file() { local manroot catroot mann man0 catn cat0 manroot="$1/man$2" catroot="$1/cat$2" if [ -n "$3" ]; then manroot="$manroot/$3" catroot="$catroot/$3" fi if [ ! -d "$manroot" ]; then return 1 fi decho " Searching directory $manroot" 2 mann="$manroot/$4.$2*" man0="$manroot/$4.0*" catn="$catroot/$4.$2*" cat0="$catroot/$4.0*" # This is the behavior as seen by the original man utility. # Let's not change that which doesn't seem broken. if check_man "$mann" "$catn"; then return 0 elif check_man "$man0" "$cat0"; then return 0 elif check_cat "$catn"; then return 0 elif check_cat "$cat0"; then return 0 fi return 1 } # Usage: is_newer file1 file2 # Returns true if file1 is newer than file2 as calculated by mtime. is_newer() { if ! [ "$1" -ot "$2" ]; then decho " mtime: $1 not older than $2" 3 return 0 else decho " mtime: $1 older than $2" 3 return 1 fi } # Usage: manpath_parse_args "$@" # Parses commandline options for manpath. manpath_parse_args() { local cmd_arg while getopts 'Ldq' cmd_arg; do case "${cmd_arg}" in L) Lflag=Lflag ;; d) debug=$(( $debug + 1 )) ;; q) qflag=qflag ;; *) manpath_usage ;; esac done >&2 } # Usage: manpath_usage # Display usage for the manpath(1) utility. manpath_usage() { echo 'usage: manpath [-Ldq]' >&2 exit 1 } # Usage: manpath_warnings # Display some warnings to stderr. manpath_warnings() { if [ -z "$Lflag" -a -n "$MANPATH" ]; then echo "(Warning: MANPATH environment variable set)" >&2 fi if [ -n "$Lflag" -a -n "$MANLOCALES" ]; then echo "(Warning: MANLOCALES environment variable set)" >&2 fi } # Usage: man_check_for_so page path # Returns: True if able to resolve the file, false if it ended in tears. # Detects the presence of the .so directive and causes the file to be # redirected to another source file. man_check_for_so() { local IFS line tstr unset IFS # We need to loop to accommodate multiple .so directives. while true do line=$($cattool $manpage | head -1) case "$line" in .so*) trim "${line#.so}" decho "$manpage includes $tstr" # Glob and check for the file. if ! check_man "$path/$tstr*" ""; then decho " Unable to find $tstr" return 1 fi ;; *) break ;; esac done return 0 } -# Usage: man_display_page -# Display either the manpage or catpage depending on the use_cat variable man_display_page() { - local EQN NROFF PIC TBL TROFF REFER VGRIND - local IFS l nroff_dev pipeline preproc_arg tool + local IFS pipeline preconv_enc testline # We are called with IFS set to colon. This causes really weird # things to happen for the variables that have spaces in them. unset IFS # If we are supposed to use a catpage and we aren't using troff(1) # just zcat the catpage and we are done. if [ -z "$tflag" -a -n "$use_cat" ]; then if [ -n "$wflag" ]; then echo "$catpage (source: $manpage)" ret=0 else if [ $debug -gt 0 ]; then decho "Command: $cattool $catpage | $MANPAGER" ret=0 else eval "$cattool $catpage | $MANPAGER" ret=$? fi fi return fi # Okay, we are using the manpage, do we just need to output the # name of the manpage? if [ -n "$wflag" ]; then echo "$manpage" ret=0 return fi + + case "${manpage}" in + *.${man_charset}/*) + case "$man_charset" in + ISO8859-1) preconv_enc="latin-1" ;; + ISO8859-15) preconv_enc="latin-1" ;; + UTF-8) preconv_enc="utf-8" ;; + esac + ;; + esac + + if [ -n "$preconv_enc" ]; then + pipeline="preconv -e $preconv_enc |" + fi + testline="$pipeline mandoc -Tlint -Werror 2>/dev/null" + pipeline="$pipeline mandoc -Tlocale | $MANPAGER" + + if ! eval "$cattool $manpage | $testline" ;then + if which -s groff2; then + man_display_page_groff + else + echo "This manpage needs groff(1) to be rendered" >&2 + echo "First install groff(1): " >&2 + echo "pkg install groff " >&2 + ret=1 + fi + return + fi + + if [ $debug -gt 0 ]; then + decho "Command: $cattool $manpage | $pipeline" + ret=0 + else + eval "$cattool $manpage | $pipeline" + ret=$? + fi +} + +# Usage: man_display_page +# Display either the manpage or catpage depending on the use_cat variable +man_display_page_groff() { + local EQN NROFF PIC TBL TROFF REFER VGRIND + local IFS l nroff_dev pipeline preproc_arg tool # So, we really do need to parse the manpage. First, figure out the # device flag (-T) we have to pass to eqn(1) and groff(1). Then, # setup the pipeline of commands based on the user's request. # If the manpage is from a particular charset, we need to setup nroff # to properly output for the correct device. case "${manpage}" in *.${man_charset}/*) # I don't pretend to know this; I'm just copying from the # previous version of man(1). case "$man_charset" in KOI8-R) nroff_dev="koi8-r" ;; ISO8859-1) nroff_dev="latin1" ;; ISO8859-15) nroff_dev="latin1" ;; UTF-8) nroff_dev="utf8" ;; *) nroff_dev="ascii" ;; esac NROFF="$NROFF -T$nroff_dev" EQN="$EQN -T$nroff_dev" # Iff the manpage is from the locale and not just the charset, # then we need to define the locale string. case "${manpage}" in */${man_lang}_${man_country}.${man_charset}/*) NROFF="$NROFF -dlocale=$man_lang.$man_charset" ;; */${man_lang}.${man_charset}/*) NROFF="$NROFF -dlocale=$man_lang.$man_charset" ;; esac # Allow language specific calls to override the default # set of utilities. l=$(echo $man_lang | tr [:lower:] [:upper:]) for tool in EQN NROFF PIC TBL TROFF REFER VGRIND; do eval "$tool=\${${tool}_$l:-\$$tool}" done ;; *) NROFF="$NROFF -Tascii" EQN="$EQN -Tascii" ;; esac if [ -z "$MANCOLOR" ]; then NROFF="$NROFF -P-c" fi if [ -n "${use_width}" ]; then NROFF="$NROFF -rLL=${use_width}n -rLT=${use_width}n" fi if [ -n "$MANROFFSEQ" ]; then set -- -$MANROFFSEQ while getopts 'egprtv' preproc_arg; do case "${preproc_arg}" in e) pipeline="$pipeline | $EQN" ;; g) ;; # Ignore for compatibility. p) pipeline="$pipeline | $PIC" ;; r) pipeline="$pipeline | $REFER" ;; t) pipeline="$pipeline | $TBL" ;; v) pipeline="$pipeline | $VGRIND" ;; *) usage ;; esac done # Strip the leading " | " from the resulting pipeline. pipeline="${pipeline#" | "}" else pipeline="$TBL" fi if [ -n "$tflag" ]; then pipeline="$pipeline | $TROFF" else pipeline="$pipeline | $NROFF | $MANPAGER" fi if [ $debug -gt 0 ]; then decho "Command: $cattool $manpage | $pipeline" ret=0 else eval "$cattool $manpage | $pipeline" ret=$? fi } # Usage: man_find_and_display page # Search through the manpaths looking for the given page. man_find_and_display() { local found_page locpath p path sect # Check to see if it's a file. But only if it has a '/' in # the filename. case "$1" in */*) if [ -f "$1" -a -r "$1" ]; then decho "Found a usable page, displaying that" unset use_cat manpage="$1" setup_cattool $manpage if man_check_for_so $manpage $(dirname $manpage); then found_page=yes man_display_page fi return fi ;; esac IFS=: for sect in $MANSECT; do decho "Searching section $sect" 2 for path in $MANPATH; do for locpath in $locpaths; do p=$path/$locpath p=${p%/.} # Rid ourselves of the trailing /. # Check if there is a MACHINE specific manpath. if find_file $p $sect $MACHINE "$1"; then if man_check_for_so $manpage $p; then found_page=yes man_display_page if [ -n "$aflag" ]; then continue 2 else return fi fi fi # Check if there is a MACHINE_ARCH # specific manpath. if find_file $p $sect $MACHINE_ARCH "$1"; then if man_check_for_so $manpage $p; then found_page=yes man_display_page if [ -n "$aflag" ]; then continue 2 else return fi fi fi # Check plain old manpath. if find_file $p $sect '' "$1"; then if man_check_for_so $manpage $p; then found_page=yes man_display_page if [ -n "$aflag" ]; then continue 2 else return fi fi fi done done done unset IFS # Nothing? Well, we are done then. if [ -z "$found_page" ]; then echo "No manual entry for $1" >&2 ret=1 return fi } # Usage: man_parse_args "$@" # Parses commandline options for man. man_parse_args() { local IFS cmd_arg while getopts 'M:P:S:adfhkm:op:tw' cmd_arg; do case "${cmd_arg}" in M) MANPATH=$OPTARG ;; P) MANPAGER=$OPTARG ;; S) MANSECT=$OPTARG ;; a) aflag=aflag ;; d) debug=$(( $debug + 1 )) ;; f) fflag=fflag ;; h) man_usage 0 ;; k) kflag=kflag ;; m) mflag=$OPTARG ;; o) oflag=oflag ;; p) MANROFFSEQ=$OPTARG ;; t) tflag=tflag ;; w) wflag=wflag ;; *) man_usage ;; esac done >&2 shift $(( $OPTIND - 1 )) # Check the args for incompatible options. case "${fflag}${kflag}${tflag}${wflag}" in fflagkflag*) echo "Incompatible options: -f and -k"; man_usage ;; fflag*tflag*) echo "Incompatible options: -f and -t"; man_usage ;; fflag*wflag) echo "Incompatible options: -f and -w"; man_usage ;; *kflagtflag*) echo "Incompatible options: -k and -t"; man_usage ;; *kflag*wflag) echo "Incompatible options: -k and -w"; man_usage ;; *tflagwflag) echo "Incompatible options: -t and -w"; man_usage ;; esac # Short circuit for whatis(1) and apropos(1) if [ -n "$fflag" ]; then do_whatis "$@" exit fi if [ -n "$kflag" ]; then do_apropos "$@" exit fi IFS=: for sect in $man_default_sections; do if [ "$sect" = "$1" ]; then decho "Detected manual section as first arg: $1" MANSECT="$1" shift break fi done unset IFS pages="$*" } # Usage: man_setup # Setup various trivial but essential variables. man_setup() { # Setup machine and architecture variables. if [ -n "$mflag" ]; then MACHINE_ARCH=${mflag%%:*} MACHINE=${mflag##*:} fi if [ -z "$MACHINE_ARCH" ]; then MACHINE_ARCH=$($SYSCTL -n hw.machine_arch) fi if [ -z "$MACHINE" ]; then MACHINE=$($SYSCTL -n hw.machine) fi decho "Using architecture: $MACHINE_ARCH:$MACHINE" setup_pager # Setup manual sections to search. if [ -z "$MANSECT" ]; then MANSECT=$man_default_sections fi decho "Using manual sections: $MANSECT" build_manpath man_setup_locale man_setup_width } # Usage: man_setup_width # Set up page width. man_setup_width() { local sizes unset use_width case "$MANWIDTH" in [0-9]*) if [ "$MANWIDTH" -gt 0 2>/dev/null ]; then use_width=$MANWIDTH fi ;; [Tt][Tt][Yy]) if { sizes=$($STTY size 0>&3 2>/dev/null); } 3>&1; then set -- $sizes if [ $2 -gt 80 ]; then use_width=$(($2-2)) fi fi ;; esac if [ -n "$use_width" ]; then decho "Using non-standard page width: ${use_width}" else decho 'Using standard page width' fi } # Usage: man_setup_locale # Setup necessary locale variables. man_setup_locale() { local lang_cc locpaths='.' man_charset='US-ASCII' # Setup locale information. if [ -n "$oflag" ]; then decho 'Using non-localized manpages' else # Use the locale tool to give us the proper LC_CTYPE eval $( $LOCALE ) case "$LC_CTYPE" in C) ;; POSIX) ;; [a-z][a-z]_[A-Z][A-Z]\.*) lang_cc="${LC_CTYPE%.*}" man_lang="${LC_CTYPE%_*}" man_country="${lang_cc#*_}" man_charset="${LC_CTYPE#*.}" locpaths="$LC_CTYPE" locpaths="$locpaths:$man_lang.$man_charset" if [ "$man_lang" != "en" ]; then locpaths="$locpaths:en.$man_charset" fi locpaths="$locpaths:." ;; *) echo 'Unknown locale, assuming C' >&2 ;; esac fi decho "Using locale paths: $locpaths" } # Usage: man_usage [exitcode] # Display usage for the man utility. man_usage() { echo 'Usage:' echo ' man [-adho] [-t | -w] [-M manpath] [-P pager] [-S mansect]' echo ' [-m arch[:machine]] [-p [eprtv]] [mansect] page [...]' echo ' man -f page [...] -- Emulates whatis(1)' echo ' man -k page [...] -- Emulates apropos(1)' # When exit'ing with -h, it's not an error. exit ${1:-1} } # Usage: parse_configs # Reads the end-user adjustable config files. parse_configs() { local IFS file files if [ -n "$parsed_configs" ]; then return fi unset IFS # Read the global config first in case the user wants # to override config_local. if [ -r "$config_global" ]; then parse_file "$config_global" fi # Glob the list of files to parse. set +f files=$(echo $config_local) set -f for file in $files; do if [ -r "$file" ]; then parse_file "$file" fi done parsed_configs='yes' } # Usage: parse_file file # Reads the specified config files. parse_file() { local file line tstr var file="$1" decho "Parsing config file: $file" while read line; do decho " $line" 2 case "$line" in \#*) decho " Comment" 3 ;; MANPATH*) decho " MANPATH" 3 trim "${line#MANPATH}" add_to_manpath "$tstr" ;; MANLOCALE*) decho " MANLOCALE" 3 trim "${line#MANLOCALE}" manlocales="$manlocales:$tstr" ;; MANCONFIG*) decho " MANCONFIG" 3 trim "${line#MANCONFIG}" config_local="$tstr" ;; # Set variables in the form of FOO_BAR *_*[\ \ ]*) var="${line%%[\ \ ]*}" trim "${line#$var}" eval "$var=\"$tstr\"" decho " Parsed $var" 3 ;; esac done < "$file" } # Usage: search_path # Traverse $PATH looking for manpaths. search_path() { local IFS p path decho "Searching PATH for man directories" IFS=: for path in $PATH; do # Do a little special casing since the base manpages # are in /usr/share/man instead of /usr/man or /man. case "$path" in /bin|/usr/bin) add_to_manpath "/usr/share/man" ;; *) if add_to_manpath "$path/man"; then : elif add_to_manpath "$path/MAN"; then : else case "$path" in */bin) p="${path%/bin}/man" add_to_manpath "$p" ;; *) ;; esac fi ;; esac done unset IFS if [ -z "$manpath" ]; then decho ' Unable to find any manpaths, using default' manpath=$man_default_path fi } # Usage: search_whatis cmd [arglist] # Do the heavy lifting for apropos/whatis search_whatis() { local IFS bad cmd f good key keywords loc opt out path rval wlist cmd="$1" shift whatis_parse_args "$@" build_manpath build_manlocales setup_pager if [ "$cmd" = "whatis" ]; then opt="-w" fi f='whatis' IFS=: for path in $MANPATH; do if [ \! -d "$path" ]; then decho "Skipping non-existent path: $path" 2 continue fi if [ -f "$path/$f" -a -r "$path/$f" ]; then decho "Found whatis: $path/$f" wlist="$wlist $path/$f" fi for loc in $MANLOCALES; do if [ -f "$path/$loc/$f" -a -r "$path/$loc/$f" ]; then decho "Found whatis: $path/$loc/$f" wlist="$wlist $path/$loc/$f" fi done done unset IFS if [ -z "$wlist" ]; then echo "$cmd: no whatis databases in $MANPATH" >&2 exit 1 fi rval=0 for key in $keywords; do out=$(grep -Ehi $opt -- "$key" $wlist) if [ -n "$out" ]; then good="$good\\n$out" else bad="$bad\\n$key: nothing appropriate" rval=1 fi done # Strip leading carriage return. good=${good#\\n} bad=${bad#\\n} if [ -n "$good" ]; then echo -e "$good" | $MANPAGER fi if [ -n "$bad" ]; then echo -e "$bad" >&2 fi exit $rval } # Usage: setup_cattool page # Finds an appropriate decompressor based on extension setup_cattool() { case "$1" in *.bz) cattool='/usr/bin/bzcat' ;; *.bz2) cattool='/usr/bin/bzcat' ;; *.gz) cattool='/usr/bin/zcat' ;; *.lzma) cattool='/usr/bin/lzcat' ;; *.xz) cattool='/usr/bin/xzcat' ;; *) cattool='/usr/bin/zcat -f' ;; esac } # Usage: setup_pager # Correctly sets $MANPAGER setup_pager() { # Setup pager. if [ -z "$MANPAGER" ]; then if [ -n "$MANCOLOR" ]; then MANPAGER="less -sR" else if [ -n "$PAGER" ]; then MANPAGER="$PAGER" else MANPAGER="more -s" fi fi fi decho "Using pager: $MANPAGER" } # Usage: trim string # Trims whitespace from beginning and end of a variable trim() { tstr=$1 while true; do case "$tstr" in [\ \ ]*) tstr="${tstr##[\ \ ]}" ;; *[\ \ ]) tstr="${tstr%%[\ \ ]}" ;; *) break ;; esac done } # Usage: whatis_parse_args "$@" # Parse commandline args for whatis and apropos. whatis_parse_args() { local cmd_arg while getopts 'd' cmd_arg; do case "${cmd_arg}" in d) debug=$(( $debug + 1 )) ;; *) whatis_usage ;; esac done >&2 shift $(( $OPTIND - 1 )) keywords="$*" } # Usage: whatis_usage # Display usage for the whatis/apropos utility. whatis_usage() { echo "usage: $cmd [-d] keyword [...]" exit 1 } # Supported commands do_apropos() { search_whatis apropos "$@" } do_man() { man_parse_args "$@" if [ -z "$pages" ]; then echo 'What manual page do you want?' >&2 exit 1 fi man_setup for page in $pages; do decho "Searching for $page" man_find_and_display "$page" done exit ${ret:-0} } do_manpath() { manpath_parse_args "$@" if [ -z "$qflag" ]; then manpath_warnings fi if [ -n "$Lflag" ]; then build_manlocales echo $MANLOCALES else build_manpath echo $MANPATH fi exit 0 } do_whatis() { search_whatis whatis "$@" } # User's PATH setting decides on the groff-suite to pick up. EQN=eqn NROFF='groff -S -P-h -Wall -mtty-char -man' PIC=pic REFER=refer TBL=tbl TROFF='groff -S -man' VGRIND=vgrind LOCALE=/usr/bin/locale STTY=/bin/stty SYSCTL=/sbin/sysctl debug=0 man_default_sections='1:8:2:3:n:4:5:6:7:9:l' man_default_path='/usr/share/man:/usr/share/openssl/man:/usr/local/man' cattool='/usr/bin/zcat -f' config_global='/etc/man.conf' # This can be overridden via a setting in /etc/man.conf. config_local='/usr/local/etc/man.d/*.conf' # Set noglobbing for now. I don't want spurious globbing. set -f case "$0" in *apropos) do_apropos "$@" ;; *manpath) do_manpath "$@" ;; *whatis) do_whatis "$@" ;; *) do_man "$@" ;; esac Index: stable/10 =================================================================== --- stable/10 (revision 278649) +++ stable/10 (revision 278650) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r272315,272757,274092,274901