diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index b7316ea5f387..d67c70984528 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -1,1376 +1,1371 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Machine independent bits of mutex implementation. */ #include #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) #define ADAPTIVE_MUTEXES #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , lock, failed); #endif /* * Return the mutex address when the lock cookie address is provided. * This functionality assumes that struct mtx* have a member named mtx_lock. */ #define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock)) /* * Internal utility macros. */ #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) #define mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED) static void assert_mtx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_mtx(const struct lock_object *lock); #endif static void lock_mtx(struct lock_object *lock, uintptr_t how); static void lock_spin(struct lock_object *lock, uintptr_t how); static int trylock_mtx(struct lock_object *lock, uintptr_t how); static int trylock_spin(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_mtx(struct lock_object *lock); static uintptr_t unlock_spin(struct lock_object *lock); /* * Lock classes for sleep and spin mutexes. */ struct lock_class lock_class_mtx_sleep = { .lc_name = "sleep mutex", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_mtx, .lc_trylock = trylock_mtx, .lc_unlock = unlock_mtx, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; struct lock_class lock_class_mtx_spin = { .lc_name = "spin mutex", .lc_flags = LC_SPINLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_spin, .lc_trylock = trylock_spin, .lc_unlock = unlock_spin, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; #ifdef ADAPTIVE_MUTEXES #ifdef MUTEX_CUSTOM_BACKOFF static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "mtx debugging"); static struct lock_delay_config __read_frequently mtx_delay; SYSCTL_U16(_debug_mtx, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_delay.base, 0, ""); SYSCTL_U16(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max, 0, ""); LOCK_DELAY_SYSINIT_DEFAULT(mtx_delay); #else #define mtx_delay locks_delay #endif #endif #ifdef MUTEX_SPIN_CUSTOM_BACKOFF static SYSCTL_NODE(_debug, OID_AUTO, mtx_spin, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "mtx spin debugging"); static struct lock_delay_config __read_frequently mtx_spin_delay; SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_spin_delay.base, 0, ""); SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_spin_delay.max, 0, ""); LOCK_DELAY_SYSINIT_DEFAULT(mtx_spin_delay); #else #define mtx_spin_delay locks_delay #endif /* * System-wide mutexes */ struct mtx blocked_lock; struct mtx __exclusive_cache_line Giant; static void _mtx_lock_indefinite_check(struct mtx *, struct lock_delay_arg *); static void assert_mtx(const struct lock_object *lock, int what) { /* * Treat LA_LOCKED as if LA_XLOCKED was asserted. * * Some callers of lc_assert uses LA_LOCKED to indicate that either * a shared lock or write lock was held, while other callers uses * the more strict LA_XLOCKED (used as MA_OWNED). * * Mutex is the only lock class that can not be shared, as a result, * we can reasonably consider the caller really intends to assert * LA_XLOCKED when they are asserting LA_LOCKED on a mutex object. */ if (what & LA_LOCKED) { what &= ~LA_LOCKED; what |= LA_XLOCKED; } mtx_assert((const struct mtx *)lock, what); } static void lock_mtx(struct lock_object *lock, uintptr_t how) { mtx_lock((struct mtx *)lock); } static void lock_spin(struct lock_object *lock, uintptr_t how) { mtx_lock_spin((struct mtx *)lock); } static int trylock_mtx(struct lock_object *lock, uintptr_t how) { return (mtx_trylock((struct mtx *)lock)); } static int trylock_spin(struct lock_object *lock, uintptr_t how) { return (mtx_trylock_spin((struct mtx *)lock)); } static uintptr_t unlock_mtx(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock(m); return (0); } static uintptr_t unlock_spin(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock_spin(m); return (0); } #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner) { const struct mtx *m; uintptr_t x; m = (const struct mtx *)lock; x = m->mtx_lock; *owner = (struct thread *)(x & ~MTX_FLAGMASK); return (*owner != NULL); } #endif /* * Function versions of the inlined __mtx_* macros. These are used by * modules and can also be called from assembly language if needed. */ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid, v; m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() || !TD_IS_IDLETHREAD(curthread), ("mtx_lock() by idle thread %p on mutex %p @ %s:%d", curthread, m, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_spin, ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); tid = (uintptr_t)curthread; v = MTX_UNOWNED; if (!_mtx_obtain_lock_fetch(m, &v, tid)) _mtx_lock_sleep(m, v, opts, file, line); else LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, 0, 0, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_spin, ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); #ifdef LOCK_PROFILING __mtx_unlock_sleep(c, (uintptr_t)curthread, opts, file, line); #else __mtx_unlock(m, curthread, opts, file, line); #endif TD_LOCKS_DEC(curthread); } void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; #ifdef SMP uintptr_t tid, v; #endif m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock_spin() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_sleep, ("mtx_lock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); #ifdef SMP spinlock_enter(); tid = (uintptr_t)curthread; v = MTX_UNOWNED; if (!_mtx_obtain_lock_fetch(m, &v, tid)) _mtx_lock_spin(m, v, opts, file, line); else LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire, m, 0, 0, file, line); #else __mtx_lock_spin(m, curthread, opts, file, line); #endif LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock_spin() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_sleep, ("mtx_trylock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); KASSERT((opts & MTX_RECURSE) == 0, ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); if (__mtx_trylock_spin(m, curthread, opts, file, line)) { LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); return (1); } LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line); return (0); } void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock_spin() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_sleep, ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock_spin(m); } /* * The important part of mtx_trylock{,_flags}() * Tries to acquire lock `m.' If this function is called on a mutex that * is already owned, it will recursively acquire the lock. */ int _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF) { struct thread *td; uintptr_t tid, v; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int rval; bool recursed; td = curthread; tid = (uintptr_t)td; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td), ("mtx_trylock() by idle thread %p on mutex %p @ %s:%d", curthread, m, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_spin, ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); rval = 1; recursed = false; v = MTX_UNOWNED; for (;;) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; if (v == MTX_UNOWNED) continue; if (v == tid && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); recursed = true; break; } rval = 0; break; } opts &= ~MTX_RECURSE; LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); TD_LOCKS_INC(curthread); if (!recursed) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); } return (rval); } int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; m = mtxlock2mtx(c); return (_mtx_trylock_flags_int(m, opts LOCK_FILE_LINE_ARG)); } /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * - * We call this if the lock is either contested (i.e. we need to go to - * sleep waiting for it), or if we need to recurse on it. + * We get here if lock profiling is enabled, the lock is already held by + * someone else or we are recursing on it. */ #if LOCK_DEBUG > 0 void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts, const char *file, int line) #else void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v) #endif { struct thread *td; struct mtx *m; struct turnstile *ts; uintptr_t tid; struct thread *owner; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) int doing_lockprof = 0; #endif td = curthread; tid = (uintptr_t)td; m = mtxlock2mtx(c); #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) { while (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) goto out_lockstat; } doing_lockprof = 1; all_time -= lockstat_nsecs(&m->lock_object); } #endif #ifdef LOCK_PROFILING doing_lockprof = 1; #endif if (SCHEDULER_STOPPED()) return; if (__predict_false(v == MTX_UNOWNED)) v = MTX_READ_VALUE(m); if (__predict_false(lv_mtx_owner(v) == td)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); #if LOCK_DEBUG > 0 opts &= ~MTX_RECURSE; #endif m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); return; } #if LOCK_DEBUG > 0 opts &= ~MTX_RECURSE; #endif #if defined(ADAPTIVE_MUTEXES) lock_delay_arg_init(&lda, &mtx_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init_noadapt(&lda); #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, false, &contested, &waittime); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); THREAD_CONTENDS_ON_LOCK(&m->lock_object); for (;;) { if (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; continue; } #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* * If the owner is running on another CPU, spin until the * owner stops running or the state of the lock changes. */ owner = lv_mtx_owner(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&m->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, m, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); do { lock_delay(&lda); v = MTX_READ_VALUE(m); owner = lv_mtx_owner(v); } while (v != MTX_UNOWNED && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); continue; } #endif ts = turnstile_trywait(&m->lock_object); v = MTX_READ_VALUE(m); retry_turnstile: /* * Check if the lock has been released while spinning for * the turnstile chain lock. */ if (v == MTX_UNOWNED) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_MUTEXES /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ owner = lv_mtx_owner(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } #endif - /* - * If the mutex isn't already contested and a failure occurs - * setting the contested bit, the mutex was either released - * or the state of the MTX_RECURSED bit changed. - */ - if ((v & MTX_CONTESTED) == 0 && - !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_CONTESTED)) { + if ((v & MTX_WAITERS) == 0 && + !atomic_fcmpset_ptr(&m->mtx_lock, &v, v | MTX_WAITERS)) { goto retry_turnstile; } /* * We definitely must sleep for this lock. */ mtx_assert(m, MA_NOTOWNED); /* * Block on the turnstile. */ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&m->lock_object); #endif #ifndef ADAPTIVE_MUTEXES owner = mtx_owner(m); #endif MPASS(owner == mtx_owner(m)); turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&m->lock_object); sleep_cnt++; #endif v = MTX_READ_VALUE(m); } THREAD_CONTENTION_DONE(&m->lock_object); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&m->lock_object); if (sleep_time) LOCKSTAT_RECORD1(adaptive__block, m, sleep_time); /* * Only record the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time); out_lockstat: #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); } #ifdef SMP /* * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock. * * This is only called if we need to actually spin for the lock. Recursion * is handled inline. */ #if LOCK_DEBUG > 0 void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, int opts, const char *file, int line) #else void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v) #endif { struct mtx *m; struct lock_delay_arg lda; uintptr_t tid; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) int doing_lockprof = 0; #endif tid = (uintptr_t)curthread; m = mtxlock2mtx(c); #ifdef KDTRACE_HOOKS if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) { while (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) goto out_lockstat; } doing_lockprof = 1; spin_time -= lockstat_nsecs(&m->lock_object); } #endif #ifdef LOCK_PROFILING doing_lockprof = 1; #endif if (__predict_false(v == MTX_UNOWNED)) v = MTX_READ_VALUE(m); if (__predict_false(v == tid)) { m->mtx_recurse++; return; } if (SCHEDULER_STOPPED()) return; if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); lock_delay_arg_init(&lda, &mtx_spin_delay); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, true, &contested, &waittime); for (;;) { if (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; continue; } /* Give interrupts a chance while we spin. */ spinlock_exit(); do { if (__predict_true(lda.spin_cnt < 10000000)) { lock_delay(&lda); } else { _mtx_lock_indefinite_check(m, &lda); } v = MTX_READ_VALUE(m); } while (v != MTX_UNOWNED); spinlock_enter(); } if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); if (lda.spin_cnt != 0) LOCKSTAT_RECORD1(spin__spin, m, spin_time); out_lockstat: #endif LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); } #endif /* SMP */ #ifdef INVARIANTS static void thread_lock_validate(struct mtx *m, int opts, const char *file, int line) { KASSERT(m->mtx_lock != MTX_DESTROYED, ("thread_lock() of destroyed mutex %p @ %s:%d", m, file, line)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_sleep, ("thread_lock() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) == 0, ("thread_lock: got a recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); } #else #define thread_lock_validate(m, opts, file, line) do { } while (0) #endif #ifndef LOCK_PROFILING #if LOCK_DEBUG > 0 void _thread_lock(struct thread *td, int opts, const char *file, int line) #else void _thread_lock(struct thread *td) #endif { struct mtx *m; uintptr_t tid; tid = (uintptr_t)curthread; if (__predict_false(LOCKSTAT_PROFILE_ENABLED(spin__acquire))) goto slowpath_noirq; spinlock_enter(); m = td->td_lock; thread_lock_validate(m, 0, file, line); if (__predict_false(m == &blocked_lock)) goto slowpath_unlocked; if (__predict_false(!_mtx_obtain_lock(m, tid))) goto slowpath_unlocked; if (__predict_true(m == td->td_lock)) { WITNESS_LOCK(&m->lock_object, LOP_EXCLUSIVE, file, line); return; } atomic_store_rel_ptr(&m->mtx_lock, MTX_UNOWNED); slowpath_unlocked: spinlock_exit(); slowpath_noirq: #if LOCK_DEBUG > 0 thread_lock_flags_(td, opts, file, line); #else thread_lock_flags_(td, 0, 0, 0); #endif } #endif void thread_lock_flags_(struct thread *td, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid, v; struct lock_delay_arg lda; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) int doing_lockprof = 1; #endif tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) { /* * Ensure that spinlock sections are balanced even when the * scheduler is stopped, since we may otherwise inadvertently * re-enable interrupts while dumping core. */ spinlock_enter(); return; } lock_delay_arg_init(&lda, &mtx_spin_delay); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif #ifdef LOCK_PROFILING doing_lockprof = 1; #elif defined(KDTRACE_HOOKS) doing_lockprof = lockstat_enabled; #endif #ifdef KDTRACE_HOOKS if (__predict_false(doing_lockprof)) spin_time -= lockstat_nsecs(&td->td_lock->lock_object); #endif spinlock_enter(); for (;;) { retry: m = td->td_lock; thread_lock_validate(m, opts, file, line); v = MTX_READ_VALUE(m); for (;;) { if (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; continue; } MPASS(v != tid); lock_profile_obtain_lock_failed(&m->lock_object, true, &contested, &waittime); /* Give interrupts a chance while we spin. */ spinlock_exit(); do { if (__predict_true(lda.spin_cnt < 10000000)) { lock_delay(&lda); } else { _mtx_lock_indefinite_check(m, &lda); } if (m != td->td_lock) { spinlock_enter(); goto retry; } v = MTX_READ_VALUE(m); } while (v != MTX_UNOWNED); spinlock_enter(); } if (m == td->td_lock) break; atomic_store_rel_ptr(&m->mtx_lock, MTX_UNOWNED); } LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (lda.spin_cnt != 0) LOCKSTAT_RECORD1(thread__spin, m, spin_time); #endif } struct mtx * thread_lock_block(struct thread *td) { struct mtx *lock; lock = td->td_lock; mtx_assert(lock, MA_OWNED); td->td_lock = &blocked_lock; return (lock); } void thread_lock_unblock(struct thread *td, struct mtx *new) { mtx_assert(new, MA_OWNED); KASSERT(td->td_lock == &blocked_lock, ("thread %p lock %p not blocked_lock %p", td, td->td_lock, &blocked_lock)); atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new); } void thread_lock_block_wait(struct thread *td) { while (td->td_lock == &blocked_lock) cpu_spinwait(); /* Acquire fence to be certain that all thread state is visible. */ atomic_thread_fence_acq(); } void thread_lock_set(struct thread *td, struct mtx *new) { struct mtx *lock; mtx_assert(new, MA_OWNED); lock = td->td_lock; mtx_assert(lock, MA_OWNED); td->td_lock = new; mtx_unlock_spin(lock); } /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * - * We are only called here if the lock is recursed, contested (i.e. we - * need to wake up a blocked thread) or lockstat probe is active. + * We get here if lock profiling is enabled, the lock is already held by + * someone else or we are recursing on it. */ #if LOCK_DEBUG > 0 void __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts, const char *file, int line) #else void __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v) #endif { struct mtx *m; struct turnstile *ts; uintptr_t tid; if (SCHEDULER_STOPPED()) return; tid = (uintptr_t)curthread; m = mtxlock2mtx(c); if (__predict_false(v == tid)) v = MTX_READ_VALUE(m); if (__predict_false(v & MTX_RECURSED)) { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); return; } LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m); if (v == tid && _mtx_release_lock(m, tid)) return; /* * We have to lock the chain before the turnstile so this turnstile * can be removed from the hash list if it is empty. */ turnstile_chain_lock(&m->lock_object); atomic_store_rel_ptr(&m->mtx_lock, MTX_UNOWNED); ts = turnstile_lookup(&m->lock_object); MPASS(ts != NULL); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); /* * This turnstile is now no longer associated with the mutex. We can * unlock the chain lock so a new turnstile may take it's place. */ turnstile_unpend(ts); turnstile_chain_unlock(&m->lock_object); } /* * All the unlocking of MTX_SPIN locks is done inline. * See the __mtx_unlock_spin() macro for the details. */ /* * The backing function for the INVARIANTS-enabled mtx_assert() */ #ifdef INVARIANT_SUPPORT void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct mtx *m; if (KERNEL_PANICKED() || dumping || SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); switch (what) { case MA_OWNED: case MA_OWNED | MA_RECURSED: case MA_OWNED | MA_NOTRECURSED: if (!mtx_owned(m)) panic("mutex %s not owned at %s:%d", m->lock_object.lo_name, file, line); if (mtx_recursed(m)) { if ((what & MA_NOTRECURSED) != 0) panic("mutex %s recursed at %s:%d", m->lock_object.lo_name, file, line); } else if ((what & MA_RECURSED) != 0) { panic("mutex %s unrecursed at %s:%d", m->lock_object.lo_name, file, line); } break; case MA_NOTOWNED: if (mtx_owned(m)) panic("mutex %s owned at %s:%d", m->lock_object.lo_name, file, line); break; default: panic("unknown mtx_assert at %s:%d", file, line); } } #endif /* * General init routine used by the MTX_SYSINIT() macro. */ void mtx_sysinit(const void *arg) { const struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); } /* * Mutex initialization routine; initialize lock `m' of type contained in * `opts' with options contained in `opts' and name `name.' The optional * lock type `type' is used as a general lock category name for use with * witness. */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts) { struct mtx *m; struct lock_class *class; int flags; m = mtxlock2mtx(c); MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock, ("%s: mtx_lock not aligned for %s: %p", __func__, name, &m->mtx_lock)); /* Determine lock class and lock flags. */ if (opts & MTX_SPIN) class = &lock_class_mtx_spin; else class = &lock_class_mtx_sleep; flags = 0; if (opts & MTX_QUIET) flags |= LO_QUIET; if (opts & MTX_RECURSE) flags |= LO_RECURSABLE; if ((opts & MTX_NOWITNESS) == 0) flags |= LO_WITNESS; if (opts & MTX_DUPOK) flags |= LO_DUPOK; if (opts & MTX_NOPROFILE) flags |= LO_NOPROFILE; if (opts & MTX_NEW) flags |= LO_NEW; /* Initialize mutex. */ lock_init(&m->lock_object, class, name, type, flags); m->mtx_lock = MTX_UNOWNED; m->mtx_recurse = 0; } /* * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be * passed in as a flag here because if the corresponding mtx_init() was * called with MTX_QUIET set, then it will already be set in the mutex's * flags. */ void _mtx_destroy(volatile uintptr_t *c) { struct mtx *m; m = mtxlock2mtx(c); if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { - MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); + MPASS((m->mtx_lock & (MTX_RECURSED|MTX_WAITERS)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). */ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) { lock_profile_release_lock(&m->lock_object, true); spinlock_exit(); } else { TD_LOCKS_DEC(curthread); lock_profile_release_lock(&m->lock_object, false); } /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); } m->mtx_lock = MTX_DESTROYED; lock_destroy(&m->lock_object); } /* * Intialize the mutex code and system mutexes. This is called from the MD * startup code prior to mi_startup(). The per-CPU data space needs to be * setup before this is called. */ void mutex_init(void) { /* Setup turnstiles so that sleep mutexes work. */ init_turnstiles(); /* * Initialize mutexes. */ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. */ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN); mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN); mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN); mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } static void __noinline _mtx_lock_indefinite_check(struct mtx *m, struct lock_delay_arg *ldap) { struct thread *td; ldap->spin_cnt++; if (ldap->spin_cnt < 60000000 || kdb_active || KERNEL_PANICKED()) cpu_lock_delay(); else { td = mtx_owner(m); /* If the mutex is unlocked, try again. */ if (td == NULL) return; printf( "spin lock %p (%s) held by %p (tid %d) too long\n", m, m->lock_object.lo_name, td, td->td_tid); #ifdef WITNESS witness_display_spinlock(&m->lock_object, td, printf); #endif panic("spin lock held too long"); } cpu_spinwait(); } void mtx_spin_wait_unlocked(struct mtx *m) { struct lock_delay_arg lda; KASSERT(m->mtx_lock != MTX_DESTROYED, ("%s() of destroyed mutex %p", __func__, m)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_sleep, ("%s() of sleep mutex %p (%s)", __func__, m, m->lock_object.lo_name)); KASSERT(!mtx_owned(m), ("%s() waiting on myself on lock %p (%s)", __func__, m, m->lock_object.lo_name)); lda.spin_cnt = 0; while (atomic_load_acq_ptr(&m->mtx_lock) != MTX_UNOWNED) { if (__predict_true(lda.spin_cnt < 10000000)) { cpu_spinwait(); lda.spin_cnt++; } else { _mtx_lock_indefinite_check(m, &lda); } } } void mtx_wait_unlocked(struct mtx *m) { struct thread *owner; uintptr_t v; KASSERT(m->mtx_lock != MTX_DESTROYED, ("%s() of destroyed mutex %p", __func__, m)); KASSERT(LOCK_CLASS(&m->lock_object) != &lock_class_mtx_spin, ("%s() of spin mutex %p (%s)", __func__, m, m->lock_object.lo_name)); KASSERT(!mtx_owned(m), ("%s() waiting on myself on lock %p (%s)", __func__, m, m->lock_object.lo_name)); for (;;) { v = atomic_load_acq_ptr(&m->mtx_lock); if (v == MTX_UNOWNED) { break; } owner = lv_mtx_owner(v); if (!TD_IS_RUNNING(owner)) { mtx_lock(m); mtx_unlock(m); break; } cpu_spinwait(); } } #ifdef DDB static void db_show_mtx(const struct lock_object *lock) { struct thread *td; const struct mtx *m; m = (const struct mtx *)lock; db_printf(" flags: {"); if (LOCK_CLASS(lock) == &lock_class_mtx_spin) db_printf("SPIN"); else db_printf("DEF"); if (m->lock_object.lo_flags & LO_RECURSABLE) db_printf(", RECURSE"); if (m->lock_object.lo_flags & LO_DUPOK) db_printf(", DUPOK"); db_printf("}\n"); db_printf(" state: {"); if (mtx_unowned(m)) db_printf("UNOWNED"); else if (mtx_destroyed(m)) db_printf("DESTROYED"); else { db_printf("OWNED"); - if (m->mtx_lock & MTX_CONTESTED) - db_printf(", CONTESTED"); + if (m->mtx_lock & MTX_WAITERS) + db_printf(", WAITERS"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } db_printf("}\n"); if (!mtx_unowned(m) && !mtx_destroyed(m)) { td = mtx_owner(m); db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (mtx_recursed(m)) db_printf(" recursed: %d\n", m->mtx_recurse); } } #endif diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c index 978d6fd0b54d..05c6062463be 100644 --- a/sys/netgraph/netflow/netflow.c +++ b/sys/netgraph/netflow/netflow.c @@ -1,1179 +1,1179 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2010-2011 Alexander V. Chernikov * Copyright (c) 2004-2005 Gleb Smirnoff * Copyright (c) 2001-2003 Roman V. Palagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $ */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NBUCKETS (65536) /* must be power of 2 */ /* This hash is for TCP or UDP packets. */ #define FULL_HASH(addr1, addr2, port1, port2) \ (((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) ^ \ port1 ^ htons(port2)) & \ (NBUCKETS - 1)) /* This hash is for all other IP packets. */ #define ADDR_HASH(addr1, addr2) \ ((addr1 ^ (addr1 >> 16) ^ \ htons(addr2 ^ (addr2 >> 16))) & \ (NBUCKETS - 1)) /* Macros to shorten logical constructions */ /* XXX: priv must exist in namespace */ #define INACTIVE(fle) (time_uptime - fle->f.last > priv->nfinfo_inact_t) #define AGED(fle) (time_uptime - fle->f.first > priv->nfinfo_act_t) #define ISFREE(fle) (fle->f.packets == 0) /* * 4 is a magical number: statistically number of 4-packet flows is * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case * of reachable host and 4-packet otherwise. */ #define SMALL(fle) (fle->f.packets <= 4) MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash"); static int export_add(item_p, struct flow_entry *); static int export_send(priv_p, fib_export_p, item_p, int); #ifdef INET static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *, int, uint8_t, uint16_t); #endif #ifdef INET6 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *, int, uint8_t, uint16_t); #endif static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int); #ifdef INET /* * Generate hash for a given flow record. * * FIB is not used here, because: * most VRFS will carry public IPv4 addresses which are unique even * without FIB private addresses can overlap, but this is worked out * via flow_rec bcmp() containing fib id. In IPv6 world addresses are * all globally unique (it's not fully true, there is FC00::/7 for example, * but chances of address overlap are MUCH smaller) */ static inline uint32_t ip_hash(struct flow_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr, r->r_sport, r->r_dport); default: return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr); } } #endif #ifdef INET6 /* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */ static inline uint32_t ip6_hash(struct flow6_rec *r) { switch (r->r_ip_p) { case IPPROTO_TCP: case IPPROTO_UDP: return FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport, r->r_dport); default: return ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3], r->dst.r_dst6.__u6_addr.__u6_addr32[3]); } } #endif /* * Detach export datagram from priv, if there is any. * If there is no, allocate a new one. */ static item_p get_export_dgram(priv_p priv, fib_export_p fe) { item_p item = NULL; mtx_lock(&fe->export_mtx); if (fe->exp.item != NULL) { item = fe->exp.item; fe->exp.item = NULL; } mtx_unlock(&fe->export_mtx); if (item == NULL) { struct netflow_v5_export_dgram *dgram; struct mbuf *m; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (NULL); item = ng_package_data(m, NG_NOFLAGS); if (item == NULL) return (NULL); dgram = mtod(m, struct netflow_v5_export_dgram *); dgram->header.count = 0; dgram->header.version = htons(NETFLOW_V5); dgram->header.pad = 0; } return (item); } /* * Re-attach incomplete datagram back to priv. * If there is already another one, then send incomplete. */ static void return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags) { /* * It may happen on SMP, that some thread has already * put its item there, in this case we bail out and * send what we have to collector. */ mtx_lock(&fe->export_mtx); if (fe->exp.item == NULL) { fe->exp.item = item; mtx_unlock(&fe->export_mtx); } else { mtx_unlock(&fe->export_mtx); export_send(priv, fe, item, flags); } } /* * The flow is over. Call export_add() and free it. If datagram is * full, then call export_send(). */ static void expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags) { struct netflow_export_item exp; uint16_t version = fle->f.version; if ((priv->export != NULL) && (version == IPVERSION)) { exp.item = get_export_dgram(priv, fe); if (exp.item == NULL) { priv->nfinfo_export_failed++; if (priv->export9 != NULL) priv->nfinfo_export9_failed++; /* fle definitely contains IPv4 flow. */ uma_zfree_arg(priv->zone, fle, priv); return; } if (export_add(exp.item, fle) > 0) export_send(priv, fe, exp.item, flags); else return_export_dgram(priv, fe, exp.item, NG_QUEUE); } if (priv->export9 != NULL) { exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt); if (exp.item9 == NULL) { priv->nfinfo_export9_failed++; if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif else panic("ng_netflow: Unknown IP proto: %d", version); return; } if (export9_add(exp.item9, exp.item9_opt, fle) > 0) export9_send(priv, fe, exp.item9, exp.item9_opt, flags); else return_export9_dgram(priv, fe, exp.item9, exp.item9_opt, NG_QUEUE); } if (version == IPVERSION) uma_zfree_arg(priv->zone, fle, priv); #ifdef INET6 else if (version == IP6VERSION) uma_zfree_arg(priv->zone6, fle, priv); #endif } /* Get a snapshot of node statistics */ void ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i) { i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes); i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets); i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6); i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6); i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes); i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets); i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6); i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6); i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp); i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp); i->nfinfo_used = uma_zone_get_cur(priv->zone); #ifdef INET6 i->nfinfo_used6 = uma_zone_get_cur(priv->zone6); #endif i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed; i->nfinfo_export_failed = priv->nfinfo_export_failed; i->nfinfo_export9_failed = priv->nfinfo_export9_failed; i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf; i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs; i->nfinfo_inact_t = priv->nfinfo_inact_t; i->nfinfo_act_t = priv->nfinfo_act_t; } /* * Insert a record into defined slot. * * First we get for us a free flow entry, then fill in all * possible fields in it. * * TODO: consider dropping hash mutex while filling in datagram, * as this was done in previous version. Need to test & profile * to be sure. */ #ifdef INET static int hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r, int plen, uint8_t flags, uint16_t tcp_flags) { struct flow_entry *fle; mtx_assert(&hsh->mtx, MA_OWNED); fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT); if (fle == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * we can safely edit it. */ fle->f.version = IPVERSION; bcopy(r, &fle->f.r, sizeof(struct flow_rec)); fle->f.bytes = plen; fle->f.packets = 1; fle->f.tcp_flags = tcp_flags; fle->f.first = fle->f.last = time_uptime; /* * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { struct rtentry *rt; struct route_nhop_data rnd; rt = fib4_lookup_rt(r->fib, fle->f.r.r_dst, 0, NHR_NONE, &rnd); if (rt != NULL) { struct in_addr addr; uint32_t scopeid; struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0); int plen; rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); fle->f.fle_o_ifx = nh->nh_ifp->if_index; if (nh->gw_sa.sa_family == AF_INET) fle->f.next_hop = nh->gw4_sa.sin_addr; /* * XXX we're leaving an empty gateway here for * IPv6 nexthops. */ fle->f.dst_mask = plen; } } /* Do route lookup on source address, to fill in src_mask. */ if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { struct rtentry *rt; struct route_nhop_data rnd; rt = fib4_lookup_rt(r->fib, fle->f.r.r_src, 0, NHR_NONE, &rnd); if (rt != NULL) { struct in_addr addr; uint32_t scopeid; int plen; rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); fle->f.src_mask = plen; } } /* Push new flow at the and of hash. */ TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); return (0); } #endif #ifdef INET6 static int hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r, int plen, uint8_t flags, uint16_t tcp_flags) { struct flow6_entry *fle6; mtx_assert(&hsh6->mtx, MA_OWNED); fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT); if (fle6 == NULL) { priv->nfinfo_alloc_failed++; return (ENOMEM); } /* * Now fle is totally ours. It is detached from all lists, * we can safely edit it. */ fle6->f.version = IP6VERSION; bcopy(r, &fle6->f.r, sizeof(struct flow6_rec)); fle6->f.bytes = plen; fle6->f.packets = 1; fle6->f.tcp_flags = tcp_flags; fle6->f.first = fle6->f.last = time_uptime; /* * First we do route table lookup on destination address. So we can * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases. */ if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) { struct rtentry *rt; struct route_nhop_data rnd; rt = fib6_lookup_rt(r->fib, &fle6->f.r.dst.r_dst6, 0, NHR_NONE, &rnd); if (rt != NULL) { struct in6_addr addr; uint32_t scopeid; struct nhop_object *nh = nhop_select_func(rnd.rnd_nhop, 0); int plen; rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); fle6->f.fle_o_ifx = nh->nh_ifp->if_index; if (nh->gw_sa.sa_family == AF_INET6) fle6->f.n.next_hop6 = nh->gw6_sa.sin6_addr; fle6->f.dst_mask = plen; } } if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) { /* Do route lookup on source address, to fill in src_mask. */ struct rtentry *rt; struct route_nhop_data rnd; rt = fib6_lookup_rt(r->fib, &fle6->f.r.src.r_src6, 0, NHR_NONE, &rnd); if (rt != NULL) { struct in6_addr addr; uint32_t scopeid; int plen; rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); fle6->f.src_mask = plen; } } /* Push new flow at the and of hash. */ TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash); return (0); } #endif /* * Non-static functions called from ng_netflow.c */ /* Allocate memory and set up flow cache */ void ng_netflow_cache_init(priv_p priv) { struct flow_hash_entry *hsh; int i; /* Initialize cache UMA zone. */ priv->zone = uma_zcreate("NetFlow IPv4 cache", sizeof(struct flow_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone, CACHESIZE); #ifdef INET6 priv->zone6 = uma_zcreate("NetFlow IPv6 cache", sizeof(struct flow6_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(priv->zone6, CACHESIZE); #endif /* Allocate hash. */ priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #ifdef INET6 /* Allocate hash. */ priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry), M_NETFLOW_HASH, M_WAITOK | M_ZERO); /* Initialize hash. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) { mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF); TAILQ_INIT(&hsh->head); } #endif priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets = counter_u64_alloc(M_WAITOK); priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK); priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK); priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK); priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK); ng_netflow_v9_cache_init(priv); CTR0(KTR_NET, "ng_netflow startup()"); } /* Initialize new FIB table for v5 and v9 */ int ng_netflow_fib_init(priv_p priv, int fib) { fib_export_p fe = priv_to_fib(priv, fib); CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib); if (fe != NULL) return (0); if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF); mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF); fe->fib = fib; fe->domain_id = fib; if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib], (uintptr_t)NULL, (uintptr_t)fe) == 0) { /* FIB already set up by other ISR */ CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p", fib, fe, priv_to_fib(priv, fib)); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } else { /* Increase counter for statistics */ CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)", fib, fe, priv_to_fib(priv, fib)); priv->nfinfo_alloc_fibs++; } return (0); } /* Free all flow cache memory. Called from node close method. */ void ng_netflow_cache_flush(priv_p priv) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct netflow_export_item exp; fib_export_p fe; int i; bzero(&exp, sizeof(exp)); /* * We are going to free probably billable data. * Expire everything before freeing it. * No locking is required since callout is already drained. */ for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #ifdef INET6 for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); fe = priv_to_fib(priv, fle->f.r.fib); expire_flow(priv, fe, fle, NG_QUEUE); } #endif uma_zdestroy(priv->zone); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash != NULL) free(priv->hash, M_NETFLOW_HASH); #ifdef INET6 uma_zdestroy(priv->zone6); /* Destroy hash mutexes. */ for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) mtx_destroy(&hsh->mtx); /* Free hash memory. */ if (priv->hash6 != NULL) free(priv->hash6, M_NETFLOW_HASH); #endif for (i = 0; i < priv->maxfibs; i++) { if ((fe = priv_to_fib(priv, i)) == NULL) continue; if (fe->exp.item != NULL) export_send(priv, fe, fe->exp.item, NG_QUEUE); if (fe->exp.item9 != NULL) export9_send(priv, fe, fe->exp.item9, fe->exp.item9_opt, NG_QUEUE); mtx_destroy(&fe->export_mtx); mtx_destroy(&fe->export9_mtx); free(fe, M_NETGRAPH); } counter_u64_free(priv->nfinfo_bytes); counter_u64_free(priv->nfinfo_packets); counter_u64_free(priv->nfinfo_bytes6); counter_u64_free(priv->nfinfo_packets6); counter_u64_free(priv->nfinfo_sbytes); counter_u64_free(priv->nfinfo_spackets); counter_u64_free(priv->nfinfo_sbytes6); counter_u64_free(priv->nfinfo_spackets6); counter_u64_free(priv->nfinfo_act_exp); counter_u64_free(priv->nfinfo_inact_exp); ng_netflow_v9_cache_flush(priv); } #ifdef INET /* Insert packet from into flow cache. */ int ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; struct flow_rec r; int hlen, plen; int error = 0; uint16_t tcp_flags = 0; bzero(&r, sizeof(r)); if (ip->ip_v != IPVERSION) return (EINVAL); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) return (EINVAL); /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V4_L4; r.r_src = ip->ip_src; r.r_dst = ip->ip_dst; r.fib = fe->fib; plen = ntohs(ip->ip_len); r.r_ip_p = ip->ip_p; r.r_tos = ip->ip_tos; r.r_i_ifx = src_if_index; /* * XXX NOTE: only first fragment of fragmented TCP, UDP and * ICMP packet will be recorded with proper s_port and d_port. * Following fragments will be recorded simply as IP packet with * ip_proto = ip->ip_p and s_port, d_port set to zero. * I know, it looks like bug. But I don't want to re-implement * ip packet assebmling here. Anyway, (in)famous trafd works this way - * and nobody complains yet :) */ if ((ip->ip_off & htons(IP_OFFMASK)) == 0) switch(r.r_ip_p) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)((caddr_t )ip + hlen); r.r_sport = tcp->th_sport; r.r_dport = tcp->th_dport; tcp_flags = tcp_get_flags(tcp); break; } case IPPROTO_UDP: r.r_ports = *(uint32_t *)((caddr_t )ip + hlen); break; } counter_u64_add(priv->nfinfo_packets, 1); counter_u64_add(priv->nfinfo_bytes, plen); /* Find hash slot. */ hsh = &priv->hash[ip_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry, that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0) break; if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle) { /* An existent entry. */ fle->f.bytes += plen; fle->f.packets ++; fle->f.tcp_flags |= tcp_flags; fle->f.last = time_uptime; /* * We have the following reasons to expire flow in active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) || (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #endif #ifdef INET6 /* Insert IPv6 packet from into flow cache. */ int ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6, caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags, unsigned int src_if_index) { struct flow_entry *fle = NULL, *fle1; struct flow6_entry *fle6; struct flow_hash_entry *hsh; struct flow6_rec r; int plen; int error = 0; uint16_t tcp_flags = 0; /* check version */ if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) return (EINVAL); bzero(&r, sizeof(r)); r.src.r_src6 = ip6->ip6_src; r.dst.r_dst6 = ip6->ip6_dst; r.fib = fe->fib; /* Assume L4 template by default */ r.flow_type = NETFLOW_V9_FLOW_V6_L4; plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr); #if 0 /* XXX: set DSCP/CoS value */ r.r_tos = ip->ip_tos; #endif if ((flags & NG_NETFLOW_IS_FRAG) == 0) { switch(upper_proto) { case IPPROTO_TCP: { struct tcphdr *tcp; tcp = (struct tcphdr *)upper_ptr; r.r_ports = *(uint32_t *)upper_ptr; tcp_flags = tcp_get_flags(tcp); break; } case IPPROTO_UDP: case IPPROTO_SCTP: r.r_ports = *(uint32_t *)upper_ptr; break; } } r.r_ip_p = upper_proto; r.r_i_ifx = src_if_index; counter_u64_add(priv->nfinfo_packets6, 1); counter_u64_add(priv->nfinfo_bytes6, plen); /* Find hash slot. */ hsh = &priv->hash6[ip6_hash(&r)]; mtx_lock(&hsh->mtx); /* * Go through hash and find our entry. If we encounter an * entry, that should be expired, purge it. We do a reverse * search since most active entries are first, and most * searches are done on most active entries. */ TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) { if (fle->f.version != IP6VERSION) continue; fle6 = (struct flow6_entry *)fle; if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0) break; if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } } if (fle != NULL) { /* An existent entry. */ fle6 = (struct flow6_entry *)fle; fle6->f.bytes += plen; fle6->f.packets ++; fle6->f.tcp_flags |= tcp_flags; fle6->f.last = time_uptime; /* * We have the following reasons to expire flow in active way: * - it hit active timeout * - a TCP connection closed * - it is going to overflow counter */ if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) || (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_QUEUE); counter_u64_add(priv->nfinfo_act_exp, 1); } else { /* * It is the newest, move it to the tail, * if it isn't there already. Next search will * locate it quicker. */ if (fle != TAILQ_LAST(&hsh->head, fhead)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash); } } } else /* A new flow entry. */ error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags); mtx_unlock(&hsh->mtx); return (error); } #endif /* * Return records from cache to userland. * * TODO: matching particular IP should be done in kernel, here. */ int ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req, struct ngnf_show_header *resp) { struct flow_hash_entry *hsh; struct flow_entry *fle; struct flow_entry_data *data = (struct flow_entry_data *)(resp + 1); #ifdef INET6 struct flow6_entry_data *data6 = (struct flow6_entry_data *)(resp + 1); #endif int i, max; i = req->hash_id; if (i > NBUCKETS-1) return (EINVAL); #ifdef INET6 if (req->version == 6) { resp->version = 6; hsh = priv->hash6 + i; max = NREC6_AT_ONCE; } else #endif if (req->version == 4) { resp->version = 4; hsh = priv->hash + i; max = NREC_AT_ONCE; } else return (EINVAL); /* * We will transfer not more than NREC_AT_ONCE. More data * will come in next message. * We send current hash index and current record number in list * to userland, and userland should return it back to us. * Then, we will restart with new entry. * * The resulting cache snapshot can be inaccurate if flow expiration * is taking place on hash item between userland data requests for * this hash item id. */ resp->nentries = 0; for (; i < NBUCKETS; hsh++, i++) { int list_id; if (mtx_trylock(&hsh->mtx) == 0) { /* * Requested hash index is not available, * relay decision to skip or re-request data * to userland. */ resp->hash_id = i; resp->list_id = 0; return (0); } list_id = 0; TAILQ_FOREACH(fle, &hsh->head, fle_hash) { - if (hsh->mtx.mtx_lock & MTX_CONTESTED) { + if (hsh->mtx.mtx_lock & MTX_WAITERS) { resp->hash_id = i; resp->list_id = list_id; mtx_unlock(&hsh->mtx); return (0); } list_id++; /* Search for particular record in list. */ if (req->list_id > 0) { if (list_id < req->list_id) continue; /* Requested list position found. */ req->list_id = 0; } #ifdef INET6 if (req->version == 6) { struct flow6_entry *fle6; fle6 = (struct flow6_entry *)fle; bcopy(&fle6->f, data6 + resp->nentries, sizeof(fle6->f)); } else #endif bcopy(&fle->f, data + resp->nentries, sizeof(fle->f)); resp->nentries++; if (resp->nentries == max) { resp->hash_id = i; /* * If it was the last item in list * we simply skip to next hash_id. */ resp->list_id = list_id + 1; mtx_unlock(&hsh->mtx); return (0); } } mtx_unlock(&hsh->mtx); } resp->hash_id = resp->list_id = 0; return (0); } /* We have full datagram in privdata. Send it to export hook. */ static int export_send(priv_p priv, fib_export_p fe, item_p item, int flags) { struct mbuf *m = NGI_M(item); struct netflow_v5_export_dgram *dgram = mtod(m, struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct timespec ts; int error = 0; /* Fill mbuf header. */ m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) * header->count + sizeof(struct netflow_v5_header); /* Fill export header. */ header->sys_uptime = htonl(MILLIUPTIME(time_uptime)); getnanotime(&ts); header->unix_secs = htonl(ts.tv_sec); header->unix_nsecs = htonl(ts.tv_nsec); header->engine_type = 0; header->engine_id = fe->domain_id; header->pad = 0; header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq, header->count)); header->count = htons(header->count); if (priv->export != NULL) NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags); else NG_FREE_ITEM(item); return (error); } /* Add export record to dgram. */ static int export_add(item_p item, struct flow_entry *fle) { struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *); struct netflow_v5_header *header = &dgram->header; struct netflow_v5_record *rec; rec = &dgram->r[header->count]; header->count ++; KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS, ("ng_netflow: export too big")); /* Fill in export record. */ rec->src_addr = fle->f.r.r_src.s_addr; rec->dst_addr = fle->f.r.r_dst.s_addr; rec->next_hop = fle->f.next_hop.s_addr; rec->i_ifx = htons(fle->f.fle_i_ifx); rec->o_ifx = htons(fle->f.fle_o_ifx); rec->packets = htonl(fle->f.packets); rec->octets = htonl(fle->f.bytes); rec->first = htonl(MILLIUPTIME(fle->f.first)); rec->last = htonl(MILLIUPTIME(fle->f.last)); rec->s_port = fle->f.r.r_sport; rec->d_port = fle->f.r.r_dport; rec->flags = fle->f.tcp_flags; rec->prot = fle->f.r.r_ip_p; rec->tos = fle->f.r.r_tos; rec->dst_mask = fle->f.dst_mask; rec->src_mask = fle->f.src_mask; rec->pad1 = 0; rec->pad2 = 0; /* Not supported fields. */ rec->src_as = rec->dst_as = 0; if (header->count == NETFLOW_V5_MAX_RECORDS) return (1); /* end of datagram */ else return (0); } /* Periodic flow expiry run. */ void ng_netflow_expire(void *arg) { struct flow_entry *fle, *fle1; struct flow_hash_entry *hsh; priv_p priv = (priv_p )arg; int used, i; /* * Going through all the cache. */ used = uma_zone_get_cur(priv->zone); for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) { /* * Skip entries, that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ - if (hsh->mtx.mtx_lock & MTX_CONTESTED) + if (hsh->mtx.mtx_lock & MTX_WAITERS) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle)) break; if ((INACTIVE(fle) && (SMALL(fle) || (used > (NBUCKETS*2)))) || AGED(fle)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #ifdef INET6 used = uma_zone_get_cur(priv->zone6); for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) { struct flow6_entry *fle6; /* * Skip entries, that are already being worked on. */ if (mtx_trylock(&hsh->mtx) == 0) continue; TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) { fle6 = (struct flow6_entry *)fle; /* * Interrupt thread wants this entry! * Quick! Quick! Bail out! */ - if (hsh->mtx.mtx_lock & MTX_CONTESTED) + if (hsh->mtx.mtx_lock & MTX_WAITERS) break; /* * Don't expire aggressively while hash collision * ratio is predicted small. */ if (used <= (NBUCKETS*2) && !INACTIVE(fle6)) break; if ((INACTIVE(fle6) && (SMALL(fle6) || (used > (NBUCKETS*2)))) || AGED(fle6)) { TAILQ_REMOVE(&hsh->head, fle, fle_hash); expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle, NG_NOFLAGS); used--; counter_u64_add(priv->nfinfo_inact_exp, 1); } } mtx_unlock(&hsh->mtx); } #endif /* Schedule next expire. */ callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire, (void *)priv); } diff --git a/sys/netinet/sctp_lock_bsd.h b/sys/netinet/sctp_lock_bsd.h index ec66be0cf371..a60983cb30e3 100644 --- a/sys/netinet/sctp_lock_bsd.h +++ b/sys/netinet/sctp_lock_bsd.h @@ -1,500 +1,500 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _NETINET_SCTP_LOCK_BSD_H_ #define _NETINET_SCTP_LOCK_BSD_H_ /* * General locking concepts: The goal of our locking is to of course provide * consistency and yet minimize overhead. We will attempt to use * non-recursive locks which are supposed to be quite inexpensive. Now in * order to do this the goal is that most functions are not aware of locking. * Once we have a TCB we lock it and unlock when we are through. This means * that the TCB lock is kind-of a "global" lock when working on an * association. Caution must be used when asserting a TCB_LOCK since if we * recurse we deadlock. * * Most other locks (INP and INFO) attempt to localize the locking i.e. we try * to contain the lock and unlock within the function that needs to lock it. * This sometimes mean we do extra locks and unlocks and lose a bit of * efficiency, but if the performance statements about non-recursive locks are * true this should not be a problem. One issue that arises with this only * lock when needed is that if an implicit association setup is done we have * a problem. If at the time I lookup an association I have NULL in the tcb * return, by the time I call to create the association some other processor * could have created it. This is what the CREATE lock on the endpoint. * Places where we will be implicitly creating the association OR just * creating an association (the connect call) will assert the CREATE_INP * lock. This will assure us that during all the lookup of INP and INFO if * another creator is also locking/looking up we can gate the two to * synchronize. So the CREATE_INP lock is also another one we must use * extreme caution in locking to make sure we don't hit a re-entrancy issue. * */ /* * When working with the global SCTP lists we lock and unlock the INP_INFO * lock. So when we go to lookup an association we will want to do a * SCTP_INP_INFO_RLOCK() and then when we want to add a new association to * the SCTP_BASE_INFO() list's we will do a SCTP_INP_INFO_WLOCK(). */ #define SCTP_IPI_COUNT_INIT() #define SCTP_STATLOG_INIT_LOCK() #define SCTP_STATLOG_DESTROY() #define SCTP_STATLOG_LOCK() #define SCTP_STATLOG_UNLOCK() #define SCTP_INP_INFO_LOCK_INIT() do { \ rw_init(&SCTP_BASE_INFO(ipi_ep_mtx), "sctp-info"); \ } while (0) #define SCTP_INP_INFO_LOCK_DESTROY() do { \ if (rw_wowned(&SCTP_BASE_INFO(ipi_ep_mtx))) { \ rw_wunlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } \ rw_destroy(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } while (0) #define SCTP_INP_INFO_RLOCK() do { \ rw_rlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } while (0) #define SCTP_INP_INFO_WLOCK() do { \ rw_wlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } while (0) #define SCTP_INP_INFO_RUNLOCK() do { \ rw_runlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } while (0) #define SCTP_INP_INFO_WUNLOCK() do { \ rw_wunlock(&SCTP_BASE_INFO(ipi_ep_mtx)); \ } while (0) #define SCTP_INP_INFO_LOCK_ASSERT() do { \ rw_assert(&SCTP_BASE_INFO(ipi_ep_mtx), RA_LOCKED); \ } while (0) #define SCTP_INP_INFO_RLOCK_ASSERT() do { \ rw_assert(&SCTP_BASE_INFO(ipi_ep_mtx), RA_RLOCKED); \ } while (0) #define SCTP_INP_INFO_WLOCK_ASSERT() do { \ rw_assert(&SCTP_BASE_INFO(ipi_ep_mtx), RA_WLOCKED); \ } while (0) #define SCTP_MCORE_QLOCK_INIT(cpstr) do { \ mtx_init(&(cpstr)->que_mtx, "sctp-mcore_queue","queue_lock", \ MTX_DEF | MTX_DUPOK); \ } while (0) #define SCTP_MCORE_QDESTROY(cpstr) do { \ if (mtx_owned(&(cpstr)->core_mtx)) { \ mtx_unlock(&(cpstr)->que_mtx); \ } \ mtx_destroy(&(cpstr)->que_mtx); \ } while (0) #define SCTP_MCORE_QLOCK(cpstr) do { \ mtx_lock(&(cpstr)->que_mtx); \ } while (0) #define SCTP_MCORE_QUNLOCK(cpstr) do { \ mtx_unlock(&(cpstr)->que_mtx); \ } while (0) #define SCTP_MCORE_LOCK_INIT(cpstr) do { \ mtx_init(&(cpstr)->core_mtx, "sctp-cpulck","cpu_proc_lock", \ MTX_DEF | MTX_DUPOK); \ } while (0) #define SCTP_MCORE_DESTROY(cpstr) do { \ if (mtx_owned(&(cpstr)->core_mtx)) { \ mtx_unlock(&(cpstr)->core_mtx); \ } \ mtx_destroy(&(cpstr)->core_mtx); \ } while (0) #define SCTP_MCORE_LOCK(cpstr) do { \ mtx_lock(&(cpstr)->core_mtx); \ } while (0) #define SCTP_MCORE_UNLOCK(cpstr) do { \ mtx_unlock(&(cpstr)->core_mtx); \ } while (0) #define SCTP_IPI_ADDR_INIT() do { \ rw_init(&SCTP_BASE_INFO(ipi_addr_mtx), "sctp-addr"); \ } while (0) #define SCTP_IPI_ADDR_DESTROY() do { \ if (rw_wowned(&SCTP_BASE_INFO(ipi_addr_mtx))) { \ rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } \ rw_destroy(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) #define SCTP_IPI_ADDR_RLOCK() do { \ rw_rlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) #define SCTP_IPI_ADDR_WLOCK() do { \ rw_wlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) #define SCTP_IPI_ADDR_RUNLOCK() do { \ rw_runlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) #define SCTP_IPI_ADDR_WUNLOCK() do { \ rw_wunlock(&SCTP_BASE_INFO(ipi_addr_mtx)); \ } while (0) #define SCTP_IPI_ADDR_LOCK_ASSERT() do { \ rw_assert(&SCTP_BASE_INFO(ipi_addr_mtx), RA_LOCKED); \ } while (0) #define SCTP_IPI_ADDR_WLOCK_ASSERT() do { \ rw_assert(&SCTP_BASE_INFO(ipi_addr_mtx), RA_WLOCKED); \ } while (0) #define SCTP_IPI_ITERATOR_WQ_INIT() do { \ mtx_init(&sctp_it_ctl.ipi_iterator_wq_mtx, "sctp-it-wq", \ "sctp_it_wq", MTX_DEF); \ } while (0) #define SCTP_IPI_ITERATOR_WQ_DESTROY() do { \ mtx_destroy(&sctp_it_ctl.ipi_iterator_wq_mtx); \ } while (0) #define SCTP_IPI_ITERATOR_WQ_LOCK() do { \ mtx_lock(&sctp_it_ctl.ipi_iterator_wq_mtx); \ } while (0) #define SCTP_IPI_ITERATOR_WQ_UNLOCK() do { \ mtx_unlock(&sctp_it_ctl.ipi_iterator_wq_mtx); \ } while (0) #define SCTP_IP_PKTLOG_INIT() do { \ mtx_init(&SCTP_BASE_INFO(ipi_pktlog_mtx), "sctp-pktlog", \ "packetlog", MTX_DEF); \ } while (0) #define SCTP_IP_PKTLOG_DESTROY() do { \ mtx_destroy(&SCTP_BASE_INFO(ipi_pktlog_mtx)); \ } while (0) #define SCTP_IP_PKTLOG_LOCK() do { \ mtx_lock(&SCTP_BASE_INFO(ipi_pktlog_mtx)); \ } while (0) #define SCTP_IP_PKTLOG_UNLOCK() do { \ mtx_unlock(&SCTP_BASE_INFO(ipi_pktlog_mtx)); \ } while (0) /* * The INP locks we will use for locking an SCTP endpoint, so for example if * we want to change something at the endpoint level for example random_store * or cookie secrets we lock the INP level. */ #define SCTP_INP_READ_LOCK_INIT(_inp) do { \ mtx_init(&(_inp)->inp_rdata_mtx, "sctp-read", "inpr", \ MTX_DEF | MTX_DUPOK); \ } while (0) #define SCTP_INP_READ_LOCK_DESTROY(_inp) do { \ mtx_destroy(&(_inp)->inp_rdata_mtx); \ } while (0) #define SCTP_INP_READ_LOCK(_inp) do { \ mtx_lock(&(_inp)->inp_rdata_mtx); \ } while (0) #define SCTP_INP_READ_UNLOCK(_inp) do { \ mtx_unlock(&(_inp)->inp_rdata_mtx); \ } while (0) #define SCTP_INP_READ_LOCK_ASSERT(_inp) do { \ KASSERT(mtx_owned(&(_inp)->inp_rdata_mtx), \ ("Don't own INP read queue lock")); \ } while (0) #define SCTP_INP_LOCK_INIT(_inp) do { \ mtx_init(&(_inp)->inp_mtx, "sctp-inp", "inp", \ MTX_DEF | MTX_DUPOK); \ } while (0) #define SCTP_INP_LOCK_DESTROY(_inp) do { \ mtx_destroy(&(_inp)->inp_mtx); \ } while (0) #define SCTP_INP_LOCK_CONTENDED(_inp) \ - ((_inp)->inp_mtx.mtx_lock & MTX_CONTESTED) + ((_inp)->inp_mtx.mtx_lock & MTX_WAITERS) #define SCTP_INP_READ_CONTENDED(_inp) \ - ((_inp)->inp_rdata_mtx.mtx_lock & MTX_CONTESTED) + ((_inp)->inp_rdata_mtx.mtx_lock & MTX_WAITERS) #ifdef SCTP_LOCK_LOGGING #define SCTP_INP_RLOCK(_inp) do { \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) \ sctp_log_lock(_inp, NULL, SCTP_LOG_LOCK_INP); \ mtx_lock(&(_inp)->inp_mtx); \ } while (0) #define SCTP_INP_WLOCK(_inp) do { \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) \ sctp_log_lock(_inp, NULL, SCTP_LOG_LOCK_INP); \ mtx_lock(&(_inp)->inp_mtx); \ } while (0) #else #define SCTP_INP_RLOCK(_inp) do { \ mtx_lock(&(_inp)->inp_mtx); \ } while (0) #define SCTP_INP_WLOCK(_inp) do { \ mtx_lock(&(_inp)->inp_mtx); \ } while (0) #endif #define SCTP_INP_RUNLOCK(_inp) do { \ mtx_unlock(&(_inp)->inp_mtx); \ } while (0) #define SCTP_INP_WUNLOCK(_inp) do { \ mtx_unlock(&(_inp)->inp_mtx); \ } while (0) #define SCTP_INP_RLOCK_ASSERT(_inp) do { \ KASSERT(mtx_owned(&(_inp)->inp_mtx), \ ("Don't own INP read lock")); \ } while (0) #define SCTP_INP_WLOCK_ASSERT(_inp) do { \ KASSERT(mtx_owned(&(_inp)->inp_mtx), \ ("Don't own INP write lock")); \ } while (0) #define SCTP_INP_INCR_REF(_inp) atomic_add_int(&((_inp)->refcount), 1) #define SCTP_INP_DECR_REF(_inp) atomic_add_int(&((_inp)->refcount), -1) #define SCTP_ASOC_CREATE_LOCK_INIT(_inp) do { \ mtx_init(&(_inp)->inp_create_mtx, "sctp-create", "inp_create", \ MTX_DEF | MTX_DUPOK); \ } while (0) #define SCTP_ASOC_CREATE_LOCK_DESTROY(_inp) do { \ mtx_destroy(&(_inp)->inp_create_mtx); \ } while (0) #ifdef SCTP_LOCK_LOGGING #define SCTP_ASOC_CREATE_LOCK(_inp) do { \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) \ sctp_log_lock(_inp, NULL, SCTP_LOG_LOCK_CREATE); \ mtx_lock(&(_inp)->inp_create_mtx); \ } while (0) #else #define SCTP_ASOC_CREATE_LOCK(_inp) do { \ mtx_lock(&(_inp)->inp_create_mtx); \ } while (0) #endif #define SCTP_ASOC_CREATE_UNLOCK(_inp) do { \ mtx_unlock(&(_inp)->inp_create_mtx); \ } while (0) #define SCTP_ASOC_CREATE_LOCK_CONTENDED(_inp) \ - ((_inp)->inp_create_mtx.mtx_lock & MTX_CONTESTED) + ((_inp)->inp_create_mtx.mtx_lock & MTX_WAITERS) /* * For the majority of things (once we have found the association) we will * lock the actual association mutex. This will protect all the assoiciation * level queues and streams and such. We will need to lock the socket layer * when we stuff data up into the receiving sb_mb. I.e. we will need to do an * extra SOCKBUF_LOCK(&so->so_rcv) even though the association is locked. */ #define SCTP_TCB_LOCK_INIT(_tcb) do { \ mtx_init(&(_tcb)->tcb_mtx, "sctp-tcb", "tcb", \ MTX_DEF | MTX_DUPOK); \ } while (0) #define SCTP_TCB_LOCK_DESTROY(_tcb) do { \ mtx_destroy(&(_tcb)->tcb_mtx); \ } while (0) #ifdef SCTP_LOCK_LOGGING #define SCTP_TCB_LOCK(_tcb) do { \ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) \ sctp_log_lock(_tcb->sctp_ep, _tcb, SCTP_LOG_LOCK_TCB); \ mtx_lock(&(_tcb)->tcb_mtx); \ } while (0) #else #define SCTP_TCB_LOCK(_tcb) do { \ mtx_lock(&(_tcb)->tcb_mtx); \ } while (0) #endif #define SCTP_TCB_TRYLOCK(_tcb) \ mtx_trylock(&(_tcb)->tcb_mtx) #define SCTP_TCB_UNLOCK(_tcb) do { \ mtx_unlock(&(_tcb)->tcb_mtx); \ } while (0) #define SCTP_TCB_UNLOCK_IFOWNED(_tcb) do { \ if (mtx_owned(&(_tcb)->tcb_mtx)) \ mtx_unlock(&(_tcb)->tcb_mtx); \ } while (0) #define SCTP_TCB_LOCK_ASSERT(_tcb) do { \ KASSERT(mtx_owned(&(_tcb)->tcb_mtx), \ ("Don't own TCB lock")); \ } while (0) #define SCTP_ITERATOR_LOCK_INIT() do { \ mtx_init(&sctp_it_ctl.it_mtx, "sctp-it", "iterator", MTX_DEF); \ } while (0) #define SCTP_ITERATOR_LOCK_DESTROY() do { \ mtx_destroy(&sctp_it_ctl.it_mtx); \ } while (0) #define SCTP_ITERATOR_LOCK() \ do { \ KASSERT(!mtx_owned(&sctp_it_ctl.it_mtx), \ ("Own the iterator lock")); \ mtx_lock(&sctp_it_ctl.it_mtx); \ } while (0) #define SCTP_ITERATOR_UNLOCK() do { \ mtx_unlock(&sctp_it_ctl.it_mtx); \ } while (0) #define SCTP_WQ_ADDR_INIT() do { \ mtx_init(&SCTP_BASE_INFO(wq_addr_mtx), \ "sctp-addr-wq","sctp_addr_wq", MTX_DEF); \ } while (0) #define SCTP_WQ_ADDR_DESTROY() do { \ if (mtx_owned(&SCTP_BASE_INFO(wq_addr_mtx))) { \ mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \ } \ mtx_destroy(&SCTP_BASE_INFO(wq_addr_mtx)); \ } while (0) #define SCTP_WQ_ADDR_LOCK() do { \ mtx_lock(&SCTP_BASE_INFO(wq_addr_mtx)); \ } while (0) #define SCTP_WQ_ADDR_UNLOCK() do { \ mtx_unlock(&SCTP_BASE_INFO(wq_addr_mtx)); \ } while (0) #define SCTP_WQ_ADDR_LOCK_ASSERT() do { \ KASSERT(mtx_owned(&SCTP_BASE_INFO(wq_addr_mtx)), \ ("Don't own the ADDR-WQ lock")); \ } while (0) #define SCTP_INCR_EP_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_ep), 1); \ } while (0) #define SCTP_DECR_EP_COUNT() do { \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_ep), 1); \ } while (0) #define SCTP_INCR_ASOC_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_asoc), 1); \ } while (0) #define SCTP_DECR_ASOC_COUNT() do { \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_asoc), 1); \ } while (0) #define SCTP_INCR_LADDR_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_laddr), 1); \ } while (0) #define SCTP_DECR_LADDR_COUNT() do { \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_laddr), 1); \ } while (0) #define SCTP_INCR_RADDR_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_raddr), 1); \ } while (0) #define SCTP_DECR_RADDR_COUNT() do { \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_raddr),1); \ } while (0) #define SCTP_INCR_CHK_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_chunk), 1); \ } while (0) #define SCTP_DECR_CHK_COUNT() do { \ KASSERT(SCTP_BASE_INFO(ipi_count_chunk) > 0, \ ("ipi_count_chunk would become negative")); \ if (SCTP_BASE_INFO(ipi_count_chunk) != 0) \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_chunk), \ 1); \ } while (0) #define SCTP_INCR_READQ_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_readq), 1); \ } while (0) #define SCTP_DECR_READQ_COUNT() do { \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_readq), 1); \ } while (0) #define SCTP_INCR_STRMOQ_COUNT() do { \ atomic_add_int(&SCTP_BASE_INFO(ipi_count_strmoq), 1); \ } while (0) #define SCTP_DECR_STRMOQ_COUNT() do { \ atomic_subtract_int(&SCTP_BASE_INFO(ipi_count_strmoq), 1); \ } while (0) #if defined(SCTP_SO_LOCK_TESTING) #define SCTP_INP_SO(sctpinp) \ (sctpinp)->ip_inp.inp.inp_socket #define SCTP_SOCKET_LOCK(so, refcnt) #define SCTP_SOCKET_UNLOCK(so, refcnt) #endif #endif diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h index 83300d4eb593..4f6b45d78a88 100644 --- a/sys/sys/mutex.h +++ b/sys/sys/mutex.h @@ -1,553 +1,553 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $ */ #ifndef _SYS_MUTEX_H_ #define _SYS_MUTEX_H_ #include #include #include #ifdef _KERNEL #include #include #include #include #include /* * Mutex types and options passed to mtx_init(). MTX_QUIET and MTX_DUPOK * can also be passed in. */ #define MTX_DEF 0x00000000 /* DEFAULT (sleep) lock */ #define MTX_SPIN 0x00000001 /* Spin lock (disables interrupts) */ #define MTX_RECURSE 0x00000004 /* Option: lock allowed to recurse */ #define MTX_NOWITNESS 0x00000008 /* Don't do any witness checking. */ #define MTX_NOPROFILE 0x00000020 /* Don't profile this lock */ #define MTX_NEW 0x00000040 /* Don't check for double-init */ /* * Option flags passed to certain lock/unlock routines, through the use * of corresponding mtx_{lock,unlock}_flags() interface macros. */ #define MTX_QUIET LOP_QUIET /* Don't log a mutex event */ #define MTX_DUPOK LOP_DUPOK /* Don't log a duplicate acquire */ /* * State bits kept in mutex->mtx_lock, for the DEFAULT lock type. None of this, * with the exception of MTX_UNOWNED, applies to spin locks. */ #define MTX_UNOWNED 0x00000000 /* Cookie for free mutex */ #define MTX_RECURSED 0x00000001 /* lock recursed (for MTX_DEF only) */ -#define MTX_CONTESTED 0x00000002 /* lock contested (for MTX_DEF only) */ +#define MTX_WAITERS 0x00000002 /* lock has waiters (for MTX_DEF only) */ #define MTX_DESTROYED 0x00000004 /* lock destroyed */ -#define MTX_FLAGMASK (MTX_RECURSED | MTX_CONTESTED | MTX_DESTROYED) +#define MTX_FLAGMASK (MTX_RECURSED | MTX_WAITERS | MTX_DESTROYED) /* * Prototypes * * NOTE: Functions prepended with `_' (underscore) are exported to other parts * of the kernel via macros, thus allowing us to use the cpp LOCK_FILE * and LOCK_LINE or for hiding the lock cookie crunching to the * consumers. These functions should not be called directly by any * code using the API. Their macros cover their functionality. * Functions with a `_' suffix are the entrypoint for the common * KPI covering both compat shims and fast path case. These can be * used by consumers willing to pass options, file and line * informations, in an option-independent way. * * [See below for descriptions] * */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts); void _mtx_destroy(volatile uintptr_t *c); void mtx_sysinit(const void *arg); int _mtx_trylock_flags_int(struct mtx *m, int opts LOCK_FILE_LINE_ARG_DEF); int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line); void mutex_init(void); #if LOCK_DEBUG > 0 void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts, const char *file, int line); void __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts, const char *file, int line); #else void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v); void __mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v); #endif void mtx_wait_unlocked(struct mtx *m); #ifdef SMP #if LOCK_DEBUG > 0 void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, int opts, const char *file, int line); #else void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v); #endif #endif void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line); void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line); void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line); int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line); void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line); void mtx_spin_wait_unlocked(struct mtx *m); #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line); #endif void thread_lock_flags_(struct thread *, int, const char *, int); #if LOCK_DEBUG > 0 void _thread_lock(struct thread *td, int opts, const char *file, int line); #else void _thread_lock(struct thread *); #endif #if defined(LOCK_PROFILING) || (defined(KLD_MODULE) && !defined(KLD_TIED)) #define thread_lock(tdp) \ thread_lock_flags_((tdp), 0, __FILE__, __LINE__) #elif LOCK_DEBUG > 0 #define thread_lock(tdp) \ _thread_lock((tdp), 0, __FILE__, __LINE__) #else #define thread_lock(tdp) \ _thread_lock((tdp)) #endif #if LOCK_DEBUG > 0 #define thread_lock_flags(tdp, opt) \ thread_lock_flags_((tdp), (opt), __FILE__, __LINE__) #else #define thread_lock_flags(tdp, opt) \ _thread_lock(tdp) #endif #define thread_unlock(tdp) \ mtx_unlock_spin((tdp)->td_lock) /* * Top-level macros to provide lock cookie once the actual mtx is passed. * They will also prevent passing a malformed object to the mtx KPI by * failing compilation as the mtx_lock reserved member will not be found. */ #define mtx_init(m, n, t, o) \ _mtx_init(&(m)->mtx_lock, n, t, o) #define mtx_destroy(m) \ _mtx_destroy(&(m)->mtx_lock) #define mtx_trylock_flags_(m, o, f, l) \ _mtx_trylock_flags_(&(m)->mtx_lock, o, f, l) #if LOCK_DEBUG > 0 #define _mtx_lock_sleep(m, v, o, f, l) \ __mtx_lock_sleep(&(m)->mtx_lock, v, o, f, l) #define _mtx_unlock_sleep(m, v, o, f, l) \ __mtx_unlock_sleep(&(m)->mtx_lock, v, o, f, l) #else #define _mtx_lock_sleep(m, v, o, f, l) \ __mtx_lock_sleep(&(m)->mtx_lock, v) #define _mtx_unlock_sleep(m, v, o, f, l) \ __mtx_unlock_sleep(&(m)->mtx_lock, v) #endif #ifdef SMP #if LOCK_DEBUG > 0 #define _mtx_lock_spin(m, v, o, f, l) \ _mtx_lock_spin_cookie(&(m)->mtx_lock, v, o, f, l) #else #define _mtx_lock_spin(m, v, o, f, l) \ _mtx_lock_spin_cookie(&(m)->mtx_lock, v) #endif #endif #define _mtx_lock_flags(m, o, f, l) \ __mtx_lock_flags(&(m)->mtx_lock, o, f, l) #define _mtx_unlock_flags(m, o, f, l) \ __mtx_unlock_flags(&(m)->mtx_lock, o, f, l) #define _mtx_lock_spin_flags(m, o, f, l) \ __mtx_lock_spin_flags(&(m)->mtx_lock, o, f, l) #define _mtx_trylock_spin_flags(m, o, f, l) \ __mtx_trylock_spin_flags(&(m)->mtx_lock, o, f, l) #define _mtx_unlock_spin_flags(m, o, f, l) \ __mtx_unlock_spin_flags(&(m)->mtx_lock, o, f, l) #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define _mtx_assert(m, w, f, l) \ __mtx_assert(&(m)->mtx_lock, w, f, l) #endif #define mtx_recurse lock_object.lo_data /* Very simple operations on mtx_lock. */ /* Try to obtain mtx_lock once. */ #define _mtx_obtain_lock(mp, tid) \ atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) #define _mtx_obtain_lock_fetch(mp, vp, tid) \ atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid)) -/* Try to release mtx_lock if it is unrecursed and uncontested. */ +/* Try to release mtx_lock if it is unrecursed and without waiters. */ #define _mtx_release_lock(mp, tid) \ atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED) #define _mtx_release_lock_fetch(mp, vp) \ atomic_fcmpset_rel_ptr(&(mp)->mtx_lock, (vp), MTX_UNOWNED) /* * Full lock operations that are suitable to be inlined in non-debug * kernels. If the lock cannot be acquired or released trivially then * the work is deferred to another function. */ /* Lock a normal mutex. */ #define __mtx_lock(mp, tid, opts, file, line) __extension__ ({ \ uintptr_t _tid = (uintptr_t)(tid); \ uintptr_t _v = MTX_UNOWNED; \ \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) ||\ !_mtx_obtain_lock_fetch((mp), &_v, _tid))) \ _mtx_lock_sleep((mp), _v, (opts), (file), (line)); \ (void)0; /* ensure void type for expression */ \ }) /* * Lock a spin mutex. * * FIXME: spinlock_enter is a function call, defeating the point of inlining in * this. */ #ifdef SMP #define __mtx_lock_spin(mp, tid, opts, file, line) __extension__ ({ \ uintptr_t _tid = (uintptr_t)(tid); \ uintptr_t _v = MTX_UNOWNED; \ \ spinlock_enter(); \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(spin__acquire) || \ !_mtx_obtain_lock_fetch((mp), &_v, _tid))) \ _mtx_lock_spin((mp), _v, (opts), (file), (line)); \ (void)0; /* ensure void type for expression */ \ }) #define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__ ({ \ uintptr_t _tid = (uintptr_t)(tid); \ int _ret; \ \ spinlock_enter(); \ if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\ spinlock_exit(); \ _ret = 0; \ } else { \ LOCKSTAT_PROFILE_OBTAIN_SPIN_LOCK_SUCCESS(spin__acquire, \ mp, 0, 0, file, line); \ _ret = 1; \ } \ _ret; \ }) #else /* SMP */ #define __mtx_lock_spin(mp, tid, opts, file, line) __extension__ ({ \ uintptr_t _tid = (uintptr_t)(tid); \ \ spinlock_enter(); \ if ((mp)->mtx_lock == _tid) \ (mp)->mtx_recurse++; \ else { \ KASSERT((mp)->mtx_lock == MTX_UNOWNED, ("corrupt spinlock")); \ (mp)->mtx_lock = _tid; \ } \ (void)0; /* ensure void type for expression */ \ }) #define __mtx_trylock_spin(mp, tid, opts, file, line) __extension__ ({ \ uintptr_t _tid = (uintptr_t)(tid); \ int _ret; \ \ spinlock_enter(); \ if ((mp)->mtx_lock != MTX_UNOWNED) { \ spinlock_exit(); \ _ret = 0; \ } else { \ (mp)->mtx_lock = _tid; \ _ret = 1; \ } \ _ret; \ }) #endif /* SMP */ /* Unlock a normal mutex. */ #define __mtx_unlock(mp, tid, opts, file, line) __extension__ ({ \ uintptr_t _v = (uintptr_t)(tid); \ \ if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\ !_mtx_release_lock_fetch((mp), &_v))) \ _mtx_unlock_sleep((mp), _v, (opts), (file), (line)); \ (void)0; /* ensure void type for expression */ \ }) /* * Unlock a spin mutex. * * FIXME: spinlock_exit is a function call, defeating the point of inlining in * this. * * Since we always perform a spinlock_enter() when attempting to acquire a * spin lock, we need to always perform a matching spinlock_exit() when * releasing a spin lock. This includes the recursion cases. */ #ifdef SMP #define __mtx_unlock_spin(mp) __extension__ ({ \ if (mtx_recursed((mp))) \ (mp)->mtx_recurse--; \ else { \ LOCKSTAT_PROFILE_RELEASE_SPIN_LOCK(spin__release, mp); \ atomic_store_rel_ptr(&(mp)->mtx_lock, MTX_UNOWNED); \ } \ spinlock_exit(); \ }) #else /* SMP */ #define __mtx_unlock_spin(mp) __extension__ ({ \ if (mtx_recursed((mp))) \ (mp)->mtx_recurse--; \ else { \ LOCKSTAT_PROFILE_RELEASE_SPIN_LOCK(spin__release, mp); \ (mp)->mtx_lock = MTX_UNOWNED; \ } \ spinlock_exit(); \ }) #endif /* SMP */ /* * Exported lock manipulation interface. * * mtx_lock(m) locks MTX_DEF mutex `m' * * mtx_lock_spin(m) locks MTX_SPIN mutex `m' * * mtx_unlock(m) unlocks MTX_DEF mutex `m' * * mtx_unlock_spin(m) unlocks MTX_SPIN mutex `m' * * mtx_lock_spin_flags(m, opts) and mtx_lock_flags(m, opts) locks mutex `m' * and passes option flags `opts' to the "hard" function, if required. * With these routines, it is possible to pass flags such as MTX_QUIET * to the appropriate lock manipulation routines. * * mtx_trylock(m) attempts to acquire MTX_DEF mutex `m' but doesn't sleep if * it cannot. Rather, it returns 0 on failure and non-zero on success. * It does NOT handle recursion as we assume that if a caller is properly * using this part of the interface, he will know that the lock in question * is _not_ recursed. * * mtx_trylock_flags(m, opts) is used the same way as mtx_trylock() but accepts * relevant option flags `opts.' * * mtx_trylock_spin(m) attempts to acquire MTX_SPIN mutex `m' but doesn't * spin if it cannot. Rather, it returns 0 on failure and non-zero on * success. It always returns failure for recursed lock attempts. * * mtx_initialized(m) returns non-zero if the lock `m' has been initialized. * * mtx_owned(m) returns non-zero if the current thread owns the lock `m' * * mtx_recursed(m) returns non-zero if the lock `m' is presently recursed. */ #define mtx_lock(m) mtx_lock_flags((m), 0) #define mtx_lock_spin(m) mtx_lock_spin_flags((m), 0) #define mtx_trylock(m) mtx_trylock_flags((m), 0) #define mtx_trylock_spin(m) mtx_trylock_spin_flags((m), 0) #define mtx_unlock(m) mtx_unlock_flags((m), 0) #define mtx_unlock_spin(m) mtx_unlock_spin_flags((m), 0) struct mtx_pool; struct mtx_pool *mtx_pool_create(const char *mtx_name, int pool_size, int opts); void mtx_pool_destroy(struct mtx_pool **poolp); struct mtx *mtx_pool_find(struct mtx_pool *pool, void *ptr); struct mtx *mtx_pool_alloc(struct mtx_pool *pool); #define mtx_pool_lock(pool, ptr) \ mtx_lock(mtx_pool_find((pool), (ptr))) #define mtx_pool_lock_spin(pool, ptr) \ mtx_lock_spin(mtx_pool_find((pool), (ptr))) #define mtx_pool_unlock(pool, ptr) \ mtx_unlock(mtx_pool_find((pool), (ptr))) #define mtx_pool_unlock_spin(pool, ptr) \ mtx_unlock_spin(mtx_pool_find((pool), (ptr))) /* * mtxpool_sleep is a general purpose pool of sleep mutexes. */ extern struct mtx_pool *mtxpool_sleep; #ifndef LOCK_DEBUG #error LOCK_DEBUG not defined, include before #endif #if LOCK_DEBUG > 0 || defined(MUTEX_NOINLINE) #define mtx_lock_flags_(m, opts, file, line) \ _mtx_lock_flags((m), (opts), (file), (line)) #define mtx_unlock_flags_(m, opts, file, line) \ _mtx_unlock_flags((m), (opts), (file), (line)) #define mtx_lock_spin_flags_(m, opts, file, line) \ _mtx_lock_spin_flags((m), (opts), (file), (line)) #define mtx_trylock_spin_flags_(m, opts, file, line) \ _mtx_trylock_spin_flags((m), (opts), (file), (line)) #define mtx_unlock_spin_flags_(m, opts, file, line) \ _mtx_unlock_spin_flags((m), (opts), (file), (line)) #else /* LOCK_DEBUG == 0 && !MUTEX_NOINLINE */ #define mtx_lock_flags_(m, opts, file, line) \ __mtx_lock((m), curthread, (opts), (file), (line)) #define mtx_unlock_flags_(m, opts, file, line) \ __mtx_unlock((m), curthread, (opts), (file), (line)) #define mtx_lock_spin_flags_(m, opts, file, line) \ __mtx_lock_spin((m), curthread, (opts), (file), (line)) #define mtx_trylock_spin_flags_(m, opts, file, line) \ __mtx_trylock_spin((m), curthread, (opts), (file), (line)) #define mtx_unlock_spin_flags_(m, opts, file, line) \ __mtx_unlock_spin((m)) #endif /* LOCK_DEBUG > 0 || MUTEX_NOINLINE */ #ifdef INVARIANTS #define mtx_assert_(m, what, file, line) \ _mtx_assert((m), (what), (file), (line)) #define GIANT_REQUIRED mtx_assert_(&Giant, MA_OWNED, __FILE__, __LINE__) #else /* INVARIANTS */ #define mtx_assert_(m, what, file, line) (void)0 #define GIANT_REQUIRED #endif /* INVARIANTS */ #define mtx_lock_flags(m, opts) \ mtx_lock_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_unlock_flags(m, opts) \ mtx_unlock_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_lock_spin_flags(m, opts) \ mtx_lock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_unlock_spin_flags(m, opts) \ mtx_unlock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_trylock_flags(m, opts) \ mtx_trylock_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_trylock_spin_flags(m, opts) \ mtx_trylock_spin_flags_((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_assert(m, what) \ mtx_assert_((m), (what), __FILE__, __LINE__) #define mtx_sleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define MTX_READ_VALUE(m) ((m)->mtx_lock) #define mtx_initialized(m) lock_initialized(&(m)->lock_object) #define lv_mtx_owner(v) ((struct thread *)((v) & ~MTX_FLAGMASK)) #define mtx_owner(m) lv_mtx_owner(MTX_READ_VALUE(m)) #define mtx_owned(m) (mtx_owner(m) == curthread) #define mtx_recursed(m) ((m)->mtx_recurse != 0) #define mtx_name(m) ((m)->lock_object.lo_name) /* * Global locks. */ extern struct mtx Giant; extern struct mtx blocked_lock; /* * Giant lock manipulation and clean exit macros. * Used to replace return with an exit Giant and return. * * Note that DROP_GIANT*() needs to be paired with PICKUP_GIANT() * The #ifndef is to allow lint-like tools to redefine DROP_GIANT. */ #ifndef DROP_GIANT #define DROP_GIANT() \ do { \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant); \ \ if (__predict_false(mtx_owned(&Giant))) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ for (_giantcnt = 0; mtx_owned(&Giant) && \ !SCHEDULER_STOPPED(); _giantcnt++) \ mtx_unlock(&Giant); \ } #define PICKUP_GIANT() \ mtx_assert(&Giant, MA_NOTOWNED); \ if (__predict_false(_giantcnt > 0)) { \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) #endif struct mtx_args { void *ma_mtx; const char *ma_desc; int ma_opts; }; #define MTX_SYSINIT(name, mtx, desc, opts) \ static struct mtx_args name##_args = { \ (mtx), \ (desc), \ (opts) \ }; \ SYSINIT(name##_mtx_sysinit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ mtx_sysinit, &name##_args); \ SYSUNINIT(name##_mtx_sysuninit, SI_SUB_LOCK, SI_ORDER_MIDDLE, \ _mtx_destroy, __DEVOLATILE(void *, &(mtx)->mtx_lock)) /* * The INVARIANTS-enabled mtx_assert() functionality. * * The constants need to be defined for INVARIANT_SUPPORT infrastructure * support as _mtx_assert() itself uses them and the latter implies that * _mtx_assert() must build. */ #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) #define MA_OWNED LA_XLOCKED #define MA_NOTOWNED LA_UNLOCKED #define MA_RECURSED LA_RECURSED #define MA_NOTRECURSED LA_NOTRECURSED #endif /* * Common lock type names. */ #define MTX_NETWORK_LOCK "network driver" #endif /* _KERNEL */ #endif /* _SYS_MUTEX_H_ */