diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -163,7 +163,8 @@
 	struct cc_exec		cc_exec_entity[2];
 	struct callout		*cc_next;
 	struct callout_list	*cc_callwheel;
-	struct callout_tailq	cc_expireq;
+	struct callout_tailq	cc_expireq_normal;
+	struct callout_tailq	cc_expireq_giant;
 	sbintime_t		cc_firstevent;
 	sbintime_t		cc_lastscan;
 	void			*cc_cookie;
@@ -174,6 +175,8 @@
 #endif
 };
 
+#define	callout_uses_giant(c)	((c)->c_lock == &Giant.lock_object)
+
 #define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)
 
 #define	cc_exec_curr(cc, dir)		cc->cc_exec_entity[dir].cc_curr
@@ -208,7 +211,7 @@
 static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
 static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
-		    int *mpcalls, int *lockcalls, int *gcalls,
+		    int *mpcalls, int *lockcalls,
 #endif
 		    int direct);
 
@@ -323,7 +326,8 @@
 	    DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
 	for (i = 0; i < callwheelsize; i++)
 		LIST_INIT(&cc->cc_callwheel[i]);
-	TAILQ_INIT(&cc->cc_expireq);
+	TAILQ_INIT(&cc->cc_expireq_normal);
+	TAILQ_INIT(&cc->cc_expireq_giant);
 	cc->cc_firstevent = SBT_MAX;
 	for (i = 0; i < 2; i++)
 		cc_cce_cleanup(cc, i);
@@ -478,7 +482,7 @@
 			LIST_REMOVE(tmp, c_links.le);
 			softclock_call_cc(tmp, cc,
 #ifdef CALLOUT_PROFILING
-			    &mpcalls_dir, &lockcalls_dir, NULL,
+			    &mpcalls_dir, &lockcalls_dir,
 #endif
 			    1);
 			tmp = cc_exec_next(cc);
@@ -486,8 +490,13 @@
 		} else {
 			tmpn = LIST_NEXT(tmp, c_links.le);
 			LIST_REMOVE(tmp, c_links.le);
-			TAILQ_INSERT_TAIL(&cc->cc_expireq,
-			    tmp, c_links.tqe);
+			if (callout_uses_giant(tmp)) {
+				TAILQ_INSERT_TAIL(&cc->cc_expireq_giant,
+				    tmp, c_links.tqe);
+			} else {
+				TAILQ_INSERT_TAIL(&cc->cc_expireq_normal,
+				    tmp, c_links.tqe);
+			}
 			tmp->c_iflags |= CALLOUT_PROCESSED;
 			tmp = tmpn;
 		}
@@ -534,7 +543,8 @@
 	 * swi_sched acquires the thread lock, so we don't want to call it
 	 * with cc_lock held; incorrect locking order.
 	 */
-	if (!TAILQ_EMPTY(&cc->cc_expireq))
+	if (TAILQ_FIRST(&cc->cc_expireq_giant) != NULL ||
+	    TAILQ_FIRST(&cc->cc_expireq_normal) != NULL)
 		swi_sched(cc->cc_cookie, 0);
 }
 
@@ -605,7 +615,7 @@
 static void
 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
-    int *mpcalls, int *lockcalls, int *gcalls,
+    int *mpcalls, int *lockcalls,
 #endif
     int direct)
 {
@@ -635,14 +645,15 @@
 	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
 	    ("softclock_call_cc: act %p %x", c, c->c_flags));
 	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
-	lock_status = 0;
 	if (c->c_flags & CALLOUT_SHAREDLOCK) {
 		if (class == &lock_class_rm)
 			lock_status = (uintptr_t)&tracker;
 		else
 			lock_status = 1;
+	} else {
+		lock_status = 0;
 	}
-	c_lock = c->c_lock;
+	c_lock = callout_uses_giant(c) ? NULL : c->c_lock;
 	c_func = c->c_func;
 	c_arg = c->c_arg;
 	c_iflags = c->c_iflags;
@@ -666,19 +677,11 @@
 		}
 		/* The callout cannot be stopped now. */
 		cc_exec_cancel(cc, direct) = true;
-		if (c_lock == &Giant.lock_object) {
-#ifdef CALLOUT_PROFILING
-			(*gcalls)++;
-#endif
-			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
-			    c, c_func, c_arg);
-		} else {
 #ifdef CALLOUT_PROFILING
-			(*lockcalls)++;
+		(*lockcalls)++;
 #endif
-			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
-			    c, c_func, c_arg);
-		}
+		CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
+		    c, c_func, c_arg);
 	} else {
 #ifdef CALLOUT_PROFILING
 		(*mpcalls)++;
@@ -811,17 +814,36 @@
 
 	cc = (struct callout_cpu *)arg;
 	CC_LOCK(cc);
-	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
-		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+	while ((c = TAILQ_FIRST(&cc->cc_expireq_normal)) != NULL) {
+		TAILQ_REMOVE(&cc->cc_expireq_normal, c, c_links.tqe);
 		softclock_call_cc(c, cc,
 #ifdef CALLOUT_PROFILING
-		    &mpcalls, &lockcalls, &gcalls,
+		    &mpcalls, &lockcalls,
 #endif
 		    0);
 #ifdef CALLOUT_PROFILING
 		++depth;
 #endif
 	}
+
+	if (TAILQ_FIRST(&cc->cc_expireq_giant) != NULL && mtx_trylock(&Giant)) {
+		while ((c = TAILQ_FIRST(&cc->cc_expireq_giant)) != NULL) {
+			TAILQ_REMOVE(&cc->cc_expireq_giant, c, c_links.tqe);
+			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
+			    c, c->c_func, c->c_arg);
+
+			softclock_call_cc(c, cc,
+#ifdef CALLOUT_PROFILING
+			    &gcalls, &lockcalls,
+#endif
+			    0);
+#ifdef CALLOUT_PROFILING
+			++depth;
+#endif
+		}
+		mtx_unlock(&Giant);
+	}
+
 #ifdef CALLOUT_PROFILING
 	avg_depth += (depth * 1000 - avg_depth) >> 8;
 	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
@@ -973,8 +995,10 @@
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
+		} else if (callout_uses_giant(c)) {
+			TAILQ_REMOVE(&cc->cc_expireq_giant, c, c_links.tqe);
 		} else {
-			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+			TAILQ_REMOVE(&cc->cc_expireq_normal, c, c_links.tqe);
 		}
 		cancelled = 1;
 		c->c_iflags &= ~ CALLOUT_PENDING;
@@ -1067,9 +1091,9 @@
 	 * so just discard this check for the moment.
 	 */
 	if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
-		if (c->c_lock == &Giant.lock_object)
+		if (callout_uses_giant(c)) {
 			use_lock = mtx_owned(&Giant);
-		else {
+		} else {
 			use_lock = 1;
 			class = LOCK_CLASS(c->c_lock);
 			class->lc_assert(c->c_lock, LA_XLOCKED);
@@ -1301,8 +1325,10 @@
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
+		} else if (callout_uses_giant(c)) {
+			TAILQ_REMOVE(&cc->cc_expireq_giant, c, c_links.tqe);
 		} else {
-			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+			TAILQ_REMOVE(&cc->cc_expireq_normal, c, c_links.tqe);
 		}
 	}
 	CC_UNLOCK(cc);
@@ -1318,7 +1344,7 @@
 		c->c_iflags = CALLOUT_RETURNUNLOCKED;
 	} else {
 		c->c_lock = &Giant.lock_object;
-		c->c_iflags = 0;
+		c->c_iflags = CALLOUT_RETURNUNLOCKED;
 	}
 	c->c_cpu = cc_default_cpu;
 }
@@ -1335,6 +1361,11 @@
 	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
 	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", __func__));
+	if (callout_uses_giant(c)) {
+		KASSERT((flags & CALLOUT_RETURNUNLOCKED) == 0,
+		    ("callout_init_lock: Giant locked callbacks cannot return unlocked"));
+		flags |= CALLOUT_RETURNUNLOCKED;
+	}
 	c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
 	c->c_cpu = cc_default_cpu;
 }
 
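
For review context, here is a minimal, hypothetical usage sketch (not part of the patch) of the kind of consumer this change affects: a callout initialized with callout_init(c, 0) is non-MPSAFE, so callout_init() points c_lock at &Giant.lock_object, and with this patch such callouts are queued on cc_expireq_giant and their handlers run with Giant held by softclock(). The foo_softc, foo_timeout, and foo_attach names are invented for illustration.

/*
 * Hypothetical driver state; only the callout usage matters here.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>

struct foo_softc {
	struct callout	foo_timer;	/* hypothetical per-device timer */
	int		foo_ticks;
};

static void
foo_timeout(void *arg)
{
	struct foo_softc *sc = arg;

	/*
	 * Giant is held when this handler runs: softclock() acquired it
	 * via mtx_trylock() before draining cc_expireq_giant and drops it
	 * once that queue is empty.
	 */
	sc->foo_ticks++;
	callout_reset(&sc->foo_timer, hz, foo_timeout, sc);	/* rearm, ~1s */
}

static void
foo_attach(struct foo_softc *sc)
{
	/* mpsafe == 0, so callout_init() sets c_lock to &Giant.lock_object. */
	callout_init(&sc->foo_timer, 0);
	callout_reset(&sc->foo_timer, hz, foo_timeout, sc);
}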