Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC
+++ sys/amd64/conf/GENERIC
@@ -118,6 +118,8 @@
 options 	NETDUMP			# netdump(4) client support
 options 	NETGDB			# netgdb(4) client support
 
+options 	CALLOUT_DEBUG_DRAIN
+
 # Make an SMP-capable kernel by default
 options 	SMP			# Symmetric MultiProcessor Kernel
 options 	EARLY_AP_STARTUP
Index: sys/amd64/conf/X
===================================================================
--- sys/amd64/conf/X
+++ sys/amd64/conf/X
@@ -32,6 +32,8 @@
 options 	BREAK_TO_DEBUGGER
 options 	ALT_BREAK_TO_DEBUGGER
 
+options 	CALLOUT_DEBUG_DRAIN
+
 options 	INET
 options 	INET6
 options 	SCTP
Index: sys/compat/linuxkpi/common/src/linux_compat.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_compat.c
+++ sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1909,9 +1909,10 @@
 	int ret;
 
 	timer->expires = expires;
-	ret = callout_reset(&timer->callout,
-	    linux_timer_jiffies_until(expires),
-	    &linux_timer_callback_wrapper, timer);
+	ret = callout_reset_sbt_on_arg1(&timer->callout,
+	    tick_sbt * linux_timer_jiffies_until(expires), 0,
+	    &linux_timer_callback_wrapper, timer,
+	    timer->function, -1, C_HARDCLOCK);
 
 	MPASS(ret == 0 || ret == 1);
 
@@ -1922,18 +1923,20 @@
 add_timer(struct timer_list *timer)
 {
 
-	callout_reset(&timer->callout,
-	    linux_timer_jiffies_until(timer->expires),
-	    &linux_timer_callback_wrapper, timer);
+	callout_reset_sbt_on_arg1(&timer->callout,
+	    tick_sbt * linux_timer_jiffies_until(timer->expires), 0,
+	    &linux_timer_callback_wrapper, timer,
+	    timer->function, -1, C_HARDCLOCK);
 }
 
 void
 add_timer_on(struct timer_list *timer, int cpu)
 {
 
-	callout_reset_on(&timer->callout,
-	    linux_timer_jiffies_until(timer->expires),
-	    &linux_timer_callback_wrapper, timer, cpu);
+	callout_reset_sbt_on_arg1(&timer->callout,
+	    tick_sbt * linux_timer_jiffies_until(timer->expires), 0,
+	    &linux_timer_callback_wrapper, timer,
+	    timer->function, cpu, C_HARDCLOCK);
 }
 
 int
Index: sys/compat/linuxkpi/common/src/linux_hrtimer.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_hrtimer.c
+++ sys/compat/linuxkpi/common/src/linux_hrtimer.c
@@ -46,8 +46,10 @@
 
 	ret = hrtimer->function(hrtimer);
 	if (ret == HRTIMER_RESTART) {
-		callout_schedule_sbt(&hrtimer->callout,
-		    nstosbt(hrtimer->expires), nstosbt(hrtimer->precision), 0);
+		callout_reset_sbt_on_arg1(&hrtimer->callout,
+		    nstosbt(hrtimer->expires), nstosbt(hrtimer->precision),
+		    hrtimer->callout.c_func, hrtimer->callout.c_arg,
+		    hrtimer->function, -1, 0);
 	} else {
 		callout_deactivate(&hrtimer->callout);
 	}
@@ -106,8 +108,9 @@
 
 	mtx_lock(&hrtimer->mtx);
 	hrtimer->precision = nsec;
-	callout_reset_sbt(&hrtimer->callout, nstosbt(ktime_to_ns(time)),
-	    nstosbt(nsec), hrtimer_call_handler, hrtimer, 0);
+	callout_reset_sbt_on_arg1(&hrtimer->callout, nstosbt(ktime_to_ns(time)),
+	    nstosbt(nsec), hrtimer_call_handler, hrtimer,
+	    hrtimer->function, -1, 0);
 	mtx_unlock(&hrtimer->mtx);
 }
 
@@ -116,8 +119,9 @@
 {
 
 	mtx_lock(&hrtimer->mtx);
-	callout_reset_sbt(&hrtimer->callout, nstosbt(ktime_to_ns(interval)),
-	    nstosbt(hrtimer->precision), hrtimer_call_handler, hrtimer, 0);
+	callout_reset_sbt_on_arg1(&hrtimer->callout, nstosbt(ktime_to_ns(interval)),
+	    nstosbt(hrtimer->precision), hrtimer_call_handler, hrtimer,
+	    hrtimer->function, -1, 0);
 	mtx_unlock(&hrtimer->mtx);
 }
 
Index: sys/compat/linuxkpi/common/src/linux_work.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_work.c
+++ sys/compat/linuxkpi/common/src/linux_work.c
@@ -240,13 +240,17 @@
 		linux_delayed_work_enqueue(dwork);
 	} else if (unlikely(cpu != WORK_CPU_UNBOUND)) {
 		mtx_lock(&dwork->timer.mtx);
-		callout_reset_on(&dwork->timer.callout, delay,
-		    &linux_delayed_work_timer_fn, dwork, cpu);
+		callout_reset_sbt_on_arg1(&dwork->timer.callout,
+		    tick_sbt * delay, 0,
+		    &linux_delayed_work_timer_fn, dwork,
+		    dwork->work.func, cpu, C_HARDCLOCK);
 		mtx_unlock(&dwork->timer.mtx);
 	} else {
 		mtx_lock(&dwork->timer.mtx);
-		callout_reset(&dwork->timer.callout, delay,
-		    &linux_delayed_work_timer_fn, dwork);
+		callout_reset_sbt_on_arg1(&dwork->timer.callout,
+		    tick_sbt * delay, 0,
+		    &linux_delayed_work_timer_fn, dwork,
+		    dwork->work.func, -1, C_HARDCLOCK);
 		mtx_unlock(&dwork->timer.mtx);
 	}
 	return (true);
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -79,6 +79,7 @@
 BOOTHOWTO	opt_global.h
 BOOTVERBOSE	opt_global.h
 CALLOUT_PROFILING
+CALLOUT_DEBUG_DRAIN	opt_global.h
 CAPABILITIES	opt_capsicum.h
 CAPABILITY_MODE	opt_capsicum.h
 COMPAT_43	opt_global.h
Index: sys/dev/mlx5/mlx5_core/mlx5_main.c
===================================================================
--- sys/dev/mlx5/mlx5_core/mlx5_main.c
+++ sys/dev/mlx5/mlx5_core/mlx5_main.c
@@ -1268,6 +1268,9 @@
 out:
 	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 	mutex_unlock(&dev->intf_state_mutex);
+#ifdef CALLOUT_DEBUG_DRAIN
+	callout_check_drain();
+#endif
 	return err;
 }
 
Index: sys/dev/mlx5/mlx5_en/mlx5_en_main.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1073,7 +1073,8 @@
 
 	queue_work(priv->wq, &priv->update_stats_work);
 
-	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
+	callout_reset_sbt_on_arg1(&priv->watchdog, tick_sbt * hz, 0,
+	    &mlx5e_update_stats, priv, mlx5e_update_stats, -1, C_HARDCLOCK);
 }
 
 static void
@@ -1145,10 +1146,11 @@
 	if (priv->clbr_done == 0)
 		mlx5e_calibration_callout(priv);
 	else
-		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
-		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
-		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
-		    priv);
+		callout_reset_sbt_on_arg1(&priv->tstmp_clbr, (priv->clbr_done <
+		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
+		    mlx5e_normal_calibration) * hz * tick_sbt, 0,
+		    mlx5e_calibration_callout, priv, mlx5e_calibration_callout,
+		    PCPU_GET(cpuid), C_HARDCLOCK);
 }
 
 static uint64_t
@@ -1856,7 +1858,9 @@
 	}
 
 	/* restart timer */
-	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
+	callout_reset_sbt_on_arg1(&sq->cev_callout, tick_sbt * hz, 0,
+	    mlx5e_sq_cev_timeout, sq, mlx5e_sq_cev_timeout, PCPU_GET(cpuid),
+	    C_HARDCLOCK);
 }
 
 void
Index: sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
===================================================================
--- sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -111,7 +111,9 @@
 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, rq->wq.head);
 
 		if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, rq->wq.head))) {
-			callout_reset_curcpu(&rq->watchdog, 1, (void *)&mlx5e_post_rx_wqes, rq);
+			callout_reset_sbt_on_arg1(&rq->watchdog, tick_sbt * 1,
+			    0, (void *)&mlx5e_post_rx_wqes, rq,
+			    mlx5e_post_rx_wqes, PCPU_GET(cpuid), C_HARDCLOCK);
 			break;
 		}
 		mlx5_wq_ll_push(&rq->wq, be16_to_cpu(wqe->next.next_wqe_index));
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -77,6 +77,7 @@
  * violated.  Typically new fields are moved to the end of the
  * structures.
  */
+#ifndef CALLOUT_DEBUG_DRAIN
 #ifdef __amd64__
 _Static_assert(offsetof(struct thread, td_flags) == 0xfc,
     "struct thread KBI td_flags");
@@ -96,7 +97,7 @@
     "struct proc KBI p_comm");
 _Static_assert(offsetof(struct proc, p_emuldata) == 0x4b0,
     "struct proc KBI p_emuldata");
-#endif
+#endif /* __amd64__ */
 #ifdef __i386__
 _Static_assert(offsetof(struct thread, td_flags) == 0x98,
     "struct thread KBI td_flags");
@@ -116,7 +117,8 @@
     "struct proc KBI p_comm");
 _Static_assert(offsetof(struct proc, p_emuldata) == 0x308,
     "struct proc KBI p_emuldata");
-#endif
+#endif /* __i386__ */
+#endif /* CALLOUT_DEBUG_DRAIN */
 
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE(proc, , , lwp__exit);
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c
+++ sys/kern/kern_timeout.c
@@ -55,12 +55,14 @@
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
+#include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/sleepqueue.h>
+#include <sys/stack.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
@@ -157,6 +159,12 @@
 #endif
 	bool cc_cancel;
 	bool cc_waiting;
+#ifdef CALLOUT_DEBUG_DRAIN
+	void *cc_last_arg1;
+	void *ce_migration_arg1;
+	struct stack cc_stack;
+	struct stack ce_migration_stack;
+#endif
 };
 
 /*
@@ -177,6 +185,10 @@
 #ifdef KTR
 	char cc_ktr_event_name[20];
 #endif
+#ifdef CALLOUT_DEBUG_DRAIN
+	struct callout_next_data *cc_nd;
+	struct callout_next_data cc_exp_nd;
+#endif
 };
 
 #define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)
@@ -184,6 +196,8 @@
 #define	cc_exec_curr(cc, dir)		cc->cc_exec_entity[dir].cc_curr
 #define	cc_exec_last_func(cc, dir)	cc->cc_exec_entity[dir].cc_last_func
 #define	cc_exec_last_arg(cc, dir)	cc->cc_exec_entity[dir].cc_last_arg
+#define	cc_exec_last_arg1(cc, dir)	cc->cc_exec_entity[dir].cc_last_arg1
+#define	cc_exec_last_stack(cc, dir)	cc->cc_exec_entity[dir].cc_stack
 #define	cc_exec_drain(cc, dir)		cc->cc_exec_entity[dir].cc_drain
 #define	cc_exec_next(cc)		cc->cc_next
 #define	cc_exec_cancel(cc, dir)	cc->cc_exec_entity[dir].cc_cancel
@@ -194,6 +208,8 @@
 #define	cc_migration_cpu(cc, dir)	cc->cc_exec_entity[dir].ce_migration_cpu
 #define	cc_migration_time(cc, dir)	cc->cc_exec_entity[dir].ce_migration_time
 #define	cc_migration_prec(cc, dir)	cc->cc_exec_entity[dir].ce_migration_prec
+#define	cc_migration_arg1(cc, dir)	cc->cc_exec_entity[dir].ce_migration_arg1
+#define	cc_migration_stack(cc, dir)	cc->cc_exec_entity[dir].ce_migration_stack
 
 struct callout_cpu cc_cpu[MAXCPU];
 #define	CPUBLOCK	MAXCPU
@@ -236,6 +252,115 @@
  * cc_curr is non-NULL.
  */
 
+#ifdef CALLOUT_DEBUG_DRAIN
+static void
+callout_set_nd(struct callout_next_data *nd, struct callout *c)
+{
+	nd->c_func = c->c_func;
+	nd->c_arg = c->c_arg;
+	nd->c_arg1 = c->c_arg1;
+	stack_copy(&c->c_stack, &nd->c_stack);
+}
+
+static void
+callout_zero_nd(struct callout_next_data *nd)
+{
+	nd->c_func = NULL;
+	nd->c_arg = NULL;
+	nd->c_arg1 = NULL;
+	stack_zero(&nd->c_stack);
+}
+
+/* Called before insertion. */
+static void
+callout_insert_le_head(struct callout_cpu *cc, u_int wheel, struct callout *c)
+{
+	struct callout_list *sc;
+
+	callout_set_nd(&cc->cc_nd[wheel & callwheelmask], c);
+	sc = &cc->cc_callwheel[wheel & callwheelmask];
+	if (LIST_EMPTY(sc))
+		callout_zero_nd(&c->c_nd);
+	else
+		callout_set_nd(&c->c_nd, LIST_FIRST(sc));
+}
+
+/* Called before removal. */
+static void
+callout_remove_le(struct callout_cpu *cc, u_int wheel, struct callout *c)
+{
+	struct callout_list *sc;
+	struct callout *cn, *cp;
+	struct callout_next_data *nd;
+
+	cn = LIST_NEXT(c, c_links.le);
+	sc = &cc->cc_callwheel[wheel & callwheelmask];
+	cp = LIST_PREV(c, sc, callout, c_links.le);
+	nd = cp == NULL ? &cc->cc_nd[wheel & callwheelmask] : &cp->c_nd;
+	if (cn == NULL)
+		callout_zero_nd(nd);
+	else
+		callout_set_nd(nd, cn);
+}
+
+/* Called after insertion. */
+static void
+callout_insert_tqe(struct callout_cpu *cc, struct callout *c)
+{
+	struct callout *cn, *cp;
+
+	cp = TAILQ_PREV(c, callout_tailq, c_links.tqe);
+	if (cp == NULL) {
+		callout_set_nd(&cc->cc_exp_nd, c);
+	} else {
+		callout_set_nd(&cp->c_nd, c);
+		cn = TAILQ_NEXT(c, c_links.tqe);
+		if (cn != NULL)
+			callout_set_nd(&c->c_nd, cn);
+		else
+			callout_zero_nd(&c->c_nd);
+	}
+}
+
+/* Called before removal. */
+static void
+callout_remove_tqe(struct callout_cpu *cc, struct callout *c)
+{
+	struct callout *cn, *cp;
+	struct callout_next_data *nd;
+
+	cn = TAILQ_NEXT(c, c_links.tqe);
+	cp = TAILQ_PREV(c, callout_tailq, c_links.tqe);
+	nd = cp == NULL ? &cc->cc_exp_nd : &cp->c_nd;
+	if (cn == NULL)
+		callout_zero_nd(nd);
+	else
+		callout_set_nd(nd, cn);
+}
+#else
+static void
+callout_insert_le_head(struct callout_cpu *cc __unused, u_int w __unused,
+    struct callout *c __unused)
+{
+}
+
+static void
+callout_remove_le(struct callout_cpu *cc __unused, u_int w __unused,
+    struct callout *c __unused)
+{
+}
+
+static void
+callout_insert_tqe(struct callout_cpu *cc __unused, struct callout *c __unused)
+{
+}
+
+static void
+callout_remove_tqe(struct callout_cpu *cc __unused, struct callout *c __unused)
+{
+}
+#endif
+
 /*
  * Resets the execution entity tied to a specific callout cpu.
  */
@@ -252,6 +377,10 @@
 	cc_migration_prec(cc, direct) = 0;
 	cc_migration_func(cc, direct) = NULL;
 	cc_migration_arg(cc, direct) = NULL;
+#ifdef CALLOUT_DEBUG_DRAIN
+	cc_migration_arg1(cc, direct) = NULL;
+	stack_zero(&cc_migration_stack(cc, direct));
+#endif
 #endif
 }
 
@@ -326,6 +455,11 @@
 	cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) *
 	    callwheelsize, M_CALLOUT,
 	    DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK);
+#ifdef CALLOUT_DEBUG_DRAIN
+	cc->cc_nd = malloc_domainset(sizeof(struct callout_next_data) *
+	    callwheelsize, M_CALLOUT,
+	    DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK | M_ZERO);
+#endif
 	for (i = 0; i < callwheelsize; i++)
 		LIST_INIT(&cc->cc_callwheel[i]);
 	TAILQ_INIT(&cc->cc_expireq);
@@ -469,6 +603,8 @@
 	while (tmp != NULL) {
 		/* Run the callout if present time within allowed. */
 		if (tmp->c_time <= now) {
+			callout_remove_le(cc, firstb, tmp);
+
 			/*
 			 * Consumer told us the callout may be run
 			 * directly from hardware interrupt context.
@@ -493,6 +629,7 @@
 				LIST_REMOVE(tmp, c_links.le);
 				TAILQ_INSERT_TAIL(&cc->cc_expireq,
 				    tmp, c_links.tqe);
+				callout_insert_tqe(cc, tmp);
 				tmp->c_iflags |= CALLOUT_PROCESSED;
 				tmp = tmpn;
 			}
@@ -591,6 +728,7 @@
 	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
 	    c, (int)(c->c_precision >> 32),
 	    (u_int)(c->c_precision & 0xffffffff));
+	callout_insert_le_head(cc, bucket, c);
 	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
 	if (cc->cc_bucket == bucket)
 		cc_exec_next(cc) = c;
@@ -629,6 +767,13 @@
 	void *new_arg;
 	int flags, new_cpu;
 	sbintime_t new_prec, new_time;
+#ifdef CALLOUT_DEBUG_DRAIN
+	void *new_arg1;
+#endif
 #endif
+#ifdef CALLOUT_DEBUG_DRAIN
+	void *c_arg1;
+	struct stack tmp_stack;
+#endif
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	sbintime_t sbt1, sbt2;
@@ -653,11 +798,19 @@
 	c_func = c->c_func;
 	c_arg = c->c_arg;
 	c_iflags = c->c_iflags;
+#ifdef CALLOUT_DEBUG_DRAIN
+	c_arg1 = c->c_arg1;
+	stack_copy(&c->c_stack, &tmp_stack);
+#endif
 	c->c_iflags &= ~CALLOUT_PENDING;
 
 	cc_exec_curr(cc, direct) = c;
 	cc_exec_last_func(cc, direct) = c_func;
 	cc_exec_last_arg(cc, direct) = c_arg;
+#ifdef CALLOUT_DEBUG_DRAIN
+	cc_exec_last_arg1(cc, direct) = c_arg1;
+	stack_copy(&tmp_stack, &cc_exec_last_stack(cc, direct));
+#endif
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_drain(cc, direct) = NULL;
 	CC_UNLOCK(cc);
@@ -763,6 +916,10 @@
 		new_prec = cc_migration_prec(cc, direct);
 		new_func = cc_migration_func(cc, direct);
 		new_arg = cc_migration_arg(cc, direct);
+#ifdef CALLOUT_DEBUG_DRAIN
+		new_arg1 = cc_migration_arg1(cc, direct);
+		stack_copy(&cc_migration_stack(cc, direct), &tmp_stack);
+#endif
 		cc_cce_cleanup(cc, direct);
 
 		/*
@@ -781,6 +938,10 @@
 
 		new_cc = callout_cpu_switch(c, cc, new_cpu);
 		flags = (direct) ? C_DIRECT_EXEC : 0;
+#ifdef CALLOUT_DEBUG_DRAIN
+		c->c_arg1 = new_arg1;
+		stack_copy(&tmp_stack, &c->c_stack);
+#endif
 		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
 		    new_arg, new_cpu, flags);
 		CC_UNLOCK(new_cc);
@@ -819,6 +980,7 @@
 	cc = (struct callout_cpu *)arg;
 	CC_LOCK(cc);
 	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+		callout_remove_tqe(cc, c);
 		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		softclock_call_cc(c, cc,
 #ifdef CALLOUT_PROFILING
@@ -907,8 +1069,8 @@
  *	callout_deactivate() - marks the callout as having been serviced
  */
 int
-callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
-    callout_func_t *ftn, void *arg, int cpu, int flags)
+callout_reset_sbt_on_arg1(struct callout *c, sbintime_t sbt, sbintime_t prec,
+    callout_func_t *ftn, void *arg, void *arg1, int cpu, int flags)
 {
 	sbintime_t to_sbt, precision;
 	struct callout_cpu *cc;
@@ -978,6 +1140,10 @@
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
+#ifdef CALLOUT_DEBUG_DRAIN
+			cc_migration_arg1(cc, direct) = arg1;
+			stack_save(&cc_migration_stack(cc, direct));
+#endif
 			cancelled = 1;
 			CC_UNLOCK(cc);
 			return (cancelled);
@@ -988,8 +1154,10 @@
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+			callout_remove_le(cc, callout_get_bucket(c->c_time), c);
 			LIST_REMOVE(c, c_links.le);
 		} else {
+			callout_remove_tqe(cc, c);
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 		cancelled = 1;
@@ -1026,6 +1194,9 @@
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
+#ifdef CALLOUT_DEBUG_DRAIN
+			cc_migration_arg1(cc, direct) = arg1;
+#endif
 			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
 			c->c_flags |= CALLOUT_ACTIVE;
 			CTR6(KTR_CALLOUT,
@@ -1039,6 +1210,10 @@
 	}
 #endif
 
+#ifdef CALLOUT_DEBUG_DRAIN
+	c->c_arg1 = arg1;
+	stack_save(&c->c_stack);
+#endif
 	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
 	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
@@ -1048,6 +1223,14 @@
 	return (cancelled);
 }
 
+int
+callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
+    callout_func_t *ftn, void *arg, int cpu, int flags)
+{
+	return (callout_reset_sbt_on_arg1(c, sbt, prec, ftn, arg, NULL, cpu,
+	    flags));
+}
+
 /*
  * Common idioms that can be optimized in the future.
  */
@@ -1234,6 +1417,10 @@
 			cc_migration_prec(cc, direct) = 0;
 			cc_migration_func(cc, direct) = NULL;
 			cc_migration_arg(cc, direct) = NULL;
+#ifdef CALLOUT_DEBUG_DRAIN
+			cc_migration_arg1(cc, direct) = NULL;
+			stack_zero(&cc_migration_stack(cc, direct));
+#endif
 #endif
 		}
 		CC_UNLOCK(cc);
@@ -1262,6 +1449,10 @@
 			cc_migration_prec(cc, direct) = 0;
 			cc_migration_func(cc, direct) = NULL;
 			cc_migration_arg(cc, direct) = NULL;
+#ifdef CALLOUT_DEBUG_DRAIN
+			cc_migration_arg1(cc, direct) = NULL;
+			stack_zero(&cc_migration_stack(cc, direct));
+#endif
 #endif
 			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
@@ -1306,8 +1497,10 @@
 	if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 		if (cc_exec_next(cc) == c)
 			cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+		callout_remove_le(cc, callout_get_bucket(c->c_time), c);
 		LIST_REMOVE(c, c_links.le);
 	} else {
+		callout_remove_tqe(cc, c);
 		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 	}
 }
@@ -1588,3 +1781,132 @@
 	}
 }
 #endif /* DDB */
+
+#ifdef CALLOUT_DEBUG_DRAIN
+static int
+c_symbol_ddb(vm_offset_t pc, const char **name, long *offset)
+{
+	linker_symval_t symval;
+	c_linker_sym_t sym;
+
+	if (linker_ddb_search_symbol((caddr_t)pc, &sym, offset) != 0)
+		goto out;
+	if (linker_ddb_symbol_values(sym, &symval) != 0)
+		goto out;
+	if (symval.name != NULL) {
+		*name = symval.name;
+		return (0);
+	}
+out:
+	*offset = 0;
+	*name = "??";
+	return (ENOENT);
+}
+
+static void
+callout_check_drain_panic(struct callout_next_data *nd)
+{
+	const char *name;
+	long offset;
+
+	printf("CALLOUT CHECK DRAIN %p %p %p\n", nd->c_func, nd->c_arg,
+	    nd->c_arg1);
+	if (c_symbol_ddb((vm_offset_t)nd->c_func, &name, &offset) == 0) {
+		printf("func %p at %s+%#lx\n", (void *)nd->c_func, name,
+		    offset);
+	}
+	if ((vm_offset_t)nd->c_arg1 != (vm_offset_t)0 &&
+	    c_symbol_ddb((vm_offset_t)nd->c_arg1, &name, &offset) == 0) {
+		printf("arg1 %p at %s+%#lx\n", (void *)nd->c_arg1, name,
+		    offset);
+	}
+	stack_print_ddb(&nd->c_stack);
+	panic("callout check drain");
+}
+
+static void
+callout_check_drain_wheel(struct callout_cpu *cc)
+{
+	struct callout_list *cl;
+	struct callout *c;
+	u_int i;
+
+	for (i = 0; i < callwheelsize; i++) {
+		cl = &cc->cc_callwheel[i];
+		if (LIST_EMPTY(cl))
+			continue;
+		if (cc->cc_nd[i].c_arg1 != NULL)
+			callout_check_drain_panic(&cc->cc_nd[i]);
+		LIST_FOREACH(c, cl, c_links.le) {
+			if (LIST_NEXT(c, c_links.le) == NULL)
+				break;
+			if (c->c_nd.c_arg1 != NULL)
+				callout_check_drain_panic(&c->c_nd);
+		}
+	}
+}
+
+static void
+callout_check_drain_expired(struct callout_cpu *cc)
+{
+	struct callout *c;
+
+	if (TAILQ_EMPTY(&cc->cc_expireq))
+		return;
+	if (cc->cc_exp_nd.c_arg1 != NULL)
+		callout_check_drain_panic(&cc->cc_exp_nd);
+	TAILQ_FOREACH(c, &cc->cc_expireq, c_links.tqe) {
+		if (TAILQ_NEXT(c, c_links.tqe) == NULL)
+			break;
+		if (c->c_nd.c_arg1 != NULL)
+			callout_check_drain_panic(&c->c_nd);
+	}
+}
+
+void
+callout_check_drain(void)
+{
+	struct callout_cpu *cc;
+	int c;
+
+	CPU_FOREACH(c) {
+		cc = CC_CPU(c);
+		CC_LOCK(cc);
+		callout_check_drain_wheel(cc);
+		callout_check_drain_expired(cc);
+		CC_UNLOCK(cc);
+	}
+}
+
+static struct callout ctd_c;
+
+static void
+callout_check_drain_test_func(void *arg)
+{
+	printf("callout_check_drain_test_func arg %p\n", arg);
+}
+
+static int
+callout_check_drain_test(SYSCTL_HANDLER_ARGS)
+{
+	int error, val;
+
+	val = 0;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (val != 0) {
+		callout_init(&ctd_c, 1);
+		callout_reset_sbt_on_arg1(&ctd_c, tick_sbt * hz * 4, 0,
+		    callout_check_drain_test_func, (void *)0xfeedf00d,
+		    (void *)callout_check_drain_test_func, -1, C_HARDCLOCK);
+		pause("cdt", 1);
+		callout_check_drain();
+	}
+	return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, callout_check_drain_test,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
+    callout_check_drain_test, "I",
+    "Arm a tagged test callout and run callout_check_drain() (panics by design)");
+#endif
Index: sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
+++ sys/ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c
@@ -125,8 +125,10 @@
 		ssk->nagle_last_unacked = mseq;
 	} else {
 		if (!callout_pending(&ssk->nagle_timer)) {
-			callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT,
-			    sdp_nagle_timeout, ssk);
+			callout_reset_sbt_on_arg1(&ssk->nagle_timer,
+			    tick_sbt * SDP_NAGLE_TIMEOUT, 0,
+			    sdp_nagle_timeout, ssk, sdp_nagle_timeout, -1,
+			    C_HARDCLOCK);
 			sdp_dbg_data(ssk->socket, "Starting nagle timer\n");
 		}
 	}
@@ -158,8 +160,10 @@
 		sowwakeup(ssk->socket);
 out:
 	if (sk->so_snd.sb_sndptr)
-		callout_reset(&ssk->nagle_timer, SDP_NAGLE_TIMEOUT,
-		    sdp_nagle_timeout, ssk);
+		callout_reset_sbt_on_arg1(&ssk->nagle_timer,
+		    tick_sbt * SDP_NAGLE_TIMEOUT, 0,
+		    sdp_nagle_timeout, ssk, sdp_nagle_timeout, -1,
+		    C_HARDCLOCK);
 }
 
 void
@@ -255,6 +259,7 @@
 
 allocfail:
 	ssk->nagle_last_unacked = -1;
-	callout_reset(&ssk->nagle_timer, 1, sdp_nagle_timeout, ssk);
+	callout_reset_sbt_on_arg1(&ssk->nagle_timer, tick_sbt * 1, 0,
+	    sdp_nagle_timeout, ssk, sdp_nagle_timeout, -1, C_HARDCLOCK);
 	return;
 }
Index: sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -385,7 +385,8 @@
 	ssk->flags |= SDP_TIMEWAIT;
 	ssk->state = TCPS_TIME_WAIT;
 	soisdisconnected(ssk->socket);
-	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
+	callout_reset_sbt_on_arg1(&ssk->keep2msl, tick_sbt * TCPTV_MSL, 0,
+	    sdp_2msl_timeout, ssk, sdp_2msl_timeout, -1, C_HARDCLOCK);
 }
 
 /*
@@ -719,8 +720,9 @@
 {
 
 	SDP_WLOCK_ASSERT(ssk);
-	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
-	    sdp_dreq_timeout, ssk);
+	callout_reset_sbt_on_arg1(&ssk->keep2msl,
+	    tick_sbt * SDP_FIN_WAIT_TIMEOUT, 0,
+	    sdp_dreq_timeout, ssk, sdp_dreq_timeout, -1, C_HARDCLOCK);
 	ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
 	sdp_post_sends(ssk, M_NOWAIT);
 }
@@ -1595,8 +1597,9 @@
 	    (ssk->socket->so_options & SO_KEEPALIVE) == 0)
 		goto out;
 	sdp_post_keepalive(ssk);
-	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
-	    sdp_keepalive_timeout, ssk);
+	callout_reset_sbt_on_arg1(&ssk->keep2msl, tick_sbt * SDP_KEEPALIVE_TIME,
+	    0, sdp_keepalive_timeout, ssk, sdp_keepalive_timeout, -1,
+	    C_HARDCLOCK);
 out:
 	SDP_WUNLOCK(ssk);
 }
@@ -1609,8 +1612,10 @@
 
 	ssk = sdp_sk(so);
 	if (!callout_pending(&ssk->keep2msl))
-		callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
-		    sdp_keepalive_timeout, ssk);
+		callout_reset_sbt_on_arg1(&ssk->keep2msl,
+		    tick_sbt * SDP_KEEPALIVE_TIME, 0,
+		    sdp_keepalive_timeout, ssk, sdp_keepalive_timeout, -1,
+		    C_HARDCLOCK);
 }
 
 static void
Index: sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
+++ sys/ofed/drivers/infiniband/ulp/sdp/sdp_tx.c
@@ -52,8 +52,10 @@
 	/* If we don't have a pending timer, set one up to catch our recent
 	   post in case the interface becomes idle */
 	if (!callout_pending(&ssk->tx_ring.timer))
-		callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT,
-		    sdp_poll_tx_timeout, ssk);
+		callout_reset_sbt_on_arg1(&ssk->tx_ring.timer,
+		    tick_sbt * SDP_TX_POLL_TIMEOUT, 0,
+		    sdp_poll_tx_timeout, ssk, sdp_poll_tx_timeout, -1,
+		    C_HARDCLOCK);
 
 	/* Poll the CQ every SDP_TX_POLL_MODER packets */
 	if (force || (++ssk->tx_ring.poll_cnt & (SDP_TX_POLL_MODER - 1)) == 0)
@@ -337,8 +339,10 @@
 	 * been scheduled by the Tx routine then schedule it here to guarantee
 	 * completion processing of these packets */
 	if (inflight)
-		callout_reset(&ssk->tx_ring.timer, SDP_TX_POLL_TIMEOUT,
-		    sdp_poll_tx_timeout, ssk);
+		callout_reset_sbt_on_arg1(&ssk->tx_ring.timer,
+		    tick_sbt * SDP_TX_POLL_TIMEOUT, 0,
+		    sdp_poll_tx_timeout, ssk, sdp_poll_tx_timeout, -1,
+		    C_HARDCLOCK);
 
 out:
 #ifdef SDP_ZCOPY
Index: sys/sys/_callout.h
===================================================================
--- sys/sys/_callout.h
+++ sys/sys/_callout.h
@@ -41,6 +41,9 @@
 #define	_SYS__CALLOUT_H
 
 #include <sys/queue.h>
+#ifdef CALLOUT_DEBUG_DRAIN
+#include <sys/_stack.h>
+#endif
 
 struct lock_object;
 
@@ -50,6 +53,15 @@
 
 typedef void callout_func_t(void *);
 
+#ifdef CALLOUT_DEBUG_DRAIN
+struct callout_next_data {
+	callout_func_t *c_func;
+	void *c_arg;
+	void *c_arg1;
+	struct stack c_stack;
+};
+#endif
+
 struct callout {
 	union {
 		LIST_ENTRY(callout) le;
@@ -64,6 +76,11 @@
 	short c_flags;			/* User State */
 	short c_iflags;			/* Internal State */
 	volatile int c_cpu;		/* CPU we're scheduled on */
+#ifdef CALLOUT_DEBUG_DRAIN
+	void *c_arg1;
+	struct callout_next_data c_nd;
+	struct stack c_stack;
+#endif
 };
 
 #endif
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h
+++ sys/sys/callout.h
@@ -101,6 +101,8 @@
 #define	callout_pending(c)	((c)->c_iflags & CALLOUT_PENDING)
 int	callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
 	    void (*)(void *), void *, int, int);
+int	callout_reset_sbt_on_arg1(struct callout *, sbintime_t, sbintime_t,
+	    void (*)(void *), void *, void *, int, int);
 #define	callout_reset_sbt(c, sbt, pr, fn, arg, flags)			\
     callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags))
 #define	callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags)		\
@@ -131,6 +133,9 @@
 	_callout_stop_safe(c, 0, d)
 void	callout_when(sbintime_t sbt, sbintime_t precision, int flags,
 	    sbintime_t *sbt_res, sbintime_t *prec_res);
+#ifdef CALLOUT_DEBUG_DRAIN
+void	callout_check_drain(void);
+#endif
 #endif
 
 #endif /* _SYS_CALLOUT_H_ */
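Notes (not part of the patch): the conversion pattern above is mechanical. A subsystem opts into the leak tracking by arming its callouts through callout_reset_sbt_on_arg1() with a non-NULL arg1 tag, conventionally the timer function itself, and its teardown path then calls callout_check_drain(), which walks every per-CPU callwheel and expiration queue under the callout lock and panics with the tagged callout's symbol and the stack recorded at arm time. A minimal sketch of that pattern against the patched API; foo_softc, foo_tick, and foo_detach are hypothetical names used only for illustration:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kernel.h>	/* hz */
#include <sys/time.h>	/* tick_sbt */

/* Hypothetical driver state; not part of the patch. */
static struct foo_softc {
	struct callout fs_tick;
} foo_sc;

static void
foo_tick(void *arg)
{
	struct foo_softc *sc = arg;

	/* ... periodic work ... */

	/* Re-arm; passing foo_tick as arg1 tags the callout for tracking. */
	callout_reset_sbt_on_arg1(&sc->fs_tick, tick_sbt * hz, 0,
	    foo_tick, sc, foo_tick, -1, C_HARDCLOCK);
}

static int
foo_detach(void)
{
	callout_drain(&foo_sc.fs_tick);
#ifdef CALLOUT_DEBUG_DRAIN
	/* Panics if any tagged callout survived the teardown above. */
	callout_check_drain();
#endif
	return (0);
}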
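As a quick smoke test of the shadow bookkeeping, the patch also wires up a sysctl: setting debug.callout_check_drain_test to a non-zero value (sysctl debug.callout_check_drain_test=1) arms a tagged callout four seconds in the future, pauses for one tick, and then runs callout_check_drain(). On a CALLOUT_DEBUG_DRAIN kernel the expected outcome is an immediate, intentional "CALLOUT CHECK DRAIN" panic that prints the 0xfeedf00d argument, resolves callout_check_drain_test_func through the linker, and dumps the stack captured when the callout was armed.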