Index: share/man/man9/fail.9 =================================================================== --- share/man/man9/fail.9 +++ share/man/man9/fail.9 @@ -26,25 +26,31 @@ .\" .\" $FreeBSD$ .\" -.Dd May 10, 2009 +.Dd February 02, 2016 .Dt FAIL 9 .Os .Sh NAME .Nm KFAIL_POINT_CODE , +.Nm KFAIL_POINT_CODE_FLAGS , +.Nm KFAIL_POINT_CODE_COND , .Nm KFAIL_POINT_RETURN , .Nm KFAIL_POINT_RETURN_VOID , .Nm KFAIL_POINT_ERROR , .Nm KFAIL_POINT_GOTO , +.Nm KFAIL_POINT_SLEEP_CALLBACKS , .Nm fail_point , .Nm DEBUG_FP .Nd fail points .Sh SYNOPSIS .In sys/fail.h .Fn KFAIL_POINT_CODE "parent" "name" "code" +.Fn KFAIL_POINT_CODE_FLAGS "parent" "name" "flags" "code" +.Fn KFAIL_POINT_CODE_COND "parent" "name" "cond" "flags" "code" .Fn KFAIL_POINT_RETURN "parent" "name" .Fn KFAIL_POINT_RETURN_VOID "parent" "name" .Fn KFAIL_POINT_ERROR "parent" "name" "error_var" .Fn KFAIL_POINT_GOTO "parent" "name" "error_var" "label" +.Fn KFAIL_POINT_SLEEP_CALLBACKS "parent" "name" "pre_func" "pre_arg" "post_func" "post_arg" "code" .Sh DESCRIPTION Fail points are used to add code points where errors may be injected in a user controlled fashion. @@ -77,6 +83,42 @@ is derived from the .Fn return value set in the sysctl MIB. +.Pp +Additionally, +.Fn KFAIL_POINT_CODE_FLAGS +provides a +.Fa flags +argument which controls the fail point's behaviour. +This can be used to e.g., mark the fail point's context as non-sleepable, +which causes the +.Sy sleep +action to be coerced to a busy wait. +The supported flags are: +.Bl -ohang -offset indent +.It FAIL_POINT_USE_TIMEOUT_PATH +Rather than sleeping on a +.Fn sleep +call, just fire the post-sleep function after a timeout fires. +.It FAIL_POINT_NONSLEEPABLE +Mark the fail point as being in a non-sleepable context, which coerces +.Fn sleep +calls to +.Fn delay +calls. +.El +.Pp +Likewise, +.Fn KFAIL_POINT_CODE_COND +supplies a +.Fa cond +argument, which allows you to set the condition under which the fail point's +code may fire. +This is equivalent to: +.Bd -literal + if (cond) + KFAIL_POINT_CODE_FLAGS(...); + +.Ed See .Sx SYSCTL VARIABLES below. @@ -107,26 +149,12 @@ tree (referenced in code by .Sy DEBUG_FP ) . .Pp -The sysctl variable may be set using the following grammar: +The sysctl variable may be set in a number of ways: .Bd -literal - :: - ( "->" )* - - :: - ( ( "%") | ( "*" ) )* - - [ "(" ")" ] - [ "[pid " "]" ] - - :: - [ "." ] | - "." - - :: - "off" | "return" | "sleep" | "panic" | "break" | "print" + [%][*][(args...)][->] .Ed .Pp -The argument specifies which action to take: +The argument specifies which action to take; it can be one of: .Bl -tag -width ".Dv return" .It Sy off Take no action (does not trigger fail point code) @@ -140,13 +168,23 @@ Break into the debugger, or trap if there is no debugger support .It Sy print Print that the fail point executed +.It Sy pause +Threads sleep at the fail point until the fail point is set to +.Sy off +.It Sy yield +Thread yields the cpu when the fail point is evaluated +.It Sy delay +Similar to sleep, but busy waits the cpu. +(Useful in non-sleepable contexts.) .El .Pp -The % and * modifiers prior to control when +The % and * modifiers prior to control when is executed. -The % form (e.g. "1.2%") can be used to specify a +The % form (e.g. "1.2%") can be used to specify a probability that will execute. -The * form (e.g. "5*") can be used to specify the number of +This is a decimal in the range (0, 100] which can specify up to +1/10,000% precision. +The * form (e.g. "5*") can be used to specify the number of times should be executed before this is disabled. Only the last probability and the last count are used if multiple are specified, i.e. "1.2%2%" is the same as "2%". @@ -191,6 +229,10 @@ .Sh AUTHORS .An -nosplit This manual page was written by +.Pp +.An Matthew Bryan Aq Mt matthew.bryan@isilon.com +and +.Pp .An Zach Loafman Aq Mt zml@FreeBSD.org . .Sh CAVEATS It is easy to shoot yourself in the foot by setting fail points too @@ -206,3 +248,10 @@ .Fn fail_point_eval does not verify whether the context is appropriate for calling .Fn msleep . +You can force it to evaluate a +.Sy sleep +action as a +.Sy delay +action by specifying the +.Sy FAIL_POINT_NONSLEEPABLE +flag at the point the fail point is declared. Index: sys/kern/kern_fail.c =================================================================== --- sys/kern/kern_fail.c +++ sys/kern/kern_fail.c @@ -57,12 +57,18 @@ #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include +#include #include #ifdef ILOG_DEFINE_FOR_FILE @@ -72,11 +78,45 @@ static MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system"); #define fp_free(ptr) free(ptr, M_FAIL_POINT) #define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags)) +#define fs_free(ptr) fp_free(ptr) +#define fs_malloc() fp_malloc(sizeof(struct fail_point_setting), \ + M_WAITOK | M_ZERO) + + /** + * These define the wchans that are used for sleeping, pausing respectively. + * They are chosen arbitrarily but need to be distinct to the failpoint and + * the sleep/pause distinction. + */ +#define FP_SLEEP_CHANNEL(fp) (void*)(fp) +#define FP_PAUSE_CHANNEL(fp) __DEVOLATILE(void*, &fp->fp_setting) -static struct mtx g_fp_mtx; -MTX_SYSINIT(g_fp_mtx, &g_fp_mtx, "fail point mtx", MTX_DEF); -#define FP_LOCK() mtx_lock(&g_fp_mtx) -#define FP_UNLOCK() mtx_unlock(&g_fp_mtx) +/** + * Don't allow more than this many entries in a fail point set by sysctl. + * The 99.99...% case is to have 1 entry. I can't imagine having this many + * entries, so it should not limit us. Saves on re-mallocs while holding + * a non-sleepable lock. + */ +#define FP_MAX_ENTRY_COUNT 20 + +/* Used to drain sbufs to the sysctl output */ +int fail_sysctl_drain_func(void *, const char *, int); + +/* Head of tailq of struct fail_point_entry */ +TAILQ_HEAD(fail_point_entry_queue, fail_point_entry); + +/** + * fp entries garbage list; outstanding entries are cleaned up in the + * garbage collector + */ +STAILQ_HEAD(fail_point_setting_garbage, fail_point_setting); +static struct fail_point_setting_garbage fp_setting_garbage = + STAILQ_HEAD_INITIALIZER(fp_setting_garbage); +static struct mtx mtx_garbage_list; +MTX_SYSINIT(mtx_garbage_list, &mtx_garbage_list, "fail point garbage mtx", + MTX_SPIN); + +static struct sx sx_fp_set; +SX_SYSINIT(sx_fp_set, &sx_fp_set, "fail point set sx"); /** * Failpoint types. @@ -90,7 +130,11 @@ FAIL_POINT_BREAK, /**< break into the debugger */ FAIL_POINT_PRINT, /**< print a message */ FAIL_POINT_SLEEP, /**< sleep for some msecs */ - FAIL_POINT_NUMTYPES + FAIL_POINT_PAUSE, /**< sleep until failpoint is set to off */ + FAIL_POINT_YIELD, /**< yield the cpu */ + FAIL_POINT_DELAY, /**< busy wait the cpu */ + FAIL_POINT_NUMTYPES, + FAIL_POINT_INVALID = -1 }; static struct { @@ -104,53 +148,307 @@ [FAIL_POINT_BREAK] = FP_TYPE_NM_LEN("break"), [FAIL_POINT_PRINT] = FP_TYPE_NM_LEN("print"), [FAIL_POINT_SLEEP] = FP_TYPE_NM_LEN("sleep"), + [FAIL_POINT_PAUSE] = FP_TYPE_NM_LEN("pause"), + [FAIL_POINT_YIELD] = FP_TYPE_NM_LEN("yield"), + [FAIL_POINT_DELAY] = FP_TYPE_NM_LEN("delay"), }; +#define FE_COUNT_UNTRACKED (INT_MIN) + /** * Internal structure tracking a single term of a complete failpoint. * @ingroup failpoint_private */ struct fail_point_entry { - enum fail_point_t fe_type; /**< type of entry */ + volatile bool fe_stale; + enum fail_point_t fe_type; /**< type of entry */ int fe_arg; /**< argument to type (e.g. return value) */ int fe_prob; /**< likelihood of firing in millionths */ - int fe_count; /**< number of times to fire, 0 means always */ + int fe_count; /**< number of times to fire, -1 means infinite */ pid_t fe_pid; /**< only fail for this process */ - TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry in fail point */ + struct fail_point *fe_parent; /**< backpointer to fp */ + TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry ptr */ +}; + +struct fail_point_setting { + STAILQ_ENTRY(fail_point_setting) fs_garbage_link; + struct fail_point_entry_queue fp_entry_queue; + struct fail_point * fs_parent; + struct mtx feq_mtx; /* Gives fail_point_pause something to do. */ +}; + +/** + * Defines stating the equivalent of probablilty one (100%) + */ +enum { + PROB_MAX = 1000000, /* probability between zero and this number */ + PROB_DIGITS = 6 /* number of zero's in above number */ }; +/* Get a ref on an fp's fp_setting */ +static inline struct fail_point_setting *fail_point_setting_get_ref( + struct fail_point *fp); +/* Release a ref on an fp_setting */ +static inline void fail_point_setting_release_ref(struct fail_point *fp); +/* Allocate and initialize a struct fail_point_setting */ +static struct fail_point_setting *fail_point_setting_new(struct + fail_point *); +/* Free a struct fail_point_setting */ +static void fail_point_setting_destroy(struct fail_point_setting *fp_setting); +/* Allocate and initialize a struct fail_point_entry */ +static struct fail_point_entry *fail_point_entry_new(struct + fail_point_setting *); +/* Free a struct fail_point_entry */ +static void fail_point_entry_destroy(struct fail_point_entry *fp_entry); +/* Append fp setting to garbage list */ +static inline void fail_point_setting_garbage_append( + struct fail_point_setting *fp_setting); +/* Swap fp's setting with fp_setting_new */ +static inline struct fail_point_setting * + fail_point_swap_settings(struct fail_point *fp, + struct fail_point_setting *fp_setting_new); +/* Free up any zero-ref setting in the garbage queue */ +static void fail_point_garbage_collect(void); +/* If this fail point's setting are empty, then swap it out to NULL. */ +static inline void fail_point_eval_swap_out(struct fail_point *fp, + struct fail_point_setting *fp_setting); + +bool +fail_point_is_off(struct fail_point *fp) +{ + bool return_val; + struct fail_point_setting *fp_setting; + struct fail_point_entry *ent; + + return_val = true; + + fp_setting = fail_point_setting_get_ref(fp); + if (fp_setting != NULL) { + TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, + fe_entries) { + if (!ent->fe_stale) { + return_val = false; + break; + } + } + } + fail_point_setting_release_ref(fp); + + return (return_val); +} + +/* Allocate and initialize a struct fail_point_setting */ +static struct fail_point_setting * +fail_point_setting_new(struct fail_point *fp) +{ + struct fail_point_setting *fs_new; + + fs_new = fs_malloc(); + fs_new->fs_parent = fp; + TAILQ_INIT(&fs_new->fp_entry_queue); + mtx_init(&fs_new->feq_mtx, "fail point entries", NULL, MTX_SPIN); + + fail_point_setting_garbage_append(fs_new); + + return (fs_new); +} + +/* Free a struct fail_point_setting */ +static void +fail_point_setting_destroy(struct fail_point_setting *fp_setting) +{ + struct fail_point_entry *ent; + + while (!TAILQ_EMPTY(&fp_setting->fp_entry_queue)) { + ent = TAILQ_FIRST(&fp_setting->fp_entry_queue); + TAILQ_REMOVE(&fp_setting->fp_entry_queue, ent, fe_entries); + fail_point_entry_destroy(ent); + } + + fs_free(fp_setting); +} + +/* Allocate and initialize a struct fail_point_entry */ +static struct fail_point_entry * +fail_point_entry_new(struct fail_point_setting *fp_setting) +{ + struct fail_point_entry *fp_entry; + + fp_entry = fp_malloc(sizeof(struct fail_point_entry), + M_WAITOK | M_ZERO); + fp_entry->fe_parent = fp_setting->fs_parent; + fp_entry->fe_prob = PROB_MAX; + fp_entry->fe_pid = NO_PID; + fp_entry->fe_count = FE_COUNT_UNTRACKED; + TAILQ_INSERT_TAIL(&fp_setting->fp_entry_queue, fp_entry, + fe_entries); + + return (fp_entry); +} + +/* Free a struct fail_point_entry */ +static void +fail_point_entry_destroy(struct fail_point_entry *fp_entry) +{ + + fp_free(fp_entry); +} + +/* Get a ref on an fp's fp_setting */ +static inline struct fail_point_setting * +fail_point_setting_get_ref(struct fail_point *fp) +{ + struct fail_point_setting *fp_setting; + + /* Invariant: if we have a ref, our pointer to fp_setting is safe */ + atomic_add_acq_32(&fp->fp_ref_cnt, 1); + fp_setting = fp->fp_setting; + + return (fp_setting); +} + +/* Release a ref on an fp_setting */ +static inline void +fail_point_setting_release_ref(struct fail_point *fp) +{ + + KASSERT(&fp->fp_ref_cnt > 0, ("Attempting to deref w/no refs")); + atomic_subtract_rel_32(&fp->fp_ref_cnt, 1); +} + +/* Append fp entries to fp garbage list */ +static inline void +fail_point_setting_garbage_append(struct fail_point_setting *fp_setting) +{ + + mtx_lock_spin(&mtx_garbage_list); + STAILQ_INSERT_TAIL(&fp_setting_garbage, fp_setting, + fs_garbage_link); + mtx_unlock_spin(&mtx_garbage_list); +} + +/* Swap fp's entries with fp_setting_new */ +static struct fail_point_setting * +fail_point_swap_settings(struct fail_point *fp, + struct fail_point_setting *fp_setting_new) +{ + struct fail_point_setting *fp_setting_old; + + fp_setting_old = fp->fp_setting; + fp->fp_setting = fp_setting_new; + + return (fp_setting_old); +} + +static inline void +fail_point_eval_swap_out(struct fail_point *fp, + struct fail_point_setting *fp_setting) +{ + + /* We may have already been swapped out and replaced; ignore. */ + if (fp->fp_setting == fp_setting) + fail_point_swap_settings(fp, NULL); +} + +/* Free up any zero-ref entries in the garbage queue */ +static void +fail_point_garbage_collect() +{ + struct fail_point_setting *fs_current, *fs_next; + struct fail_point_setting_garbage fp_ents_free_list; + + /** + * We will transfer the entries to free to fp_ents_free_list while holding + * the spin mutex, then free it after we drop the lock. This avoids + * triggering witness due to sleepable mutexes in the memory + * allocator. + */ + STAILQ_INIT(&fp_ents_free_list); + + mtx_lock_spin(&mtx_garbage_list); + STAILQ_FOREACH_SAFE(fs_current, &fp_setting_garbage, fs_garbage_link, + fs_next) { + if (fs_current->fs_parent->fp_setting != fs_current && + fs_current->fs_parent->fp_ref_cnt == 0) { + STAILQ_REMOVE(&fp_setting_garbage, fs_current, + fail_point_setting, fs_garbage_link); + STAILQ_INSERT_HEAD(&fp_ents_free_list, fs_current, + fs_garbage_link); + } + } + mtx_unlock_spin(&mtx_garbage_list); + + STAILQ_FOREACH_SAFE(fs_current, &fp_ents_free_list, fs_garbage_link, + fs_next) + fail_point_setting_destroy(fs_current); +} + +/* Drain out all refs from this fail point */ +static inline void +fail_point_drain(struct fail_point *fp, int expected_ref) +{ + struct fail_point_setting *entries; + + entries = fail_point_swap_settings(fp, NULL); + /** + * We have unpaused all threads; so we will wait no longer + * than the time taken for the longest remaining sleep, or + * the length of time of a long-running code block. + */ + while (fp->fp_ref_cnt > expected_ref) { + wakeup(FP_PAUSE_CHANNEL(fp)); + tsleep(&fp, PWAIT, "fail_point_drain", hz / 100); + } + fail_point_swap_settings(fp, entries); +} + static inline void -fail_point_sleep(struct fail_point *fp, struct fail_point_entry *ent, - int msecs, enum fail_point_return_code *pret) +fail_point_pause(struct fail_point *fp, enum fail_point_return_code *pret, + struct mtx *mtx_sleep) { - /* convert from millisecs to ticks, rounding up */ - int timo = ((msecs * hz) + 999) / 1000; + + if (fp->fp_pre_sleep_fn) + fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); + + msleep_spin(FP_PAUSE_CHANNEL(fp), mtx_sleep, "failpt", 0); + + if (fp->fp_post_sleep_fn) + fp->fp_post_sleep_fn(fp->fp_post_sleep_arg); +} + +static inline void +fail_point_sleep(struct fail_point *fp, int msecs, + enum fail_point_return_code *pret) +{ + int timo; + + /* Convert from millisecs to ticks, rounding up */ + timo = howmany(msecs * hz, 1000); if (timo > 0) { - if (fp->fp_sleep_fn == NULL) { - msleep(fp, &g_fp_mtx, PWAIT, "failpt", timo); + if (!(fp->fp_flags & FAIL_POINT_USE_TIMEOUT_PATH)) { + if (fp->fp_pre_sleep_fn) + fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); + + tsleep(FP_SLEEP_CHANNEL(fp), PWAIT, "failpt", timo); + + if (fp->fp_post_sleep_fn) + fp->fp_post_sleep_fn(fp->fp_post_sleep_arg); } else { - timeout(fp->fp_sleep_fn, fp->fp_sleep_arg, timo); + if (fp->fp_pre_sleep_fn) + fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); + + timeout(fp->fp_post_sleep_fn, fp->fp_post_sleep_arg, + timo); *pret = FAIL_POINT_RC_QUEUED; } } } - -/** - * Defines stating the equivalent of probablilty one (100%) - */ -enum { - PROB_MAX = 1000000, /* probability between zero and this number */ - PROB_DIGITS = 6, /* number of zero's in above number */ -}; - -static char *parse_fail_point(struct fail_point_entries *, char *); -static char *parse_term(struct fail_point_entries *, char *); +static char *parse_fail_point(struct fail_point_setting *, char *); +static char *parse_term(struct fail_point_setting *, char *); static char *parse_number(int *out_units, int *out_decimal, char *); static char *parse_type(struct fail_point_entry *, char *); -static void free_entry(struct fail_point_entries *, struct fail_point_entry *); -static void clear_entries(struct fail_point_entries *); /** * Initialize a fail_point. The name is formed in a printf-like fashion @@ -167,7 +465,7 @@ char *name; int n; - TAILQ_INIT(&fp->fp_entries); + fp->fp_setting = NULL; fp->fp_flags = 0; /* Figure out the size of the name. */ @@ -185,25 +483,33 @@ fp->fp_name = name; fp->fp_location = ""; fp->fp_flags |= FAIL_POINT_DYNAMIC_NAME; - fp->fp_sleep_fn = NULL; - fp->fp_sleep_arg = NULL; + fp->fp_pre_sleep_fn = NULL; + fp->fp_pre_sleep_arg = NULL; + fp->fp_post_sleep_fn = NULL; + fp->fp_post_sleep_arg = NULL; } /** - * Free the resources held by a fail_point. - * + * Free the resources held by a fail_point, and wake any paused threads. + * Thou shalt not allow threads to hit this fail point after you enter this + * function, nor shall you call this multiple times for a given fp. * @ingroup failpoint */ void fail_point_destroy(struct fail_point *fp) { + fail_point_drain(fp, 0); + if ((fp->fp_flags & FAIL_POINT_DYNAMIC_NAME) != 0) { fp_free(__DECONST(void *, fp->fp_name)); fp->fp_name = NULL; } fp->fp_flags = 0; - clear_entries(&fp->fp_entries); + + sx_xlock(&sx_fp_set); + fail_point_garbage_collect(); + sx_xunlock(&sx_fp_set); } /** @@ -216,21 +522,51 @@ enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *fp, int *return_value) { - enum fail_point_return_code ret = FAIL_POINT_RC_CONTINUE; - struct fail_point_entry *ent, *next; + bool execute = false; + struct fail_point_entry *ent; + struct fail_point_setting *fp_setting; + enum fail_point_return_code ret; + int cont; + int count; int msecs; + int usecs; + + ret = FAIL_POINT_RC_CONTINUE; + cont = 0; /* don't continue by default */ - FP_LOCK(); + fp_setting = fail_point_setting_get_ref(fp); + if (fp_setting == NULL) + goto abort; - TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, next) { - int cont = 0; /* don't continue by default */ + TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { + + if (ent->fe_stale) + continue; if (ent->fe_prob < PROB_MAX && ent->fe_prob < random() % PROB_MAX) continue; + if (ent->fe_pid != NO_PID && ent->fe_pid != curproc->p_pid) continue; + if (ent->fe_count != FE_COUNT_UNTRACKED) { + count = ent->fe_count; + while (count > 0) { + if (atomic_cmpset_32(&ent->fe_count, count, count - 1)) { + count--; + execute = true; + break; + } + count = ent->fe_count; + } + if (execute == false) + /* We lost the race; consider the entry stale and bail now */ + continue; + if (count == 0) + ent->fe_stale = true; + } + switch (ent->fe_type) { case FAIL_POINT_PANIC: panic("fail point %s panicking", fp->fp_name); @@ -244,7 +580,7 @@ case FAIL_POINT_BREAK: printf("fail point %s breaking to debugger\n", - fp->fp_name); + fp->fp_name); breakpoint(); break; @@ -254,51 +590,95 @@ break; case FAIL_POINT_SLEEP: - /* - * Free the entry now if necessary, since - * we're about to drop the mutex and sleep. - */ msecs = ent->fe_arg; - if (ent->fe_count > 0 && --ent->fe_count == 0) { - free_entry(&fp->fp_entries, ent); - ent = NULL; - } - if (msecs) - fail_point_sleep(fp, ent, msecs, &ret); + fail_point_sleep(fp, msecs, &ret); + break; + + case FAIL_POINT_PAUSE: + /** + * Pausing is inherently strange with multiple + * entries given our design. That is because some + * entries could be unreachable, for instance in cases like: + * pause->return. We can never reach the return entry. + * The sysctl layer actually truncates all entries after + * a pause for this reason. + */ + mtx_lock_spin(&fp_setting->feq_mtx); + fail_point_pause(fp, &ret, &fp_setting->feq_mtx); + mtx_unlock_spin(&fp_setting->feq_mtx); + break; + + case FAIL_POINT_YIELD: + kern_yield(-1); + break; + + case FAIL_POINT_DELAY: + usecs = ent->fe_arg; + DELAY(usecs); break; default: break; } - if (ent != NULL && ent->fe_count > 0 && --ent->fe_count == 0) - free_entry(&fp->fp_entries, ent); if (cont == 0) break; } - /* Get rid of "off"s at the end. */ - while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) && - ent->fe_type == FAIL_POINT_OFF) - free_entry(&fp->fp_entries, ent); + if (fail_point_is_off(fp)) + fail_point_eval_swap_out(fp, fp_setting); - FP_UNLOCK(); +abort: + fail_point_setting_release_ref(fp); return (ret); + } /** * Translate internal fail_point structure into human-readable text. */ static void -fail_point_get(struct fail_point *fp, struct sbuf *sb) +fail_point_get(struct fail_point *fp, struct sbuf *sb, + bool verbose) { struct fail_point_entry *ent; + struct fail_point_setting *fp_setting; + struct fail_point_entry *fp_entry_cpy; + int cnt_sleeping; + int idx; + int printed_entry_count; + + cnt_sleeping = 0; + idx = 0; + printed_entry_count = 0; + + fp_entry_cpy = fp_malloc(sizeof(struct fail_point_entry) * + (FP_MAX_ENTRY_COUNT + 1), M_WAITOK); + + fp_setting = fail_point_setting_get_ref(fp); + + if (fp_setting != NULL) { + TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { + if (ent->fe_stale) + continue; - FP_LOCK(); + KASSERT(printed_entry_count < FP_MAX_ENTRY_COUNT, + ("FP entry list larger than allowed")); - TAILQ_FOREACH(ent, &fp->fp_entries, fe_entries) { + fp_entry_cpy[printed_entry_count] = *ent; + ++printed_entry_count; + } + } + fail_point_setting_release_ref(fp); + + /* This is our equivalent of a NULL terminator */ + fp_entry_cpy[printed_entry_count].fe_type = FAIL_POINT_INVALID; + + while (idx < printed_entry_count) { + ent = &fp_entry_cpy[idx]; + ++idx; if (ent->fe_prob < PROB_MAX) { int decimal = ent->fe_prob % (PROB_MAX / 100); int units = ent->fe_prob / (PROB_MAX / 100); @@ -313,7 +693,7 @@ } sbuf_printf(sb, "%%"); } - if (ent->fe_count > 0) + if (ent->fe_count >= 0) sbuf_printf(sb, "%d*", ent->fe_count); sbuf_printf(sb, "%s", fail_type_strings[ent->fe_type].name); if (ent->fe_arg) @@ -323,10 +703,29 @@ if (TAILQ_NEXT(ent, fe_entries)) sbuf_printf(sb, "->"); } - if (TAILQ_EMPTY(&fp->fp_entries)) + if (!printed_entry_count) sbuf_printf(sb, "off"); - FP_UNLOCK(); + fp_free(fp_entry_cpy); + if (verbose) { + /* Print number of sleeping threads. queue=0 is the argument + * used by msleep when sending our threads to sleep. */ + sbuf_printf(sb, "\nsleeping_thread_stacks = {\n"); + sleepq_sbuf_print_stacks(sb, FP_SLEEP_CHANNEL(fp), 0, + &cnt_sleeping); + + sbuf_printf(sb, "},\n"); + sbuf_printf(sb, "sleeping_thread_count = %d,\n", + cnt_sleeping); + + sbuf_printf(sb, "paused_thread_stacks = {\n"); + sleepq_sbuf_print_stacks(sb, FP_PAUSE_CHANNEL(fp), 0, + &cnt_sleeping); + + sbuf_printf(sb, "},\n"); + sbuf_printf(sb, "paused_thread_count = %d\n", + cnt_sleeping); + } } /** @@ -336,38 +735,91 @@ static int fail_point_set(struct fail_point *fp, char *buf) { - int error = 0; struct fail_point_entry *ent, *ent_next; - struct fail_point_entries new_entries; + struct fail_point_setting *entries; + bool should_wake_paused; + bool should_truncate; + int error; + + error = 0; + should_wake_paused = false; + should_truncate = false; /* Parse new entries. */ - TAILQ_INIT(&new_entries); - if (!parse_fail_point(&new_entries, buf)) { - clear_entries(&new_entries); + /** + * ref protects our new malloc'd stuff from being garbage collected + * before we link it. + */ + fail_point_setting_get_ref(fp); + entries = fail_point_setting_new(fp); + if (parse_fail_point(entries, buf) == NULL) { + STAILQ_REMOVE(&fp_setting_garbage, entries, + fail_point_setting, fs_garbage_link); + fail_point_setting_destroy(entries); error = EINVAL; goto end; } - FP_LOCK(); - - /* Move new entries in. */ - TAILQ_SWAP(&fp->fp_entries, &new_entries, fail_point_entry, fe_entries); - clear_entries(&new_entries); + /** + * Transfer the entries we are going to keep to a new list. + * Get rid of useless zero probability entries, and entries with hit + * count 0. + * If 'off' is present, and it has no hit count set, then all entries + * after it are discarded since they are unreachable. + */ + TAILQ_FOREACH_SAFE(ent, &entries->fp_entry_queue, fe_entries, ent_next) { + if (ent->fe_prob == 0 || ent->fe_count == 0) { + printf("Discarding entry which cannot execute %s\n", + fail_type_strings[ent->fe_type].name); + TAILQ_REMOVE(&entries->fp_entry_queue, ent, + fe_entries); + fp_free(ent); + continue; + } else if (should_truncate) { + printf("Discarding unreachable entry %s\n", + fail_type_strings[ent->fe_type].name); + TAILQ_REMOVE(&entries->fp_entry_queue, ent, + fe_entries); + fp_free(ent); + continue; + } - /* Get rid of useless zero probability entries. */ - TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, ent_next) { - if (ent->fe_prob == 0) - free_entry(&fp->fp_entries, ent); + if (ent->fe_type == FAIL_POINT_OFF) { + should_wake_paused = true; + if (ent->fe_count == FE_COUNT_UNTRACKED) { + should_truncate = true; + TAILQ_REMOVE(&entries->fp_entry_queue, ent, + fe_entries); + fp_free(ent); + } + } else if (ent->fe_type == FAIL_POINT_PAUSE) { + should_truncate = true; + } else if (ent->fe_type == FAIL_POINT_SLEEP && (fp->fp_flags & + FAIL_POINT_NONSLEEPABLE)) { + /** + * If this fail point is annotated as being in a + * non-sleepable ctx, convert sleep to delay and + * convert the msec argument to usecs. + */ + printf("Sleep call request on fail point in " + "non-sleepable context; using delay instead " + "of sleep\n"); + ent->fe_type = FAIL_POINT_DELAY; + ent->fe_arg *= 1000; + } } - /* Get rid of "off"s at the end. */ - while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) && - ent->fe_type == FAIL_POINT_OFF) - free_entry(&fp->fp_entries, ent); - - FP_UNLOCK(); + if (TAILQ_EMPTY(&entries->fp_entry_queue)) { + entries = fail_point_swap_settings(fp, NULL); + if (entries != NULL) + wakeup(FP_PAUSE_CHANNEL(fp)); + } else { + if (should_wake_paused) + wakeup(FP_PAUSE_CHANNEL(fp)); + fail_point_swap_settings(fp, entries); + } - end: +end: #ifdef IWARNING if (error) IWARNING("Failed to set %s %s to %s", @@ -377,6 +829,7 @@ fp->fp_name, fp->fp_location, buf); #endif /* IWARNING */ + fail_point_setting_release_ref(fp); return (error); } @@ -385,25 +838,33 @@ /** * Handle kernel failpoint set/get. */ + int fail_point_sysctl(SYSCTL_HANDLER_ARGS) { - struct fail_point *fp = arg1; - char *buf = NULL; + struct fail_point *fp; + char *buf; + struct sbuf *sb_check; struct sbuf sb; int error; - /* Retrieving */ - sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND | SBUF_INCLUDENUL); - fail_point_get(fp, &sb); - sbuf_trim(&sb); - error = sbuf_finish(&sb); - if (error == 0) - error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb)); - sbuf_delete(&sb); + error = 0; + fp = arg1; + buf = NULL; + + sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); + if (sb_check != &sb) + return (ENOMEM); + + sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req); /* Setting */ - if (!error && req->newptr) { + /** + * Lock protects any new entries from being garbage collected before we + * can link them to the fail point. + */ + sx_xlock(&sx_fp_set); + if (req->newptr) { if (req->newlen > MAX_FAIL_POINT_BUF) { error = EINVAL; goto out; @@ -417,31 +878,95 @@ buf[req->newlen] = '\0'; error = fail_point_set(fp, buf); - } + } + + fail_point_garbage_collect(); + sx_xunlock(&sx_fp_set); + + /* Retrieving. */ + fail_point_get(fp, &sb, false); out: - fp_free(buf); + sbuf_finish(&sb); + sbuf_delete(&sb); + + if (buf) + fp_free(buf); + return (error); } +int +fail_point_sysctl_status(SYSCTL_HANDLER_ARGS) +{ + struct fail_point *fp; + struct sbuf sb, *sb_check; + + fp = arg1; + + sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); + if (sb_check != &sb) + return (ENOMEM); + + sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req); + + /* Retrieving. */ + fail_point_get(fp, &sb, true); + + sbuf_finish(&sb); + sbuf_delete(&sb); + + /** + * Lock protects any new entries from being garbage collected before we + * can link them to the fail point. + */ + sx_xlock(&sx_fp_set); + fail_point_garbage_collect(); + sx_xunlock(&sx_fp_set); + + return (0); +} + +int +fail_sysctl_drain_func(void *sysctl_args, const char *buf, int len) +{ + struct sysctl_req *sa; + int error; + + sa = sysctl_args; + + error = SYSCTL_OUT(sa, buf, len); + + if (error == ENOMEM) + return (-1); + else + return (len); +} + + /** * Internal helper function to translate a human-readable failpoint string * into a internally-parsable fail_point structure. */ static char * -parse_fail_point(struct fail_point_entries *ents, char *p) +parse_fail_point(struct fail_point_setting *ents, char *p) { /* :: * ( "->" )* */ + uint8_t term_count; + + term_count = 1; + p = parse_term(ents, p); if (p == NULL) return (NULL); + while (*p != '\0') { - if (p[0] != '-' || p[1] != '>') - return (NULL); - p = parse_term(ents, p + 2); - if (p == NULL) + term_count++; + if (p[0] != '-' || p[1] != '>' || + (p = parse_term(ents, p+2)) == NULL || + term_count > FP_MAX_ENTRY_COUNT) return (NULL); } return (p); @@ -451,14 +976,11 @@ * Internal helper function to parse an individual term from a failpoint. */ static char * -parse_term(struct fail_point_entries *ents, char *p) +parse_term(struct fail_point_setting *ents, char *p) { struct fail_point_entry *ent; - ent = fp_malloc(sizeof *ent, M_WAITOK | M_ZERO); - ent->fe_prob = PROB_MAX; - ent->fe_pid = NO_PID; - TAILQ_INSERT_TAIL(ents, ent, fe_entries); + ent = fail_point_entry_new(ents); /* * :: @@ -483,7 +1005,7 @@ if (ent->fe_prob > PROB_MAX) ent->fe_prob = PROB_MAX; } else if (*p == '*') { - if (!units || decimal) + if (!units || units < 0 || decimal) return (NULL); ent->fe_count = units; } else @@ -500,7 +1022,7 @@ /* [ "(" ")" ] */ if (*p != '(') - return p; + return (p); p++; if (!isdigit(*p) && *p != '-') return (NULL); @@ -509,7 +1031,7 @@ return (NULL); /* [ "[pid " "]" ] */ -#define PID_STRING "[pid " +#define PID_STRING "[pid " if (strncmp(p, PID_STRING, sizeof(PID_STRING) - 1) != 0) return (p); p += sizeof(PID_STRING) - 1; @@ -530,7 +1052,7 @@ { char *old_p; - /* + /** * :: * [ "." ] | * "." @@ -584,29 +1106,17 @@ return (NULL); } -/** - * Internal helper function to free an individual failpoint term. - */ -static void -free_entry(struct fail_point_entries *ents, struct fail_point_entry *ent) -{ - TAILQ_REMOVE(ents, ent, fe_entries); - fp_free(ent); -} +/* The fail point sysctl tree. */ +SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points"); -/** - * Internal helper function to clear out all failpoint terms for a single - * failpoint. - */ -static void -clear_entries(struct fail_point_entries *ents) +/* Debugging/testing stuff for fail point */ +static int +sysctl_test_fail_point(SYSCTL_HANDLER_ARGS) { - struct fail_point_entry *ent, *ent_next; - TAILQ_FOREACH_SAFE(ent, ents, fe_entries, ent_next) - fp_free(ent); - TAILQ_INIT(ents); + KFAIL_POINT_RETURN(DEBUG_FP, test_fail_point); + return (0); } - -/* The fail point sysctl tree. */ -SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points"); +SYSCTL_OID(_debug_fail_point, OID_AUTO, test_trigger_fail_point, + CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_test_fail_point, "A", + "Trigger test fail points"); Index: sys/kern/subr_sleepqueue.c =================================================================== --- sys/kern/subr_sleepqueue.c +++ sys/kern/subr_sleepqueue.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -83,6 +84,7 @@ #include #endif + /* * Constants for the hash table of sleep queue chains. * SC_TABLESIZE must be a power of two for SC_MASK to work properly. @@ -1033,6 +1035,120 @@ return (sleepq_resume_thread(sq, td, 0)); } +/* + * Prints the stacks of all threads presently sleeping on wchan/queue to + * the sbuf sb. Sets count_stacks_printed to the number of stacks actually + * printed. Typically, this will equal the number of threads sleeping on the + * queue, but may be less if sb overflowed before all stacks were printed. + */ +int +sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue, + int *count_stacks_printed) +{ + struct thread *td, *td_next; + struct sleepqueue *sq; + struct stack **st; + struct sbuf **td_infos; + int i, stack_idx, error, stacks_to_allocate; + bool finished, partial_print; + + error = 0; + finished = false; + partial_print = false; + + KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); + MPASS((queue >= 0) && (queue < NR_SLEEPQS)); + + stacks_to_allocate = 10; + for (i = 0; i < 3 && !finished ; i++) { + /* We cannot malloc while holding the queue's spinlock, so + * we do our mallocs now, and hope it is enough. If it + * isn't, we will free these, drop the lock, malloc more, + * and try again, up to a point. After that point we will + * give up and report ENOMEM. We also cannot write to sb + * during this time since the client may have set the + * SBUF_AUTOEXTEND flag on their sbuf, which could cause a + * malloc as we print to it. So we defer actually printing + * to sb until after we drop the spinlock. + */ + + /* Where we will store the stacks. */ + st = malloc(sizeof(struct stack *) * stacks_to_allocate, + M_TEMP, M_WAITOK); + for (stack_idx = 0; stack_idx < stacks_to_allocate; + stack_idx++) + st[stack_idx] = stack_create(); + + /* Where we will store the td name, tid, etc. */ + td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate, + M_TEMP, M_WAITOK); + for (stack_idx = 0; stack_idx < stacks_to_allocate; + stack_idx++) + td_infos[stack_idx] = sbuf_new(NULL, NULL, + MAXCOMLEN + sizeof(struct thread *) * 2 + 40, + SBUF_FIXEDLEN); + + sleepq_lock(wchan); + sq = sleepq_lookup(wchan); + if (sq == NULL) { + /* This sleepq does not exist; exit and return ENOENT. */ + error = ENOENT; + finished = true; + sleepq_release(wchan); + goto loop_end; + } + + stack_idx = 0; + /* Save thread info */ + TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, + td_next) { + if (stack_idx >= stacks_to_allocate) + goto loop_end; + + /* Note the td_lock is equal to the sleepq_lock here. */ + stack_save_td(st[stack_idx], td); + + sbuf_printf(td_infos[stack_idx], "%d: %s %p", + td->td_tid, td->td_name, td); + + ++stack_idx; + } + + finished = true; + sleepq_release(wchan); + + /* Print the stacks */ + for (i = 0; i < stack_idx; i++) { + sbuf_finish(td_infos[i]); + sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i])); + stack_sbuf_print(sb, st[i]); + sbuf_printf(sb, "\n"); + + error = sbuf_error(sb); + if (error == 0) + *count_stacks_printed = stack_idx; + } + +loop_end: + if (!finished) + sleepq_release(wchan); + for (stack_idx = 0; stack_idx < stacks_to_allocate; + stack_idx++) + stack_destroy(st[stack_idx]); + for (stack_idx = 0; stack_idx < stacks_to_allocate; + stack_idx++) + sbuf_delete(td_infos[stack_idx]); + free(st, M_TEMP); + free(td_infos, M_TEMP); + stacks_to_allocate *= 10; + } + + if (!finished && error == 0) + error = ENOMEM; + + return (error); +} + #ifdef SLEEPQUEUE_PROFILING #define SLEEPQ_PROF_LOCATIONS 1024 #define SLEEPQ_SBUFSIZE 512 Index: sys/sys/fail.h =================================================================== --- sys/sys/fail.h +++ sys/sys/fail.h @@ -37,6 +37,11 @@ #include #include #include +#include +#include +#include +#include +#include /** * Failpoint return codes, used internally. @@ -49,7 +54,8 @@ }; struct fail_point_entry; -TAILQ_HEAD(fail_point_entries, fail_point_entry); +struct fail_point_setting; + /** * Internal failpoint structure, tracking all the current details of the * failpoint. This structure is the core component shared between the @@ -57,22 +63,42 @@ * @ingroup failpoint_private */ struct fail_point { - const char *fp_name; /**< name of fail point */ - const char *fp_location; /**< file:line of fail point */ - struct fail_point_entries fp_entries; /**< list of entries */ + const char *fp_name; /* name of fail point */ + const char *fp_location; /* file:line of fail point */ + volatile int fp_ref_cnt; /** + * protects fp_setting: while holding + * a ref, fp_setting points to an + * unfreed fail_point_setting + */ + struct fail_point_setting * volatile fp_setting; int fp_flags; - void (*fp_sleep_fn)(void *); /**< Function to call at end of - * sleep for sleep failpoints */ - void *fp_sleep_arg; /**< Arg for sleep_fn */ + + /**< Function to call before sleep or pause */ + void (*fp_pre_sleep_fn)(void *); + /**< Arg for fp_pre_sleep_fn */ + void *fp_pre_sleep_arg; + + /**< Function to call after waking from sleep or pause */ + void (*fp_post_sleep_fn)(void *); + /**< Arg for fp_post_sleep_fn */ + void *fp_post_sleep_arg; }; #define FAIL_POINT_DYNAMIC_NAME 0x01 /**< Must free name on destroy */ +/**< Use timeout path for sleep instead of msleep */ +#define FAIL_POINT_USE_TIMEOUT_PATH 0x02 +/**< If fail point is set to sleep, replace the sleep call with delay */ +#define FAIL_POINT_NONSLEEPABLE 0x04 + +#define FAIL_POINT_CV_DESC "fp cv no iterators" +#define FAIL_POINT_IS_OFF(fp) (__predict_true((fp)->fp_setting == NULL) || \ + __predict_true(fail_point_is_off(fp))) __BEGIN_DECLS /* Private failpoint eval function -- use fail_point_eval() instead. */ enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *, - int *ret); + int *ret); /** * @addtogroup failpoint @@ -86,26 +112,62 @@ void fail_point_init(struct fail_point *, const char *fmt, ...) __printflike(2, 3); +/* Return true iff this fail point is set to off, false otherwise */ +bool fail_point_is_off(struct fail_point *fp); + /** - * Set the sleep function for a fail point - * If sleep_fn is specified, then FAIL_POINT_SLEEP will result in a - * (*fp->sleep_fn)(fp->sleep_arg) call by the timer thread. Otherwise, - * if sleep_fn is NULL (default), then FAIL_POINT_SLEEP will result in the - * fail_point_eval() call sleeping. + * Set the pre-sleep function for a fail point + * If fp_post_sleep_fn is specified, then FAIL_POINT_SLEEP will result in a + * (*fp->fp_pre_sleep_fn)(fp->fp_pre_sleep_arg) call by the thread. */ -static __inline void -fail_point_sleep_set_func(struct fail_point *fp, void (*sleep_fn)(void *)) +static inline void +fail_point_sleep_set_pre_func(struct fail_point *fp, void (*sleep_fn)(void *)) +{ + fp->fp_pre_sleep_fn = sleep_fn; +} + +static inline void +fail_point_sleep_set_pre_arg(struct fail_point *fp, void *sleep_arg) { - fp->fp_sleep_fn = sleep_fn; + fp->fp_pre_sleep_arg = sleep_arg; } /** - * Set the argument for the sleep function for a fail point + * Set the post-sleep function. This will be passed to timeout if we take + * the timeout path. This must be set if you sleep using the timeout path. */ -static __inline void -fail_point_sleep_set_arg(struct fail_point *fp, void *sleep_arg) +static inline void +fail_point_sleep_set_post_func(struct fail_point *fp, void (*sleep_fn)(void *)) +{ + fp->fp_post_sleep_fn = sleep_fn; +} + +static inline void +fail_point_sleep_set_post_arg(struct fail_point *fp, void *sleep_arg) +{ + fp->fp_post_sleep_arg = sleep_arg; +} +/** + * If the FAIL_POINT_USE_TIMEOUT flag is set on a failpoint, then + * FAIL_POINT_SLEEP will result in a call to timeout instead of + * msleep. Note that if you sleep while this flag is set, you must + * set fp_post_sleep_fn or an error will occur upon waking. + */ +static inline void +fail_point_use_timeout_path(struct fail_point *fp, bool use_timeout, + void (*post_sleep_fn)(void *)) { - fp->fp_sleep_arg = sleep_arg; + KASSERT(!use_timeout || post_sleep_fn != NULL || + (post_sleep_fn == NULL && fp->fp_post_sleep_fn != NULL), + ("Setting fp to use timeout, but not setting post_sleep_fn\n")); + + if (use_timeout) + fp->fp_flags |= FAIL_POINT_USE_TIMEOUT_PATH; + else + fp->fp_flags &= ~FAIL_POINT_USE_TIMEOUT_PATH; + + if (post_sleep_fn != NULL) + fp->fp_post_sleep_fn = post_sleep_fn; } /** @@ -116,33 +178,64 @@ /** * Evaluate a failpoint. */ -static __inline enum fail_point_return_code +static inline enum fail_point_return_code fail_point_eval(struct fail_point *fp, int *ret) { - if (TAILQ_EMPTY(&fp->fp_entries)) { + if (__predict_true(fp->fp_setting == NULL)) return (FAIL_POINT_RC_CONTINUE); - } return (fail_point_eval_nontrivial(fp, ret)); } __END_DECLS /* Declare a fail_point and its sysctl in a function. */ -#define _FAIL_POINT_NAME(name) _fail_point_##name -#define _FAIL_POINT_LOCATION() "(" __FILE__ ":" __XSTRING(__LINE__) ")" +#define _FAIL_POINT_NAME(name) _fail_point_##name +#define _FAIL_POINT_LOCATION() "(" __FILE__ ":" __XSTRING(__LINE__) ")" +#define _FAIL_POINT_INIT(parent, name, flags) \ + static struct fail_point _FAIL_POINT_NAME(name) = { \ + .fp_name = #name, \ + .fp_location = _FAIL_POINT_LOCATION(), \ + .fp_ref_cnt = 0, \ + .fp_setting = NULL, \ + .fp_flags = (flags), \ + .fp_pre_sleep_fn = NULL, \ + .fp_pre_sleep_arg = NULL, \ + .fp_post_sleep_fn = NULL, \ + .fp_post_sleep_arg = NULL, \ + }; \ + SYSCTL_OID(parent, OID_AUTO, name, \ + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, \ + &_FAIL_POINT_NAME(name), 0, fail_point_sysctl, \ + "A", ""); \ + SYSCTL_OID(parent, OID_AUTO, status_##name, \ + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, \ + &_FAIL_POINT_NAME(name), 0, \ + fail_point_sysctl_status, "A", ""); +#define _FAIL_POINT_EVAL(name, cond, code...) \ + int RETURN_VALUE; \ + \ + if (__predict_false(cond && \ + fail_point_eval(&_FAIL_POINT_NAME(name), &RETURN_VALUE))) { \ + \ + code; \ + \ + } + /** - * Instantiate a failpoint which returns "value" from the function when triggered. - * @param parent The parent sysctl under which to locate the sysctl + * Instantiate a failpoint which returns "RETURN_VALUE" from the function + * when triggered. + * @param parent The parent sysctl under which to locate the fp's sysctl * @param name The name of the failpoint in the sysctl tree (and printouts) - * @return Instantly returns the return("value") specified in the + * @return Instantly returns the RETURN_VALUE specified in the * failpoint, if triggered. */ #define KFAIL_POINT_RETURN(parent, name) \ KFAIL_POINT_CODE(parent, name, return RETURN_VALUE) /** - * Instantiate a failpoint which returns (void) from the function when triggered. + * Instantiate a failpoint which returns (void) from the function when + * triggered. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree (and printouts) * @return Instantly returns void, if triggered in the failpoint. @@ -153,7 +246,8 @@ /** * Instantiate a failpoint which sets an error when triggered. * @param parent The parent sysctl under which to locate the sysctl - * @param name The name of the failpoint in the sysctl tree (and printouts) + * @param name The name of the failpoint in the sysctl tree (and + * printouts) * @param error_var A variable to set to the failpoint's specified * return-value when triggered */ @@ -164,7 +258,8 @@ * Instantiate a failpoint which sets an error and then goes to a * specified label in the function when triggered. * @param parent The parent sysctl under which to locate the sysctl - * @param name The name of the failpoint in the sysctl tree (and printouts) + * @param name The name of the failpoint in the sysctl tree (and + * printouts) * @param error_var A variable to set to the failpoint's specified * return-value when triggered * @param label The location to goto when triggered. @@ -173,39 +268,81 @@ KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE; goto label) /** + * Instantiate a failpoint which sets its pre- and post-sleep callback + * mechanisms. + * @param parent The parent sysctl under which to locate the sysctl + * @param name The name of the failpoint in the sysctl tree (and + * printouts) + * @param pre_func Function pointer to the pre-sleep function, which will be + * called directly before going to sleep. + * @param pre_arg Argument to the pre-sleep function + * @param post_func Function pointer to the pot-sleep function, which will be + * called directly before going to sleep. + * @param post_arg Argument to the post-sleep function + */ +#define KFAIL_POINT_SLEEP_CALLBACKS(parent, name, pre_func, pre_arg, \ + post_func, post_arg) \ + KFAIL_POINT_CODE_SLEEP_CALLBACKS(parent, name, pre_func, \ + pre_arg, post_func, post_arg, return RETURN_VALUE) + +/** + * Instantiate a failpoint which runs arbitrary code when triggered, and sets + * its pre- and post-sleep callback mechanisms + * @param parent The parent sysctl under which to locate the sysctl + * @param name The name of the failpoint in the sysctl tree (and + * printouts) + * @param pre_func Function pointer to the pre-sleep function, which will be + * called directly before going to sleep. + * @param pre_arg Argument to the pre-sleep function + * @param post_func Function pointer to the pot-sleep function, which will be + * called directly before going to sleep. + * @param post_arg Argument to the post-sleep function + * @param code The arbitrary code to run when triggered. Can reference + * "RETURN_VALUE" if desired to extract the specified + * user return-value when triggered. Note that this is + * implemented with a do-while loop so be careful of + * break and continue statements. + */ +#define KFAIL_POINT_CODE_SLEEP_CALLBACKS(parent, name, pre_func, pre_arg, \ + post_func, post_arg, code...) \ + do { \ + _FAIL_POINT_INIT(parent, name) \ + _FAIL_POINT_NAME(name).fp_pre_sleep_fn = pre_func; \ + _FAIL_POINT_NAME(name).fp_pre_sleep_arg = pre_arg; \ + _FAIL_POINT_NAME(name).fp_post_sleep_fn = post_func; \ + _FAIL_POINT_NAME(name).fp_post_sleep_arg = post_arg; \ + _FAIL_POINT_EVAL(name, true, code) \ + } while (0) + + +/** * Instantiate a failpoint which runs arbitrary code when triggered. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree - * (and printouts) + * (and printouts) * @param code The arbitrary code to run when triggered. Can reference * "RETURN_VALUE" if desired to extract the specified * user return-value when triggered. Note that this is * implemented with a do-while loop so be careful of * break and continue statements. */ -#define KFAIL_POINT_CODE(parent, name, code) \ -do { \ - int RETURN_VALUE; \ - static struct fail_point _FAIL_POINT_NAME(name) = { \ - #name, \ - _FAIL_POINT_LOCATION(), \ - TAILQ_HEAD_INITIALIZER(_FAIL_POINT_NAME(name).fp_entries), \ - 0, \ - NULL, NULL, \ - }; \ - SYSCTL_OID(parent, OID_AUTO, name, \ - CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, \ - &_FAIL_POINT_NAME(name), 0, fail_point_sysctl, \ - "A", ""); \ - \ - if (__predict_false( \ - fail_point_eval(&_FAIL_POINT_NAME(name), &RETURN_VALUE))) { \ - \ - code; \ - \ - } \ -} while (0) - +#define KFAIL_POINT_CODE(parent, name, code...) \ + do { \ + _FAIL_POINT_INIT(parent, name, 0) \ + _FAIL_POINT_EVAL(name, true, code) \ + } while (0) + +#define KFAIL_POINT_CODE_FLAGS(parent, name, flags, code...) \ + do { \ + _FAIL_POINT_INIT(parent, name, flags) \ + _FAIL_POINT_EVAL(name, true, code) \ + } while (0) + +#define KFAIL_POINT_CODE_COND(parent, name, cond, flags, code...) \ + do { \ + _FAIL_POINT_INIT(parent, name, flags) \ + _FAIL_POINT_EVAL(name, cond, code) \ + } while (0) /** * @} @@ -214,6 +351,7 @@ #ifdef _KERNEL int fail_point_sysctl(SYSCTL_HANDLER_ARGS); +int fail_point_sysctl_status(SYSCTL_HANDLER_ARGS); /* The fail point sysctl tree. */ SYSCTL_DECL(_debug_fail_point); Index: sys/sys/sleepqueue.h =================================================================== --- sys/sys/sleepqueue.h +++ sys/sys/sleepqueue.h @@ -107,5 +107,9 @@ void sleepq_wait(void *wchan, int pri); int sleepq_wait_sig(void *wchan, int pri); +#include +int sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue, + int *count_stacks_printed); + #endif /* _KERNEL */ #endif /* !_SYS_SLEEPQUEUE_H_ */