Index: head/sys/kern/kern_fail.c =================================================================== --- head/sys/kern/kern_fail.c (revision 355724) +++ head/sys/kern/kern_fail.c (revision 355725) @@ -1,1126 +1,1146 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Isilon Inc http://www.isilon.com/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /** * @file * * fail(9) Facility. * * @ingroup failpoint_private */ /** * @defgroup failpoint fail(9) Facility * * Failpoints allow for injecting fake errors into running code on the fly, * without modifying code or recompiling with flags. Failpoints are always * present, and are very efficient when disabled. Failpoints are described * in man fail(9). */ /** * @defgroup failpoint_private Private fail(9) Implementation functions * * Private implementations for the actual failpoint code. * * @ingroup failpoint */ /** * @addtogroup failpoint_private * @{ */ #include __FBSDID("$FreeBSD$"); #include "opt_stack.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ILOG_DEFINE_FOR_FILE ILOG_DEFINE_FOR_FILE(L_ISI_FAIL_POINT, L_ILOG, fail_point); #endif static MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system"); #define fp_free(ptr) free(ptr, M_FAIL_POINT) #define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags)) #define fs_free(ptr) fp_free(ptr) #define fs_malloc() fp_malloc(sizeof(struct fail_point_setting), \ M_WAITOK | M_ZERO) /** * These define the wchans that are used for sleeping, pausing respectively. * They are chosen arbitrarily but need to be distinct to the failpoint and * the sleep/pause distinction. */ #define FP_SLEEP_CHANNEL(fp) (void*)(fp) #define FP_PAUSE_CHANNEL(fp) __DEVOLATILE(void*, &fp->fp_setting) /** * Don't allow more than this many entries in a fail point set by sysctl. * The 99.99...% case is to have 1 entry. I can't imagine having this many * entries, so it should not limit us. Saves on re-mallocs while holding * a non-sleepable lock. */ #define FP_MAX_ENTRY_COUNT 20 /* Used to drain sbufs to the sysctl output */ int fail_sysctl_drain_func(void *, const char *, int); /* Head of tailq of struct fail_point_entry */ TAILQ_HEAD(fail_point_entry_queue, fail_point_entry); /** * fp entries garbage list; outstanding entries are cleaned up in the * garbage collector */ STAILQ_HEAD(fail_point_setting_garbage, fail_point_setting); static struct fail_point_setting_garbage fp_setting_garbage = STAILQ_HEAD_INITIALIZER(fp_setting_garbage); static struct mtx mtx_garbage_list; MTX_SYSINIT(mtx_garbage_list, &mtx_garbage_list, "fail point garbage mtx", MTX_SPIN); static struct sx sx_fp_set; SX_SYSINIT(sx_fp_set, &sx_fp_set, "fail point set sx"); /** * Failpoint types. * Don't change these without changing fail_type_strings in fail.c. * @ingroup failpoint_private */ enum fail_point_t { FAIL_POINT_OFF, /**< don't fail */ FAIL_POINT_PANIC, /**< panic */ FAIL_POINT_RETURN, /**< return an errorcode */ FAIL_POINT_BREAK, /**< break into the debugger */ FAIL_POINT_PRINT, /**< print a message */ FAIL_POINT_SLEEP, /**< sleep for some msecs */ FAIL_POINT_PAUSE, /**< sleep until failpoint is set to off */ FAIL_POINT_YIELD, /**< yield the cpu */ FAIL_POINT_DELAY, /**< busy wait the cpu */ FAIL_POINT_NUMTYPES, FAIL_POINT_INVALID = -1 }; static struct { const char *name; int nmlen; } fail_type_strings[] = { #define FP_TYPE_NM_LEN(s) { s, sizeof(s) - 1 } [FAIL_POINT_OFF] = FP_TYPE_NM_LEN("off"), [FAIL_POINT_PANIC] = FP_TYPE_NM_LEN("panic"), [FAIL_POINT_RETURN] = FP_TYPE_NM_LEN("return"), [FAIL_POINT_BREAK] = FP_TYPE_NM_LEN("break"), [FAIL_POINT_PRINT] = FP_TYPE_NM_LEN("print"), [FAIL_POINT_SLEEP] = FP_TYPE_NM_LEN("sleep"), [FAIL_POINT_PAUSE] = FP_TYPE_NM_LEN("pause"), [FAIL_POINT_YIELD] = FP_TYPE_NM_LEN("yield"), [FAIL_POINT_DELAY] = FP_TYPE_NM_LEN("delay"), }; #define FE_COUNT_UNTRACKED (INT_MIN) /** * Internal structure tracking a single term of a complete failpoint. * @ingroup failpoint_private */ struct fail_point_entry { volatile bool fe_stale; enum fail_point_t fe_type; /**< type of entry */ int fe_arg; /**< argument to type (e.g. return value) */ int fe_prob; /**< likelihood of firing in millionths */ int32_t fe_count; /**< number of times to fire, -1 means infinite */ pid_t fe_pid; /**< only fail for this process */ struct fail_point *fe_parent; /**< backpointer to fp */ TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry ptr */ }; struct fail_point_setting { STAILQ_ENTRY(fail_point_setting) fs_garbage_link; struct fail_point_entry_queue fp_entry_queue; struct fail_point * fs_parent; struct mtx feq_mtx; /* Gives fail_point_pause something to do. */ }; /** * Defines stating the equivalent of probablilty one (100%) */ enum { PROB_MAX = 1000000, /* probability between zero and this number */ PROB_DIGITS = 6 /* number of zero's in above number */ }; /* Get a ref on an fp's fp_setting */ static inline struct fail_point_setting *fail_point_setting_get_ref( struct fail_point *fp); /* Release a ref on an fp_setting */ static inline void fail_point_setting_release_ref(struct fail_point *fp); /* Allocate and initialize a struct fail_point_setting */ static struct fail_point_setting *fail_point_setting_new(struct fail_point *); /* Free a struct fail_point_setting */ static void fail_point_setting_destroy(struct fail_point_setting *fp_setting); /* Allocate and initialize a struct fail_point_entry */ static struct fail_point_entry *fail_point_entry_new(struct fail_point_setting *); /* Free a struct fail_point_entry */ static void fail_point_entry_destroy(struct fail_point_entry *fp_entry); /* Append fp setting to garbage list */ static inline void fail_point_setting_garbage_append( struct fail_point_setting *fp_setting); /* Swap fp's setting with fp_setting_new */ static inline struct fail_point_setting * fail_point_swap_settings(struct fail_point *fp, struct fail_point_setting *fp_setting_new); /* Free up any zero-ref setting in the garbage queue */ static void fail_point_garbage_collect(void); /* If this fail point's setting are empty, then swap it out to NULL. */ static inline void fail_point_eval_swap_out(struct fail_point *fp, struct fail_point_setting *fp_setting); bool fail_point_is_off(struct fail_point *fp) { bool return_val; struct fail_point_setting *fp_setting; struct fail_point_entry *ent; return_val = true; fp_setting = fail_point_setting_get_ref(fp); if (fp_setting != NULL) { TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { if (!ent->fe_stale) { return_val = false; break; } } } fail_point_setting_release_ref(fp); return (return_val); } /* Allocate and initialize a struct fail_point_setting */ static struct fail_point_setting * fail_point_setting_new(struct fail_point *fp) { struct fail_point_setting *fs_new; fs_new = fs_malloc(); fs_new->fs_parent = fp; TAILQ_INIT(&fs_new->fp_entry_queue); mtx_init(&fs_new->feq_mtx, "fail point entries", NULL, MTX_SPIN); fail_point_setting_garbage_append(fs_new); return (fs_new); } /* Free a struct fail_point_setting */ static void fail_point_setting_destroy(struct fail_point_setting *fp_setting) { struct fail_point_entry *ent; while (!TAILQ_EMPTY(&fp_setting->fp_entry_queue)) { ent = TAILQ_FIRST(&fp_setting->fp_entry_queue); TAILQ_REMOVE(&fp_setting->fp_entry_queue, ent, fe_entries); fail_point_entry_destroy(ent); } fs_free(fp_setting); } /* Allocate and initialize a struct fail_point_entry */ static struct fail_point_entry * fail_point_entry_new(struct fail_point_setting *fp_setting) { struct fail_point_entry *fp_entry; fp_entry = fp_malloc(sizeof(struct fail_point_entry), M_WAITOK | M_ZERO); fp_entry->fe_parent = fp_setting->fs_parent; fp_entry->fe_prob = PROB_MAX; fp_entry->fe_pid = NO_PID; fp_entry->fe_count = FE_COUNT_UNTRACKED; TAILQ_INSERT_TAIL(&fp_setting->fp_entry_queue, fp_entry, fe_entries); return (fp_entry); } /* Free a struct fail_point_entry */ static void fail_point_entry_destroy(struct fail_point_entry *fp_entry) { fp_free(fp_entry); } /* Get a ref on an fp's fp_setting */ static inline struct fail_point_setting * fail_point_setting_get_ref(struct fail_point *fp) { struct fail_point_setting *fp_setting; /* Invariant: if we have a ref, our pointer to fp_setting is safe */ atomic_add_acq_32(&fp->fp_ref_cnt, 1); fp_setting = fp->fp_setting; return (fp_setting); } /* Release a ref on an fp_setting */ static inline void fail_point_setting_release_ref(struct fail_point *fp) { KASSERT(&fp->fp_ref_cnt > 0, ("Attempting to deref w/no refs")); atomic_subtract_rel_32(&fp->fp_ref_cnt, 1); } /* Append fp entries to fp garbage list */ static inline void fail_point_setting_garbage_append(struct fail_point_setting *fp_setting) { mtx_lock_spin(&mtx_garbage_list); STAILQ_INSERT_TAIL(&fp_setting_garbage, fp_setting, fs_garbage_link); mtx_unlock_spin(&mtx_garbage_list); } /* Swap fp's entries with fp_setting_new */ static struct fail_point_setting * fail_point_swap_settings(struct fail_point *fp, struct fail_point_setting *fp_setting_new) { struct fail_point_setting *fp_setting_old; fp_setting_old = fp->fp_setting; fp->fp_setting = fp_setting_new; return (fp_setting_old); } static inline void fail_point_eval_swap_out(struct fail_point *fp, struct fail_point_setting *fp_setting) { /* We may have already been swapped out and replaced; ignore. */ if (fp->fp_setting == fp_setting) fail_point_swap_settings(fp, NULL); } /* Free up any zero-ref entries in the garbage queue */ static void fail_point_garbage_collect(void) { struct fail_point_setting *fs_current, *fs_next; struct fail_point_setting_garbage fp_ents_free_list; /** * We will transfer the entries to free to fp_ents_free_list while holding * the spin mutex, then free it after we drop the lock. This avoids * triggering witness due to sleepable mutexes in the memory * allocator. */ STAILQ_INIT(&fp_ents_free_list); mtx_lock_spin(&mtx_garbage_list); STAILQ_FOREACH_SAFE(fs_current, &fp_setting_garbage, fs_garbage_link, fs_next) { if (fs_current->fs_parent->fp_setting != fs_current && fs_current->fs_parent->fp_ref_cnt == 0) { STAILQ_REMOVE(&fp_setting_garbage, fs_current, fail_point_setting, fs_garbage_link); STAILQ_INSERT_HEAD(&fp_ents_free_list, fs_current, fs_garbage_link); } } mtx_unlock_spin(&mtx_garbage_list); STAILQ_FOREACH_SAFE(fs_current, &fp_ents_free_list, fs_garbage_link, fs_next) fail_point_setting_destroy(fs_current); } /* Drain out all refs from this fail point */ static inline void fail_point_drain(struct fail_point *fp, int expected_ref) { struct fail_point_setting *entries; entries = fail_point_swap_settings(fp, NULL); /** * We have unpaused all threads; so we will wait no longer * than the time taken for the longest remaining sleep, or * the length of time of a long-running code block. */ while (fp->fp_ref_cnt > expected_ref) { wakeup(FP_PAUSE_CHANNEL(fp)); tsleep(&fp, PWAIT, "fail_point_drain", hz / 100); } + if (fp->fp_callout) + callout_drain(fp->fp_callout); fail_point_swap_settings(fp, entries); } static inline void fail_point_pause(struct fail_point *fp, enum fail_point_return_code *pret, struct mtx *mtx_sleep) { if (fp->fp_pre_sleep_fn) fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); msleep_spin(FP_PAUSE_CHANNEL(fp), mtx_sleep, "failpt", 0); if (fp->fp_post_sleep_fn) fp->fp_post_sleep_fn(fp->fp_post_sleep_arg); } static inline void fail_point_sleep(struct fail_point *fp, int msecs, enum fail_point_return_code *pret) { int timo; /* Convert from millisecs to ticks, rounding up */ timo = howmany((int64_t)msecs * hz, 1000L); if (timo > 0) { if (!(fp->fp_flags & FAIL_POINT_USE_TIMEOUT_PATH)) { if (fp->fp_pre_sleep_fn) fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); tsleep(FP_SLEEP_CHANNEL(fp), PWAIT, "failpt", timo); if (fp->fp_post_sleep_fn) fp->fp_post_sleep_fn(fp->fp_post_sleep_arg); } else { if (fp->fp_pre_sleep_fn) fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); - timeout(fp->fp_post_sleep_fn, fp->fp_post_sleep_arg, - timo); + callout_reset(fp->fp_callout, timo, + fp->fp_post_sleep_fn, fp->fp_post_sleep_arg); *pret = FAIL_POINT_RC_QUEUED; } } } static char *parse_fail_point(struct fail_point_setting *, char *); static char *parse_term(struct fail_point_setting *, char *); static char *parse_number(int *out_units, int *out_decimal, char *); static char *parse_type(struct fail_point_entry *, char *); /** * Initialize a fail_point. The name is formed in a printf-like fashion * from "fmt" and subsequent arguments. This function is generally used * for custom failpoints located at odd places in the sysctl tree, and is * not explicitly needed for standard in-line-declared failpoints. * * @ingroup failpoint */ void fail_point_init(struct fail_point *fp, const char *fmt, ...) { va_list ap; char *name; int n; fp->fp_setting = NULL; fp->fp_flags = 0; /* Figure out the size of the name. */ va_start(ap, fmt); n = vsnprintf(NULL, 0, fmt, ap); va_end(ap); /* Allocate the name and fill it in. */ name = fp_malloc(n + 1, M_WAITOK); if (name != NULL) { va_start(ap, fmt); vsnprintf(name, n + 1, fmt, ap); va_end(ap); } fp->fp_name = name; fp->fp_location = ""; fp->fp_flags |= FAIL_POINT_DYNAMIC_NAME; fp->fp_pre_sleep_fn = NULL; fp->fp_pre_sleep_arg = NULL; fp->fp_post_sleep_fn = NULL; fp->fp_post_sleep_arg = NULL; } +void +fail_point_alloc_callout(struct fail_point *fp) +{ + + /** + * This assumes that calls to fail_point_use_timeout_path() + * will not race. + */ + if (fp->fp_callout != NULL) + return; + fp->fp_callout = fp_malloc(sizeof(*fp->fp_callout), M_WAITOK); + callout_init(fp->fp_callout, CALLOUT_MPSAFE); +} + /** * Free the resources held by a fail_point, and wake any paused threads. * Thou shalt not allow threads to hit this fail point after you enter this * function, nor shall you call this multiple times for a given fp. * @ingroup failpoint */ void fail_point_destroy(struct fail_point *fp) { fail_point_drain(fp, 0); if ((fp->fp_flags & FAIL_POINT_DYNAMIC_NAME) != 0) { fp_free(__DECONST(void *, fp->fp_name)); fp->fp_name = NULL; } fp->fp_flags = 0; + if (fp->fp_callout) { + fp_free(fp->fp_callout); + fp->fp_callout = NULL; + } sx_xlock(&sx_fp_set); fail_point_garbage_collect(); sx_xunlock(&sx_fp_set); } /** * This does the real work of evaluating a fail point. If the fail point tells * us to return a value, this function returns 1 and fills in 'return_value' * (return_value is allowed to be null). If the fail point tells us to panic, * we never return. Otherwise we just return 0 after doing some work, which * means "keep going". */ enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *fp, int *return_value) { bool execute = false; struct fail_point_entry *ent; struct fail_point_setting *fp_setting; enum fail_point_return_code ret; int cont; int count; int msecs; int usecs; ret = FAIL_POINT_RC_CONTINUE; cont = 0; /* don't continue by default */ fp_setting = fail_point_setting_get_ref(fp); if (fp_setting == NULL) goto abort; TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { if (ent->fe_stale) continue; if (ent->fe_prob < PROB_MAX && ent->fe_prob < random() % PROB_MAX) continue; if (ent->fe_pid != NO_PID && ent->fe_pid != curproc->p_pid) continue; if (ent->fe_count != FE_COUNT_UNTRACKED) { count = ent->fe_count; while (count > 0) { if (atomic_cmpset_32(&ent->fe_count, count, count - 1)) { count--; execute = true; break; } count = ent->fe_count; } if (execute == false) /* We lost the race; consider the entry stale and bail now */ continue; if (count == 0) ent->fe_stale = true; } switch (ent->fe_type) { case FAIL_POINT_PANIC: panic("fail point %s panicking", fp->fp_name); /* NOTREACHED */ case FAIL_POINT_RETURN: if (return_value != NULL) *return_value = ent->fe_arg; ret = FAIL_POINT_RC_RETURN; break; case FAIL_POINT_BREAK: printf("fail point %s breaking to debugger\n", fp->fp_name); breakpoint(); break; case FAIL_POINT_PRINT: printf("fail point %s executing\n", fp->fp_name); cont = ent->fe_arg; break; case FAIL_POINT_SLEEP: msecs = ent->fe_arg; if (msecs) fail_point_sleep(fp, msecs, &ret); break; case FAIL_POINT_PAUSE: /** * Pausing is inherently strange with multiple * entries given our design. That is because some * entries could be unreachable, for instance in cases like: * pause->return. We can never reach the return entry. * The sysctl layer actually truncates all entries after * a pause for this reason. */ mtx_lock_spin(&fp_setting->feq_mtx); fail_point_pause(fp, &ret, &fp_setting->feq_mtx); mtx_unlock_spin(&fp_setting->feq_mtx); break; case FAIL_POINT_YIELD: kern_yield(PRI_UNCHANGED); break; case FAIL_POINT_DELAY: usecs = ent->fe_arg; DELAY(usecs); break; default: break; } if (cont == 0) break; } if (fail_point_is_off(fp)) fail_point_eval_swap_out(fp, fp_setting); abort: fail_point_setting_release_ref(fp); return (ret); } /** * Translate internal fail_point structure into human-readable text. */ static void fail_point_get(struct fail_point *fp, struct sbuf *sb, bool verbose) { struct fail_point_entry *ent; struct fail_point_setting *fp_setting; struct fail_point_entry *fp_entry_cpy; int cnt_sleeping; int idx; int printed_entry_count; cnt_sleeping = 0; idx = 0; printed_entry_count = 0; fp_entry_cpy = fp_malloc(sizeof(struct fail_point_entry) * (FP_MAX_ENTRY_COUNT + 1), M_WAITOK); fp_setting = fail_point_setting_get_ref(fp); if (fp_setting != NULL) { TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { if (ent->fe_stale) continue; KASSERT(printed_entry_count < FP_MAX_ENTRY_COUNT, ("FP entry list larger than allowed")); fp_entry_cpy[printed_entry_count] = *ent; ++printed_entry_count; } } fail_point_setting_release_ref(fp); /* This is our equivalent of a NULL terminator */ fp_entry_cpy[printed_entry_count].fe_type = FAIL_POINT_INVALID; while (idx < printed_entry_count) { ent = &fp_entry_cpy[idx]; ++idx; if (ent->fe_prob < PROB_MAX) { int decimal = ent->fe_prob % (PROB_MAX / 100); int units = ent->fe_prob / (PROB_MAX / 100); sbuf_printf(sb, "%d", units); if (decimal) { int digits = PROB_DIGITS - 2; while (!(decimal % 10)) { digits--; decimal /= 10; } sbuf_printf(sb, ".%0*d", digits, decimal); } sbuf_printf(sb, "%%"); } if (ent->fe_count >= 0) sbuf_printf(sb, "%d*", ent->fe_count); sbuf_printf(sb, "%s", fail_type_strings[ent->fe_type].name); if (ent->fe_arg) sbuf_printf(sb, "(%d)", ent->fe_arg); if (ent->fe_pid != NO_PID) sbuf_printf(sb, "[pid %d]", ent->fe_pid); if (TAILQ_NEXT(ent, fe_entries)) sbuf_printf(sb, "->"); } if (!printed_entry_count) sbuf_printf(sb, "off"); fp_free(fp_entry_cpy); if (verbose) { #ifdef STACK /* Print number of sleeping threads. queue=0 is the argument * used by msleep when sending our threads to sleep. */ sbuf_printf(sb, "\nsleeping_thread_stacks = {\n"); sleepq_sbuf_print_stacks(sb, FP_SLEEP_CHANNEL(fp), 0, &cnt_sleeping); sbuf_printf(sb, "},\n"); #endif sbuf_printf(sb, "sleeping_thread_count = %d,\n", cnt_sleeping); #ifdef STACK sbuf_printf(sb, "paused_thread_stacks = {\n"); sleepq_sbuf_print_stacks(sb, FP_PAUSE_CHANNEL(fp), 0, &cnt_sleeping); sbuf_printf(sb, "},\n"); #endif sbuf_printf(sb, "paused_thread_count = %d\n", cnt_sleeping); } } /** * Set an internal fail_point structure from a human-readable failpoint string * in a lock-safe manner. */ static int fail_point_set(struct fail_point *fp, char *buf) { struct fail_point_entry *ent, *ent_next; struct fail_point_setting *entries; bool should_wake_paused; bool should_truncate; int error; error = 0; should_wake_paused = false; should_truncate = false; /* Parse new entries. */ /** * ref protects our new malloc'd stuff from being garbage collected * before we link it. */ fail_point_setting_get_ref(fp); entries = fail_point_setting_new(fp); if (parse_fail_point(entries, buf) == NULL) { STAILQ_REMOVE(&fp_setting_garbage, entries, fail_point_setting, fs_garbage_link); fail_point_setting_destroy(entries); error = EINVAL; goto end; } /** * Transfer the entries we are going to keep to a new list. * Get rid of useless zero probability entries, and entries with hit * count 0. * If 'off' is present, and it has no hit count set, then all entries * after it are discarded since they are unreachable. */ TAILQ_FOREACH_SAFE(ent, &entries->fp_entry_queue, fe_entries, ent_next) { if (ent->fe_prob == 0 || ent->fe_count == 0) { printf("Discarding entry which cannot execute %s\n", fail_type_strings[ent->fe_type].name); TAILQ_REMOVE(&entries->fp_entry_queue, ent, fe_entries); fp_free(ent); continue; } else if (should_truncate) { printf("Discarding unreachable entry %s\n", fail_type_strings[ent->fe_type].name); TAILQ_REMOVE(&entries->fp_entry_queue, ent, fe_entries); fp_free(ent); continue; } if (ent->fe_type == FAIL_POINT_OFF) { should_wake_paused = true; if (ent->fe_count == FE_COUNT_UNTRACKED) { should_truncate = true; TAILQ_REMOVE(&entries->fp_entry_queue, ent, fe_entries); fp_free(ent); } } else if (ent->fe_type == FAIL_POINT_PAUSE) { should_truncate = true; } else if (ent->fe_type == FAIL_POINT_SLEEP && (fp->fp_flags & FAIL_POINT_NONSLEEPABLE)) { /** * If this fail point is annotated as being in a * non-sleepable ctx, convert sleep to delay and * convert the msec argument to usecs. */ printf("Sleep call request on fail point in " "non-sleepable context; using delay instead " "of sleep\n"); ent->fe_type = FAIL_POINT_DELAY; ent->fe_arg *= 1000; } } if (TAILQ_EMPTY(&entries->fp_entry_queue)) { entries = fail_point_swap_settings(fp, NULL); if (entries != NULL) wakeup(FP_PAUSE_CHANNEL(fp)); } else { if (should_wake_paused) wakeup(FP_PAUSE_CHANNEL(fp)); fail_point_swap_settings(fp, entries); } end: #ifdef IWARNING if (error) IWARNING("Failed to set %s %s to %s", fp->fp_name, fp->fp_location, buf); else INOTICE("Set %s %s to %s", fp->fp_name, fp->fp_location, buf); #endif /* IWARNING */ fail_point_setting_release_ref(fp); return (error); } #define MAX_FAIL_POINT_BUF 1023 /** * Handle kernel failpoint set/get. */ int fail_point_sysctl(SYSCTL_HANDLER_ARGS) { struct fail_point *fp; char *buf; struct sbuf sb, *sb_check; int error; buf = NULL; error = 0; fp = arg1; sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); if (sb_check != &sb) return (ENOMEM); sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req); /* Setting */ /** * Lock protects any new entries from being garbage collected before we * can link them to the fail point. */ sx_xlock(&sx_fp_set); if (req->newptr) { if (req->newlen > MAX_FAIL_POINT_BUF) { error = EINVAL; goto out; } buf = fp_malloc(req->newlen + 1, M_WAITOK); error = SYSCTL_IN(req, buf, req->newlen); if (error) goto out; buf[req->newlen] = '\0'; error = fail_point_set(fp, buf); } fail_point_garbage_collect(); sx_xunlock(&sx_fp_set); /* Retrieving. */ fail_point_get(fp, &sb, false); out: sbuf_finish(&sb); sbuf_delete(&sb); if (buf) fp_free(buf); return (error); } int fail_point_sysctl_status(SYSCTL_HANDLER_ARGS) { struct fail_point *fp; struct sbuf sb, *sb_check; fp = arg1; sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); if (sb_check != &sb) return (ENOMEM); sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req); /* Retrieving. */ fail_point_get(fp, &sb, true); sbuf_finish(&sb); sbuf_delete(&sb); /** * Lock protects any new entries from being garbage collected before we * can link them to the fail point. */ sx_xlock(&sx_fp_set); fail_point_garbage_collect(); sx_xunlock(&sx_fp_set); return (0); } int fail_sysctl_drain_func(void *sysctl_args, const char *buf, int len) { struct sysctl_req *sa; int error; sa = sysctl_args; error = SYSCTL_OUT(sa, buf, len); if (error == ENOMEM) return (-1); else return (len); } /** * Internal helper function to translate a human-readable failpoint string * into a internally-parsable fail_point structure. */ static char * parse_fail_point(struct fail_point_setting *ents, char *p) { /* :: * ( "->" )* */ uint8_t term_count; term_count = 1; p = parse_term(ents, p); if (p == NULL) return (NULL); while (*p != '\0') { term_count++; if (p[0] != '-' || p[1] != '>' || (p = parse_term(ents, p+2)) == NULL || term_count > FP_MAX_ENTRY_COUNT) return (NULL); } return (p); } /** * Internal helper function to parse an individual term from a failpoint. */ static char * parse_term(struct fail_point_setting *ents, char *p) { struct fail_point_entry *ent; ent = fail_point_entry_new(ents); /* * :: * ( ( "%") | ( "*" ) )* * * [ "(" ")" ] * [ "[pid " "]" ] */ /* ( ( "%") | ( "*" ) )* */ while (isdigit(*p) || *p == '.') { int units, decimal; p = parse_number(&units, &decimal, p); if (p == NULL) return (NULL); if (*p == '%') { if (units > 100) /* prevent overflow early */ units = 100; ent->fe_prob = units * (PROB_MAX / 100) + decimal; if (ent->fe_prob > PROB_MAX) ent->fe_prob = PROB_MAX; } else if (*p == '*') { if (!units || units < 0 || decimal) return (NULL); ent->fe_count = units; } else return (NULL); p++; } /* */ p = parse_type(ent, p); if (p == NULL) return (NULL); if (*p == '\0') return (p); /* [ "(" ")" ] */ if (*p != '(') return (p); p++; if (!isdigit(*p) && *p != '-') return (NULL); ent->fe_arg = strtol(p, &p, 0); if (*p++ != ')') return (NULL); /* [ "[pid " "]" ] */ #define PID_STRING "[pid " if (strncmp(p, PID_STRING, sizeof(PID_STRING) - 1) != 0) return (p); p += sizeof(PID_STRING) - 1; if (!isdigit(*p)) return (NULL); ent->fe_pid = strtol(p, &p, 0); if (*p++ != ']') return (NULL); return (p); } /** * Internal helper function to parse a numeric for a failpoint term. */ static char * parse_number(int *out_units, int *out_decimal, char *p) { char *old_p; /** * :: * [ "." ] | * "." */ /* whole part */ old_p = p; *out_units = strtol(p, &p, 10); if (p == old_p && *p != '.') return (NULL); /* fractional part */ *out_decimal = 0; if (*p == '.') { int digits = 0; p++; while (isdigit(*p)) { int digit = *p - '0'; if (digits < PROB_DIGITS - 2) *out_decimal = *out_decimal * 10 + digit; else if (digits == PROB_DIGITS - 2 && digit >= 5) (*out_decimal)++; digits++; p++; } if (!digits) /* need at least one digit after '.' */ return (NULL); while (digits++ < PROB_DIGITS - 2) /* add implicit zeros */ *out_decimal *= 10; } return (p); /* success */ } /** * Internal helper function to parse an individual type for a failpoint term. */ static char * parse_type(struct fail_point_entry *ent, char *beg) { enum fail_point_t type; int len; for (type = FAIL_POINT_OFF; type < FAIL_POINT_NUMTYPES; type++) { len = fail_type_strings[type].nmlen; if (strncmp(fail_type_strings[type].name, beg, len) == 0) { ent->fe_type = type; return (beg + len); } } return (NULL); } /* The fail point sysctl tree. */ SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points"); /* Debugging/testing stuff for fail point */ static int sysctl_test_fail_point(SYSCTL_HANDLER_ARGS) { KFAIL_POINT_RETURN(DEBUG_FP, test_fail_point); return (0); } SYSCTL_OID(_debug_fail_point, OID_AUTO, test_trigger_fail_point, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_test_fail_point, "A", "Trigger test fail points"); Index: head/sys/sys/fail.h =================================================================== --- head/sys/sys/fail.h (revision 355724) +++ head/sys/sys/fail.h (revision 355725) @@ -1,369 +1,375 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009-2019 Dell EMC Isilon http://www.isilon.com/ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /** * @file * * Main header for failpoint facility. */ #ifndef _SYS_FAIL_H_ #define _SYS_FAIL_H_ #include #include #include #include #include #include #include #include #include #include /** * Failpoint return codes, used internally. * @ingroup failpoint_private */ enum fail_point_return_code { FAIL_POINT_RC_CONTINUE = 0, /**< Continue with normal execution */ FAIL_POINT_RC_RETURN, /**< FP evaluated to 'return' */ FAIL_POINT_RC_QUEUED, /**< sleep_fn will be called */ }; struct fail_point_entry; struct fail_point_setting; /** * Internal failpoint structure, tracking all the current details of the * failpoint. This structure is the core component shared between the * failure-injection code and the user-interface. * @ingroup failpoint_private */ struct fail_point { const char *fp_name; /* name of fail point */ const char *fp_location; /* file:line of fail point */ volatile int fp_ref_cnt; /** * protects fp_setting: while holding * a ref, fp_setting points to an * unfreed fail_point_setting */ struct fail_point_setting * volatile fp_setting; int fp_flags; /**< Function to call before sleep or pause */ void (*fp_pre_sleep_fn)(void *); /**< Arg for fp_pre_sleep_fn */ void *fp_pre_sleep_arg; /**< Function to call after waking from sleep or pause */ void (*fp_post_sleep_fn)(void *); /**< Arg for fp_post_sleep_fn */ void *fp_post_sleep_arg; + + struct callout *fp_callout; }; #define FAIL_POINT_DYNAMIC_NAME 0x01 /**< Must free name on destroy */ /**< Use timeout path for sleep instead of msleep */ #define FAIL_POINT_USE_TIMEOUT_PATH 0x02 /**< If fail point is set to sleep, replace the sleep call with delay */ #define FAIL_POINT_NONSLEEPABLE 0x04 #define FAIL_POINT_CV_DESC "fp cv no iterators" #define FAIL_POINT_IS_OFF(fp) (__predict_true((fp)->fp_setting == NULL) || \ __predict_true(fail_point_is_off(fp))) __BEGIN_DECLS /* Private failpoint eval function -- use fail_point_eval() instead. */ enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *, int *ret); /** * @addtogroup failpoint * @{ */ /* * Initialize a fail-point. The name is formed in printf-like fashion * from "fmt" and the subsequent arguments. * Pair with fail_point_destroy(). */ void fail_point_init(struct fail_point *, const char *fmt, ...) __printflike(2, 3); /* Return true iff this fail point is set to off, false otherwise */ bool fail_point_is_off(struct fail_point *fp); /** * Set the pre-sleep function for a fail point * If fp_post_sleep_fn is specified, then FAIL_POINT_SLEEP will result in a * (*fp->fp_pre_sleep_fn)(fp->fp_pre_sleep_arg) call by the thread. */ static inline void fail_point_sleep_set_pre_func(struct fail_point *fp, void (*sleep_fn)(void *)) { fp->fp_pre_sleep_fn = sleep_fn; } static inline void fail_point_sleep_set_pre_arg(struct fail_point *fp, void *sleep_arg) { fp->fp_pre_sleep_arg = sleep_arg; } /** * Set the post-sleep function. This will be passed to timeout if we take * the timeout path. This must be set if you sleep using the timeout path. */ static inline void fail_point_sleep_set_post_func(struct fail_point *fp, void (*sleep_fn)(void *)) { fp->fp_post_sleep_fn = sleep_fn; } static inline void fail_point_sleep_set_post_arg(struct fail_point *fp, void *sleep_arg) { fp->fp_post_sleep_arg = sleep_arg; } + +void fail_point_alloc_callout(struct fail_point *); + /** * If the FAIL_POINT_USE_TIMEOUT flag is set on a failpoint, then - * FAIL_POINT_SLEEP will result in a call to timeout instead of + * FAIL_POINT_SLEEP will result in a call to callout_reset instead of * msleep. Note that if you sleep while this flag is set, you must * set fp_post_sleep_fn or an error will occur upon waking. */ static inline void fail_point_use_timeout_path(struct fail_point *fp, bool use_timeout, void (*post_sleep_fn)(void *)) { KASSERT(!use_timeout || post_sleep_fn != NULL || (post_sleep_fn == NULL && fp->fp_post_sleep_fn != NULL), ("Setting fp to use timeout, but not setting post_sleep_fn\n")); - if (use_timeout) + if (use_timeout) { + fail_point_alloc_callout(fp); fp->fp_flags |= FAIL_POINT_USE_TIMEOUT_PATH; - else + } else fp->fp_flags &= ~FAIL_POINT_USE_TIMEOUT_PATH; if (post_sleep_fn != NULL) fp->fp_post_sleep_fn = post_sleep_fn; } /** * Free the resources used by a fail-point. Pair with fail_point_init(). */ void fail_point_destroy(struct fail_point *); /** * Evaluate a failpoint. */ static inline enum fail_point_return_code fail_point_eval(struct fail_point *fp, int *ret) { if (__predict_true(fp->fp_setting == NULL)) return (FAIL_POINT_RC_CONTINUE); return (fail_point_eval_nontrivial(fp, ret)); } __END_DECLS /* Declare a fail_point and its sysctl in a function. */ #define KFAIL_POINT_DECLARE(name) \ extern struct fail_point _FAIL_POINT_NAME(name) #define _FAIL_POINT_NAME(name) _fail_point_##name #define _FAIL_POINT_LOCATION() "(" __FILE__ ":" __XSTRING(__LINE__) ")" #define KFAIL_POINT_DEFINE(parent, name, flags) \ struct fail_point _FAIL_POINT_NAME(name) = { \ .fp_name = #name, \ .fp_location = _FAIL_POINT_LOCATION(), \ .fp_ref_cnt = 0, \ .fp_setting = NULL, \ .fp_flags = (flags), \ .fp_pre_sleep_fn = NULL, \ .fp_pre_sleep_arg = NULL, \ .fp_post_sleep_fn = NULL, \ .fp_post_sleep_arg = NULL, \ }; \ SYSCTL_OID(parent, OID_AUTO, name, \ CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, \ &_FAIL_POINT_NAME(name), 0, fail_point_sysctl, \ "A", ""); \ SYSCTL_OID(parent, OID_AUTO, status_##name, \ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, \ &_FAIL_POINT_NAME(name), 0, \ fail_point_sysctl_status, "A", ""); #define _FAIL_POINT_INIT(parent, name, flags) \ static KFAIL_POINT_DEFINE(parent, name, flags) #define _FAIL_POINT_EVAL(name, cond, code...) \ int RETURN_VALUE; \ \ if (__predict_false(cond && \ fail_point_eval(&_FAIL_POINT_NAME(name), &RETURN_VALUE))) { \ \ code; \ \ } #define KFAIL_POINT_EVAL(name, code...) \ _FAIL_POINT_EVAL(name, true, code) /** * Instantiate a failpoint which returns "RETURN_VALUE" from the function * when triggered. * @param parent The parent sysctl under which to locate the fp's sysctl * @param name The name of the failpoint in the sysctl tree (and printouts) * @return Instantly returns the RETURN_VALUE specified in the * failpoint, if triggered. */ #define KFAIL_POINT_RETURN(parent, name) \ KFAIL_POINT_CODE(parent, name, return RETURN_VALUE) /** * Instantiate a failpoint which returns (void) from the function when * triggered. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree (and printouts) * @return Instantly returns void, if triggered in the failpoint. */ #define KFAIL_POINT_RETURN_VOID(parent, name) \ KFAIL_POINT_CODE(parent, name, return) /** * Instantiate a failpoint which sets an error when triggered. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree (and * printouts) * @param error_var A variable to set to the failpoint's specified * return-value when triggered */ #define KFAIL_POINT_ERROR(parent, name, error_var) \ KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE) /** * Instantiate a failpoint which sets an error and then goes to a * specified label in the function when triggered. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree (and * printouts) * @param error_var A variable to set to the failpoint's specified * return-value when triggered * @param label The location to goto when triggered. */ #define KFAIL_POINT_GOTO(parent, name, error_var, label) \ KFAIL_POINT_CODE(parent, name, (error_var) = RETURN_VALUE; goto label) /** * Instantiate a failpoint which sets its pre- and post-sleep callback * mechanisms. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree (and * printouts) * @param pre_func Function pointer to the pre-sleep function, which will be * called directly before going to sleep. * @param pre_arg Argument to the pre-sleep function * @param post_func Function pointer to the pot-sleep function, which will be * called directly before going to sleep. * @param post_arg Argument to the post-sleep function */ #define KFAIL_POINT_SLEEP_CALLBACKS(parent, name, pre_func, pre_arg, \ post_func, post_arg) \ KFAIL_POINT_CODE_SLEEP_CALLBACKS(parent, name, pre_func, \ pre_arg, post_func, post_arg, return RETURN_VALUE) /** * Instantiate a failpoint which runs arbitrary code when triggered, and sets * its pre- and post-sleep callback mechanisms * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree (and * printouts) * @param pre_func Function pointer to the pre-sleep function, which will be * called directly before going to sleep. * @param pre_arg Argument to the pre-sleep function * @param post_func Function pointer to the pot-sleep function, which will be * called directly before going to sleep. * @param post_arg Argument to the post-sleep function * @param code The arbitrary code to run when triggered. Can reference * "RETURN_VALUE" if desired to extract the specified * user return-value when triggered. Note that this is * implemented with a do-while loop so be careful of * break and continue statements. */ #define KFAIL_POINT_CODE_SLEEP_CALLBACKS(parent, name, pre_func, pre_arg, \ post_func, post_arg, code...) \ do { \ _FAIL_POINT_INIT(parent, name) \ _FAIL_POINT_NAME(name).fp_pre_sleep_fn = pre_func; \ _FAIL_POINT_NAME(name).fp_pre_sleep_arg = pre_arg; \ _FAIL_POINT_NAME(name).fp_post_sleep_fn = post_func; \ _FAIL_POINT_NAME(name).fp_post_sleep_arg = post_arg; \ _FAIL_POINT_EVAL(name, true, code) \ } while (0) /** * Instantiate a failpoint which runs arbitrary code when triggered. * @param parent The parent sysctl under which to locate the sysctl * @param name The name of the failpoint in the sysctl tree * (and printouts) * @param code The arbitrary code to run when triggered. Can reference * "RETURN_VALUE" if desired to extract the specified * user return-value when triggered. Note that this is * implemented with a do-while loop so be careful of * break and continue statements. */ #define KFAIL_POINT_CODE(parent, name, code...) \ do { \ _FAIL_POINT_INIT(parent, name, 0) \ _FAIL_POINT_EVAL(name, true, code) \ } while (0) #define KFAIL_POINT_CODE_FLAGS(parent, name, flags, code...) \ do { \ _FAIL_POINT_INIT(parent, name, flags) \ _FAIL_POINT_EVAL(name, true, code) \ } while (0) #define KFAIL_POINT_CODE_COND(parent, name, cond, flags, code...) \ do { \ _FAIL_POINT_INIT(parent, name, flags) \ _FAIL_POINT_EVAL(name, cond, code) \ } while (0) /** * @} * (end group failpoint) */ #ifdef _KERNEL int fail_point_sysctl(SYSCTL_HANDLER_ARGS); int fail_point_sysctl_status(SYSCTL_HANDLER_ARGS); /* The fail point sysctl tree. */ SYSCTL_DECL(_debug_fail_point); #define DEBUG_FP _debug_fail_point #endif #endif /* _SYS_FAIL_H_ */