Index: sys/kern/kern_malloc.c =================================================================== --- sys/kern/kern_malloc.c +++ sys/kern/kern_malloc.c @@ -53,6 +53,7 @@ #include #include +#include #include #include #include @@ -230,16 +231,8 @@ "Kernel malloc debugging options"); #endif -/* - * malloc(9) fault injection -- cause malloc failures every (n) mallocs when - * the caller specifies M_NOWAIT. If set to 0, no failures are caused. - */ #ifdef MALLOC_MAKE_FAILURES -static int malloc_failure_rate; -static int malloc_nowait_count; static int malloc_failure_count; -SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RWTUN, - &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail"); SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD, &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures"); #endif @@ -505,17 +498,6 @@ once++; } } -#endif -#ifdef MALLOC_MAKE_FAILURES - if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) { - atomic_add_int(&malloc_nowait_count, 1); - if ((malloc_nowait_count % malloc_failure_rate) == 0) { - atomic_add_int(&malloc_failure_count, 1); - t_malloc_fail = time_uptime; - *vap = NULL; - return (EJUSTRETURN); - } - } #endif if (flags & M_WAITOK) { KASSERT(curthread->td_intr_nesting_level == 0, @@ -543,6 +525,22 @@ } #endif +#ifdef MALLOC_MAKE_FAILURES +static __noinline bool +malloc_inject_failure(struct malloc_type *mtp) +{ + + if (!uma_dbg_nowait_fail_enabled(mtp->ks_shortdesc)) + return (false); + + atomic_add_int(&malloc_failure_count, 1); + uma_dbg_nowait_fail_record(mtp->ks_shortdesc); + t_malloc_fail = time_uptime; + + return (true); +} +#endif + /* * malloc: * @@ -561,6 +559,11 @@ unsigned long osize = size; #endif + MALLOC_NOWAIT_FAIL_POINT(flags, + if (malloc_inject_failure(mtp)) + return (NULL); + ); + #ifdef MALLOC_DEBUG va = NULL; if (malloc_dbg(&va, &size, mtp, flags) != 0) @@ -648,6 +651,11 @@ void *ret; int domain; + MALLOC_NOWAIT_FAIL_POINT(flags, + if 
(malloc_inject_failure(mtp)) + return (NULL); + ); + vm_domainset_iter_policy_init(&di, ds, &domain, &flags); do { ret = malloc_domain(size, mtp, domain, flags); Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c +++ sys/vm/uma_core.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -2335,6 +2336,23 @@ uma_zfree_arg(zone, item, udata); } +#ifdef MALLOC_MAKE_FAILURES +static __noinline bool +zalloc_inject_failure(uma_zone_t zone) +{ + + if (((zone->uz_flags & UMA_ZONE_MALLOC) != 0 && + g_uma_dbg_nowait_fail_zalloc_ignore_malloc) || + !uma_dbg_nowait_fail_enabled(zone->uz_name)) + return (false); + + counter_u64_add(zone->uz_fails, 1); + uma_dbg_nowait_fail_record(zone->uz_name); + + return (true); +} +#endif + /* See uma.h */ void * uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) @@ -2366,6 +2384,11 @@ KASSERT((flags & M_ZERO) == 0, ("allocating from a pcpu zone " "with M_ZERO passed")); + MALLOC_NOWAIT_FAIL_POINT(flags, + if (zalloc_inject_failure(zone)) + return (NULL); + ); + #ifdef DEBUG_MEMGUARD if (memguard_cmp_zone(zone)) { item = memguard_alloc(zone->uz_size, flags); @@ -2604,6 +2627,11 @@ KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), ("uma_zalloc_domain: called with spinlock or critical section held")); + MALLOC_NOWAIT_FAIL_POINT(flags, + if (zalloc_inject_failure(zone)) + return (NULL); + ); + return (zone_alloc_item(zone, udata, domain, flags)); } Index: sys/vm/uma_dbg.c =================================================================== --- sys/vm/uma_dbg.c +++ sys/vm/uma_dbg.c @@ -35,6 +35,8 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_ddb.h" +#include "opt_stack.h" #include "opt_vm.h" #include @@ -46,6 +48,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include @@ -225,3 +232,254 @@ { (void)mtrash_ctor(mem, size, NULL, 0); } + +#ifdef MALLOC_MAKE_FAILURES +/* + * Debugging and 
failure injection for UMA and malloc M_NOWAIT memory + * allocations. This code and the hooks in UMA and malloc allow for + * injection of failures for specific UMA zones and malloc types and for + * tracking of the last failure injected. + * + * Configuration is done via the sysctls under debug.mnowait_failure. + * There is a whitelist and a blacklist containing UMA zone names (see + * vmstat -z) and malloc type names (see vmstat -m). If any entries are + * present in the whitelist, failure injection will be enabled for only + * the zones and malloc types matching the whitelist entries. If the + * whitelist is empty, then only blacklist matches will be excluded. + * Certain zones and malloc types may be known not to behave well with + * failure injection, and they may be present in the default + * blacklist. + * + * Enabling failure injection is done via the fail point configurable by + * sysctl debug.fail_point.mnowait. See fail(9). + * + * By default, the zalloc failure injection hooks ignore allocations + * done for malloc. + * + * TODO: move above to a man page. + */ + +#if defined(DDB) || defined(STACK) +#define HAVE_STACK +#endif + +/* Uma Dbg Nowait Failure Globals -> g_udnf_ */ + +/* Configuration. */ +bool g_uma_dbg_nowait_fail_zalloc_ignore_malloc = true; +#define NOWAIT_FAIL_LIST_BUFSIZE 1024 +static char g_udnf_whitelist[NOWAIT_FAIL_LIST_BUFSIZE]; +static char g_udnf_blacklist[NOWAIT_FAIL_LIST_BUFSIZE] = + "ata_request," + "BUF TRIE," + "ifaddr," + "kobj," + "linker," + "pcb," + "sackhole," + "sctp_ifa," + "sctp_ifn," + "sctp_vrf"; + +static struct rwlock g_udnf_conf_lock; +RW_SYSINIT(uma_dbg_nowait_conf, &g_udnf_conf_lock, "uma dbg nowait conf"); + +/* Tracking. 
*/ +#define NOWAIT_FAIL_NAME_BUFSIZE 80 +static char g_udnf_last_name[NOWAIT_FAIL_NAME_BUFSIZE]; +static char g_udnf_last_comm[MAXCOMLEN + 1]; +static pid_t g_udnf_last_pid; +static lwpid_t g_udnf_last_tid; +static int g_udnf_last_ticks; + +#ifdef HAVE_STACK +static struct stack g_udnf_last_stack; +#endif + +static struct mtx g_udnf_track_lock; +MTX_SYSINIT(uma_dbg_nowait_track, &g_udnf_track_lock, "uma dbg nowait track", + 0); + +void +uma_dbg_nowait_fail_record(const char *name) +{ +#ifdef HAVE_STACK + struct stack st = {}; +#endif + struct thread *td; + +#ifdef HAVE_STACK + stack_save(&st); +#endif + td = curthread; + + mtx_lock(&g_udnf_track_lock); +#ifdef HAVE_STACK + stack_copy(&st, &g_udnf_last_stack); +#endif + strlcpy(g_udnf_last_name, name, sizeof(g_udnf_last_name)); + g_udnf_last_tid = td->td_tid; + g_udnf_last_pid = td->td_proc->p_pid; + strlcpy(g_udnf_last_comm, td->td_proc->p_comm, + sizeof(g_udnf_last_comm)); + g_udnf_last_ticks = ticks; + mtx_unlock(&g_udnf_track_lock); +} + +static int +sysctl_debug_mnowait_failure_last_injection(SYSCTL_HANDLER_ARGS) +{ + char last_name[NOWAIT_FAIL_NAME_BUFSIZE]; + char last_comm[MAXCOMLEN + 1]; + struct sbuf sbuf; +#ifdef HAVE_STACK + struct stack last_stack; +#endif + pid_t last_pid; + lwpid_t last_tid; + u_int delta; + int error; + int last_ticks; + + mtx_lock(&g_udnf_track_lock); +#ifdef HAVE_STACK + stack_copy(&g_udnf_last_stack, &last_stack); +#endif + strlcpy(last_name, g_udnf_last_name, sizeof(last_name)); + last_tid = g_udnf_last_tid; + last_pid = g_udnf_last_pid; + strlcpy(last_comm, g_udnf_last_comm, sizeof(last_comm)); + last_ticks = g_udnf_last_ticks; + mtx_unlock(&g_udnf_track_lock); + + if (last_tid == 0) + return (0); + + delta = ticks - last_ticks; + + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + sbuf_printf(&sbuf, "%s[%d] tid %d alloc %s %u.%03u s ago", + last_comm, last_pid, last_tid, + last_name, delta / hz, (delta % hz) * 1000 / hz); +#ifdef HAVE_STACK + sbuf_putc(&sbuf, '\n'); + 
stack_sbuf_print(&sbuf, &last_stack); +#endif + error = sbuf_finish(&sbuf); + sbuf_delete(&sbuf); + + return (error); +} + +static bool +str_in_list(const char *list, char delim, const char *str) +{ + const char *b, *e; + size_t blen, slen; + + b = list; + slen = strlen(str); + for (;;) { + e = strchr(b, delim); + blen = e == NULL ? strlen(b) : e - b; + if (blen == slen && strncmp(b, str, slen) == 0) + return (true); + if (e == NULL) + break; + b = e + 1; + } + return (false); +} + +bool +uma_dbg_nowait_fail_enabled(const char *name) +{ + bool fail; + + /* Protect ourselves from the sysctl handlers. */ + rw_rlock(&g_udnf_conf_lock); + if (g_udnf_whitelist[0] == '\0') + fail = !str_in_list(g_udnf_blacklist, ',', name); + else + fail = str_in_list(g_udnf_whitelist, ',', name); + rw_runlock(&g_udnf_conf_lock); + + return (fail); +} + +/* + * XXX provide SYSCTL_STRING_LOCKED / sysctl_handle_string_locked? + * This is basically just a different sysctl_handle_string. This one wraps + * the string manipulation in a lock and in a way that will not cause a sleep + * under that lock. 
+ */ +static int +sysctl_debug_mnowait_failure_list(SYSCTL_HANDLER_ARGS) +{ + char *newbuf = NULL; + int error, newlen; + bool have_lock = false; + + if (req->newptr != NULL) { + newlen = req->newlen - req->newidx; + if (newlen >= arg2) { + error = EINVAL; + goto out; + } + newbuf = malloc(newlen, M_TEMP, M_WAITOK); + error = SYSCTL_IN(req, newbuf, newlen); + if (error != 0) + goto out; + } + + error = sysctl_wire_old_buffer(req, arg2); + if (error != 0) + goto out; + + rw_wlock(&g_udnf_conf_lock); + have_lock = true; + + error = SYSCTL_OUT(req, arg1, strnlen(arg1, arg2 - 1) + 1); + if (error != 0) + goto out; + + if (newbuf == NULL) + goto out; + + bcopy(newbuf, arg1, newlen); + ((char *)arg1)[newlen] = '\0'; + out: + if (have_lock) + rw_wunlock(&g_udnf_conf_lock); + free(newbuf, M_TEMP); + return (error); +} + +SYSCTL_NODE(_debug, OID_AUTO, mnowait_failure, CTLFLAG_RW, 0, + "Control of M_NOWAIT memory allocation failure injection."); + +KFAIL_POINT_DEFINE(DEBUG_FP, mnowait, 0); + +SYSCTL_PROC(_debug_mnowait_failure, OID_AUTO, blacklist, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, g_udnf_blacklist, + sizeof(g_udnf_blacklist), sysctl_debug_mnowait_failure_list, "A", + "With debug.fail_point.mnowait and with an empty whitelist, CSV list of " + "zones which remain unaffected."); + +SYSCTL_PROC(_debug_mnowait_failure, OID_AUTO, whitelist, + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, g_udnf_whitelist, + sizeof(g_udnf_whitelist), sysctl_debug_mnowait_failure_list, "A", + "With debug.fail_point.mnowait, CSV list of zones exclusively affected. 
" + "With an empty whitelist, all zones but those on the blacklist " + "are affected."); + +SYSCTL_BOOL(_debug_mnowait_failure, OID_AUTO, zalloc_ignore_malloc, + CTLFLAG_RW, &g_uma_dbg_nowait_fail_zalloc_ignore_malloc, 0, + "Whether zalloc failure injection ignores (does not inject) malloc " + "zones."); + +SYSCTL_PROC(_debug_mnowait_failure, OID_AUTO, last_injection, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, + sysctl_debug_mnowait_failure_last_injection, "A", + "The last allocation for which a failure was injected."); +#endif /* MALLOC_MAKE_FAILURES */ Index: sys/vm/uma_int.h =================================================================== --- sys/vm/uma_int.h +++ sys/vm/uma_int.h @@ -34,6 +34,7 @@ #include #include #include +#include /* * This file includes definitions, structures, prototypes, and inlines that @@ -493,6 +494,21 @@ /* Set a global soft limit on UMA managed memory. */ void uma_set_limit(unsigned long limit); + +#ifdef MALLOC_MAKE_FAILURES +bool uma_dbg_nowait_fail_enabled(const char *name); +void uma_dbg_nowait_fail_record(const char *name); +extern bool g_uma_dbg_nowait_fail_zalloc_ignore_malloc; +KFAIL_POINT_DECLARE(mnowait); +#define MALLOC_NOWAIT_FAIL_POINT(flags, code...) \ + KFAIL_POINT_EVAL(mnowait, \ + if (((flags) & M_NOWAIT) != 0) { \ + code; \ + } \ + ) +#else +#define MALLOC_NOWAIT_FAIL_POINT(flags, code...) +#endif /* MALLOC_MAKE_FAILURES */ #endif /* _KERNEL */ #endif /* VM_UMA_INT_H */