Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -159,6 +159,7 @@ MAC_STATIC opt_mac.h MAC_STUB opt_dontuse.h MAC_TEST opt_dontuse.h +MBUF_TRACKING opt_global.h MD_ROOT opt_md.h MD_ROOT_FSTYPE opt_md.h MD_ROOT_SIZE opt_md.h Index: sys/kern/kern_mbuf.c =================================================================== --- sys/kern/kern_mbuf.c +++ sys/kern/kern_mbuf.c @@ -379,6 +379,10 @@ */ EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, EVENTHANDLER_PRI_FIRST); + +#ifdef MBUF_TRACKING + mtx_init(&mbuf_track_mtx, "mbuf_tracking", 0, MTX_DEF); +#endif } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); Index: sys/kern/uipc_debug.c =================================================================== --- sys/kern/uipc_debug.c +++ sys/kern/uipc_debug.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -476,6 +477,19 @@ db_print_sockbuf(&so->so_snd, "so_snd", indent); } +static void +db_print_mbuf(struct mbuf *m) +{ + db_printf("m_len=%d m_flags=0x%b m_data=%p\n", + m->m_len, m->m_flags, M_FLAG_BITS, m->m_data); + db_printf("m_next=%p m_nextpkt=%p\n", m->m_next, m->m_nextpkt); +#ifdef MBUF_TRACKING + db_printf("m_owner=%s m_ownerdata=%p m_lastowner=%s m_lastfree=%s\n", + m->m_owner, m->m_ownerdata, m->m_lastowner, m->m_lastfree); +#endif + +} + DB_SHOW_COMMAND(socket, db_show_socket) { struct socket *so; @@ -520,11 +534,24 @@ struct domain *d; if (!have_addr) { - db_printf("usage: show protosw \n"); + db_printf("usage: show domain \n"); return; } d = (struct domain *)addr; db_print_domain(d, "domain", 0); } + +DB_SHOW_COMMAND(mbuf, db_show_mbuf) +{ + struct mbuf *m; + + if (!have_addr) { + db_printf("usage: show mbuf \n"); + return; + } + m = (struct mbuf *)addr; + + db_print_mbuf(m); +} #endif Index: sys/kern/uipc_mbuf.c =================================================================== --- sys/kern/uipc_mbuf.c +++ 
sys/kern/uipc_mbuf.c @@ -47,6 +47,13 @@ #include #include #include +#include +#ifdef MBUF_TRACKING +#include +#include +#include +#include +#endif int max_linkhdr; int max_protohdr; @@ -60,6 +67,25 @@ int m_defragrandomfailures; #endif +#ifdef MBUF_TRACKING +#define MBUF_TRACK_HASH_SIZE 32 +struct mbuflist mbufs = LIST_HEAD_INITIALIZER(mbufs); +struct mtx mbuf_track_mtx; + +struct mbufbin { + const char *name; + const void *data; + uint32_t count; + LIST_ENTRY(mbufbin) next; +}; + +MALLOC_DEFINE(M_MBUFTRACK, "mbuftrack", "mbuf tracking data"); +static int get_mbuf_usage(struct sbuf *sb); +static int sysctl_kern_ipc_mbufs(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufs, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, sysctl_kern_ipc_mbufs, "A", "Show who owns allocated mbufs"); +#endif + /* * sysctl(8) exported objects */ @@ -110,9 +136,13 @@ * comments. */ #if defined(__LP64__) +#ifndef MBUF_TRACKING CTASSERT(offsetof(struct mbuf, m_dat) == 32); +#endif CTASSERT(sizeof(struct pkthdr) == 56); +#ifndef MBUF_TRACKING CTASSERT(sizeof(struct m_ext) == 48); +#endif #else CTASSERT(offsetof(struct mbuf, m_dat) == 24); CTASSERT(sizeof(struct pkthdr) == 48); @@ -143,10 +173,16 @@ args.flags = flags; args.type = type; - if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) - return (uma_zalloc_arg(zone_mbuf, &args, how)); - if (size <= MCLBYTES) - return (uma_zalloc_arg(zone_pack, &args, how)); + if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) { + m = uma_zalloc_arg(zone_mbuf, &args, how); + MBUF_TRACK_INSERT(m); + return (m); + } + if (size <= MCLBYTES) { + m = uma_zalloc_arg(zone_pack, &args, how); + MBUF_TRACK_INSERT(m); + return (m); + } if (size > MJUMPAGESIZE) return (NULL); @@ -161,6 +197,8 @@ return (NULL); } + MBUF_TRACK_INSERT(m); + return (m); } @@ -191,6 +229,9 @@ uma_zfree(zone_mbuf, m); return (NULL); } + + MBUF_TRACK_INSERT(m); + return (m); } @@ -1910,6 +1951,92 @@ return (m0); } +#ifdef INVARIANTS +/** + * 
Check buffer boundaries when data is stored within the mbuf.
+ * Panics if an M_EXT mbuf's data lies inside the mbuf itself, or if a local
+ * mbuf's m_len exceeds its storage or its data begins inside the header.
+ */
+void
+m_check(struct mbuf *m)
+{
+	char *m_data_begin;
+	char *m_data_end;
+
+	m_data_begin = mtod(m, char *);
+	m_data_end = m_data_begin + m->m_len;
+	if (m->m_flags & M_EXT) {
+		if ((m_data_begin >= (char *)m) &&
+		    (m_data_end < (char *)(m+1))) {
+			panic("m_check: external mbuf appears to be internal %p", m);
+		}
+	} else {
+		/* No local mbuf should ever overrun the buffer */
+		if (m->m_len > MLEN) {
+			panic("m_check: mbuf at %p local data %p len %d "
+			    "exceeds mbuf upper boundary", m,
+			    m_data_end, m->m_len);
+		}
+
+		if (m->m_flags & M_PKTHDR) {
+			/* Check for bad buffers for pkthdr mbufs */
+			if (m->m_len > MHLEN) {
+				panic("m_check: mbuf at %p has m_len %d "
+				    "exceeding %d available for pkthdr mbuf",
+				    m, m->m_len, (int)MHLEN);
+			}
+			/* Data starting before the pkthdr data area is bad. */
+			if ((m_data_begin >= (char *)m) &&
+			    (m_data_begin < ((char *)(m+1) - MHLEN))) {
+				panic("m_check: mbuf at %p local data %p "
+				    " len %d overlays pkthdr mbuf header",
+				    m, m_data_begin, m->m_len);
+			}
+		} else {
+			/*
+			 * m_len > MLEN was rejected above; check overlap only.
+			 */
+			if ((m_data_begin >= (char *)m) &&
+			    (m_data_begin < ((char *)(m+1) - MLEN))) {
+				panic("m_check: mbuf at %p local data %p "
+				    " len %d overlays data mbuf header",
+				    m, m_data_begin, m->m_len);
+			}
+		}
+	}
+}
+
+/**
+ * Check buffer boundaries of an mbuf chain.
+ */
+void
+m_checkm(struct mbuf *m)
+{
+	size_t len;
+	struct mbuf *n;
+
+	/* Check buffer boundaries of mbufs in the chain */
+	len = 0;
+	for (n = m; n != NULL; n = n->m_next) {
+		m_check(n);
+		len += n->m_len;
+	}
+
+	/*
+	 * If the leading mbuf is a pkthdr, check the length recorded
+	 * in the header against the summed length from the chain.
+	 */
+	if (m->m_flags & M_PKTHDR) {
+		if (len != m->m_pkthdr.len) {
+			panic("m_checkm: mbuf chain at %p pkthdr len "
+			    "%d mismatches length of all mbufs %zd",
+			    m, m->m_pkthdr.len, len);
+		}
+	}
+}
+
+#endif /* INVARIANTS */
+
 #ifdef MBUF_PROFILING
 
 #define MP_BUCKETS 32 /* don't just change this as things may overflow.*/
@@ -2059,3 +2186,129 @@
 	    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");
 #endif
 
+#ifdef MBUF_TRACKING
+/*
+ * Handler for the kern.ipc.mbufs sysctl: reports, as a string, how many
+ * tracked mbufs each owner currently holds.
+ */
+static int
+sysctl_kern_ipc_mbufs(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf sb;
+	int error = 0;
+
+	sbuf_new_for_sysctl(&sb, NULL, 128, req);
+
+	error = get_mbuf_usage(&sb);
+	if (error)
+		goto out;
+
+	error = sbuf_finish(&sb);
+ out:
+	sbuf_delete(&sb);
+	return error;
+}
+
+/*
+ * Fill sbuf with information on who owns which mbufs.
+ *
+ * Walks the global tracking list under mbuf_track_mtx, groups mbufs by
+ * (m_owner, m_ownerdata) in a temporary hash table, and prints one line per
+ * group after the mutex has been dropped.  Returns 0 on success or ENOMEM
+ * if a histogram entry could not be allocated.
+ */
+static int
+get_mbuf_usage(struct sbuf *sb)
+{
+	int error = 0;
+	u_long hashmask, hashtmp, i;
+	c_db_sym_t sym;
+	db_expr_t diff, symval;
+	struct mbuf *m;
+	struct mbufbin *mbi, *tmp;
+	const char *owner, *symname;
+	LIST_HEAD(mbufbinhead, mbufbin) *mbufhash, *bin;
+
+	mbufhash = hashinit(MBUF_TRACK_HASH_SIZE, M_MBUFTRACK, &hashmask);
+
+	/* Lock to prevent mbufs moving around. */
+	mtx_lock(&mbuf_track_mtx);
+
+	/* Build histogram of mbuf consumers */
+	LIST_FOREACH(m, &mbufs, m_mlist) {
+		/*
+		 * Generate hash from owner and data.  The owner string may be
+		 * NULL, and m_ownerdata is an opaque cookie, so hash the
+		 * pointer value itself rather than the memory it refers to.
+		 */
+		owner = m->m_owner;
+		hashtmp = hash32_str(owner != NULL ? owner : "", hashmask);
+		hashtmp = hash32_buf(&m->m_ownerdata, sizeof(m->m_ownerdata),
+		    hashtmp);
+		bin = mbufhash + (hashtmp & hashmask);
+
+		/* Search the bin for a match */
+		tmp = NULL;
+		LIST_FOREACH(mbi, bin, next) {
+			if (mbi->data != m->m_ownerdata)
+				continue;
+			if (mbi->name == owner || (mbi->name != NULL &&
+			    owner != NULL && strcmp(mbi->name, owner) == 0)) {
+				tmp = mbi;
+				break;
+			}
+		}
+		if (tmp != NULL)
+			tmp->count++;
+		else {
+			mbi = malloc(sizeof(*mbi), M_MBUFTRACK, M_NOWAIT);
+			if (mbi == NULL) {
+				/* Leave the loop so the mutex is released. */
+				error = ENOMEM;
+				break;
+			}
+			mbi->name = owner;
+			mbi->data = m->m_ownerdata;
+			mbi->count = 1;
+			LIST_INSERT_HEAD(bin, mbi, next);
+		}
+	}
+
+	/* Done walking the list (or gave up); always unlock before leaving. */
+	mtx_unlock(&mbuf_track_mtx);
+	if (error != 0)
+		goto out;
+
+	/* Display */
+	sbuf_printf(sb, "Allocated mbufs:");
+	for (i = 0; i <= hashmask; i++) {
+		bin = mbufhash + i;
+		LIST_FOREACH(mbi, bin, next) {
+			if (mbi->name && !strcmp(mbi->name, "function")) {
+				sym = db_search_symbol((db_addr_t)mbi->data, DB_STGY_PROC, &diff);
+				db_symbol_values(sym, &symname, &symval);
+				sbuf_printf(sb, "\n\t%5u owned by %s (%p)",
+				    mbi->count, symname, mbi->data);
+			} else if (mbi->name) {
+				sbuf_printf(sb, "\n\t%5u owned by %s %p",
+				    mbi->count, mbi->name, mbi->data);
+			} else {
+				sbuf_printf(sb, "\n\t%5u owned by (null) %p",
+				    mbi->count, mbi->data);
+			}
+		}
+	}
+ out:
+	/* Free up */
+	for (i = 0; i <= hashmask; i++) {
+		bin = mbufhash + i;
+		LIST_FOREACH_SAFE(mbi, bin, next, tmp) {
+			LIST_REMOVE(mbi, next);
+			free(mbi, M_MBUFTRACK);
+		}
+	}
+	hashdestroy(mbufhash, M_MBUFTRACK, hashmask);
+
+	return (error);
+}
+#endif
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -44,6 +44,71 @@
 #endif
 #endif
 
+#ifdef MBUF_TRACKING
+#include
+#include
+#include
+
+LIST_HEAD(mbuflist, mbuf);
+
+extern struct
mbuflist mbufs;
+extern struct mtx mbuf_track_mtx;
+/* Record (x, y) as the owner name and owner cookie of mbuf m. */
+#define M_HANDOFF(m, x, y) \
+	do { \
+		(m)->m_owner = (x); \
+		(m)->m_ownerdata = (y); \
+	} while (0)
+#define M_HANDOFFM(m, x, y) \
+	do { \
+		struct mbuf *_n = (m); \
+		while (_n) { \
+			M_HANDOFF(_n, x, y); \
+			_n = _n->m_next; \
+		} \
+	} while (0)
+#define M_TRACK_FREE(m, x) \
+	do { \
+		(m)->m_lastowner = (m)->m_owner; \
+		(m)->m_lastfree = (x); \
+	} while (0)
+#define M_TRACK_FREE_EXT(m, x) \
+	do { \
+		(m)->m_ext.ext_lastowner = (m)->m_owner; \
+		(m)->m_ext.ext_lastfree = (x); \
+	} while (0)
+/* Tag a non-NULL mbuf with get_caller_ip() and add it to the global list. */
+#define MBUF_TRACK_INSERT(mb) \
+	do { \
+		if ((mb) != NULL) { \
+			M_HANDOFF(mb, "function", get_caller_ip()); \
+			mtx_lock(&mbuf_track_mtx); \
+			LIST_INSERT_HEAD(&mbufs, (mb), m_mlist); \
+			mtx_unlock(&mbuf_track_mtx); \
+		} \
+	} while (0)
+#define MBUF_TRACK_REMOVE(mb) \
+	do { \
+		if ((mb) != NULL) { \
+			mtx_lock(&mbuf_track_mtx); \
+			LIST_REMOVE((mb), m_mlist); \
+			mtx_unlock(&mbuf_track_mtx); \
+		} \
+	} while (0)
+
+void mbuf_track_init(void);
+
+#else /* !MBUF_TRACKING */
+#define M_HANDOFF(m, x, y) do { } while (0)
+#define M_HANDOFFM(m, x, y) do { } while (0)
+#define M_TRACK_FREE(m, x) do { } while (0)
+#define M_TRACK_FREE_EXT(m, x) do { } while (0)
+#define MBUF_TRACK_INSERT(mb) do { } while (0)
+#define MBUF_TRACK_REMOVE(mb) do { } while (0)
+#define get_mbuf_usage(fmt) do { } while (0)
+#define print_mbuf_usage() do { } while (0)
+#endif /* MBUF_TRACKING */
+
 /*
  * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
* An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in @@ -169,6 +234,10 @@ (struct mbuf *, void *, void *); void *ext_arg1; /* optional argument pointer */ void *ext_arg2; /* optional argument pointer */ +#ifdef MBUF_TRACKING + const char *ext_lastowner; + const char *ext_lastfree; +#endif }; /* @@ -200,6 +269,13 @@ #if !defined(__LP64__) uint32_t m_pad; /* pad for 64bit alignment */ #endif +#ifdef MBUF_TRACKING + const char *m_owner; + const void *m_ownerdata; + const char *m_lastowner; + const char *m_lastfree; + LIST_ENTRY(mbuf) m_mlist; +#endif /* * A set of optional headers (packet header, external storage header) @@ -639,10 +715,13 @@ m_get(int how, short type) { struct mb_args args; + struct mbuf *mb; args.flags = 0; args.type = type; - return (uma_zalloc_arg(zone_mbuf, &args, how)); + mb = uma_zalloc_arg(zone_mbuf, &args, how); + MBUF_TRACK_INSERT(mb); + return (mb); } /* @@ -659,6 +738,7 @@ m = uma_zalloc_arg(zone_mbuf, &args, how); if (m != NULL) bzero(m->m_data, MLEN); + MBUF_TRACK_INSERT(m); return (m); } @@ -666,20 +746,26 @@ m_gethdr(int how, short type) { struct mb_args args; + struct mbuf *mb; args.flags = M_PKTHDR; args.type = type; - return (uma_zalloc_arg(zone_mbuf, &args, how)); + mb = uma_zalloc_arg(zone_mbuf, &args, how); + MBUF_TRACK_INSERT(mb); + return (mb); } static __inline struct mbuf * m_getcl(int how, short type, int flags) { struct mb_args args; + struct mbuf *mb; args.flags = flags; args.type = type; - return (uma_zalloc_arg(zone_pack, &args, how)); + mb = uma_zalloc_arg(zone_pack, &args, how); + MBUF_TRACK_INSERT(mb); + return (mb); } static __inline int @@ -983,6 +1069,10 @@ struct mbuf *m_split(struct mbuf *, int, int); struct mbuf *m_uiotombuf(struct uio *, int, int, int, int); struct mbuf *m_unshare(struct mbuf *, int); +#ifdef INVARIANTS +void m_check(struct mbuf *); +void m_checkm(struct mbuf *); +#endif /*- * Network packets may have annotations attached by affixing a list of @@ -1170,12 +1260,22 @@ { struct 
mbuf *n = m->m_next; + MBUF_TRACK_REMOVE(m); + M_TRACK_FREE(m, __func__); + if ((m->m_flags & M_EXT) != 0) + M_TRACK_FREE_EXT(m, __func__); + M_HANDOFF(m, __func__, 0); + if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) m_tag_delete_chain(m, NULL); if (m->m_flags & M_EXT) mb_free_ext(m); - else if ((m->m_flags & M_NOFREE) == 0) + else if ((m->m_flags & M_NOFREE) == 0) { +#ifdef INVARIANTS + m_check(m); +#endif uma_zfree(zone_mbuf, m); + } return (n); } Index: sys/x86/include/stack.h =================================================================== --- sys/x86/include/stack.h +++ sys/x86/include/stack.h @@ -25,7 +25,6 @@ * * $FreeBSD$ */ - #ifndef _X86_STACK_H #define _X86_STACK_H @@ -52,6 +51,27 @@ uint32_t f_retaddr; uint32_t f_arg0; }; +typedef const struct amd64_frame *stack_frame_ptr; +struct __call_instruction { unsigned char c; }; +typedef const struct __call_instruction *instruction_ptr; + +/** + * @return instruction pointer of the caller of the given stack + */ +instruction_ptr stack_frame_caller_ip(stack_frame_ptr frame); + +/** + * current_stack_frame() is a macro instead of an inline function so + * cannot possibly have its own stack frame. + */ +#define _current_stack_frame(fp) \ + ({ stack_frame_ptr fp; __asm("movq %%rbp,%0;" : "=r" (fp)); fp; }) +#define current_stack_frame() _current_stack_frame(__UNIQ) +/** + * This macro evaluates to the caller of the current function. It is a macro so + * that it cannot possibly have its own stack frame. 
+ */
+#define get_caller_ip() stack_frame_caller_ip(current_stack_frame())
 #endif /* __amd64__ */
 
 #ifdef _KERNEL
Index: sys/x86/x86/stack_machdep.c
===================================================================
--- sys/x86/x86/stack_machdep.c
+++ sys/x86/x86/stack_machdep.c
@@ -166,3 +166,22 @@
 #endif
 	stack_capture(curthread, st, fp);
 }
+
+#ifdef __amd64__
+/*
+ * Return an address that falls within the call instruction whose return
+ * address is saved in the given frame.  Only built where stack.h declares
+ * stack_frame_ptr and instruction_ptr, i.e. under __amd64__.
+ */
+instruction_ptr
+stack_frame_caller_ip(stack_frame_ptr frame)
+{
+	/*
+	 * In x86 stacks, what is stored on the stack is not the
+	 * address of the call, but rather the return address,
+	 * i.e. what the ip should be after the call. To account for
+	 * this, decrement the address by 1 so that it will resolve to
+	 * the call instruction, not the one after.
+	 */
+	return ((instruction_ptr)(frame->f_retaddr - 1));
+}
+#endif /* __amd64__ */