Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/kern_mbuf.c
Show All 39 Lines | |||||
#include <sys/mbuf.h> | #include <sys/mbuf.h> | ||||
#include <sys/domain.h> | #include <sys/domain.h> | ||||
#include <sys/eventhandler.h> | #include <sys/eventhandler.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/limits.h> | #include <sys/limits.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/protosw.h> | #include <sys/protosw.h> | ||||
#include <sys/sf_buf.h> | |||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/socket.h> | #include <sys/socket.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <net/if.h> | #include <net/if.h> | ||||
#include <net/if_var.h> | #include <net/if_var.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
int nmbufs; /* limits number of mbufs */ | int nmbufs; /* limits number of mbufs */ | ||||
int nmbclusters; /* limits number of mbuf clusters */ | int nmbclusters; /* limits number of mbuf clusters */ | ||||
int nmbjumbop; /* limits number of page size jumbo clusters */ | int nmbjumbop; /* limits number of page size jumbo clusters */ | ||||
int nmbjumbo9; /* limits number of 9k jumbo clusters */ | int nmbjumbo9; /* limits number of 9k jumbo clusters */ | ||||
int nmbjumbo16; /* limits number of 16k jumbo clusters */ | int nmbjumbo16; /* limits number of 16k jumbo clusters */ | ||||
bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile */ | |||||
SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, | |||||
&mb_use_ext_pgs, 0, | |||||
"Use unmapped mbufs for sendfile(2)"); | |||||
static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ | static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ | ||||
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, | SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, | ||||
"Maximum real memory allocatable to various mbuf types"); | "Maximum real memory allocatable to various mbuf types"); | ||||
static counter_u64_t snd_tag_count; | static counter_u64_t snd_tag_count; | ||||
SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW, | SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW, | ||||
&snd_tag_count, "# of active mbuf send tags"); | &snd_tag_count, "# of active mbuf send tags"); | ||||
▲ Show 20 Lines • Show All 154 Lines • ▼ Show 20 Lines | |||||
* Zones from which we allocate. | * Zones from which we allocate. | ||||
*/ | */ | ||||
uma_zone_t zone_mbuf; | uma_zone_t zone_mbuf; | ||||
uma_zone_t zone_clust; | uma_zone_t zone_clust; | ||||
uma_zone_t zone_pack; | uma_zone_t zone_pack; | ||||
uma_zone_t zone_jumbop; | uma_zone_t zone_jumbop; | ||||
uma_zone_t zone_jumbo9; | uma_zone_t zone_jumbo9; | ||||
uma_zone_t zone_jumbo16; | uma_zone_t zone_jumbo16; | ||||
uma_zone_t zone_extpgs; | |||||
/* | /* | ||||
* Local prototypes. | * Local prototypes. | ||||
*/ | */ | ||||
static int mb_ctor_mbuf(void *, int, void *, int); | static int mb_ctor_mbuf(void *, int, void *, int); | ||||
static int mb_ctor_clust(void *, int, void *, int); | static int mb_ctor_clust(void *, int, void *, int); | ||||
static int mb_ctor_pack(void *, int, void *, int); | static int mb_ctor_pack(void *, int, void *, int); | ||||
static void mb_dtor_mbuf(void *, int, void *); | static void mb_dtor_mbuf(void *, int, void *); | ||||
static void mb_dtor_pack(void *, int, void *); | static void mb_dtor_pack(void *, int, void *); | ||||
static int mb_zinit_pack(void *, int, int); | static int mb_zinit_pack(void *, int, int); | ||||
static void mb_zfini_pack(void *, int); | static void mb_zfini_pack(void *, int); | ||||
static void mb_reclaim(uma_zone_t, int); | static void mb_reclaim(uma_zone_t, int); | ||||
static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); | static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); | ||||
/* Ensure that MSIZE is a power of 2. */ | /* Ensure that MSIZE is a power of 2. */ | ||||
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); | CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); | ||||
_Static_assert(sizeof(struct mbuf_ext_pgs) == 256, | |||||
"mbuf_ext_pgs size mismatch"); | |||||
/* | /* | ||||
* Initialize FreeBSD Network buffer allocation. | * Initialize FreeBSD Network buffer allocation. | ||||
*/ | */ | ||||
static void | static void | ||||
mbuf_init(void *dummy) | mbuf_init(void *dummy) | ||||
{ | { | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | |||||
#endif | #endif | ||||
UMA_ALIGN_PTR, 0); | UMA_ALIGN_PTR, 0); | ||||
uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); | uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); | ||||
if (nmbjumbo16 > 0) | if (nmbjumbo16 > 0) | ||||
nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); | nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); | ||||
uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); | uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); | ||||
uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); | uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); | ||||
zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME, | |||||
sizeof(struct mbuf_ext_pgs), | |||||
#ifdef INVARIANTS | |||||
trash_ctor, trash_dtor, trash_init, trash_fini, | |||||
#else | |||||
NULL, NULL, NULL, NULL, | |||||
#endif | |||||
UMA_ALIGN_CACHE, 0); | |||||
/* | /* | ||||
* Hook event handler for low-memory situation, used to | * Hook event handler for low-memory situation, used to | ||||
* drain protocols and push data back to the caches (UMA | * drain protocols and push data back to the caches (UMA | ||||
* later pushes it back to VM). | * later pushes it back to VM). | ||||
*/ | */ | ||||
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, | EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, | ||||
EVENTHANDLER_PRI_FIRST); | EVENTHANDLER_PRI_FIRST); | ||||
▲ Show 20 Lines • Show All 429 Lines • ▼ Show 20 Lines | mb_reclaim(uma_zone_t zone __unused, int pending __unused) | ||||
for (dp = domains; dp != NULL; dp = dp->dom_next) | for (dp = domains; dp != NULL; dp = dp->dom_next) | ||||
for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) | for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) | ||||
if (pr->pr_drain != NULL) | if (pr->pr_drain != NULL) | ||||
(*pr->pr_drain)(); | (*pr->pr_drain)(); | ||||
} | } | ||||
/* | /* | ||||
* Free "count" units of I/O from an mbuf chain. They could be held | |||||
* in EXT_PGS or just as a normal mbuf. This code is intended to be | |||||
* called in an error path (I/O error, closed connection, etc). | |||||
*/ | |||||
void | |||||
mb_free_notready(struct mbuf *m, int count) | |||||
{ | |||||
int i; | |||||
for (i = 0; i < count && m != NULL; i++) { | |||||
if ((m->m_flags & M_EXT) != 0 && | |||||
m->m_ext.ext_type == EXT_PGS) { | |||||
m->m_ext.ext_pgs->nrdy--; | |||||
if (m->m_ext.ext_pgs->nrdy != 0) | |||||
continue; | |||||
} | |||||
m = m_free(m); | |||||
} | |||||
KASSERT(i == count, ("Removed only %d items from %p", i, m)); | |||||
} | |||||
/*
 * Ensure it is possible to downgrade an EXT_PGS mbuf to a normal
 * mbuf: the fixed header and trailer storage must fit in the data
 * area of a regular (non-pkthdr) mbuf.
 *
 * XXXJHB: I think this is no longer needed?  The callers of
 * mb_unmapped_compress all check the length against MLEN, and
 * mb_unmapped_compress allows data to be stored in unmapped pages.
 */
CTASSERT(MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN < MLEN);
/* | |||||
* Compress an unmapped mbuf into a simple mbuf when it holds a small | |||||
* amount of data. This is used as a DOS defense to avoid having | |||||
* small packets tie up wired pages, an ext_pgs structure, and an | |||||
* mbuf. Since this converts the existing mbuf in place, it can only | |||||
* be used if there are no other references to 'm'. | |||||
*/ | |||||
int | |||||
mb_unmapped_compress(struct mbuf *m) | |||||
{ | |||||
volatile u_int *refcnt; | |||||
struct mbuf m_temp; | |||||
/* | |||||
* Assert that 'm' does not have a packet header. If 'm' had | |||||
* a packet header, it would only be able to hold MHLEN bytes | |||||
* and m_data would have to be initialized differently. | |||||
*/ | |||||
KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) && | |||||
m->m_ext.ext_type == EXT_PGS, | |||||
("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m)); | |||||
KASSERT(m->m_len <= MLEN, ("m_len too large %p", m)); | |||||
gallatin: I just hit a panic on this kassert in the netflix kernel. The mbuf looked like the following. | |||||
Done Inline ActionsHmm, this needs more changes then. Namely, we need to set m_temp.m_data and possibly m_temp.m_len up so that the m_copydata will copy the correct bytes of data. Ah, we do copy all that in the memcpy, I just need to fix the comment above the memcpy. jhb: Hmm, this needs more changes then. Namely, we need to set m_temp.m_data and possibly m_temp. | |||||
if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { | |||||
refcnt = &m->m_ext.ext_count; | |||||
} else { | |||||
KASSERT(m->m_ext.ext_cnt != NULL, | |||||
("%s: no refcounting pointer on %p", __func__, m)); | |||||
refcnt = m->m_ext.ext_cnt; | |||||
} | |||||
if (*refcnt != 1) | |||||
return (EBUSY); | |||||
/* | |||||
* Copy mbuf header and m_ext portion of 'm' to 'm_temp' to | |||||
* create a "fake" EXT_PGS mbuf that can be used with | |||||
* m_copydata() as well as the ext_free callback. | |||||
*/ | |||||
memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof (m->m_ext)); | |||||
m_temp.m_next = NULL; | |||||
m_temp.m_nextpkt = NULL; | |||||
/* Turn 'm' into a "normal" mbuf. */ | |||||
m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP); | |||||
m->m_data = m->m_dat; | |||||
/* Copy data from template's ext_pgs. */ | |||||
m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t)); | |||||
/* Free the backing pages. */ | |||||
m_temp.m_ext.ext_free(&m_temp); | |||||
/* Finally, free the ext_pgs struct. */ | |||||
uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs); | |||||
return (0); | |||||
} | |||||
/* | |||||
* These next few routines are used to permit downgrading an unmapped | |||||
* mbuf to a chain of mapped mbufs. This is used when an interface | |||||
 * doesn't support unmapped mbufs or if checksums need to be | |||||
* computed in software. | |||||
* | |||||
* Each unmapped mbuf is converted to a chain of mbufs. First, any | |||||
* TLS header data is stored in a regular mbuf. Second, each page of | |||||
* unmapped data is stored in an mbuf with an EXT_SFBUF external | |||||
* cluster. These mbufs use an sf_buf to provide a valid KVA for the | |||||
* associated physical page. They also hold a reference on the | |||||
* original EXT_PGS mbuf to ensure the physical page doesn't go away. | |||||
* Finally, any TLS trailer data is stored in a regular mbuf. | |||||
* | |||||
* mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF | |||||
* mbufs. It frees the associated sf_buf and releases its reference | |||||
* on the original EXT_PGS mbuf. | |||||
* | |||||
* _mb_unmapped_to_ext() is a helper function that converts a single | |||||
* unmapped mbuf into a chain of mbufs. | |||||
* | |||||
 * mb_unmapped_to_ext() is the public function that walks an mbuf | |||||
 * chain converting any unmapped mbufs to mapped mbufs.  It returns | |||||
 * the new chain of mapped mbufs on success.  On failure it frees | |||||
 * the original mbuf chain and returns NULL. | |||||
*/ | |||||
static void | |||||
mb_unmapped_free_mext(struct mbuf *m) | |||||
{ | |||||
struct sf_buf *sf; | |||||
struct mbuf *old_m; | |||||
sf = m->m_ext.ext_arg1; | |||||
sf_buf_free(sf); | |||||
/* Drop the reference on the backing EXT_PGS mbuf. */ | |||||
old_m = m->m_ext.ext_arg2; | |||||
mb_free_ext(old_m); | |||||
} | |||||
static struct mbuf * | |||||
_mb_unmapped_to_ext(struct mbuf *m) | |||||
{ | |||||
struct mbuf_ext_pgs *ext_pgs; | |||||
struct mbuf *m_new, *top, *prev, *mref; | |||||
struct sf_buf *sf; | |||||
vm_page_t pg; | |||||
int i, len, off, pglen, pgoff, seglen, segoff; | |||||
volatile u_int *refcnt; | |||||
u_int ref_inc = 0; | |||||
MBUF_EXT_PGS_ASSERT(m); | |||||
ext_pgs = m->m_ext.ext_pgs; | |||||
len = m->m_len; | |||||
KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p", | |||||
__func__, m)); | |||||
/* See if this is the mbuf that holds the embedded refcount. */ | |||||
if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { | |||||
refcnt = &m->m_ext.ext_count; | |||||
mref = m; | |||||
} else { | |||||
KASSERT(m->m_ext.ext_cnt != NULL, | |||||
("%s: no refcounting pointer on %p", __func__, m)); | |||||
refcnt = m->m_ext.ext_cnt; | |||||
mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); | |||||
} | |||||
/* Skip over any data removed from the front. */ | |||||
off = mtod(m, vm_offset_t); | |||||
top = NULL; | |||||
if (ext_pgs->hdr_len != 0) { | |||||
if (off >= ext_pgs->hdr_len) { | |||||
off -= ext_pgs->hdr_len; | |||||
} else { | |||||
seglen = ext_pgs->hdr_len - off; | |||||
segoff = off; | |||||
seglen = min(seglen, len); | |||||
off = 0; | |||||
len -= seglen; | |||||
m_new = m_get(M_NOWAIT, MT_DATA); | |||||
if (m_new == NULL) | |||||
goto fail; | |||||
m_new->m_len = seglen; | |||||
prev = top = m_new; | |||||
memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff], | |||||
seglen); | |||||
} | |||||
} | |||||
pgoff = ext_pgs->first_pg_off; | |||||
for (i = 0; i < ext_pgs->npgs && len > 0; i++) { | |||||
pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); | |||||
if (off >= pglen) { | |||||
off -= pglen; | |||||
pgoff = 0; | |||||
continue; | |||||
} | |||||
seglen = pglen - off; | |||||
segoff = pgoff + off; | |||||
off = 0; | |||||
seglen = min(seglen, len); | |||||
len -= seglen; | |||||
pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); | |||||
m_new = m_get(M_NOWAIT, MT_DATA); | |||||
if (m_new == NULL) | |||||
goto fail; | |||||
if (top == NULL) { | |||||
top = prev = m_new; | |||||
} else { | |||||
prev->m_next = m_new; | |||||
prev = m_new; | |||||
} | |||||
sf = sf_buf_alloc(pg, SFB_NOWAIT); | |||||
if (sf == NULL) | |||||
goto fail; | |||||
ref_inc++; | |||||
m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE, | |||||
mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF); | |||||
m_new->m_data += segoff; | |||||
m_new->m_len = seglen; | |||||
pgoff = 0; | |||||
}; | |||||
if (len != 0) { | |||||
KASSERT((off + len) <= ext_pgs->trail_len, | |||||
("off + len > trail (%d + %d > %d)", off, len, | |||||
ext_pgs->trail_len)); | |||||
m_new = m_get(M_NOWAIT, MT_DATA); | |||||
if (m_new == NULL) | |||||
goto fail; | |||||
if (top == NULL) | |||||
top = m_new; | |||||
else | |||||
prev->m_next = m_new; | |||||
m_new->m_len = len; | |||||
memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len); | |||||
} | |||||
if (ref_inc != 0) { | |||||
/* | |||||
* Obtain an additional reference on the old mbuf for | |||||
* each created EXT_SFBUF mbuf. They will be dropped | |||||
* in mb_unmapped_free_mext(). | |||||
*/ | |||||
if (*refcnt == 1) | |||||
*refcnt += ref_inc; | |||||
else | |||||
atomic_add_int(refcnt, ref_inc); | |||||
} | |||||
m_free(m); | |||||
return (top); | |||||
fail: | |||||
if (ref_inc != 0) { | |||||
/* | |||||
* Obtain an additional reference on the old mbuf for | |||||
* each created EXT_SFBUF mbuf. They will be | |||||
* immediately dropped when these mbufs are freed | |||||
* below. | |||||
*/ | |||||
if (*refcnt == 1) | |||||
*refcnt += ref_inc; | |||||
else | |||||
atomic_add_int(refcnt, ref_inc); | |||||
} | |||||
m_free(m); | |||||
m_freem(top); | |||||
return (NULL); | |||||
} | |||||
struct mbuf * | |||||
mb_unmapped_to_ext(struct mbuf *top) | |||||
{ | |||||
struct mbuf *m, *next, *prev = NULL; | |||||
prev = NULL; | |||||
for (m = top; m != NULL; m = next) { | |||||
/* m might be freed, so cache the next pointer. */ | |||||
next = m->m_next; | |||||
if (m->m_flags & M_NOMAP) { | |||||
if (prev != NULL) { | |||||
/* | |||||
* Remove 'm' from the new chain so | |||||
* that the 'top' chain terminates | |||||
* before 'm' in case 'top' is freed | |||||
* due to an error. | |||||
*/ | |||||
prev->m_next = NULL; | |||||
} | |||||
m = _mb_unmapped_to_ext(m); | |||||
if (m == NULL) { | |||||
m_freem(top); | |||||
m_freem(next); | |||||
return (NULL); | |||||
} | |||||
if (prev == NULL) { | |||||
top = m; | |||||
} else { | |||||
prev->m_next = m; | |||||
} | |||||
/* | |||||
* Replaced one mbuf with a chain, so we must | |||||
* find the end of chain. | |||||
*/ | |||||
prev = m_last(m); | |||||
} else { | |||||
if (prev != NULL) { | |||||
prev->m_next = m; | |||||
} | |||||
prev = m; | |||||
} | |||||
} | |||||
return (top); | |||||
} | |||||
/* | |||||
* Allocate an empty EXT_PGS mbuf. The ext_free routine is | |||||
* responsible for freeing any pages backing this mbuf when it is | |||||
* freed. | |||||
*/ | |||||
struct mbuf * | |||||
mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free) | |||||
{ | |||||
struct mbuf *m; | |||||
struct mbuf_ext_pgs *ext_pgs; | |||||
if (pkthdr) | |||||
m = m_gethdr(how, MT_DATA); | |||||
else | |||||
m = m_get(how, MT_DATA); | |||||
if (m == NULL) | |||||
return (NULL); | |||||
ext_pgs = uma_zalloc(zone_extpgs, how); | |||||
if (ext_pgs == NULL) { | |||||
m_free(m); | |||||
return (NULL); | |||||
} | |||||
ext_pgs->npgs = 0; | |||||
ext_pgs->nrdy = 0; | |||||
ext_pgs->first_pg_off = 0; | |||||
ext_pgs->last_pg_len = 0; | |||||
ext_pgs->hdr_len = 0; | |||||
ext_pgs->trail_len = 0; | |||||
ext_pgs->tls = NULL; | |||||
ext_pgs->so = NULL; | |||||
m->m_data = NULL; | |||||
m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP); | |||||
m->m_ext.ext_type = EXT_PGS; | |||||
m->m_ext.ext_flags = EXT_FLAG_EMBREF; | |||||
m->m_ext.ext_count = 1; | |||||
m->m_ext.ext_pgs = ext_pgs; | |||||
m->m_ext.ext_size = 0; | |||||
m->m_ext.ext_free = ext_free; | |||||
return (m); | |||||
} | |||||
#ifdef INVARIANT_SUPPORT
/*
 * Sanity-check the invariants of a populated ext_pgs structure;
 * panics (via KASSERT) on any violation.
 */
void
mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs)
{

	/*
	 * NB: This expects a non-empty buffer (npgs > 0 and
	 * last_pg_len > 0).
	 */
	KASSERT(ext_pgs->npgs > 0,
	    ("ext_pgs with no valid pages: %p", ext_pgs));
	KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa),
	    ("ext_pgs with too many pages: %p", ext_pgs));
	KASSERT(ext_pgs->nrdy <= ext_pgs->npgs,
	    ("ext_pgs with too many ready pages: %p", ext_pgs));
	KASSERT(ext_pgs->first_pg_off < PAGE_SIZE,
	    ("ext_pgs with too large page offset: %p", ext_pgs));
	KASSERT(ext_pgs->last_pg_len > 0,
	    ("ext_pgs with zero last page length: %p", ext_pgs));
	KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE,
	    ("ext_pgs with too large last page length: %p", ext_pgs));
	if (ext_pgs->npgs == 1) {
		KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <=
		    PAGE_SIZE, ("ext_pgs with single page too large: %p",
		    ext_pgs));
	}
	KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr),
	    ("ext_pgs with too large header length: %p", ext_pgs));
	KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail),
	    ("ext_pgs with too large trailer length: %p", ext_pgs));
}
#endif
/* | |||||
* Clean up after mbufs with M_EXT storage attached to them if the | * Clean up after mbufs with M_EXT storage attached to them if the | ||||
* reference count hits 1. | * reference count hits 1. | ||||
*/ | */ | ||||
void | void | ||||
mb_free_ext(struct mbuf *m) | mb_free_ext(struct mbuf *m) | ||||
{ | { | ||||
volatile u_int *refcnt; | volatile u_int *refcnt; | ||||
struct mbuf *mref; | struct mbuf *mref; | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | case EXT_JUMBOP: | ||||
uma_zfree(zone_mbuf, mref); | uma_zfree(zone_mbuf, mref); | ||||
break; | break; | ||||
case EXT_JUMBO9: | case EXT_JUMBO9: | ||||
uma_zfree(zone_jumbo9, m->m_ext.ext_buf); | uma_zfree(zone_jumbo9, m->m_ext.ext_buf); | ||||
uma_zfree(zone_mbuf, mref); | uma_zfree(zone_mbuf, mref); | ||||
break; | break; | ||||
case EXT_JUMBO16: | case EXT_JUMBO16: | ||||
uma_zfree(zone_jumbo16, m->m_ext.ext_buf); | uma_zfree(zone_jumbo16, m->m_ext.ext_buf); | ||||
uma_zfree(zone_mbuf, mref); | |||||
break; | |||||
case EXT_PGS: | |||||
uma_zfree(zone_extpgs, mref->m_ext.ext_pgs); | |||||
uma_zfree(zone_mbuf, mref); | uma_zfree(zone_mbuf, mref); | ||||
break; | break; | ||||
case EXT_SFBUF: | case EXT_SFBUF: | ||||
case EXT_NET_DRV: | case EXT_NET_DRV: | ||||
case EXT_MOD_TYPE: | case EXT_MOD_TYPE: | ||||
case EXT_DISPOSABLE: | case EXT_DISPOSABLE: | ||||
KASSERT(mref->m_ext.ext_free != NULL, | KASSERT(mref->m_ext.ext_free != NULL, | ||||
("%s: ext_free not set", __func__)); | ("%s: ext_free not set", __func__)); | ||||
▲ Show 20 Lines • Show All 286 Lines • Show Last 20 Lines |
I just hit a panic on this kassert in the Netflix kernel. The mbuf looked like the following: it was basically a tiny 192-byte ending chunk of a giant 73K ext_pgs mbuf (see below).
I think that we can and should safely remove this check here.
(kgdb) p m->m_len
$5 = 0xc0
(kgdb) p m->m_data
$6 = (caddr_t) 0x11f40 <error: Cannot access memory at address 0x11f40>
(kgdb) p/d 0x11f40
$7 = 73536
(kgdb) p *(struct mbuf_ext_pgs *)0xfffff80637918000
$10 = {
<...>
(kgdb) p/d 0x12*4096
$11 = 73728
(kgdb) p/d 73728 - 0xc0
$12 = 73536