Index: sys/dev/cxgbe/crypto/t4_kern_tls.c =================================================================== --- sys/dev/cxgbe/crypto/t4_kern_tls.c +++ sys/dev/cxgbe/crypto/t4_kern_tls.c @@ -905,8 +905,8 @@ u_int plen, mlen; MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = m_tls->m_ext.ext_pgs; - hdr = (void *)ext_pgs->hdr; + ext_pgs = &m_tls->m_ext_pgs; + hdr = (void *)ext_pgs->m_epg_hdr; plen = ntohs(hdr->tls_length); /* @@ -961,8 +961,8 @@ #endif MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = m_tls->m_ext.ext_pgs; - hdr = (void *)ext_pgs->hdr; + ext_pgs = &m_tls->m_ext_pgs; + hdr = (void *)ext_pgs->m_epg_hdr; plen = ntohs(hdr->tls_length); #ifdef INVARIANTS mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len; @@ -1008,7 +1008,7 @@ u_int imm_len, offset, plen, wr_len, tlen; MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = m_tls->m_ext.ext_pgs; + ext_pgs = &m_tls->m_ext_pgs; /* * Determine the size of the TLS record payload to send @@ -1040,7 +1040,7 @@ return (wr_len); } - hdr = (void *)ext_pgs->hdr; + hdr = (void *)ext_pgs->m_epg_hdr; plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len; if (tlen < plen) { plen = tlen; @@ -1474,7 +1474,7 @@ /* Locate the template TLS header. */ MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = m_tls->m_ext.ext_pgs; + ext_pgs = &m_tls->m_ext_pgs; /* This should always be the last TLS record in a chain. */ MPASS(m_tls->m_next == NULL); @@ -1543,8 +1543,8 @@ (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); /* Copy the subset of the TLS header requested. */ - copy_to_txd(&txq->eq, (char *)ext_pgs->hdr + mtod(m_tls, vm_offset_t), - &out, m_tls->m_len); + copy_to_txd(&txq->eq, (char *)ext_pgs->m_epg_hdr + + mtod(m_tls, vm_offset_t), &out, m_tls->m_len); txq->imm_wrs++; txq->txpkt_wrs++; @@ -1603,8 +1603,8 @@ /* Locate the TLS header. 
*/ MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = m_tls->m_ext.ext_pgs; - hdr = (void *)ext_pgs->hdr; + ext_pgs = &m_tls->m_ext_pgs; + hdr = (void *)ext_pgs->m_epg_hdr; plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len; /* Determine how much of the TLS record to send. */ @@ -2031,7 +2031,7 @@ /* Populate the TLS header */ out = (void *)(tx_data + 1); if (offset == 0) { - memcpy(out, ext_pgs->hdr, ext_pgs->hdr_len); + memcpy(out, ext_pgs->m_epg_hdr, ext_pgs->hdr_len); out += ext_pgs->hdr_len; } Index: sys/dev/cxgbe/t4_sge.c =================================================================== --- sys/dev/cxgbe/t4_sge.c +++ sys/dev/cxgbe/t4_sge.c @@ -2419,7 +2419,7 @@ int nsegs = 0; MBUF_EXT_PGS_ASSERT(m); - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; off = mtod(m, vm_offset_t); len = m->m_len; off += skip; @@ -2435,7 +2435,7 @@ off = 0; len -= seglen; paddr = pmap_kextract( - (vm_offset_t)&ext_pgs->hdr[segoff]); + (vm_offset_t)&ext_pgs->m_epg_hdr[segoff]); if (*nextaddr != paddr) nsegs++; *nextaddr = paddr + seglen; @@ -2454,7 +2454,7 @@ off = 0; seglen = min(seglen, len); len -= seglen; - paddr = ext_pgs->pa[i] + segoff; + paddr = ext_pgs->m_epg_pa[i] + segoff; if (*nextaddr != paddr) nsegs++; *nextaddr = paddr + seglen; @@ -2463,7 +2463,7 @@ if (len != 0) { seglen = min(len, ext_pgs->trail_len - off); len -= seglen; - paddr = pmap_kextract((vm_offset_t)&ext_pgs->trail[off]); + paddr = pmap_kextract((vm_offset_t)&ext_pgs->m_epg_trail[off]); if (*nextaddr != paddr) nsegs++; *nextaddr = paddr + seglen; Index: sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- sys/dev/cxgbe/tom/t4_cpl_io.c +++ sys/dev/cxgbe/tom/t4_cpl_io.c @@ -732,7 +732,7 @@ if (m->m_flags & M_NOMAP) { #ifdef KERN_TLS - if (m->m_ext.ext_pgs->tls != NULL) { + if (m->m_ext_pgs.tls != NULL) { toep->flags |= TPF_KTLS; if (plen == 0) { SOCKBUF_UNLOCK(sb); @@ -1927,7 +1927,7 @@ vm_page_t pg; MBUF_EXT_PGS_ASSERT(m); - ext_pgs = 
m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; job = m->m_ext.ext_arg1; #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__, @@ -1935,7 +1935,7 @@ #endif for (int i = 0; i < ext_pgs->npgs; i++) { - pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); vm_page_unwire(pg, PQ_ACTIVE); } @@ -1984,13 +1984,13 @@ if (npages < 0) break; - m = mb_alloc_ext_pgs(M_WAITOK, false, aiotx_free_pgs); + m = mb_alloc_ext_pgs(M_WAITOK, aiotx_free_pgs); if (m == NULL) { vm_page_unhold_pages(pgs, npages); break; } - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; ext_pgs->first_pg_off = pgoff; ext_pgs->npgs = npages; if (npages == 1) { @@ -2003,7 +2003,7 @@ (npages - 2) * PAGE_SIZE; } for (i = 0; i < npages; i++) - ext_pgs->pa[i] = VM_PAGE_TO_PHYS(pgs[i]); + ext_pgs->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]); m->m_len = mlen; m->m_ext.ext_size = npages * PAGE_SIZE; Index: sys/dev/cxgbe/tom/t4_tls.c =================================================================== --- sys/dev/cxgbe/tom/t4_tls.c +++ sys/dev/cxgbe/tom/t4_tls.c @@ -1568,24 +1568,26 @@ #ifdef KERN_TLS static int -count_ext_pgs_segs(struct mbuf_ext_pgs *ext_pgs) +count_ext_pgs_segs(struct mbuf_ext_pgs *ext_pgs, + struct mbuf_ext_pgs_data *ext_pgs_data) { vm_paddr_t nextpa; u_int i, nsegs; MPASS(ext_pgs->npgs > 0); nsegs = 1; - nextpa = ext_pgs->pa[0] + PAGE_SIZE; + nextpa = ext_pgs_data->pa[0] + PAGE_SIZE; for (i = 1; i < ext_pgs->npgs; i++) { - if (nextpa != ext_pgs->pa[i]) + if (nextpa != ext_pgs_data->pa[i]) nsegs++; - nextpa = ext_pgs->pa[i] + PAGE_SIZE; + nextpa = ext_pgs_data->pa[i] + PAGE_SIZE; } return (nsegs); } static void -write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs, int nsegs) +write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs, + struct mbuf_ext_pgs_data *ext_pgs_data, int nsegs) { struct ulptx_sgl *usgl = dst; vm_paddr_t pa; @@ -1598,12 +1600,12 @@ V_ULPTX_NSGE(nsegs)); /* Figure out the first S/G length. 
*/ - pa = ext_pgs->pa[0] + ext_pgs->first_pg_off; + pa = ext_pgs_data->pa[0] + ext_pgs->first_pg_off; usgl->addr0 = htobe64(pa); len = mbuf_ext_pg_len(ext_pgs, 0, ext_pgs->first_pg_off); pa += len; for (i = 1; i < ext_pgs->npgs; i++) { - if (ext_pgs->pa[i] != pa) + if (ext_pgs_data->pa[i] != pa) break; len += mbuf_ext_pg_len(ext_pgs, i, 0); pa += mbuf_ext_pg_len(ext_pgs, i, 0); @@ -1615,14 +1617,14 @@ j = -1; for (; i < ext_pgs->npgs; i++) { - if (j == -1 || ext_pgs->pa[i] != pa) { + if (j == -1 || ext_pgs_data->pa[i] != pa) { if (j >= 0) usgl->sge[j / 2].len[j & 1] = htobe32(len); j++; #ifdef INVARIANTS nsegs--; #endif - pa = ext_pgs->pa[i]; + pa = ext_pgs_data->pa[i]; usgl->sge[j / 2].addr[j & 1] = htobe64(pa); len = mbuf_ext_pg_len(ext_pgs, i, 0); pa += len; @@ -1744,7 +1746,7 @@ KASSERT(m->m_flags & M_NOMAP, ("%s: mbuf %p is not NOMAP", __func__, m)); - KASSERT(m->m_ext.ext_pgs->tls != NULL, + KASSERT(m->m_ext_pgs.tls != NULL, ("%s: mbuf %p doesn't have TLS session", __func__, m)); /* Calculate WR length. */ @@ -1756,7 +1758,8 @@ wr_len += AES_BLOCK_LEN; /* Account for SGL in work request length. 
*/ - nsegs = count_ext_pgs_segs(m->m_ext.ext_pgs); + nsegs = count_ext_pgs_segs(&m->m_ext_pgs, + &m->m_ext.ext_pgs); wr_len += sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; @@ -1810,22 +1813,22 @@ return; } - thdr = (struct tls_hdr *)m->m_ext.ext_pgs->hdr; + thdr = (struct tls_hdr *)&m->m_epg_hdr; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x", - __func__, toep->tid, m->m_ext.ext_pgs->seqno, thdr->type, + __func__, toep->tid, m->m_ext_pgs.seqno, thdr->type, m->m_len); #endif txwr = wrtod(wr); cpl = (struct cpl_tx_tls_sfo *)(txwr + 1); memset(txwr, 0, roundup2(wr_len, 16)); credits = howmany(wr_len, 16); - expn_size = m->m_ext.ext_pgs->hdr_len + - m->m_ext.ext_pgs->trail_len; + expn_size = m->m_ext_pgs.hdr_len + + m->m_ext_pgs.trail_len; tls_size = m->m_len - expn_size; write_tlstx_wr(txwr, toep, 0, tls_size, expn_size, 1, credits, shove, 1); - toep->tls.tx_seq_no = m->m_ext.ext_pgs->seqno; + toep->tls.tx_seq_no = m->m_ext_pgs.seqno; write_tlstx_cpl(cpl, toep, thdr, tls_size, 1); tls_copy_tx_key(toep, cpl + 1); @@ -1834,7 +1837,8 @@ memcpy(buf, thdr + 1, toep->tls.iv_len); buf += AES_BLOCK_LEN; - write_ktlstx_sgl(buf, m->m_ext.ext_pgs, nsegs); + write_ktlstx_sgl(buf, &m->m_ext_pgs, &m->m_ext.ext_pgs, + nsegs); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); Index: sys/kern/kern_mbuf.c =================================================================== --- sys/kern/kern_mbuf.c +++ sys/kern/kern_mbuf.c @@ -295,7 +295,6 @@ uma_zone_t zone_jumbop; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; -uma_zone_t zone_extpgs; /* * Local prototypes. @@ -312,9 +311,11 @@ /* Ensure that MSIZE is a power of 2. 
*/ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); -_Static_assert(sizeof(struct mbuf_ext_pgs) == 256, - "mbuf_ext_pgs size mismatch"); - +_Static_assert(offsetof(struct mbuf, m_ext) == + offsetof(struct mbuf, m_ext_pgs.m_ext), + "m_ext offset mismatch between mbuf and ext_pgs"); +_Static_assert(sizeof(struct mbuf) <= MSIZE, + "size of mbuf exceeds MSIZE"); /* * Initialize FreeBSD Network buffer allocation. */ @@ -369,11 +370,6 @@ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); - zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME, - sizeof(struct mbuf_ext_pgs), - NULL, NULL, NULL, NULL, - UMA_ALIGN_CACHE, 0); - /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA @@ -840,8 +836,8 @@ for (i = 0; i < count && m != NULL; i++) { if ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_type == EXT_PGS) { - m->m_ext.ext_pgs->nrdy--; - if (m->m_ext.ext_pgs->nrdy != 0) + m->m_ext_pgs.nrdy--; + if (m->m_ext_pgs.nrdy != 0) continue; } m = m_free(m); @@ -883,27 +879,22 @@ if (*refcnt != 1) return (EBUSY); - /* - * Copy mbuf header and m_ext portion of 'm' to 'm_temp' to - * create a "fake" EXT_PGS mbuf that can be used with - * m_copydata() as well as the ext_free callback. - */ - memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof (m->m_ext)); - m_temp.m_next = NULL; - m_temp.m_nextpkt = NULL; + m_init(&m_temp, M_NOWAIT, MT_DATA, 0); + + /* copy data out of old mbuf */ + m_copydata(m, 0, m->m_len, mtod(&m_temp, char *)); + m_temp.m_len = m->m_len; + + /* Free the backing pages. */ + m->m_ext.ext_free(m); /* Turn 'm' into a "normal" mbuf. */ m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP); m->m_data = m->m_dat; - /* Copy data from template's ext_pgs. */ - m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t)); + /* copy data back into m */ + m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, char *)); - /* Free the backing pages. 
*/ - m_temp.m_ext.ext_free(&m_temp); - - /* Finally, free the ext_pgs struct. */ - uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs); return (0); } @@ -959,7 +950,7 @@ u_int ref_inc = 0; MBUF_EXT_PGS_ASSERT(m); - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; len = m->m_len; KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p", __func__, m)); @@ -993,7 +984,7 @@ goto fail; m_new->m_len = seglen; prev = top = m_new; - memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff], + memcpy(mtod(m_new, void *), &ext_pgs->m_epg_hdr[segoff], seglen); } } @@ -1011,7 +1002,7 @@ seglen = min(seglen, len); len -= seglen; - pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); m_new = m_get(M_NOWAIT, MT_DATA); if (m_new == NULL) goto fail; @@ -1045,7 +1036,7 @@ else prev->m_next = m_new; m_new->m_len = len; - memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len); + memcpy(mtod(m_new, void *), &ext_pgs->m_epg_trail[off], len); } if (ref_inc != 0) { @@ -1132,23 +1123,16 @@ * freed. 
*/ struct mbuf * -mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free) +mb_alloc_ext_pgs(int how, m_ext_free_t ext_free) { struct mbuf *m; struct mbuf_ext_pgs *ext_pgs; - if (pkthdr) - m = m_gethdr(how, MT_DATA); - else - m = m_get(how, MT_DATA); + m = m_get(how, MT_DATA); if (m == NULL) return (NULL); - ext_pgs = uma_zalloc(zone_extpgs, how); - if (ext_pgs == NULL) { - m_free(m); - return (NULL); - } + ext_pgs = &m->m_ext_pgs; ext_pgs->npgs = 0; ext_pgs->nrdy = 0; ext_pgs->first_pg_off = 0; @@ -1163,7 +1147,6 @@ m->m_ext.ext_type = EXT_PGS; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; - m->m_ext.ext_pgs = ext_pgs; m->m_ext.ext_size = 0; m->m_ext.ext_free = ext_free; return (m); @@ -1180,7 +1163,7 @@ */ KASSERT(ext_pgs->npgs > 0, ("ext_pgs with no valid pages: %p", ext_pgs)); - KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa), + KASSERT(ext_pgs->npgs <= nitems(ext_pgs->m_epg_pa), ("ext_pgs with too many pages: %p", ext_pgs)); KASSERT(ext_pgs->nrdy <= ext_pgs->npgs, ("ext_pgs with too many ready pages: %p", ext_pgs)); @@ -1195,9 +1178,9 @@ PAGE_SIZE, ("ext_pgs with single page too large: %p", ext_pgs)); } - KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr), + KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->m_epg_hdr), ("ext_pgs with too large header length: %p", ext_pgs)); - KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail), + KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->m_epg_trail), ("ext_pgs with too large header length: %p", ext_pgs)); } #endif @@ -1277,15 +1260,14 @@ ("%s: ext_free not set", __func__)); mref->m_ext.ext_free(mref); #ifdef KERN_TLS - pgs = mref->m_ext.ext_pgs; + pgs = &mref->m_ext_pgs; tls = pgs->tls; if (tls != NULL && !refcount_release_if_not_last(&tls->refcount)) ktls_enqueue_to_free(pgs); else #endif - uma_zfree(zone_extpgs, mref->m_ext.ext_pgs); - uma_zfree(zone_mbuf, mref); + uma_zfree(zone_mbuf, mref); break; } case EXT_SFBUF: Index: sys/kern/kern_sendfile.c 
=================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -171,18 +171,18 @@ ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)); cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST; - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0; for (i = 0; i < ext_pgs->npgs; i++) { if (cache_last && i == ext_pgs->npgs - 1) flags = 0; - pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); vm_page_release(pg, flags); } if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { - struct sendfile_sync *sfs = m->m_ext.ext_arg2; + struct sendfile_sync *sfs = m->m_ext.ext_arg1; mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); @@ -290,7 +290,7 @@ #if defined(KERN_TLS) && defined(INVARIANTS) if ((sfio->m->m_flags & M_EXT) != 0 && sfio->m->m_ext.ext_type == EXT_PGS) - KASSERT(sfio->tls == sfio->m->m_ext.ext_pgs->tls, + KASSERT(sfio->tls == sfio->m->m_ext_pgs.tls, ("TLS session mismatch")); else KASSERT(sfio->tls == NULL, @@ -899,7 +899,7 @@ ext_pgs_idx++; if (ext_pgs_idx == max_pgs) { - m0 = mb_alloc_ext_pgs(M_WAITOK, false, + m0 = mb_alloc_ext_pgs(M_WAITOK, sendfile_free_mext_pg); if (flags & SF_NOCACHE) { @@ -920,12 +920,18 @@ if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; - m0->m_ext.ext_arg2 = sfs; + if (m0->m_ext.ext_type == + EXT_PGS) + m0->m_ext.ext_arg1 = + sfs; + else + m0->m_ext.ext_arg2 = + sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } - ext_pgs = m0->m_ext.ext_pgs; + ext_pgs = &m0->m_ext_pgs; ext_pgs_idx = 0; /* Append to mbuf chain. 
*/ @@ -942,7 +948,7 @@ ext_pgs->nrdy++; } - ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]); + ext_pgs->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]); ext_pgs->npgs++; xfs = xfsize(i, npages, off, space); ext_pgs->last_pg_len = xfs; @@ -995,6 +1001,9 @@ m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; + if (m0->m_ext.ext_type == EXT_PGS) + m0->m_ext.ext_arg1 = sfs; + else + m0->m_ext.ext_arg2 = sfs; - m0->m_ext.ext_arg2 = sfs; mtx_lock(&sfs->mtx); sfs->count++; Index: sys/kern/subr_bus_dma.c =================================================================== --- sys/kern/subr_bus_dma.c +++ sys/kern/subr_bus_dma.c @@ -123,7 +123,7 @@ int error, i, off, len, pglen, pgoff, seglen, segoff; MBUF_EXT_PGS_ASSERT(m); - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; len = m->m_len; error = 0; @@ -141,7 +141,7 @@ off = 0; len -= seglen; error = _bus_dmamap_load_buffer(dmat, map, - &ext_pgs->hdr[segoff], seglen, kernel_pmap, + &ext_pgs->m_epg_hdr[segoff], seglen, kernel_pmap, flags, segs, nsegs); } } @@ -159,7 +159,7 @@ seglen = min(seglen, len); len -= seglen; error = _bus_dmamap_load_phys(dmat, map, - ext_pgs->pa[i] + segoff, seglen, flags, segs, nsegs); + ext_pgs->m_epg_pa[i] + segoff, seglen, flags, segs, nsegs); pgoff = 0; }; if (len != 0 && error == 0) { @@ -167,7 +167,7 @@ ("off + len > trail (%d + %d > %d)", off, len, ext_pgs->trail_len)); error = _bus_dmamap_load_buffer(dmat, map, - &ext_pgs->trail[off], len, kernel_pmap, flags, segs, + &ext_pgs->m_epg_trail[off], len, kernel_pmap, flags, segs, nsegs); } return (error); } Index: sys/kern/subr_sglist.c =================================================================== --- sys/kern/subr_sglist.c +++ sys/kern/subr_sglist.c @@ -242,7 +242,8 @@ seglen = MIN(seglen, len); off = 0; len -= seglen; - nsegs += sglist_count(&ext_pgs->hdr[segoff], seglen); + nsegs += sglist_count(&ext_pgs->m_epg_hdr[segoff], + seglen); } } nextaddr = 0; @@ -259,7 +260,7 @@ off = 0; seglen 
= MIN(seglen, len); len -= seglen; - paddr = ext_pgs->pa[i] + segoff; + paddr = ext_pgs->m_epg_pa[i] + segoff; if (paddr != nextaddr) nsegs++; nextaddr = paddr + seglen; @@ -268,7 +269,7 @@ if (len != 0) { seglen = MIN(len, ext_pgs->trail_len - off); len -= seglen; - nsegs += sglist_count(&ext_pgs->trail[off], seglen); + nsegs += sglist_count(&ext_pgs->m_epg_trail[off], seglen); } KASSERT(len == 0, ("len != 0")); return (nsegs); @@ -283,7 +284,7 @@ { MBUF_EXT_PGS_ASSERT(m); - return (sglist_count_ext_pgs(m->m_ext.ext_pgs, mtod(m, vm_offset_t), + return (sglist_count_ext_pgs(&m->m_ext_pgs, mtod(m, vm_offset_t), m->m_len)); } @@ -412,7 +413,7 @@ off = 0; len -= seglen; error = sglist_append(sg, - &ext_pgs->hdr[segoff], seglen); + &ext_pgs->m_epg_hdr[segoff], seglen); } } pgoff = ext_pgs->first_pg_off; @@ -428,7 +429,7 @@ off = 0; seglen = MIN(seglen, len); len -= seglen; - paddr = ext_pgs->pa[i] + segoff; + paddr = ext_pgs->m_epg_pa[i] + segoff; error = sglist_append_phys(sg, paddr, seglen); pgoff = 0; }; @@ -436,7 +437,7 @@ seglen = MIN(len, ext_pgs->trail_len - off); len -= seglen; error = sglist_append(sg, - &ext_pgs->trail[off], seglen); + &ext_pgs->m_epg_trail[off], seglen); } if (error == 0) KASSERT(len == 0, ("len != 0")); @@ -454,7 +455,7 @@ /* for now, all unmapped mbufs are assumed to be EXT_PGS */ MBUF_EXT_PGS_ASSERT(m); - return (sglist_append_ext_pgs(sg, m->m_ext.ext_pgs, + return (sglist_append_ext_pgs(sg, &m->m_ext_pgs, mtod(m, vm_offset_t), m->m_len)); } Index: sys/kern/uipc_ktls.c =================================================================== --- sys/kern/uipc_ktls.c +++ sys/kern/uipc_ktls.c @@ -1216,8 +1216,11 @@ KASSERT((m->m_flags & M_NOMAP) != 0, ("ktls_seq: mapped mbuf %p", m)); - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; pgs->seqno = sb->sb_tls_seqno; + /* some HW TLS needs this for TLS 1.3 */ + memcpy(pgs->m_epg_trail, &pgs->seqno, + sizeof(pgs->seqno)); sb->sb_tls_seqno++; } } @@ -1264,7 +1267,7 @@ ("ktls_frame: mapped mbuf %p (top 
= %p)\n", m, top)); tls_len = m->m_len; - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; /* Save a reference to the session. */ pgs->tls = ktls_hold(tls); @@ -1297,7 +1300,7 @@ m->m_len += pgs->hdr_len + pgs->trail_len; /* Populate the TLS header. */ - tlshdr = (void *)pgs->hdr; + tlshdr = (void *)pgs->m_epg_hdr; tlshdr->tls_vmajor = tls->params.tls_vmajor; /* @@ -1380,7 +1383,7 @@ ("ktls_enqueue: %p not unready & nomap mbuf\n", m)); KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count")); - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; KASSERT(pgs->tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf")); @@ -1447,7 +1450,7 @@ */ error = 0; for (m = top; npages != total_pages; m = m->m_next) { - pgs = m->m_ext.ext_pgs; + pgs = &m->m_ext_pgs; KASSERT(pgs->tls == tls, ("different TLS sessions in a single mbuf chain: %p vs %p", @@ -1474,7 +1477,8 @@ len = mbuf_ext_pg_len(pgs, i, off); src_iov[i].iov_len = len; src_iov[i].iov_base = - (char *)(void *)PHYS_TO_DMAP(pgs->pa[i]) + off; + (char *)(void *)PHYS_TO_DMAP(pgs->m_epg_pa[i]) + + off; if (is_anon) { dst_iov[i].iov_base = src_iov[i].iov_base; @@ -1497,8 +1501,8 @@ npages += i; error = (*tls->sw_encrypt)(tls, - (const struct tls_record_layer *)pgs->hdr, - pgs->trail, src_iov, dst_iov, i, pgs->seqno, + (const struct tls_record_layer *)pgs->m_epg_hdr, + pgs->m_epg_trail, src_iov, dst_iov, i, pgs->seqno, pgs->record_type); if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); @@ -1516,7 +1520,7 @@ /* Replace them with the new pages. */ for (i = 0; i < pgs->npgs; i++) - pgs->pa[i] = parray[i]; + pgs->m_epg_pa[i] = parray[i]; /* Use the basic free routine. 
*/ m->m_ext.ext_free = mb_free_mext_pgs; @@ -1556,6 +1560,7 @@ struct ktls_wq *wq = ctx; struct mbuf_ext_pgs *p, *n; struct ktls_session *tls; + struct mbuf *m; STAILQ_HEAD(, mbuf_ext_pgs) local_head; #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) @@ -1580,7 +1585,8 @@ } else { tls = p->tls; ktls_free(tls); - uma_zfree(zone_extpgs, p); + m = __containerof(p, struct mbuf, m_ext_pgs); + uma_zfree(zone_mbuf, m); } } } Index: sys/kern/uipc_mbuf.c =================================================================== --- sys/kern/uipc_mbuf.c +++ sys/kern/uipc_mbuf.c @@ -163,11 +163,11 @@ #if defined(__LP64__) CTASSERT(offsetof(struct mbuf, m_dat) == 32); CTASSERT(sizeof(struct pkthdr) == 56); -CTASSERT(sizeof(struct m_ext) == 48); +CTASSERT(sizeof(struct m_ext) == 168); #else CTASSERT(offsetof(struct mbuf, m_dat) == 24); CTASSERT(sizeof(struct pkthdr) == 48); -CTASSERT(sizeof(struct m_ext) == 28); +CTASSERT(sizeof(struct m_ext) == 184); #endif /* @@ -203,6 +203,9 @@ */ if (m->m_ext.ext_type == EXT_EXTREF) bcopy(&m->m_ext, &n->m_ext, sizeof(struct m_ext)); + else if (m->m_ext.ext_type == EXT_PGS) + bcopy(&m->m_ext_pgs, &n->m_ext_pgs, + sizeof(struct mbuf_ext_pgs)); else bcopy(&m->m_ext, &n->m_ext, m_ext_copylen); n->m_flags |= M_EXT; @@ -1426,7 +1429,7 @@ * XXX: This overestimates the number of fragments by assuming * all the backing physical pages are disjoint. 
*/ - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; frags = 0; if (ext_pgs->hdr_len != 0) frags++; @@ -1618,9 +1621,9 @@ vm_page_t pg; MBUF_EXT_PGS_ASSERT(m); - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = &m->m_ext_pgs; for (int i = 0; i < ext_pgs->npgs; i++) { - pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); vm_page_unwire_noq(pg); vm_page_free(pg); } @@ -1653,9 +1656,9 @@ * Allocate the pages */ m = NULL; + MPASS((flags & M_PKTHDR) == 0); while (total > 0) { - mb = mb_alloc_ext_pgs(how, (flags & M_PKTHDR), - mb_free_mext_pgs); + mb = mb_alloc_ext_pgs(how, mb_free_mext_pgs); if (mb == NULL) goto failed; if (m == NULL) @@ -1663,7 +1666,7 @@ else prev->m_next = mb; prev = mb; - pgs = mb->m_ext.ext_pgs; + pgs = &mb->m_ext_pgs; pgs->flags = MBUF_PEXT_FLAG_ANON; needed = length = MIN(maxseg, total); for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) { @@ -1678,7 +1681,7 @@ } } pg_array[i]->flags &= ~PG_ZERO; - pgs->pa[i] = VM_PAGE_TO_PHYS(pg_array[i]); + pgs->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg_array[i]); pgs->npgs++; } pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1); @@ -1769,7 +1772,7 @@ int error, i, off, pglen, pgoff, seglen, segoff; MBUF_EXT_PGS_ASSERT(m); - ext_pgs = m->m_ext.ext_pgs; + ext_pgs = __DECONST(void *, &m->m_ext_pgs); error = 0; /* Skip over any data removed from the front. 
*/ @@ -1785,7 +1788,7 @@ seglen = min(seglen, len); off = 0; len -= seglen; - error = uiomove(&ext_pgs->hdr[segoff], seglen, uio); + error = uiomove(&ext_pgs->m_epg_hdr[segoff], seglen, uio); } } pgoff = ext_pgs->first_pg_off; @@ -1801,7 +1804,7 @@ off = 0; seglen = min(seglen, len); len -= seglen; - pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); error = uiomove_fromphys(&pg, segoff, seglen, uio); pgoff = 0; }; @@ -1809,7 +1812,7 @@ KASSERT((off + len) <= ext_pgs->trail_len, ("off + len > trail (%d + %d > %d, m_off = %d)", off, len, ext_pgs->trail_len, m_off)); - error = uiomove(&ext_pgs->trail[off], len, uio); + error = uiomove(&ext_pgs->m_epg_trail[off], len, uio); } return (error); } Index: sys/kern/uipc_sockbuf.c =================================================================== --- sys/kern/uipc_sockbuf.c +++ sys/kern/uipc_sockbuf.c @@ -188,13 +188,13 @@ ("%s: m %p !M_NOTREADY", __func__, m)); if ((m->m_flags & M_EXT) != 0 && m->m_ext.ext_type == EXT_PGS) { - if (count < m->m_ext.ext_pgs->nrdy) { - m->m_ext.ext_pgs->nrdy -= count; + if (count < m->m_ext_pgs.nrdy) { + m->m_ext_pgs.nrdy -= count; count = 0; break; } - count -= m->m_ext.ext_pgs->nrdy; - m->m_ext.ext_pgs->nrdy = 0; + count -= m->m_ext_pgs.nrdy; + m->m_ext_pgs.nrdy = 0; } else count--; Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -231,7 +231,7 @@ * dropping the mbuf's reference) in if_output. 
*/ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { - tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls); + tls = ktls_hold(m->m_next->m_ext_pgs.tls); mst = tls->snd_tag; /* Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -1908,7 +1908,7 @@ pkthdrlen = NULL; #ifdef KERN_TLS if (hw_tls && (m->m_flags & M_NOMAP)) - tls = m->m_ext.ext_pgs->tls; + tls = m->m_ext_pgs.tls; else tls = NULL; start = m; @@ -1925,7 +1925,7 @@ #ifdef KERN_TLS if (hw_tls) { if (m->m_flags & M_NOMAP) - ntls = m->m_ext.ext_pgs->tls; + ntls = m->m_ext_pgs.tls; else ntls = NULL; Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -340,7 +340,7 @@ * dropping the mbuf's reference) in if_output. */ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { - tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls); + tls = ktls_hold(m->m_next->m_ext_pgs.tls); mst = tls->snd_tag; /* Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -202,6 +202,45 @@ #define lro_csum PH_loc.sixteen[1] /* inbound during LRO (no reassembly) */ /* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */ +/* + * TLS records for TLS 1.0-1.2 can have the following header lengths: + * - 5 (AES-CBC with implicit IV) + * - 21 (AES-CBC with explicit IV) + * - 13 (AES-GCM with 8 byte explicit IV) + */ +#define MBUF_PEXT_HDR_LEN 23 + +/* + * TLS records for TLS 1.0-1.2 can have the following maximum trailer + * lengths: + * - 16 (AES-GCM) + * - 36 (AES-CBC with SHA1 and up to 16 bytes of padding) + * - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding) + * - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding) + */ +#define MBUF_PEXT_TRAIL_LEN 64 + +#if defined(__LP64__) +#define MBUF_PEXT_MAX_PGS (40 / 
sizeof(vm_paddr_t)) +#else +#define MBUF_PEXT_MAX_PGS (72 / sizeof(vm_paddr_t)) +#endif + +#define MBUF_PEXT_MAX_BYTES \ + (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN) + +#define MBUF_PEXT_FLAG_ANON 1 /* Data can be encrypted in place. */ + + +struct mbuf_ext_pgs_data { + vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pgs */ + char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */ + char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */ +}; + +struct ktls_session; +struct socket; + /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. @@ -224,18 +263,11 @@ volatile u_int ext_count; volatile u_int *ext_cnt; }; - union { - /* - * If ext_type == EXT_PGS, 'ext_pgs' points to a - * structure describing the buffer. Otherwise, - * 'ext_buf' points to the start of the buffer. - */ - struct mbuf_ext_pgs *ext_pgs; - char *ext_buf; - }; uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ + + char *ext_buf; /* * Fields below store the free context for the external storage. 
* They are valid only in the refcount carrying mbuf, the one with @@ -246,9 +278,38 @@ #define m_ext_copylen offsetof(struct m_ext, ext_free) m_ext_free_t *ext_free; /* free routine if not the usual */ void *ext_arg1; /* optional argument pointer */ - void *ext_arg2; /* optional argument pointer */ + union { + void *ext_arg2; /* optional argument pointer */ + struct mbuf_ext_pgs_data ext_pgs; + }; +}; + +struct mbuf_ext_pgs { + uint8_t npgs; /* Number of attached pages */ + uint8_t nrdy; /* Pages with I/O pending */ + uint8_t hdr_len; /* TLS header length */ + uint8_t trail_len; /* TLS trailer length */ + uint16_t first_pg_off; /* Offset into 1st page */ + uint16_t last_pg_len; /* Length of last page */ + uint8_t flags; /* Flags */ + uint8_t record_type; + uint8_t spare[2]; + int enc_cnt; + struct ktls_session *tls; /* TLS session */ + struct socket *so; + uint64_t seqno; + struct mbuf *mbuf; + STAILQ_ENTRY(mbuf_ext_pgs) stailq; +#if !defined(__LP64__) + uint8_t pad[8]; /* pad to size of pkthdr */ +#endif + struct m_ext m_ext; }; +#define m_epg_hdr m_ext.ext_pgs.hdr +#define m_epg_trail m_ext.ext_pgs.trail +#define m_epg_pa m_ext.ext_pgs.pa + /* * The core of the mbuf object along with some shortcut defines for practical * purposes. @@ -287,86 +348,20 @@ * order to support future work on variable-size mbufs. 
*/ union { - struct { - struct pkthdr m_pkthdr; /* M_PKTHDR set */ - union { - struct m_ext m_ext; /* M_EXT set */ - char m_pktdat[0]; + union { + struct mbuf_ext_pgs m_ext_pgs; + struct { + struct pkthdr m_pkthdr; /* M_PKTHDR set */ + union { + struct m_ext m_ext; /* M_EXT set */ + char m_pktdat[0]; + }; }; }; char m_dat[0]; /* !M_PKTHDR, !M_EXT */ }; }; -struct ktls_session; -struct socket; - -/* - * TLS records for TLS 1.0-1.2 can have the following header lengths: - * - 5 (AES-CBC with implicit IV) - * - 21 (AES-CBC with explicit IV) - * - 13 (AES-GCM with 8 byte explicit IV) - */ -#define MBUF_PEXT_HDR_LEN 23 - -/* - * TLS records for TLS 1.0-1.2 can have the following maximum trailer - * lengths: - * - 16 (AES-GCM) - * - 36 (AES-CBC with SHA1 and up to 16 bytes of padding) - * - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding) - * - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding) - */ -#define MBUF_PEXT_TRAIL_LEN 64 - -#ifdef __LP64__ -#define MBUF_PEXT_MAX_PGS (152 / sizeof(vm_paddr_t)) -#else -#define MBUF_PEXT_MAX_PGS (156 / sizeof(vm_paddr_t)) -#endif - -#define MBUF_PEXT_MAX_BYTES \ - (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN) - -#define MBUF_PEXT_FLAG_ANON 1 /* Data can be encrypted in place. */ - -/* - * This struct is 256 bytes in size and is arranged so that the most - * common case (accessing the first 4 pages of a 16KB TLS record) will - * fit in a single 64 byte cacheline. 
- */ -struct mbuf_ext_pgs { - uint8_t npgs; /* Number of attached pages */ - uint8_t nrdy; /* Pages with I/O pending */ - uint8_t hdr_len; /* TLS header length */ - uint8_t trail_len; /* TLS trailer length */ - uint16_t first_pg_off; /* Offset into 1st page */ - uint16_t last_pg_len; /* Length of last page */ - vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pages */ - char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */ - uint8_t flags; /* Flags */ - struct ktls_session *tls; /* TLS session */ -#if defined(__i386__) || \ - (defined(__powerpc__) && !defined(__powerpc64__) && defined(BOOKE)) - /* - * i386 and Book-E PowerPC have 64-bit vm_paddr_t, so there is - * a 4 byte remainder from the space allocated for pa[]. - */ - uint32_t pad; -#endif - union { - char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */ - struct { - uint8_t record_type; /* Must be first */ - struct socket *so; - struct mbuf *mbuf; - uint64_t seqno; - STAILQ_ENTRY(mbuf_ext_pgs) stailq; - int enc_cnt; - }; - }; -}; - #ifdef _KERNEL static inline int mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff) @@ -699,7 +694,7 @@ void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); void mb_free_mext_pgs(struct mbuf *); -struct mbuf *mb_alloc_ext_pgs(int, bool, m_ext_free_t); +struct mbuf *mb_alloc_ext_pgs(int, m_ext_free_t); int mb_unmapped_compress(struct mbuf *m); struct mbuf *mb_unmapped_to_ext(struct mbuf *m); void mb_free_notready(struct mbuf *m, int count); @@ -1515,7 +1510,7 @@ if (m->m_flags & M_NOMAP) { MBUF_EXT_PGS_ASSERT(m); - if (m->m_ext.ext_pgs->tls != NULL) { + if (m->m_ext_pgs.tls != NULL) { return (true); } }