Index: share/man/man9/sglist.9
===================================================================
--- share/man/man9/sglist.9
+++ share/man/man9/sglist.9
@@ -26,7 +26,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 28, 2019
+.Dd April 24, 2020
 .Dt SGLIST 9
 .Os
 .Sh NAME
@@ -34,9 +34,8 @@
 .Nm sglist_alloc ,
 .Nm sglist_append ,
 .Nm sglist_append_bio ,
-.Nm sglist_append_ext_pgs,
-.Nm sglist_append_mb_ext_pgs,
 .Nm sglist_append_mbuf ,
+.Nm sglist_append_mbuf_epg ,
 .Nm sglist_append_phys ,
 .Nm sglist_append_sglist ,
 .Nm sglist_append_uio ,
@@ -46,8 +45,7 @@
 .Nm sglist_clone ,
 .Nm sglist_consume_uio ,
 .Nm sglist_count ,
-.Nm sglist_count_ext_pgs ,
-.Nm sglist_count_mb_ext_pgs ,
+.Nm sglist_count_mbuf_epg ,
 .Nm sglist_count_vmpages ,
 .Nm sglist_free ,
 .Nm sglist_hold ,
@@ -68,9 +66,7 @@
 .Ft int
 .Fn sglist_append_bio "struct sglist *sg" "struct bio *bp"
 .Ft int
-.Fn sglist_append_ext_pgs "struct sglist *sg" "struct mbuf_ext_pgs *ext_pgs" "size_t offset" "size_t len"
-.Ft int
-.Fn sglist_append_mb_ext_pgs "struct sglist *sg" "struct mbuf *m"
+.Fn sglist_append_mbuf_epg "struct sglist *sg" "struct mbuf *m" "size_t offset" "size_t len"
 .Ft int
 .Fn sglist_append_mbuf "struct sglist *sg" "struct mbuf *m"
 .Ft int
@@ -92,9 +88,7 @@
 .Ft int
 .Fn sglist_count "void *buf" "size_t len"
 .Ft int
-.Fn sglist_count_ext_pgs "struct mbuf_ext_pgs *ext_pgs" "size_t offset" "size_t len"
-.Ft int
-.Fn sglist_count_mb_ext_pgs "struct mbuf *m"
+.Fn sglist_count_mbuf_epg "struct mbuf *m" "size_t offset" "size_t len"
 .Ft int
 .Fn sglist_count_vmpages "vm_page_t *m" "size_t pgoff" "size_t len"
 .Ft void
@@ -158,20 +152,15 @@
 bytes long.
 .Pp
 The
-.Nm sglist_count_ext_pgs
+.Nm sglist_count_mbuf_epg
 function returns the number of scatter/gather list elements needed to describe
-the unmapped external mbuf buffer
-.Fa ext_pgs .
+the external multipage mbuf buffer
+.Fa m .
 The ranges start at an offset of
 .Fa offset
-relative to the start of the buffer and is
+relative to the start of the buffer and span
 .Fa len
-bytes long.
+bytes.
-The
-.Nm sglist_count_mb_ext_pgs
-function returns the number of scatter/gather list elements needed to describe
-the physical address ranges of a single unmapped mbuf
-.Fa m .
 .Pp
 The
 .Nm sglist_count_vmpages
@@ -265,9 +254,11 @@
 .Fa sg .
 .Pp
 The
-.Nm sglist_append_ext_pgs
-function appends the physical address ranges described by the unmapped
-external mbuf buffer
+.Nm sglist_append_mbuf_epg
+function appends the physical address ranges described by the
+external multipage
+.Xr mbuf 9
+buffer
-.Fa ext_pgs
+.Fa m
 to the scatter/gather list
 .Fa sg .
@@ -278,17 +269,9 @@
 and continue for
 .Fa len
 bytes.
-.Pp
-The
-.Nm sglist_append_mb_ext_pgs
-function appends the physical address ranges described by the unmapped
-mbuf
-.Fa m
-to the scatter/gather list
-.Fa sg .
 Note that unlike
 .Nm sglist_append_mbuf ,
-.Nm sglist_append_mb_ext_pgs
+.Nm sglist_append_mbuf_epg
 only adds ranges for a single mbuf, not an entire mbuf chain.
 .Pp
Index: sys/dev/cxgbe/crypto/t4_kern_tls.c
===================================================================
--- sys/dev/cxgbe/crypto/t4_kern_tls.c
+++ sys/dev/cxgbe/crypto/t4_kern_tls.c
@@ -900,13 +900,11 @@
 static u_int
 ktls_tcp_payload_length(struct tlspcb *tlsp, struct mbuf *m_tls)
 {
-	struct mbuf_ext_pgs *ext_pgs;
 	struct tls_record_layer *hdr;
 	u_int plen, mlen;
 
-	MBUF_EXT_PGS_ASSERT(m_tls);
-	ext_pgs = &m_tls->m_ext_pgs;
-	hdr = (void *)ext_pgs->m_epg_hdr;
+	M_ASSERTEXTPG(m_tls);
+	hdr = (void *)m_tls->m_epg_hdr;
 	plen = ntohs(hdr->tls_length);
 
 	/*
@@ -924,8 +922,8 @@
 	 * trim the length to avoid sending any of the trailer.
There * is no way to send a partial trailer currently. */ - if (mlen > TLS_HEADER_LENGTH + plen - ext_pgs->trail_len) - mlen = TLS_HEADER_LENGTH + plen - ext_pgs->trail_len; + if (mlen > TLS_HEADER_LENGTH + plen - m_tls->m_epg_trllen) + mlen = TLS_HEADER_LENGTH + plen - m_tls->m_epg_trllen; /* @@ -953,22 +951,20 @@ static u_int ktls_payload_offset(struct tlspcb *tlsp, struct mbuf *m_tls) { - struct mbuf_ext_pgs *ext_pgs; struct tls_record_layer *hdr; u_int offset, plen; #ifdef INVARIANTS u_int mlen; #endif - MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = &m_tls->m_ext_pgs; - hdr = (void *)ext_pgs->m_epg_hdr; + M_ASSERTEXTPG(m_tls); + hdr = (void *)m_tls->m_epg_hdr; plen = ntohs(hdr->tls_length); #ifdef INVARIANTS mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len; MPASS(mlen < TLS_HEADER_LENGTH + plen); #endif - if (mtod(m_tls, vm_offset_t) <= ext_pgs->hdr_len) + if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen) return (0); if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { /* @@ -979,8 +975,8 @@ * the offset at the last byte of the record payload * to send the last cipher block. */ - offset = min(mtod(m_tls, vm_offset_t) - ext_pgs->hdr_len, - (plen - TLS_HEADER_LENGTH - ext_pgs->trail_len) - 1); + offset = min(mtod(m_tls, vm_offset_t) - m_tls->m_epg_hdrlen, + (plen - TLS_HEADER_LENGTH - m_tls->m_epg_trllen) - 1); return (rounddown(offset, AES_BLOCK_LEN)); } return (0); @@ -1003,19 +999,17 @@ ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls, int *nsegsp) { - struct mbuf_ext_pgs *ext_pgs; struct tls_record_layer *hdr; u_int imm_len, offset, plen, wr_len, tlen; - MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = &m_tls->m_ext_pgs; + M_ASSERTEXTPG(m_tls); /* * Determine the size of the TLS record payload to send * excluding header and trailer. */ tlen = ktls_tcp_payload_length(tlsp, m_tls); - if (tlen <= ext_pgs->hdr_len) { + if (tlen <= m_tls->m_epg_hdrlen) { /* * For requests that only want to send the TLS header, * send a tunnelled packet as immediate data. @@ -1040,8 +1034,8 @@ return (wr_len); } - hdr = (void *)ext_pgs->m_epg_hdr; - plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len; + hdr = (void *)m_tls->m_epg_hdr; + plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen; if (tlen < plen) { plen = tlen; offset = ktls_payload_offset(tlsp, m_tls); @@ -1058,14 +1052,14 @@ */ imm_len = 0; if (offset == 0) - imm_len += ext_pgs->hdr_len; + imm_len += m_tls->m_epg_hdrlen; if (plen == tlen) imm_len += AES_BLOCK_LEN; wr_len += roundup2(imm_len, 16); /* TLS record payload via DSGL. */ - *nsegsp = sglist_count_ext_pgs(ext_pgs, ext_pgs->hdr_len + offset, - plen - (ext_pgs->hdr_len + offset)); + *nsegsp = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset, + plen - (m_tls->m_epg_hdrlen + offset)); wr_len += ktls_sgl_size(*nsegsp); wr_len = roundup2(wr_len, 16); @@ -1214,7 +1208,7 @@ /* Assume all headers are in 'm' for now. */ MPASS(m->m_next != NULL); - MPASS(m->m_next->m_flags & M_NOMAP); + MPASS(m->m_next->m_flags & M_EXTPG); tot_len = 0; @@ -1224,7 +1218,7 @@ */ *nsegsp = 0; for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) { - MPASS(m_tls->m_flags & M_NOMAP); + MPASS(m_tls->m_flags & M_EXTPG); wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs); #ifdef VERBOSE_TRACES @@ -1466,15 +1460,13 @@ struct ip *ip, newip; struct ip6_hdr *ip6, newip6; struct tcphdr *tcp, newtcp; - struct mbuf_ext_pgs *ext_pgs; caddr_t out; TXQ_LOCK_ASSERT_OWNED(txq); M_ASSERTPKTHDR(m); /* Locate the template TLS header. 
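+	 * For an M_EXTPG record the template header lives in the mbuf's
+	 * m_epg_hdr area; the code below copies out only the subset that
+	 * this transmit request covers.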
*/ - MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = &m_tls->m_ext_pgs; + M_ASSERTEXTPG(m_tls); /* This should always be the last TLS record in a chain. */ MPASS(m_tls->m_next == NULL); @@ -1543,7 +1535,7 @@ (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); /* Copy the subset of the TLS header requested. */ - copy_to_txd(&txq->eq, (char *)ext_pgs->m_epg_hdr + + copy_to_txd(&txq->eq, (char *)m_tls->m_epg_hdr + mtod(m_tls, vm_offset_t), &out, m_tls->m_len); txq->imm_wrs++; @@ -1577,7 +1569,6 @@ struct ulptx_idata *idata; struct cpl_tx_sec_pdu *sec_pdu; struct cpl_tx_data *tx_data; - struct mbuf_ext_pgs *ext_pgs; struct tls_record_layer *hdr; char *iv, *out; u_int aad_start, aad_stop; @@ -1602,21 +1593,20 @@ using_scratch = (eq->sidx - pidx < SGE_MAX_WR_LEN / EQ_ESIZE); /* Locate the TLS header. */ - MBUF_EXT_PGS_ASSERT(m_tls); - ext_pgs = &m_tls->m_ext_pgs; - hdr = (void *)ext_pgs->m_epg_hdr; - plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len; + M_ASSERTEXTPG(m_tls); + hdr = (void *)m_tls->m_epg_hdr; + plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - m_tls->m_epg_trllen; /* Determine how much of the TLS record to send. */ tlen = ktls_tcp_payload_length(tlsp, m_tls); - if (tlen <= ext_pgs->hdr_len) { + if (tlen <= m_tls->m_epg_hdrlen) { /* * For requests that only want to send the TLS header, * send a tunnelled packet as immediate data. */ #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: tid %d header-only TLS record %u", - __func__, tlsp->tid, (u_int)ext_pgs->seqno); + __func__, tlsp->tid, (u_int)m_tls->m_epg_seqno); #endif return (ktls_write_tunnel_packet(txq, dst, m, m_tls, available, tcp_seqno, pidx)); @@ -1626,7 +1616,7 @@ offset = ktls_payload_offset(tlsp, m_tls); #ifdef VERBOSE_TRACES CTR4(KTR_CXGBE, "%s: tid %d short TLS record %u with offset %u", - __func__, tlsp->tid, (u_int)ext_pgs->seqno, offset); + __func__, tlsp->tid, (u_int)m_tls->m_epg_seqno, offset); #endif if (m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) != 0) { txq->kern_tls_fin_short++; @@ -1681,10 +1671,10 @@ */ tx_max_offset = mtod(m_tls, vm_offset_t); if (tx_max_offset > TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - - ext_pgs->trail_len) { + m_tls->m_epg_trllen) { /* Always send the full trailer. */ tx_max_offset = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - - ext_pgs->trail_len; + m_tls->m_epg_trllen; } if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC && tx_max_offset > TLS_HEADER_LENGTH) { @@ -1799,15 +1789,15 @@ /* Recalculate 'nsegs' if cached value is not available. */ if (nsegs == 0) - nsegs = sglist_count_ext_pgs(ext_pgs, ext_pgs->hdr_len + - offset, plen - (ext_pgs->hdr_len + offset)); + nsegs = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + + offset, plen - (m_tls->m_epg_hdrlen + offset)); /* Calculate the size of the TLS work request. */ twr_len = ktls_base_wr_size(tlsp); imm_len = 0; if (offset == 0) - imm_len += ext_pgs->hdr_len; + imm_len += m_tls->m_epg_hdrlen; if (plen == tlen) imm_len += AES_BLOCK_LEN; twr_len += roundup2(imm_len, 16); @@ -1923,13 +1913,13 @@ cipher_stop = 0; sec_pdu->pldlen = htobe32(16 + plen - - (ext_pgs->hdr_len + offset)); + (m_tls->m_epg_hdrlen + offset)); /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ sec_pdu->seqno_numivs = tlsp->scmd0_short.seqno_numivs; sec_pdu->ivgen_hdrlen = htobe32( tlsp->scmd0_short.ivgen_hdrlen | - V_SCMD_HDR_LEN(offset == 0 ? ext_pgs->hdr_len : 0)); + V_SCMD_HDR_LEN(offset == 0 ? 
m_tls->m_epg_hdrlen : 0)); txq->kern_tls_short++; } else { @@ -1942,7 +1932,7 @@ aad_start = 1; aad_stop = TLS_HEADER_LENGTH; iv_offset = TLS_HEADER_LENGTH + 1; - cipher_start = ext_pgs->hdr_len + 1; + cipher_start = m_tls->m_epg_hdrlen + 1; if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { cipher_stop = 0; auth_start = cipher_start; @@ -1981,7 +1971,7 @@ V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) | V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert)); - sec_pdu->scmd1 = htobe64(ext_pgs->seqno); + sec_pdu->scmd1 = htobe64(m_tls->m_epg_seqno); /* Key context */ out = (void *)(sec_pdu + 1); @@ -2021,8 +2011,8 @@ tx_data->rsvd = htobe32(tcp_seqno); } else { tx_data->len = htobe32(V_TX_DATA_MSS(mss) | - V_TX_LENGTH(tlen - (ext_pgs->hdr_len + offset))); - tx_data->rsvd = htobe32(tcp_seqno + ext_pgs->hdr_len + offset); + V_TX_LENGTH(tlen - (m_tls->m_epg_hdrlen + offset))); + tx_data->rsvd = htobe32(tcp_seqno + m_tls->m_epg_hdrlen + offset); } tx_data->flags = htobe32(F_TX_BYPASS); if (last_wr && tcp->th_flags & TH_PUSH) @@ -2031,8 +2021,8 @@ /* Populate the TLS header */ out = (void *)(tx_data + 1); if (offset == 0) { - memcpy(out, ext_pgs->m_epg_hdr, ext_pgs->hdr_len); - out += ext_pgs->hdr_len; + memcpy(out, m_tls->m_epg_hdr, m_tls->m_epg_hdrlen); + out += m_tls->m_epg_hdrlen; } /* AES IV for a short record. */ @@ -2067,8 +2057,8 @@ /* SGL for record payload */ sglist_reset(txq->gl); - if (sglist_append_ext_pgs(txq->gl, ext_pgs, ext_pgs->hdr_len + offset, - plen - (ext_pgs->hdr_len + offset)) != 0) { + if (sglist_append_mbuf_epg(txq->gl, m_tls, m_tls->m_epg_hdrlen + offset, + plen - (m_tls->m_epg_hdrlen + offset)) != 0) { #ifdef INVARIANTS panic("%s: failed to append sglist", __func__); #endif @@ -2090,7 +2080,7 @@ txq->kern_tls_waste += mtod(m_tls, vm_offset_t); else txq->kern_tls_waste += mtod(m_tls, vm_offset_t) - - (ext_pgs->hdr_len + offset); + (m_tls->m_epg_hdrlen + offset); } txsd = &txq->sdesc[pidx]; @@ -2275,7 +2265,7 @@ * for that record. 
*/ for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) { - MPASS(m_tls->m_flags & M_NOMAP); + MPASS(m_tls->m_flags & M_EXTPG); /* * Determine the initial TCP sequence number for this Index: sys/dev/cxgbe/t4_sge.c =================================================================== --- sys/dev/cxgbe/t4_sge.c +++ sys/dev/cxgbe/t4_sge.c @@ -2413,37 +2413,35 @@ static inline int count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr) { - struct mbuf_ext_pgs *ext_pgs; vm_paddr_t paddr; int i, len, off, pglen, pgoff, seglen, segoff; int nsegs = 0; - MBUF_EXT_PGS_ASSERT(m); - ext_pgs = &m->m_ext_pgs; + M_ASSERTEXTPG(m); off = mtod(m, vm_offset_t); len = m->m_len; off += skip; len -= skip; - if (ext_pgs->hdr_len != 0) { - if (off >= ext_pgs->hdr_len) { - off -= ext_pgs->hdr_len; + if (m->m_epg_hdrlen != 0) { + if (off >= m->m_epg_hdrlen) { + off -= m->m_epg_hdrlen; } else { - seglen = ext_pgs->hdr_len - off; + seglen = m->m_epg_hdrlen - off; segoff = off; seglen = min(seglen, len); off = 0; len -= seglen; paddr = pmap_kextract( - (vm_offset_t)&ext_pgs->m_epg_hdr[segoff]); + (vm_offset_t)&m->m_epg_hdr[segoff]); if (*nextaddr != paddr) nsegs++; *nextaddr = paddr + seglen; } } - pgoff = ext_pgs->first_pg_off; - for (i = 0; i < ext_pgs->npgs && len > 0; i++) { - pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + pgoff = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs && len > 0; i++) { + pglen = m_epg_pagelen(m, i, pgoff); if (off >= pglen) { off -= pglen; pgoff = 0; @@ -2454,16 +2452,16 @@ off = 0; seglen = min(seglen, len); len -= seglen; - paddr = ext_pgs->m_epg_pa[i] + segoff; + paddr = m->m_epg_pa[i] + segoff; if (*nextaddr != paddr) nsegs++; *nextaddr = paddr + seglen; pgoff = 0; }; if (len != 0) { - seglen = min(len, ext_pgs->trail_len - off); + seglen = min(len, m->m_epg_trllen - off); len -= seglen; - paddr = pmap_kextract((vm_offset_t)&ext_pgs->m_epg_trail[off]); + paddr = pmap_kextract((vm_offset_t)&m->m_epg_trail[off]); if (*nextaddr != paddr) nsegs++; *nextaddr = paddr + seglen; @@ -2499,7 +2497,7 @@ skip -= len; continue; } - if ((m->m_flags & M_NOMAP) != 0) { + if ((m->m_flags & M_EXTPG) != 0) { *cflags |= MC_NOMAP; nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); skip = 0; @@ -5838,9 +5836,12 @@ immhdrs -= m0->m_len; continue; } - - sglist_append(&sg, mtod(m0, char *) + immhdrs, - m0->m_len - immhdrs); + if (m0->m_flags & M_EXTPG) + sglist_append_mbuf_epg(&sg, m0, + mtod(m0, vm_offset_t), m0->m_len); + else + sglist_append(&sg, mtod(m0, char *) + immhdrs, + m0->m_len - immhdrs); immhdrs = 0; } MPASS(sg.sg_nseg == nsegs); Index: sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- sys/dev/cxgbe/tom/t4_cpl_io.c +++ sys/dev/cxgbe/tom/t4_cpl_io.c @@ -610,8 +610,9 @@ i = -1; for (m = start; m != stop; m = m->m_next) { - if (m->m_flags & M_NOMAP) - rc = sglist_append_mb_ext_pgs(&sg, m); + if (m->m_flags & M_EXTPG) + rc = sglist_append_mbuf_epg(&sg, m, + mtod(m, vm_offset_t), m->m_len); else rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) @@ -730,9 +731,9 @@ for (m = sndptr; m != NULL; m = m->m_next) { int n; - if (m->m_flags & M_NOMAP) { + if (m->m_flags & M_EXTPG) { #ifdef KERN_TLS - if (m->m_ext_pgs.tls != NULL) { + if (m->m_epg_tls != NULL) { toep->flags |= TPF_KTLS; if (plen == 0) { SOCKBUF_UNLOCK(sb); @@ -742,7 +743,8 @@ break; } #endif - n = sglist_count_mb_ext_pgs(m); + n = sglist_count_mbuf_epg(m, + mtod(m, vm_offset_t), m->m_len); } else n = sglist_count(mtod(m, void *), m->m_len); @@ 
-770,7 +772,7 @@ break; } - if (m->m_flags & M_NOMAP) + if (m->m_flags & M_EXTPG) nomap_mbuf_seen = true; if (max_nsegs_1mbuf < n) max_nsegs_1mbuf = n; @@ -1922,20 +1924,18 @@ static void aiotx_free_pgs(struct mbuf *m) { - struct mbuf_ext_pgs *ext_pgs; struct kaiocb *job; vm_page_t pg; - MBUF_EXT_PGS_ASSERT(m); - ext_pgs = &m->m_ext_pgs; + M_ASSERTEXTPG(m); job = m->m_ext.ext_arg1; #ifdef VERBOSE_TRACES CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__, m->m_len, jobtotid(job)); #endif - for (int i = 0; i < ext_pgs->npgs; i++) { - pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); + for (int i = 0; i < m->m_epg_npgs; i++) { + pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); vm_page_unwire(pg, PQ_ACTIVE); } @@ -1952,7 +1952,6 @@ struct vmspace *vm; vm_page_t pgs[MBUF_PEXT_MAX_PGS]; struct mbuf *m, *top, *last; - struct mbuf_ext_pgs *ext_pgs; vm_map_t map; vm_offset_t start; int i, mlen, npages, pgoff; @@ -1990,20 +1989,19 @@ break; } - ext_pgs = &m->m_ext_pgs; - ext_pgs->first_pg_off = pgoff; - ext_pgs->npgs = npages; + m->m_epg_1st_off = pgoff; + m->m_epg_npgs = npages; if (npages == 1) { KASSERT(mlen + pgoff <= PAGE_SIZE, ("%s: single page is too large (off %d len %d)", __func__, pgoff, mlen)); - ext_pgs->last_pg_len = mlen; + m->m_epg_last_len = mlen; } else { - ext_pgs->last_pg_len = mlen - (PAGE_SIZE - pgoff) - + m->m_epg_last_len = mlen - (PAGE_SIZE - pgoff) - (npages - 2) * PAGE_SIZE; } for (i = 0; i < npages; i++) - ext_pgs->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]); + m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pgs[i]); m->m_len = mlen; m->m_ext.ext_size = npages * PAGE_SIZE; Index: sys/dev/cxgbe/tom/t4_tls.c =================================================================== --- sys/dev/cxgbe/tom/t4_tls.c +++ sys/dev/cxgbe/tom/t4_tls.c @@ -1568,26 +1568,24 @@ #ifdef KERN_TLS static int -count_ext_pgs_segs(struct mbuf_ext_pgs *ext_pgs, - struct mbuf_ext_pgs_data *ext_pgs_data) +count_ext_pgs_segs(struct mbuf *m) { vm_paddr_t nextpa; u_int i, nsegs; - MPASS(ext_pgs->npgs > 0); + MPASS(m->m_epg_npgs > 0); nsegs = 1; - nextpa = ext_pgs_data->pa[0] + PAGE_SIZE; - for (i = 1; i < ext_pgs->npgs; i++) { - if (nextpa != ext_pgs_data->pa[i]) + nextpa = m->m_epg_pa[0] + PAGE_SIZE; + for (i = 1; i < m->m_epg_npgs; i++) { + if (nextpa != m->m_epg_pa[i]) nsegs++; - nextpa = ext_pgs_data->pa[i] + PAGE_SIZE; + nextpa = m->m_epg_pa[i] + PAGE_SIZE; } return (nsegs); } static void -write_ktlstx_sgl(void *dst, struct mbuf_ext_pgs *ext_pgs, - struct mbuf_ext_pgs_data *ext_pgs_data, int nsegs) +write_ktlstx_sgl(void *dst, struct mbuf *m, int nsegs) { struct ulptx_sgl *usgl = dst; vm_paddr_t pa; @@ -1600,15 +1598,15 @@ V_ULPTX_NSGE(nsegs)); /* Figure out the first S/G length. 
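+	 * Pages that are physically contiguous with the first one are
+	 * folded into addr0/len0; the loop stops at the first gap.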
*/ - pa = ext_pgs_data->pa[0] + ext_pgs->first_pg_off; + pa = m->m_epg_pa[0] + m->m_epg_1st_off; usgl->addr0 = htobe64(pa); - len = mbuf_ext_pg_len(ext_pgs, 0, ext_pgs->first_pg_off); + len = m_epg_pagelen(m, 0, m->m_epg_1st_off); pa += len; - for (i = 1; i < ext_pgs->npgs; i++) { - if (ext_pgs_data->pa[i] != pa) + for (i = 1; i < m->m_epg_npgs; i++) { + if (m->m_epg_pa[i] != pa) break; - len += mbuf_ext_pg_len(ext_pgs, i, 0); - pa += mbuf_ext_pg_len(ext_pgs, i, 0); + len += m_epg_pagelen(m, i, 0); + pa += m_epg_pagelen(m, i, 0); } usgl->len0 = htobe32(len); #ifdef INVARIANTS @@ -1616,21 +1614,21 @@ #endif j = -1; - for (; i < ext_pgs->npgs; i++) { - if (j == -1 || ext_pgs_data->pa[i] != pa) { + for (; i < m->m_epg_npgs; i++) { + if (j == -1 || m->m_epg_pa[i] != pa) { if (j >= 0) usgl->sge[j / 2].len[j & 1] = htobe32(len); j++; #ifdef INVARIANTS nsegs--; #endif - pa = ext_pgs_data->pa[i]; + pa = m->m_epg_pa[i]; usgl->sge[j / 2].addr[j & 1] = htobe64(pa); - len = mbuf_ext_pg_len(ext_pgs, i, 0); + len = m_epg_pagelen(m, i, 0); pa += len; } else { - len += mbuf_ext_pg_len(ext_pgs, i, 0); - pa += mbuf_ext_pg_len(ext_pgs, i, 0); + len += m_epg_pagelen(m, i, 0); + pa += m_epg_pagelen(m, i, 0); } } if (j >= 0) { @@ -1639,8 +1637,7 @@ if ((j & 1) == 0) usgl->sge[j / 2].len[1] = htobe32(0); } - KASSERT(nsegs == 0, ("%s: nsegs %d, ext_pgs %p", __func__, nsegs, - ext_pgs)); + KASSERT(nsegs == 0, ("%s: nsegs %d, m %p", __func__, nsegs, m)); } /* @@ -1744,9 +1741,9 @@ return; } - KASSERT(m->m_flags & M_NOMAP, ("%s: mbuf %p is not NOMAP", + KASSERT(m->m_flags & M_EXTPG, ("%s: mbuf %p is not NOMAP", __func__, m)); - KASSERT(m->m_ext_pgs.tls != NULL, + KASSERT(m->m_epg_tls != NULL, ("%s: mbuf %p doesn't have TLS session", __func__, m)); /* Calculate WR length. */ @@ -1758,8 +1755,7 @@ wr_len += AES_BLOCK_LEN; /* Account for SGL in work request length. 
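+	 * Each segment past the first needs a 64-bit address plus a
+	 * 32-bit length; pairs of segments share three 8-byte flits and
+	 * an odd remainder is padded to a whole flit, which is what the
+	 * expression below computes.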
*/ - nsegs = count_ext_pgs_segs(&m->m_ext_pgs, - &m->m_ext.ext_pgs); + nsegs = count_ext_pgs_segs(m); wr_len += sizeof(struct ulptx_sgl) + ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; @@ -1816,19 +1812,19 @@ thdr = (struct tls_hdr *)&m->m_epg_hdr; #ifdef VERBOSE_TRACES CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x", - __func__, toep->tid, m->m_ext_pgs.seqno, thdr->type, + __func__, toep->tid, m->m_epg_seqno, thdr->type, m->m_len); #endif txwr = wrtod(wr); cpl = (struct cpl_tx_tls_sfo *)(txwr + 1); memset(txwr, 0, roundup2(wr_len, 16)); credits = howmany(wr_len, 16); - expn_size = m->m_ext_pgs.hdr_len + - m->m_ext_pgs.trail_len; + expn_size = m->m_epg_hdrlen + + m->m_epg_trllen; tls_size = m->m_len - expn_size; write_tlstx_wr(txwr, toep, 0, tls_size, expn_size, 1, credits, shove, 1); - toep->tls.tx_seq_no = m->m_ext_pgs.seqno; + toep->tls.tx_seq_no = m->m_epg_seqno; write_tlstx_cpl(cpl, toep, thdr, tls_size, 1); tls_copy_tx_key(toep, cpl + 1); @@ -1837,8 +1833,7 @@ memcpy(buf, thdr + 1, toep->tls.iv_len); buf += AES_BLOCK_LEN; - write_ktlstx_sgl(buf, &m->m_ext_pgs, &m->m_ext.ext_pgs, - nsegs); + write_ktlstx_sgl(buf, m, nsegs); KASSERT(toep->tx_credits >= credits, ("%s: not enough credits", __func__)); Index: sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c =================================================================== --- sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c +++ sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c @@ -624,7 +624,7 @@ /* check format of incoming mbuf */ if (mb->m_next == NULL || - (mb->m_next->m_flags & (M_NOMAP | M_EXT)) != (M_NOMAP | M_EXT)) { + (mb->m_next->m_flags & (M_EXTPG | M_EXT)) != (M_EXTPG | M_EXT)) { top = NULL; goto done; } @@ -680,13 +680,11 @@ static int mlx5e_sq_tls_populate(struct mbuf *mb, uint64_t *pseq) { - struct mbuf_ext_pgs *ext_pgs; for (; mb != NULL; mb = mb->m_next) { - if (!(mb->m_flags & M_NOMAP)) + if (!(mb->m_flags & M_EXTPG)) continue; - ext_pgs = &mb->m_ext_pgs; - *pseq = ext_pgs->seqno; + *pseq = mb->m_epg_seqno; return (1); } return (0); Index: sys/kern/kern_mbuf.c =================================================================== --- sys/kern/kern_mbuf.c +++ sys/kern/kern_mbuf.c @@ -115,7 +115,7 @@ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ -bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile & TLS */ +bool mb_use_ext_pgs; /* use M_EXTPG mbufs for sendfile & TLS */ SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, &mb_use_ext_pgs, 0, "Use unmapped mbufs for sendfile(2) and TLS offload"); @@ -311,9 +311,6 @@ /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); -_Static_assert(offsetof(struct mbuf, m_ext) == - offsetof(struct mbuf, m_ext_pgs.m_ext), - "m_ext offset mismatch between mbuf and ext_pgs"); _Static_assert(sizeof(struct mbuf) <= MSIZE, "size of mbuf exceeds MSIZE"); /* @@ -825,7 +822,7 @@ /* * Free "count" units of I/O from an mbuf chain. They could be held - * in EXT_PGS or just as a normal mbuf. This code is intended to be + * in M_EXTPG or just as a normal mbuf. This code is intended to be * called in an error path (I/O error, closed connection, etc). 
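+ * An M_EXTPG mbuf counts as m_epg_nrdy units and is freed only once
+ * all of its units have been consumed; any other mbuf counts as one.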
*/ void @@ -834,10 +831,9 @@ int i; for (i = 0; i < count && m != NULL; i++) { - if ((m->m_flags & M_EXT) != 0 && - m->m_ext.ext_type == EXT_PGS) { - m->m_ext_pgs.nrdy--; - if (m->m_ext_pgs.nrdy != 0) + if ((m->m_flags & M_EXTPG) != 0) { + m->m_epg_nrdy--; + if (m->m_epg_nrdy != 0) continue; } m = m_free(m); @@ -856,16 +852,15 @@ mb_unmapped_compress(struct mbuf *m) { volatile u_int *refcnt; - struct mbuf m_temp; + char buf[MLEN]; /* * Assert that 'm' does not have a packet header. If 'm' had * a packet header, it would only be able to hold MHLEN bytes * and m_data would have to be initialized differently. */ - KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) && - m->m_ext.ext_type == EXT_PGS, - ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m)); + KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXTPG), + ("%s: m %p !M_EXTPG or M_PKTHDR", __func__, m)); KASSERT(m->m_len <= MLEN, ("m_len too large %p", m)); if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { @@ -879,21 +874,17 @@ if (*refcnt != 1) return (EBUSY); - m_init(&m_temp, M_NOWAIT, MT_DATA, 0); - - /* copy data out of old mbuf */ - m_copydata(m, 0, m->m_len, mtod(&m_temp, char *)); - m_temp.m_len = m->m_len; + m_copydata(m, 0, m->m_len, buf); /* Free the backing pages. */ m->m_ext.ext_free(m); /* Turn 'm' into a "normal" mbuf. */ - m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP); + m->m_flags &= ~(M_EXT | M_RDONLY | M_EXTPG); m->m_data = m->m_dat; - /* copy data back into m */ - m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, char *)); + /* Copy data back into m. */ + bcopy(buf, mtod(m, char *), m->m_len); return (0); } @@ -909,12 +900,12 @@ * unmapped data is stored in an mbuf with an EXT_SFBUF external * cluster. These mbufs use an sf_buf to provide a valid KVA for the * associated physical page. They also hold a reference on the - * original EXT_PGS mbuf to ensure the physical page doesn't go away. + * original M_EXTPG mbuf to ensure the physical page doesn't go away. * Finally, any TLS trailer data is stored in a regular mbuf. * * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF * mbufs. It frees the associated sf_buf and releases its reference - * on the original EXT_PGS mbuf. + * on the original M_EXTPG mbuf. * * _mb_unmapped_to_ext() is a helper function that converts a single * unmapped mbuf into a chain of mbufs. @@ -933,15 +924,14 @@ sf = m->m_ext.ext_arg1; sf_buf_free(sf); - /* Drop the reference on the backing EXT_PGS mbuf. */ + /* Drop the reference on the backing M_EXTPG mbuf. */ old_m = m->m_ext.ext_arg2; - mb_free_ext(old_m); + mb_free_extpg(old_m); } static struct mbuf * _mb_unmapped_to_ext(struct mbuf *m) { - struct mbuf_ext_pgs *ext_pgs; struct mbuf *m_new, *top, *prev, *mref; struct sf_buf *sf; vm_page_t pg; @@ -949,10 +939,9 @@ volatile u_int *refcnt; u_int ref_inc = 0; - MBUF_EXT_PGS_ASSERT(m); - ext_pgs = &m->m_ext_pgs; + M_ASSERTEXTPG(m); len = m->m_len; - KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p", + KASSERT(m->m_epg_tls == NULL, ("%s: can't convert TLS mbuf %p", __func__, m)); /* See if this is the mbuf that holds the embedded refcount. 
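+	 * (EXT_FLAG_EMBREF means the counter is stored in this mbuf's own
+	 * m_ext; otherwise ext_cnt points into the mbuf that embeds it.)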
*/ @@ -970,11 +959,11 @@ off = mtod(m, vm_offset_t); top = NULL; - if (ext_pgs->hdr_len != 0) { - if (off >= ext_pgs->hdr_len) { - off -= ext_pgs->hdr_len; + if (m->m_epg_hdrlen != 0) { + if (off >= m->m_epg_hdrlen) { + off -= m->m_epg_hdrlen; } else { - seglen = ext_pgs->hdr_len - off; + seglen = m->m_epg_hdrlen - off; segoff = off; seglen = min(seglen, len); off = 0; @@ -984,13 +973,13 @@ goto fail; m_new->m_len = seglen; prev = top = m_new; - memcpy(mtod(m_new, void *), &ext_pgs->m_epg_hdr[segoff], + memcpy(mtod(m_new, void *), &m->m_epg_hdr[segoff], seglen); } } - pgoff = ext_pgs->first_pg_off; - for (i = 0; i < ext_pgs->npgs && len > 0; i++) { - pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + pgoff = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs && len > 0; i++) { + pglen = m_epg_pagelen(m, i, pgoff); if (off >= pglen) { off -= pglen; pgoff = 0; @@ -1002,7 +991,7 @@ seglen = min(seglen, len); len -= seglen; - pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); + pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); m_new = m_get(M_NOWAIT, MT_DATA); if (m_new == NULL) goto fail; @@ -1025,9 +1014,9 @@ pgoff = 0; }; if (len != 0) { - KASSERT((off + len) <= ext_pgs->trail_len, + KASSERT((off + len) <= m->m_epg_trllen, ("off + len > trail (%d + %d > %d)", off, len, - ext_pgs->trail_len)); + m->m_epg_trllen)); m_new = m_get(M_NOWAIT, MT_DATA); if (m_new == NULL) goto fail; @@ -1036,7 +1025,7 @@ else prev->m_next = m_new; m_new->m_len = len; - memcpy(mtod(m_new, void *), &ext_pgs->m_epg_trail[off], len); + memcpy(mtod(m_new, void *), &m->m_epg_trail[off], len); } if (ref_inc != 0) { @@ -1080,7 +1069,7 @@ for (m = top; m != NULL; m = next) { /* m might be freed, so cache the next pointer. */ next = m->m_next; - if (m->m_flags & M_NOMAP) { + if (m->m_flags & M_EXTPG) { if (prev != NULL) { /* * Remove 'm' from the new chain so @@ -1118,7 +1107,7 @@ } /* - * Allocate an empty EXT_PGS mbuf. The ext_free routine is + * Allocate an empty M_EXTPG mbuf. The ext_free routine is * responsible for freeing any pages backing this mbuf when it is * freed. */ @@ -1126,25 +1115,22 @@ mb_alloc_ext_pgs(int how, m_ext_free_t ext_free) { struct mbuf *m; - struct mbuf_ext_pgs *ext_pgs; m = m_get(how, MT_DATA); if (m == NULL) return (NULL); - ext_pgs = &m->m_ext_pgs; - ext_pgs->npgs = 0; - ext_pgs->nrdy = 0; - ext_pgs->first_pg_off = 0; - ext_pgs->last_pg_len = 0; - ext_pgs->flags = 0; - ext_pgs->hdr_len = 0; - ext_pgs->trail_len = 0; - ext_pgs->tls = NULL; - ext_pgs->so = NULL; + m->m_epg_npgs = 0; + m->m_epg_nrdy = 0; + m->m_epg_1st_off = 0; + m->m_epg_last_len = 0; + m->m_epg_flags = 0; + m->m_epg_hdrlen = 0; + m->m_epg_trllen = 0; + m->m_epg_tls = NULL; + m->m_epg_so = NULL; m->m_data = NULL; - m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP); - m->m_ext.ext_type = EXT_PGS; + m->m_flags |= (M_EXT | M_RDONLY | M_EXTPG); m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_ext.ext_size = 0; @@ -1152,39 +1138,6 @@ return (m); } -#ifdef INVARIANT_SUPPORT -void -mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs) -{ - - /* - * NB: This expects a non-empty buffer (npgs > 0 and - * last_pg_len > 0). 
- */ - KASSERT(ext_pgs->npgs > 0, - ("ext_pgs with no valid pages: %p", ext_pgs)); - KASSERT(ext_pgs->npgs <= nitems(ext_pgs->m_epg_pa), - ("ext_pgs with too many pages: %p", ext_pgs)); - KASSERT(ext_pgs->nrdy <= ext_pgs->npgs, - ("ext_pgs with too many ready pages: %p", ext_pgs)); - KASSERT(ext_pgs->first_pg_off < PAGE_SIZE, - ("ext_pgs with too large page offset: %p", ext_pgs)); - KASSERT(ext_pgs->last_pg_len > 0, - ("ext_pgs with zero last page length: %p", ext_pgs)); - KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE, - ("ext_pgs with too large last page length: %p", ext_pgs)); - if (ext_pgs->npgs == 1) { - KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <= - PAGE_SIZE, ("ext_pgs with single page too large: %p", - ext_pgs)); - } - KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->m_epg_hdr), - ("ext_pgs with too large header length: %p", ext_pgs)); - KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->m_epg_trail), - ("ext_pgs with too large header length: %p", ext_pgs)); -} -#endif - /* * Clean up after mbufs with M_EXT storage attached to them if the * reference count hits 1. @@ -1250,26 +1203,6 @@ uma_zfree(zone_jumbo16, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; - case EXT_PGS: { -#ifdef KERN_TLS - struct mbuf_ext_pgs *pgs; - struct ktls_session *tls; -#endif - - KASSERT(mref->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); - mref->m_ext.ext_free(mref); -#ifdef KERN_TLS - pgs = &mref->m_ext_pgs; - tls = pgs->tls; - if (tls != NULL && - !refcount_release_if_not_last(&tls->refcount)) - ktls_enqueue_to_free(pgs); - else -#endif - uma_zfree(zone_mbuf, mref); - break; - } case EXT_SFBUF: case EXT_NET_DRV: case EXT_MOD_TYPE: @@ -1298,6 +1231,48 @@ uma_zfree(zone_mbuf, m); } +/* + * Clean up after mbufs with M_EXTPG storage attached to them if the + * reference count hits 1. + */ +void +mb_free_extpg(struct mbuf *m) +{ + volatile u_int *refcnt; + struct mbuf *mref; + + M_ASSERTEXTPG(m); + + /* See if this is the mbuf that holds the embedded refcount. */ + if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { + refcnt = &m->m_ext.ext_count; + mref = m; + } else { + KASSERT(m->m_ext.ext_cnt != NULL, + ("%s: no refcounting pointer on %p", __func__, m)); + refcnt = m->m_ext.ext_cnt; + mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); + } + + /* Free attached storage if this mbuf is the only reference to it. */ + if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { + KASSERT(mref->m_ext.ext_free != NULL, + ("%s: ext_free not set", __func__)); + + mref->m_ext.ext_free(mref); +#ifdef KERN_TLS + if (mref->m_epg_tls != NULL && + !refcount_release_if_not_last(&mref->m_epg_tls->refcount)) + ktls_enqueue_to_free(mref); + else +#endif + uma_zfree(zone_mbuf, mref); + } + + if (m != mref) + uma_zfree(zone_mbuf, m); +} + /* * Official mbuf(9) allocation KPI for stack and drivers: * Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -165,22 +165,19 @@ static void sendfile_free_mext_pg(struct mbuf *m) { - struct mbuf_ext_pgs *ext_pgs; vm_page_t pg; int flags, i; bool cache_last; - KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS, - ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)); + M_ASSERTEXTPG(m); cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST; - ext_pgs = &m->m_ext_pgs; flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? 
VPR_TRYFREE : 0; - for (i = 0; i < ext_pgs->npgs; i++) { - if (cache_last && i == ext_pgs->npgs - 1) + for (i = 0; i < m->m_epg_npgs; i++) { + if (cache_last && i == m->m_epg_npgs - 1) flags = 0; - pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); + pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); vm_page_release(pg, flags); } @@ -347,9 +344,8 @@ } #if defined(KERN_TLS) && defined(INVARIANTS) - if ((sfio->m->m_flags & M_EXT) != 0 && - sfio->m->m_ext.ext_type == EXT_PGS) - KASSERT(sfio->tls == sfio->m->m_ext_pgs.tls, + if ((sfio->m->m_flags & M_EXTPG) != 0) + KASSERT(sfio->tls == sfio->m->m_epg_tls, ("TLS session mismatch")); else KASSERT(sfio->tls == NULL, @@ -674,7 +670,6 @@ #ifdef KERN_TLS struct ktls_session *tls; #endif - struct mbuf_ext_pgs *ext_pgs; struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; struct shmfd *shmfd; @@ -829,7 +824,7 @@ #ifdef KERN_TLS if (tls != NULL) mh = m_uiotombuf(hdr_uio, M_WAITOK, space, - tls->params.max_frame_len, M_NOMAP); + tls->params.max_frame_len, M_EXTPG); else #endif mh = m_uiotombuf(hdr_uio, M_WAITOK, @@ -999,18 +994,11 @@ if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; - if (m0->m_ext.ext_type == - EXT_PGS) - m0->m_ext.ext_arg1 = - sfs; - else - m0->m_ext.ext_arg2 = - sfs; + m0->m_ext.ext_arg1 = sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } - ext_pgs = &m0->m_ext_pgs; ext_pgs_idx = 0; /* Append to mbuf chain. */ @@ -1019,19 +1007,19 @@ else m = m0; mtail = m0; - ext_pgs->first_pg_off = + m0->m_epg_1st_off = vmoff(i, off) & PAGE_MASK; } if (nios) { mtail->m_flags |= M_NOTREADY; - ext_pgs->nrdy++; + m0->m_epg_nrdy++; } - ext_pgs->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga); - ext_pgs->npgs++; + m0->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga); + m0->m_epg_npgs++; xfs = xfsize(i, npages, off, space); - ext_pgs->last_pg_len = xfs; - MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs); + m0->m_epg_last_len = xfs; + MBUF_EXT_PGS_ASSERT_SANITY(m0); mtail->m_len += xfs; mtail->m_ext.ext_size += PAGE_SIZE; continue; @@ -1081,10 +1069,6 @@ m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; - if (m0->m_ext.ext_type == EXT_PGS) - m0->m_ext.ext_arg1 = sfs; - else - m0->m_ext.ext_arg2 = sfs; m0->m_ext.ext_arg2 = sfs; mtx_lock(&sfs->mtx); sfs->count++; Index: sys/kern/subr_bus_dma.c =================================================================== --- sys/kern/subr_bus_dma.c +++ sys/kern/subr_bus_dma.c @@ -116,14 +116,12 @@ * Load an unmapped mbuf */ static int -_bus_dmamap_load_unmapped_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, +_bus_dmamap_load_mbuf_epg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m, bus_dma_segment_t *segs, int *nsegs, int flags) { - struct mbuf_ext_pgs *ext_pgs; int error, i, off, len, pglen, pgoff, seglen, segoff; - MBUF_EXT_PGS_ASSERT(m); - ext_pgs = &m->m_ext_pgs; + M_ASSERTEXTPG(m); len = m->m_len; error = 0; @@ -131,23 +129,23 @@ /* Skip over any data removed from the front. 
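+	 * For M_EXTPG mbufs m_data is not a pointer into a cluster, so
+	 * mtod() yields the logical offset that m_adj() has trimmed from
+	 * the front of the buffer.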
*/ off = mtod(m, vm_offset_t); - if (ext_pgs->hdr_len != 0) { - if (off >= ext_pgs->hdr_len) { - off -= ext_pgs->hdr_len; + if (m->m_epg_hdrlen != 0) { + if (off >= m->m_epg_hdrlen) { + off -= m->m_epg_hdrlen; } else { - seglen = ext_pgs->hdr_len - off; + seglen = m->m_epg_hdrlen - off; segoff = off; seglen = min(seglen, len); off = 0; len -= seglen; error = _bus_dmamap_load_buffer(dmat, map, - &ext_pgs->m_epg_hdr[segoff], seglen, kernel_pmap, + &m->m_epg_hdr[segoff], seglen, kernel_pmap, flags, segs, nsegs); } } - pgoff = ext_pgs->first_pg_off; - for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) { - pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + pgoff = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs && error == 0 && len > 0; i++) { + pglen = m_epg_pagelen(m, i, pgoff); if (off >= pglen) { off -= pglen; pgoff = 0; @@ -159,15 +157,15 @@ seglen = min(seglen, len); len -= seglen; error = _bus_dmamap_load_phys(dmat, map, - ext_pgs->m_epg_pa[i] + segoff, seglen, flags, segs, nsegs); + m->m_epg_pa[i] + segoff, seglen, flags, segs, nsegs); pgoff = 0; }; if (len != 0 && error == 0) { - KASSERT((off + len) <= ext_pgs->trail_len, + KASSERT((off + len) <= m->m_epg_trllen, ("off + len > trail (%d + %d > %d)", off, len, - ext_pgs->trail_len)); + m->m_epg_trllen)); error = _bus_dmamap_load_buffer(dmat, map, - &ext_pgs->m_epg_trail[off], len, kernel_pmap, flags, segs, + &m->m_epg_trail[off], len, kernel_pmap, flags, segs, nsegs); } return (error); @@ -186,8 +184,8 @@ error = 0; for (m = m0; m != NULL && error == 0; m = m->m_next) { if (m->m_len > 0) { - if ((m->m_flags & M_NOMAP) != 0) - error = _bus_dmamap_load_unmapped_mbuf_sg(dmat, + if ((m->m_flags & M_EXTPG) != 0) + error = _bus_dmamap_load_mbuf_epg(dmat, map, m, segs, nsegs, flags); else error = _bus_dmamap_load_buffer(dmat, map, Index: sys/kern/subr_sglist.c =================================================================== --- sys/kern/subr_sglist.c +++ sys/kern/subr_sglist.c @@ -220,10 +220,10 @@ /* * Determine the number of scatter/gather list elements needed to - * describe an EXT_PGS buffer. + * describe an M_EXTPG mbuf. 
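+ * Physically contiguous ranges are coalesced: a page contributes a new
+ * element only when it does not extend the previous range.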
 */
 int
-sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off, size_t len)
+sglist_count_mbuf_epg(struct mbuf *m, size_t off, size_t len)
 {
 	vm_paddr_t nextaddr, paddr;
 	size_t seglen, segoff;
@@ -233,23 +233,23 @@
 		return (0);
 
 	nsegs = 0;
-	if (ext_pgs->hdr_len != 0) {
-		if (off >= ext_pgs->hdr_len) {
-			off -= ext_pgs->hdr_len;
+	if (m->m_epg_hdrlen != 0) {
+		if (off >= m->m_epg_hdrlen) {
+			off -= m->m_epg_hdrlen;
 		} else {
-			seglen = ext_pgs->hdr_len - off;
+			seglen = m->m_epg_hdrlen - off;
 			segoff = off;
 			seglen = MIN(seglen, len);
 			off = 0;
 			len -= seglen;
-			nsegs += sglist_count(&ext_pgs->m_epg_hdr[segoff],
+			nsegs += sglist_count(&m->m_epg_hdr[segoff],
 			    seglen);
 		}
 	}
 	nextaddr = 0;
-	pgoff = ext_pgs->first_pg_off;
-	for (i = 0; i < ext_pgs->npgs && len > 0; i++) {
-		pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
+	pgoff = m->m_epg_1st_off;
+	for (i = 0; i < m->m_epg_npgs && len > 0; i++) {
+		pglen = m_epg_pagelen(m, i, pgoff);
 		if (off >= pglen) {
 			off -= pglen;
 			pgoff = 0;
@@ -260,34 +260,21 @@
 		off = 0;
 		seglen = MIN(seglen, len);
 		len -= seglen;
-		paddr = ext_pgs->m_epg_pa[i] + segoff;
+		paddr = m->m_epg_pa[i] + segoff;
 		if (paddr != nextaddr)
 			nsegs++;
 		nextaddr = paddr + seglen;
 		pgoff = 0;
 	};
 	if (len != 0) {
-		seglen = MIN(len, ext_pgs->trail_len - off);
+		seglen = MIN(len, m->m_epg_trllen - off);
 		len -= seglen;
-		nsegs += sglist_count(&ext_pgs->m_epg_trail[off], seglen);
+		nsegs += sglist_count(&m->m_epg_trail[off], seglen);
 	}
 	KASSERT(len == 0, ("len != 0"));
 	return (nsegs);
 }
 
-/*
- * Determine the number of scatter/gather list elements needed to
- * describe an EXT_PGS mbuf.
- */
-int
-sglist_count_mb_ext_pgs(struct mbuf *m)
-{
-
-	MBUF_EXT_PGS_ASSERT(m);
-	return (sglist_count_ext_pgs(&m->m_ext_pgs, mtod(m, vm_offset_t),
-	    m->m_len));
-}
-
 /*
  * Allocate a scatter/gather list along with 'nsegs' segments.  The
  * 'mflags' parameters are the same as passed to malloc(9).  The caller
@@ -390,35 +377,36 @@
 }
 
 /*
- * Append the segments to describe an EXT_PGS buffer to a
- * scatter/gather list.  If there are insufficient segments, then this
- * fails with EFBIG.
+ * Append the segments of a single multi-page mbuf to a scatter/gather
+ * list.  If there are insufficient segments, then this fails with EFBIG.
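+ *
+ * A typical caller sizes the list first and then appends, mirroring
+ * the removed sglist_count_mb_ext_pgs()/sglist_append_mb_ext_pgs()
+ * wrappers:
+ *
+ *	nsegs = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t), m->m_len);
+ *	sg = sglist_alloc(nsegs, M_NOWAIT);
+ *	if (sg != NULL)
+ *		error = sglist_append_mbuf_epg(sg, m,
+ *		    mtod(m, vm_offset_t), m->m_len);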
*/ int -sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs, - size_t off, size_t len) +sglist_append_mbuf_epg(struct sglist *sg, struct mbuf *m, size_t off, + size_t len) { size_t seglen, segoff; vm_paddr_t paddr; int error, i, pglen, pgoff; + M_ASSERTEXTPG(m); + error = 0; - if (ext_pgs->hdr_len != 0) { - if (off >= ext_pgs->hdr_len) { - off -= ext_pgs->hdr_len; + if (m->m_epg_hdrlen != 0) { + if (off >= m->m_epg_hdrlen) { + off -= m->m_epg_hdrlen; } else { - seglen = ext_pgs->hdr_len - off; + seglen = m->m_epg_hdrlen - off; segoff = off; seglen = MIN(seglen, len); off = 0; len -= seglen; error = sglist_append(sg, - &ext_pgs->m_epg_hdr[segoff], seglen); + &m->m_epg_hdr[segoff], seglen); } } - pgoff = ext_pgs->first_pg_off; - for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) { - pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + pgoff = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs && error == 0 && len > 0; i++) { + pglen = m_epg_pagelen(m, i, pgoff); if (off >= pglen) { off -= pglen; pgoff = 0; @@ -429,36 +417,21 @@ off = 0; seglen = MIN(seglen, len); len -= seglen; - paddr = ext_pgs->m_epg_pa[i] + segoff; + paddr = m->m_epg_pa[i] + segoff; error = sglist_append_phys(sg, paddr, seglen); pgoff = 0; }; if (error == 0 && len > 0) { - seglen = MIN(len, ext_pgs->trail_len - off); + seglen = MIN(len, m->m_epg_trllen - off); len -= seglen; error = sglist_append(sg, - &ext_pgs->m_epg_trail[off], seglen); + &m->m_epg_trail[off], seglen); } if (error == 0) KASSERT(len == 0, ("len != 0")); return (error); } -/* - * Append the segments to describe an EXT_PGS mbuf to a scatter/gather - * list. If there are insufficient segments, then this fails with - * EFBIG. - */ -int -sglist_append_mb_ext_pgs(struct sglist *sg, struct mbuf *m) -{ - - /* for now, all unmapped mbufs are assumed to be EXT_PGS */ - MBUF_EXT_PGS_ASSERT(m); - return (sglist_append_ext_pgs(sg, &m->m_ext_pgs, - mtod(m, vm_offset_t), m->m_len)); -} - /* * Append the segments that describe a single mbuf chain to a * scatter/gather list. If there are insufficient segments, then this @@ -478,8 +451,9 @@ SGLIST_SAVE(sg, save); for (m = m0; m != NULL; m = m->m_next) { if (m->m_len > 0) { - if ((m->m_flags & M_NOMAP) != 0) - error = sglist_append_mb_ext_pgs(sg, m); + if ((m->m_flags & M_EXTPG) != 0) + error = sglist_append_mbuf_epg(sg, m, + mtod(m, vm_offset_t), m->m_len); else error = sglist_append(sg, m->m_data, m->m_len); Index: sys/kern/uipc_ktls.c =================================================================== --- sys/kern/uipc_ktls.c +++ sys/kern/uipc_ktls.c @@ -79,7 +79,7 @@ struct ktls_wq { struct mtx mtx; - STAILQ_HEAD(, mbuf_ext_pgs) head; + STAILQ_HEAD(, mbuf) head; bool running; } __aligned(CACHE_LINE_SIZE); @@ -1213,14 +1213,12 @@ void ktls_seq(struct sockbuf *sb, struct mbuf *m) { - struct mbuf_ext_pgs *pgs; for (; m != NULL; m = m->m_next) { - KASSERT((m->m_flags & M_NOMAP) != 0, + KASSERT((m->m_flags & M_EXTPG) != 0, ("ktls_seq: mapped mbuf %p", m)); - pgs = &m->m_ext_pgs; - pgs->seqno = sb->sb_tls_seqno; + m->m_epg_seqno = sb->sb_tls_seqno; sb->sb_tls_seqno++; } } @@ -1244,7 +1242,6 @@ { struct tls_record_layer *tlshdr; struct mbuf *m; - struct mbuf_ext_pgs *pgs; uint64_t *noncep; uint16_t tls_len; int maxlen; @@ -1263,17 +1260,16 @@ * TLS frames require unmapped mbufs to store session * info. 
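+		 * The session pointer and the header and trailer lengths
+		 * all live in the M_EXTPG metadata initialized below.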
*/ - KASSERT((m->m_flags & M_NOMAP) != 0, + KASSERT((m->m_flags & M_EXTPG) != 0, ("ktls_frame: mapped mbuf %p (top = %p)\n", m, top)); tls_len = m->m_len; - pgs = &m->m_ext_pgs; /* Save a reference to the session. */ - pgs->tls = ktls_hold(tls); + m->m_epg_tls = ktls_hold(tls); - pgs->hdr_len = tls->params.tls_hlen; - pgs->trail_len = tls->params.tls_tlen; + m->m_epg_hdrlen = tls->params.tls_hlen; + m->m_epg_trllen = tls->params.tls_tlen; if (tls->params.cipher_algorithm == CRYPTO_AES_CBC) { int bs, delta; @@ -1295,12 +1291,12 @@ */ bs = tls->params.tls_bs; delta = (tls_len + tls->params.tls_tlen) & (bs - 1); - pgs->trail_len -= delta; + m->m_epg_trllen -= delta; } - m->m_len += pgs->hdr_len + pgs->trail_len; + m->m_len += m->m_epg_hdrlen + m->m_epg_trllen; /* Populate the TLS header. */ - tlshdr = (void *)pgs->m_epg_hdr; + tlshdr = (void *)m->m_epg_hdr; tlshdr->tls_vmajor = tls->params.tls_vmajor; /* @@ -1312,8 +1308,8 @@ tlshdr->tls_vminor = TLS_MINOR_VER_TWO; tlshdr->tls_type = TLS_RLTYPE_APP; /* save the real record type for later */ - pgs->record_type = record_type; - pgs->m_epg_trail[0] = record_type; + m->m_epg_record_type = record_type; + m->m_epg_trail[0] = record_type; } else { tlshdr->tls_vminor = tls->params.tls_vminor; tlshdr->tls_type = record_type; @@ -1349,23 +1345,23 @@ */ if (tls->mode == TCP_TLS_MODE_SW) { m->m_flags |= M_NOTREADY; - pgs->nrdy = pgs->npgs; - *enq_cnt += pgs->npgs; + m->m_epg_nrdy = m->m_epg_npgs; + *enq_cnt += m->m_epg_npgs; } } } void -ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs) +ktls_enqueue_to_free(struct mbuf *m) { struct ktls_wq *wq; bool running; /* Mark it for freeing. */ - pgs->mbuf = NULL; - wq = &ktls_wq[pgs->tls->wq_index]; + m->m_epg_flags |= EPG_FLAG_2FREE; + wq = &ktls_wq[m->m_epg_tls->wq_index]; mtx_lock(&wq->mtx); - STAILQ_INSERT_TAIL(&wq->head, pgs, stailq); + STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq); running = wq->running; mtx_unlock(&wq->mtx); if (!running) @@ -1375,31 +1371,27 @@ void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count) { - struct mbuf_ext_pgs *pgs; struct ktls_wq *wq; bool running; - KASSERT(((m->m_flags & (M_NOMAP | M_NOTREADY)) == - (M_NOMAP | M_NOTREADY)), + KASSERT(((m->m_flags & (M_EXTPG | M_NOTREADY)) == + (M_EXTPG | M_NOTREADY)), ("ktls_enqueue: %p not unready & nomap mbuf\n", m)); KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count")); - pgs = &m->m_ext_pgs; + KASSERT(m->m_epg_tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf")); - KASSERT(pgs->tls->mode == TCP_TLS_MODE_SW, ("!SW TLS mbuf")); - - pgs->enc_cnt = page_count; - pgs->mbuf = m; + m->m_epg_enc_cnt = page_count; /* * Save a pointer to the socket. The caller is responsible * for taking an additional reference via soref(). 
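+	 * The pointer is stashed in m_epg_so and consumed by the worker
+	 * thread in ktls_encrypt().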
*/ - pgs->so = so; + m->m_epg_so = so; - wq = &ktls_wq[pgs->tls->wq_index]; + wq = &ktls_wq[m->m_epg_tls->wq_index]; mtx_lock(&wq->mtx); - STAILQ_INSERT_TAIL(&wq->head, pgs, stailq); + STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq); running = wq->running; mtx_unlock(&wq->mtx); if (!running) @@ -1408,11 +1400,11 @@ } static __noinline void -ktls_encrypt(struct mbuf_ext_pgs *pgs) +ktls_encrypt(struct mbuf *top) { struct ktls_session *tls; struct socket *so; - struct mbuf *m, *top; + struct mbuf *m; vm_paddr_t parray[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; struct iovec src_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; struct iovec dst_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; @@ -1420,16 +1412,14 @@ int error, i, len, npages, off, total_pages; bool is_anon; - so = pgs->so; - tls = pgs->tls; - top = pgs->mbuf; - KASSERT(tls != NULL, ("tls = NULL, top = %p, pgs = %p\n", top, pgs)); - KASSERT(so != NULL, ("so = NULL, top = %p, pgs = %p\n", top, pgs)); + so = top->m_epg_so; + tls = top->m_epg_tls; + KASSERT(tls != NULL, ("tls = NULL, top = %p\n", top)); + KASSERT(so != NULL, ("so = NULL, top = %p\n", top)); #ifdef INVARIANTS - pgs->so = NULL; - pgs->mbuf = NULL; + top->m_epg_so = NULL; #endif - total_pages = pgs->enc_cnt; + total_pages = top->m_epg_enc_cnt; npages = 0; /* @@ -1451,15 +1441,13 @@ */ error = 0; for (m = top; npages != total_pages; m = m->m_next) { - pgs = &m->m_ext_pgs; - - KASSERT(pgs->tls == tls, + KASSERT(m->m_epg_tls == tls, ("different TLS sessions in a single mbuf chain: %p vs %p", - tls, pgs->tls)); - KASSERT((m->m_flags & (M_NOMAP | M_NOTREADY)) == - (M_NOMAP | M_NOTREADY), + tls, m->m_epg_tls)); + KASSERT((m->m_flags & (M_EXTPG | M_NOTREADY)) == + (M_EXTPG | M_NOTREADY), ("%p not unready & nomap mbuf (top = %p)\n", m, top)); - KASSERT(npages + pgs->npgs <= total_pages, + KASSERT(npages + m->m_epg_npgs <= total_pages, ("page count mismatch: top %p, total_pages %d, m %p", top, total_pages, m)); @@ -1471,14 +1459,14 @@ * (from sendfile), anonymous wired pages are * allocated and assigned to the destination iovec. */ - is_anon = (pgs->flags & MBUF_PEXT_FLAG_ANON) != 0; + is_anon = (m->m_epg_flags & EPG_FLAG_ANON) != 0; - off = pgs->first_pg_off; - for (i = 0; i < pgs->npgs; i++, off = 0) { - len = mbuf_ext_pg_len(pgs, i, off); + off = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs; i++, off = 0) { + len = m_epg_pagelen(m, i, off); src_iov[i].iov_len = len; src_iov[i].iov_base = - (char *)(void *)PHYS_TO_DMAP(pgs->m_epg_pa[i]) + + (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]) + off; if (is_anon) { @@ -1502,9 +1490,9 @@ npages += i; error = (*tls->sw_encrypt)(tls, - (const struct tls_record_layer *)pgs->m_epg_hdr, - pgs->m_epg_trail, src_iov, dst_iov, i, pgs->seqno, - pgs->record_type); + (const struct tls_record_layer *)m->m_epg_hdr, + m->m_epg_trail, src_iov, dst_iov, i, m->m_epg_seqno, + m->m_epg_record_type); if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); break; @@ -1520,14 +1508,14 @@ m->m_ext.ext_free(m); /* Replace them with the new pages. */ - for (i = 0; i < pgs->npgs; i++) - pgs->m_epg_pa[i] = parray[i]; + for (i = 0; i < m->m_epg_npgs; i++) + m->m_epg_pa[i] = parray[i]; /* Use the basic free routine. */ m->m_ext.ext_free = mb_free_mext_pgs; /* Pages are now writable. */ - pgs->flags |= MBUF_PEXT_FLAG_ANON; + m->m_epg_flags |= EPG_FLAG_ANON; } /* @@ -1537,7 +1525,7 @@ * yet-to-be-encrypted records having an associated * session. 
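+		 * (This releases the hold taken on the session by
+		 * ktls_frame().)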
 */
-		pgs->tls = NULL;
+		m->m_epg_tls = NULL;
 		ktls_free(tls);
 	}
 
@@ -1559,10 +1547,8 @@
 ktls_work_thread(void *ctx)
 {
 	struct ktls_wq *wq = ctx;
-	struct mbuf_ext_pgs *p, *n;
-	struct ktls_session *tls;
-	struct mbuf *m;
-	STAILQ_HEAD(, mbuf_ext_pgs) local_head;
+	struct mbuf *m, *n;
+	STAILQ_HEAD(, mbuf) local_head;
 
 #if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
 	fpu_kern_thread(0);
@@ -1579,15 +1565,13 @@
 		STAILQ_CONCAT(&local_head, &wq->head);
 		mtx_unlock(&wq->mtx);
 
-		STAILQ_FOREACH_SAFE(p, &local_head, stailq, n) {
-			if (p->mbuf != NULL) {
-				ktls_encrypt(p);
-				counter_u64_add(ktls_cnt_on, -1);
-			} else {
-				tls = p->tls;
-				ktls_free(tls);
-				m = __containerof(p, struct mbuf, m_ext_pgs);
+		STAILQ_FOREACH_SAFE(m, &local_head, m_epg_stailq, n) {
+			if (m->m_epg_flags & EPG_FLAG_2FREE) {
+				ktls_free(m->m_epg_tls);
 				uma_zfree(zone_mbuf, m);
+			} else {
+				ktls_encrypt(m);
+				counter_u64_add(ktls_cnt_on, -1);
 			}
 		}
 	}
Index: sys/kern/uipc_mbuf.c
===================================================================
--- sys/kern/uipc_mbuf.c
+++ sys/kern/uipc_mbuf.c
@@ -163,11 +163,11 @@
 #if defined(__LP64__)
 CTASSERT(offsetof(struct mbuf, m_dat) == 32);
 CTASSERT(sizeof(struct pkthdr) == 56);
-CTASSERT(sizeof(struct m_ext) == 168);
+CTASSERT(sizeof(struct m_ext) == 160);
 #else
 CTASSERT(offsetof(struct mbuf, m_dat) == 24);
 CTASSERT(sizeof(struct pkthdr) == 48);
-CTASSERT(sizeof(struct m_ext) == 184);
+CTASSERT(sizeof(struct m_ext) == 180);
 #endif
 
 /*
@@ -191,25 +191,33 @@
 {
 	volatile u_int *refcnt;
 
-	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
-	KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
+	KASSERT(m->m_flags & (M_EXT|M_EXTPG),
+	    ("%s: M_EXT|M_EXTPG not set on %p", __func__, m));
+	KASSERT(!(n->m_flags & (M_EXT|M_EXTPG)),
+	    ("%s: M_EXT|M_EXTPG set on %p", __func__, n));
 
 	/*
-	 * Cache access optimization.  For most kinds of external
-	 * storage we don't need full copy of m_ext, since the
-	 * holder of the 'ext_count' is responsible to carry the
-	 * free routine and its arguments.  Exclusion is EXT_EXTREF,
-	 * where 'ext_cnt' doesn't point into mbuf at all.
+	 * Cache access optimization.
+	 *
+	 * o Regular M_EXT storage doesn't need a full copy of m_ext, since
+	 *   the holder of the 'ext_count' is responsible for carrying the
+	 *   free routine and its arguments.
+	 * o M_EXTPG data is split between the main part of the mbuf and
+	 *   m_ext: the main part is copied in full, while the m_ext part
+	 *   is treated like regular M_EXT.
+	 * o EXT_EXTREF, where 'ext_cnt' doesn't point into an mbuf at all,
+	 *   is special: it needs a full copy of m_ext into each mbuf, since
+	 *   any copy could end up being the last one to free it.
	 */
-	if (m->m_ext.ext_type == EXT_EXTREF)
+	if (m->m_flags & M_EXTPG) {
+		bcopy(&m->m_epg_startcopy, &n->m_epg_startcopy,
+		    __rangeof(struct mbuf, m_epg_startcopy, m_epg_endcopy));
+		bcopy(&m->m_ext, &n->m_ext, m_epg_ext_copylen);
+	} else if (m->m_ext.ext_type == EXT_EXTREF)
 		bcopy(&m->m_ext, &n->m_ext, sizeof(struct m_ext));
-	else if (m->m_ext.ext_type == EXT_PGS)
-		bcopy(&m->m_ext_pgs, &n->m_ext_pgs,
-		    sizeof(struct mbuf_ext_pgs));
 	else
 		bcopy(&m->m_ext, &n->m_ext, m_ext_copylen);
-	n->m_flags |= M_EXT;
-	n->m_flags |= m->m_flags & (M_RDONLY | M_NOMAP);
+
+	n->m_flags |= m->m_flags & (M_RDONLY | M_EXT | M_EXTPG);
 
 	/* See if this is the mbuf that holds the embedded refcount.
*/ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { @@ -254,7 +262,7 @@ if (m->m_flags & M_PKTHDR) m_demote_pkthdr(m); m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | - M_NOMAP | flags); + M_EXTPG | flags); } } @@ -385,7 +393,7 @@ m_tag_delete_chain(to, NULL); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | - (to->m_flags & (M_EXT | M_NOMAP)); + (to->m_flags & (M_EXT | M_EXTPG)); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; /* especially tags */ @@ -424,7 +432,7 @@ m_tag_delete_chain(to, NULL); #endif to->m_flags = (from->m_flags & M_COPYFLAGS) | - (to->m_flags & (M_EXT | M_NOMAP)); + (to->m_flags & (M_EXT | M_EXTPG)); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; @@ -514,7 +522,7 @@ copyhdr = 0; } n->m_len = min(len, m->m_len - off); - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data + off; mb_dupcl(n, m); } else @@ -556,7 +564,7 @@ if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data; mb_dupcl(n, m); } else { @@ -574,7 +582,7 @@ n = n->m_next; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data; mb_dupcl(n, m); } else { @@ -634,7 +642,7 @@ while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); - if ((m->m_flags & M_NOMAP) != 0) + if ((m->m_flags & M_EXTPG) != 0) m_copyfromunmapped(m, off, count, cp); else bcopy(mtod(m, caddr_t) + off, cp, count); @@ -732,7 +740,7 @@ m = m->m_next; while (n) { if (!M_WRITABLE(m) || - (n->m_flags & M_NOMAP) != 0 || + (n->m_flags & M_EXTPG) != 0 || M_TRAILINGSPACE(m) < n->m_len) { /* just join the two chains */ m->m_next = n; @@ -850,7 +858,7 @@ int count; int space; - KASSERT((n->m_flags & M_NOMAP) == 0, + KASSERT((n->m_flags & M_EXTPG) == 0, ("%s: unmapped mbuf %p", __func__, n)); /* @@ -992,7 +1000,7 @@ n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; - if (m->m_flags & M_EXT) + if (m->m_flags & (M_EXT|M_EXTPG)) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ @@ -1018,7 +1026,7 @@ M_ALIGN(n, remain); } extpacket: - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data + len; mb_dupcl(n, m); } else { @@ -1416,10 +1424,9 @@ static int frags_per_mbuf(struct mbuf *m) { - struct mbuf_ext_pgs *ext_pgs; int frags; - if ((m->m_flags & M_NOMAP) == 0) + if ((m->m_flags & M_EXTPG) == 0) return (1); /* @@ -1429,12 +1436,11 @@ * XXX: This overestimates the number of fragments by assuming * all the backing physical pages are disjoint. 
*/ - ext_pgs = &m->m_ext_pgs; frags = 0; - if (ext_pgs->hdr_len != 0) + if (m->m_epg_hdrlen != 0) frags++; - frags += ext_pgs->npgs; - if (ext_pgs->trail_len != 0) + frags += m->m_epg_npgs; + if (m->m_epg_trllen != 0) frags++; return (frags); @@ -1617,13 +1623,11 @@ void mb_free_mext_pgs(struct mbuf *m) { - struct mbuf_ext_pgs *ext_pgs; vm_page_t pg; - MBUF_EXT_PGS_ASSERT(m); - ext_pgs = &m->m_ext_pgs; - for (int i = 0; i < ext_pgs->npgs; i++) { - pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); + M_ASSERTEXTPG(m); + for (int i = 0; i < m->m_epg_npgs; i++) { + pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); vm_page_unwire_noq(pg); vm_page_free(pg); } @@ -1633,7 +1637,6 @@ m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags) { struct mbuf *m, *mb, *prev; - struct mbuf_ext_pgs *pgs; vm_page_t pg_array[MBUF_PEXT_MAX_PGS]; int error, length, i, needed; ssize_t total; @@ -1666,8 +1669,7 @@ else prev->m_next = mb; prev = mb; - pgs = &mb->m_ext_pgs; - pgs->flags = MBUF_PEXT_FLAG_ANON; + mb->m_epg_flags = EPG_FLAG_ANON; needed = length = MIN(maxseg, total); for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) { retry_page: @@ -1681,17 +1683,17 @@ } } pg_array[i]->flags &= ~PG_ZERO; - pgs->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg_array[i]); - pgs->npgs++; + mb->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg_array[i]); + mb->m_epg_npgs++; } - pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1); - MBUF_EXT_PGS_ASSERT_SANITY(pgs); + mb->m_epg_last_len = length - PAGE_SIZE * (mb->m_epg_npgs - 1); + MBUF_EXT_PGS_ASSERT_SANITY(mb); total -= length; error = uiomove_fromphys(pg_array, 0, length, uio); if (error != 0) goto failed; mb->m_len = length; - mb->m_ext.ext_size += PAGE_SIZE * pgs->npgs; + mb->m_ext.ext_size += PAGE_SIZE * mb->m_epg_npgs; if (flags & M_PKTHDR) m->m_pkthdr.len += length; } @@ -1713,7 +1715,7 @@ ssize_t total; int progress = 0; - if (flags & M_NOMAP) + if (flags & M_EXTPG) return (m_uiotombuf_nomap(uio, how, len, align, flags)); /* @@ -1767,33 +1769,32 @@ int m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len) { - struct mbuf_ext_pgs *ext_pgs; vm_page_t pg; int error, i, off, pglen, pgoff, seglen, segoff; - MBUF_EXT_PGS_ASSERT(m); - ext_pgs = __DECONST(void *, &m->m_ext_pgs); + M_ASSERTEXTPG(m); error = 0; /* Skip over any data removed from the front. 
*/ off = mtod(m, vm_offset_t); off += m_off; - if (ext_pgs->hdr_len != 0) { - if (off >= ext_pgs->hdr_len) { - off -= ext_pgs->hdr_len; + if (m->m_epg_hdrlen != 0) { + if (off >= m->m_epg_hdrlen) { + off -= m->m_epg_hdrlen; } else { - seglen = ext_pgs->hdr_len - off; + seglen = m->m_epg_hdrlen - off; segoff = off; seglen = min(seglen, len); off = 0; len -= seglen; - error = uiomove(&ext_pgs->m_epg_hdr[segoff], seglen, uio); + error = uiomove(__DECONST(void *, + &m->m_epg_hdr[segoff]), seglen, uio); } } - pgoff = ext_pgs->first_pg_off; - for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) { - pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + pgoff = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs && error == 0 && len > 0; i++) { + pglen = m_epg_pagelen(m, i, pgoff); if (off >= pglen) { off -= pglen; pgoff = 0; @@ -1804,15 +1805,16 @@ off = 0; seglen = min(seglen, len); len -= seglen; - pg = PHYS_TO_VM_PAGE(ext_pgs->m_epg_pa[i]); + pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); error = uiomove_fromphys(&pg, segoff, seglen, uio); pgoff = 0; }; if (len != 0 && error == 0) { - KASSERT((off + len) <= ext_pgs->trail_len, + KASSERT((off + len) <= m->m_epg_trllen, ("off + len > trail (%d + %d > %d, m_off = %d)", off, len, - ext_pgs->trail_len, m_off)); - error = uiomove(&ext_pgs->m_epg_trail[off], len, uio); + m->m_epg_trllen, m_off)); + error = uiomove(__DECONST(void *, &m->m_epg_trail[off]), + len, uio); } return (error); } @@ -1835,7 +1837,7 @@ for (; m != NULL; m = m->m_next) { length = min(m->m_len, total - progress); - if ((m->m_flags & M_NOMAP) != 0) + if ((m->m_flags & M_EXTPG) != 0) error = m_unmappedtouio(m, 0, uio, length); else error = uiomove(mtod(m, void *), length, uio); Index: sys/kern/uipc_sockbuf.c =================================================================== --- sys/kern/uipc_sockbuf.c +++ sys/kern/uipc_sockbuf.c @@ -124,34 +124,30 @@ #ifdef KERN_TLS /* Try to coalesce adjacent ktls mbuf hdr/trailers. */ if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && - (m->m_flags & M_NOMAP) && - (n->m_flags & M_NOMAP) && + (m->m_flags & M_EXTPG) && + (n->m_flags & M_EXTPG) && !mbuf_has_tls_session(m) && !mbuf_has_tls_session(n)) { - struct mbuf_ext_pgs *mpgs, *npgs; int hdr_len, trail_len; - mpgs = &m->m_ext_pgs; - npgs = &n->m_ext_pgs; - hdr_len = npgs->hdr_len; - trail_len = mpgs->trail_len; + hdr_len = n->m_epg_hdrlen; + trail_len = m->m_epg_trllen; if (trail_len != 0 && hdr_len != 0 && trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) { /* copy n's header to m's trailer */ memcpy(&m->m_epg_trail[trail_len], n->m_epg_hdr, hdr_len); - mpgs->trail_len += hdr_len; + m->m_epg_trllen += hdr_len; m->m_len += hdr_len; - npgs->hdr_len = 0; + n->m_epg_hdrlen = 0; n->m_len -= hdr_len; } } #endif /* Compress small unmapped mbufs into plain mbufs. */ - if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN && + if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN && !mbuf_has_tls_session(m)) { - MPASS(m->m_flags & M_EXT); ext_size = m->m_ext.ext_size; if (mb_unmapped_compress(m) == 0) { sb->sb_mbcnt -= ext_size; @@ -161,7 +157,7 @@ while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && M_WRITABLE(m) && - (m->m_flags & M_NOMAP) == 0 && + (m->m_flags & M_EXTPG) == 0 && !mbuf_has_tls_session(n) && !mbuf_has_tls_session(m) && n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ @@ -193,8 +189,8 @@ /* * Mark ready "count" units of I/O starting with "m". Most mbufs - * count as a single unit of I/O except for EXT_PGS-backed mbufs which - * can be backed by multiple pages. 
+ * count as a single unit of I/O except for M_EXTPG mbufs which + * are backed by multiple pages. */ int sbready(struct sockbuf *sb, struct mbuf *m0, int count) @@ -212,15 +208,14 @@ while (count > 0) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); - if ((m->m_flags & M_EXT) != 0 && - m->m_ext.ext_type == EXT_PGS) { - if (count < m->m_ext_pgs.nrdy) { - m->m_ext_pgs.nrdy -= count; + if ((m->m_flags & M_EXTPG) != 0) { + if (count < m->m_epg_nrdy) { + m->m_epg_nrdy -= count; count = 0; break; } - count -= m->m_ext_pgs.nrdy; - m->m_ext_pgs.nrdy = 0; + count -= m->m_epg_nrdy; + m->m_epg_nrdy = 0; } else count--; @@ -1175,7 +1170,7 @@ M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && - !(n->m_flags & (M_NOTREADY | M_NOMAP)) && + !(n->m_flags & (M_NOTREADY | M_EXTPG)) && !mbuf_has_tls_session(m) && !mbuf_has_tls_session(n) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ @@ -1192,7 +1187,7 @@ m = m_free(m); continue; } - if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) && + if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) && (m->m_flags & M_NOTREADY) == 0 && !mbuf_has_tls_session(m)) (void)mb_unmapped_compress(m); Index: sys/kern/uipc_socket.c =================================================================== --- sys/kern/uipc_socket.c +++ sys/kern/uipc_socket.c @@ -1689,7 +1689,7 @@ if (tls != NULL) { top = m_uiotombuf(uio, M_WAITOK, space, tls->params.max_frame_len, - M_NOMAP | + M_EXTPG | ((flags & MSG_EOR) ? M_EOR : 0)); if (top != NULL) { ktls_frame(top, tls, @@ -2157,7 +2157,7 @@ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); - if ((m->m_flags & M_NOMAP) != 0) + if ((m->m_flags & M_EXTPG) != 0) error = m_unmappedtouio(m, moff, uio, (int)len); else error = uiomove(mtod(m, char *) + moff, Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -233,7 +233,7 @@ * dropping the mbuf's reference) in if_output. */ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { - tls = ktls_hold(m->m_next->m_ext_pgs.tls); + tls = ktls_hold(m->m_next->m_epg_tls); mst = tls->snd_tag; /* Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -1908,8 +1908,8 @@ top = NULL; pkthdrlen = NULL; #ifdef KERN_TLS - if (hw_tls && (m->m_flags & M_NOMAP)) - tls = m->m_ext_pgs.tls; + if (hw_tls && (m->m_flags & M_EXTPG)) + tls = m->m_epg_tls; else tls = NULL; start = m; @@ -1925,8 +1925,8 @@ } #ifdef KERN_TLS if (hw_tls) { - if (m->m_flags & M_NOMAP) - ntls = m->m_ext_pgs.tls; + if (m->m_flags & M_EXTPG) + ntls = m->m_epg_tls; else ntls = NULL; @@ -1958,14 +1958,14 @@ mlen = min(len, m->m_len - off); if (seglimit) { /* - * For M_NOMAP mbufs, add 3 segments + * For M_EXTPG mbufs, add 3 segments * + 1 in case we are crossing page boundaries * + 2 in case the TLS hdr/trailer are used * It is cheaper to just add the segments * than it is to take the cache miss to look * at the mbuf ext_pgs state in detail. 
 	 */
-			if (m->m_flags & M_NOMAP) {
+			if (m->m_flags & M_EXTPG) {
 				fragsize = min(segsize, PAGE_SIZE);
 				frags = 3;
 			} else {
@@ -2020,7 +2020,7 @@
 		}
 		n->m_len = mlen;
 		len_cp += n->m_len;
-		if (m->m_flags & M_EXT) {
+		if (m->m_flags & (M_EXT|M_EXTPG)) {
 			n->m_data = m->m_data + off;
 			mb_dupcl(n, m);
 		} else
Index: sys/netinet/tcp_pcap.c
===================================================================
--- sys/netinet/tcp_pcap.c
+++ sys/netinet/tcp_pcap.c
@@ -308,10 +308,13 @@
 			 * last reference, go through the normal
 			 * free-ing process.
 			 */
-			if (mhead->m_flags & M_EXT) {
+			if (mhead->m_flags & M_EXTPG) {
+				/* Don't mess around with these. */
+				tcp_pcap_m_freem(mhead);
+				continue;
+			} else if (mhead->m_flags & M_EXT) {
 				switch (mhead->m_ext.ext_type) {
 				case EXT_SFBUF:
-				case EXT_PGS:
 					/* Don't mess around with these. */
 					tcp_pcap_m_freem(mhead);
 					continue;
@@ -339,8 +342,7 @@
 					tcp_pcap_alloc_reuse_ext++;
 					break;
 				}
-			}
-			else {
+			} else {
 				tcp_pcap_alloc_reuse_mbuf++;
 			}
@@ -366,7 +368,8 @@
 	 * In cases where that isn't possible, settle for what we can
 	 * get.
 	 */
-	if ((m->m_flags & M_EXT) && tcp_pcap_take_cluster_reference()) {
+	if ((m->m_flags & (M_EXT|M_EXTPG)) &&
+	    tcp_pcap_take_cluster_reference()) {
 		n->m_data = m->m_data;
 		n->m_len = m->m_len;
 		mb_dupcl(n, m);
@@ -384,7 +387,7 @@
 		    __func__, n->m_flags));
 		n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m);
 		n->m_len = m->m_len;
-		if (m->m_flags & M_NOMAP)
+		if (m->m_flags & M_EXTPG)
 			m_copydata(m, 0, m->m_len, n->m_data);
 		else
 			bcopy(M_START(m), n->m_dat,
Index: sys/netinet6/ip6_output.c
===================================================================
--- sys/netinet6/ip6_output.c
+++ sys/netinet6/ip6_output.c
@@ -341,7 +341,7 @@
 	 * dropping the mbuf's reference) in if_output.
 	 */
 	if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
-		tls = ktls_hold(m->m_next->m_ext_pgs.tls);
+		tls = ktls_hold(m->m_next->m_epg_tls);
 		mst = tls->snd_tag;

 	/*
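Both the IPv4 and IPv6 output paths now reach the TLS session through the
m_epg_tls field instead of the old m_ext_pgs.tls indirection. As a minimal
sketch of the pattern (the helper name mbuf_tls_session_hold is hypothetical;
M_EXTPG, m_epg_tls and ktls_hold() are the real pieces introduced or used by
this patch):

	#include <sys/param.h>
	#include <sys/mbuf.h>
	#include <sys/ktls.h>

	/*
	 * Hypothetical helper, modeled on the ip6_output() hunk above:
	 * take a reference on the TLS session attached to an unmapped
	 * mbuf, or return NULL if there is none.
	 */
	static struct ktls_session *
	mbuf_tls_session_hold(struct mbuf *m)
	{

		if ((m->m_flags & M_EXTPG) == 0 || m->m_epg_tls == NULL)
			return (NULL);
		return (ktls_hold(m->m_epg_tls));
	}
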
Index: sys/sys/ktls.h
===================================================================
--- sys/sys/ktls.h
+++ sys/sys/ktls.h
@@ -139,7 +139,6 @@
 struct ktls_session;
 struct m_snd_tag;
 struct mbuf;
-struct mbuf_ext_pgs;
 struct sockbuf;
 struct socket;

@@ -181,7 +180,7 @@
 		    uint8_t record_type);
 void ktls_seq(struct sockbuf *sb, struct mbuf *m);
 void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count);
-void ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs);
+void ktls_enqueue_to_free(struct mbuf *m);
 int ktls_set_tx_mode(struct socket *so, int mode);
 int ktls_get_tx_mode(struct socket *so);
 int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
Index: sys/sys/mbuf.h
===================================================================
--- sys/sys/mbuf.h
+++ sys/sys/mbuf.h
@@ -229,15 +229,6 @@
 #define	MBUF_PEXT_MAX_BYTES						\
     (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN)

-#define	MBUF_PEXT_FLAG_ANON	1	/* Data can be encrypted in place. */
-
-
-struct mbuf_ext_pgs_data {
-	vm_paddr_t	pa[MBUF_PEXT_MAX_PGS];	/* phys addrs of pgs */
-	char		trail[MBUF_PEXT_TRAIL_LEN];	/* TLS trailer */
-	char		hdr[MBUF_PEXT_HDR_LEN];	/* TLS header */
-};
-
 struct ktls_session;
 struct socket;

@@ -266,49 +257,49 @@
 	uint32_t	 ext_size;	/* size of buffer, for ext_free */
 	uint32_t	 ext_type:8,	/* type of external storage */
 			 ext_flags:24;	/* external storage mbuf flags */
-	char		*ext_buf;	/* start of buffer */
-	/*
-	 * Fields below store the free context for the external storage.
-	 * They are valid only in the refcount carrying mbuf, the one with
-	 * EXT_FLAG_EMBREF flag, with exclusion for EXT_EXTREF type, where
-	 * the free context is copied into all mbufs that use same external
-	 * storage.
-	 */
-#define	m_ext_copylen	offsetof(struct m_ext, ext_free)
-	m_ext_free_t	*ext_free;	/* free routine if not the usual */
-	void		*ext_arg1;	/* optional argument pointer */
 	union {
-		void		*ext_arg2;	/* optional argument pointer */
-		struct mbuf_ext_pgs_data ext_pgs;
+		struct {
+			/*
+			 * Regular M_EXT mbuf:
+			 * o ext_buf always points to the external buffer.
+			 * o ext_free (below) and two optional arguments
+			 *   ext_arg1 and ext_arg2 store the free context for
+			 *   the external storage.  They are set only in the
+			 *   refcount carrying mbuf, the one with
+			 *   EXT_FLAG_EMBREF flag, with exclusion for
+			 *   EXT_EXTREF type, where the free context is copied
+			 *   into all mbufs that use the same external storage.
+			 */
+			char	*ext_buf;	/* start of buffer */
+#define	m_ext_copylen	offsetof(struct m_ext, ext_arg2)
+			void	*ext_arg2;
+		};
+		struct {
+			/*
+			 * Multi-page M_EXTPG mbuf:
+			 * o extpg_pa - page vector.
+			 * o extpg_trail and extpg_hdr - TLS trailer and
+			 *   header.
+			 * Uses ext_free and may also use ext_arg1.
+			 */
+			vm_paddr_t	extpg_pa[MBUF_PEXT_MAX_PGS];
+			char		extpg_trail[MBUF_PEXT_TRAIL_LEN];
+			char		extpg_hdr[MBUF_PEXT_HDR_LEN];
+			/* Pretend these 3 fields are part of mbuf itself. */
+#define	m_epg_pa		m_ext.extpg_pa
+#define	m_epg_trail		m_ext.extpg_trail
+#define	m_epg_hdr		m_ext.extpg_hdr
+#define	m_epg_ext_copylen	offsetof(struct m_ext, ext_free)
+		};
 	};
+	/*
+	 * Free method and optional argument pointer, both
+	 * used by M_EXT and M_EXTPG.
+	 */
+	m_ext_free_t	*ext_free;
+	void		*ext_arg1;
 };

-struct mbuf_ext_pgs {
-	uint8_t		npgs;			/* Number of attached pages */
-	uint8_t		nrdy;			/* Pages with I/O pending */
-	uint8_t		hdr_len;		/* TLS header length */
-	uint8_t		trail_len;		/* TLS trailer length */
-	uint16_t	first_pg_off;		/* Offset into 1st page */
-	uint16_t	last_pg_len;		/* Length of last page */
-	uint8_t		flags;			/* Flags */
-	uint8_t		record_type;
-	uint8_t		spare[2];
-	int		enc_cnt;
-	struct ktls_session *tls;		/* TLS session */
-	struct socket	*so;
-	uint64_t	seqno;
-	struct mbuf	*mbuf;
-	STAILQ_ENTRY(mbuf_ext_pgs) stailq;
-#if !defined(__LP64__)
-	uint8_t		pad[8];			/* pad to size of pkthdr */
-#endif
-	struct m_ext	m_ext;
-};
-
-#define	m_epg_hdr	m_ext.ext_pgs.hdr
-#define	m_epg_trail	m_ext.ext_pgs.trail
-#define	m_epg_pa	m_ext.ext_pgs.pa
-
 /*
  * The core of the mbuf object along with some shortcut defines for practical
  * purposes.
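The two copy-length markers embedded in the union are what cloning code keys
off: a plain M_EXT mbuf shares everything up to ext_arg2, while an M_EXTPG
mbuf must also carry its page vector and TLS header/trailer, i.e. everything
up to ext_free. A sketch of the dispatch a clone routine such as mb_dupcl()
has to make (the function name ext_copy is illustrative):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/mbuf.h>

	/*
	 * Illustrative only: copy the shareable part of m_ext when
	 * cloning an mbuf, sized by the storage flavor.
	 */
	static void
	ext_copy(struct mbuf *to, const struct mbuf *from)
	{

		bcopy(&from->m_ext, &to->m_ext,
		    (from->m_flags & M_EXTPG) ? m_epg_ext_copylen :
		    m_ext_copylen);
	}
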
@@ -347,15 +338,51 @@
  * order to support future work on variable-size mbufs.
  */
 	union {
-		union {
-			struct {
-				struct pkthdr m_pkthdr;	/* M_PKTHDR set */
-				union {
-					struct m_ext m_ext;	/* M_EXT set */
-					char	m_pktdat[0];
+		struct {
+			union {
+				/* M_PKTHDR set. */
+				struct pkthdr m_pkthdr;
+
+				/* M_EXTPG set.
+				 * Multi-page M_EXTPG mbuf has its metadata
+				 * split between the below anonymous structure
+				 * and m_ext.  It carries a vector of pages,
+				 * optional header and trailer char vectors
+				 * and pointers to socket/TLS data.
+				 */
+#define	m_epg_startcopy		m_epg_npgs
+#define	m_epg_endcopy		m_epg_stailq
+				struct {
+					/* Overall count of pages and count of
+					 * pages with I/O pending. */
+					uint8_t	m_epg_npgs;
+					uint8_t	m_epg_nrdy;
+					/* TLS header and trailer lengths.
+					 * The data itself resides in m_ext. */
+					uint8_t	m_epg_hdrlen;
+					uint8_t	m_epg_trllen;
+					/* Offset into 1st page and length of
+					 * data in the last page. */
+					uint16_t m_epg_1st_off;
+					uint16_t m_epg_last_len;
+					uint8_t	m_epg_flags;
+#define	EPG_FLAG_ANON	0x1	/* Data can be encrypted in place. */
+#define	EPG_FLAG_2FREE	0x2	/* Scheduled for free. */
+					uint8_t	m_epg_record_type;
+					uint8_t	__spare[2];
+					int	m_epg_enc_cnt;
+					struct ktls_session *m_epg_tls;
+					struct socket	*m_epg_so;
+					uint64_t	m_epg_seqno;
+					STAILQ_ENTRY(mbuf) m_epg_stailq;
 				};
 			};
-			struct mbuf_ext_pgs m_ext_pgs;
+			union {
+				/* M_EXT or M_EXTPG set. */
+				struct m_ext	m_ext;
+				/* M_PKTHDR set, neither M_EXT nor M_EXTPG. */
+				char		m_pktdat[0];
+			};
 		};
 		char	m_dat[0];			/* !M_PKTHDR, !M_EXT */
 	};
@@ -363,24 +390,49 @@

 #ifdef _KERNEL
 static inline int
-mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff)
+m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff)
 {
+
 	KASSERT(pgoff == 0 || pidx == 0,
-	    ("page %d with non-zero offset %d in %p", pidx, pgoff, ext_pgs));
-	if (pidx == ext_pgs->npgs - 1) {
-		return (ext_pgs->last_pg_len);
+	    ("page %d with non-zero offset %d in %p", pidx, pgoff, m));
+
+	if (pidx == m->m_epg_npgs - 1) {
+		return (m->m_epg_last_len);
 	} else {
 		return (PAGE_SIZE - pgoff);
 	}
 }

-#ifdef INVARIANT_SUPPORT
-void	mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs);
-#endif
 #ifdef INVARIANTS
-#define	MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs)	mb_ext_pgs_check((ext_pgs))
+#define	MCHECK(ex, msg)	KASSERT((ex),				\
+	    ("Multi page mbuf %p with " #msg " at %s:%d",	\
+	    m, __FILE__, __LINE__))
+/*
+ * NB: This expects a non-empty buffer (npgs > 0 and
+ * last_pg_len > 0).
+ */
+#define	MBUF_EXT_PGS_ASSERT_SANITY(m)	do {				\
+	MCHECK(m->m_epg_npgs > 0, "no valid pages");			\
+	MCHECK(m->m_epg_npgs <= nitems(m->m_epg_pa),			\
+	    "too many pages");						\
+	MCHECK(m->m_epg_nrdy <= m->m_epg_npgs,				\
+	    "too many ready pages");					\
+	MCHECK(m->m_epg_1st_off < PAGE_SIZE,				\
+	    "too large page offset");					\
+	MCHECK(m->m_epg_last_len > 0, "zero last page length");	\
+	MCHECK(m->m_epg_last_len <= PAGE_SIZE,				\
+	    "too large last page length");				\
+	if (m->m_epg_npgs == 1)						\
+		MCHECK(m->m_epg_1st_off +				\
+		    m->m_epg_last_len <= PAGE_SIZE,			\
+		    "single page too large");				\
+	MCHECK(m->m_epg_hdrlen <= sizeof(m->m_epg_hdr),		\
+	    "too large header length");					\
+	MCHECK(m->m_epg_trllen <= sizeof(m->m_epg_trail),		\
+	    "too large trailer length");				\
+} while (0)
 #else
-#define	MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs)
+#define	MBUF_EXT_PGS_ASSERT_SANITY(m)	do {} while (0)
 #endif
 #endif
@@ -398,7 +450,7 @@
 #define	M_MCAST		0x00000020 /* send/received as link-level multicast */
 #define	M_PROMISC	0x00000040 /* packet was not for us */
 #define	M_VLANTAG	0x00000080 /* ether_vtag is valid */
-#define	M_NOMAP		0x00000100 /* mbuf data is unmapped */
+#define	M_EXTPG		0x00000100 /* has array of unmapped pages and TLS */
 #define	M_NOFREE	0x00000200 /* do not free mbuf, embedded in cluster */
 #define	M_TSTMP		0x00000400 /* rcv_tstmp field is valid */
 #define	M_TSTMP_HPREC	0x00000800 /* rcv_tstmp is high-prec, typically
@@ -439,7 +491,7 @@
  */
 #define	M_FLAG_BITS \
     "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
-    "\7M_PROMISC\10M_VLANTAG\11M_NOMAP\12M_NOFREE\13M_TSTMP\14M_TSTMP_HPREC\15M_TSTMP_LRO"
+    "\7M_PROMISC\10M_VLANTAG\11M_EXTPG\12M_NOFREE\13M_TSTMP\14M_TSTMP_HPREC\15M_TSTMP_LRO"
 #define	M_FLAG_PROTOBITS \
     "\16M_PROTO1\17M_PROTO2\20M_PROTO3\21M_PROTO4" \
     "\22M_PROTO5\23M_PROTO6\24M_PROTO7\25M_PROTO8\26M_PROTO9" \
@@ -511,7 +563,6 @@
 #define	EXT_PACKET	6	/* mbuf+cluster from packet zone */
 #define	EXT_MBUF	7	/* external mbuf reference */
 #define	EXT_RXRING	8	/* data in NIC receive ring */
-#define	EXT_PGS		9	/* array of unmapped pages */

 #define	EXT_VENDOR1	224	/* for vendor-internal use */
 #define	EXT_VENDOR2	225	/* for vendor-internal use */
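With the accessors above in place, code that walks an unmapped mbuf no longer
needs a struct mbuf_ext_pgs pointer. A sketch that sums the bytes covered by
the page vector, excluding the TLS header and trailer (the function name
m_epg_pages_len is hypothetical; m_epg_pagelen() and the m_epg_* fields come
from this patch):

	#include <sys/param.h>
	#include <sys/mbuf.h>

	/* Hypothetical: total bytes backed by the page vector. */
	static size_t
	m_epg_pages_len(struct mbuf *m)
	{
		size_t len;
		int i, pgoff;

		MBUF_EXT_PGS_ASSERT_SANITY(m);
		len = 0;
		pgoff = m->m_epg_1st_off;
		for (i = 0; i < m->m_epg_npgs; i++) {
			len += m_epg_pagelen(m, i, pgoff);
			pgoff = 0;
		}
		return (len);
	}
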
@@ -556,11 +607,6 @@
 	"\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
 	"\30EXT_FLAG_EXP4"

-#define	MBUF_EXT_PGS_ASSERT(m)						\
-	KASSERT((((m)->m_flags & M_EXT) != 0) &&			\
-	    ((m)->m_ext.ext_type == EXT_PGS),				\
-	    ("%s: m %p !M_EXT or !EXT_PGS", __func__, m))
-
 /*
  * Flags indicating checksum, segmentation and other offload work to be
  * done, or already done, by hardware or lower layers.  It is split into
@@ -692,6 +738,7 @@

 void		 mb_dupcl(struct mbuf *, struct mbuf *);
 void		 mb_free_ext(struct mbuf *);
+void		 mb_free_extpg(struct mbuf *);
 void		 mb_free_mext_pgs(struct mbuf *);
 struct mbuf	*mb_alloc_ext_pgs(int, m_ext_free_t);
 int		 mb_unmapped_compress(struct mbuf *m);
@@ -986,7 +1033,7 @@
  * be both the local data payload, or an external buffer area, depending on
  * whether M_EXT is set).
  */
-#define	M_WRITABLE(m)	(((m)->m_flags & (M_RDONLY | M_NOMAP)) == 0 &&	\
+#define	M_WRITABLE(m)	(((m)->m_flags & (M_RDONLY | M_EXTPG)) == 0 &&	\
			 (!(((m)->m_flags & M_EXT)) ||			\
			 (m_extrefcnt(m) == 1)))

@@ -995,6 +1042,11 @@
	KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR,			\
	    ("%s: no mbuf packet header!", __func__))

+/* Check if mbuf is multipage. */
+#define	M_ASSERTEXTPG(m)						\
+	KASSERT(((m)->m_flags & (M_EXTPG|M_PKTHDR)) == M_EXTPG,		\
+	    ("%s: m %p is not multipage!", __func__, m))
+
 /*
  * Ensure that the supplied mbuf is a valid, non-free mbuf.
  *
@@ -1009,7 +1061,7 @@
  * handling external storage, packet-header mbufs, and regular data mbufs.
  */
 #define	M_START(m)							\
-	(((m)->m_flags & M_NOMAP) ? NULL :				\
+	(((m)->m_flags & M_EXTPG) ? NULL :				\
	 ((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
	 ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] :		\
	 &(m)->m_dat[0])
@@ -1335,7 +1387,9 @@
		m_tag_delete_chain(m, NULL);
	if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG)
		m_snd_tag_rele(m->m_pkthdr.snd_tag);
-	if (m->m_flags & M_EXT)
+	if (m->m_flags & M_EXTPG)
+		mb_free_extpg(m);
+	else if (m->m_flags & M_EXT)
		mb_free_ext(m);
	else if ((m->m_flags & M_NOFREE) == 0)
		uma_zfree(zone_mbuf, m);
@@ -1507,9 +1561,8 @@
 mbuf_has_tls_session(struct mbuf *m)
 {

-	if (m->m_flags & M_NOMAP) {
-		MBUF_EXT_PGS_ASSERT(m);
-		if (m->m_ext_pgs.tls != NULL) {
+	if (m->m_flags & M_EXTPG) {
+		if (m->m_epg_tls != NULL) {
 			return (true);
 		}
 	}
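M_ASSERTEXTPG() deliberately insists that M_PKTHDR is clear, because the
m_epg_* metadata overlays the packet-header area of the mbuf. One
consumer-visible consequence of the flag split is in-place encryption:
M_WRITABLE() is now always false for M_EXTPG data, and kTLS may only encrypt
in place when the producer marked the pages anonymous. A sketch of that check
(the predicate name is made up; M_ASSERTEXTPG and EPG_FLAG_ANON are from this
patch):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/mbuf.h>

	/*
	 * Hypothetical predicate: may a kTLS worker encrypt this
	 * mbuf's pages in place?
	 */
	static bool
	m_epg_anon(struct mbuf *m)
	{

		M_ASSERTEXTPG(m);
		return ((m->m_epg_flags & EPG_FLAG_ANON) != 0);
	}
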
Index: sys/sys/sglist.h
===================================================================
--- sys/sys/sglist.h
+++ sys/sys/sglist.h
@@ -57,7 +57,6 @@
 struct bio;
 struct mbuf;
-struct mbuf_ext_pgs;
 struct uio;

 static __inline void
@@ -88,10 +87,9 @@
 struct sglist *sglist_alloc(int nsegs, int mflags);
 int	sglist_append(struct sglist *sg, void *buf, size_t len);
 int	sglist_append_bio(struct sglist *sg, struct bio *bp);
-int	sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs,
-	    size_t off, size_t len);
-int	sglist_append_mb_ext_pgs(struct sglist *sg, struct mbuf *m);
 int	sglist_append_mbuf(struct sglist *sg, struct mbuf *m0);
+int	sglist_append_mbuf_epg(struct sglist *sg, struct mbuf *m0, size_t off,
+	    size_t len);
 int	sglist_append_phys(struct sglist *sg, vm_paddr_t paddr, size_t len);

 int	sglist_append_sglist(struct sglist *sg, struct sglist *source,
@@ -105,9 +103,7 @@
 struct sglist *sglist_clone(struct sglist *sg, int mflags);
 int	sglist_consume_uio(struct sglist *sg, struct uio *uio, size_t resid);
 int	sglist_count(void *buf, size_t len);
-int	sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off,
-	    size_t len);
-int	sglist_count_mb_ext_pgs(struct mbuf *m);
+int	sglist_count_mbuf_epg(struct mbuf *m, size_t off, size_t len);
 int	sglist_count_vmpages(vm_page_t *m, size_t pgoff, size_t len);
 void	sglist_free(struct sglist *sg);
 int	sglist_join(struct sglist *first, struct sglist *second);
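The renamed sglist entry points fold the old pair of counting functions into
one that takes the mbuf plus an explicit window. A sketch of a driver-style
consumer that builds a scatter/gather list for part of an unmapped mbuf, for
example to skip a TLS header (the function name and error policy are
illustrative; sglist_count_mbuf_epg(), sglist_alloc(), sglist_append_mbuf_epg()
and sglist_free() are the real API):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/malloc.h>
	#include <sys/mbuf.h>
	#include <sys/sglist.h>

	/* Illustrative: scatter/gather list for len bytes at off. */
	static int
	epg_payload_sglist(struct mbuf *m, size_t off, size_t len,
	    struct sglist **sgp)
	{
		struct sglist *sg;
		int error, nsegs;

		nsegs = sglist_count_mbuf_epg(m, off, len);
		sg = sglist_alloc(nsegs, M_NOWAIT);
		if (sg == NULL)
			return (ENOMEM);
		error = sglist_append_mbuf_epg(sg, m, off, len);
		if (error != 0) {
			sglist_free(sg);
			return (error);
		}
		*sgp = sg;
		return (0);
	}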