Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 +++ sbin/ifconfig/ifconfig.8 @@ -538,6 +538,12 @@ If the driver supports .Xr tcp 4 large receive offloading, disable LRO on the interface. +.It Cm nomap +If the driver supports unmapped network buffers, +enable them on the interface. +.It Fl nomap +If the driver supports unmapped network buffers, +disable them on the interface. .It Cm wol , wol_ucast , wol_mcast , wol_magic Enable Wake On Lan (WOL) support, if available. WOL is a facility whereby a machine in a low power state may be woken Index: sbin/ifconfig/ifconfig.c =================================================================== --- sbin/ifconfig/ifconfig.c +++ sbin/ifconfig/ifconfig.c @@ -1257,7 +1257,7 @@ "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \ "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ -"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP" +"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP\33NOMAP" /* * Print the status of the interface. If an address family was @@ -1557,6 +1557,8 @@ DEF_CMD("-link2", -IFF_LINK2, setifflags), DEF_CMD("monitor", IFF_MONITOR, setifflags), DEF_CMD("-monitor", -IFF_MONITOR, setifflags), + DEF_CMD("nomap", IFCAP_NOMAP, setifcap), + DEF_CMD("-nomap", -IFCAP_NOMAP, setifcap), DEF_CMD("staticarp", IFF_STATICARP, setifflags), DEF_CMD("-staticarp", -IFF_STATICARP, setifflags), DEF_CMD("rxcsum6", IFCAP_RXCSUM_IPV6, setifcap), Index: share/man/man9/Makefile =================================================================== --- share/man/man9/Makefile +++ share/man/man9/Makefile @@ -1824,6 +1824,8 @@ MLINKS+=sglist.9 sglist_alloc.9 \ sglist.9 sglist_append.9 \ sglist.9 sglist_append_bio.9 \ + sglist.9 sglist_append_ext_pgs.9 \ + sglist.9 sglist_append_mb_ext_pgs.9 \ sglist.9 sglist_append_mbuf.9 \ sglist.9 sglist_append_phys.9 \ sglist.9 sglist_append_sglist.9 \ @@ -1834,6 +1836,8 @@ sglist.9 sglist_clone.9 \ sglist.9 sglist_consume_uio.9 \ sglist.9 sglist_count.9 \ + sglist.9 sglist_count_ext_pgs.9 \ + sglist.9 sglist_count_mb_ext_pgs.9 \ sglist.9 sglist_count_vmpages.9 \ sglist.9 sglist_free.9 \ sglist.9 sglist_hold.9 \ Index: share/man/man9/mbuf.9 =================================================================== --- share/man/man9/mbuf.9 +++ share/man/man9/mbuf.9 @@ -254,6 +254,8 @@ #define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ +#define EXT_RXRING 8 /* data in NIC receive ring */ +#define EXT_PGS 9 /* array of unmapped pages */ #define EXT_NET_DRV 252 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 253 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 254 /* can throw this buffer away w/page flipping */ Index: share/man/man9/sglist.9 =================================================================== --- share/man/man9/sglist.9 +++ share/man/man9/sglist.9 @@ -34,6 +34,8 @@ .Nm sglist_alloc , .Nm sglist_append , .Nm sglist_append_bio , +.Nm sglist_append_ext_pgs, +.Nm sglist_append_mb_ext_pgs, .Nm sglist_append_mbuf , .Nm sglist_append_phys , .Nm sglist_append_sglist , @@ -44,6 +46,8 @@ .Nm sglist_clone , .Nm sglist_consume_uio , .Nm sglist_count , +.Nm sglist_count_ext_pgs , +.Nm sglist_count_mb_ext_pgs , .Nm sglist_count_vmpages , .Nm sglist_free , .Nm sglist_hold , @@ -64,6 +68,10 @@ .Ft int 
.Fn sglist_append_bio "struct sglist *sg" "struct bio *bp" .Ft int +.Fn sglist_append_ext_pgs "struct sglist *sg" "struct mbuf_ext_pgs *ext_pgs" "size_t offset" "size_t len" +.Ft int +.Fn sglist_append_mb_ext_pgs "struct sglist *sg" "struct mbuf *m" +.Ft int .Fn sglist_append_mbuf "struct sglist *sg" "struct mbuf *m" .Ft int .Fn sglist_append_phys "struct sglist *sg" "vm_paddr_t paddr" "size_t len" @@ -84,6 +92,10 @@ .Ft int .Fn sglist_count "void *buf" "size_t len" .Ft int +.Fn sglist_count_ext_pgs "struct mbuf_ext_pgs *ext_pgs" "size_t offset" "size_t len" +.Ft int +.Fn sglist_count_mb_ext_pgs "struct mbuf *m" +.Ft int .Fn sglist_count_vmpages "vm_page_t *m" "size_t pgoff" "size_t len" .Ft void .Fn sglist_free "struct sglist *sg" @@ -146,6 +158,22 @@ bytes long. .Pp The +.Nm sglist_count_ext_pgs +function returns the number of scatter/gather list elements needed to describe +the unmapped external mbuf buffer +.Fa ext_pgs . +The ranges start at an offset of +.Fa offset +relative to the start of the buffer and is +.Fa len +bytes long. +The +.Nm sglist_count_mb_ext_pgs +function returns the number of scatter/gather list elements needed to describe +the physical address ranges of a single unmapped mbuf +.Fa m . +.Pp +The .Nm sglist_count_vmpages function returns the number of scatter/gather list elements needed to describe the physical address ranges of a buffer backed by an array of virtual memory @@ -237,6 +265,34 @@ .Fa sg . .Pp The +.Nm sglist_append_ext_pgs +function appends the physical address ranges described by the unmapped +external mbuf buffer +.Fa ext_pgs +to the scatter/gather list +.Fa sg . +The physical address ranges start at offset +.Fa offset +within +.Fa ext_pgs +and continue for +.Fa len +bytes. +.Pp +The +.Nm sglist_append_mb_ext_pgs +function appends the physical address ranges described by the unmapped +mbuf +.Fa m +to the scatter/gather list +.Fa sg . +Note that unlike +.Nm sglist_append_mbuf , +.Nm sglist_append_mb_ext_pgs +only adds ranges for a single mbuf, +not an entire mbuf chain. +.Pp +The .Nm sglist_append_mbuf function appends the physical address ranges described by an entire mbuf chain @@ -467,8 +523,7 @@ .Pp The .Nm sglist_count -and -.Nm sglist_count_vmpages +family of functions return a count of scatter/gather list elements. .Pp The Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4268,7 +4268,8 @@ netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 netinet/tcp_hpts.c optional tcphpts inet | tcphpts inet6 -netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap +netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap \ + compile-with "${NORMAL_C} ${NO_WNONNULL}" netinet/tcp_reass.c optional inet | inet6 netinet/tcp_sack.c optional inet | inet6 netinet/tcp_subr.c optional inet | inet6 Index: sys/conf/kern.mk =================================================================== --- sys/conf/kern.mk +++ sys/conf/kern.mk @@ -76,6 +76,7 @@ # GCC 4.2 doesn't have -Wno-error=cast-qual, so just disable the warning for # the few files that are already known to generate cast-qual warnings. 
NO_WCAST_QUAL= -Wno-cast-qual +NO_WNONNULL= -Wno-nonnull .endif .endif Index: sys/dev/cxgbe/t4_main.c =================================================================== --- sys/dev/cxgbe/t4_main.c +++ sys/dev/cxgbe/t4_main.c @@ -1623,7 +1623,7 @@ #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS | \ - IFCAP_HWRXTSTMP) + IFCAP_HWRXTSTMP | IFCAP_NOMAP) #define T4_CAP_ENABLE (T4_CAP) static int @@ -1986,6 +1986,8 @@ rxq->iq.flags &= ~IQ_RX_TIMESTAMP; } } + if (mask & IFCAP_NOMAP) + ifp->if_capenable ^= IFCAP_NOMAP; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); Index: sys/dev/cxgbe/t4_sge.c =================================================================== --- sys/dev/cxgbe/t4_sge.c +++ sys/dev/cxgbe/t4_sge.c @@ -83,6 +83,7 @@ #endif /* Internal mbuf flags stored in PH_loc.eight[1]. */ +#define MC_NOMAP 0x01 #define MC_RAW_WR 0x02 /* @@ -2434,15 +2435,78 @@ return ((void *)p); } +static inline int +count_mbuf_ext_pgs(struct mbuf *m, int skip, vm_paddr_t *nextaddr) +{ + struct mbuf_ext_pgs *ext_pgs; + vm_paddr_t paddr; + int i, len, off, pglen, pgoff, seglen, segoff; + int nsegs = 0; + + MBUF_EXT_PGS_ASSERT(m); + ext_pgs = m->m_ext.ext_pgs; + off = mtod(m, vm_offset_t); + len = m->m_len; + off += skip; + len -= skip; + + if (ext_pgs->hdr_len != 0) { + if (off >= ext_pgs->hdr_len) { + off -= ext_pgs->hdr_len; + } else { + seglen = ext_pgs->hdr_len - off; + segoff = off; + seglen = min(seglen, len); + off = 0; + len -= seglen; + paddr = pmap_kextract( + (vm_offset_t)&ext_pgs->hdr[segoff]); + if (*nextaddr != paddr) + nsegs++; + *nextaddr = paddr + seglen; + } + } + pgoff = ext_pgs->first_pg_off; + for (i = 0; i < ext_pgs->npgs && len > 0; i++) { + pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + if (off >= pglen) { + off -= pglen; + pgoff = 0; + continue; + } + seglen = pglen - off; + segoff = pgoff + off; + off = 0; + seglen = min(seglen, len); + len -= seglen; + paddr = ext_pgs->pa[i] + segoff; + if (*nextaddr != paddr) + nsegs++; + *nextaddr = paddr + seglen; + pgoff = 0; + }; + if (len != 0) { + seglen = min(len, ext_pgs->trail_len - off); + len -= seglen; + paddr = pmap_kextract((vm_offset_t)&ext_pgs->trail[off]); + if (*nextaddr != paddr) + nsegs++; + *nextaddr = paddr + seglen; + } + + return (nsegs); +} + + /* * Can deal with empty mbufs in the chain that have m_len = 0, but the chain * must have at least one mbuf that's not empty. It is possible for this * routine to return 0 if skip accounts for all the contents of the mbuf chain. 
*/ static inline int -count_mbuf_nsegs(struct mbuf *m, int skip) +count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags) { - vm_paddr_t lastb, next; + vm_paddr_t nextaddr, paddr; vm_offset_t va; int len, nsegs; @@ -2451,9 +2515,8 @@ MPASS(m->m_pkthdr.len >= skip); nsegs = 0; - lastb = 0; + nextaddr = 0; for (; m; m = m->m_next) { - len = m->m_len; if (__predict_false(len == 0)) continue; @@ -2461,14 +2524,20 @@ skip -= len; continue; } + if ((m->m_flags & M_NOMAP) != 0) { + *cflags |= MC_NOMAP; + nsegs += count_mbuf_ext_pgs(m, skip, &nextaddr); + skip = 0; + continue; + } va = mtod(m, vm_offset_t) + skip; len -= skip; skip = 0; - next = pmap_kextract(va); + paddr = pmap_kextract(va); nsegs += sglist_count((void *)(uintptr_t)va, len); - if (lastb + 1 == next) + if (paddr == nextaddr) nsegs--; - lastb = pmap_kextract(va + len - 1); + nextaddr = pmap_kextract(va + len - 1) + 1; } return (nsegs); @@ -2490,7 +2559,9 @@ struct tcphdr *tcp; #endif uint16_t eh_type; + uint8_t cflags; + cflags = 0; M_ASSERTPKTHDR(m0); if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) { rc = EINVAL; @@ -2506,7 +2577,7 @@ */ M_ASSERTPKTHDR(m0); MPASS(m0->m_pkthdr.len > 0); - nsegs = count_mbuf_nsegs(m0, 0); + nsegs = count_mbuf_nsegs(m0, 0, &cflags); if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { rc = EFBIG; @@ -2516,7 +2587,8 @@ goto restart; } - if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) { + if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN && + !(cflags & MC_NOMAP))) { m0 = m_pullup(m0, m0->m_pkthdr.len); if (m0 == NULL) { /* Should have left well enough alone. */ @@ -2527,7 +2599,7 @@ goto restart; } set_mbuf_nsegs(m0, nsegs); - set_mbuf_cflags(m0, 0); + set_mbuf_cflags(m0, cflags); if (sc->flags & IS_VF) set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); else @@ -2616,7 +2688,9 @@ /* EO WRs have the headers in the WR and not the GL. */ immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; - nsegs = count_mbuf_nsegs(m0, immhdrs); + cflags = 0; + nsegs = count_mbuf_nsegs(m0, immhdrs, &cflags); + MPASS(cflags == mbuf_cflags(m0)); set_mbuf_eo_nsegs(m0, nsegs); set_mbuf_eo_len16(m0, txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); @@ -4723,7 +4797,8 @@ ctrl = sizeof(struct cpl_tx_pkt_core); if (needs_tso(m0)) ctrl += sizeof(struct cpl_tx_pkt_lso_core); - else if (pktlen <= imm_payload(2) && available >= 2) { + else if (!(mbuf_cflags(m0) & MC_NOMAP) && pktlen <= imm_payload(2) && + available >= 2) { /* Immediate data. Recalculate len16 and set nsegs to 0. 
*/ ctrl += pktlen; len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + Index: sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- sys/dev/cxgbe/tom/t4_cpl_io.c +++ sys/dev/cxgbe/tom/t4_cpl_io.c @@ -666,6 +666,8 @@ if (IS_AIOTX_MBUF(m)) rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m), aiotx_mbuf_pgoff(m), m->m_len); + else if (m->m_flags & M_NOMAP) + rc = sglist_append_mb_ext_pgs(&sg, m); else rc = sglist_append(&sg, mtod(m, void *), m->m_len); if (__predict_false(rc != 0)) @@ -787,6 +789,8 @@ if (IS_AIOTX_MBUF(m)) n = sglist_count_vmpages(aiotx_mbuf_pages(m), aiotx_mbuf_pgoff(m), m->m_len); + else if (m->m_flags & M_NOMAP) + n = sglist_count_mb_ext_pgs(m); else n = sglist_count(mtod(m, void *), m->m_len); Index: sys/dev/mlx5/mlx5_en/mlx5_en_main.c =================================================================== --- sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3279,6 +3279,8 @@ "tso6 disabled due to -txcsum6.\n"); } } + if (mask & IFCAP_NOMAP) + ifp->if_capenable ^= IFCAP_NOMAP; if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) @@ -4145,6 +4147,7 @@ ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; + ifp->if_capabilities |= IFCAP_NOMAP; ifp->if_capabilities |= IFCAP_TXRTLMT; ifp->if_snd_tag_alloc = mlx5e_snd_tag_alloc; ifp->if_snd_tag_free = mlx5e_snd_tag_free; Index: sys/kern/kern_mbuf.c =================================================================== --- sys/kern/kern_mbuf.c +++ sys/kern/kern_mbuf.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,11 @@ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ +bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile */ +SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, + &mb_use_ext_pgs, 0, + "Use unmapped mbufs for sendfile(2)"); + static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, @@ -281,6 +287,7 @@ uma_zone_t zone_jumbop; uma_zone_t zone_jumbo9; uma_zone_t zone_jumbo16; +uma_zone_t zone_extpgs; /* * Local prototypes. @@ -298,6 +305,9 @@ /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); +_Static_assert(sizeof(struct mbuf_ext_pgs) == 256, + "mbuf_ext_pgs size mismatch"); + /* * Initialize FreeBSD Network buffer allocation. */ @@ -379,6 +389,15 @@ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); + zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME, + sizeof(struct mbuf_ext_pgs), +#ifdef INVARIANTS + trash_ctor, trash_dtor, trash_init, trash_fini, +#else + NULL, NULL, NULL, NULL, +#endif + UMA_ALIGN_CACHE, 0); + /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA @@ -823,6 +842,390 @@ (*pr->pr_drain)(); } +/* + * Free "count" units of I/O from an mbuf chain. They could be held + * in EXT_PGS or just as a normal mbuf. This code is intended to be + * called in an error path (I/O error, closed connection, etc). 
+ */
+void
+mb_free_notready(struct mbuf *m, int count)
+{
+	int i;
+
+	for (i = 0; i < count && m != NULL; i++) {
+		if ((m->m_flags & M_EXT) != 0 &&
+		    m->m_ext.ext_type == EXT_PGS) {
+			m->m_ext.ext_pgs->nrdy--;
+			if (m->m_ext.ext_pgs->nrdy != 0)
+				continue;
+		}
+		m = m_free(m);
+	}
+	KASSERT(i == count, ("Removed only %d items from %p", i, m));
+}
+
+/*
+ * Ensure it is possible to downgrade an EXT_PGS mbuf
+ * to a normal mbuf.
+ *
+ * XXXJHB: I think this is no longer needed? The callers of
+ * mb_unmapped_compress all check the length against MLEN, and
+ * mb_unmapped_compress allows data to be stored in unmapped pages.
+ */
+CTASSERT(MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN < MLEN);
+
+/*
+ * Compress an unmapped mbuf into a simple mbuf when it holds a small
+ * amount of data. This is used as a DOS defense to avoid having
+ * small packets tie up wired pages, an ext_pgs structure, and an
+ * mbuf. Since this converts the existing mbuf in place, it can only
+ * be used if there are no other references to 'm'.
+ */
+int
+mb_unmapped_compress(struct mbuf *m)
+{
+	volatile u_int *refcnt;
+	struct mbuf m_temp;
+
+	/*
+	 * Assert that 'm' does not have a packet header. If 'm' had
+	 * a packet header, it would only be able to hold MHLEN bytes
+	 * and m_data would have to be initialized differently.
+	 */
+	KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) &&
+	    m->m_ext.ext_type == EXT_PGS,
+	    ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m));
+	KASSERT(m->m_len <= MLEN, ("m_len too large %p", m));
+
+	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+		refcnt = &m->m_ext.ext_count;
+	} else {
+		KASSERT(m->m_ext.ext_cnt != NULL,
+		    ("%s: no refcounting pointer on %p", __func__, m));
+		refcnt = m->m_ext.ext_cnt;
+	}
+
+	if (*refcnt != 1)
+		return (EBUSY);
+
+	/*
+	 * Copy mbuf header and m_ext portion of 'm' to 'm_temp' to
+	 * create a "fake" EXT_PGS mbuf that can be used with
+	 * m_copydata() as well as the ext_free callback.
+	 */
+	memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof (m->m_ext));
+	m_temp.m_next = NULL;
+	m_temp.m_nextpkt = NULL;
+
+	/* Turn 'm' into a "normal" mbuf. */
+	m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP);
+	m->m_data = m->m_dat;
+
+	/* Copy data from template's ext_pgs. */
+	m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t));
+
+	/* Free the backing pages. */
+	m_temp.m_ext.ext_free(&m_temp);
+
+	/* Finally, free the ext_pgs struct. */
+	uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs);
+	return (0);
+}
+
+/*
+ * These next few routines are used to permit downgrading an unmapped
+ * mbuf to a chain of mapped mbufs. This is used when an interface
+ * doesn't support unmapped mbufs or if checksums need to be
+ * computed in software.
+ *
+ * Each unmapped mbuf is converted to a chain of mbufs. First, any
+ * TLS header data is stored in a regular mbuf. Second, each page of
+ * unmapped data is stored in an mbuf with an EXT_SFBUF external
+ * cluster. These mbufs use an sf_buf to provide a valid KVA for the
+ * associated physical page. They also hold a reference on the
+ * original EXT_PGS mbuf to ensure the physical page doesn't go away.
+ * Finally, any TLS trailer data is stored in a regular mbuf.
+ *
+ * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF
+ * mbufs. It frees the associated sf_buf and releases its reference
+ * on the original EXT_PGS mbuf.
+ *
+ * _mb_unmapped_to_ext() is a helper function that converts a single
+ * unmapped mbuf into a chain of mbufs.
+ * + * mb_unmapped_to_ext() is the public function that walks an mbuf + * chain converting any unmapped mbufs to mapped mbufs. It returns + * the new chain of unmapped mbufs on success. On failure it frees + * the original mbuf chain and returns NULL. + */ +static void +mb_unmapped_free_mext(struct mbuf *m) +{ + struct sf_buf *sf; + struct mbuf *old_m; + + sf = m->m_ext.ext_arg1; + sf_buf_free(sf); + + /* Drop the reference on the backing EXT_PGS mbuf. */ + old_m = m->m_ext.ext_arg2; + mb_free_ext(old_m); +} + +static struct mbuf * +_mb_unmapped_to_ext(struct mbuf *m) +{ + struct mbuf_ext_pgs *ext_pgs; + struct mbuf *m_new, *top, *prev, *mref; + struct sf_buf *sf; + vm_page_t pg; + int i, len, off, pglen, pgoff, seglen, segoff; + volatile u_int *refcnt; + u_int ref_inc = 0; + + MBUF_EXT_PGS_ASSERT(m); + ext_pgs = m->m_ext.ext_pgs; + len = m->m_len; + KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p", + __func__, m)); + + /* See if this is the mbuf that holds the embedded refcount. */ + if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { + refcnt = &m->m_ext.ext_count; + mref = m; + } else { + KASSERT(m->m_ext.ext_cnt != NULL, + ("%s: no refcounting pointer on %p", __func__, m)); + refcnt = m->m_ext.ext_cnt; + mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); + } + + /* Skip over any data removed from the front. */ + off = mtod(m, vm_offset_t); + + top = NULL; + if (ext_pgs->hdr_len != 0) { + if (off >= ext_pgs->hdr_len) { + off -= ext_pgs->hdr_len; + } else { + seglen = ext_pgs->hdr_len - off; + segoff = off; + seglen = min(seglen, len); + off = 0; + len -= seglen; + m_new = m_get(M_NOWAIT, MT_DATA); + if (m_new == NULL) + goto fail; + m_new->m_len = seglen; + prev = top = m_new; + memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff], + seglen); + } + } + pgoff = ext_pgs->first_pg_off; + for (i = 0; i < ext_pgs->npgs && len > 0; i++) { + pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + if (off >= pglen) { + off -= pglen; + pgoff = 0; + continue; + } + seglen = pglen - off; + segoff = pgoff + off; + off = 0; + seglen = min(seglen, len); + len -= seglen; + + pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + m_new = m_get(M_NOWAIT, MT_DATA); + if (m_new == NULL) + goto fail; + if (top == NULL) { + top = prev = m_new; + } else { + prev->m_next = m_new; + prev = m_new; + } + sf = sf_buf_alloc(pg, SFB_NOWAIT); + if (sf == NULL) + goto fail; + + ref_inc++; + m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE, + mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF); + m_new->m_data += segoff; + m_new->m_len = seglen; + + pgoff = 0; + }; + if (len != 0) { + KASSERT((off + len) <= ext_pgs->trail_len, + ("off + len > trail (%d + %d > %d)", off, len, + ext_pgs->trail_len)); + m_new = m_get(M_NOWAIT, MT_DATA); + if (m_new == NULL) + goto fail; + if (top == NULL) + top = m_new; + else + prev->m_next = m_new; + m_new->m_len = len; + memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len); + } + + if (ref_inc != 0) { + /* + * Obtain an additional reference on the old mbuf for + * each created EXT_SFBUF mbuf. They will be dropped + * in mb_unmapped_free_mext(). + */ + if (*refcnt == 1) + *refcnt += ref_inc; + else + atomic_add_int(refcnt, ref_inc); + } + m_free(m); + return (top); + +fail: + if (ref_inc != 0) { + /* + * Obtain an additional reference on the old mbuf for + * each created EXT_SFBUF mbuf. They will be + * immediately dropped when these mbufs are freed + * below. 
+ */ + if (*refcnt == 1) + *refcnt += ref_inc; + else + atomic_add_int(refcnt, ref_inc); + } + m_free(m); + m_freem(top); + return (NULL); +} + +struct mbuf * +mb_unmapped_to_ext(struct mbuf *top) +{ + struct mbuf *m, *next, *prev = NULL; + + prev = NULL; + for (m = top; m != NULL; m = next) { + /* m might be freed, so cache the next pointer. */ + next = m->m_next; + if (m->m_flags & M_NOMAP) { + if (prev != NULL) { + /* + * Remove 'm' from the new chain so + * that the 'top' chain terminates + * before 'm' in case 'top' is freed + * due to an error. + */ + prev->m_next = NULL; + } + m = _mb_unmapped_to_ext(m); + if (m == NULL) { + m_freem(top); + m_freem(next); + return (NULL); + } + if (prev == NULL) { + top = m; + } else { + prev->m_next = m; + } + + /* + * Replaced one mbuf with a chain, so we must + * find the end of chain. + */ + prev = m_last(m); + } else { + if (prev != NULL) { + prev->m_next = m; + } + prev = m; + } + } + return (top); +} + +/* + * Allocate an empty EXT_PGS mbuf. The ext_free routine is + * responsible for freeing any pages backing this mbuf when it is + * freed. + */ +struct mbuf * +mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free) +{ + struct mbuf *m; + struct mbuf_ext_pgs *ext_pgs; + + if (pkthdr) + m = m_gethdr(how, MT_DATA); + else + m = m_get(how, MT_DATA); + if (m == NULL) + return (NULL); + + ext_pgs = uma_zalloc(zone_extpgs, how); + if (ext_pgs == NULL) { + m_free(m); + return (NULL); + } + ext_pgs->npgs = 0; + ext_pgs->nrdy = 0; + ext_pgs->first_pg_off = 0; + ext_pgs->last_pg_len = 0; + ext_pgs->hdr_len = 0; + ext_pgs->trail_len = 0; + ext_pgs->tls = NULL; + ext_pgs->so = NULL; + m->m_data = NULL; + m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP); + m->m_ext.ext_type = EXT_PGS; + m->m_ext.ext_flags = EXT_FLAG_EMBREF; + m->m_ext.ext_count = 1; + m->m_ext.ext_pgs = ext_pgs; + m->m_ext.ext_size = 0; + m->m_ext.ext_free = ext_free; + return (m); +} + +#ifdef INVARIANT_SUPPORT +void +mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs) +{ + + /* + * NB: This expects a non-empty buffer (npgs > 0 and + * last_pg_len > 0). + */ + KASSERT(ext_pgs->npgs > 0, + ("ext_pgs with no valid pages: %p", ext_pgs)); + KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa), + ("ext_pgs with too many pages: %p", ext_pgs)); + KASSERT(ext_pgs->nrdy <= ext_pgs->npgs, + ("ext_pgs with too many ready pages: %p", ext_pgs)); + KASSERT(ext_pgs->first_pg_off < PAGE_SIZE, + ("ext_pgs with too large page offset: %p", ext_pgs)); + KASSERT(ext_pgs->last_pg_len > 0, + ("ext_pgs with zero last page length: %p", ext_pgs)); + KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE, + ("ext_pgs with too large last page length: %p", ext_pgs)); + if (ext_pgs->npgs == 1) { + KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <= + PAGE_SIZE, ("ext_pgs with single page too large: %p", + ext_pgs)); + } + KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr), + ("ext_pgs with too large header length: %p", ext_pgs)); + KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail), + ("ext_pgs with too large header length: %p", ext_pgs)); +} +#endif + /* * Clean up after mbufs with M_EXT storage attached to them if the * reference count hits 1. 
@@ -888,6 +1291,10 @@ uma_zfree(zone_jumbo16, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; + case EXT_PGS: + uma_zfree(zone_extpgs, mref->m_ext.ext_pgs); + uma_zfree(zone_mbuf, mref); + break; case EXT_SFBUF: case EXT_NET_DRV: case EXT_MOD_TYPE: Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,7 @@ #define EXT_FLAG_SYNC EXT_FLAG_VENDOR1 #define EXT_FLAG_NOCACHE EXT_FLAG_VENDOR2 +#define EXT_FLAG_CACHE_LAST EXT_FLAG_VENDOR3 /* * Structure describing a single sendfile(2) I/O, which may consist of @@ -201,6 +203,39 @@ } } +static void +sendfile_free_mext_pg(struct mbuf *m) +{ + struct mbuf_ext_pgs *ext_pgs; + vm_page_t pg; + int i; + bool nocache, cache_last; + + KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS, + ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)); + + nocache = m->m_ext.ext_flags & EXT_FLAG_NOCACHE; + cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST; + ext_pgs = m->m_ext.ext_pgs; + + for (i = 0; i < ext_pgs->npgs; i++) { + if (cache_last && i == ext_pgs->npgs - 1) + nocache = false; + pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + sendfile_free_page(pg, nocache); + } + + if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { + struct sendfile_sync *sfs = m->m_ext.ext_arg2; + + mtx_lock(&sfs->mtx); + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); + if (--sfs->count == 0) + cv_signal(&sfs->cv); + mtx_unlock(&sfs->mtx); + } +} + /* * Helper function to calculate how much data to put into page i of n. * Only first and last pages are special. @@ -283,8 +318,6 @@ CURVNET_SET(so->so_vnet); if (sfio->error) { - struct mbuf *m; - /* * I/O operation failed. The state of data in the socket * is now inconsistent, and all what we can do is to tear @@ -299,11 +332,9 @@ so->so_proto->pr_usrreqs->pru_abort(so); so->so_error = EIO; - m = sfio->m; - for (int i = 0; i < sfio->npages; i++) - m = m_free(m); + mb_free_notready(sfio->m, sfio->npages); } else - (void )(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, + (void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, sfio->npages); SOCK_LOCK(so); @@ -540,13 +571,15 @@ struct vnode *vp; struct vm_object *obj; struct socket *so; + struct mbuf_ext_pgs *ext_pgs; struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; struct shmfd *shmfd; struct sendfile_sync *sfs; struct vattr va; off_t off, sbytes, rem, obj_size; - int error, softerr, bsize, hdrlen; + int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr; + bool use_ext_pgs; obj = NULL; so = NULL; @@ -554,6 +587,7 @@ sfs = NULL; hdrlen = sbytes = 0; softerr = 0; + use_ext_pgs = false; error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); if (error != 0) @@ -714,6 +748,17 @@ if (space > rem) space = rem; + else if (space > PAGE_SIZE) { + /* + * Use page boundaries when possible for large + * requests. + */ + if (off & PAGE_MASK) + space -= (PAGE_SIZE - (off & PAGE_MASK)); + space = trunc_page(space); + if (off & PAGE_MASK) + space += (PAGE_SIZE - (off & PAGE_MASK)); + } npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE); @@ -751,6 +796,22 @@ * dumped into socket buffer. */ pa = sfio->pa; + + /* + * Use unmapped mbufs if enabled for TCP. Unmapped + * bufs are restricted to TCP as that is what has been + * tested. In particular, unmapped mbufs have not + * been tested with UNIX-domain sockets. 
+ */ + if (mb_use_ext_pgs && + so->so_proto->pr_protocol == IPPROTO_TCP) { + use_ext_pgs = true; + max_pgs = MBUF_PEXT_MAX_PGS; + + /* Start at last index, to wrap on first use. */ + ext_pgs_idx = max_pgs - 1; + } + for (int i = 0; i < npages; i++) { struct mbuf *m0; @@ -766,6 +827,66 @@ break; } + if (use_ext_pgs) { + off_t xfs; + + ext_pgs_idx++; + if (ext_pgs_idx == max_pgs) { + m0 = mb_alloc_ext_pgs(M_WAITOK, false, + sendfile_free_mext_pg); + + if (flags & SF_NOCACHE) { + m0->m_ext.ext_flags |= + EXT_FLAG_NOCACHE; + + /* + * See comment below regarding + * ignoring SF_NOCACHE for the + * last page. + */ + if ((npages - i <= max_pgs) && + ((off + space) & PAGE_MASK) && + (rem > space || rhpages > 0)) + m0->m_ext.ext_flags |= + EXT_FLAG_CACHE_LAST; + } + if (sfs != NULL) { + m0->m_ext.ext_flags |= + EXT_FLAG_SYNC; + m0->m_ext.ext_arg2 = sfs; + mtx_lock(&sfs->mtx); + sfs->count++; + mtx_unlock(&sfs->mtx); + } + ext_pgs = m0->m_ext.ext_pgs; + if (i == 0) + sfio->m = m0; + ext_pgs_idx = 0; + + /* Append to mbuf chain. */ + if (mtail != NULL) + mtail->m_next = m0; + else + m = m0; + mtail = m0; + ext_pgs->first_pg_off = + vmoff(i, off) & PAGE_MASK; + } + if (nios) { + mtail->m_flags |= M_NOTREADY; + ext_pgs->nrdy++; + } + + ext_pgs->pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pa[i]); + ext_pgs->npgs++; + xfs = xfsize(i, npages, off, space); + ext_pgs->last_pg_len = xfs; + MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs); + mtail->m_len += xfs; + mtail->m_ext.ext_size += PAGE_SIZE; + continue; + } + /* * Get a sendfile buf. When allocating the * first buffer for mbuf chain, we usually Index: sys/kern/subr_bus_dma.c =================================================================== --- sys/kern/subr_bus_dma.c +++ sys/kern/subr_bus_dma.c @@ -110,6 +110,67 @@ return (error); } +/* + * Load an unmapped mbuf + */ +static int +_bus_dmamap_load_unmapped_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, + struct mbuf *m, bus_dma_segment_t *segs, int *nsegs, int flags) +{ + struct mbuf_ext_pgs *ext_pgs; + int error, i, off, len, pglen, pgoff, seglen, segoff; + + MBUF_EXT_PGS_ASSERT(m); + ext_pgs = m->m_ext.ext_pgs; + + len = m->m_len; + error = 0; + + /* Skip over any data removed from the front. */ + off = mtod(m, vm_offset_t); + + if (ext_pgs->hdr_len != 0) { + if (off >= ext_pgs->hdr_len) { + off -= ext_pgs->hdr_len; + } else { + seglen = ext_pgs->hdr_len - off; + segoff = off; + seglen = min(seglen, len); + off = 0; + len -= seglen; + error = _bus_dmamap_load_buffer(dmat, map, + &ext_pgs->hdr[segoff], seglen, kernel_pmap, + flags, segs, nsegs); + } + } + pgoff = ext_pgs->first_pg_off; + for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) { + pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + if (off >= pglen) { + off -= pglen; + pgoff = 0; + continue; + } + seglen = pglen - off; + segoff = pgoff + off; + off = 0; + seglen = min(seglen, len); + len -= seglen; + error = _bus_dmamap_load_phys(dmat, map, + ext_pgs->pa[i] + segoff, seglen, flags, segs, nsegs); + pgoff = 0; + }; + if (len != 0 && error == 0) { + KASSERT((off + len) <= ext_pgs->trail_len, + ("off + len > trail (%d + %d > %d)", off, len, + ext_pgs->trail_len)); + error = _bus_dmamap_load_buffer(dmat, map, + &ext_pgs->trail[off], len, kernel_pmap, flags, segs, + nsegs); + } + return (error); +} + /* * Load an mbuf chain. 
*/ @@ -123,9 +184,13 @@ error = 0; for (m = m0; m != NULL && error == 0; m = m->m_next) { if (m->m_len > 0) { - error = _bus_dmamap_load_buffer(dmat, map, m->m_data, - m->m_len, kernel_pmap, flags | BUS_DMA_LOAD_MBUF, - segs, nsegs); + if ((m->m_flags & M_NOMAP) != 0) + error = _bus_dmamap_load_unmapped_mbuf_sg(dmat, + map, m, segs, nsegs, flags); + else + error = _bus_dmamap_load_buffer(dmat, map, + m->m_data, m->m_len, kernel_pmap, + flags | BUS_DMA_LOAD_MBUF, segs, nsegs); } } CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", Index: sys/kern/subr_sglist.c =================================================================== --- sys/kern/subr_sglist.c +++ sys/kern/subr_sglist.c @@ -218,6 +218,75 @@ return (nsegs); } +/* + * Determine the number of scatter/gather list elements needed to + * describe an EXT_PGS buffer. + */ +int +sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off, size_t len) +{ + vm_paddr_t nextaddr, paddr; + size_t seglen, segoff; + int i, nsegs, pglen, pgoff; + + if (len == 0) + return (0); + + nsegs = 0; + if (ext_pgs->hdr_len != 0) { + if (off >= ext_pgs->hdr_len) { + off -= ext_pgs->hdr_len; + } else { + seglen = ext_pgs->hdr_len - off; + segoff = off; + seglen = MIN(seglen, len); + off = 0; + len -= seglen; + nsegs += sglist_count(&ext_pgs->hdr[segoff], seglen); + } + } + nextaddr = 0; + pgoff = ext_pgs->first_pg_off; + for (i = 0; i < ext_pgs->npgs && len > 0; i++) { + pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + if (off >= pglen) { + off -= pglen; + pgoff = 0; + continue; + } + seglen = pglen - off; + segoff = pgoff + off; + off = 0; + seglen = MIN(seglen, len); + len -= seglen; + paddr = ext_pgs->pa[i] + segoff; + if (paddr != nextaddr) + nsegs++; + nextaddr = paddr + seglen; + pgoff = 0; + }; + if (len != 0) { + seglen = MIN(len, ext_pgs->trail_len - off); + len -= seglen; + nsegs += sglist_count(&ext_pgs->trail[off], seglen); + } + KASSERT(len == 0, ("len != 0")); + return (nsegs); +} + +/* + * Determine the number of scatter/gather list elements needed to + * describe an EXT_PGS mbuf. + */ +int +sglist_count_mb_ext_pgs(struct mbuf *m) +{ + + MBUF_EXT_PGS_ASSERT(m); + return (sglist_count_ext_pgs(m->m_ext.ext_pgs, mtod(m, vm_offset_t), + m->m_len)); +} + /* * Allocate a scatter/gather list along with 'nsegs' segments. The * 'mflags' parameters are the same as passed to malloc(9). The caller @@ -319,6 +388,76 @@ return (error); } +/* + * Append the segments to describe an EXT_PGS buffer to a + * scatter/gather list. If there are insufficient segments, then this + * fails with EFBIG. 
+ */ +int +sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs, + size_t off, size_t len) +{ + size_t seglen, segoff; + vm_paddr_t paddr; + int error, i, pglen, pgoff; + + error = 0; + if (ext_pgs->hdr_len != 0) { + if (off >= ext_pgs->hdr_len) { + off -= ext_pgs->hdr_len; + } else { + seglen = ext_pgs->hdr_len - off; + segoff = off; + seglen = MIN(seglen, len); + off = 0; + len -= seglen; + error = sglist_append(sg, + &ext_pgs->hdr[segoff], seglen); + } + } + pgoff = ext_pgs->first_pg_off; + for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) { + pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + if (off >= pglen) { + off -= pglen; + pgoff = 0; + continue; + } + seglen = pglen - off; + segoff = pgoff + off; + off = 0; + seglen = MIN(seglen, len); + len -= seglen; + paddr = ext_pgs->pa[i] + segoff; + error = sglist_append_phys(sg, paddr, seglen); + pgoff = 0; + }; + if (error == 0 && len > 0) { + seglen = MIN(len, ext_pgs->trail_len - off); + len -= seglen; + error = sglist_append(sg, + &ext_pgs->trail[off], seglen); + } + if (error == 0) + KASSERT(len == 0, ("len != 0")); + return (error); +} + +/* + * Append the segments to describe an EXT_PGS mbuf to a scatter/gather + * list. If there are insufficient segments, then this fails with + * EFBIG. + */ +int +sglist_append_mb_ext_pgs(struct sglist *sg, struct mbuf *m) +{ + + /* for now, all unmapped mbufs are assumed to be EXT_PGS */ + MBUF_EXT_PGS_ASSERT(m); + return (sglist_append_ext_pgs(sg, m->m_ext.ext_pgs, + mtod(m, vm_offset_t), m->m_len)); +} + /* * Append the segments that describe a single mbuf chain to a * scatter/gather list. If there are insufficient segments, then this @@ -338,7 +477,11 @@ SGLIST_SAVE(sg, save); for (m = m0; m != NULL; m = m->m_next) { if (m->m_len > 0) { - error = sglist_append(sg, m->m_data, m->m_len); + if ((m->m_flags & M_NOMAP) != 0) + error = sglist_append_mb_ext_pgs(sg, m); + else + error = sglist_append(sg, m->m_data, + m->m_len); if (error) { SGLIST_RESTORE(sg, save); return (error); Index: sys/kern/uipc_mbuf.c =================================================================== --- sys/kern/uipc_mbuf.c +++ sys/kern/uipc_mbuf.c @@ -50,6 +50,10 @@ #include #include #include +#include +#include +#include +#include SDT_PROBE_DEFINE5_XLATE(sdt, , , m__init, "struct mbuf *", "mbufinfo_t *", @@ -202,7 +206,7 @@ else bcopy(&m->m_ext, &n->m_ext, m_ext_copylen); n->m_flags |= M_EXT; - n->m_flags |= m->m_flags & M_RDONLY; + n->m_flags |= m->m_flags & (M_RDONLY | M_NOMAP); /* See if this is the mbuf that holds the embedded refcount. 
*/ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { @@ -246,7 +250,8 @@ __func__, m, m0)); if (m->m_flags & M_PKTHDR) m_demote_pkthdr(m); - m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags); + m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | + M_NOMAP | flags); } } @@ -376,7 +381,8 @@ if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); #endif - to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + to->m_flags = (from->m_flags & M_COPYFLAGS) | + (to->m_flags & (M_EXT | M_NOMAP)); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; /* especially tags */ @@ -414,7 +420,8 @@ if (to->m_flags & M_PKTHDR) m_tag_delete_chain(to, NULL); #endif - to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); + to->m_flags = (from->m_flags & M_COPYFLAGS) | + (to->m_flags & (M_EXT | M_NOMAP)); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; @@ -579,6 +586,30 @@ return (NULL); } +static void +m_copyfromunmapped(const struct mbuf *m, int off, int len, caddr_t cp) +{ + struct iovec iov; + struct uio uio; + int error; + + KASSERT(off >= 0, ("m_copyfromunmapped: negative off %d", off)); + KASSERT(len >= 0, ("m_copyfromunmapped: negative len %d", len)); + KASSERT(off < m->m_len, + ("m_copyfromunmapped: len exceeds mbuf length")); + iov.iov_base = cp; + iov.iov_len = len; + uio.uio_resid = len; + uio.uio_iov = &iov; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_rw = UIO_READ; + error = m_unmappedtouio(m, off, &uio, len); + KASSERT(error == 0, ("m_unmappedtouio failed: off %d, len %d", off, + len)); +} + /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. @@ -600,7 +631,10 @@ while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); - bcopy(mtod(m, caddr_t) + off, cp, count); + if ((m->m_flags & M_NOMAP) != 0) + m_copyfromunmapped(m, off, count, cp); + else + bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; @@ -695,6 +729,7 @@ m = m->m_next; while (n) { if (!M_WRITABLE(m) || + (n->m_flags & M_NOMAP) != 0 || M_TRAILINGSPACE(m) < n->m_len) { /* just join the two chains */ m->m_next = n; @@ -812,6 +847,9 @@ int count; int space; + KASSERT((n->m_flags & M_NOMAP) == 0, + ("%s: unmapped mbuf %p", __func__, n)); + /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, @@ -1364,6 +1402,41 @@ return (NULL); } +/* + * Return the number of fragments an mbuf will use. This is usually + * used as a proxy for the number of scatter/gather elements needed by + * a DMA engine to access an mbuf. In general mapped mbufs are + * assumed to be backed by physically contiguous buffers that only + * need a single fragment. Unmapped mbufs, on the other hand, can + * span disjoint physical pages. + */ +static int +frags_per_mbuf(struct mbuf *m) +{ + struct mbuf_ext_pgs *ext_pgs; + int frags; + + if ((m->m_flags & M_NOMAP) == 0) + return (1); + + /* + * The header and trailer are counted as a single fragment + * each when present. + * + * XXX: This overestimates the number of fragments by assuming + * all the backing physical pages are disjoint. 
+ */ + ext_pgs = m->m_ext.ext_pgs; + frags = 0; + if (ext_pgs->hdr_len != 0) + frags++; + frags += ext_pgs->npgs; + if (ext_pgs->trail_len != 0) + frags++; + + return (frags); +} + /* * Defragment an mbuf chain, returning at most maxfrags separate * mbufs+clusters. If this is not possible NULL is returned and @@ -1384,7 +1457,7 @@ */ curfrags = 0; for (m = m0; m != NULL; m = m->m_next) - curfrags++; + curfrags += frags_per_mbuf(m); /* * First, try to collapse mbufs. Note that we always collapse * towards the front so we don't need to deal with moving the @@ -1399,12 +1472,13 @@ break; if (M_WRITABLE(m) && n->m_len < M_TRAILINGSPACE(m)) { - bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, - n->m_len); + m_copydata(n, 0, n->m_len, + mtod(m, char *) + m->m_len); m->m_len += n->m_len; m->m_next = n->m_next; + curfrags -= frags_per_mbuf(n); m_free(n); - if (--curfrags <= maxfrags) + if (curfrags <= maxfrags) return m0; } else m = n; @@ -1421,15 +1495,18 @@ m = m_getcl(how, MT_DATA, 0); if (m == NULL) goto bad; - bcopy(mtod(n, void *), mtod(m, void *), n->m_len); - bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len, - n2->m_len); + m_copydata(n, 0, n->m_len, mtod(m, char *)); + m_copydata(n2, 0, n2->m_len, + mtod(m, char *) + n->m_len); m->m_len = n->m_len + n2->m_len; m->m_next = n2->m_next; *prev = m; + curfrags += 1; /* For the new cluster */ + curfrags -= frags_per_mbuf(n); + curfrags -= frags_per_mbuf(n2); m_free(n); m_free(n2); - if (--curfrags <= maxfrags) /* +1 cl -2 mbufs */ + if (curfrags <= maxfrags) return m0; /* * Still not there, try the normal collapse @@ -1529,6 +1606,111 @@ #endif +/* + * Free pages from mbuf_ext_pgs, assuming they were allocated via + * vm_page_alloc() and aren't associated with any object. Complement + * to allocator from m_uiotombuf_nomap(). + */ +void +mb_free_mext_pgs(struct mbuf *m) +{ + struct mbuf_ext_pgs *ext_pgs; + vm_page_t pg; + int wire_adj; + + MBUF_EXT_PGS_ASSERT(m); + ext_pgs = m->m_ext.ext_pgs; + wire_adj = 0; + for (int i = 0; i < ext_pgs->npgs; i++) { + pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + /* + * Note: page is not locked, as it has no + * object and is not on any queues. + */ + vm_page_free_toq(pg); + wire_adj++; + } + if (wire_adj) + vm_wire_sub(wire_adj); +} + +static struct mbuf * +m_uiotombuf_nomap(struct uio *uio, int how, int len, int maxseg, int flags) +{ + struct mbuf *m, *mb, *prev; + struct mbuf_ext_pgs *pgs; + vm_page_t pg_array[MBUF_PEXT_MAX_PGS]; + int error, length, i, needed, wire_adj = 0; + ssize_t total; + int pflags = malloc2vm_flags(how) | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP; + + /* + * len can be zero or an arbitrary large value bound by + * the total data supplied by the uio. 
+ */ + if (len > 0) + total = MIN(uio->uio_resid, len); + else + total = uio->uio_resid; + + if (maxseg == 0) + maxseg = MBUF_PEXT_MAX_PGS * PAGE_SIZE; + + /* + * Allocate the pages + */ + m = NULL; + while (total > 0) { + mb = mb_alloc_ext_pgs(how, (flags & M_PKTHDR), + mb_free_mext_pgs); + if (mb == NULL) + goto failed; + if (m == NULL) + m = mb; + else + prev->m_next = mb; + prev = mb; + pgs = mb->m_ext.ext_pgs; + needed = length = MIN(maxseg, total); + for (i = 0; needed > 0; i++, needed -= PAGE_SIZE) { +retry_page: + pg_array[i] = vm_page_alloc(NULL, 0, pflags); + if (pg_array[i] == NULL) { + if (wire_adj) + vm_wire_add(wire_adj); + wire_adj = 0; + if (how & M_NOWAIT) { + goto failed; + } else { + vm_wait(NULL); + goto retry_page; + } + } + wire_adj++; + pg_array[i]->flags &= ~PG_ZERO; + pgs->pa[i] = VM_PAGE_TO_PHYS(pg_array[i]); + pgs->npgs++; + } + pgs->last_pg_len = length - PAGE_SIZE * (pgs->npgs - 1); + MBUF_EXT_PGS_ASSERT_SANITY(pgs); + vm_wire_add(wire_adj); + wire_adj = 0; + total -= length; + error = uiomove_fromphys(pg_array, 0, length, uio); + if (error != 0) + goto failed; + mb->m_len = length; + mb->m_ext.ext_size += PAGE_SIZE * pgs->npgs; + if (flags & M_PKTHDR) + m->m_pkthdr.len += length; + } + return (m); + +failed: + m_freem(m); + return (NULL); +} + /* * Copy the contents of uio into a properly sized mbuf chain. */ @@ -1540,6 +1722,9 @@ ssize_t total; int progress = 0; + if (flags & M_NOMAP) + return (m_uiotombuf_nomap(uio, how, len, align, flags)); + /* * len can be zero or an arbitrary large value bound by * the total data supplied by the uio. @@ -1585,6 +1770,62 @@ return (m); } +/* + * Copy data from an unmapped mbuf into a uio limited by len if set. + */ +int +m_unmappedtouio(const struct mbuf *m, int m_off, struct uio *uio, int len) +{ + struct mbuf_ext_pgs *ext_pgs; + vm_page_t pg; + int error, i, off, pglen, pgoff, seglen, segoff; + + MBUF_EXT_PGS_ASSERT(m); + ext_pgs = m->m_ext.ext_pgs; + error = 0; + + /* Skip over any data removed from the front. */ + off = mtod(m, vm_offset_t); + + off += m_off; + if (ext_pgs->hdr_len != 0) { + if (off >= ext_pgs->hdr_len) { + off -= ext_pgs->hdr_len; + } else { + seglen = ext_pgs->hdr_len - off; + segoff = off; + seglen = min(seglen, len); + off = 0; + len -= seglen; + error = uiomove(&ext_pgs->hdr[segoff], seglen, uio); + } + } + pgoff = ext_pgs->first_pg_off; + for (i = 0; i < ext_pgs->npgs && error == 0 && len > 0; i++) { + pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff); + if (off >= pglen) { + off -= pglen; + pgoff = 0; + continue; + } + seglen = pglen - off; + segoff = pgoff + off; + off = 0; + seglen = min(seglen, len); + len -= seglen; + pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); + error = uiomove_fromphys(&pg, segoff, seglen, uio); + pgoff = 0; + }; + if (len != 0 && error == 0) { + KASSERT((off + len) <= ext_pgs->trail_len, + ("off + len > trail (%d + %d > %d, m_off = %d)", off, len, + ext_pgs->trail_len, m_off)); + error = uiomove(&ext_pgs->trail[off], len, uio); + } + return (error); +} + /* * Copy an mbuf chain into a uio limited by len if set. 
*/ @@ -1603,7 +1844,10 @@ for (; m != NULL; m = m->m_next) { length = min(m->m_len, total - progress); - error = uiomove(mtod(m, void *), length, uio); + if ((m->m_flags & M_NOMAP) != 0) + error = m_unmappedtouio(m, 0, uio, length); + else + error = uiomove(mtod(m, void *), length, uio); if (error) return (error); Index: sys/kern/uipc_sockbuf.c =================================================================== --- sys/kern/uipc_sockbuf.c +++ sys/kern/uipc_sockbuf.c @@ -89,28 +89,130 @@ } /* - * Mark ready "count" mbufs starting with "m". + * Compress M_NOTREADY mbufs after they have been readied by sbready(). + * + * sbcompress() skips M_NOTREADY mbufs since the data is not available to + * be copied at the time of sbcompress(). This function combines small + * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first + * mbuf sbready() marked ready, and 'end' is the first mbuf still not + * ready. + */ +static void +sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end) +{ + struct mbuf *m, *n; + int ext_size; + + SOCKBUF_LOCK_ASSERT(sb); + + if ((sb->sb_flags & SB_NOCOALESCE) != 0) + return; + + for (m = m0; m != end; m = m->m_next) { + MPASS((m->m_flags & M_NOTREADY) == 0); + + /* Compress small unmapped mbufs into plain mbufs. */ + if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN) { + MPASS(m->m_flags & M_EXT); + ext_size = m->m_ext.ext_size; + if (mb_unmapped_compress(m) == 0) { + sb->sb_mbcnt -= ext_size; + sb->sb_ccnt -= 1; + } + } + + /* + * NB: In sbcompress(), 'n' is the last mbuf in the + * socket buffer and 'm' is the new mbuf being copied + * into the trailing space of 'n'. Here, the roles + * are reversed and 'n' is the next mbuf after 'm' + * that is being copied into the trailing space of + * 'm'. + */ + n = m->m_next; + while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && + M_WRITABLE(m) && + (m->m_flags & M_NOMAP) == 0 && + n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ + n->m_len <= M_TRAILINGSPACE(m) && + m->m_type == n->m_type) { + KASSERT(sb->sb_lastrecord != n, + ("%s: merging start of record (%p) into previous mbuf (%p)", + __func__, n, m)); + m_copydata(n, 0, n->m_len, mtodo(m, m->m_len)); + m->m_len += n->m_len; + m->m_next = n->m_next; + m->m_flags |= n->m_flags & M_EOR; + if (sb->sb_mbtail == n) + sb->sb_mbtail = m; + + sb->sb_mbcnt -= MSIZE; + sb->sb_mcnt -= 1; + if (n->m_flags & M_EXT) { + sb->sb_mbcnt -= n->m_ext.ext_size; + sb->sb_ccnt -= 1; + } + m_free(n); + n = m->m_next; + } + } + SBLASTRECORDCHK(sb); + SBLASTMBUFCHK(sb); +} + +/* + * Mark ready "count" units of I/O starting with "m". Most mbufs + * count as a single unit of I/O except for EXT_PGS-backed mbufs which + * can be backed by multiple pages. */ int -sbready(struct sockbuf *sb, struct mbuf *m, int count) +sbready(struct sockbuf *sb, struct mbuf *m0, int count) { + struct mbuf *m; u_int blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); + KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); + m = m0; blocker = (sb->sb_fnrdy == m) ? 
M_BLOCKED : 0; - for (int i = 0; i < count; i++, m = m->m_next) { + while (count > 0) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); + if ((m->m_flags & M_EXT) != 0 && + m->m_ext.ext_type == EXT_PGS) { + if (count < m->m_ext.ext_pgs->nrdy) { + m->m_ext.ext_pgs->nrdy -= count; + count = 0; + break; + } + count -= m->m_ext.ext_pgs->nrdy; + m->m_ext.ext_pgs->nrdy = 0; + } else + count--; + m->m_flags &= ~(M_NOTREADY | blocker); if (blocker) sb->sb_acc += m->m_len; + m = m->m_next; } - if (!blocker) + /* + * If the first mbuf is still not fully ready because only + * some of its backing pages were readied, no further progress + * can be made. + */ + if (m0 == m) { + MPASS(m->m_flags & M_NOTREADY); return (EINPROGRESS); + } + + if (!blocker) { + sbready_compress(sb, m0, m); + return (EINPROGRESS); + } /* This one was blocking all the queue. */ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { @@ -121,6 +223,7 @@ } sb->sb_fnrdy = m; + sbready_compress(sb, m0, m); return (0); } @@ -1030,12 +1133,11 @@ M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && - !(n->m_flags & M_NOTREADY) && + !(n->m_flags & (M_NOTREADY | M_NOMAP)) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { - bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, - (unsigned)m->m_len); + m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); n->m_len += m->m_len; sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) @@ -1046,6 +1148,9 @@ m = m_free(m); continue; } + if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) && + (m->m_flags & M_NOTREADY) == 0) + (void)mb_unmapped_compress(m); if (n) n->m_next = m; else Index: sys/kern/uipc_socket.c =================================================================== --- sys/kern/uipc_socket.c +++ sys/kern/uipc_socket.c @@ -1044,7 +1044,7 @@ * * We used to do a lot of socket buffer and socket locking here, as * well as invoke sorflush() and perform wakeups. The direct call to - * dom_dispose() and sbrelease_internal() are an inlining of what was + * dom_dispose() and sbdestroy() are an inlining of what was * necessary from sorflush(). * * Notice that the socket buffer and kqueue state are torn down @@ -1982,7 +1982,11 @@ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); - error = uiomove(mtod(m, char *) + moff, (int)len, uio); + if ((m->m_flags & M_NOMAP) != 0) + error = m_unmappedtouio(m, moff, uio, (int)len); + else + error = uiomove(mtod(m, char *) + moff, + (int)len, uio); SOCKBUF_LOCK(&so->so_rcv); if (error) { /* Index: sys/net/bpf.c =================================================================== --- sys/net/bpf.c +++ sys/net/bpf.c @@ -2369,6 +2369,7 @@ * Note that we cut corners here; we only setup what's * absolutely needed--this mbuf should never go anywhere else. 
*/ + mb.m_flags = 0; mb.m_next = m; mb.m_data = data; mb.m_len = dlen; Index: sys/net/bpf_buffer.c =================================================================== --- sys/net/bpf_buffer.c +++ sys/net/bpf_buffer.c @@ -119,19 +119,10 @@ { const struct mbuf *m; u_char *dst; - u_int count; m = (struct mbuf *)src; dst = (u_char *)buf + offset; - while (len > 0) { - if (m == NULL) - panic("bpf_mcopy"); - count = min(m->m_len, len); - bcopy(mtod(m, void *), dst, count); - m = m->m_next; - dst += count; - len -= count; - } + m_copydata(m, 0, len, dst); } /* Index: sys/net/if.h =================================================================== --- sys/net/if.h +++ sys/net/if.h @@ -246,6 +246,7 @@ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ +#define IFCAP_NOMAP 0x4000000 /* can TX unmapped mbufs */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) Index: sys/net/if_vlan.c =================================================================== --- sys/net/if_vlan.c +++ sys/net/if_vlan.c @@ -1731,6 +1731,16 @@ ena |= (mena & IFCAP_TXRTLMT); #endif + /* + * If the parent interface supports unmapped mbufs, so does + * the VLAN interface. Note that this should be fine even for + * interfaces that don't support hardware tagging as headers + * are prepended in normal mbufs to unmapped mbufs holding + * payload data. + */ + cap |= (p->if_capabilities & IFCAP_NOMAP); + ena |= (mena & IFCAP_NOMAP); + ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -35,13 +35,13 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" -#include "opt_ratelimit.h" #include "opt_ipsec.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" +#include "opt_ratelimit.h" #include "opt_route.h" -#include "opt_sctp.h" #include "opt_rss.h" +#include "opt_sctp.h" #include #include @@ -283,6 +283,7 @@ #if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; #endif + M_ASSERTPKTHDR(m); if (inp != NULL) { @@ -685,11 +686,30 @@ m->m_pkthdr.csum_flags |= CSUM_IP; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + IPSTAT_INC(ips_odropped); + error = ENOBUFS; + goto bad; + } in_delayed_cksum(m); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } else if ((ifp->if_capenable & IFCAP_NOMAP) == 0) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + IPSTAT_INC(ips_odropped); + error = ENOBUFS; + goto bad; + } } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + IPSTAT_INC(ips_odropped); + error = ENOBUFS; + goto bad; + } sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); m->m_pkthdr.csum_flags &= ~CSUM_SCTP; } @@ -825,11 +845,23 @@ * fragmented packets, then do it here. 
*/ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m0 = mb_unmapped_to_ext(m0); + if (m0 == NULL) { + error = ENOBUFS; + IPSTAT_INC(ips_odropped); + goto done; + } in_delayed_cksum(m0); m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } #ifdef SCTP if (m0->m_pkthdr.csum_flags & CSUM_SCTP) { + m0 = mb_unmapped_to_ext(m0); + if (m0 == NULL) { + error = ENOBUFS; + IPSTAT_INC(ips_odropped); + goto done; + } sctp_delayed_cksum(m0, hlen); m0->m_pkthdr.csum_flags &= ~CSUM_SCTP; } Index: sys/netinet/tcp_pcap.c =================================================================== --- sys/netinet/tcp_pcap.c +++ sys/netinet/tcp_pcap.c @@ -311,6 +311,7 @@ if (mhead->m_flags & M_EXT) { switch (mhead->m_ext.ext_type) { case EXT_SFBUF: + case EXT_PGS: /* Don't mess around with these. */ tcp_pcap_m_freem(mhead); continue; @@ -383,8 +384,11 @@ __func__, n->m_flags)); n->m_data = n->m_dat + M_LEADINGSPACE_NOWRITE(m); n->m_len = m->m_len; - bcopy(M_START(m), n->m_dat, - m->m_len + M_LEADINGSPACE_NOWRITE(m)); + if (m->m_flags & M_NOMAP) + m_copydata(m, 0, m->m_len, n->m_data); + else + bcopy(M_START(m), n->m_dat, + m->m_len + M_LEADINGSPACE_NOWRITE(m)); } else { /* Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -798,8 +798,12 @@ } } + if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) { + *num_names = 0; + return (EINVAL); + } + refcount_init(&blk->tfb_refcnt, 0); - blk->tfb_flags = 0; blk->tfb_id = atomic_fetchadd_int(&next_tcp_stack_id, 1); for (i = 0; i < *num_names; i++) { n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -1190,8 +1190,7 @@ INP_WLOCK(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { INP_WUNLOCK(inp); - for (int i = 0; i < count; i++) - m = m_free(m); + mb_free_notready(m, count); return (ECONNRESET); } tp = intotcpcb(inp); Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -67,11 +67,11 @@ #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ratelimit.h" #include "opt_ipsec.h" -#include "opt_sctp.h" +#include "opt_ratelimit.h" #include "opt_route.h" #include "opt_rss.h" +#include "opt_sctp.h" #include #include @@ -963,11 +963,30 @@ */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; + m = mb_unmapped_to_ext(m); + if (m == NULL) { + error = ENOBUFS; + IP6STAT_INC(ip6s_odropped); + goto bad; + } in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); + } else if ((ifp->if_capenable & IFCAP_NOMAP) == 0) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + error = ENOBUFS; + IP6STAT_INC(ip6s_odropped); + goto bad; + } } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; + m = mb_unmapped_to_ext(m); + if (m == NULL) { + error = ENOBUFS; + IP6STAT_INC(ip6s_odropped); + goto bad; + } sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif @@ -1055,11 +1074,23 @@ * XXX-BZ handle the hw offloading case. Need flags. 
*/ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + in6_ifstat_inc(ifp, ifs6_out_fragfail); + error = ENOBUFS; + goto bad; + } in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + in6_ifstat_inc(ifp, ifs6_out_fragfail); + error = ENOBUFS; + goto bad; + } sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -227,7 +227,15 @@ volatile u_int ext_count; volatile u_int *ext_cnt; }; - char *ext_buf; /* start of buffer */ + union { + /* + * If ext_type == EXT_PGS, 'ext_pgs' points to a + * structure describing the buffer. Otherwise, + * 'ext_buf' points to the start of the buffer. + */ + struct mbuf_ext_pgs *ext_pgs; + char *ext_buf; + }; uint32_t ext_size; /* size of buffer, for ext_free */ uint32_t ext_type:8, /* type of external storage */ ext_flags:24; /* external storage mbuf flags */ @@ -293,6 +301,92 @@ }; }; +struct socket; + +/* + * TLS records for TLS 1.0-1.2 can have the following header lengths: + * - 5 (AES-CBC with implicit IV) + * - 21 (AES-CBC with explicit IV) + * - 13 (AES-GCM with 8 byte explicit IV) + */ +#define MBUF_PEXT_HDR_LEN 24 + +/* + * TLS records for TLS 1.0-1.2 can have the following maximum trailer + * lengths: + * - 16 (AES-GCM) + * - 36 (AES-CBC with SHA1 and up to 16 bytes of padding) + * - 48 (AES-CBC with SHA2-256 and up to 16 bytes of padding) + * - 64 (AES-CBC with SHA2-384 and up to 16 bytes of padding) + */ +#define MBUF_PEXT_TRAIL_LEN 64 + +#ifdef __LP64__ +#define MBUF_PEXT_MAX_PGS (152 / sizeof(vm_paddr_t)) +#else +#define MBUF_PEXT_MAX_PGS (156 / sizeof(vm_paddr_t)) +#endif + +#define MBUF_PEXT_MAX_BYTES \ + (MBUF_PEXT_MAX_PGS * PAGE_SIZE + MBUF_PEXT_HDR_LEN + MBUF_PEXT_TRAIL_LEN) + +/* + * This struct is 256 bytes in size and is arranged so that the most + * common case (accessing the first 4 pages of a 16KB TLS record) will + * fit in a single 64 byte cacheline. + */ +struct mbuf_ext_pgs { + uint8_t npgs; /* Number of attached pages */ + uint8_t nrdy; /* Pages with I/O pending */ + uint8_t hdr_len; /* TLS header length */ + uint8_t trail_len; /* TLS trailer length */ + uint16_t first_pg_off; /* Offset into 1st page */ + uint16_t last_pg_len; /* Length of last page */ + vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pages */ + char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */ + void *tls; /* TLS session */ +#if defined(__i386__) || \ + (defined(__powerpc__) && !defined(__powerpc64__) && defined(BOOKE)) + /* + * i386 and Book-E PowerPC have 64-bit vm_paddr_t, so there is + * a 4 byte remainder from the space allocated for pa[]. 
+ */ + uint32_t pad; +#endif + union { + char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */ + struct { + struct socket *so; + void *mbuf; + uint64_t seqno; + STAILQ_ENTRY(mbuf_ext_pgs) stailq; + }; + }; +}; + +#ifdef _KERNEL +static inline int +mbuf_ext_pg_len(struct mbuf_ext_pgs *ext_pgs, int pidx, int pgoff) +{ + KASSERT(pgoff == 0 || pidx == 0, + ("page %d with non-zero offset %d in %p", pidx, pgoff, ext_pgs)); + if (pidx == ext_pgs->npgs - 1) { + return (ext_pgs->last_pg_len); + } else { + return (PAGE_SIZE - pgoff); + } +} + +#ifdef INVARIANT_SUPPORT +void mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs); +#endif +#ifdef INVARIANTS +#define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs) mb_ext_pgs_check((ext_pgs)) +#else +#define MBUF_EXT_PGS_ASSERT_SANITY(ext_pgs) +#endif +#endif + /* * mbuf flags of global significance and layer crossing. * Those of only protocol/layer specific significance are to be mapped @@ -307,7 +401,7 @@ #define M_MCAST 0x00000020 /* send/received as link-level multicast */ #define M_PROMISC 0x00000040 /* packet was not for us */ #define M_VLANTAG 0x00000080 /* ether_vtag is valid */ -#define M_NOMAP 0x00000100 /* mbuf data is unmapped (soon from Drew) */ +#define M_NOMAP 0x00000100 /* mbuf data is unmapped */ #define M_NOFREE 0x00000200 /* do not free mbuf, embedded in cluster */ #define M_TSTMP 0x00000400 /* rcv_tstmp field is valid */ #define M_TSTMP_HPREC 0x00000800 /* rcv_tstmp is high-prec, typically @@ -348,7 +442,7 @@ */ #define M_FLAG_BITS \ "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \ - "\7M_PROMISC\10M_VLANTAG\13M_TSTMP\14M_TSTMP_HPREC" + "\7M_PROMISC\10M_VLANTAG\11M_NOMAP\12M_NOFREE\13M_TSTMP\14M_TSTMP_HPREC" #define M_FLAG_PROTOBITS \ "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \ "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \ @@ -420,6 +514,7 @@ #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_RXRING 8 /* data in NIC receive ring */ +#define EXT_PGS 9 /* array of unmapped pages */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ @@ -464,6 +559,11 @@ "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \ "\30EXT_FLAG_EXP4" +#define MBUF_EXT_PGS_ASSERT(m) \ + KASSERT((((m)->m_flags & M_EXT) != 0) && \ + ((m)->m_ext.ext_type == EXT_PGS), \ + ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)) + /* * Flags indicating checksum, segmentation and other offload work to be * done, or already done, by hardware or lower layers. 
It is split into @@ -566,6 +666,7 @@ #define MBUF_JUMBO16_MEM_NAME "mbuf_jumbo_16k" #define MBUF_TAG_MEM_NAME "mbuf_tag" #define MBUF_EXTREFCNT_MEM_NAME "mbuf_ext_refcnt" +#define MBUF_EXTPGS_MEM_NAME "mbuf_extpgs" #ifdef _KERNEL @@ -590,9 +691,15 @@ extern uma_zone_t zone_jumbop; extern uma_zone_t zone_jumbo9; extern uma_zone_t zone_jumbo16; +extern uma_zone_t zone_extpgs; void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); +void mb_free_mext_pgs(struct mbuf *); +struct mbuf *mb_alloc_ext_pgs(int, bool, m_ext_free_t); +int mb_unmapped_compress(struct mbuf *m); +struct mbuf *mb_unmapped_to_ext(struct mbuf *m); +void mb_free_notready(struct mbuf *m, int count); void m_adj(struct mbuf *, int); int m_apply(struct mbuf *, int, int, int (*)(void *, void *, u_int), void *); @@ -627,6 +734,7 @@ struct mbuf *m_getptr(struct mbuf *, int, int *); u_int m_length(struct mbuf *, struct mbuf **); int m_mbuftouio(struct uio *, const struct mbuf *, int); +int m_unmappedtouio(const struct mbuf *, int, struct uio *, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); int m_pkthdr_init(struct mbuf *, int); struct mbuf *m_prepend(struct mbuf *, int, int); @@ -881,7 +989,7 @@ * be both the local data payload, or an external buffer area, depending on * whether M_EXT is set). */ -#define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ +#define M_WRITABLE(m) (((m)->m_flags & (M_RDONLY | M_NOMAP)) == 0 && \ (!(((m)->m_flags & M_EXT)) || \ (m_extrefcnt(m) == 1))) @@ -904,7 +1012,8 @@ * handling external storage, packet-header mbufs, and regular data mbufs. */ #define M_START(m) \ - (((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ + (((m)->m_flags & M_NOMAP) ? NULL : \ + ((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf : \ ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] : \ &(m)->m_dat[0]) @@ -1020,6 +1129,7 @@ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern int nmbclusters; /* Maximum number of clusters */ +extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */ /*- * Network packets may have annotations attached by affixing a list of Index: sys/sys/sglist.h =================================================================== --- sys/sys/sglist.h +++ sys/sys/sglist.h @@ -57,6 +57,7 @@ struct bio; struct mbuf; +struct mbuf_ext_pgs; struct uio; static __inline void @@ -87,6 +88,9 @@ struct sglist *sglist_alloc(int nsegs, int mflags); int sglist_append(struct sglist *sg, void *buf, size_t len); int sglist_append_bio(struct sglist *sg, struct bio *bp); +int sglist_append_ext_pgs(struct sglist *sg, struct mbuf_ext_pgs *ext_pgs, + size_t off, size_t len); +int sglist_append_mb_ext_pgs(struct sglist *sg, struct mbuf *m); int sglist_append_mbuf(struct sglist *sg, struct mbuf *m0); int sglist_append_phys(struct sglist *sg, vm_paddr_t paddr, size_t len); @@ -101,6 +105,9 @@ struct sglist *sglist_clone(struct sglist *sg, int mflags); int sglist_consume_uio(struct sglist *sg, struct uio *uio, size_t resid); int sglist_count(void *buf, size_t len); +int sglist_count_ext_pgs(struct mbuf_ext_pgs *ext_pgs, size_t off, + size_t len); +int sglist_count_mb_ext_pgs(struct mbuf *m); int sglist_count_vmpages(vm_page_t *m, size_t pgoff, size_t len); void sglist_free(struct sglist *sg); int sglist_join(struct sglist *first, struct sglist *second);
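
Note on the output-path changes above: the pattern added to ip_output() and ip6_output() generalizes to any path that may hand a chain to code which reads the payload through mtod(), such as a software checksum routine. If the outgoing interface does not advertise IFCAP_NOMAP, or the stack itself must touch the data, the chain has to be converted back to mapped external buffers first; on failure the callers in this diff simply drop the packet and bump ips_odropped / ip6s_odropped. The helper below is a minimal illustrative sketch, not part of the diff; only mb_unmapped_to_ext() and IFCAP_NOMAP come from this patch, the helper name and its 'need_sw_cksum' argument are hypothetical.

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

/*
 * Illustrative sketch only; not part of the diff.  Make a chain headed
 * for 'ifp' safe to transmit: a driver that sets IFCAP_NOMAP can take
 * unmapped mbufs directly, otherwise (or when a software checksum must
 * be computed) convert the chain to mapped external buffers.
 */
static struct mbuf *
chain_for_ifp(struct mbuf *m, struct ifnet *ifp, int need_sw_cksum)
{

	if (need_sw_cksum || (ifp->if_capenable & IFCAP_NOMAP) == 0)
		m = mb_unmapped_to_ext(m);
	return (m);	/* NULL means the packet should be dropped. */
}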
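
Note on struct mbuf_ext_pgs: the layout packs a TLS header, an array of physical page addresses and a TLS trailer into one 256-byte buffer. On LP64 platforms MBUF_PEXT_MAX_PGS works out to 152 / 8 = 19 pages, so with 4 KB pages MBUF_PEXT_MAX_BYTES is 19 * 4096 + 24 + 64 = 77,912 bytes per unmapped mbuf. The sketch below is a hypothetical helper (not part of the diff) showing how the fields combine using the mbuf_ext_pg_len() accessor added above: only the first page may start at first_pg_off, and the last page holds last_pg_len bytes.

#include <sys/param.h>
#include <sys/mbuf.h>

/*
 * Hypothetical helper, not part of the diff: total payload bytes
 * described by one EXT_PGS buffer (TLS header + pages + TLS trailer).
 */
static int
ext_pgs_total_len(struct mbuf_ext_pgs *ext_pgs)
{
	int i, len, pgoff;

	len = ext_pgs->hdr_len + ext_pgs->trail_len;
	pgoff = ext_pgs->first_pg_off;
	for (i = 0; i < ext_pgs->npgs; i++) {
		/* Only the first page may start mid-page. */
		len += mbuf_ext_pg_len(ext_pgs, i, pgoff);
		pgoff = 0;
	}
	return (len);
}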
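
Note on the new sglist entry points: sglist_count_mb_ext_pgs() and sglist_append_mb_ext_pgs() give drivers that set IFCAP_NOMAP a way to build DMA scatter/gather lists without ever mapping the pages. A transmit path might use them roughly as in the sketch below, which assumes the driver walks the chain one mbuf at a time; the helper name is made up, while M_NOMAP, sglist_append() and sglist_append_mb_ext_pgs() are from this diff.

#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/sglist.h>

/*
 * Sketch of a hypothetical driver helper (not part of the diff):
 * translate an outbound chain, possibly containing EXT_PGS mbufs,
 * into scatter/gather entries for DMA.
 */
static int
txq_load_mbuf_chain(struct sglist *sg, struct mbuf *m0)
{
	struct mbuf *m;
	int error;

	for (m = m0; m != NULL; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		if (m->m_flags & M_NOMAP) {
			/* Adds entries for this single unmapped mbuf. */
			error = sglist_append_mb_ext_pgs(sg, m);
		} else {
			error = sglist_append(sg, mtod(m, void *),
			    m->m_len);
		}
		if (error != 0)
			return (error);
	}
	return (0);
}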