diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c index 2f60bb64ed3b..cbc26489d8fa 100644 --- a/sys/kern/uipc_debug.c +++ b/sys/kern/uipc_debug.c @@ -1,536 +1,534 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Debugger routines relating to sockets, protocols, etc, for use in DDB. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #ifdef DDB #include static void db_print_sotype(short so_type) { switch (so_type) { case SOCK_STREAM: db_printf("SOCK_STREAM"); break; case SOCK_DGRAM: db_printf("SOCK_DGRAM"); break; case SOCK_RAW: db_printf("SOCK_RAW"); break; case SOCK_RDM: db_printf("SOCK_RDM"); break; case SOCK_SEQPACKET: db_printf("SOCK_SEQPACKET"); break; default: db_printf("unknown"); break; } } static void db_print_sooptions(int so_options) { int comma; comma = 0; if (so_options & SO_DEBUG) { db_printf("%sSO_DEBUG", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTCONN) { db_printf("%sSO_ACCEPTCONN", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEADDR) { db_printf("%sSO_REUSEADDR", comma ? ", " : ""); comma = 1; } if (so_options & SO_KEEPALIVE) { db_printf("%sSO_KEEPALIVE", comma ? ", " : ""); comma = 1; } if (so_options & SO_DONTROUTE) { db_printf("%sSO_DONTROUTE", comma ? ", " : ""); comma = 1; } if (so_options & SO_BROADCAST) { db_printf("%sSO_BROADCAST", comma ? ", " : ""); comma = 1; } if (so_options & SO_USELOOPBACK) { db_printf("%sSO_USELOOPBACK", comma ? ", " : ""); comma = 1; } if (so_options & SO_LINGER) { db_printf("%sSO_LINGER", comma ? ", " : ""); comma = 1; } if (so_options & SO_OOBINLINE) { db_printf("%sSO_OOBINLINE", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT) { db_printf("%sSO_REUSEPORT", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT_LB) { db_printf("%sSO_REUSEPORT_LB", comma ? ", " : ""); comma = 1; } if (so_options & SO_TIMESTAMP) { db_printf("%sSO_TIMESTAMP", comma ? ", " : ""); comma = 1; } if (so_options & SO_NOSIGPIPE) { db_printf("%sSO_NOSIGPIPE", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTFILTER) { db_printf("%sSO_ACCEPTFILTER", comma ? ", " : ""); comma = 1; } if (so_options & SO_BINTIME) { db_printf("%sSO_BINTIME", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_OFFLOAD) { db_printf("%sSO_NO_OFFLOAD", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_DDP) { db_printf("%sSO_NO_DDP", comma ? ", " : ""); comma = 1; } } static void db_print_sostate(short so_state) { int comma; comma = 0; if (so_state & SS_NOFDREF) { db_printf("%sSS_NOFDREF", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONNECTED) { db_printf("%sSS_ISCONNECTED", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONNECTING) { db_printf("%sSS_ISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISDISCONNECTING) { db_printf("%sSS_ISDISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_NBIO) { db_printf("%sSS_NBIO", comma ? ", " : ""); comma = 1; } if (so_state & SS_ASYNC) { db_printf("%sSS_ASYNC", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONFIRMING) { db_printf("%sSS_ISCONFIRMING", comma ? ", " : ""); comma = 1; } if (so_state & SS_PROTOREF) { db_printf("%sSS_PROTOREF", comma ? ", " : ""); comma = 1; } } static void db_print_soqstate(int so_qstate) { int comma; comma = 0; if (so_qstate & SQ_INCOMP) { db_printf("%sSQ_INCOMP", comma ? ", " : ""); comma = 1; } if (so_qstate & SQ_COMP) { db_printf("%sSQ_COMP", comma ? ", " : ""); comma = 1; } } static void db_print_sbstate(short sb_state) { int comma; comma = 0; if (sb_state & SBS_CANTSENDMORE) { db_printf("%sSBS_CANTSENDMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_CANTRCVMORE) { db_printf("%sSBS_CANTRCVMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_RCVATMARK) { db_printf("%sSBS_RCVATMARK", comma ? ", " : ""); comma = 1; } } static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_domain(struct domain *d, const char *domain_name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", domain_name, d); indent += 2; db_print_indent(indent); db_printf("dom_family: %d ", d->dom_family); db_printf("dom_name: %s\n", d->dom_name); db_print_indent(indent); db_printf("dom_externalize: %p ", d->dom_externalize); db_printf("dom_dispose: %p\n", d->dom_dispose); db_print_indent(indent); db_printf("dom_protosw: %p ", d->dom_protosw); db_printf("dom_next: %p\n", d->dom_next); db_print_indent(indent); db_printf("dom_rtattach: %p ", d->dom_rtattach); db_print_indent(indent); db_printf("dom_ifattach: %p ", d->dom_ifattach); db_printf("dom_ifdetach: %p\n", d->dom_ifdetach); } static void db_print_prflags(short pr_flags) { int comma; comma = 0; if (pr_flags & PR_ATOMIC) { db_printf("%sPR_ATOMIC", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_ADDR) { db_printf("%sPR_ADDR", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_CONNREQUIRED) { db_printf("%sPR_CONNREQUIRED", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_WANTRCVD) { db_printf("%sPR_WANTRCVD", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_RIGHTS) { db_printf("%sPR_RIGHTS", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_IMPLOPCL) { db_printf("%sPR_IMPLOPCL", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_LASTHDR) { db_printf("%sPR_LASTHDR", comma ? ", " : ""); comma = 1; } } static void db_print_protosw(struct protosw *pr, const char *prname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", prname, pr); indent += 2; db_print_indent(indent); db_printf("pr_type: %d ", pr->pr_type); db_printf("pr_domain: %p\n", pr->pr_domain); if (pr->pr_domain != NULL) db_print_domain(pr->pr_domain, "pr_domain", indent); db_print_indent(indent); db_printf("pr_protocol: %d\n", pr->pr_protocol); db_print_indent(indent); db_printf("pr_flags: %d (", pr->pr_flags); db_print_prflags(pr->pr_flags); db_printf(")\n"); db_print_indent(indent); db_printf("pr_input: %p ", pr->pr_input); db_printf("pr_output: %p ", pr->pr_output); db_printf("pr_ctlinput: %p\n", pr->pr_ctlinput); db_printf("pr_ctloutput: %p ", pr->pr_ctloutput); db_print_indent(indent); db_printf("pr_fasttimo: %p ", pr->pr_fasttimo); db_printf("pr_slowtimo: %p ", pr->pr_slowtimo); db_printf("pr_drain: %p\n", pr->pr_drain); } static void db_print_sbflags(short sb_flags) { int comma; comma = 0; if (sb_flags & SB_WAIT) { db_printf("%sSB_WAIT", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_SEL) { db_printf("%sSB_SEL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_ASYNC) { db_printf("%sSB_ASYNC", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_UPCALL) { db_printf("%sSB_UPCALL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_NOINTR) { db_printf("%sSB_NOINTR", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AIO) { db_printf("%sSB_AIO", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_KNOTE) { db_printf("%sSB_KNOTE", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AUTOSIZE) { db_printf("%sSB_AUTOSIZE", comma ? ", " : ""); comma = 1; } } static void db_print_sockbuf(struct sockbuf *sb, const char *sockbufname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", sockbufname, sb); indent += 2; db_print_indent(indent); db_printf("sb_state: 0x%x (", sb->sb_state); db_print_sbstate(sb->sb_state); db_printf(")\n"); db_print_indent(indent); db_printf("sb_mb: %p ", sb->sb_mb); db_printf("sb_mbtail: %p ", sb->sb_mbtail); db_printf("sb_lastrecord: %p\n", sb->sb_lastrecord); db_print_indent(indent); db_printf("sb_sndptr: %p ", sb->sb_sndptr); db_printf("sb_sndptroff: %u\n", sb->sb_sndptroff); db_print_indent(indent); db_printf("sb_acc: %u ", sb->sb_acc); db_printf("sb_ccc: %u ", sb->sb_ccc); db_printf("sb_hiwat: %u ", sb->sb_hiwat); db_printf("sb_mbcnt: %u ", sb->sb_mbcnt); db_printf("sb_mbmax: %u\n", sb->sb_mbmax); db_print_indent(indent); - db_printf("sb_mcnt: %u ", sb->sb_mcnt); - db_printf("sb_ccnt: %u ", sb->sb_ccnt); db_printf("sb_ctl: %u ", sb->sb_ctl); db_printf("sb_lowat: %d ", sb->sb_lowat); db_printf("sb_timeo: %jd\n", sb->sb_timeo); db_print_indent(indent); db_printf("sb_flags: 0x%x (", sb->sb_flags); db_print_sbflags(sb->sb_flags); db_printf(")\n"); db_print_indent(indent); db_printf("sb_aiojobq first: %p\n", TAILQ_FIRST(&sb->sb_aiojobq)); } static void db_print_socket(struct socket *so, const char *socketname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", socketname, so); indent += 2; db_print_indent(indent); db_printf("so_count: %d ", so->so_count); db_printf("so_type: %d (", so->so_type); db_print_sotype(so->so_type); db_printf(")\n"); db_print_indent(indent); db_printf("so_options: 0x%x (", so->so_options); db_print_sooptions(so->so_options); db_printf(")\n"); db_print_indent(indent); db_printf("so_linger: %d ", so->so_linger); db_printf("so_state: 0x%x (", so->so_state); db_print_sostate(so->so_state); db_printf(")\n"); db_print_indent(indent); db_printf("so_pcb: %p ", so->so_pcb); db_printf("so_proto: %p\n", so->so_proto); if (so->so_proto != NULL) db_print_protosw(so->so_proto, "so_proto", indent); db_print_indent(indent); if (so->so_options & SO_ACCEPTCONN) { db_printf("sol_incomp first: %p ", TAILQ_FIRST(&so->sol_incomp)); db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp)); db_printf("sol_qlen: %d ", so->sol_qlen); db_printf("sol_incqlen: %d ", so->sol_incqlen); db_printf("sol_qlimit: %d ", so->sol_qlimit); } else { db_printf("so_qstate: 0x%x (", so->so_qstate); db_print_soqstate(so->so_qstate); db_printf(") "); db_printf("so_listen: %p ", so->so_listen); /* so_list skipped */ db_printf("so_timeo: %d ", so->so_timeo); db_printf("so_error: %d\n", so->so_error); db_print_indent(indent); db_printf("so_sigio: %p ", so->so_sigio); db_printf("so_oobmark: %lu\n", so->so_oobmark); db_print_sockbuf(&so->so_rcv, "so_rcv", indent); db_print_sockbuf(&so->so_snd, "so_snd", indent); } } DB_SHOW_COMMAND(socket, db_show_socket) { struct socket *so; if (!have_addr) { db_printf("usage: show socket \n"); return; } so = (struct socket *)addr; db_print_socket(so, "socket", 0); } DB_SHOW_COMMAND(sockbuf, db_show_sockbuf) { struct sockbuf *sb; if (!have_addr) { db_printf("usage: show sockbuf \n"); return; } sb = (struct sockbuf *)addr; db_print_sockbuf(sb, "sockbuf", 0); } DB_SHOW_COMMAND(protosw, db_show_protosw) { struct protosw *pr; if (!have_addr) { db_printf("usage: show protosw \n"); return; } pr = (struct protosw *)addr; db_print_protosw(pr, "protosw", 0); } DB_SHOW_COMMAND(domain, db_show_domain) { struct domain *d; if (!have_addr) { db_printf("usage: show protosw \n"); return; } d = (struct domain *)addr; db_print_domain(d, "domain", 0); } #endif diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index bc0a48a37810..07c21d3b7802 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -1,1831 +1,1812 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_kern_tls.h" #include "opt_param.h" #include #include /* for aio_swake proto */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. */ void (*aio_swake)(struct socket *, struct sockbuf *); /* * Primitive routines for operating on socket buffers */ u_long sb_max = SB_MAX; u_long sb_max_adj = (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ #ifdef KERN_TLS static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); #endif static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); static void sbflush_internal(struct sockbuf *sb); /* * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. */ static void sbm_clrprotoflags(struct mbuf *m, int flags) { int mask; mask = ~M_PROTOFLAGS; if (flags & PRUS_NOTREADY) mask |= M_NOTREADY; while (m) { m->m_flags &= mask; m = m->m_next; } } /* * Compress M_NOTREADY mbufs after they have been readied by sbready(). * * sbcompress() skips M_NOTREADY mbufs since the data is not available to * be copied at the time of sbcompress(). This function combines small * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first * mbuf sbready() marked ready, and 'end' is the first mbuf still not * ready. */ static void sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end) { struct mbuf *m, *n; int ext_size; SOCKBUF_LOCK_ASSERT(sb); if ((sb->sb_flags & SB_NOCOALESCE) != 0) return; for (m = m0; m != end; m = m->m_next) { MPASS((m->m_flags & M_NOTREADY) == 0); /* * NB: In sbcompress(), 'n' is the last mbuf in the * socket buffer and 'm' is the new mbuf being copied * into the trailing space of 'n'. Here, the roles * are reversed and 'n' is the next mbuf after 'm' * that is being copied into the trailing space of * 'm'. */ n = m->m_next; #ifdef KERN_TLS /* Try to coalesce adjacent ktls mbuf hdr/trailers. */ if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && (m->m_flags & M_EXTPG) && (n->m_flags & M_EXTPG) && !mbuf_has_tls_session(m) && !mbuf_has_tls_session(n)) { int hdr_len, trail_len; hdr_len = n->m_epg_hdrlen; trail_len = m->m_epg_trllen; if (trail_len != 0 && hdr_len != 0 && trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) { /* copy n's header to m's trailer */ memcpy(&m->m_epg_trail[trail_len], n->m_epg_hdr, hdr_len); m->m_epg_trllen += hdr_len; m->m_len += hdr_len; n->m_epg_hdrlen = 0; n->m_len -= hdr_len; } } #endif /* Compress small unmapped mbufs into plain mbufs. */ if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN && !mbuf_has_tls_session(m)) { ext_size = m->m_ext.ext_size; - if (mb_unmapped_compress(m) == 0) { + if (mb_unmapped_compress(m) == 0) sb->sb_mbcnt -= ext_size; - sb->sb_ccnt -= 1; - } } while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && M_WRITABLE(m) && (m->m_flags & M_EXTPG) == 0 && !mbuf_has_tls_session(n) && !mbuf_has_tls_session(m) && n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ n->m_len <= M_TRAILINGSPACE(m) && m->m_type == n->m_type) { KASSERT(sb->sb_lastrecord != n, ("%s: merging start of record (%p) into previous mbuf (%p)", __func__, n, m)); m_copydata(n, 0, n->m_len, mtodo(m, m->m_len)); m->m_len += n->m_len; m->m_next = n->m_next; m->m_flags |= n->m_flags & M_EOR; if (sb->sb_mbtail == n) sb->sb_mbtail = m; sb->sb_mbcnt -= MSIZE; - sb->sb_mcnt -= 1; - if (n->m_flags & M_EXT) { + if (n->m_flags & M_EXT) sb->sb_mbcnt -= n->m_ext.ext_size; - sb->sb_ccnt -= 1; - } m_free(n); n = m->m_next; } } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); } /* * Mark ready "count" units of I/O starting with "m". Most mbufs * count as a single unit of I/O except for M_EXTPG mbufs which * are backed by multiple pages. */ int sbready(struct sockbuf *sb, struct mbuf *m0, int count) { struct mbuf *m; u_int blocker; SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); m = m0; blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; while (count > 0) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) { if (count < m->m_epg_nrdy) { m->m_epg_nrdy -= count; count = 0; break; } count -= m->m_epg_nrdy; m->m_epg_nrdy = 0; } else count--; m->m_flags &= ~(M_NOTREADY | blocker); if (blocker) sb->sb_acc += m->m_len; m = m->m_next; } /* * If the first mbuf is still not fully ready because only * some of its backing pages were readied, no further progress * can be made. */ if (m0 == m) { MPASS(m->m_flags & M_NOTREADY); return (EINPROGRESS); } if (!blocker) { sbready_compress(sb, m0, m); return (EINPROGRESS); } /* This one was blocking all the queue. */ for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { KASSERT(m->m_flags & M_BLOCKED, ("%s: m %p !M_BLOCKED", __func__, m)); m->m_flags &= ~M_BLOCKED; sb->sb_acc += m->m_len; } sb->sb_fnrdy = m; sbready_compress(sb, m0, m); return (0); } /* * Adjust sockbuf state reflecting allocation of m. */ void sballoc(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) { if (m->m_flags & M_NOTREADY) sb->sb_fnrdy = m; else sb->sb_acc += m->m_len; } else m->m_flags |= M_BLOCKED; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl += m->m_len; sb->sb_mbcnt += MSIZE; - sb->sb_mcnt += 1; - if (m->m_flags & M_EXT) { + if (m->m_flags & M_EXT) sb->sb_mbcnt += m->m_ext.ext_size; - sb->sb_ccnt += 1; - } } /* * Adjust sockbuf state reflecting freeing of m. */ void sbfree(struct sockbuf *sb, struct mbuf *m) { #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ SOCKBUF_LOCK_ASSERT(sb); #endif sb->sb_ccc -= m->m_len; if (!(m->m_flags & M_NOTAVAIL)) sb->sb_acc -= m->m_len; if (m == sb->sb_fnrdy) { struct mbuf *n; KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); n = m->m_next; while (n != NULL && !(n->m_flags & M_NOTREADY)) { n->m_flags &= ~M_BLOCKED; sb->sb_acc += n->m_len; n = n->m_next; } sb->sb_fnrdy = n; } if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= m->m_len; sb->sb_mbcnt -= MSIZE; - sb->sb_mcnt -= 1; - if (m->m_flags & M_EXT) { + if (m->m_flags & M_EXT) sb->sb_mbcnt -= m->m_ext.ext_size; - sb->sb_ccnt -= 1; - } if (sb->sb_sndptr == m) { sb->sb_sndptr = NULL; sb->sb_sndptroff = 0; } if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= m->m_len; } #ifdef KERN_TLS /* * Similar to sballoc/sbfree but does not adjust state associated with * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs * are not ready. */ void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m) { SOCKBUF_LOCK_ASSERT(sb); sb->sb_ccc += m->m_len; sb->sb_tlscc += m->m_len; sb->sb_mbcnt += MSIZE; - sb->sb_mcnt += 1; - if (m->m_flags & M_EXT) { + if (m->m_flags & M_EXT) sb->sb_mbcnt += m->m_ext.ext_size; - sb->sb_ccnt += 1; - } } void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m) { #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ SOCKBUF_LOCK_ASSERT(sb); #endif sb->sb_ccc -= m->m_len; sb->sb_tlscc -= m->m_len; sb->sb_mbcnt -= MSIZE; - sb->sb_mcnt -= 1; - if (m->m_flags & M_EXT) { + if (m->m_flags & M_EXT) sb->sb_mbcnt -= m->m_ext.ext_size; - sb->sb_ccnt -= 1; - } } #endif /* * Socantsendmore indicates that no more data will be sent on the socket; it * would normally be applied to a socket when the user informs the system * that no more data is to be sent, by the protocol code (in case * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be * received, and will normally be applied to the socket by a protocol when it * detects that the peer will send no more data. Data queued for reading in * the socket may yet be read. */ void socantsendmore_locked(struct socket *so) { SOCK_SENDBUF_LOCK_ASSERT(so); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantsendmore(struct socket *so) { SOCK_SENDBUF_LOCK(so); socantsendmore_locked(so); SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantrcvmore_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); so->so_rcv.sb_state |= SBS_CANTRCVMORE; #ifdef KERN_TLS if (so->so_rcv.sb_flags & SB_TLS_RX) ktls_check_rx(&so->so_rcv); #endif sorwakeup_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void socantrcvmore(struct socket *so) { SOCK_RECVBUF_LOCK(so); socantrcvmore_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); if (so->so_options & SO_RERROR) { so->so_rerror = ENOBUFS; sorwakeup_locked(so); } else SOCK_RECVBUF_UNLOCK(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow(struct socket *so) { SOCK_RECVBUF_LOCK(so); soroverflow_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct socket *so, sb_which which) { struct sockbuf *sb; SOCK_BUF_LOCK_ASSERT(so, which); sb = sobuf(so, which); sb->sb_flags |= SB_WAIT; return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which), (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); } /* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. It's not clear that's * correct. */ static __always_inline void sowakeup(struct socket *so, const sb_which which) { struct sockbuf *sb; int ret; SOCK_BUF_LOCK_ASSERT(so, which); sb = sobuf(so, which); selwakeuppri(sb->sb_sel, PSOCK); if (!SEL_WAITING(sb->sb_sel)) sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup(&sb->sb_acc); } KNOTE_LOCKED(&sb->sb_sel->si_note, 0); if (sb->sb_upcall != NULL) { ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); if (ret == SU_ISCONNECTED) { KASSERT(sb == &so->so_rcv, ("SO_SND upcall returned SU_ISCONNECTED")); soupcall_clear(so, SO_RCV); } } else ret = SU_OK; if (sb->sb_flags & SB_AIO) sowakeup_aio(so, which); SOCK_BUF_UNLOCK(so, which); if (ret == SU_ISCONNECTED) soisconnected(so); if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGIO, 0); SOCK_BUF_UNLOCK_ASSERT(so, which); } /* * Do we need to notify the other side when I/O is possible? */ static __always_inline bool sb_notify(const struct sockbuf *sb) { return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | SB_UPCALL | SB_AIO | SB_KNOTE)) != 0); } void sorwakeup_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); if (sb_notify(&so->so_rcv)) sowakeup(so, SO_RCV); else SOCK_RECVBUF_UNLOCK(so); } void sowwakeup_locked(struct socket *so) { SOCK_SENDBUF_LOCK_ASSERT(so); if (sb_notify(&so->so_snd)) sowakeup(so, SO_SND); else SOCK_SENDBUF_UNLOCK(so); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and one for * receiving data. Each buffer contains a queue of mbufs, information about * the number of mbufs and amount of data in the queue, and other fields * allowing select() statements and notification on data availability to be * implemented. * * Data stored in a socket buffer is maintained as a list of records. Each * record is a list of mbufs chained together with the m_next field. Records * are chained together with the m_nextpkt field. The upper level routine * soreceive() expects the following conventions to be observed when placing * information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's name, * then a record containing that name must be present before any * associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really just * additional data associated with the message), and there are ``rights'' * to be received, then a record containing this data should be present * (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by a data * record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space should * be released by calling sbrelease() when the socket is destroyed. */ int soreserve(struct socket *so, u_long sndcc, u_long rcvcc) { struct thread *td = curthread; SOCK_SENDBUF_LOCK(so); SOCK_RECVBUF_LOCK(so); if (sbreserve_locked(so, SO_SND, sndcc, td) == 0) goto bad; if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; SOCK_RECVBUF_UNLOCK(so); SOCK_SENDBUF_UNLOCK(so); return (0); bad2: sbrelease_locked(so, SO_SND); bad: SOCK_RECVBUF_UNLOCK(so); SOCK_SENDBUF_UNLOCK(so); return (ENOBUFS); } static int sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) { int error = 0; u_long tmp_sb_max = sb_max; error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); if (error || !req->newptr) return (error); if (tmp_sb_max < MSIZE + MCLBYTES) return (EINVAL); sb_max = tmp_sb_max; sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); return (0); } /* * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't * become limiting if buffering efficiency is near the normal case. */ bool sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td) { struct sockbuf *sb = sobuf(so, which); rlim_t sbsize_limit; SOCK_BUF_LOCK_ASSERT(so, which); /* * When a thread is passed, we take into account the thread's socket * buffer size limit. The caller will generally pass curthread, but * in the TCP input path, NULL will be passed to indicate that no * appropriate thread resource limits are available. In that case, * we don't apply a process limit. */ if (cc > sb_max_adj) return (false); if (td != NULL) { sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); } else sbsize_limit = RLIM_INFINITY; if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit)) return (false); sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (true); } int sbsetopt(struct socket *so, int cmd, u_long cc) { struct sockbuf *sb; sb_which wh; short *flags; u_int *hiwat, *lowat; int error; sb = NULL; SOCK_LOCK(so); if (SOLISTENING(so)) { switch (cmd) { case SO_SNDLOWAT: case SO_SNDBUF: lowat = &so->sol_sbsnd_lowat; hiwat = &so->sol_sbsnd_hiwat; flags = &so->sol_sbsnd_flags; break; case SO_RCVLOWAT: case SO_RCVBUF: lowat = &so->sol_sbrcv_lowat; hiwat = &so->sol_sbrcv_hiwat; flags = &so->sol_sbrcv_flags; break; } } else { switch (cmd) { case SO_SNDLOWAT: case SO_SNDBUF: sb = &so->so_snd; wh = SO_SND; break; case SO_RCVLOWAT: case SO_RCVBUF: sb = &so->so_rcv; wh = SO_RCV; break; } flags = &sb->sb_flags; hiwat = &sb->sb_hiwat; lowat = &sb->sb_lowat; SOCK_BUF_LOCK(so, wh); } error = 0; switch (cmd) { case SO_SNDBUF: case SO_RCVBUF: if (SOLISTENING(so)) { if (cc > sb_max_adj) { error = ENOBUFS; break; } *hiwat = cc; if (*lowat > *hiwat) *lowat = *hiwat; } else { if (!sbreserve_locked(so, wh, cc, curthread)) error = ENOBUFS; } if (error == 0) *flags &= ~SB_AUTOSIZE; break; case SO_SNDLOWAT: case SO_RCVLOWAT: /* * Make sure the low-water is never greater than the * high-water. */ *lowat = (cc > *hiwat) ? *hiwat : cc; break; } if (!SOLISTENING(so)) SOCK_BUF_UNLOCK(so, wh); SOCK_UNLOCK(so); return (error); } /* * Free mbufs held by a socket, and reserved mbuf space. */ static void sbrelease_internal(struct socket *so, sb_which which) { struct sockbuf *sb = sobuf(so, which); sbflush_internal(sb); (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, RLIM_INFINITY); sb->sb_mbmax = 0; } void sbrelease_locked(struct socket *so, sb_which which) { SOCK_BUF_LOCK_ASSERT(so, which); sbrelease_internal(so, which); } void sbrelease(struct socket *so, sb_which which) { SOCK_BUF_LOCK(so, which); sbrelease_locked(so, which); SOCK_BUF_UNLOCK(so, which); } void sbdestroy(struct socket *so, sb_which which) { #ifdef KERN_TLS struct sockbuf *sb = sobuf(so, which); if (sb->sb_tls_info != NULL) ktls_free(sb->sb_tls_info); sb->sb_tls_info = NULL; #endif sbrelease_internal(so, which); } /* * Routines to add and remove data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to append * new mbufs to a socket buffer, after checking that adequate space is * available, comparing the function sbspace() with the amount of data to be * added. sbappendrecord() differs from sbappend() in that data supplied is * treated as the beginning of a new record. To place a sender's address, * optional access rights, and data in a socket receive buffer, * sbappendaddr() should be used. To place access rights and data in a * socket receive buffer, sbappendrights() should be used. In either case, * the new data begins a new record. Note that unlike sbappend() and * sbappendrecord(), these routines check for the caller that there will be * enough space to store the data. Each fails if there is not enough space, * or if it cannot find mbufs to store additional information in. * * Reliable protocols may use the socket send buffer to hold data awaiting * acknowledgement. Data is normally copied from a socket send buffer in a * protocol with m_copy for output to a peer, and then removing the data from * the socket buffer with sbdrop() or sbdroprecord() when the data is * acknowledged by the peer. */ #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; if (m != sb->sb_lastrecord) { printf("%s: sb_mb %p sb_lastrecord %p last %p\n", __func__, sb->sb_mb, sb->sb_lastrecord, m); printf("packet chain:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) printf("\t%p\n", m); panic("%s from %s:%u", __func__, file, line); } } void sblastmbufchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mbtail) { printf("%s: sb_mb %p sb_mbtail %p last %p\n", __func__, sb->sb_mb, sb->sb_mbtail, m); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) printf("%p ", n); printf("\n"); } panic("%s from %s:%u", __func__, file, line); } #ifdef KERN_TLS m = sb->sb_mtls; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mtlstail) { printf("%s: sb_mtls %p sb_mtlstail %p last %p\n", __func__, sb->sb_mtls, sb->sb_mtlstail, m); printf("TLS packet tree:\n"); printf("\t"); for (m = sb->sb_mtls; m != NULL; m = m->m_next) { printf("%p ", m); } printf("\n"); panic("%s from %s:%u", __func__, file, line); } #endif } #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ (sb)->sb_mb = (m0); \ (sb)->sb_lastrecord = (m0); \ } while (/*CONSTCOND*/0) /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) { struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); if (m == NULL) return; sbm_clrprotoflags(m, flags); SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * XXX Would like to simply use sb_mbtail here, but * XXX I need to verify that I won't miss an EOR that * XXX way. */ if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * If this is the first record in the socket buffer, * it's also the last record. */ sb->sb_lastrecord = m; } } sbcompress(sb, m, n); SBLASTRECORDCHK(sb); } /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappend_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef KERN_TLS /* * Append an mbuf containing encrypted TLS data. The data * is marked M_NOTREADY until it has been decrypted and * stored as a TLS record. */ static void sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) { struct mbuf *n; SBLASTMBUFCHK(sb); /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, 0); for (n = m; n != NULL; n = n->m_next) n->m_flags |= M_NOTREADY; sbcompress_ktls_rx(sb, m, sb->sb_mtlstail); ktls_check_rx(sb); } #endif /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); #ifdef KERN_TLS /* * Decrypted TLS records are appended as records via * sbappendrecord(). TCP passes encrypted TLS records to this * function which must be scheduled for decryption. */ if (sb->sb_flags & SB_TLS_RX) { sbappend_ktls_rx(sb, m); return; } #endif KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); SBLASTMBUFCHK(sb); #ifdef KERN_TLS if (sb->sb_tls_info != NULL) ktls_seq(sb, m); #endif /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); sbcompress(sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; SBLASTRECORDCHK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappendstream_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef SOCKBUF_DEBUG void sbcheck(struct sockbuf *sb, const char *file, int line) { struct mbuf *m, *n, *fnrdy; u_long acc, ccc, mbcnt; #ifdef KERN_TLS u_long tlscc; #endif SOCKBUF_LOCK_ASSERT(sb); acc = ccc = mbcnt = 0; fnrdy = NULL; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { if (m->m_len == 0) { printf("sb %p empty mbuf %p\n", sb, m); goto fail; } if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { if (m != sb->sb_fnrdy) { printf("sb %p: fnrdy %p != m %p\n", sb, sb->sb_fnrdy, m); goto fail; } fnrdy = m; } if (fnrdy) { if (!(m->m_flags & M_NOTAVAIL)) { printf("sb %p: fnrdy %p, m %p is avail\n", sb, sb->sb_fnrdy, m); goto fail; } } else acc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } #ifdef KERN_TLS /* * Account for mbufs "detached" by ktls_detach_record() while * they are decrypted by ktls_decrypt(). tlsdcc gives a count * of the detached bytes that are included in ccc. The mbufs * and clusters are not included in the socket buffer * accounting. */ ccc += sb->sb_tlsdcc; tlscc = 0; for (m = sb->sb_mtls; m; m = m->m_next) { if (m->m_nextpkt != NULL) { printf("sb %p TLS mbuf %p with nextpkt\n", sb, m); goto fail; } if ((m->m_flags & M_NOTREADY) == 0) { printf("sb %p TLS mbuf %p ready\n", sb, m); goto fail; } tlscc += m->m_len; ccc += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } if (sb->sb_tlscc != tlscc) { printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, sb->sb_tlsdcc); goto fail; } #endif if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); #ifdef KERN_TLS printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, sb->sb_tlsdcc); #endif goto fail; } return; fail: panic("%s from %s:%u", __func__, file, line); } #endif /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); if (m0 == NULL) return; m_clrprotoflags(m0); /* * Put the first mbuf on the queue. Note this permits zero length * records. */ sballoc(sb, m0); SBLASTRECORDCHK(sb); SBLINKRECORD(sb, m0); sb->sb_mbtail = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } /* always call sbcompress() so it can do SBLASTMBUFCHK() */ sbcompress(sb, m, m0); } /* * As above, except the mbuf chain begins a new record. */ void sbappendrecord(struct sockbuf *sb, struct mbuf *m0) { SOCKBUF_LOCK(sb); sbappendrecord_locked(sb, m0); SOCKBUF_UNLOCK(sb); } /* Helper routine that appends data, control, and address to a sockbuf. */ static int sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) { struct mbuf *m, *n, *nlast; #if MSIZE <= 256 if (asa->sa_len > MLEN) return (0); #endif m = m_get(M_NOWAIT, MT_SONAME); if (m == NULL) return (0); m->m_len = asa->sa_len; bcopy(asa, mtod(m, caddr_t), asa->sa_len); if (m0) { M_ASSERT_NO_SND_TAG(m0); m_clrprotoflags(m0); m_tag_delete_chain(m0, NULL); /* * Clear some persistent info from pkthdr. * We don't use m_demote(), because some netgraph consumers * expect M_PKTHDR presence. */ m0->m_pkthdr.rcvif = NULL; m0->m_pkthdr.flowid = 0; m0->m_pkthdr.csum_flags = 0; m0->m_pkthdr.fibnum = 0; m0->m_pkthdr.rsstype = 0; } if (ctrl_last) ctrl_last->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n->m_next != NULL; n = n->m_next) sballoc(sb, n); sballoc(sb, n); nlast = n; SBLINKRECORD(sb, m); sb->sb_mbtail = nlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); return (1); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; int space = asa->sa_len; SOCKBUF_LOCK_ASSERT(sb); if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr_locked"); if (m0) space += m0->m_pkthdr.len; space += m_length(control, &ctrl_last); if (space > sbspace(sb)) return (0); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if insufficient mbufs. Does not validate space * on the receiving sockbuf. */ int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; SOCKBUF_LOCK_ASSERT(sb); ctrl_last = (control == NULL) ? NULL : m_last(control); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendaddr_locked(sb, asa, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags) { struct mbuf *m, *mlast; sbm_clrprotoflags(m0, flags); m_last(control)->m_next = m0; SBLASTRECORDCHK(sb); for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); } void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags) { SOCKBUF_LOCK(sb); sbappendcontrol_locked(sb, m0, control, flags); SOCKBUF_UNLOCK(sb); } /* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. This can occur if an * appropriate mbuf exists, there is room, both mbufs are not marked as * not ready, and no merging of data types will occur. * * (3) The mbuf may be appended to the end of the existing mbuf chain. * * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as * end-of-record. */ void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { int eor = 0; struct mbuf *o; SOCKBUF_LOCK_ASSERT(sb); while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { if (sb->sb_lastrecord == m) sb->sb_lastrecord = m->m_next; m = m_free(m); continue; } if (n && (n->m_flags & M_EOR) == 0 && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && !(n->m_flags & (M_NOTREADY | M_EXTPG)) && !mbuf_has_tls_session(m) && !mbuf_has_tls_session(n) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); n->m_len += m->m_len; sb->sb_ccc += m->m_len; if (sb->sb_fnrdy == NULL) sb->sb_acc += m->m_len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) /* XXX: Probably don't need.*/ sb->sb_ctl += m->m_len; m = m_free(m); continue; } if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) && (m->m_flags & M_NOTREADY) == 0 && !mbuf_has_tls_session(m)) (void)mb_unmapped_compress(m); if (n) n->m_next = m; else sb->sb_mb = m; sb->sb_mbtail = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); n->m_flags |= eor; } SBLASTMBUFCHK(sb); } #ifdef KERN_TLS /* * A version of sbcompress() for encrypted TLS RX mbufs. These mbufs * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also * a bit simpler (no EOR markers, always MT_DATA, etc.). */ static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) { SOCKBUF_LOCK_ASSERT(sb); while (m) { KASSERT((m->m_flags & M_EOR) == 0, ("TLS RX mbuf %p with EOR", m)); KASSERT(m->m_type == MT_DATA, ("TLS RX mbuf %p is not MT_DATA", m)); KASSERT((m->m_flags & M_NOTREADY) != 0, ("TLS RX mbuf %p ready", m)); KASSERT((m->m_flags & M_EXTPG) == 0, ("TLS RX mbuf %p unmapped", m)); if (m->m_len == 0) { m = m_free(m); continue; } /* * Even though both 'n' and 'm' are NOTREADY, it's ok * to coalesce the data. */ if (n && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(n->m_flags & (M_EXTPG)) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n)) { m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); n->m_len += m->m_len; sb->sb_ccc += m->m_len; sb->sb_tlscc += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mtls = m; sb->sb_mtlstail = m; sballoc_ktls_rx(sb, m); n = m; m = m->m_next; n->m_next = NULL; } SBLASTMBUFCHK(sb); } #endif /* * Free all mbufs in a sockbuf. Check that all resources are reclaimed. */ static void sbflush_internal(struct sockbuf *sb) { while (sb->sb_mbcnt || sb->sb_tlsdcc) { /* * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: * we would loop forever. Panic instead. */ if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) break; m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); } KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, ("%s: ccc %u mb %p mbcnt %u", __func__, sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); } void sbflush_locked(struct sockbuf *sb) { SOCKBUF_LOCK_ASSERT(sb); sbflush_internal(sb); } void sbflush(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbflush_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Cut data from (the front of) a sockbuf. */ static struct mbuf * sbcut_internal(struct sockbuf *sb, int len) { struct mbuf *m, *next, *mfree; bool is_tls; KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", __func__, len)); KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", __func__, len, sb->sb_ccc)); next = (m = sb->sb_mb) ? m->m_nextpkt : 0; is_tls = false; mfree = NULL; while (len > 0) { if (m == NULL) { #ifdef KERN_TLS if (next == NULL && !is_tls) { if (sb->sb_tlsdcc != 0) { MPASS(len >= sb->sb_tlsdcc); len -= sb->sb_tlsdcc; sb->sb_ccc -= sb->sb_tlsdcc; sb->sb_tlsdcc = 0; if (len == 0) break; } next = sb->sb_mtls; is_tls = true; } #endif KASSERT(next, ("%s: no next, len %d", __func__, len)); m = next; next = m->m_nextpkt; } if (m->m_len > len) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p M_NOTAVAIL", __func__, m)); m->m_len -= len; m->m_data += len; sb->sb_ccc -= len; sb->sb_acc -= len; if (sb->sb_sndptroff != 0) sb->sb_sndptroff -= len; if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) sb->sb_ctl -= len; break; } len -= m->m_len; #ifdef KERN_TLS if (is_tls) sbfree_ktls_rx(sb, m); else #endif sbfree(sb, m); /* * Do not put M_NOTREADY buffers to the free list, they * are referenced from outside. */ if (m->m_flags & M_NOTREADY && !is_tls) m = m->m_next; else { struct mbuf *n; n = m->m_next; m->m_next = mfree; mfree = m; m = n; } } /* * Free any zero-length mbufs from the buffer. * For SOCK_DGRAM sockets such mbufs represent empty records. * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, * when sosend_generic() needs to send only control data. */ while (m && m->m_len == 0) { struct mbuf *n; sbfree(sb, m); n = m->m_next; m->m_next = mfree; mfree = m; m = n; } #ifdef KERN_TLS if (is_tls) { sb->sb_mb = NULL; sb->sb_mtls = m; if (m == NULL) sb->sb_mtlstail = NULL; } else #endif if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; /* * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure * sb_lastrecord is up-to-date if we dropped part of the last record. */ m = sb->sb_mb; if (m == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (m->m_nextpkt == NULL) { sb->sb_lastrecord = m; } return (mfree); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); m_freem(sbcut_internal(sb, len)); } /* * Drop data from (the front of) a sockbuf, * and return it to caller. */ struct mbuf * sbcut_locked(struct sockbuf *sb, int len) { SOCKBUF_LOCK_ASSERT(sb); return (sbcut_internal(sb, len)); } void sbdrop(struct sockbuf *sb, int len) { struct mbuf *mfree; SOCKBUF_LOCK(sb); mfree = sbcut_internal(sb, len); SOCKBUF_UNLOCK(sb); m_freem(mfree); } struct mbuf * sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) { struct mbuf *m; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { *moff = off; if (sb->sb_sndptr == NULL) { sb->sb_sndptr = sb->sb_mb; sb->sb_sndptroff = 0; } return (sb->sb_mb); } else { m = sb->sb_sndptr; off -= sb->sb_sndptroff; } *moff = off; return (m); } void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) { /* * A small copy was done, advance forward the sb_sbsndptr to cover * it. */ struct mbuf *m; if (mb != sb->sb_sndptr) { /* Did not copyout at the same mbuf */ return; } m = mb; while (m && (len > 0)) { if (len >= m->m_len) { len -= m->m_len; if (m->m_next) { sb->sb_sndptroff += m->m_len; sb->sb_sndptr = m->m_next; } m = m->m_next; } else { len = 0; } } } /* * Return the first mbuf and the mbuf data offset for the provided * send offset without changing the "sb_sndptroff" field. */ struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) { struct mbuf *m; KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); /* * If the "off" is below the stored offset, which happens on * retransmits, just use "sb_mb": */ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { m = sb->sb_mb; } else { m = sb->sb_sndptr; off -= sb->sb_sndptroff; } while (off > 0 && m != NULL) { if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } *moff = off; return (m); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord_locked(struct sockbuf *sb) { struct mbuf *m; SOCKBUF_LOCK_ASSERT(sb); m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); m = m_free(m); } while (m); } SB_EMPTY_FIXUP(sb); } /* * Drop a record off the front of a sockbuf and move the next record to the * front. */ void sbdroprecord(struct sockbuf *sb) { SOCKBUF_LOCK(sb); sbdroprecord_locked(sb); SOCKBUF_UNLOCK(sb); } /* * Create a "control" mbuf containing the specified data with the specified * type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(const void *p, u_int size, int type, int level, int wait) { struct cmsghdr *cp; struct mbuf *m; MBUF_CHECKSLEEP(wait); if (wait == M_NOWAIT) { if (CMSG_SPACE(size) > MCLBYTES) return (NULL); } else KASSERT(CMSG_SPACE(size) <= MCLBYTES, ("%s: passed CMSG_SPACE(%u) > MCLBYTES", __func__, size)); if (CMSG_SPACE(size) > MLEN) m = m_getcl(wait, MT_CONTROL, 0); else m = m_get(wait, MT_CONTROL); if (m == NULL) return (NULL); KASSERT(CMSG_SPACE(size) <= M_TRAILINGSPACE(m), ("sbcreatecontrol: short mbuf")); /* * Don't leave the padding between the msg header and the * cmsg data and the padding after the cmsg data un-initialized. */ cp = mtod(m, struct cmsghdr *); bzero(cp, CMSG_SPACE(size)); if (p != NULL) (void)memcpy(CMSG_DATA(cp), p, size); m->m_len = CMSG_SPACE(size); cp->cmsg_len = CMSG_LEN(size); cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * This does the same for socket buffers that sotoxsocket does for sockets: * generate an user-format data structure describing the socket buffer. Note * that the xsockbuf structure, since it is always embedded in a socket, does * not include a self pointer nor a length. We make this entry point public * in case some other mechanism needs it. */ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; - xsb->sb_mcnt = sb->sb_mcnt; - xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, ""); SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, "Socket buffer size waste factor"); diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 753ebd1fbf35..a108e19b3ff3 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -1,262 +1,260 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 * * $FreeBSD$ */ #ifndef _SYS_SOCKBUF_H_ #define _SYS_SOCKBUF_H_ /* * Constants for sb_flags field of struct sockbuf/xsockbuf. */ #define SB_TLS_RX 0x01 /* using KTLS on RX */ #define SB_TLS_RX_RUNNING 0x02 /* KTLS RX operation running */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ #define SB_NOINTR 0x40 /* operations not interruptible */ #define SB_AIO 0x80 /* AIO operations queued */ #define SB_KNOTE 0x100 /* kernel note attached */ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ #define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ #define SB_STOP 0x1000 /* backpressure indicator */ #define SB_AIO_RUNNING 0x2000 /* AIO operation running */ #define SB_TLS_IFNET 0x4000 /* has used / is using ifnet KTLS */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ #if defined(_KERNEL) || defined(_WANT_SOCKET) #include #include #include #include #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ struct ktls_session; struct mbuf; struct sockaddr; struct socket; struct thread; struct selinfo; /* * Variables for socket buffering. * * Locking key to struct sockbuf: * (a) locked by SOCKBUF_LOCK(). */ struct sockbuf { struct mtx *sb_mtx; /* sockbuf lock */ struct selinfo *sb_sel; /* process selecting read/write */ short sb_state; /* (a) socket state on sockbuf */ short sb_flags; /* (a) flags, see above */ struct mbuf *sb_mb; /* (a) the mbuf chain */ struct mbuf *sb_mbtail; /* (a) the last mbuf in the chain */ struct mbuf *sb_lastrecord; /* (a) first mbuf of last * record in socket buffer */ struct mbuf *sb_sndptr; /* (a) pointer into mbuf chain */ struct mbuf *sb_fnrdy; /* (a) pointer to first not ready buffer */ u_int sb_sndptroff; /* (a) byte offset of ptr into chain */ u_int sb_acc; /* (a) available chars in buffer */ u_int sb_ccc; /* (a) claimed chars in buffer */ u_int sb_hiwat; /* (a) max actual char count */ u_int sb_mbcnt; /* (a) chars of mbufs used */ - u_int sb_mcnt; /* (a) number of mbufs in buffer */ - u_int sb_ccnt; /* (a) number of clusters in buffer */ u_int sb_mbmax; /* (a) max chars of mbufs to use */ u_int sb_ctl; /* (a) non-data chars in buffer */ u_int sb_tlscc; /* (a) TLS chain characters */ u_int sb_tlsdcc; /* (a) TLS characters being decrypted */ int sb_lowat; /* (a) low water mark */ sbintime_t sb_timeo; /* (a) timeout for read/write */ struct mbuf *sb_mtls; /* (a) TLS mbuf chain */ struct mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */ int (*sb_upcall)(struct socket *, void *, int); /* (a) */ void *sb_upcallarg; /* (a) */ uint64_t sb_tls_seqno; /* (a) TLS seqno */ struct ktls_session *sb_tls_info; /* (a + b) TLS state */ TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */ struct task sb_aiotask; /* AIO task */ }; #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ #ifdef _KERNEL /* 'which' values for KPIs that operate on one buffer of a socket. */ typedef enum { SO_RCV, SO_SND } sb_which; /* * Per-socket buffer mutex used to protect most fields in the socket buffer. * These make use of the mutex pointer embedded in struct sockbuf, which * currently just references mutexes in the containing socket. The * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with * these locking macros. */ #define SOCKBUF_MTX(_sb) ((_sb)->sb_mtx) #define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb)) #define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) #define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb)) #define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED) #define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED) /* * Socket buffer private mbuf(9) flags. */ #define M_NOTREADY M_PROTO1 /* m_data not populated yet */ #define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */ #define M_NOTAVAIL (M_NOTREADY | M_BLOCKED) void sbappend(struct sockbuf *sb, struct mbuf *m, int flags); void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags); int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags); void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * sbcreatecontrol(const void *p, u_int size, int type, int level, int wait); void sbdestroy(struct socket *, sb_which); void sbdrop(struct sockbuf *sb, int len); void sbdrop_locked(struct sockbuf *sb, int len); struct mbuf * sbcut_locked(struct sockbuf *sb, int len); void sbdroprecord(struct sockbuf *sb); void sbdroprecord_locked(struct sockbuf *sb); void sbflush(struct sockbuf *sb); void sbflush_locked(struct sockbuf *sb); void sbrelease(struct socket *, sb_which); void sbrelease_locked(struct socket *, sb_which); int sbsetopt(struct socket *so, int cmd, u_long cc); bool sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td); void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len); struct mbuf * sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff); struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); int sbwait(struct socket *, sb_which); void sballoc(struct sockbuf *, struct mbuf *); void sbfree(struct sockbuf *, struct mbuf *); void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m); void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m); int sbready(struct sockbuf *, struct mbuf *, int); /* * Return how much data is available to be taken out of socket * buffer right now. */ static inline u_int sbavail(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_acc); } /* * Return how much data sits there in the socket buffer * It might be that some data is not yet ready to be read. */ static inline u_int sbused(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_ccc); } /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (ccc > hiwat or mbcnt > mbmax). */ static inline long sbspace(struct sockbuf *sb) { int bleft, mleft; /* size should match sockbuf fields */ #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif if (sb->sb_flags & SB_STOP) return(0); bleft = sb->sb_hiwat - sb->sb_ccc; mleft = sb->sb_mbmax - sb->sb_mbcnt; return ((bleft < mleft) ? bleft : mleft); } #define SB_EMPTY_FIXUP(sb) do { \ if ((sb)->sb_mb == NULL) { \ (sb)->sb_mbtail = NULL; \ (sb)->sb_lastrecord = NULL; \ } \ } while (/*CONSTCOND*/0) #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *, const char *, int); void sblastmbufchk(struct sockbuf *, const char *, int); void sbcheck(struct sockbuf *, const char *, int); #define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) #define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) #define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__) #else #define SBLASTRECORDCHK(sb) do {} while (0) #define SBLASTMBUFCHK(sb) do {} while (0) #define SBCHECK(sb) do {} while (0) #endif /* SOCKBUF_DEBUG */ #endif /* _KERNEL */ #endif /* _SYS_SOCKBUF_H_ */ diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 05eefd7e4fd4..52b9376f0559 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,601 +1,599 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 * * $FreeBSD$ */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ /* * Socket generation count type. Also used in xinpcb, xtcpcb, xunpcb. */ typedef uint64_t so_gen_t; #if defined(_KERNEL) || defined(_WANT_SOCKET) #include /* for TAILQ macros */ #include /* for struct selinfo */ #include #include #include #include #include #ifdef _KERNEL #include #include #endif struct vnet; /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ typedef int so_upcall_t(struct socket *, void *, int); typedef void so_dtor_t(struct socket *); struct socket; enum socket_qstate { SQ_NONE = 0, SQ_INCOMP = 0x0800, /* on sol_incomp */ SQ_COMP = 0x1000, /* on sol_comp */ }; /*- * Locking key to struct socket: * (a) constant after allocation, no locking required. * (b) locked by SOCK_LOCK(so). * (cr) locked by SOCK_RECVBUF_LOCK(so) * (cs) locked by SOCK_SENDBUF_LOCK(so) * (e) locked by SOLISTEN_LOCK() of corresponding listening socket. * (f) not locked since integer reads/writes are atomic. * (g) used only as a sleep/wakeup address, no value. * (h) locked by global mutex so_global_mtx. * (k) locked by KTLS workqueue mutex */ TAILQ_HEAD(accept_queue, socket); struct socket { struct mtx so_lock; volatile u_int so_count; /* (b / refcount) */ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */ struct selinfo so_wrsel; /* (b/cs) for so_snd */ int so_options; /* (b) from socket call, see socket.h */ short so_type; /* (a) generic type, see socket.h */ short so_state; /* (b) internal state flags SS_* */ void *so_pcb; /* protocol control block */ struct vnet *so_vnet; /* (a) network stack instance */ struct protosw *so_proto; /* (a) protocol handle */ short so_linger; /* time to linger close(2) */ short so_timeo; /* (g) connection timeout */ u_short so_error; /* (f) error affecting connection */ u_short so_rerror; /* (f) error affecting connection */ struct sigio *so_sigio; /* [sg] information for async I/O or out of band data (SIGURG) */ struct ucred *so_cred; /* (a) user credentials */ struct label *so_label; /* (b) MAC label for socket */ /* NB: generation count must not be first. */ so_gen_t so_gencnt; /* (h) generation count */ void *so_emuldata; /* (b) private data for emulators */ so_dtor_t *so_dtor; /* (b) optional destructor */ struct osd osd; /* Object Specific extensions */ /* * so_fibnum, so_user_cookie and friends can be used to attach * some user-specified metadata to a socket, which then can be * used by the kernel for various actions. * so_user_cookie is used by ipfw/dummynet. */ int so_fibnum; /* routing domain for this socket */ uint32_t so_user_cookie; int so_ts_clock; /* type of the clock used for timestamps */ uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */ /* * Mutexes to prevent interleaving of socket I/O. These have to be * outside of the socket buffers in order to interlock with listen(2). */ struct sx so_snd_sx __aligned(CACHE_LINE_SIZE); struct mtx so_snd_mtx; struct sx so_rcv_sx __aligned(CACHE_LINE_SIZE); struct mtx so_rcv_mtx; union { /* Regular (data flow) socket. */ struct { /* (cr, cs) Receive and send buffers. */ struct sockbuf so_rcv, so_snd; /* (e) Our place on accept queue. */ TAILQ_ENTRY(socket) so_list; struct socket *so_listen; /* (b) */ enum socket_qstate so_qstate; /* (b) */ /* (b) cached MAC label for peer */ struct label *so_peerlabel; u_long so_oobmark; /* chars to oob mark */ /* (k) Our place on KTLS RX work queue. */ STAILQ_ENTRY(socket) so_ktls_rx_list; }; /* * Listening socket, where accepts occur, is so_listen in all * subsidiary sockets. If so_listen is NULL, socket is not * related to an accept. For a listening socket itself * sol_incomp queues partially completed connections, while * sol_comp is a queue of connections ready to be accepted. * If a connection is aborted and it has so_listen set, then * it has to be pulled out of either sol_incomp or sol_comp. * We allow connections to queue up based on current queue * lengths and limit on number of queued connections for this * socket. */ struct { /* (e) queue of partial unaccepted connections */ struct accept_queue sol_incomp; /* (e) queue of complete unaccepted connections */ struct accept_queue sol_comp; u_int sol_qlen; /* (e) sol_comp length */ u_int sol_incqlen; /* (e) sol_incomp length */ u_int sol_qlimit; /* (e) queue limit */ /* accept_filter(9) optional data */ struct accept_filter *sol_accept_filter; void *sol_accept_filter_arg; /* saved filter args */ char *sol_accept_filter_str; /* saved user args */ /* Optional upcall, for kernel socket. */ so_upcall_t *sol_upcall; /* (e) */ void *sol_upcallarg; /* (e) */ /* Socket buffer parameters, to be copied to * dataflow sockets, accepted from this one. */ int sol_sbrcv_lowat; int sol_sbsnd_lowat; u_int sol_sbrcv_hiwat; u_int sol_sbsnd_hiwat; short sol_sbrcv_flags; short sol_sbsnd_flags; sbintime_t sol_sbrcv_timeo; sbintime_t sol_sbsnd_timeo; /* Information tracking listen queue overflows. */ struct timeval sol_lastover; /* (e) */ int sol_overcount; /* (e) */ }; }; }; #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ /* * Socket state bits. * * Historically, these bits were all kept in the so_state field. * They are now split into separate, lock-specific fields. * so_state maintains basic socket state protected by the socket lock. * so_qstate holds information about the socket accept queues. * Each socket buffer also has a state field holding information * relevant to that socket buffer (can't send, rcv). * Many fields will be read without locks to improve performance and avoid * lock order issues. However, this approach must be used with caution. */ #define SS_NOFDREF 0x0001 /* no file table ref any more */ #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ #define SS_NBIO 0x0100 /* non-blocking ops */ #define SS_ASYNC 0x0200 /* async i/o notify */ #define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ #define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ /* * Protocols can mark a socket as SS_PROTOREF to indicate that, following * pru_detach, they still want the socket to persist, and will free it * themselves when they are done. Protocols should only ever call sofree() * following setting this flag in pru_detach(), and never otherwise, as * sofree() bypasses socket reference counting. */ #define SS_PROTOREF 0x4000 /* strong protocol reference */ #ifdef _KERNEL #define SOCK_MTX(so) (&(so)->so_lock) #define SOCK_LOCK(so) mtx_lock(&(so)->so_lock) #define SOCK_OWNED(so) mtx_owned(&(so)->so_lock) #define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock) #define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED) #define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED) #define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0) #define SOLISTEN_LOCK(sol) do { \ mtx_lock(&(sol)->so_lock); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) #define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock) #define SOLISTEN_UNLOCK(sol) do { \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ mtx_unlock(&(sol)->so_lock); \ } while (0) #define SOLISTEN_LOCK_ASSERT(sol) do { \ mtx_assert(&(sol)->so_lock, MA_OWNED); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) /* * Socket buffer locks. These are strongly preferred over SOCKBUF_LOCK(sb) * macros, as we are moving towards protocol specific socket buffers. */ #define SOCK_RECVBUF_MTX(so) \ (&(so)->so_rcv_mtx) #define SOCK_RECVBUF_LOCK(so) \ mtx_lock(SOCK_RECVBUF_MTX(so)) #define SOCK_RECVBUF_UNLOCK(so) \ mtx_unlock(SOCK_RECVBUF_MTX(so)) #define SOCK_RECVBUF_LOCK_ASSERT(so) \ mtx_assert(SOCK_RECVBUF_MTX(so), MA_OWNED) #define SOCK_RECVBUF_UNLOCK_ASSERT(so) \ mtx_assert(SOCK_RECVBUF_MTX(so), MA_NOTOWNED) #define SOCK_SENDBUF_MTX(so) \ (&(so)->so_snd_mtx) #define SOCK_SENDBUF_LOCK(so) \ mtx_lock(SOCK_SENDBUF_MTX(so)) #define SOCK_SENDBUF_UNLOCK(so) \ mtx_unlock(SOCK_SENDBUF_MTX(so)) #define SOCK_SENDBUF_LOCK_ASSERT(so) \ mtx_assert(SOCK_SENDBUF_MTX(so), MA_OWNED) #define SOCK_SENDBUF_UNLOCK_ASSERT(so) \ mtx_assert(SOCK_SENDBUF_MTX(so), MA_NOTOWNED) #define SOCK_BUF_LOCK(so, which) \ mtx_lock(soeventmtx(so, which)) #define SOCK_BUF_UNLOCK(so, which) \ mtx_unlock(soeventmtx(so, which)) #define SOCK_BUF_LOCK_ASSERT(so, which) \ mtx_assert(soeventmtx(so, which), MA_OWNED) #define SOCK_BUF_UNLOCK_ASSERT(so, which) \ mtx_assert(soeventmtx(so, which), MA_NOTOWNED) static inline struct sockbuf * sobuf(struct socket *so, const sb_which which) { return (which == SO_RCV ? &so->so_rcv : &so->so_snd); } static inline struct mtx * soeventmtx(struct socket *so, const sb_which which) { return (which == SO_RCV ? SOCK_RECVBUF_MTX(so) : SOCK_SENDBUF_MTX(so)); } /* * Macros for sockets and socket buffering. */ /* * Flags to soiolock(). */ #define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ #define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */ #define SBL_VALID (SBL_WAIT | SBL_NOINTR) #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) #define SOCK_IO_SEND_LOCK(so, flags) \ soiolock((so), &(so)->so_snd_sx, (flags)) #define SOCK_IO_SEND_UNLOCK(so) \ soiounlock(&(so)->so_snd_sx) #define SOCK_IO_SEND_OWNED(so) sx_xlocked(&(so)->so_snd_sx) #define SOCK_IO_RECV_LOCK(so, flags) \ soiolock((so), &(so)->so_rcv_sx, (flags)) #define SOCK_IO_RECV_UNLOCK(so) \ soiounlock(&(so)->so_rcv_sx) #define SOCK_IO_RECV_OWNED(so) sx_xlocked(&(so)->so_rcv_sx) /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) /* can we read something from so? */ #define soreadabledata(so) \ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \ (so)->so_error || (so)->so_rerror) #define soreadable(so) \ (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) /* can we write something to so? */ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ (so)->so_error) /* * soref()/sorele() ref-count the socket structure. * soref() may be called without owning socket lock, but in that case a * caller must own something that holds socket, and so_count must be not 0. * Note that you must still explicitly close the socket, but the last ref * count will free the structure. */ #define soref(so) refcount_acquire(&(so)->so_count) #define sorele(so) do { \ SOCK_UNLOCK_ASSERT(so); \ if (!refcount_release_if_not_last(&(so)->so_count)) { \ SOCK_LOCK(so); \ sorele_locked(so); \ } \ } while (0) /* * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to * avoid a non-atomic test-and-wakeup. However, sowakeup is * responsible for releasing the lock if it is called. We unlock only * if we don't call into sowakeup. If any code is introduced that * directly invokes the underlying sowakeup() primitives, it must * maintain the same semantics. */ #define sorwakeup(so) do { \ SOCK_RECVBUF_LOCK(so); \ sorwakeup_locked(so); \ } while (0) #define sowwakeup(so) do { \ SOCK_SENDBUF_LOCK(so); \ sowwakeup_locked(so); \ } while (0) struct accept_filter { char accf_name[16]; int (*accf_callback) (struct socket *so, void *arg, int waitflag); void * (*accf_create) (struct socket *so, char *arg); void (*accf_destroy) (struct socket *so); SLIST_ENTRY(accept_filter) accf_next; }; #define ACCEPT_FILTER_DEFINE(modname, filtname, cb, create, destroy, ver) \ static struct accept_filter modname##_filter = { \ .accf_name = filtname, \ .accf_callback = cb, \ .accf_create = create, \ .accf_destroy = destroy, \ }; \ static moduledata_t modname##_mod = { \ .name = __XSTRING(modname), \ .evhand = accept_filt_generic_mod_event, \ .priv = &modname##_filter, \ }; \ DECLARE_MODULE(modname, modname##_mod, SI_SUB_DRIVERS, \ SI_ORDER_MIDDLE); \ MODULE_VERSION(modname, ver) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_ACCF); MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_SONAME); #endif /* * Socket specific helper hook point identifiers * Do not leave holes in the sequence, hook registration is a loop. */ #define HHOOK_SOCKET_OPT 0 #define HHOOK_SOCKET_CREATE 1 #define HHOOK_SOCKET_RCV 2 #define HHOOK_SOCKET_SND 3 #define HHOOK_FILT_SOREAD 4 #define HHOOK_FILT_SOWRITE 5 #define HHOOK_SOCKET_CLOSE 6 #define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE struct socket_hhook_data { struct socket *so; struct mbuf *m; void *hctx; /* hook point specific data*/ int status; }; extern int maxsockets; extern u_long sb_max; extern so_gen_t so_gencnt; struct file; struct filecaps; struct filedesc; struct mbuf; struct sockaddr; struct ucred; struct uio; /* Return values for socket upcalls. */ #define SU_OK 0 #define SU_ISCONNECTED 1 /* * From uipc_socket and friends */ int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len); int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, u_int *fflagp, struct filecaps *havecaps); void soabort(struct socket *so); int soaccept(struct socket *so, struct sockaddr **nam); void soaio_enqueue(struct task *task); void soaio_rcv(void *context, int pending); void soaio_snd(void *context, int pending); int socheckuid(struct socket *so, uid_t uid); int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soclose(struct socket *so); int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soconnect2(struct socket *so1, struct socket *so2); int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td); int sodisconnect(struct socket *so); void sodtor_set(struct socket *, so_dtor_t *); struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); void sofree(struct socket *so); void sohasoutofband(struct socket *so); int solisten(struct socket *so, int backlog, struct thread *td); void solisten_proto(struct socket *so, int backlog); void solisten_proto_abort(struct socket *so); int solisten_proto_check(struct socket *so); int solisten_dequeue(struct socket *, struct socket **, int); struct socket * sonewconn(struct socket *head, int connstatus); struct socket * sopeeloff(struct socket *); int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_stream(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_dgram(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_generic(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); void sorele_locked(struct socket *so); int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); void sorflush(struct socket *so); int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int soshutdown(struct socket *so, int how); void soupcall_clear(struct socket *, sb_which); void soupcall_set(struct socket *, sb_which, so_upcall_t, void *); void solisten_upcall_set(struct socket *, so_upcall_t, void *); void sorwakeup_locked(struct socket *); void sowwakeup_locked(struct socket *); void sowakeup_aio(struct socket *, sb_which); void solisten_wakeup(struct socket *); int selsocket(struct socket *so, int events, struct timeval *tv, struct thread *td); void soisconnected(struct socket *so); void soisconnecting(struct socket *so); void soisdisconnected(struct socket *so); void soisdisconnecting(struct socket *so); void socantrcvmore(struct socket *so); void socantrcvmore_locked(struct socket *so); void socantsendmore(struct socket *so); void socantsendmore_locked(struct socket *so); void soroverflow(struct socket *so); void soroverflow_locked(struct socket *so); int soiolock(struct socket *so, struct sx *sx, int flags); void soiounlock(struct sx *sx); /* * Accept filter functions (duh). */ int accept_filt_add(struct accept_filter *filt); int accept_filt_del(char *name); struct accept_filter *accept_filt_get(char *name); #ifdef ACCEPT_FILTER_MOD #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_accf); #endif int accept_filt_generic_mod_event(module_t mod, int event, void *data); #endif #endif /* _KERNEL */ /* * Structure to export socket from kernel to utilities, via sysctl(3). */ struct xsocket { ksize_t xso_len; /* length of this structure */ kvaddr_t xso_so; /* kernel address of struct socket */ kvaddr_t so_pcb; /* kernel address of struct inpcb */ uint64_t so_oobmark; int64_t so_spare64[8]; int32_t xso_protocol; int32_t xso_family; uint32_t so_qlen; uint32_t so_incqlen; uint32_t so_qlimit; pid_t so_pgid; uid_t so_uid; int32_t so_spare32[8]; int16_t so_type; int16_t so_options; int16_t so_linger; int16_t so_state; int16_t so_timeo; uint16_t so_error; struct xsockbuf { uint32_t sb_cc; uint32_t sb_hiwat; uint32_t sb_mbcnt; - uint32_t sb_mcnt; - uint32_t sb_ccnt; uint32_t sb_mbmax; int32_t sb_lowat; int32_t sb_timeo; int16_t sb_flags; } so_rcv, so_snd; }; #ifdef _KERNEL void sotoxsocket(struct socket *so, struct xsocket *xso); void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); #endif /* * Socket buffer state bits. Exported via libprocstat(3). */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ #endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index 7cfffc9fa06c..b7dbcb3531b0 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -1,1523 +1,1516 @@ /*- * Copyright (c) 1983, 1988, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #define _WANT_SOCKET #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" #include "nl_defs.h" #define max(a, b) (((a) > (b)) ? (a) : (b)) #ifdef INET static void inetprint(const char *, struct in_addr *, int, const char *, int, const int); #endif #ifdef INET6 static int udp_done, tcp_done, sdp_done; #endif /* INET6 */ static int pcblist_sysctl(int proto, const char *name, char **bufp) { const char *mibvar; char *buf; size_t len; switch (proto) { case IPPROTO_TCP: mibvar = "net.inet.tcp.pcblist"; break; case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; case IPPROTO_DIVERT: mibvar = "net.inet.divert.pcblist"; break; default: mibvar = "net.inet.raw.pcblist"; break; } if (strncmp(name, "sdp", 3) == 0) mibvar = "net.inet.sdp.pcblist"; len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) xo_warn("sysctl: %s", mibvar); return (0); } if ((buf = malloc(len)) == NULL) { xo_warnx("malloc %lu bytes", (u_long)len); return (0); } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { xo_warn("sysctl: %s", mibvar); free(buf); return (0); } *bufp = buf; return (1); } /* * Copied directly from uipc_socket2.c. We leave out some fields that are in * nested structures that aren't used to avoid extra work. */ static void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; - xsb->sb_mcnt = sb->sb_mcnt; - xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } int sotoxsocket(struct socket *so, struct xsocket *xso) { struct protosw proto; struct domain domain; bzero(xso, sizeof *xso); xso->xso_len = sizeof *xso; xso->xso_so = (uintptr_t)so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = (uintptr_t)so->so_pcb; if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0) return (-1); xso->xso_protocol = proto.pr_protocol; if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0) return (-1); xso->xso_family = domain.dom_family; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; if ((so->so_options & SO_ACCEPTCONN) != 0) { xso->so_qlen = so->sol_qlen; xso->so_incqlen = so->sol_incqlen; xso->so_qlimit = so->sol_qlimit; } else { sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_oobmark = so->so_oobmark; } return (0); } /* * Print a summary of connections related to an Internet * protocol. For TCP, also give state of connection. * Listening processes (aflag) are suppressed unless the * -a (all) flag is specified. */ void protopr(u_long off, const char *name, int af1, int proto) { static int first = 1; int istcp; char *buf; const char *vchar; struct xtcpcb *tp; struct xinpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; int fnamelen, cnamelen; istcp = 0; switch (proto) { case IPPROTO_TCP: #ifdef INET6 if (strncmp(name, "sdp", 3) != 0) { if (tcp_done != 0) return; else tcp_done = 1; } else { if (sdp_done != 0) return; else sdp_done = 1; } #endif istcp = 1; break; case IPPROTO_UDP: #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif break; } if (!pcblist_sysctl(proto, name, &buf)) return; if (cflag || Cflag) { fnamelen = strlen("Stack"); cnamelen = strlen("CC"); oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen*)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { tp = (struct xtcpcb *)xig; inp = &tp->xt_inp; } else { continue; } if (so->xso_protocol != proto) continue; if (inp->inp_gencnt > oxig->xig_gen) continue; fnamelen = max(fnamelen, (int)strlen(tp->xt_stack)); cnamelen = max(cnamelen, (int)strlen(tp->xt_cc)); } } oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { tp = (struct xtcpcb *)xig; inp = &tp->xt_inp; } else { inp = (struct xinpcb *)xig; } so = &inp->xi_socket; /* Ignore sockets for protocols other than the desired one. */ if (so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. */ if (inp->inp_gencnt > oxig->xig_gen) continue; if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0) #ifdef INET6 || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0) #endif /* INET6 */ || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0 #ifdef INET6 && (inp->inp_vflag & INP_IPV6) == 0 #endif /* INET6 */ )) ) continue; if (!aflag && ( (istcp && tp->t_state == TCPS_LISTEN) || (af1 == AF_INET && inp->inp_laddr.s_addr == INADDR_ANY) #ifdef INET6 || (af1 == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif /* INET6 */ || (af1 == AF_UNSPEC && (((inp->inp_vflag & INP_IPV4) != 0 && inp->inp_laddr.s_addr == INADDR_ANY) #ifdef INET6 || ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif )) )) continue; if (first) { if (!Lflag) { xo_emit("Active Internet connections"); if (aflag) xo_emit(" (including servers)"); } else xo_emit( "Current listen queue sizes (qlen/incqlen/maxqlen)"); xo_emit("\n"); if (Aflag) xo_emit("{T:/%-*s} ", 2 * (int)sizeof(void *), "Tcpcb"); if (Lflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-32.32s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-32.32s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-32.32s} {T:/%-45.45s}"), "Proto", "Listen", "Local Address"); else if (Tflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%s}"), "Proto", "Rexmit", "OOORcv", "0-win", "Local Address", "Foreign Address"); else { xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%-45.45s}"), "Proto", "Recv-Q", "Send-Q", "Local Address", "Foreign Address"); if (!xflag && !Rflag) xo_emit(" {T:/%-11.11s}", "(state)"); } if (xflag) { - xo_emit(" {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " - "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " + xo_emit("{T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s}", - "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS", "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA", "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX"); xo_emit(" {T:/%7.7s} {T:/%7.7s} {T:/%7.7s} " "{T:/%7.7s} {T:/%7.7s} {T:/%7.7s}", "rexmt", "persist", "keep", "2msl", "delack", "rcvtime"); } else if (Rflag) { xo_emit(" {T:/%8.8s} {T:/%5.5s}", "flowid", "ftype"); } if (cflag) { xo_emit(" {T:/%-*.*s}", fnamelen, fnamelen, "Stack"); } if (Cflag) xo_emit(" {T:/%-*.*s} {T:/%10.10s}" " {T:/%10.10s} {T:/%5.5s}" " {T:/%3.3s}", cnamelen, cnamelen, "CC", "cwin", "ssthresh", "MSS", "ECN"); if (Pflag) xo_emit(" {T:/%s}", "Log ID"); xo_emit("\n"); first = 0; } if (Lflag && so->so_qlimit == 0) continue; xo_open_instance("socket"); if (Aflag) { if (istcp) xo_emit("{q:address/%*lx} ", 2 * (int)sizeof(void *), (u_long)inp->inp_ppcb); else xo_emit("{q:address/%*lx} ", 2 * (int)sizeof(void *), (u_long)so->so_pcb); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "46" : "6"; else #endif vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "4" : ""; if (istcp && (tp->t_flags & TF_TOE) != 0) xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", "toe", vchar); else xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", name, vchar); if (Lflag) { char buf1[33]; snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen, so->so_incqlen, so->so_qlimit); xo_emit("{:listen-queue-sizes/%-32.32s} ", buf1); } else if (Tflag) { if (istcp) xo_emit("{:sent-retransmit-packets/%6u} " "{:received-out-of-order-packets/%6u} " "{:sent-zero-window/%6u} ", tp->t_sndrexmitpack, tp->t_rcvoopack, tp->t_sndzerowin); else xo_emit("{P:/%21s}", ""); } else { xo_emit("{:receive-bytes-waiting/%6u} " "{:send-bytes-waiting/%6u} ", so->so_rcv.sb_cc, so->so_snd.sb_cc); } if (numeric_port) { #ifdef INET if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 1, af1); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 1); } /* else nothing printed now */ #endif /* INET6 */ } else if (inp->inp_flags & INP_ANONPORT) { #ifdef INET if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 0, af1); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 0); } /* else nothing printed now */ #endif /* INET6 */ } else { #ifdef INET if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 0, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport, af1); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 0); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport); } /* else nothing printed now */ #endif /* INET6 */ } if (xflag) { - xo_emit("{:receive-mbufs/%6u} {:send-mbufs/%6u} " - "{:receive-clusters/%6u} {:send-clusters/%6u} " - "{:receive-high-water/%6u} {:send-high-water/%6u} " + xo_emit("{:receive-high-water/%6u} " + "{:send-high-water/%6u} " "{:receive-low-water/%6u} {:send-low-water/%6u} " "{:receive-mbuf-bytes/%6u} {:send-mbuf-bytes/%6u} " "{:receive-mbuf-bytes-max/%6u} " "{:send-mbuf-bytes-max/%6u}", - so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt, - so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt, so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat, so->so_rcv.sb_lowat, so->so_snd.sb_lowat, so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt, so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax); if (istcp) xo_emit(" {:retransmit-timer/%4d.%02d} " "{:persist-timer/%4d.%02d} " "{:keepalive-timer/%4d.%02d} " "{:msl2-timer/%4d.%02d} " "{:delay-ack-timer/%4d.%02d} " "{:inactivity-timer/%4d.%02d}", tp->tt_rexmt / 1000, (tp->tt_rexmt % 1000) / 10, tp->tt_persist / 1000, (tp->tt_persist % 1000) / 10, tp->tt_keep / 1000, (tp->tt_keep % 1000) / 10, tp->tt_2msl / 1000, (tp->tt_2msl % 1000) / 10, tp->tt_delack / 1000, (tp->tt_delack % 1000) / 10, tp->t_rcvtime / 1000, (tp->t_rcvtime % 1000) / 10); } if (istcp && !Lflag && !xflag && !Tflag && !Rflag) { if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES) xo_emit("{:tcp-state/%-11d}", tp->t_state); else { xo_emit("{:tcp-state/%-11s}", tcpstates[tp->t_state]); #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN) /* Show T/TCP `hidden state' */ if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) xo_emit("{:need-syn-or-fin/*}"); #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */ } } if (Rflag) { /* XXX: is this right Alfred */ xo_emit(" {:flow-id/%08x} {:flow-type/%5d}", inp->inp_flowid, inp->inp_flowtype); } if (istcp) { if (cflag) xo_emit(" {:stack/%-*.*s}", fnamelen, fnamelen, tp->xt_stack); if (Cflag) xo_emit(" {:cc/%-*.*s}" " {:snd-cwnd/%10lu}" " {:snd-ssthresh/%10lu}" " {:t-maxseg/%5u} {:ecn/%3s}", cnamelen, cnamelen, tp->xt_cc, tp->t_snd_cwnd, tp->t_snd_ssthresh, tp->t_maxseg, (tp->t_state >= TCPS_ESTABLISHED ? (tp->xt_ecn > 0 ? (tp->xt_ecn == 1 ? "ecn" : "ace") : "off") : "n/a")); if (Pflag) xo_emit(" {:log-id/%s}", tp->xt_logid[0] == '\0' ? "-" : tp->xt_logid); } xo_emit("\n"); xo_close_instance("socket"); } if (xig != oxig && xig->xig_gen != oxig->xig_gen) { if (oxig->xig_count > xig->xig_count) { xo_emit("Some {d:lost/%s} sockets may have been " "deleted.\n", name); } else if (oxig->xig_count < xig->xig_count) { xo_emit("Some {d:created/%s} sockets may have been " "created.\n", name); } else { xo_emit("Some {d:changed/%s} sockets may have been " "created or deleted.\n", name); } } free(buf); } /* * Dump TCP statistics structure. */ void tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct tcpstat tcpstat; uint64_t tcps_states[TCP_NSTATES]; #ifdef INET6 if (tcp_done != 0) return; else tcp_done = 1; #endif if (fetch_stats("net.inet.tcp.stats", off, &tcpstat, sizeof(tcpstat), kread_counters) != 0) return; if (fetch_stats_ro("net.inet.tcp.states", nl[N_TCPS_STATES].n_value, &tcps_states, sizeof(tcps_states), kread_counters) != 0) return; xo_open_container("tcp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, plural(tcpstat.f)) #define p1a(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f) #define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2, plural(tcpstat.f2)) #define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2) #define p3(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, pluralies(tcpstat.f)) p(tcps_sndtotal, "\t{:sent-packets/%ju} {N:/packet%s sent}\n"); p2(tcps_sndpack,tcps_sndbyte, "\t\t{:sent-data-packets/%ju} " "{N:/data packet%s} ({:sent-data-bytes/%ju} {N:/byte%s})\n"); p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t" "{:sent-retransmitted-packets/%ju} {N:/data packet%s} " "({:sent-retransmitted-bytes/%ju} {N:/byte%s}) " "{N:retransmitted}\n"); p(tcps_sndrexmitbad, "\t\t" "{:sent-unnecessary-retransmitted-packets/%ju} " "{N:/data packet%s unnecessarily retransmitted}\n"); p(tcps_mturesent, "\t\t{:sent-resends-by-mtu-discovery/%ju} " "{N:/resend%s initiated by MTU discovery}\n"); p2a(tcps_sndacks, tcps_delack, "\t\t{:sent-ack-only-packets/%ju} " "{N:/ack-only packet%s/} ({:sent-packets-delayed/%ju} " "{N:delayed})\n"); p(tcps_sndurg, "\t\t{:sent-urg-only-packets/%ju} " "{N:/URG only packet%s}\n"); p(tcps_sndprobe, "\t\t{:sent-window-probe-packets/%ju} " "{N:/window probe packet%s}\n"); p(tcps_sndwinup, "\t\t{:sent-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_sndctrl, "\t\t{:sent-control-packets/%ju} " "{N:/control packet%s}\n"); p(tcps_rcvtotal, "\t{:received-packets/%ju} " "{N:/packet%s received}\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t" "{:received-ack-packets/%ju} {N:/ack%s} " "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n"); p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} " "{N:/duplicate ack%s}\n"); p(tcps_tunneled_pkts, "\t\t{:received-udp-tunneled-pkts/%ju} " "{N:/UDP tunneled pkt%s}\n"); p(tcps_tunneled_errs, "\t\t{:received-bad-udp-tunneled-pkts/%ju} " "{N:/UDP tunneled pkt cnt with error%s}\n"); p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} " "{N:/ack%s for unsent data}\n"); p2(tcps_rcvpack, tcps_rcvbyte, "\t\t" "{:received-in-sequence-packets/%ju} {N:/packet%s} " "({:received-in-sequence-bytes/%ju} {N:/byte%s}) " "{N:received in-sequence}\n"); p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t" "{:received-completely-duplicate-packets/%ju} " "{N:/completely duplicate packet%s} " "({:received-completely-duplicate-bytes/%ju} {N:/byte%s})\n"); p(tcps_pawsdrop, "\t\t{:received-old-duplicate-packets/%ju} " "{N:/old duplicate packet%s}\n"); p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t" "{:received-some-duplicate-packets/%ju} " "{N:/packet%s with some dup. data} " "({:received-some-duplicate-bytes/%ju} {N:/byte%s duped/})\n"); p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t{:received-out-of-order/%ju} " "{N:/out-of-order packet%s} " "({:received-out-of-order-bytes/%ju} {N:/byte%s})\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t" "{:received-after-window-packets/%ju} {N:/packet%s} " "({:received-after-window-bytes/%ju} {N:/byte%s}) " "{N:of data after window}\n"); p(tcps_rcvwinprobe, "\t\t{:received-window-probes/%ju} " "{N:/window probe%s}\n"); p(tcps_rcvwinupd, "\t\t{:receive-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_dsack_count, "\t\t{:received-with-dsack-packets/%ju} " "{N:/packet%s received with dsack}\n"); p(tcps_dsack_bytes, "\t\t{:received-with-dsack-bytes/%ju} " "{N:/dsack byte%s received (no TLP involved)}\n"); p(tcps_dsack_tlp_bytes, "\t\t{:received-with-dsack-bytes-tlp/%ju} " "{N:/dsack byte%s received (TLP responsible)}\n"); p(tcps_rcvafterclose, "\t\t{:received-after-close-packets/%ju} " "{N:/packet%s received after close}\n"); p(tcps_rcvbadsum, "\t\t{:discard-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(tcps_rcvbadoff, "\t\t{:discard-bad-header-offset/%ju} " "{N:/discarded for bad header offset field%s}\n"); p1a(tcps_rcvshort, "\t\t{:discard-too-short/%ju} " "{N:discarded because packet too short}\n"); p1a(tcps_rcvreassfull, "\t\t{:discard-reassembly-queue-full/%ju} " "{N:discarded due to full reassembly queue}\n"); p(tcps_connattempt, "\t{:connection-requests/%ju} " "{N:/connection request%s}\n"); p(tcps_accepts, "\t{:connections-accepts/%ju} " "{N:/connection accept%s}\n"); p(tcps_badsyn, "\t{:bad-connection-attempts/%ju} " "{N:/bad connection attempt%s}\n"); p(tcps_listendrop, "\t{:listen-queue-overflows/%ju} " "{N:/listen queue overflow%s}\n"); p(tcps_badrst, "\t{:ignored-in-window-resets/%ju} " "{N:/ignored RSTs in the window%s}\n"); p(tcps_connects, "\t{:connections-established/%ju} " "{N:/connection%s established (including accepts)}\n"); p(tcps_usedrtt, "\t\t{:connections-hostcache-rtt/%ju} " "{N:/time%s used RTT from hostcache}\n"); p(tcps_usedrttvar, "\t\t{:connections-hostcache-rttvar/%ju} " "{N:/time%s used RTT variance from hostcache}\n"); p(tcps_usedssthresh, "\t\t{:connections-hostcache-ssthresh/%ju} " "{N:/time%s used slow-start threshold from hostcache}\n"); p2(tcps_closed, tcps_drops, "\t{:connections-closed/%ju} " "{N:/connection%s closed (including} " "{:connection-drops/%ju} {N:/drop%s})\n"); p(tcps_cachedrtt, "\t\t{:connections-updated-rtt-on-close/%ju} " "{N:/connection%s updated cached RTT on close}\n"); p(tcps_cachedrttvar, "\t\t" "{:connections-updated-variance-on-close/%ju} " "{N:/connection%s updated cached RTT variance on close}\n"); p(tcps_cachedssthresh, "\t\t" "{:connections-updated-ssthresh-on-close/%ju} " "{N:/connection%s updated cached ssthresh on close}\n"); p(tcps_conndrops, "\t{:embryonic-connections-dropped/%ju} " "{N:/embryonic connection%s dropped}\n"); p2(tcps_rttupdated, tcps_segstimed, "\t{:segments-updated-rtt/%ju} " "{N:/segment%s updated rtt (of} " "{:segment-update-attempts/%ju} {N:/attempt%s})\n"); p(tcps_rexmttimeo, "\t{:retransmit-timeouts/%ju} " "{N:/retransmit timeout%s}\n"); p(tcps_timeoutdrop, "\t\t" "{:connections-dropped-by-retransmit-timeout/%ju} " "{N:/connection%s dropped by rexmit timeout}\n"); p(tcps_persisttimeo, "\t{:persist-timeout/%ju} " "{N:/persist timeout%s}\n"); p(tcps_persistdrop, "\t\t" "{:connections-dropped-by-persist-timeout/%ju} " "{N:/connection%s dropped by persist timeout}\n"); p(tcps_finwait2_drops, "\t" "{:connections-dropped-by-finwait2-timeout/%ju} " "{N:/Connection%s (fin_wait_2) dropped because of timeout}\n"); p(tcps_keeptimeo, "\t{:keepalive-timeout/%ju} " "{N:/keepalive timeout%s}\n"); p(tcps_keepprobe, "\t\t{:keepalive-probes/%ju} " "{N:/keepalive probe%s sent}\n"); p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} " "{N:/connection%s dropped by keepalive}\n"); p(tcps_predack, "\t{:ack-header-predictions/%ju} " "{N:/correct ACK header prediction%s}\n"); p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} " "{N:/correct data packet header prediction%s}\n"); xo_open_container("syncache"); p3(tcps_sc_added, "\t{:entries-added/%ju} " "{N:/syncache entr%s added}\n"); p1a(tcps_sc_retransmitted, "\t\t{:retransmitted/%ju} " "{N:/retransmitted}\n"); p1a(tcps_sc_dupsyn, "\t\t{:duplicates/%ju} {N:/dupsyn}\n"); p1a(tcps_sc_dropped, "\t\t{:dropped/%ju} {N:/dropped}\n"); p1a(tcps_sc_completed, "\t\t{:completed/%ju} {N:/completed}\n"); p1a(tcps_sc_bucketoverflow, "\t\t{:bucket-overflow/%ju} " "{N:/bucket overflow}\n"); p1a(tcps_sc_cacheoverflow, "\t\t{:cache-overflow/%ju} " "{N:/cache overflow}\n"); p1a(tcps_sc_reset, "\t\t{:reset/%ju} {N:/reset}\n"); p1a(tcps_sc_stale, "\t\t{:stale/%ju} {N:/stale}\n"); p1a(tcps_sc_aborted, "\t\t{:aborted/%ju} {N:/aborted}\n"); p1a(tcps_sc_badack, "\t\t{:bad-ack/%ju} {N:/badack}\n"); p1a(tcps_sc_unreach, "\t\t{:unreachable/%ju} {N:/unreach}\n"); p(tcps_sc_zonefail, "\t\t{:zone-failures/%ju} {N:/zone failure%s}\n"); p(tcps_sc_sendcookie, "\t{:sent-cookies/%ju} {N:/cookie%s sent}\n"); p(tcps_sc_recvcookie, "\t{:receivd-cookies/%ju} " "{N:/cookie%s received}\n"); xo_close_container("syncache"); xo_open_container("hostcache"); p3(tcps_hc_added, "\t{:entries-added/%ju} " "{N:/hostcache entr%s added}\n"); p1a(tcps_hc_bucketoverflow, "\t\t{:buffer-overflows/%ju} " "{N:/bucket overflow}\n"); xo_close_container("hostcache"); xo_open_container("sack"); p(tcps_sack_recovery_episode, "\t{:recovery-episodes/%ju} " "{N:/SACK recovery episode%s}\n"); p(tcps_sack_rexmits, "\t{:segment-retransmits/%ju} " "{N:/segment rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rexmit_bytes, "\t{:byte-retransmits/%ju} " "{N:/byte rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rcv_blocks, "\t{:received-blocks/%ju} " "{N:/SACK option%s (SACK blocks) received}\n"); p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} " "{N:/SACK option%s (SACK blocks) sent}\n"); p(tcps_sack_lostrexmt, "\t{:lost-retransmissions/%ju} " "{N:/SACK retransmission%s lost}\n"); p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} " "{N:/SACK scoreboard overflow}\n"); xo_close_container("sack"); xo_open_container("ecn"); p(tcps_ecn_ce, "\t{:ce-packets/%ju} " "{N:/packet%s with ECN CE bit set}\n"); p(tcps_ecn_ect0, "\t{:ect0-packets/%ju} " "{N:/packet%s with ECN ECT(0) bit set}\n"); p(tcps_ecn_ect1, "\t{:ect1-packets/%ju} " "{N:/packet%s with ECN ECT(1) bit set}\n"); p(tcps_ecn_shs, "\t{:handshakes/%ju} " "{N:/successful ECN handshake%s}\n"); p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} " "{N:/time%s ECN reduced the congestion window}\n"); p(tcps_ace_nect, "\t{:ace-nonect-syn/%ju} " "{N:/ACE SYN packet%s with Non-ECT}\n"); p(tcps_ace_ect0, "\t{:ace-ect0-syn/%ju} " "{N:/ACE SYN packet%s with ECT0}\n"); p(tcps_ace_ect1, "\t{:ace-ect1-syn/%ju} " "{N:/ACE SYN packet%s with ECT1}\n"); p(tcps_ace_ce, "\t{:ace-ce-syn/%ju} " "{N:/ACE SYN packet%s with CE}\n"); xo_close_container("ecn"); xo_open_container("tcp-signature"); p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} " "{N:/packet%s with matching signature received}\n"); p(tcps_sig_rcvbadsig, "\t{:received-bad-signature/%ju} " "{N:/packet%s with bad signature received}\n"); p(tcps_sig_err_buildsig, "\t{:failed-make-signature/%ju} " "{N:/time%s failed to make signature due to no SA}\n"); p(tcps_sig_err_sigopt, "\t{:no-signature-expected/%ju} " "{N:/time%s unexpected signature received}\n"); p(tcps_sig_err_nosigopt, "\t{:no-signature-provided/%ju} " "{N:/time%s no signature provided by segment}\n"); xo_close_container("tcp-signature"); xo_open_container("pmtud"); p(tcps_pmtud_blackhole_activated, "\t{:pmtud-activated/%ju} " "{N:/Path MTU discovery black hole detection activation%s}\n"); p(tcps_pmtud_blackhole_activated_min_mss, "\t{:pmtud-activated-min-mss/%ju} " "{N:/Path MTU discovery black hole detection min MSS activation%s}\n"); p(tcps_pmtud_blackhole_failed, "\t{:pmtud-failed/%ju} " "{N:/Path MTU discovery black hole detection failure%s}\n"); xo_close_container("pmtud"); xo_open_container("tw"); p(tcps_tw_responds, "\t{:tw_responds/%ju} " "{N:/time%s connection in TIME-WAIT responded with ACK}\n"); p(tcps_tw_recycles, "\t{:tw_recycles/%ju} " "{N:/time%s connection in TIME-WAIT was actively recycled}\n"); p(tcps_tw_resets, "\t{:tw_resets/%ju} " "{N:/time%s connection in TIME-WAIT responded with RST}\n"); xo_close_container("tw"); #undef p #undef p1a #undef p2 #undef p2a #undef p3 xo_open_container("TCP connection count by state"); xo_emit("{T:/TCP connection count by state}:\n"); for (int i = 0; i < TCP_NSTATES; i++) { /* * XXXGL: is there a way in libxo to use %s * in the "content string" of a format * string? I failed to do that, that's why * a temporary buffer is used to construct * format string for xo_emit(). */ char fmtbuf[80]; if (sflag > 1 && tcps_states[i] == 0) continue; snprintf(fmtbuf, sizeof(fmtbuf), "\t{:%s/%%ju} " "{Np:/connection ,connections} in %s state\n", tcpstates[i], tcpstates[i]); xo_emit(fmtbuf, (uintmax_t )tcps_states[i]); } xo_close_container("TCP connection count by state"); xo_close_container("tcp"); } /* * Dump UDP statistics structure. */ void udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct udpstat udpstat; uint64_t delivered; #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif if (fetch_stats("net.inet.udp.stats", off, &udpstat, sizeof(udpstat), kread_counters) != 0) return; xo_open_container("udp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f, plural(udpstat.f)) #define p1a(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f) p(udps_ipackets, "{:received-datagrams/%ju} " "{N:/datagram%s received}\n"); p1a(udps_hdrops, "{:dropped-incomplete-headers/%ju} " "{N:/with incomplete header}\n"); p1a(udps_badlen, "{:dropped-bad-data-length/%ju} " "{N:/with bad data length field}\n"); p1a(udps_badsum, "{:dropped-bad-checksum/%ju} " "{N:/with bad checksum}\n"); p1a(udps_nosum, "{:dropped-no-checksum/%ju} " "{N:/with no checksum}\n"); p1a(udps_noport, "{:dropped-no-socket/%ju} " "{N:/dropped due to no socket}\n"); p(udps_noportbcast, "{:dropped-broadcast-multicast/%ju} " "{N:/broadcast\\/multicast datagram%s undelivered}\n"); p1a(udps_fullsock, "{:dropped-full-socket-buffer/%ju} " "{N:/dropped due to full socket buffers}\n"); p1a(udpps_pcbhashmiss, "{:not-for-hashed-pcb/%ju} " "{N:/not for hashed pcb}\n"); delivered = udpstat.udps_ipackets - udpstat.udps_hdrops - udpstat.udps_badlen - udpstat.udps_badsum - udpstat.udps_noport - udpstat.udps_noportbcast - udpstat.udps_fullsock; if (delivered || sflag <= 1) xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n", (uint64_t)delivered); p(udps_opackets, "{:output-packets/%ju} {N:/datagram%s output}\n"); /* the next statistic is cumulative in udps_noportbcast */ p(udps_filtermcast, "{:multicast-source-filter-matches/%ju} " "{N:/time%s multicast source filter matched}\n"); #undef p #undef p1a xo_close_container("udp"); } /* * Dump CARP statistics structure. */ void carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct carpstats carpstat; if (fetch_stats("net.inet.carp.stats", off, &carpstat, sizeof(carpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f, plural(carpstat.f)) #define p2(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f) p(carps_ipackets, "\t{:received-inet-packets/%ju} " "{N:/packet%s received (IPv4)}\n"); p(carps_ipackets6, "\t{:received-inet6-packets/%ju} " "{N:/packet%s received (IPv6)}\n"); p(carps_badttl, "\t\t{:dropped-wrong-ttl/%ju} " "{N:/packet%s discarded for wrong TTL}\n"); p(carps_hdrops, "\t\t{:dropped-short-header/%ju} " "{N:/packet%s shorter than header}\n"); p(carps_badsum, "\t\t{:dropped-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(carps_badver, "\t\t{:dropped-bad-version/%ju} " "{N:/discarded packet%s with a bad version}\n"); p2(carps_badlen, "\t\t{:dropped-short-packet/%ju} " "{N:/discarded because packet too short}\n"); p2(carps_badauth, "\t\t{:dropped-bad-authentication/%ju} " "{N:/discarded for bad authentication}\n"); p2(carps_badvhid, "\t\t{:dropped-bad-vhid/%ju} " "{N:/discarded for bad vhid}\n"); p2(carps_badaddrs, "\t\t{:dropped-bad-address-list/%ju} " "{N:/discarded because of a bad address list}\n"); p(carps_opackets, "\t{:sent-inet-packets/%ju} " "{N:/packet%s sent (IPv4)}\n"); p(carps_opackets6, "\t{:sent-inet6-packets/%ju} " "{N:/packet%s sent (IPv6)}\n"); p2(carps_onomem, "\t\t{:send-failed-memory-error/%ju} " "{N:/send failed due to mbuf memory error}\n"); #if notyet p(carps_ostates, "\t\t{:send-state-updates/%s} " "{N:/state update%s sent}\n"); #endif #undef p #undef p2 xo_close_container(name); } /* * Dump IP statistics structure. */ void ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct ipstat ipstat; if (fetch_stats("net.inet.ip.stats", off, &ipstat, sizeof(ipstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f, plural(ipstat.f)) #define p1a(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f) p(ips_total, "\t{:received-packets/%ju} " "{N:/total packet%s received}\n"); p(ips_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/bad header checksum%s}\n"); p1a(ips_toosmall, "\t{:dropped-below-minimum-size/%ju} " "{N:/with size smaller than minimum}\n"); p1a(ips_tooshort, "\t{:dropped-short-packets/%ju} " "{N:/with data size < data length}\n"); p1a(ips_toolong, "\t{:dropped-too-long/%ju} " "{N:/with ip length > max ip packet size}\n"); p1a(ips_badhlen, "\t{:dropped-short-header-length/%ju} " "{N:/with header length < data size}\n"); p1a(ips_badlen, "\t{:dropped-short-data/%ju} " "{N:/with data length < header length}\n"); p1a(ips_badoptions, "\t{:dropped-bad-options/%ju} " "{N:/with bad options}\n"); p1a(ips_badvers, "\t{:dropped-bad-version/%ju} " "{N:/with incorrect version number}\n"); p(ips_fragments, "\t{:received-fragments/%ju} " "{N:/fragment%s received}\n"); p(ips_fragdropped, "\t{:dropped-fragments/%ju} " "{N:/fragment%s dropped (dup or out of space)}\n"); p(ips_fragtimeout, "\t{:dropped-fragments-after-timeout/%ju} " "{N:/fragment%s dropped after timeout}\n"); p(ips_reassembled, "\t{:reassembled-packets/%ju} " "{N:/packet%s reassembled ok}\n"); p(ips_delivered, "\t{:received-local-packets/%ju} " "{N:/packet%s for this host}\n"); p(ips_noproto, "\t{:dropped-unknown-protocol/%ju} " "{N:/packet%s for unknown\\/unsupported protocol}\n"); p(ips_forward, "\t{:forwarded-packets/%ju} " "{N:/packet%s forwarded}"); p(ips_fastforward, " ({:fast-forwarded-packets/%ju} " "{N:/packet%s fast forwarded})"); if (ipstat.ips_forward || sflag <= 1) xo_emit("\n"); p(ips_cantforward, "\t{:packets-cannot-forward/%ju} " "{N:/packet%s not forwardable}\n"); p(ips_notmember, "\t{:received-unknown-multicast-group/%ju} " "{N:/packet%s received for unknown multicast group}\n"); p(ips_redirectsent, "\t{:redirects-sent/%ju} " "{N:/redirect%s sent}\n"); p(ips_localout, "\t{:sent-packets/%ju} " "{N:/packet%s sent from this host}\n"); p(ips_rawout, "\t{:send-packets-fabricated-header/%ju} " "{N:/packet%s sent with fabricated ip header}\n"); p(ips_odropped, "\t{:discard-no-mbufs/%ju} " "{N:/output packet%s dropped due to no bufs, etc.}\n"); p(ips_noroute, "\t{:discard-no-route/%ju} " "{N:/output packet%s discarded due to no route}\n"); p(ips_fragmented, "\t{:sent-fragments/%ju} " "{N:/output datagram%s fragmented}\n"); p(ips_ofragments, "\t{:fragments-created/%ju} " "{N:/fragment%s created}\n"); p(ips_cantfrag, "\t{:discard-cannot-fragment/%ju} " "{N:/datagram%s that can't be fragmented}\n"); p(ips_nogif, "\t{:discard-tunnel-no-gif/%ju} " "{N:/tunneling packet%s that can't find gif}\n"); p(ips_badaddr, "\t{:discard-bad-address/%ju} " "{N:/datagram%s with bad address in header}\n"); #undef p #undef p1a xo_close_container(name); } /* * Dump ARP statistics structure. */ void arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct arpstat arpstat; if (fetch_stats("net.link.ether.arp.stats", off, &arpstat, sizeof(arpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, plural(arpstat.f)) #define p2(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, pluralies(arpstat.f)) p(txrequests, "{:sent-requests/%ju} {N:/ARP request%s sent}\n"); p(txerrors, "{:sent-failures/%ju} {N:/ARP request%s failed to sent}\n"); p2(txreplies, "{:sent-replies/%ju} {N:/ARP repl%s sent}\n"); p(rxrequests, "{:received-requests/%ju} " "{N:/ARP request%s received}\n"); p2(rxreplies, "{:received-replies/%ju} " "{N:/ARP repl%s received}\n"); p(received, "{:received-packets/%ju} " "{N:/ARP packet%s received}\n"); p(dropped, "{:dropped-no-entry/%ju} " "{N:/total packet%s dropped due to no ARP entry}\n"); p(timeouts, "{:entries-timeout/%ju} " "{N:/ARP entry%s timed out}\n"); p(dupips, "{:dropped-duplicate-address/%ju} " "{N:/Duplicate IP%s seen}\n"); #undef p #undef p2 xo_close_container(name); } static const char *icmpnames[ICMP_MAXTYPE + 1] = { "echo reply", /* RFC 792 */ "#1", "#2", "destination unreachable", /* RFC 792 */ "source quench", /* RFC 792 */ "routing redirect", /* RFC 792 */ "#6", "#7", "echo", /* RFC 792 */ "router advertisement", /* RFC 1256 */ "router solicitation", /* RFC 1256 */ "time exceeded", /* RFC 792 */ "parameter problem", /* RFC 792 */ "time stamp", /* RFC 792 */ "time stamp reply", /* RFC 792 */ "information request", /* RFC 792 */ "information request reply", /* RFC 792 */ "address mask request", /* RFC 950 */ "address mask reply", /* RFC 950 */ "#19", "#20", "#21", "#22", "#23", "#24", "#25", "#26", "#27", "#28", "#29", "icmp traceroute", /* RFC 1393 */ "datagram conversion error", /* RFC 1475 */ "mobile host redirect", "IPv6 where-are-you", "IPv6 i-am-here", "mobile registration req", "mobile registration reply", "domain name request", /* RFC 1788 */ "domain name reply", /* RFC 1788 */ "icmp SKIP", "icmp photuris", /* RFC 2521 */ }; /* * Dump ICMP statistics. */ void icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct icmpstat icmpstat; size_t len; int i, first; if (fetch_stats("net.inet.icmp.stats", off, &icmpstat, sizeof(icmpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plural(icmpstat.f)) #define p1a(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f) #define p2(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plurales(icmpstat.f)) p(icps_error, "\t{:icmp-calls/%lu} " "{N:/call%s to icmp_error}\n"); p(icps_oldicmp, "\t{:errors-not-from-message/%lu} " "{N:/error%s not generated in response to an icmp message}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_outhist[i] != 0) { if (first) { xo_open_list("output-histogram"); xo_emit("\tOutput histogram:\n"); first = 0; } xo_open_instance("output-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_outhist[i]); else xo_emit("\t\tunknown ICMP #{k:name/%d}: " "{:count/%lu}\n", i, icmpstat.icps_outhist[i]); xo_close_instance("output-histogram"); } } if (!first) xo_close_list("output-histogram"); p(icps_badcode, "\t{:dropped-bad-code/%lu} " "{N:/message%s with bad code fields}\n"); p(icps_tooshort, "\t{:dropped-too-short/%lu} " "{N:/message%s less than the minimum length}\n"); p(icps_checksum, "\t{:dropped-bad-checksum/%lu} " "{N:/message%s with bad checksum}\n"); p(icps_badlen, "\t{:dropped-bad-length/%lu} " "{N:/message%s with bad length}\n"); p1a(icps_bmcastecho, "\t{:dropped-multicast-echo/%lu} " "{N:/multicast echo requests ignored}\n"); p1a(icps_bmcasttstamp, "\t{:dropped-multicast-timestamp/%lu} " "{N:/multicast timestamp requests ignored}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_inhist[i] != 0) { if (first) { xo_open_list("input-histogram"); xo_emit("\tInput histogram:\n"); first = 0; } xo_open_instance("input-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_inhist[i]); else xo_emit( "\t\tunknown ICMP #{k:name/%d}: {:count/%lu}\n", i, icmpstat.icps_inhist[i]); xo_close_instance("input-histogram"); } } if (!first) xo_close_list("input-histogram"); p(icps_reflect, "\t{:sent-packets/%lu} " "{N:/message response%s generated}\n"); p2(icps_badaddr, "\t{:discard-invalid-return-address/%lu} " "{N:/invalid return address%s}\n"); p(icps_noroute, "\t{:discard-no-route/%lu} " "{N:/no return route%s}\n"); #undef p #undef p1a #undef p2 if (live) { len = sizeof i; if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) < 0) return; xo_emit("\tICMP address mask responses are " "{q:icmp-address-responses/%sabled}\n", i ? "en" : "dis"); } xo_close_container(name); } /* * Dump IGMP statistics structure. */ void igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct igmpstat igmpstat; int error, zflag0; if (fetch_stats("net.inet.igmp.stats", 0, &igmpstat, sizeof(igmpstat), kread) != 0) return; /* * Reread net.inet.igmp.stats when zflag == 1. * This is because this MIB contains version number and * length of the structure which are not set when clearing * the counters. */ zflag0 = zflag; if (zflag) { zflag = 0; error = fetch_stats("net.inet.igmp.stats", 0, &igmpstat, sizeof(igmpstat), kread); zflag = zflag0; if (error) return; } if (igmpstat.igps_version != IGPS_VERSION_3) { xo_warnx("%s: version mismatch (%d != %d)", __func__, igmpstat.igps_version, IGPS_VERSION_3); return; } if (igmpstat.igps_len != IGPS_VERSION3_LEN) { xo_warnx("%s: size mismatch (%d != %d)", __func__, igmpstat.igps_len, IGPS_VERSION3_LEN); return; } xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, plural(igmpstat.f)) #define py64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f)) p64(igps_rcv_total, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p64(igps_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p64(igps_rcv_badttl, "\t{:dropped-wrong-ttl/%ju} " "{N:/message%s received with wrong TTL}\n"); p64(igps_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); py64(igps_rcv_v1v2_queries, "\t{:received-membership-queries/%ju} " "{N:/V1\\/V2 membership quer%s received}\n"); py64(igps_rcv_v3_queries, "\t{:received-v3-membership-queries/%ju} " "{N:/V3 membership quer%s received}\n"); py64(igps_rcv_badqueries, "\t{:dropped-membership-queries/%ju} " "{N:/membership quer%s received with invalid field(s)}\n"); py64(igps_rcv_gen_queries, "\t{:received-general-queries/%ju} " "{N:/general quer%s received}\n"); py64(igps_rcv_group_queries, "\t{:received-group-queries/%ju} " "{N:/group quer%s received}\n"); py64(igps_rcv_gsr_queries, "\t{:received-group-source-queries/%ju} " "{N:/group-source quer%s received}\n"); py64(igps_drop_gsr_queries, "\t{:dropped-group-source-queries/%ju} " "{N:/group-source quer%s dropped}\n"); p64(igps_rcv_reports, "\t{:received-membership-requests/%ju} " "{N:/membership report%s received}\n"); p64(igps_rcv_badreports, "\t{:dropped-membership-reports/%ju} " "{N:/membership report%s received with invalid field(s)}\n"); p64(igps_rcv_ourreports, "\t" "{:received-membership-reports-matching/%ju} " "{N:/membership report%s received for groups to which we belong}" "\n"); p64(igps_rcv_nora, "\t{:received-v3-reports-no-router-alert/%ju} " "{N:/V3 report%s received without Router Alert}\n"); p64(igps_snd_reports, "\t{:sent-membership-reports/%ju} " "{N:/membership report%s sent}\n"); #undef p64 #undef py64 xo_close_container(name); } /* * Dump PIM statistics structure. */ void pim_stats(u_long off __unused, const char *name, int af1 __unused, int proto __unused) { struct pimstat pimstat; if (fetch_stats("net.inet.pim.stats", off, &pimstat, sizeof(pimstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, plural(pimstat.f)) #define py(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y") p(pims_rcv_total_msgs, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p(pims_rcv_total_bytes, "\t{:received-bytes/%ju} " "{N:/byte%s received}\n"); p(pims_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p(pims_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); p(pims_rcv_badversion, "\t{:dropped-bad-version/%ju} " "{N:/message%s received with bad version}\n"); p(pims_rcv_registers_msgs, "\t{:received-data-register-messages/%ju} " "{N:/data register message%s received}\n"); p(pims_rcv_registers_bytes, "\t{:received-data-register-bytes/%ju} " "{N:/data register byte%s received}\n"); p(pims_rcv_registers_wrongiif, "\t" "{:received-data-register-wrong-interface/%ju} " "{N:/data register message%s received on wrong iif}\n"); p(pims_rcv_badregisters, "\t{:received-bad-registers/%ju} " "{N:/bad register%s received}\n"); p(pims_snd_registers_msgs, "\t{:sent-data-register-messages/%ju} " "{N:/data register message%s sent}\n"); p(pims_snd_registers_bytes, "\t{:sent-data-register-bytes/%ju} " "{N:/data register byte%s sent}\n"); #undef p #undef py xo_close_container(name); } #ifdef INET /* * Pretty print an Internet address (net address + port). */ static void inetprint(const char *container, struct in_addr *in, int port, const char *proto, int num_port, const int af1) { struct servent *sp = 0; char line[80], *cp; int width; size_t alen, plen; if (container) xo_open_container(container); if (Wflag) snprintf(line, sizeof(line), "%s.", inetname(in)); else snprintf(line, sizeof(line), "%.*s.", (Aflag && !num_port) ? 12 : 16, inetname(in)); alen = strlen(line); cp = line + alen; if (!num_port && port) sp = getservbyport((int)port, proto); if (sp || port == 0) snprintf(cp, sizeof(line) - alen, "%.15s ", sp ? sp->s_name : "*"); else snprintf(cp, sizeof(line) - alen, "%d ", ntohs((u_short)port)); width = (Aflag && !Wflag) ? 18 : ((!Wflag || af1 == AF_INET) ? 22 : 45); if (Wflag) xo_emit("{d:target/%-*s} ", width, line); else xo_emit("{d:target/%-*.*s} ", width, width, line); plen = strlen(cp) - 1; alen--; xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen, plen, cp); if (container) xo_close_container(container); } /* * Construct an Internet address representation. * If numeric_addr has been supplied, give * numeric value, otherwise try for symbolic name. */ char * inetname(struct in_addr *inp) { char *cp; static char line[MAXHOSTNAMELEN]; struct hostent *hp; cp = 0; if (!numeric_addr && inp->s_addr != INADDR_ANY) { hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET); if (hp) { cp = hp->h_name; trimdomain(cp, strlen(cp)); } } if (inp->s_addr == INADDR_ANY) strcpy(line, "*"); else if (cp) { strlcpy(line, cp, sizeof(line)); } else { inp->s_addr = ntohl(inp->s_addr); #define C(x) ((u_int)((x) & 0xff)) snprintf(line, sizeof(line), "%u.%u.%u.%u", C(inp->s_addr >> 24), C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr)); } return (line); } #endif diff --git a/usr.bin/netstat/netstat.1 b/usr.bin/netstat/netstat.1 index e4186b9c9999..0907b5bc7a9f 100644 --- a/usr.bin/netstat/netstat.1 +++ b/usr.bin/netstat/netstat.1 @@ -1,855 +1,850 @@ .\" Copyright (c) 1983, 1990, 1992, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)netstat.1 8.8 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd September 25, 2020 +.Dd May 27, 2022 .Dt NETSTAT 1 .Os .Sh NAME .Nm netstat .Nd show network status and statistics .Sh SYNOPSIS .Bk -words .Bl -tag -width "netstat" .It Nm .Op Fl -libxo .Op Fl 46AaCLnPRSTWx .Op Fl f Ar protocol_family | Fl p Ar protocol .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl i | I Ar interface .Op Fl -libxo .Op Fl 46abdhnW .Op Fl f Ar address_family .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl w Ar wait .Op Fl -libxo .Op Fl I Ar interface .Op Fl 46d .Op Fl M Ar core .Op Fl N Ar system .Op Fl q Ar howmany .It Nm Fl s .Op Fl -libxo .Op Fl 46sz .Op Fl f Ar protocol_family | Fl p Ar protocol .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl i | I Ar interface Fl s .Op Fl -libxo .Op Fl 46s .Op Fl f Ar protocol_family | Fl p Ar protocol .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl m .Op Fl -libxo .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl B .Op Fl -libxo .Op Fl z .Op Fl I Ar interface .It Nm Fl r .Op Fl -libxo .Op Fl 46nW .Op Fl F Ar fibnum .Op Fl f Ar address_family .It Nm Fl rs .Op Fl -libxo .Op Fl s .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl g .Op Fl -libxo .Op Fl 46W .Op Fl f Ar address_family .It Nm Fl gs .Op Fl -libxo .Op Fl 46s .Op Fl f Ar address_family .Op Fl M Ar core .Op Fl N Ar system .It Nm Fl Q .Op Fl -libxo .El .Ek .Sh DESCRIPTION The .Nm command symbolically displays the contents of various network-related data structures. There are a number of output formats, depending on the options for the information presented. .Bl -tag -width indent .It Xo .Bk -words .Nm .Op Fl 46AaCLnRSTWx .Op Fl f Ar protocol_family | Fl p Ar protocol .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Display a list of active sockets (protocol control blocks) for each network protocol. .Pp The default display for active sockets shows the local and remote addresses, send and receive queue sizes (in bytes), protocol, and the internal state of the protocol. Address formats are of the form .Dq host.port or .Dq network.port if a socket's address specifies a network but no specific host address. When known, the host and network addresses are displayed symbolically according to the databases .Xr hosts 5 and .Xr networks 5 , respectively. If a symbolic name for an address is unknown, or if the .Fl n option is specified, the address is printed numerically, according to the address family. For more information regarding the Internet IPv4 .Dq dot format , refer to .Xr inet 3 . Unspecified, or .Dq wildcard , addresses and ports appear as .Dq Li * . .Bl -tag -width indent .It Fl -libxo Generate output via .Xr libxo 3 in a selection of different human and machine readable formats. See .Xr xo_parse_args 3 for details on command line arguments. .It Fl 4 Show IPv4 only. See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only. See .Sx GENERAL OPTIONS . .It Fl A Show the address of a protocol control block (PCB) associated with a socket; used for debugging. .It Fl a Show the state of all sockets; normally sockets used by server processes are not shown. .It Fl c Show the used TCP stack for each session. .It Fl C Show the congestion control algorithm and diagnostic information of TCP sockets. .It Fl L Show the size of the various listen queues. The first count shows the number of unaccepted connections, the second count shows the amount of unaccepted incomplete connections, and the third count is the maximum number of queued connections. .It Fl n Do not resolve numeric addresses and port numbers to names. See .Sx GENERAL OPTIONS . .It Fl P Display the log ID for each socket. .It Fl R Display the flowid and flowtype for each socket. flowid is a 32 bit hardware specific identifier for each flow. flowtype defines which protocol fields are hashed to produce the id. A complete listing is available in .Pa sys/mbuf.h under .Dv M_HASHTYPE_* . .It Fl S Show network addresses as numbers (as with .Fl n ) but show ports symbolically. .It Fl T Display diagnostic information from the TCP control block. Fields include the number of packets requiring retransmission, received out-of-order, and those advertising a zero-sized window. .It Fl W Avoid truncating addresses even if this causes some fields to overflow. .It Fl x Display socket buffer and TCP timer statistics for each internet socket. .Pp The .Fl x flag causes .Nm to output all the information recorded about data stored in the socket buffers. The fields are: -.Bl -column ".Li R-MBUF" -.It Li R-MBUF Ta Number of mbufs in the receive queue. -.It Li S-MBUF Ta Number of mbufs in the send queue. -.It Li R-CLUS Ta Number of clusters, of any type, in the receive -queue. -.It Li S-CLUS Ta Number of clusters, of any type, in the send queue. +.Bl -column ".Li R-HIWA" .It Li R-HIWA Ta Receive buffer high water mark, in bytes. .It Li S-HIWA Ta Send buffer high water mark, in bytes. .It Li R-LOWA Ta Receive buffer low water mark, in bytes. .It Li S-LOWA Ta Send buffer low water mark, in bytes. .It Li R-BCNT Ta Receive buffer byte count. .It Li S-BCNT Ta Send buffer byte count. .It Li R-BMAX Ta Maximum bytes that can be used in the receive buffer. .It Li S-BMAX Ta Maximum bytes that can be used in the send buffer. .It Li rexmt Ta Time, in seconds, to fire Retransmit Timer, or 0 if not armed. .It Li persist Ta Time, in seconds, to fire Retransmit Persistence, or 0 if not armed. .It Li keep Ta Time, in seconds, to fire Keep Alive, or 0 if not armed. .It Li 2msl Ta Time, in seconds, to fire 2*msl TIME_WAIT Timer, or 0 if not armed. .It Li delack Ta Time, in seconds, to fire Delayed ACK Timer, or 0 if not armed. .It Li rcvtime Ta Time, in seconds, since last packet received. .El .It Fl f Ar protocol_family Filter by .Ar protocol_family . See .Sx GENERAL OPTIONS . .It Fl p Ar protocol Filter by .Ar protocol . See .Sx GENERAL OPTIONS . .It Fl M Use an alternative core. See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image. See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl i | I Ar interface .Op Fl 46abdhnW .Op Fl f Ar address_family .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Show the state of all network interfaces or a single .Ar interface which have been auto-configured (interfaces statically configured into a system, but not located at boot time are not shown). An asterisk .Pq Dq Li * after an interface name indicates that the interface is .Dq down . .Pp When .Nm is invoked with .Fl i .Pq all interfaces or .Fl I Ar interface , it provides a table of cumulative statistics regarding packets transferred, errors, and collisions. The network addresses of the interface and the maximum transmission unit .Pq Dq mtu are also displayed. .Bl -tag -width indent .It Fl 4 Show IPv4 only. See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only. See .Sx GENERAL OPTIONS . .It Fl a Multicast addresses currently in use are shown for each Ethernet interface and for each IP interface address. Multicast addresses are shown on separate lines following the interface address with which they are associated. .It Fl b Show the number of bytes in and out. .It Fl d Show the number of dropped packets. .It Fl h Print all counters in human readable form. .It Fl n Do not resolve numeric addresses and port numbers to names. See .Sx GENERAL OPTIONS . .It Fl W Avoid truncating interface names even if this causes some fields to overflow. .Sx GENERAL OPTIONS . .It Fl f Ar protocol_family Filter by .Ar protocol_family . See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl w Ar wait .Op Fl I Ar interface .Op Fl 46d .Op Fl M Ar core .Op Fl N Ar system .Op Fl q Ar howmany .Ek .Xc At intervals of .Ar wait seconds, display the information regarding packet traffic on all configured network interfaces or a single .Ar interface . .Pp When .Nm is invoked with the .Fl w option and a .Ar wait interval argument, it displays a running count of statistics related to network interfaces. An obsolescent version of this option used a numeric parameter with no option, and is currently supported for backward compatibility. By default, this display summarizes information for all interfaces. Information for a specific interface may be displayed with the .Fl I Ar interface option. .Bl -tag -width indent .It Fl I Ar interface Only show information regarding .Ar interface .It Fl 4 Show IPv4 only. See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only. See .Sx GENERAL OPTIONS . .It Fl d Show the number of dropped packets. .It Fl M Use an alternative core. See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image. See .Sx GENERAL OPTIONS . .It Fl q Exit after .Ar howmany outputs. .El .It Xo .Bk -words .Nm .Fl s .Op Fl 46sz .Op Fl f Ar protocol_family | Fl p Ar protocol .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Display system-wide statistics for each network protocol. .Bl -tag -width indent .It Fl 4 Show IPv4 only. See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only. See .Sx GENERAL OPTIONS . .It Fl s If .Fl s is repeated, counters with a value of zero are suppressed. .It Fl z Reset statistic counters after displaying them. .It Fl f Ar protocol_family Filter by .Ar protocol_family . See .Sx GENERAL OPTIONS . .It Fl p Ar protocol Filter by .Ar protocol . See .Sx GENERAL OPTIONS . .It Fl M Use an alternative core. See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl i | I Ar interface Fl s .Op Fl 46s .Op Fl f Ar protocol_family | Fl p Ar protocol .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Display per-interface statistics for each network protocol. .Bl -tag -width indent .It Fl 4 Show IPv4 only See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only See .Sx GENERAL OPTIONS . .It Fl s If .Fl s is repeated, counters with a value of zero are suppressed. .It Fl f Ar protocol_family Filter by .Ar protocol_family . See .Sx GENERAL OPTIONS . .It Fl p Ar protocol Filter by .Ar protocol . See .Sx GENERAL OPTIONS . .It Fl M Use an alternative core See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl m .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Show statistics recorded by the memory management routines .Pq Xr mbuf 9 . The network manages a private pool of memory buffers. .Bl -tag -width indent .It Fl M Use an alternative core See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl B .Op Fl z .Op Fl I Ar interface .Ek .Xc Show statistics about .Xr bpf 4 peers. This includes information like how many packets have been matched, dropped and received by the bpf device, also information about current buffer sizes and device states. .Pp The .Xr bpf 4 flags displayed when .Nm is invoked with the .Fl B option represent the underlying parameters of the bpf peer. Each flag is represented as a single lower case letter. The mapping between the letters and flags in order of appearance are: .Bl -column ".Li i" .It Li p Ta Set if listening promiscuously .It Li i Ta Dv BIOCIMMEDIATE No has been set on the device .It Li f Ta Dv BIOCGHDRCMPLT No status: source link addresses are being filled automatically .It Li s Ta Dv BIOCGSEESENT No status: see packets originating locally and remotely on the interface. .It Li a Ta Packet reception generates a signal .It Li l Ta Dv BIOCLOCK No status: descriptor has been locked .El .Pp For more information about these flags, please refer to .Xr bpf 4 . .Bl -tag -width indent .It Fl z Reset statistic counters after displaying them. .El .It Xo .Bk -words .Nm .Fl r .Op Fl 46AnW .Op Fl F Ar fibnum .Op Fl f Ar address_family .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Display the contents of routing tables. .Pp When .Nm is invoked with the routing table option .Fl r , it lists the available routes and their status. Each route consists of a destination host or network, and a gateway to use in forwarding packets. The flags field shows a collection of information about the route stored as binary choices. The individual flags are discussed in more detail in the .Xr route 8 and .Xr route 4 manual pages. The mapping between letters and flags is: .Bl -column ".Li W" ".Dv RTF_WASCLONED" .It Li 1 Ta Dv RTF_PROTO1 Ta "Protocol specific routing flag #1" .It Li 2 Ta Dv RTF_PROTO2 Ta "Protocol specific routing flag #2" .It Li 3 Ta Dv RTF_PROTO3 Ta "Protocol specific routing flag #3" .It Li B Ta Dv RTF_BLACKHOLE Ta "Just discard pkts (during updates)" .It Li b Ta Dv RTF_BROADCAST Ta "The route represents a broadcast address" .It Li D Ta Dv RTF_DYNAMIC Ta "Created dynamically (by redirect)" .It Li G Ta Dv RTF_GATEWAY Ta "Destination requires forwarding by intermediary" .It Li H Ta Dv RTF_HOST Ta "Host entry (net otherwise)" .It Li L Ta Dv RTF_LLINFO Ta "Valid protocol to link address translation" .It Li M Ta Dv RTF_MODIFIED Ta "Modified dynamically (by redirect)" .It Li R Ta Dv RTF_REJECT Ta "Host or net unreachable" .It Li S Ta Dv RTF_STATIC Ta "Manually added" .It Li U Ta Dv RTF_UP Ta "Route usable" .It Li X Ta Dv RTF_XRESOLVE Ta "External daemon translates proto to link address" .El .Pp Direct routes are created for each interface attached to the local host; the gateway field for such entries shows the address of the outgoing interface. The refcnt field gives the current number of active uses of the route. Connection oriented protocols normally hold on to a single route for the duration of a connection while connectionless protocols obtain a route while sending to the same destination. The use field provides a count of the number of packets sent using that route. The interface entry indicates the network interface utilized for the route. .Bl -tag -width indent .It Fl 4 Show IPv4 only. See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only. See .Sx GENERAL OPTIONS . .It Fl n Do not resolve numeric addresses and port numbers to names. See .Sx GENERAL OPTIONS . .It Fl W Show the path MTU for each route, and print interface names with a wider field size. .It Fl F Display the routing table with the number .Ar fibnum . If the specified .Ar fibnum is -1 or .Fl F is not specified, the default routing table is displayed. .It Fl f Display the routing table for a particular .Ar address_family . .It Fl M Use an alternative core See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl rs .Op Fl s .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Display routing statistics. .Bl -tag -width indent .It Fl s If .Fl s is repeated, counters with a value of zero are suppressed. .It Fl M Use an alternative core See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl g .Op Fl 46W .Op Fl f Ar address_family .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Display the contents of the multicast virtual interface tables, and multicast forwarding caches. Entries in these tables will appear only when the kernel is actively forwarding multicast sessions. This option is applicable only to the .Cm inet and .Cm inet6 address families. .Bl -tag -width indent .It Fl 4 Show IPv4 only See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only See .Sx GENERAL OPTIONS . .It Fl W Avoid truncating addresses even if this causes some fields to overflow. .It Fl f Ar protocol_family Filter by .Ar protocol_family . See .Sx GENERAL OPTIONS . .It Fl M Use an alternative core See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl gs .Op Fl 46s .Op Fl f Ar address_family .Op Fl M Ar core .Op Fl N Ar system .Ek .Xc Show multicast routing statistics. .Bl -tag -width indent .It Fl 4 Show IPv4 only See .Sx GENERAL OPTIONS . .It Fl 6 Show IPv6 only See .Sx GENERAL OPTIONS . .It Fl s If .Fl s is repeated, counters with a value of zero are suppressed. .It Fl f Ar protocol_family Filter by .Ar protocol_family . See .Sx GENERAL OPTIONS . .It Fl M Use an alternative core See .Sx GENERAL OPTIONS . .It Fl N Use an alternative kernel image See .Sx GENERAL OPTIONS . .El .It Xo .Bk -words .Nm .Fl Q .Ek .Xc Show .Xr netisr 9 statistics. The flags field shows available ISR handlers: .Bl -column ".Li W" ".Dv NETISR_SNP_FLAGS_DRAINEDCPU" .It Li C Ta Dv NETISR_SNP_FLAGS_M2CPUID Ta "Able to map mbuf to cpu id" .It Li D Ta Dv NETISR_SNP_FLAGS_DRAINEDCPU Ta "Has queue drain handler" .It Li F Ta Dv NETISR_SNP_FLAGS_M2FLOW Ta "Able to map mbuf to flow id" .El .El .Ss GENERAL OPTIONS Some options have the general meaning: .Bl -tag -width flag .It Fl 4 Is shorthand for .Fl f .Ar inet .Pq Show only IPv4 .It Fl 6 Is shorthand for .Fl f .Ar inet6 .Pq Show only IPv6 .It Fl f Ar address_family , Fl p Ar protocol Limit display to those records of the specified .Ar address_family or a single .Ar protocol . The following address families and protocols are recognized: .Pp .Bl -tag -width ".Cm netgraph , ng Pq Dv AF_NETGRAPH" -compact .It Em Family .Em Protocols .It Cm inet Pq Dv AF_INET .Cm divert , icmp , igmp , ip , ipsec , pim, sctp , tcp , udp .It Cm inet6 Pq Dv AF_INET6 .Cm icmp6 , ip6 , ipsec6 , rip6 , sctp , tcp , udp .It Cm pfkey Pq Dv PF_KEY .Cm pfkey .It Cm netgraph , ng Pq Dv AF_NETGRAPH .Cm ctrl , data .It Cm unix Pq Dv AF_UNIX .It Cm link Pq Dv AF_LINK .El .Pp The program will complain if .Ar protocol is unknown or if there is no statistics routine for it. .It Fl M Extract values associated with the name list from the specified core instead of the default .Pa /dev/kmem . .It Fl N Extract the name list from the specified system instead of the default, which is the kernel image the system has booted from. .It Fl n Show network addresses and ports as numbers. Normally .Nm attempts to resolve addresses and ports, and display them symbolically. .El .Sh EXAMPLES Show packet traffic information (packets, bytes, errors, packet drops, etc) for interface re0 updated every 2 seconds and exit after 5 outputs: .Bd -literal -offset indent $ netstat -w 2 -q 5 -I re0 .Ed .Pp Show statistics for ICMP on any interface: .Bd -literal -offset indent $ netstat -s -p icmp .Ed .Pp Show routing tables: .Bd -literal -offset indent $ netstat -r .Ed .Pp Same as above, but without resolving numeric addresses and port numbers to names: .Bd -literal -offset indent $ netstat -rn .Ed .Sh SEE ALSO .Xr fstat 1 , .Xr nfsstat 1 , .Xr procstat 1 , .Xr ps 1 , .Xr sockstat 1 , .Xr libxo 3 , .Xr xo_parse_args 3 , .Xr bpf 4 , .Xr inet 4 , .Xr route 4 , .Xr unix 4 , .Xr hosts 5 , .Xr networks 5 , .Xr protocols 5 , .Xr services 5 , .Xr iostat 8 , .Xr route 8 , .Xr trpt 8 , .Xr vmstat 8 , .Xr mbuf 9 .Sh HISTORY The .Nm command appeared in .Bx 4.2 . .Pp IPv6 support was added by WIDE/KAME project. .Sh BUGS The notion of errors is ill-defined.