diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c index 83cb7f6dc00a..bbd6c078fd06 100644 --- a/sys/kern/uipc_debug.c +++ b/sys/kern/uipc_debug.c @@ -1,503 +1,499 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Debugger routines relating to sockets, protocols, etc, for use in DDB. 
*/ #include #include "opt_ddb.h" #include #include #include #include #include #include #ifdef DDB #include static void db_print_sotype(short so_type) { switch (so_type) { case SOCK_STREAM: db_printf("SOCK_STREAM"); break; case SOCK_DGRAM: db_printf("SOCK_DGRAM"); break; case SOCK_RAW: db_printf("SOCK_RAW"); break; case SOCK_RDM: db_printf("SOCK_RDM"); break; case SOCK_SEQPACKET: db_printf("SOCK_SEQPACKET"); break; default: db_printf("unknown"); break; } } static void db_print_sooptions(int so_options) { int comma; comma = 0; if (so_options & SO_DEBUG) { db_printf("%sSO_DEBUG", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTCONN) { db_printf("%sSO_ACCEPTCONN", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEADDR) { db_printf("%sSO_REUSEADDR", comma ? ", " : ""); comma = 1; } if (so_options & SO_KEEPALIVE) { db_printf("%sSO_KEEPALIVE", comma ? ", " : ""); comma = 1; } if (so_options & SO_DONTROUTE) { db_printf("%sSO_DONTROUTE", comma ? ", " : ""); comma = 1; } if (so_options & SO_BROADCAST) { db_printf("%sSO_BROADCAST", comma ? ", " : ""); comma = 1; } if (so_options & SO_USELOOPBACK) { db_printf("%sSO_USELOOPBACK", comma ? ", " : ""); comma = 1; } if (so_options & SO_LINGER) { db_printf("%sSO_LINGER", comma ? ", " : ""); comma = 1; } if (so_options & SO_OOBINLINE) { db_printf("%sSO_OOBINLINE", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT) { db_printf("%sSO_REUSEPORT", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT_LB) { db_printf("%sSO_REUSEPORT_LB", comma ? ", " : ""); comma = 1; } if (so_options & SO_TIMESTAMP) { db_printf("%sSO_TIMESTAMP", comma ? ", " : ""); comma = 1; } if (so_options & SO_NOSIGPIPE) { db_printf("%sSO_NOSIGPIPE", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTFILTER) { db_printf("%sSO_ACCEPTFILTER", comma ? ", " : ""); comma = 1; } if (so_options & SO_BINTIME) { db_printf("%sSO_BINTIME", comma ? 
", " : ""); comma = 1; } if (so_options & SO_NO_OFFLOAD) { db_printf("%sSO_NO_OFFLOAD", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_DDP) { db_printf("%sSO_NO_DDP", comma ? ", " : ""); comma = 1; } } static void db_print_sostate(short so_state) { int comma; comma = 0; if (so_state & SS_ISCONNECTED) { db_printf("%sSS_ISCONNECTED", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONNECTING) { db_printf("%sSS_ISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISDISCONNECTING) { db_printf("%sSS_ISDISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_NBIO) { db_printf("%sSS_NBIO", comma ? ", " : ""); comma = 1; } if (so_state & SS_ASYNC) { db_printf("%sSS_ASYNC", comma ? ", " : ""); comma = 1; } } static void db_print_soqstate(int so_qstate) { int comma; comma = 0; if (so_qstate & SQ_INCOMP) { db_printf("%sSQ_INCOMP", comma ? ", " : ""); comma = 1; } if (so_qstate & SQ_COMP) { db_printf("%sSQ_COMP", comma ? ", " : ""); comma = 1; } } static void db_print_sbstate(short sb_state) { int comma; comma = 0; if (sb_state & SBS_CANTSENDMORE) { db_printf("%sSBS_CANTSENDMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_CANTRCVMORE) { db_printf("%sSBS_CANTRCVMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_RCVATMARK) { db_printf("%sSBS_RCVATMARK", comma ? 
", " : ""); comma = 1; } } static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_domain(struct domain *d, const char *domain_name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", domain_name, d); indent += 2; db_print_indent(indent); db_printf("dom_family: %d ", d->dom_family); db_printf("dom_name: %s\n", d->dom_name); db_print_indent(indent); db_printf("dom_externalize: %p ", d->dom_externalize); db_print_indent(indent); db_printf("dom_protosw: %p ", d->dom_protosw); db_printf("dom_next: %p\n", d->dom_next.sle_next); db_print_indent(indent); db_printf("dom_rtattach: %p ", d->dom_rtattach); db_print_indent(indent); db_printf("dom_ifattach: %p ", d->dom_ifattach); db_printf("dom_ifdetach: %p\n", d->dom_ifdetach); } static void db_print_prflags(short pr_flags) { int comma; comma = 0; if (pr_flags & PR_ATOMIC) { db_printf("%sPR_ATOMIC", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_ADDR) { db_printf("%sPR_ADDR", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_CONNREQUIRED) { db_printf("%sPR_CONNREQUIRED", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_WANTRCVD) { db_printf("%sPR_WANTRCVD", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_IMPLOPCL) { db_printf("%sPR_IMPLOPCL", comma ? 
", " : ""); comma = 1; } } static void db_print_protosw(struct protosw *pr, const char *prname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", prname, pr); indent += 2; db_print_indent(indent); db_printf("pr_type: %d ", pr->pr_type); db_printf("pr_domain: %p\n", pr->pr_domain); if (pr->pr_domain != NULL) db_print_domain(pr->pr_domain, "pr_domain", indent); db_print_indent(indent); db_printf("pr_protocol: %d\n", pr->pr_protocol); db_print_indent(indent); db_printf("pr_flags: %d (", pr->pr_flags); db_print_prflags(pr->pr_flags); db_printf(")\n"); db_print_indent(indent); db_printf("pr_ctloutput: %p ", pr->pr_ctloutput); } static void db_print_sbflags(short sb_flags) { int comma; comma = 0; if (sb_flags & SB_WAIT) { db_printf("%sSB_WAIT", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_SEL) { db_printf("%sSB_SEL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_ASYNC) { db_printf("%sSB_ASYNC", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_UPCALL) { db_printf("%sSB_UPCALL", comma ? ", " : ""); comma = 1; } - if (sb_flags & SB_NOINTR) { - db_printf("%sSB_NOINTR", comma ? ", " : ""); - comma = 1; - } if (sb_flags & SB_AIO) { db_printf("%sSB_AIO", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_KNOTE) { db_printf("%sSB_KNOTE", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AUTOSIZE) { db_printf("%sSB_AUTOSIZE", comma ? 
", " : ""); comma = 1; } } static void db_print_sockbuf(struct sockbuf *sb, const char *sockbufname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", sockbufname, sb); indent += 2; db_print_indent(indent); db_printf("sb_state: 0x%x (", sb->sb_state); db_print_sbstate(sb->sb_state); db_printf(")\n"); db_print_indent(indent); db_printf("sb_mb: %p ", sb->sb_mb); db_printf("sb_mbtail: %p ", sb->sb_mbtail); db_printf("sb_lastrecord: %p\n", sb->sb_lastrecord); db_print_indent(indent); db_printf("sb_sndptr: %p ", sb->sb_sndptr); db_printf("sb_sndptroff: %u\n", sb->sb_sndptroff); db_print_indent(indent); db_printf("sb_acc: %u ", sb->sb_acc); db_printf("sb_ccc: %u ", sb->sb_ccc); db_printf("sb_hiwat: %u ", sb->sb_hiwat); db_printf("sb_mbcnt: %u ", sb->sb_mbcnt); db_printf("sb_mbmax: %u\n", sb->sb_mbmax); db_print_indent(indent); db_printf("sb_ctl: %u ", sb->sb_ctl); db_printf("sb_lowat: %d ", sb->sb_lowat); db_printf("sb_timeo: %jd\n", sb->sb_timeo); db_print_indent(indent); db_printf("sb_flags: 0x%x (", sb->sb_flags); db_print_sbflags(sb->sb_flags); db_printf(")\n"); db_print_indent(indent); db_printf("sb_aiojobq first: %p\n", TAILQ_FIRST(&sb->sb_aiojobq)); } static void db_print_socket(struct socket *so, const char *socketname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", socketname, so); indent += 2; db_print_indent(indent); db_printf("so_count: %d ", so->so_count); db_printf("so_type: %d (", so->so_type); db_print_sotype(so->so_type); db_printf(")\n"); db_print_indent(indent); db_printf("so_options: 0x%x (", so->so_options); db_print_sooptions(so->so_options); db_printf(")\n"); db_print_indent(indent); db_printf("so_linger: %d ", so->so_linger); db_printf("so_state: 0x%x (", so->so_state); db_print_sostate(so->so_state); db_printf(")\n"); db_print_indent(indent); db_printf("so_pcb: %p ", so->so_pcb); db_printf("so_proto: %p\n", so->so_proto); if (so->so_proto != NULL) db_print_protosw(so->so_proto, "so_proto", indent); 
db_print_indent(indent); if (so->so_options & SO_ACCEPTCONN) { db_printf("sol_incomp first: %p ", TAILQ_FIRST(&so->sol_incomp)); db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp)); db_printf("sol_qlen: %d ", so->sol_qlen); db_printf("sol_incqlen: %d ", so->sol_incqlen); db_printf("sol_qlimit: %d ", so->sol_qlimit); } else { db_printf("so_qstate: 0x%x (", so->so_qstate); db_print_soqstate(so->so_qstate); db_printf(") "); db_printf("so_listen: %p ", so->so_listen); /* so_list skipped */ db_printf("so_timeo: %d ", so->so_timeo); db_printf("so_error: %d\n", so->so_error); db_print_indent(indent); db_printf("so_sigio: %p ", so->so_sigio); db_printf("so_oobmark: %lu\n", so->so_oobmark); db_print_sockbuf(&so->so_rcv, "so_rcv", indent); db_print_sockbuf(&so->so_snd, "so_snd", indent); } } DB_SHOW_COMMAND(socket, db_show_socket) { struct socket *so; if (!have_addr) { db_printf("usage: show socket \n"); return; } so = (struct socket *)addr; db_print_socket(so, "socket", 0); } DB_SHOW_COMMAND(sockbuf, db_show_sockbuf) { struct sockbuf *sb; if (!have_addr) { db_printf("usage: show sockbuf \n"); return; } sb = (struct sockbuf *)addr; db_print_sockbuf(sb, "sockbuf", 0); } DB_SHOW_COMMAND(protosw, db_show_protosw) { struct protosw *pr; if (!have_addr) { db_printf("usage: show protosw \n"); return; } pr = (struct protosw *)addr; db_print_protosw(pr, "protosw", 0); } DB_SHOW_COMMAND(domain, db_show_domain) { struct domain *d; if (!have_addr) { db_printf("usage: show protosw \n"); return; } d = (struct domain *)addr; db_print_domain(d, "domain", 0); } #endif diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 95c7241d5f13..e8d410b00c15 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -1,1906 +1,1905 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_kern_tls.h" #include "opt_param.h" #include #include /* for aio_swake proto */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. 
*/
void	(*aio_swake)(struct socket *, struct sockbuf *);

/*
 * Primitive routines for operating on socket buffers
 */

/*
 * Scale a byte count by MCLBYTES / (MSIZE + MCLBYTES); the intermediate
 * product is computed in u_quad_t.
 */
#define	BUF_MAX_ADJ(_sz)	(((u_quad_t)(_sz)) * MCLBYTES / (MSIZE + MCLBYTES))

u_long	sb_max = SB_MAX;
u_long sb_max_adj = BUF_MAX_ADJ(SB_MAX);

static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */

#ifdef KERN_TLS
static void	sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
    struct mbuf *n);
#endif
static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
static void	sbflush_internal(struct sockbuf *sb);

/*
 * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
 *
 * Clears the M_PROTOFLAGS bits on every mbuf of the chain 'm'.
 */
static void
sbm_clrprotoflags(struct mbuf *m, int flags)
{
	int mask;

	mask = ~M_PROTOFLAGS;
	/* With PRUS_NOTREADY, M_NOTREADY is added to the bits that are kept. */
	if (flags & PRUS_NOTREADY)
		mask |= M_NOTREADY;
	while (m) {
		m->m_flags &= mask;
		m = m->m_next;
	}
}

/*
 * Compress M_NOTREADY mbufs after they have been readied by sbready().
 *
 * sbcompress() skips M_NOTREADY mbufs since the data is not available to
 * be copied at the time of sbcompress().  This function combines small
 * mbufs similar to sbcompress() once mbufs are ready.  'm0' is the first
 * mbuf sbready() marked ready, and 'end' is the first mbuf still not
 * ready.
 *
 * Does nothing when the buffer has SB_NOCOALESCE set.  sb_mbcnt and
 * sb_mbtail are updated for every mbuf that is merged away or shrunk.
 */
static void
sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
{
	struct mbuf *m, *n;
	int ext_size;

	SOCKBUF_LOCK_ASSERT(sb);

	if ((sb->sb_flags & SB_NOCOALESCE) != 0)
		return;

	for (m = m0; m != end; m = m->m_next) {
		MPASS((m->m_flags & M_NOTREADY) == 0);
		/*
		 * NB: In sbcompress(), 'n' is the last mbuf in the
		 * socket buffer and 'm' is the new mbuf being copied
		 * into the trailing space of 'n'.  Here, the roles
		 * are reversed and 'n' is the next mbuf after 'm'
		 * that is being copied into the trailing space of
		 * 'm'.
		 */
		n = m->m_next;
#ifdef KERN_TLS
		/* Try to coalesce adjacent ktls mbuf hdr/trailers. */
		if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
		    (m->m_flags & M_EXTPG) &&
		    (n->m_flags & M_EXTPG) &&
		    !mbuf_has_tls_session(m) &&
		    !mbuf_has_tls_session(n)) {
			int hdr_len, trail_len;

			hdr_len = n->m_epg_hdrlen;
			trail_len = m->m_epg_trllen;
			if (trail_len != 0 && hdr_len != 0 &&
			    trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) {
				/* copy n's header to m's trailer */
				memcpy(&m->m_epg_trail[trail_len],
				    n->m_epg_hdr, hdr_len);
				m->m_epg_trllen += hdr_len;
				m->m_len += hdr_len;
				n->m_epg_hdrlen = 0;
				n->m_len -= hdr_len;
			}
		}
#endif

		/* Compress small unmapped mbufs into plain mbufs. */
		if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN &&
		    !mbuf_has_tls_session(m)) {
			/* Saved first: used for accounting if the call succeeds. */
			ext_size = m->m_ext.ext_size;
			if (mb_unmapped_compress(m) == 0)
				sb->sb_mbcnt -= ext_size;
		}

		/*
		 * Fold following mbufs into m while they are small, of the
		 * same type, fit in m's trailing space, and m is writable,
		 * not M_EXTPG, and does not end a record.
		 */
		while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(m) &&
		    (m->m_flags & M_EXTPG) == 0 &&
		    !mbuf_has_tls_session(n) &&
		    !mbuf_has_tls_session(m) &&
		    n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    n->m_len <= M_TRAILINGSPACE(m) &&
		    m->m_type == n->m_type) {
			KASSERT(sb->sb_lastrecord != n,
		    ("%s: merging start of record (%p) into previous mbuf (%p)",
			    __func__, n, m));
			m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
			m->m_len += n->m_len;
			m->m_next = n->m_next;
			/* An end-of-record mark on n moves to the merged mbuf. */
			m->m_flags |= n->m_flags & M_EOR;
			if (sb->sb_mbtail == n)
				sb->sb_mbtail = m;

			sb->sb_mbcnt -= MSIZE;
			if (n->m_flags & M_EXT)
				sb->sb_mbcnt -= n->m_ext.ext_size;
			m_free(n);
			n = m->m_next;
		}
	}
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);
}

/*
 * Mark ready "count" units of I/O starting with "m".  Most mbufs
 * count as a single unit of I/O except for M_EXTPG mbufs which
 * are backed by multiple pages.
 *
 * Returns 0 when m0 was the buffer's first not-ready mbuf (sb_fnrdy) and
 * at least one mbuf became fully ready; sb_fnrdy is then advanced to the
 * next not-ready mbuf.  Returns EINPROGRESS when m0 itself is still not
 * fully ready, or when m0 was not sb_fnrdy.
 */
int
sbready(struct sockbuf *sb, struct mbuf *m0, int count)
{
	struct mbuf *m;
	u_int blocker;

	SOCKBUF_LOCK_ASSERT(sb);
	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
	KASSERT(count > 0, ("%s: invalid count %d", __func__, count));

	m = m0;
	/* M_BLOCKED is cleared below only when m0 heads the not-ready run. */
	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;

	while (count > 0) {
		KASSERT(m->m_flags & M_NOTREADY,
		    ("%s: m %p !M_NOTREADY", __func__, m));
		if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) {
			if (count < m->m_epg_nrdy) {
				/* Partially readied: m keeps M_NOTREADY. */
				m->m_epg_nrdy -= count;
				count = 0;
				break;
			}
			count -= m->m_epg_nrdy;
			m->m_epg_nrdy = 0;
		} else
			count--;

		m->m_flags &= ~(M_NOTREADY | blocker);
		if (blocker)
			sb->sb_acc += m->m_len;
		m = m->m_next;
	}

	/*
	 * If the first mbuf is still not fully ready because only
	 * some of its backing pages were readied, no further progress
	 * can be made.
	 */
	if (m0 == m) {
		MPASS(m->m_flags & M_NOTREADY);
		return (EINPROGRESS);
	}

	if (!blocker) {
		sbready_compress(sb, m0, m);
		return (EINPROGRESS);
	}

	/* This one was blocking all the queue. */
	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
		KASSERT(m->m_flags & M_BLOCKED,
		    ("%s: m %p !M_BLOCKED", __func__, m));
		m->m_flags &= ~M_BLOCKED;
		sb->sb_acc += m->m_len;
	}

	sb->sb_fnrdy = m;
	sbready_compress(sb, m0, m);

	return (0);
}

/*
 * Adjust sockbuf state reflecting allocation of m.
 *
 * sb_ccc always grows by m_len.  sb_acc grows only when no not-ready mbuf
 * is queued ahead and m itself is ready; otherwise m either becomes
 * sb_fnrdy or is marked M_BLOCKED.
 */
void
sballoc(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_ccc += m->m_len;

	if (sb->sb_fnrdy == NULL) {
		if (m->m_flags & M_NOTREADY)
			sb->sb_fnrdy = m;
		else
			sb->sb_acc += m->m_len;
	} else
		m->m_flags |= M_BLOCKED;

	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
		sb->sb_ctl += m->m_len;

	sb->sb_mbcnt += MSIZE;

	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/*
 * Adjust sockbuf state reflecting freeing of m.
 */
void
sbfree(struct sockbuf *sb, struct mbuf *m)
{

#if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
	SOCKBUF_LOCK_ASSERT(sb);
#endif

	sb->sb_ccc -= m->m_len;

	if (!(m->m_flags & M_NOTAVAIL))
		sb->sb_acc -= m->m_len;

	if (m == sb->sb_fnrdy) {
		struct mbuf *n;

		KASSERT(m->m_flags & M_NOTREADY,
		    ("%s: m %p !M_NOTREADY", __func__, m));

		/*
		 * m headed the not-ready run: unblock the ready mbufs
		 * that follow it and find the next not-ready one.
		 */
		n = m->m_next;
		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
			n->m_flags &= ~M_BLOCKED;
			sb->sb_acc += n->m_len;
			n = n->m_next;
		}
		sb->sb_fnrdy = n;
	}

	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
		sb->sb_ctl -= m->m_len;

	sb->sb_mbcnt -= MSIZE;
	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;

	/* Drop the cached send pointer if it referenced m. */
	if (sb->sb_sndptr == m) {
		sb->sb_sndptr = NULL;
		sb->sb_sndptroff = 0;
	}
	if (sb->sb_sndptroff != 0)
		sb->sb_sndptroff -= m->m_len;
}

#ifdef KERN_TLS
/*
 * Similar to sballoc/sbfree but does not adjust state associated with
 * the sb_mb chain such as sb_fnrdy or sb_sndptr*.  Also assumes mbufs
 * are not ready.
 */
void
sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sb->sb_ccc += m->m_len;
	sb->sb_tlscc += m->m_len;

	sb->sb_mbcnt += MSIZE;

	if (m->m_flags & M_EXT)
		sb->sb_mbcnt += m->m_ext.ext_size;
}

/* Undo the accounting done by sballoc_ktls_rx() for m. */
void
sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
{

#if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
	SOCKBUF_LOCK_ASSERT(sb);
#endif

	sb->sb_ccc -= m->m_len;
	sb->sb_tlscc -= m->m_len;

	sb->sb_mbcnt -= MSIZE;

	if (m->m_flags & M_EXT)
		sb->sb_mbcnt -= m->m_ext.ext_size;
}
#endif

/*
 * Socantsendmore indicates that no more data will be sent on the socket; it
 * would normally be applied to a socket when the user informs the system
 * that no more data is to be sent, by the protocol code (in case
 * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be
 * received, and will normally be applied to the socket by a protocol when it
 * detects that the peer will send no more data. Data queued for reading in
 * the socket may yet be read.
*/ void socantsendmore_locked(struct socket *so) { SOCK_SENDBUF_LOCK_ASSERT(so); so->so_snd.sb_state |= SBS_CANTSENDMORE; sowwakeup_locked(so); SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantsendmore(struct socket *so) { SOCK_SENDBUF_LOCK(so); socantsendmore_locked(so); SOCK_SENDBUF_UNLOCK_ASSERT(so); } void socantrcvmore_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); so->so_rcv.sb_state |= SBS_CANTRCVMORE; #ifdef KERN_TLS if (so->so_rcv.sb_flags & SB_TLS_RX) ktls_check_rx(&so->so_rcv); #endif sorwakeup_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void socantrcvmore(struct socket *so) { SOCK_RECVBUF_LOCK(so); socantrcvmore_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow_locked(struct socket *so) { SOCK_RECVBUF_LOCK_ASSERT(so); if (so->so_options & SO_RERROR) { so->so_rerror = ENOBUFS; sorwakeup_locked(so); } else SOCK_RECVBUF_UNLOCK(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } void soroverflow(struct socket *so) { SOCK_RECVBUF_LOCK(so); soroverflow_locked(so); SOCK_RECVBUF_UNLOCK_ASSERT(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(struct socket *so, sb_which which) { struct sockbuf *sb; SOCK_BUF_LOCK_ASSERT(so, which); sb = sobuf(so, which); sb->sb_flags |= SB_WAIT; return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which), - (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", - sb->sb_timeo, 0, 0)); + PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0)); } /* * Wakeup processes waiting on a socket buffer. Do asynchronous notification * via SIGIO if the socket has the SS_ASYNC flag set. * * Called with the socket buffer lock held; will release the lock by the end * of the function. This allows the caller to acquire the socket buffer lock * while testing for the need for various sorts of wakeup and hold it through * to the point where it's no longer required. We currently hold the lock * through calls out to other subsystems (with the exception of kqueue), and * then release it to avoid lock order issues. 
It's not clear that's
 * correct.
 */
static __always_inline void
sowakeup(struct socket *so, const sb_which which)
{
	struct sockbuf *sb;
	int ret;

	SOCK_BUF_LOCK_ASSERT(so, which);

	sb = sobuf(so, which);
	selwakeuppri(sb->sb_sel, PSOCK);
	if (!SEL_WAITING(sb->sb_sel))
		sb->sb_flags &= ~SB_SEL;
	/* Wake sbwait() sleepers, which sleep on &sb->sb_acc. */
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup(&sb->sb_acc);
	}
	KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
	if (sb->sb_upcall != NULL) {
		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
		if (ret == SU_ISCONNECTED) {
			KASSERT(sb == &so->so_rcv,
			    ("SO_SND upcall returned SU_ISCONNECTED"));
			soupcall_clear(so, SO_RCV);
		}
	} else
		ret = SU_OK;
	if (sb->sb_flags & SB_AIO)
		sowakeup_aio(so, which);
	SOCK_BUF_UNLOCK(so, which);
	/* The remaining notifications run after the buffer lock is dropped. */
	if (ret == SU_ISCONNECTED)
		soisconnected(so);
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(&so->so_sigio, SIGIO, 0);
	SOCK_BUF_UNLOCK_ASSERT(so, which);
}

/*
 * Hand a spliced socket's receive side to so_splice_dispatch().  Entered
 * with the receive buffer lock held; the splice mutex is taken before the
 * buffer lock is released.
 */
static void
splice_push(struct socket *so)
{
	struct so_splice *sp;

	SOCK_RECVBUF_LOCK_ASSERT(so);

	sp = so->so_splice;
	mtx_lock(&sp->mtx);
	SOCK_RECVBUF_UNLOCK(so);
	so_splice_dispatch(sp);
}

/*
 * Send-side counterpart of splice_push(): uses so_splice_back and the send
 * buffer lock.
 */
static void
splice_pull(struct socket *so)
{
	struct so_splice *sp;

	SOCK_SENDBUF_LOCK_ASSERT(so);

	sp = so->so_splice_back;
	mtx_lock(&sp->mtx);
	SOCK_SENDBUF_UNLOCK(so);
	so_splice_dispatch(sp);
}

/*
 * Do we need to notify the other side when I/O is possible?
 *
 * True when any of SB_WAIT, SB_SEL, SB_ASYNC, SB_UPCALL, SB_AIO or
 * SB_KNOTE is set on the buffer.
 */
static __always_inline bool
sb_notify(const struct sockbuf *sb)
{
	return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
	    SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
}

/*
 * Receive-side wakeup.  Entered with the receive buffer lock held; every
 * branch releases it.
 */
void
sorwakeup_locked(struct socket *so)
{
	SOCK_RECVBUF_LOCK_ASSERT(so);
	if (so->so_rcv.sb_flags & SB_SPLICED)
		splice_push(so);
	else if (sb_notify(&so->so_rcv))
		sowakeup(so, SO_RCV);
	else
		SOCK_RECVBUF_UNLOCK(so);
}

/*
 * Send-side wakeup.  Entered with the send buffer lock held; every branch
 * releases it.
 */
void
sowwakeup_locked(struct socket *so)
{
	SOCK_SENDBUF_LOCK_ASSERT(so);
	if (so->so_snd.sb_flags & SB_SPLICED)
		splice_pull(so);
	else if (sb_notify(&so->so_snd))
		sowakeup(so, SO_SND);
	else
		SOCK_SENDBUF_UNLOCK(so);
}

/*
 * Socket buffer (struct sockbuf) utility routines.
* * Each socket contains two socket buffers: one for sending data and one for * receiving data. Each buffer contains a queue of mbufs, information about * the number of mbufs and amount of data in the queue, and other fields * allowing select() statements and notification on data availability to be * implemented. * * Data stored in a socket buffer is maintained as a list of records. Each * record is a list of mbufs chained together with the m_next field. Records * are chained together with the m_nextpkt field. The upper level routine * soreceive() expects the following conventions to be observed when placing * information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's name, * then a record containing that name must be present before any * associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really just * additional data associated with the message), and there are ``rights'' * to be received, then a record containing this data should be present * (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by a data * record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space should * be released by calling sbrelease() when the socket is destroyed. 
*/
/*
 * Reserve sndcc bytes for the send buffer and rcvcc bytes for the receive
 * buffer, charged against curthread's limit, then give the low-water marks
 * defaults where they are still zero.  Returns 0 on success or ENOBUFS if
 * either reservation fails; a send reservation that succeeded is released
 * again when the receive reservation fails.
 */
int
soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
{
	struct thread *td = curthread;

	SOCK_SENDBUF_LOCK(so);
	SOCK_RECVBUF_LOCK(so);
	if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
		goto bad;
	if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	SOCK_RECVBUF_UNLOCK(so);
	SOCK_SENDBUF_UNLOCK(so);
	return (0);
bad2:
	sbrelease_locked(so, SO_SND);
bad:
	SOCK_RECVBUF_UNLOCK(so);
	SOCK_SENDBUF_UNLOCK(so);
	return (ENOBUFS);
}

/*
 * Sysctl handler for sb_max.  A new value below MSIZE + MCLBYTES is
 * rejected with EINVAL; an accepted value also recomputes sb_max_adj.
 */
static int
sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	u_long tmp_sb_max = sb_max;

	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
	/* Read-only request or handler error: nothing to update. */
	if (error || !req->newptr)
		return (error);
	if (tmp_sb_max < MSIZE + MCLBYTES)
		return (EINVAL);
	sb_max = tmp_sb_max;
	sb_max_adj = BUF_MAX_ADJ(sb_max);
	return (0);
}

/*
 * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
 * become limiting if buffering efficiency is near the normal case.
 *
 * Returns false when cc exceeds BUF_MAX_ADJ(buf_max) or when chgsbsize()
 * refuses the new size; true otherwise.
 */
bool
sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
    u_long buf_max, struct thread *td)
{
	struct sockbuf *sb = sobuf(so, which);
	rlim_t sbsize_limit;

	SOCK_BUF_LOCK_ASSERT(so, which);

	/*
	 * When a thread is passed, we take into account the thread's socket
	 * buffer size limit.  The caller will generally pass curthread, but
	 * in the TCP input path, NULL will be passed to indicate that no
	 * appropriate thread resource limits are available.  In that case,
	 * we don't apply a process limit.
	 */
	if (cc > BUF_MAX_ADJ(buf_max))
		return (false);
	if (td != NULL) {
		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
	} else
		sbsize_limit = RLIM_INFINITY;
	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
	    sbsize_limit))
		return (false);
	sb->sb_mbmax = min(cc * sb_efficiency, buf_max);
	/* Keep the low-water mark within the new high-water mark. */
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (true);
}

/* sbreserve_locked_limit() with the global sb_max as the ceiling. */
bool
sbreserve_locked(struct socket *so, sb_which which, u_long cc,
    struct thread *td)
{
	return (sbreserve_locked_limit(so, which, cc, sb_max, td));
}

/*
 * Apply SO_SNDBUF, SO_RCVBUF, SO_SNDLOWAT or SO_RCVLOWAT from sopt.  For a
 * listening socket the sol_sb* fields are updated; otherwise the matching
 * socket buffer is updated under its buffer lock.  Returns EINVAL for
 * values below 1, ENOBUFS when the requested size is refused, or the
 * sooptcopyin() error.
 *
 * NOTE(review): the switches below have no default case, so for any other
 * sopt_name 'wh' is used uninitialized on a non-listening socket --
 * confirm that callers only pass the four names handled here.
 */
int
sbsetopt(struct socket *so, struct sockopt *sopt)
{
	struct sockbuf *sb;
	sb_which wh;
	short *flags;
	u_int cc, *hiwat, *lowat;
	int error, optval;

	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
	if (error != 0)
		return (error);

	/*
	 * Values < 1 make no sense for any of these options,
	 * so disallow them.
	 */
	if (optval < 1)
		return (EINVAL);
	cc = optval;

	sb = NULL;
	SOCK_LOCK(so);
	if (SOLISTENING(so)) {
		switch (sopt->sopt_name) {
			case SO_SNDLOWAT:
			case SO_SNDBUF:
				lowat = &so->sol_sbsnd_lowat;
				hiwat = &so->sol_sbsnd_hiwat;
				flags = &so->sol_sbsnd_flags;
				break;
			case SO_RCVLOWAT:
			case SO_RCVBUF:
				lowat = &so->sol_sbrcv_lowat;
				hiwat = &so->sol_sbrcv_hiwat;
				flags = &so->sol_sbrcv_flags;
				break;
		}
	} else {
		switch (sopt->sopt_name) {
			case SO_SNDLOWAT:
			case SO_SNDBUF:
				sb = &so->so_snd;
				wh = SO_SND;
				break;
			case SO_RCVLOWAT:
			case SO_RCVBUF:
				sb = &so->so_rcv;
				wh = SO_RCV;
				break;
		}
		flags = &sb->sb_flags;
		hiwat = &sb->sb_hiwat;
		lowat = &sb->sb_lowat;
		SOCK_BUF_LOCK(so, wh);
	}

	error = 0;
	switch (sopt->sopt_name) {
	case SO_SNDBUF:
	case SO_RCVBUF:
		if (SOLISTENING(so)) {
			if (cc > sb_max_adj) {
				error = ENOBUFS;
				break;
			}
			*hiwat = cc;
			if (*lowat > *hiwat)
				*lowat = *hiwat;
		} else {
			if (!sbreserve_locked(so, wh, cc, curthread))
				error = ENOBUFS;
		}
		/* A size that was set successfully turns SB_AUTOSIZE off. */
		if (error == 0)
			*flags &= ~SB_AUTOSIZE;
		break;
	case SO_SNDLOWAT:
	case SO_RCVLOWAT:
		/*
		 * Make sure the low-water is never greater than the
		 * high-water.
		 */
		*lowat = (cc > *hiwat) ? *hiwat : cc;
		break;
	}

	if (!SOLISTENING(so))
		SOCK_BUF_UNLOCK(so, wh);
	SOCK_UNLOCK(so);
	return (error);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
static void
sbrelease_internal(struct socket *so, sb_which which)
{
	struct sockbuf *sb = sobuf(so, which);

	sbflush_internal(sb);
	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
	    RLIM_INFINITY);
	sb->sb_mbmax = 0;
}

/* sbrelease_internal() for a caller already holding the buffer lock. */
void
sbrelease_locked(struct socket *so, sb_which which)
{

	SOCK_BUF_LOCK_ASSERT(so, which);

	sbrelease_internal(so, which);
}

/* Take the buffer lock, release the buffer, drop the lock. */
void
sbrelease(struct socket *so, sb_which which)
{

	SOCK_BUF_LOCK(so, which);
	sbrelease_locked(so, which);
	SOCK_BUF_UNLOCK(so, which);
}

/*
 * Release the buffer without taking or asserting the buffer lock.  Under
 * KERN_TLS any attached TLS session is freed first.
 */
void
sbdestroy(struct socket *so, sb_which which)
{
#ifdef KERN_TLS
	struct sockbuf *sb = sobuf(so, which);

	if (sb->sb_tls_info != NULL)
		ktls_free(sb->sb_tls_info);
	sb->sb_tls_info = NULL;
#endif
	sbrelease_internal(so, which);
}

/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added. sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record. To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used. To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used. In either case,
 * the new data begins a new record. Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data. Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.
Data is normally copied from a socket send buffer in a * protocol with m_copy for output to a peer, and then removing the data from * the socket buffer with sbdrop() or sbdroprecord() when the data is * acknowledged by the peer. */ #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; if (m != sb->sb_lastrecord) { printf("%s: sb_mb %p sb_lastrecord %p last %p\n", __func__, sb->sb_mb, sb->sb_lastrecord, m); printf("packet chain:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) printf("\t%p\n", m); panic("%s from %s:%u", __func__, file, line); } } void sblastmbufchk(struct sockbuf *sb, const char *file, int line) { struct mbuf *m = sb->sb_mb; struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); while (m && m->m_nextpkt) m = m->m_nextpkt; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mbtail) { printf("%s: sb_mb %p sb_mbtail %p last %p\n", __func__, sb->sb_mb, sb->sb_mbtail, m); printf("packet tree:\n"); for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { printf("\t"); for (n = m; n != NULL; n = n->m_next) printf("%p ", n); printf("\n"); } panic("%s from %s:%u", __func__, file, line); } #ifdef KERN_TLS m = sb->sb_mtls; while (m && m->m_next) m = m->m_next; if (m != sb->sb_mtlstail) { printf("%s: sb_mtls %p sb_mtlstail %p last %p\n", __func__, sb->sb_mtls, sb->sb_mtlstail, m); printf("TLS packet tree:\n"); printf("\t"); for (m = sb->sb_mtls; m != NULL; m = m->m_next) { printf("%p ", m); } printf("\n"); panic("%s from %s:%u", __func__, file, line); } #endif } #endif /* SOCKBUF_DEBUG */ #define SBLINKRECORD(sb, m0) do { \ SOCKBUF_LOCK_ASSERT(sb); \ if ((sb)->sb_lastrecord != NULL) \ (sb)->sb_lastrecord->m_nextpkt = (m0); \ else \ (sb)->sb_mb = (m0); \ (sb)->sb_lastrecord = (m0); \ } while (/*CONSTCOND*/0) /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. 
Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) { struct mbuf *n; SOCKBUF_LOCK_ASSERT(sb); if (m == NULL) return; kmsan_check_mbuf(m, "sbappend"); sbm_clrprotoflags(m, flags); SBLASTRECORDCHK(sb); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * XXX Would like to simply use sb_mbtail here, but * XXX I need to verify that I won't miss an EOR that * XXX way. */ if ((n = sb->sb_lastrecord) != NULL) { do { if (n->m_flags & M_EOR) { sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } else { /* * If this is the first record in the socket buffer, * it's also the last record. */ sb->sb_lastrecord = m; } } sbcompress(sb, m, n); SBLASTRECORDCHK(sb); } /* * Append mbuf chain m to the last record in the socket buffer sb. The * additional space associated the mbuf chain is recorded in sb. Empty mbufs * are discarded and mbufs are compacted where possible. */ void sbappend(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK(sb); sbappend_locked(sb, m, flags); SOCKBUF_UNLOCK(sb); } #ifdef KERN_TLS /* * Append an mbuf containing encrypted TLS data. The data * is marked M_NOTREADY until it has been decrypted and * stored as a TLS record. */ static void sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) { struct ifnet *ifp; struct mbuf *n; int flags; ifp = NULL; flags = M_NOTREADY; SBLASTMBUFCHK(sb); /* Mbuf chain must start with a packet header. */ MPASS((m->m_flags & M_PKTHDR) != 0); /* Remove all packet headers and mbuf tags to get a pure data chain. */ for (n = m; n != NULL; n = n->m_next) { if (n->m_flags & M_PKTHDR) { ifp = m->m_pkthdr.leaf_rcvif; if ((n->m_pkthdr.csum_flags & CSUM_TLS_MASK) == CSUM_TLS_DECRYPTED) { /* Mark all mbufs in this packet decrypted. 
*/ flags = M_NOTREADY | M_DECRYPTED; } else { flags = M_NOTREADY; } m_demote_pkthdr(n); } n->m_flags &= M_DEMOTEFLAGS; n->m_flags |= flags; MPASS((n->m_flags & M_NOTREADY) != 0); } sbcompress_ktls_rx(sb, m, sb->sb_mtlstail); ktls_check_rx(sb); /* Check for incoming packet route changes: */ if (ifp != NULL && sb->sb_tls_info->rx_ifp != NULL && sb->sb_tls_info->rx_ifp != ifp) ktls_input_ifp_mismatch(sb, ifp); } #endif /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). */ void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) { SOCKBUF_LOCK_ASSERT(sb); KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); kmsan_check_mbuf(m, "sbappend"); #ifdef KERN_TLS /* * Decrypted TLS records are appended as records via * sbappendrecord(). TCP passes encrypted TLS records to this * function which must be scheduled for decryption. */ if (sb->sb_flags & SB_TLS_RX) { sbappend_ktls_rx(sb, m); return; } #endif KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); SBLASTMBUFCHK(sb); #ifdef KERN_TLS if (sb->sb_tls_info != NULL) ktls_seq(sb, m); #endif /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); sbcompress(sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; SBLASTRECORDCHK(sb); } /* * This version of sbappend() should only be used when the caller absolutely * knows that there will never be more than one record in the socket buffer, * that is, a stream protocol (such as TCP). 
*/
/* Locking wrapper around sbappendstream_locked(). */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}

#ifdef SOCKBUF_DEBUG
/*
 * Recompute the byte and mbuf accounting of sb by walking every record and
 * every mbuf, and compare the result with sb_acc, sb_ccc and sb_mbcnt (and
 * sb_tlscc with KERN_TLS).  Also verifies that no mbuf is empty, that the
 * first M_NOTREADY mbuf is sb_fnrdy, and that every mbuf from that point on
 * has M_NOTAVAIL bits set.  Any inconsistency panics, reporting the caller's
 * file and line.
 */
void
sbcheck(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m, *n, *fnrdy;
	u_long acc, ccc, mbcnt;
#ifdef KERN_TLS
	u_long tlscc;
#endif

	SOCKBUF_LOCK_ASSERT(sb);

	acc = ccc = mbcnt = 0;
	fnrdy = NULL;

	/* Outer loop: records (m_nextpkt); inner loop: mbufs (m_next). */
	for (m = sb->sb_mb; m; m = n) {
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			if (m->m_len == 0) {
				printf("sb %p empty mbuf %p\n", sb, m);
				goto fail;
			}
			if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
				if (m != sb->sb_fnrdy) {
					printf("sb %p: fnrdy %p != m %p\n",
					    sb, sb->sb_fnrdy, m);
					goto fail;
				}
				fnrdy = m;
			}
			if (fnrdy) {
				if (!(m->m_flags & M_NOTAVAIL)) {
					printf("sb %p: fnrdy %p, m %p is avail\n",
					    sb, sb->sb_fnrdy, m);
					goto fail;
				}
			} else
				/* Only data ahead of fnrdy counts as available. */
				acc += m->m_len;
			ccc += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
#ifdef KERN_TLS
	/*
	 * Account for mbufs "detached" by ktls_detach_record() while
	 * they are decrypted by ktls_decrypt().  tlsdcc gives a count
	 * of the detached bytes that are included in ccc.  The mbufs
	 * and clusters are not included in the socket buffer
	 * accounting.
	 */
	ccc += sb->sb_tlsdcc;

	tlscc = 0;
	for (m = sb->sb_mtls; m; m = m->m_next) {
		if (m->m_nextpkt != NULL) {
			printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
			goto fail;
		}
		if ((m->m_flags & M_NOTREADY) == 0) {
			printf("sb %p TLS mbuf %p ready\n", sb, m);
			goto fail;
		}
		tlscc += m->m_len;
		ccc += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
			mbcnt += m->m_ext.ext_size;
	}

	if (sb->sb_tlscc != tlscc) {
		printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
		    sb->sb_tlsdcc);
		goto fail;
	}
#endif
	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
#ifdef KERN_TLS
		printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
		    sb->sb_tlsdcc);
#endif
		goto fail;
	}
	return;
fail:
	panic("%s from %s:%u", __func__, file, line);
}
#endif

/*
 * As above, except the mbuf chain begins a new record.
 *
 * The caller must hold the sockbuf lock.  A NULL m0 is a no-op.
 */
void
sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 == NULL)
		return;

	kmsan_check_mbuf(m0, "sbappend");
	m_clrprotoflags(m0);

	/*
	 * Put the first mbuf on the queue.  Note this permits zero length
	 * records.
	 */
	sballoc(sb, m0);
	SBLASTRECORDCHK(sb);
	SBLINKRECORD(sb, m0);
	sb->sb_mbtail = m0;
	/* Detach the rest of the chain; sbcompress() re-appends it. */
	m = m0->m_next;
	m0->m_next = 0;
	if (m && (m0->m_flags & M_EOR)) {
		/* Move the end-of-record mark off the first mbuf. */
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
	sbcompress(sb, m, m0);
}

/*
 * As above, except the mbuf chain begins a new record.
 *
 * Locking wrapper around sbappendrecord_locked().
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}

/* Helper routine that appends data, control, and address to a sockbuf.
*/
/*
 * Builds one record of the form [address mbuf] -> control -> m0 and links it
 * at the end of sb.  'ctrl_last' is the last mbuf of 'control' (NULL when
 * there is no control chain).  Returns 1 on success; returns 0 without
 * modifying sb when the address mbuf cannot be allocated or, on small-MSIZE
 * builds, when the address does not fit in one mbuf.
 */
static int
sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
{
	struct mbuf *m, *n, *nlast;

	if (m0 != NULL)
		kmsan_check_mbuf(m0, "sbappend");
	if (control != NULL)
		kmsan_check_mbuf(control, "sbappend");

#if MSIZE <= 256
	if (asa->sa_len > MLEN)
		return (0);
#endif
	/* Copy the address into a fresh MT_SONAME mbuf. */
	m = m_get(M_NOWAIT, MT_SONAME);
	if (m == NULL)
		return (0);
	m->m_len = asa->sa_len;
	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
	if (m0) {
		M_ASSERT_NO_SND_TAG(m0);
		m_clrprotoflags(m0);
		m_tag_delete_chain(m0, NULL);
		/*
		 * Clear some persistent info from pkthdr.
		 * We don't use m_demote(), because some netgraph consumers
		 * expect M_PKTHDR presence.
		 */
		m0->m_pkthdr.rcvif = NULL;
		m0->m_pkthdr.flowid = 0;
		m0->m_pkthdr.csum_flags = 0;
		m0->m_pkthdr.fibnum = 0;
		m0->m_pkthdr.rsstype = 0;
	}
	if (ctrl_last)
		ctrl_last->m_next = m0;	/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Account every mbuf of the record; n ends on the last one. */
	for (n = m; n->m_next != NULL; n = n->m_next)
		sballoc(sb, n);
	sballoc(sb, n);
	nlast = n;
	SBLINKRECORD(sb, m);

	sb->sb_mbtail = nlast;
	SBLASTMBUFCHK(sb);

	SBLASTRECORDCHK(sb);
	return (1);
}

/*
 * Append address and data, and optionally, control (ancillary) data to the
 * receive queue of a socket.  If present, m0 must include a packet header
 * with total length.  Returns 0 if no space in sockbuf or insufficient
 * mbufs.
 *
 * The caller must hold the sockbuf lock.  Panics if m0 is supplied without
 * M_PKTHDR.
 */
int
sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	struct mbuf *ctrl_last;
	int space = asa->sa_len;

	SOCKBUF_LOCK_ASSERT(sb);

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr_locked");
	if (m0)
		space += m0->m_pkthdr.len;
	/* m_length() also hands back the last control mbuf. */
	space += m_length(control, &ctrl_last);

	if (space > sbspace(sb))
		return (0);
	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
}

/*
 * Append address and data, and optionally, control (ancillary) data to the
 * receive queue of a socket.  If present, m0 must include a packet header
 * with total length.
Returns 0 if insufficient mbufs. Does not validate space * on the receiving sockbuf. */ int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { struct mbuf *ctrl_last; SOCKBUF_LOCK_ASSERT(sb); ctrl_last = (control == NULL) ? NULL : m_last(control); return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); } /* * Append address and data, and optionally, control (ancillary) data to the * receive queue of a socket. If present, m0 must include a packet header * with total length. Returns 0 if no space in sockbuf or insufficient * mbufs. */ int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { int retval; SOCKBUF_LOCK(sb); retval = sbappendaddr_locked(sb, asa, m0, control); SOCKBUF_UNLOCK(sb); return (retval); } void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags) { struct mbuf *m, *mlast; if (m0 != NULL) kmsan_check_mbuf(m0, "sbappend"); kmsan_check_mbuf(control, "sbappend"); sbm_clrprotoflags(m0, flags); m_last(control)->m_next = m0; SBLASTRECORDCHK(sb); for (m = control; m->m_next; m = m->m_next) sballoc(sb, m); sballoc(sb, m); mlast = m; SBLINKRECORD(sb, control); sb->sb_mbtail = mlast; SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); } void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags) { SOCKBUF_LOCK(sb); sbappendcontrol_locked(sb, m0, control, flags); SOCKBUF_UNLOCK(sb); } /* * Append the data in mbuf chain (m) into the socket buffer sb following mbuf * (n). If (n) is NULL, the buffer is presumed empty. * * When the data is compressed, mbufs in the chain may be handled in one of * three ways: * * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no * record boundary, and no change in data type). * * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into * an mbuf already in the socket buffer. 
This can occur if an
 *     appropriate mbuf exists, there is room, both mbufs are not marked as
 *     not ready, and no merging of data types will occur.
 *
 * (3) The mbuf may be appended to the end of the existing mbuf chain.
 *
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		/* Remember whether any mbuf seen so far carried M_EOR. */
		eor |= m->m_flags & M_EOR;
		/* Case (1): drop an empty mbuf that contributes nothing. */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}
		/* Case (2): copy m's data into the trailing space of n. */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    !(m->m_flags & M_NOTREADY) &&
		    !(n->m_flags & (M_NOTREADY | M_EXTPG)) &&
		    !mbuf_has_tls_session(m) &&
		    !mbuf_has_tls_session(n) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
			n->m_len += m->m_len;
			sb->sb_ccc += m->m_len;
			/* Bytes are available only if nothing is pending. */
			if (sb->sb_fnrdy == NULL)
				sb->sb_acc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}
		if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) &&
		    (m->m_flags & M_NOTREADY) == 0 &&
		    !mbuf_has_tls_session(m))
			(void)mb_unmapped_compress(m);
		/* Case (3): link m itself at the tail of the chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		/* M_EOR is re-applied to the final mbuf after the loop. */
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}

#ifdef KERN_TLS
/*
 * A version of sbcompress() for encrypted TLS RX mbufs.  These mbufs
 * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also
 * a bit simpler (no EOR markers, always MT_DATA, etc.).
*/
/*
 * 'm' is the chain to append and 'n' the mbuf to append after (NULL when
 * the sb_mtls chain is empty).  The caller must hold the sockbuf lock.
 */
static void
sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		KASSERT((m->m_flags & M_EOR) == 0,
		    ("TLS RX mbuf %p with EOR", m));
		KASSERT(m->m_type == MT_DATA,
		    ("TLS RX mbuf %p is not MT_DATA", m));
		KASSERT((m->m_flags & M_NOTREADY) != 0,
		    ("TLS RX mbuf %p ready", m));
		KASSERT((m->m_flags & M_EXTPG) == 0,
		    ("TLS RX mbuf %p unmapped", m));

		/* Empty mbufs are simply discarded. */
		if (m->m_len == 0) {
			m = m_free(m);
			continue;
		}

		/*
		 * Even though both 'n' and 'm' are NOTREADY, it's ok
		 * to coalesce the data.
		 */
		if (n &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    !((m->m_flags ^ n->m_flags) & M_DECRYPTED) &&
		    !(n->m_flags & M_EXTPG) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n)) {
			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
			n->m_len += m->m_len;
			sb->sb_ccc += m->m_len;
			sb->sb_tlscc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise link m at the tail of the TLS chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mtls = m;
		sb->sb_mtlstail = m;
		sballoc_ktls_rx(sb, m);
		n = m;
		m = m->m_next;
		n->m_next = NULL;
	}
	SBLASTMBUFCHK(sb);
}
#endif

/*
 * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
 */
static void
sbflush_internal(struct sockbuf *sb)
{

	while (sb->sb_mbcnt || sb->sb_tlsdcc) {
		/*
		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 */
		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
	}
	/* Reached via the break above only when the accounting is broken. */
	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
	    ("%s: ccc %u mb %p mbcnt %u", __func__,
	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
}

/* Flush sb; the caller must hold the sockbuf lock. */
void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}

/* Locking wrapper around sbflush_locked(). */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Cut data from (the front of) a sockbuf.
*/
/*
 * Removes 'len' bytes from the head of sb and returns the fully consumed
 * mbufs as a chain for the caller to free.  'len' must satisfy
 * 0 <= len <= sb_ccc.  M_NOTREADY mbufs outside the TLS chain are unlinked
 * from the accounting but not placed on the returned chain.
 */
static struct mbuf *
sbcut_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m, *next, *mfree;
	bool is_tls;

	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
	    __func__, len));
	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
	    __func__, len, sb->sb_ccc));

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	is_tls = false;
	mfree = NULL;

	while (len > 0) {
		if (m == NULL) {
#ifdef KERN_TLS
			/*
			 * Regular chain exhausted: consume the detached
			 * byte count first, then continue on the TLS chain.
			 */
			if (next == NULL && !is_tls) {
				if (sb->sb_tlsdcc != 0) {
					MPASS(len >= sb->sb_tlsdcc);
					len -= sb->sb_tlsdcc;
					sb->sb_ccc -= sb->sb_tlsdcc;
					sb->sb_tlsdcc = 0;
					if (len == 0)
						break;
				}
				next = sb->sb_mtls;
				is_tls = true;
			}
#endif
			KASSERT(next, ("%s: no next, len %d", __func__, len));
			m = next;
			next = m->m_nextpkt;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim its front and stop. */
			KASSERT(!(m->m_flags & M_NOTAVAIL),
			    ("%s: m %p M_NOTAVAIL", __func__, m));
			m->m_len -= len;
			m->m_data += len;
			sb->sb_ccc -= len;
			sb->sb_acc -= len;
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		len -= m->m_len;
#ifdef KERN_TLS
		if (is_tls)
			sbfree_ktls_rx(sb, m);
		else
#endif
			sbfree(sb, m);
		/*
		 * Do not put M_NOTREADY buffers to the free list, they
		 * are referenced from outside.
		 */
		if (m->m_flags & M_NOTREADY && !is_tls)
			m = m->m_next;
		else {
			struct mbuf *n;

			/* Push m onto the front of the free list. */
			n = m->m_next;
			m->m_next = mfree;
			mfree = m;
			m = n;
		}
	}
	/*
	 * Free any zero-length mbufs from the buffer.
	 * For SOCK_DGRAM sockets such mbufs represent empty records.
	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
	 * when sosend_generic() needs to send only control data.
	 */
	while (m && m->m_len == 0) {
		struct mbuf *n;

		sbfree(sb, m);
		n = m->m_next;
		m->m_next = mfree;
		mfree = m;
		m = n;
	}
#ifdef KERN_TLS
	if (is_tls) {
		/* The regular chain was fully consumed. */
		sb->sb_mb = NULL;
		sb->sb_mtls = m;
		if (m == NULL)
			sb->sb_mtlstail = NULL;
	} else
#endif
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}

	return (mfree);
}

/*
 * Drop data from (the front of) a sockbuf.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);
	m_freem(sbcut_internal(sb, len));
}

/*
 * Drop data from (the front of) a sockbuf,
 * and return it to caller.
 */
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);
	return (sbcut_internal(sb, len));
}

/*
 * Locking variant of sbdrop_locked(); the cut mbufs are freed after the
 * sockbuf lock has been released.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *mfree;

	SOCKBUF_LOCK(sb);
	mfree = sbcut_internal(sb, len);
	SOCKBUF_UNLOCK(sb);

	m_freem(mfree);
}

/*
 * Return the mbuf from which a send at offset 'off' should start and store
 * in *moff the offset relative to that mbuf.  Uses the cached sb_sndptr
 * when it does not lie past 'off'; otherwise returns sb_mb with
 * *moff == off.  Initializes sb_sndptr to sb_mb if it was NULL, but does
 * not advance it.
 */
struct mbuf *
sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
{
	struct mbuf *m;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
		*moff = off;
		if (sb->sb_sndptr == NULL) {
			sb->sb_sndptr = sb->sb_mb;
			sb->sb_sndptroff = 0;
		}
		return (sb->sb_mb);
	} else {
		m = sb->sb_sndptr;
		off -= sb->sb_sndptroff;
	}
	*moff = off;
	return (m);
}

/*
 * Advance the cached send pointer past 'len' bytes starting at 'mb'.  Does
 * nothing unless 'mb' is the current sb_sndptr.  The pointer is never moved
 * past the last mbuf of the chain.
 */
void
sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
{
	/*
	 * A small copy was done, advance forward the sb_sbsndptr to cover
	 * it.
	 */
	struct mbuf *m;

	if (mb != sb->sb_sndptr) {
		/* Did not copyout at the same mbuf */
		return;
	}
	m = mb;
	while (m && (len > 0)) {
		if (len >= m->m_len) {
			len -= m->m_len;
			if (m->m_next) {
				sb->sb_sndptroff += m->m_len;
				sb->sb_sndptr = m->m_next;
			}
			m = m->m_next;
		} else {
			/* Copy ended inside this mbuf; stop here. */
			len = 0;
		}
	}
}

/*
 * Return the first mbuf and the mbuf data offset for the provided
 * send offset without changing the "sb_sndptroff" field.
*/
struct mbuf *
sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
{
	struct mbuf *m;

	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));

	/*
	 * If the "off" is below the stored offset, which happens on
	 * retransmits, just use "sb_mb":
	 */
	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
		m = sb->sb_mb;
	} else {
		m = sb->sb_sndptr;
		off -= sb->sb_sndptroff;
	}
	/* Skip whole mbufs until 'off' falls inside one. */
	while (off > 0 && m != NULL) {
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	*moff = off;
	return (m);
}

/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord_locked(struct sockbuf *sb)
{
	struct mbuf *m;

	SOCKBUF_LOCK_ASSERT(sb);

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		/* Un-account and free every mbuf of the dropped record. */
		do {
			sbfree(sb, m);
			m = m_free(m);
		} while (m);
	}
	SB_EMPTY_FIXUP(sb);
}

/*
 * Drop a record off the front of a sockbuf and move the next record to the
 * front.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Create a "control" mbuf containing the specified data with the specified
 * type for presentation on a socket buffer.
 *
 * 'p' may be NULL, in which case the data area is left zeroed.  Returns
 * NULL if the mbuf cannot be allocated or, for M_NOWAIT callers, if
 * CMSG_SPACE(size) exceeds MCLBYTES.
 */
struct mbuf *
sbcreatecontrol(const void *p, u_int size, int type, int level, int wait)
{
	struct cmsghdr *cp;
	struct mbuf *m;

	MBUF_CHECKSLEEP(wait);

	if (wait == M_NOWAIT) {
		if (CMSG_SPACE(size) > MCLBYTES)
			return (NULL);
	} else
		KASSERT(CMSG_SPACE(size) <= MCLBYTES,
		    ("%s: passed CMSG_SPACE(%u) > MCLBYTES", __func__, size));

	/* Use a cluster only when the message does not fit a plain mbuf. */
	if (CMSG_SPACE(size) > MLEN)
		m = m_getcl(wait, MT_CONTROL, 0);
	else
		m = m_get(wait, MT_CONTROL);
	if (m == NULL)
		return (NULL);

	KASSERT(CMSG_SPACE(size) <= M_TRAILINGSPACE(m),
	    ("sbcreatecontrol: short mbuf"));
	/*
	 * Don't leave the padding between the msg header and the
	 * cmsg data and the padding after the cmsg data un-initialized.
	 */
	cp = mtod(m, struct cmsghdr *);
	bzero(cp, CMSG_SPACE(size));
	if (p != NULL)
		(void)memcpy(CMSG_DATA(cp), p, size);
	m->m_len = CMSG_SPACE(size);
	cp->cmsg_len = CMSG_LEN(size);
	cp->cmsg_level = level;
	cp->cmsg_type = type;
	return (m);
}

/*
 * This does the same for socket buffers that sotoxsocket does for sockets:
 * generate an user-format data structure describing the socket buffer.  Note
 * that the xsockbuf structure, since it is always embedded in a socket, does
 * not include a self pointer nor a length.  We make this entry point public
 * in case some other mechanism needs it.
 */
void
sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
{

	xsb->sb_cc = sb->sb_ccc;
	xsb->sb_hiwat = sb->sb_hiwat;
	xsb->sb_mbcnt = sb->sb_mbcnt;
	xsb->sb_mbmax = sb->sb_mbmax;
	xsb->sb_lowat = sb->sb_lowat;
	xsb->sb_flags = sb->sb_flags;
	xsb->sb_timeo = sb->sb_timeo;
}

/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0,
    sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "Socket buffer size waste factor");
diff --git a/sys/netsmb/smb_trantcp.c b/sys/netsmb/smb_trantcp.c
index f19eea45f265..fbbf987367d8 100644
--- a/sys/netsmb/smb_trantcp.c
+++ b/sys/netsmb/smb_trantcp.c
@@ -1,689 +1,683 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2000-2001 Boris Popov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define M_NBDATA M_PCB static int smb_tcpsndbuf = NB_SNDQ - 1; static int smb_tcprcvbuf = NB_RCVQ - 1; SYSCTL_DECL(_net_smb); SYSCTL_INT(_net_smb, OID_AUTO, tcpsndbuf, CTLFLAG_RW, &smb_tcpsndbuf, 0, ""); SYSCTL_INT(_net_smb, OID_AUTO, tcprcvbuf, CTLFLAG_RW, &smb_tcprcvbuf, 0, ""); #define nb_sosend(so,m,flags,td) sosend(so, NULL, 0, m, 0, flags, td) static int nbssn_recv(struct nbpcb *nbp, struct mbuf **mpp, int *lenp, u_int8_t *rpcodep, struct thread *td); static int smb_nbst_disconnect(struct smb_vc *vcp, struct thread *td); static int nb_setsockopt_int(struct socket *so, int level, int name, int val) { struct sockopt sopt; int error; bzero(&sopt, sizeof(sopt)); sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = &val; sopt.sopt_valsize = sizeof(val); 
CURVNET_SET(so->so_vnet); error = sosetopt(so, &sopt); CURVNET_RESTORE(); return error; } static int nb_intr(struct nbpcb *nbp, struct proc *p) { return 0; } static int nb_upcall(struct socket *so, void *arg, int waitflag) { struct nbpcb *nbp = arg; if (arg == NULL || nbp->nbp_selectid == NULL) return (SU_OK); wakeup(nbp->nbp_selectid); return (SU_OK); } static int nb_sethdr(struct mbuf *m, u_int8_t type, u_int32_t len) { u_int32_t *p = mtod(m, u_int32_t *); *p = htonl((len & 0x1FFFF) | (type << 24)); return 0; } static int nb_put_name(struct mbchain *mbp, struct sockaddr_nb *snb) { int error; u_char seglen, *cp; cp = snb->snb_name; if (*cp == 0) return EINVAL; NBDEBUG("[%s]\n", cp); for (;;) { seglen = (*cp) + 1; error = mb_put_mem(mbp, cp, seglen, MB_MSYSTEM); if (error) return error; if (seglen == 1) break; cp += seglen; } return 0; } static int nb_connect_in(struct nbpcb *nbp, struct sockaddr_in *to, struct thread *td) { struct socket *so; int error, s; error = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, td->td_ucred, td); if (error) return error; nbp->nbp_tso = so; SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, nb_upcall, nbp); SOCKBUF_UNLOCK(&so->so_rcv); so->so_rcv.sb_timeo = (5 * SBT_1S); so->so_snd.sb_timeo = (5 * SBT_1S); error = soreserve(so, nbp->nbp_sndbuf, nbp->nbp_rcvbuf); if (error) goto bad; nb_setsockopt_int(so, SOL_SOCKET, SO_KEEPALIVE, 1); nb_setsockopt_int(so, IPPROTO_TCP, TCP_NODELAY, 1); - SOCKBUF_LOCK(&so->so_rcv); - so->so_rcv.sb_flags &= ~SB_NOINTR; - SOCKBUF_UNLOCK(&so->so_rcv); - SOCKBUF_LOCK(&so->so_snd); - so->so_snd.sb_flags &= ~SB_NOINTR; - SOCKBUF_UNLOCK(&so->so_snd); error = soconnect(so, (struct sockaddr*)to, td); if (error) goto bad; s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { tsleep(&so->so_timeo, PSOCK, "nbcon", 2 * hz); if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0 && (error = nb_intr(nbp, td->td_proc)) != 0) { so->so_state &= ~SS_ISCONNECTING; splx(s); goto bad; } } if 
(so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto bad; } splx(s); return 0; bad: smb_nbst_disconnect(nbp->nbp_vc, td); return error; } static int nbssn_rq_request(struct nbpcb *nbp, struct thread *td) { struct mbchain *mbp; struct mdchain *mdp; struct mbuf *m0; struct timeval tv; struct sockaddr_in sin; u_short port; u_int8_t rpcode; int error, rplen; mbp = malloc(sizeof(struct mbchain), M_NBDATA, M_WAITOK); mdp = malloc(sizeof(struct mbchain), M_NBDATA, M_WAITOK); error = mb_init(mbp); if (error) { free(mbp, M_NBDATA); free(mdp, M_NBDATA); return error; } mb_put_uint32le(mbp, 0); nb_put_name(mbp, nbp->nbp_paddr); nb_put_name(mbp, nbp->nbp_laddr); nb_sethdr(mbp->mb_top, NB_SSN_REQUEST, mb_fixhdr(mbp) - 4); error = nb_sosend(nbp->nbp_tso, mbp->mb_top, 0, td); if (!error) { nbp->nbp_state = NBST_RQSENT; } mb_detach(mbp); mb_done(mbp); free(mbp, M_NBDATA); if (error) { free(mdp, M_NBDATA); return error; } TIMESPEC_TO_TIMEVAL(&tv, &nbp->nbp_timo); error = selsocket(nbp->nbp_tso, POLLIN, &tv, td); if (error == EWOULDBLOCK) { /* Timeout */ NBDEBUG("initial request timeout\n"); free(mdp, M_NBDATA); return ETIMEDOUT; } if (error) { /* restart or interrupt */ free(mdp, M_NBDATA); return error; } error = nbssn_recv(nbp, &m0, &rplen, &rpcode, td); if (error) { NBDEBUG("recv() error %d\n", error); free(mdp, M_NBDATA); return error; } /* * Process NETBIOS reply */ if (m0) md_initm(mdp, m0); error = 0; do { if (rpcode == NB_SSN_POSRESP) { nbp->nbp_state = NBST_SESSION; nbp->nbp_flags |= NBF_CONNECTED; break; } if (rpcode != NB_SSN_RTGRESP) { error = ECONNABORTED; break; } if (rplen != 6) { error = ECONNABORTED; break; } md_get_mem(mdp, (caddr_t)&sin.sin_addr, 4, MB_MSYSTEM); md_get_uint16(mdp, &port); sin.sin_port = port; nbp->nbp_state = NBST_RETARGET; smb_nbst_disconnect(nbp->nbp_vc, td); error = nb_connect_in(nbp, &sin, td); if (!error) error = nbssn_rq_request(nbp, td); if (error) { smb_nbst_disconnect(nbp->nbp_vc, td); break; } } while(0); if (m0) 
md_done(mdp); free(mdp, M_NBDATA); return error; } static int nbssn_recvhdr(struct nbpcb *nbp, int *lenp, u_int8_t *rpcodep, int flags, struct thread *td) { struct socket *so = nbp->nbp_tso; struct uio auio; struct iovec aio; u_int32_t len; int error; aio.iov_base = (caddr_t)&len; aio.iov_len = sizeof(len); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(len); auio.uio_td = td; CURVNET_SET(so->so_vnet); error = soreceive(so, (struct sockaddr **)NULL, &auio, (struct mbuf **)NULL, (struct mbuf **)NULL, &flags); CURVNET_RESTORE(); if (error) return error; if (auio.uio_resid > 0) { SMBSDEBUG("short reply\n"); return EPIPE; } len = ntohl(len); *rpcodep = (len >> 24) & 0xFF; len &= 0x1ffff; if (len > SMB_MAXPKTLEN) { SMBERROR("packet too long (%d)\n", len); return EFBIG; } *lenp = len; return 0; } static int nbssn_recv(struct nbpcb *nbp, struct mbuf **mpp, int *lenp, u_int8_t *rpcodep, struct thread *td) { struct socket *so = nbp->nbp_tso; struct uio auio; struct mbuf *m, *tm, *im; u_int8_t rpcode; int len, resid; int error, rcvflg; if (so == NULL) return ENOTCONN; if (mpp) *mpp = NULL; m = NULL; for(;;) { /* * Poll for a response header. * If we don't have one waiting, return. */ len = 0; rpcode = 0; error = nbssn_recvhdr(nbp, &len, &rpcode, MSG_DONTWAIT, td); if ((so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) || (so->so_rcv.sb_state & SBS_CANTRCVMORE)) { nbp->nbp_state = NBST_CLOSED; NBDEBUG("session closed by peer\n"); return ECONNRESET; } if (error) return error; if (len == 0 && nbp->nbp_state != NBST_SESSION) break; /* no data, try again */ if (rpcode == NB_SSN_KEEPALIVE) continue; /* * Loop, blocking, for data following the response header. * * Note that we can't simply block here with MSG_WAITALL for the * entire response size, as it may be larger than the TCP * slow-start window that the sender employs. 
This will result
		 * in the sender stalling until the delayed ACK is sent, then
		 * resuming slow-start, resulting in very poor performance.
		 *
		 * Instead, we never request more than NB_SORECEIVE_CHUNK
		 * bytes at a time, resulting in an ack being pushed by
		 * the TCP code at the completion of each call.
		 */
		resid = len;
		while (resid > 0) {
			tm = NULL;
			rcvflg = MSG_WAITALL;
			bzero(&auio, sizeof(auio));
			auio.uio_resid = min(resid, NB_SORECEIVE_CHUNK);
			auio.uio_td = td;
			resid -= auio.uio_resid;
			/*
			 * Spin until we have collected everything in
			 * this chunk.
			 */
			do {
				rcvflg = MSG_WAITALL;
				CURVNET_SET(so->so_vnet);
				error = soreceive(so, (struct sockaddr **)NULL,
				    &auio, &tm, (struct mbuf **)NULL, &rcvflg);
				CURVNET_RESTORE();
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (error)
				goto out;
			/* short return guarantees unhappiness */
			if (auio.uio_resid > 0) {
				SMBERROR("packet is shorter than expected\n");
				error = EPIPE;
				goto out;
			}
			/* append received chunk to previous chunk(s) */
			if (m == NULL) {
				m = tm;
			} else {
				/*
				 * Just glue the new chain on the end.
				 * Consumer will pullup as required.
				 */
				for (im = m; im->m_next != NULL; im = im->m_next)
					;
				im->m_next = tm;
			}
		}
		/* got a session/message packet? */
		if (nbp->nbp_state == NBST_SESSION &&
		    rpcode == NB_SSN_MESSAGE)
			break;
		/* drop packet and try for another */
		NBDEBUG("non-session packet %x\n", rpcode);
		if (m) {
			m_freem(m);
			m = NULL;
		}
	}

out:
	if (error) {
		if (m)
			m_freem(m);
		return error;
	}
	if (mpp)
		*mpp = m;
	else
		m_freem(m);
	*lenp = len;
	*rpcodep = rpcode;
	return 0;
}

/*
 * SMB transport interface
 */

/*
 * Allocate and zero the transport control block (struct nbpcb) for a VC.
 * The block gets a 15 second default timeout, the NBST_CLOSED state and
 * the smb_tcpsndbuf/smb_tcprcvbuf buffer sizes, and is stored in
 * vcp->vc_tdata.  td is not used.  Always returns 0; the allocation is
 * requested with M_WAITOK and its result is not checked.
 */
static int
smb_nbst_create(struct smb_vc *vcp, struct thread *td)
{
	struct nbpcb *nbp;

	nbp = malloc(sizeof *nbp, M_NBDATA, M_WAITOK);
	bzero(nbp, sizeof *nbp);
	nbp->nbp_timo.tv_sec = 15;	/* XXX: sysctl ? */
	nbp->nbp_state = NBST_CLOSED;
	nbp->nbp_vc = vcp;
	nbp->nbp_sndbuf = smb_tcpsndbuf;
	nbp->nbp_rcvbuf = smb_tcprcvbuf;
	vcp->vc_tdata = nbp;
	return 0;
}

/*
 * Release the transport control block of a VC.
 *
 * Returns ENOTCONN when vcp->vc_tdata is NULL.  Otherwise disconnects
 * (the result of smb_nbst_disconnect() is ignored), frees the local and
 * peer addresses when present, frees the control block and returns 0.
 *
 * NOTE(review): vcp->vc_tdata is not reset here, so it still holds the
 * freed pointer when this function returns.
 */
static int
smb_nbst_done(struct smb_vc *vcp, struct thread *td)
{
	struct nbpcb *nbp = vcp->vc_tdata;

	if (nbp == NULL)
		return ENOTCONN;
	smb_nbst_disconnect(vcp, td);
	if (nbp->nbp_laddr)
		free(nbp->nbp_laddr, M_SONAME);
	if (nbp->nbp_paddr)
		free(nbp->nbp_paddr, M_SONAME);
	free(nbp, M_NBDATA);
	return 0;
}

/*
 * Record the local address for this transport.
 *
 * A private copy of sap is stored in nbp_laddr and NBF_LOCADDR is set.
 * Returns EINVAL when a local address was already recorded, when sap is
 * NULL or when sap->sa_len is below NB_MINSALEN; ENOMEM when the copy
 * comes back NULL; 0 on success.  td is not used.
 */
static int
smb_nbst_bind(struct smb_vc *vcp, struct sockaddr *sap, struct thread *td)
{
	struct nbpcb *nbp = vcp->vc_tdata;
	struct sockaddr_nb *snb;
	int error, slen;

	NBDEBUG("\n");
	error = EINVAL;
	/* do { } while(0) is used only so that "break" can leave early. */
	do {
		if (nbp->nbp_flags & NBF_LOCADDR)
			break;
		/*
		 * It is possible to create the NetBIOS name in the kernel,
		 * but nothing prevents us from doing it in user space.
		 */
		if (sap == NULL)
			break;
		slen = sap->sa_len;
		if (slen < NB_MINSALEN)
			break;
		snb = (struct sockaddr_nb*)sodupsockaddr(sap, M_WAITOK);
		if (snb == NULL) {
			error = ENOMEM;
			break;
		}
		nbp->nbp_laddr = snb;
		nbp->nbp_flags |= NBF_LOCADDR;
		error = 0;
	} while(0);
	return error;
}

/*
 * Connect to the peer described by sap and issue the session request.
 *
 * Returns EISCONN when a socket is already attached, EINVAL when no local
 * address has been recorded or sap->sa_len is below NB_MINSALEN, ENOMEM
 * when the address copy comes back NULL, or the error returned by
 * nb_connect_in() / nbssn_rq_request().  Any previously stored peer
 * address is freed and replaced by a copy of sap.
 *
 * The time taken by nb_connect_in() is measured and used to derive
 * nbp_timo (see below).  When nbssn_rq_request() fails the transport is
 * disconnected before the error is returned; a nb_connect_in() failure is
 * returned directly.
 *
 * NOTE(review): unlike smb_nbst_bind(), sap is dereferenced here without a
 * NULL check.
 */
static int
smb_nbst_connect(struct smb_vc *vcp, struct sockaddr *sap, struct thread *td)
{
	struct nbpcb *nbp = vcp->vc_tdata;
	struct sockaddr_in sin;
	struct sockaddr_nb *snb;
	struct timespec ts1, ts2;
	int error, slen;

	NBDEBUG("\n");
	if (nbp->nbp_tso != NULL)
		return EISCONN;
	if (nbp->nbp_laddr == NULL)
		return EINVAL;
	slen = sap->sa_len;
	if (slen < NB_MINSALEN)
		return EINVAL;
	if (nbp->nbp_paddr) {
		free(nbp->nbp_paddr, M_SONAME);
		nbp->nbp_paddr = NULL;
	}
	snb = (struct sockaddr_nb*)sodupsockaddr(sap, M_WAITOK);
	if (snb == NULL)
		return ENOMEM;
	nbp->nbp_paddr = snb;
	sin = snb->snb_addrin;
	getnanotime(&ts1);
	error = nb_connect_in(nbp, &sin, td);
	if (error)
		return error;
	getnanotime(&ts2);
	/* ts2 becomes the elapsed connect time ... */
	timespecsub(&ts2, &ts1, &ts2);
	/* ... replaced by exactly one second when its tv_sec is 0. */
	if (ts2.tv_sec == 0) {
		ts2.tv_sec = 1;
		ts2.tv_nsec = 0;
	}
	/*
	 * Three additions: nbp_timo = ts2 + ts2, then + ts2 twice more,
	 * i.e. nbp_timo ends up as ts2 * 4.
	 */
	timespecadd(&ts2, &ts2, &nbp->nbp_timo);
	timespecadd(&nbp->nbp_timo, &ts2, &nbp->nbp_timo);
	timespecadd(&nbp->nbp_timo, &ts2, &nbp->nbp_timo);
	error = nbssn_rq_request(nbp, td);
	if (error)
		smb_nbst_disconnect(vcp, td);
	return error;
}

/*
 * Shut down and close the transport socket.
 *
 * Returns ENOTCONN when there is no control block or no socket.
 * Otherwise NBF_CONNECTED is cleared and nbp_tso is set to NULL before the
 * socket is shut down (how = 2, presumably SHUT_RDWR -- confirm) and
 * closed.  The state becomes NBST_CLOSED unless it is NBST_RETARGET.
 * Returns 0.  td is not used.
 */
static int
smb_nbst_disconnect(struct smb_vc *vcp, struct thread *td)
{
	struct nbpcb *nbp = vcp->vc_tdata;
	struct socket *so;

	if (nbp == NULL || nbp->nbp_tso == NULL)
		return ENOTCONN;
	/* The != NULL test cannot fail after the early return above. */
	if ((so = nbp->nbp_tso) != NULL) {
		nbp->nbp_flags &= ~NBF_CONNECTED;
		nbp->nbp_tso = (struct socket *)NULL;
		soshutdown(so, 2);
		soclose(so);
	}
	if (nbp->nbp_state != NBST_RETARGET) {
		nbp->nbp_state = NBST_CLOSED;
	}
	return 0;
}

/*
 * Send one message.
 *
 * When the state is not NBST_SESSION, m0 is freed and ENOTCONN returned.
 * Otherwise a 4 byte header is prepended and filled in by nb_sethdr()
 * with type NB_SSN_MESSAGE and a length of m_fixhdr(m0) - 4 (the chain
 * length without the header just added), and the chain is handed to
 * nb_sosend(), whose result is returned.
 */
static int
smb_nbst_send(struct smb_vc *vcp, struct mbuf *m0, struct thread *td)
{
	struct nbpcb *nbp = vcp->vc_tdata;
	int error;

	if (nbp->nbp_state != NBST_SESSION) {
		error = ENOTCONN;
		goto abort;
	}
	M_PREPEND(m0, 4, M_WAITOK);
	nb_sethdr(m0, NB_SSN_MESSAGE, m_fixhdr(m0) - 4);
	error = nb_sosend(nbp->nbp_tso, m0, 0, td);
	return error;
abort:
	if (m0)
		m_freem(m0);
	return error;
}

/*
 * Receive one message through nbssn_recv() into *mpp.
 *
 * NBF_RECVLOCK is set in nbp_flags for the duration of the call.  The
 * length and packet code reported by nbssn_recv() are not used here;
 * only its error value is returned.
 */
static int
smb_nbst_recv(struct smb_vc *vcp, struct mbuf **mpp, struct thread *td)
{
	struct nbpcb *nbp = vcp->vc_tdata;
	u_int8_t rpcode;
	int error, rplen;

	nbp->nbp_flags |= NBF_RECVLOCK;
	error = nbssn_recv(nbp, mpp, &rplen, &rpcode, td);
	nbp->nbp_flags &= ~NBF_RECVLOCK;
	return error;
}

/* Timer hook of the transport: intentionally does nothing. */
static void
smb_nbst_timo(struct smb_vc *vcp)
{
	return;
}

/*
 * Interrupt hook: when a socket is attached, issue both the read-side and
 * the write-side wakeup on it.  Does nothing without a control block or
 * socket.
 */
static void
smb_nbst_intr(struct smb_vc *vcp)
{
	struct nbpcb *nbp = vcp->vc_tdata;

	if (nbp == NULL || nbp->nbp_tso == NULL)
		return;
	sorwakeup(nbp->nbp_tso);
	sowwakeup(nbp->nbp_tso);
}

/*
 * Read a transport parameter into *data.
 *
 * SMBTP_SNDSZ and SMBTP_RCVSZ store an int, SMBTP_TIMEOUT stores a
 * struct timespec, so data must point to storage of the matching type.
 * Returns 0, or EINVAL for any other param.
 */
static int
smb_nbst_getparam(struct smb_vc *vcp, int param, void *data)
{
	struct nbpcb *nbp = vcp->vc_tdata;

	switch (param) {
	    case SMBTP_SNDSZ:
		*(int*)data = nbp->nbp_sndbuf;
		break;
	    case SMBTP_RCVSZ:
		*(int*)data = nbp->nbp_rcvbuf;
		break;
	    case SMBTP_TIMEOUT:
		*(struct timespec*)data = nbp->nbp_timo;
		break;
	    default:
		return EINVAL;
	}
	return 0;
}

/*
 * Set a transport parameter.  Only SMBTP_SELECTID is accepted; the data
 * pointer itself (not what it points to) is stored in nbp_selectid.
 * Returns 0, or EINVAL for any other param.
 */
static int
smb_nbst_setparam(struct smb_vc *vcp, int param, void *data)
{
	struct nbpcb *nbp = vcp->vc_tdata;

	switch (param) {
	    case SMBTP_SELECTID:
		nbp->nbp_selectid = data;
		break;
	    default:
		return EINVAL;
	}
	return 0;
}

/*
* Check for fatal errors
 */
/*
 * Returns 1 when error is ENOTCONN, ENETRESET or ECONNABORTED and 0 for
 * every other value.  vcp is not used.
 */
static int
smb_nbst_fatal(struct smb_vc *vcp, int error)
{
	switch (error) {
	    case ENOTCONN:
	    case ENETRESET:
	    case ECONNABORTED:
		return 1;
	}
	return 0;
}

/*
 * Transport descriptor for SMBT_NBTCP.  The initializer is positional, so
 * the order of the entries has to follow the member order of
 * struct smb_tran_desc, which is declared outside this file section.
 */
struct smb_tran_desc smb_tran_nbtcp_desc = {
	SMBT_NBTCP,
	smb_nbst_create, smb_nbst_done,
	smb_nbst_bind, smb_nbst_connect, smb_nbst_disconnect,
	smb_nbst_send, smb_nbst_recv,
	smb_nbst_timo, smb_nbst_intr,
	smb_nbst_getparam, smb_nbst_setparam,
	smb_nbst_fatal
};
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index fd503d021282..73dd7afa371f 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -1,325 +1,325 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_

/*
 * Constants for sb_flags field of struct sockbuf/xsockbuf.
 *
 * sb_flags is declared as a short in struct sockbuf.  The values below
 * run from 0x01 to 0x8000; 0x40 is the only value in that range without
 * a live definition.
 */
#define	SB_TLS_RX	0x01		/* using KTLS on RX */
#define	SB_TLS_RX_RUNNING 0x02		/* KTLS RX operation running */
#define	SB_WAIT		0x04		/* someone is waiting for data/space */
#define	SB_SEL		0x08		/* someone is selecting */
#define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
#define	SB_UPCALL	0x20		/* someone wants an upcall */
/*
 * NOTE(review): the next two lines carry unified-diff "-"/"+" markers.
 * They record that the SB_NOINTR definition is replaced by a placeholder
 * comment, which leaves value 0x40 unassigned.
 */
-#define	SB_NOINTR	0x40		/* operations not interruptible */
+/* was SB_NOINTR 0x40 */
#define	SB_AIO		0x80		/* AIO operations queued */
#define	SB_KNOTE	0x100		/* kernel note attached */
#define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
#define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
#define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
#define	SB_STOP		0x1000		/* backpressure indicator */
#define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
#define	SB_SPLICED	0x4000		/* socket buffer is spliced; previously used for SB_TLS_IFNET */
#define	SB_TLS_RX_RESYNC 0x8000		/* KTLS RX lost HW sync */

/*
 * The SBS_* values reuse numbers already taken by SB_* flags above (0x10,
 * 0x20, 0x40), so they belong to a different field -- presumably sb_state;
 * confirm against the users of these constants.
 */
#define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
#define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
#define	SBS_RCVATMARK		0x0040	/* at mark on input */

/*
 * From here to the matching #endif the declarations are visible only to
 * the kernel or to code that defines _WANT_SOCKET.
 */
#if defined(_KERNEL) || defined(_WANT_SOCKET)
/*
 * NOTE(review): the header names of the four #include directives below
 * are missing in this copy of the text.
 */
#include
#include
#include
#include

#define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */

/* Forward declarations: only pointers to these types are needed here. */
struct ktls_session;
struct mbuf;
struct sockaddr;
struct socket;
struct sockopt;
struct thread;
struct selinfo;

/*
 * Socket buffer
 *
 * A buffer starts with the fields that are accessed by I/O multiplexing
 * APIs like select(2), kevent(2) or AIO and thus are shared between different
 * buffer implementations.  They are protected by the SOCK_RECVBUF_LOCK()
 * or SOCK_SENDBUF_LOCK() of the owning socket.
 *
 * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific
 * methods.
 *
 * Protocol specific implementations follow in a union.
 */
struct sockbuf {
	struct	selinfo *sb_sel;	/* process selecting read/write */
	short	sb_state;		/* socket state on sockbuf */
	short	sb_flags;		/* flags, see above */
	u_int	sb_acc;			/* available chars in buffer */
	u_int	sb_ccc;			/* claimed chars in buffer */
	u_int	sb_mbcnt;		/* chars of mbufs used */
	u_int	sb_ctl;			/* non-data chars in buffer */
	u_int	sb_hiwat;		/* max actual char count */
	u_int	sb_lowat;		/* low water mark */
	u_int	sb_mbmax;		/* max chars of mbufs to use */
	sbintime_t sb_timeo;		/* timeout for read/write */
	/* upcall function pointer, see SB_UPCALL; invoked outside this header */
	int	(*sb_upcall)(struct socket *, void *, int);
	/* presumably the void * argument handed to sb_upcall -- confirm */
	void	*sb_upcallarg;
	TAILQ_HEAD(, kaiocb) sb_aiojobq;	/* pending AIO ops */
	struct	task sb_aiotask;		/* AIO task */
	/*
	 * Anonymous union: the three anonymous structs below share storage,
	 * so only the member set of one of them is meaningful for a given
	 * buffer.
	 */
	union {
		/*
		 * Classic BSD one-size-fits-all socket buffer, capable of
		 * doing streams and datagrams. The stream part is able
		 * to perform special features:
		 * - not ready data (sendfile)
		 * - TLS
		 */
		struct {
			/* compat: sockbuf lock pointer */
			struct	mtx *sb_mtx;
			/* first and last mbufs in the chain */
			struct	mbuf *sb_mb;
			struct	mbuf *sb_mbtail;
			/* first mbuf of last record in socket buffer */
			struct	mbuf *sb_lastrecord;
			/* pointer to data to send next (TCP) */
			struct	mbuf *sb_sndptr;
			/* pointer to first not ready buffer */
			struct	mbuf *sb_fnrdy;
			/* byte offset of ptr into chain, used with sb_sndptr */
			u_int	sb_sndptroff;
			/* TLS */
			u_int	sb_tlscc;	/* TLS chain characters */
			u_int	sb_tlsdcc;	/* characters being decrypted */
			struct	mbuf *sb_mtls;	/*  TLS mbuf chain */
			struct	mbuf *sb_mtlstail; /* last mbuf in TLS chain */
			uint64_t sb_tls_seqno;	/* TLS seqno */
			/* TLS state, locked by sockbuf and sock I/O mutexes. */
			struct	ktls_session *sb_tls_info;
		};
		/*
		 * PF_UNIX/SOCK_DGRAM
		 *
		 * Local protocol, thus we should buffer on the receive side
		 * only.  However, in one to many configuration we don't want
		 * a single receive buffer to be shared.  So we would link
		 * send buffers onto receive buffer.  All the fields are locked
		 * by the receive buffer lock.
		 */
		struct {
			/*
			 * For receive buffer: own queue of this buffer for
			 * unconnected sends.  For send buffer: queue lent
			 * to the peer receive buffer, to isolate ourselves
			 * from other senders.
			 */
			STAILQ_HEAD(, mbuf)	uxdg_mb;
			/* For receive buffer: datagram seen via MSG_PEEK. */
			struct mbuf		*uxdg_peeked;
			/*
			 * For receive buffer: queue of send buffers of
			 * connected peers.  For send buffer: linkage on
			 * connected peer receive buffer queue.
			 */
			union {
				TAILQ_HEAD(, sockbuf)	uxdg_conns;
				TAILQ_ENTRY(sockbuf)	uxdg_clist;
			};
			/* Counters for this buffer uxdg_mb chain + peeked. */
			u_int uxdg_cc;
			u_int uxdg_ctl;
			u_int uxdg_mbcnt;
		};
		/*
		 * Netlink socket.
		 */
		struct {
			TAILQ_HEAD(, nl_buf)	nl_queue;
		};
	};
};

#endif	/* defined(_KERNEL) || defined(_WANT_SOCKET) */

#ifdef _KERNEL

/* 'which' values for KPIs that operate on one buffer of a socket.
*/
typedef enum { SO_RCV, SO_SND } sb_which;

/*
 * Per-socket buffer mutex used to protect most fields in the socket buffer.
 * These make use of the mutex pointer embedded in struct sockbuf, which
 * currently just references mutexes in the containing socket.  The
 * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with
 * these locking macros.
 *
 * All of the macros below go through the sb_mtx pointer of the given
 * sockbuf.
 */
#define	SOCKBUF_MTX(_sb)		((_sb)->sb_mtx)
#define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
#define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
#define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
#define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)

/*
 * Socket buffer private mbuf(9) flags.
 */
#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)

/*
 * Socket buffer KPI prototypes; the implementations are not part of this
 * header.  Several functions come as a plain/_locked pair -- presumably
 * the _locked variant expects the caller to already hold the buffer lock;
 * confirm against the implementation.
 */
void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
	    struct mbuf *m0, struct mbuf *control);
int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
	    struct mbuf *m0, struct mbuf *control);
int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
void	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
	    struct mbuf *control, int flags);
void	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
	    struct mbuf *control, int flags);
void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
	sbcreatecontrol(const void *p, u_int size, int type, int level,
	    int wait);
/* The following functions select the buffer of a socket through sb_which. */
void	sbdestroy(struct socket *, sb_which);
void	sbdrop(struct sockbuf *sb, int len);
void	sbdrop_locked(struct sockbuf *sb, int len);
struct mbuf *
	sbcut_locked(struct sockbuf *sb, int len);
void	sbdroprecord(struct sockbuf *sb);
void	sbdroprecord_locked(struct sockbuf *sb);
void	sbflush(struct sockbuf *sb);
void	sbflush_locked(struct sockbuf *sb);
void	sbrelease(struct socket *, sb_which);
void	sbrelease_locked(struct socket *, sb_which);
int	sbsetopt(struct socket *so, struct sockopt *);
bool	sbreserve_locked(struct socket *so, sb_which which, u_long cc,
	    struct thread *td);
bool	sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
	    u_long buf_max, struct thread *td);
void	sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len);
struct mbuf *
	sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff);
struct mbuf *
	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
int	sbwait(struct socket *, sb_which);
void	sballoc(struct sockbuf *, struct mbuf *);
void	sbfree(struct sockbuf *, struct mbuf *);
void	sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
void	sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
int	sbready(struct sockbuf *, struct mbuf *, int);

/*
 * Return how much data is available to be taken out of socket
 * buffer right now.
 *
 * This is the sb_acc field.  The lock assertion is compiled out with
 * #if 0, so lock ownership is not checked here.
 */
static inline u_int
sbavail(struct sockbuf *sb)
{

#if 0
	SOCKBUF_LOCK_ASSERT(sb);
#endif
	return (sb->sb_acc);
}

/*
 * Return how much data sits there in the socket buffer.
 * It might be that some data is not yet ready to be read.
 *
 * This is the sb_ccc field.  The lock assertion is compiled out with
 * #if 0, so lock ownership is not checked here.
 */
static inline u_int
sbused(struct sockbuf *sb)
{

#if 0
	SOCKBUF_LOCK_ASSERT(sb);
#endif
	return (sb->sb_ccc);
}

/*
 * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
 * This is problematical if the fields are unsigned, as the space might
 * still be negative (ccc > hiwat or mbcnt > mbmax).
*/
/*
 * Returns 0 when SB_STOP is set in sb_flags.  Otherwise returns the
 * smaller of the remaining byte budget (sb_hiwat - sb_ccc) and the
 * remaining mbuf budget (sb_mbmax - sb_mbcnt).
 *
 * Both differences are computed on u_int fields and stored in ints; that
 * conversion is what lets an over-committed buffer show up as a negative
 * amount of space (relies on the usual wrap-around conversion to int).
 * The lock assertion is compiled out with #if 0.
 */
static inline long
sbspace(struct sockbuf *sb)
{
	int bleft, mleft;		/* size should match sockbuf fields */

#if 0
	SOCKBUF_LOCK_ASSERT(sb);
#endif

	if (sb->sb_flags & SB_STOP)
		return(0);

	bleft = sb->sb_hiwat - sb->sb_ccc;
	mleft = sb->sb_mbmax - sb->sb_mbcnt;

	return ((bleft < mleft) ? bleft : mleft);
}

/*
 * When the mbuf chain head sb_mb is NULL, also reset sb_mbtail and
 * sb_lastrecord to NULL.  Wrapped in do { } while (0) so the macro can be
 * used like a single statement.
 */
#define	SB_EMPTY_FIXUP(sb) do {						\
	if ((sb)->sb_mb == NULL) {					\
		(sb)->sb_mbtail = NULL;					\
		(sb)->sb_lastrecord = NULL;				\
	}								\
} while (/*CONSTCOND*/0)

/*
 * Consistency-check hooks.  With SOCKBUF_DEBUG defined the macros call
 * the checker functions with the caller's __FILE__ and __LINE__; without
 * it they expand to empty statements.
 */
#ifdef SOCKBUF_DEBUG
void	sblastrecordchk(struct sockbuf *, const char *, int);
void	sblastmbufchk(struct sockbuf *, const char *, int);
void	sbcheck(struct sockbuf *, const char *, int);
#define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
#define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
#define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
#else
#define	SBLASTRECORDCHK(sb)	do {} while (0)
#define	SBLASTMBUFCHK(sb)	do {} while (0)
#define	SBCHECK(sb)		do {} while (0)
#endif /* SOCKBUF_DEBUG */

#endif /* _KERNEL */

#endif /* _SYS_SOCKBUF_H_ */