Index: head/sys/kern/uipc_debug.c =================================================================== --- head/sys/kern/uipc_debug.c (revision 295135) +++ head/sys/kern/uipc_debug.c (revision 295136) @@ -1,530 +1,530 @@ /*- * Copyright (c) 2007 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Debugger routines relating to sockets, protocols, etc, for use in DDB. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #ifdef DDB #include static void db_print_sotype(short so_type) { switch (so_type) { case SOCK_STREAM: db_printf("SOCK_STREAM"); break; case SOCK_DGRAM: db_printf("SOCK_DGRAM"); break; case SOCK_RAW: db_printf("SOCK_RAW"); break; case SOCK_RDM: db_printf("SOCK_RDM"); break; case SOCK_SEQPACKET: db_printf("SOCK_SEQPACKET"); break; default: db_printf("unknown"); break; } } static void db_print_sooptions(short so_options) { int comma; comma = 0; if (so_options & SO_DEBUG) { db_printf("%sSO_DEBUG", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTCONN) { db_printf("%sSO_ACCEPTCONN", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEADDR) { db_printf("%sSO_REUSEADDR", comma ? ", " : ""); comma = 1; } if (so_options & SO_KEEPALIVE) { db_printf("%sSO_KEEPALIVE", comma ? ", " : ""); comma = 1; } if (so_options & SO_DONTROUTE) { db_printf("%sSO_DONTROUTE", comma ? ", " : ""); comma = 1; } if (so_options & SO_BROADCAST) { db_printf("%sSO_BROADCAST", comma ? ", " : ""); comma = 1; } if (so_options & SO_USELOOPBACK) { db_printf("%sSO_USELOOPBACK", comma ? ", " : ""); comma = 1; } if (so_options & SO_LINGER) { db_printf("%sSO_LINGER", comma ? ", " : ""); comma = 1; } if (so_options & SO_OOBINLINE) { db_printf("%sSO_OOBINLINE", comma ? ", " : ""); comma = 1; } if (so_options & SO_REUSEPORT) { db_printf("%sSO_REUSEPORT", comma ? ", " : ""); comma = 1; } if (so_options & SO_TIMESTAMP) { db_printf("%sSO_TIMESTAMP", comma ? ", " : ""); comma = 1; } if (so_options & SO_NOSIGPIPE) { db_printf("%sSO_NOSIGPIPE", comma ? ", " : ""); comma = 1; } if (so_options & SO_ACCEPTFILTER) { db_printf("%sSO_ACCEPTFILTER", comma ? ", " : ""); comma = 1; } if (so_options & SO_BINTIME) { db_printf("%sSO_BINTIME", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_OFFLOAD) { db_printf("%sSO_NO_OFFLOAD", comma ? ", " : ""); comma = 1; } if (so_options & SO_NO_DDP) { db_printf("%sSO_NO_DDP", comma ? ", " : ""); comma = 1; } } static void db_print_sostate(short so_state) { int comma; comma = 0; if (so_state & SS_NOFDREF) { db_printf("%sSS_NOFDREF", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONNECTED) { db_printf("%sSS_ISCONNECTED", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONNECTING) { db_printf("%sSS_ISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISDISCONNECTING) { db_printf("%sSS_ISDISCONNECTING", comma ? ", " : ""); comma = 1; } if (so_state & SS_NBIO) { db_printf("%sSS_NBIO", comma ? ", " : ""); comma = 1; } if (so_state & SS_ASYNC) { db_printf("%sSS_ASYNC", comma ? ", " : ""); comma = 1; } if (so_state & SS_ISCONFIRMING) { db_printf("%sSS_ISCONFIRMING", comma ? ", " : ""); comma = 1; } if (so_state & SS_PROTOREF) { db_printf("%sSS_PROTOREF", comma ? ", " : ""); comma = 1; } } static void db_print_soqstate(int so_qstate) { int comma; comma = 0; if (so_qstate & SQ_INCOMP) { db_printf("%sSQ_INCOMP", comma ? ", " : ""); comma = 1; } if (so_qstate & SQ_COMP) { db_printf("%sSQ_COMP", comma ? ", " : ""); comma = 1; } } static void db_print_sbstate(short sb_state) { int comma; comma = 0; if (sb_state & SBS_CANTSENDMORE) { db_printf("%sSBS_CANTSENDMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_CANTRCVMORE) { db_printf("%sSBS_CANTRCVMORE", comma ? ", " : ""); comma = 1; } if (sb_state & SBS_RCVATMARK) { db_printf("%sSBS_RCVATMARK", comma ? ", " : ""); comma = 1; } } static void db_print_indent(int indent) { int i; for (i = 0; i < indent; i++) db_printf(" "); } static void db_print_domain(struct domain *d, const char *domain_name, int indent) { db_print_indent(indent); db_printf("%s at %p\n", domain_name, d); indent += 2; db_print_indent(indent); db_printf("dom_family: %d ", d->dom_family); db_printf("dom_name: %s\n", d->dom_name); db_print_indent(indent); db_printf("dom_init: %p ", d->dom_init); db_printf("dom_externalize: %p ", d->dom_externalize); db_printf("dom_dispose: %p\n", d->dom_dispose); db_print_indent(indent); db_printf("dom_protosw: %p ", d->dom_protosw); db_printf("dom_next: %p\n", d->dom_next); db_print_indent(indent); db_printf("dom_rtattach: %p ", d->dom_rtattach); db_print_indent(indent); db_printf("dom_ifattach: %p ", d->dom_ifattach); db_printf("dom_ifdetach: %p\n", d->dom_ifdetach); } static void db_print_prflags(short pr_flags) { int comma; comma = 0; if (pr_flags & PR_ATOMIC) { db_printf("%sPR_ATOMIC", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_ADDR) { db_printf("%sPR_ADDR", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_CONNREQUIRED) { db_printf("%sPR_CONNREQUIRED", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_WANTRCVD) { db_printf("%sPR_WANTRCVD", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_RIGHTS) { db_printf("%sPR_RIGHTS", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_IMPLOPCL) { db_printf("%sPR_IMPLOPCL", comma ? ", " : ""); comma = 1; } if (pr_flags & PR_LASTHDR) { db_printf("%sPR_LASTHDR", comma ? ", " : ""); comma = 1; } } static void db_print_protosw(struct protosw *pr, const char *prname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", prname, pr); indent += 2; db_print_indent(indent); db_printf("pr_type: %d ", pr->pr_type); db_printf("pr_domain: %p\n", pr->pr_domain); if (pr->pr_domain != NULL) db_print_domain(pr->pr_domain, "pr_domain", indent); db_print_indent(indent); db_printf("pr_protocol: %d\n", pr->pr_protocol); db_print_indent(indent); db_printf("pr_flags: %d (", pr->pr_flags); db_print_prflags(pr->pr_flags); db_printf(")\n"); db_print_indent(indent); db_printf("pr_input: %p ", pr->pr_input); db_printf("pr_output: %p ", pr->pr_output); db_printf("pr_ctlinput: %p\n", pr->pr_ctlinput); db_print_indent(indent); db_printf("pr_ctloutput: %p ", pr->pr_ctloutput); db_printf("pr_init: %p\n", pr->pr_init); db_print_indent(indent); db_printf("pr_fasttimo: %p ", pr->pr_fasttimo); db_printf("pr_slowtimo: %p ", pr->pr_slowtimo); db_printf("pr_drain: %p\n", pr->pr_drain); db_print_indent(indent); } static void db_print_sbflags(short sb_flags) { int comma; comma = 0; if (sb_flags & SB_WAIT) { db_printf("%sSB_WAIT", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_SEL) { db_printf("%sSB_SEL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_ASYNC) { db_printf("%sSB_ASYNC", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_UPCALL) { db_printf("%sSB_UPCALL", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_NOINTR) { db_printf("%sSB_NOINTR", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AIO) { db_printf("%sSB_AIO", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_KNOTE) { db_printf("%sSB_KNOTE", comma ? ", " : ""); comma = 1; } if (sb_flags & SB_AUTOSIZE) { db_printf("%sSB_AUTOSIZE", comma ? ", " : ""); comma = 1; } } static void db_print_sockbuf(struct sockbuf *sb, const char *sockbufname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", sockbufname, sb); indent += 2; db_print_indent(indent); db_printf("sb_state: 0x%x (", sb->sb_state); db_print_sbstate(sb->sb_state); db_printf(")\n"); db_print_indent(indent); db_printf("sb_mb: %p ", sb->sb_mb); db_printf("sb_mbtail: %p ", sb->sb_mbtail); db_printf("sb_lastrecord: %p\n", sb->sb_lastrecord); db_print_indent(indent); db_printf("sb_sndptr: %p ", sb->sb_sndptr); db_printf("sb_sndptroff: %u\n", sb->sb_sndptroff); db_print_indent(indent); db_printf("sb_acc: %u ", sb->sb_acc); db_printf("sb_ccc: %u ", sb->sb_ccc); db_printf("sb_hiwat: %u ", sb->sb_hiwat); db_printf("sb_mbcnt: %u ", sb->sb_mbcnt); db_printf("sb_mbmax: %u\n", sb->sb_mbmax); db_print_indent(indent); db_printf("sb_ctl: %u ", sb->sb_ctl); db_printf("sb_lowat: %d ", sb->sb_lowat); db_printf("sb_timeo: %jd\n", sb->sb_timeo); db_print_indent(indent); db_printf("sb_flags: 0x%x (", sb->sb_flags); db_print_sbflags(sb->sb_flags); db_printf(")\n"); } static void db_print_socket(struct socket *so, const char *socketname, int indent) { db_print_indent(indent); db_printf("%s at %p\n", socketname, so); indent += 2; db_print_indent(indent); db_printf("so_count: %d ", so->so_count); db_printf("so_type: %d (", so->so_type); db_print_sotype(so->so_type); db_printf(")\n"); db_print_indent(indent); db_printf("so_options: 0x%x (", so->so_options); db_print_sooptions(so->so_options); db_printf(")\n"); db_print_indent(indent); db_printf("so_linger: %d ", so->so_linger); db_printf("so_state: 0x%x (", so->so_state); db_print_sostate(so->so_state); db_printf(")\n"); db_print_indent(indent); db_printf("so_qstate: 0x%x (", so->so_qstate); db_print_soqstate(so->so_qstate); db_printf(") "); db_printf("so_pcb: %p ", so->so_pcb); db_printf("so_proto: %p\n", so->so_proto); if (so->so_proto != NULL) db_print_protosw(so->so_proto, "so_proto", indent); db_print_indent(indent); db_printf("so_head: %p ", so->so_head); db_printf("so_incomp first: %p ", TAILQ_FIRST(&so->so_incomp)); db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp)); db_print_indent(indent); /* so_list skipped */ - db_printf("so_qlen: %d ", so->so_qlen); - db_printf("so_incqlen: %d ", so->so_incqlen); - db_printf("so_qlimit: %d ", so->so_qlimit); + db_printf("so_qlen: %u ", so->so_qlen); + db_printf("so_incqlen: %u ", so->so_incqlen); + db_printf("so_qlimit: %u ", so->so_qlimit); db_printf("so_timeo: %d ", so->so_timeo); db_printf("so_error: %d\n", so->so_error); db_print_indent(indent); db_printf("so_sigio: %p ", so->so_sigio); db_printf("so_oobmark: %lu ", so->so_oobmark); db_printf("so_aiojobq first: %p\n", TAILQ_FIRST(&so->so_aiojobq)); db_print_sockbuf(&so->so_rcv, "so_rcv", indent); db_print_sockbuf(&so->so_snd, "so_snd", indent); } DB_SHOW_COMMAND(socket, db_show_socket) { struct socket *so; if (!have_addr) { db_printf("usage: show socket \n"); return; } so = (struct socket *)addr; db_print_socket(so, "socket", 0); } DB_SHOW_COMMAND(sockbuf, db_show_sockbuf) { struct sockbuf *sb; if (!have_addr) { db_printf("usage: show sockbuf \n"); return; } sb = (struct sockbuf *)addr; db_print_sockbuf(sb, "sockbuf", 0); } DB_SHOW_COMMAND(protosw, db_show_protosw) { struct protosw *pr; if (!have_addr) { db_printf("usage: show protosw \n"); return; } pr = (struct protosw *)addr; db_print_protosw(pr, "protosw", 0); } DB_SHOW_COMMAND(domain, db_show_domain) { struct domain *d; if (!have_addr) { db_printf("usage: show protosw \n"); return; } d = (struct domain *)addr; db_print_domain(d, "domain", 0); } #endif Index: head/sys/kern/uipc_socket.c =================================================================== --- head/sys/kern/uipc_socket.c (revision 295135) +++ head/sys/kern/uipc_socket.c (revision 295136) @@ -1,3710 +1,3716 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. * Copyright (c) 2004 The FreeBSD Foundation * Copyright (c) 2004-2008 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 */ /* * Comments on the socket life cycle: * * soalloc() sets of socket layer state for a socket, called only by * socreate() and sonewconn(). Socket layer private. * * sodealloc() tears down socket layer state for a socket, called only by * sofree() and sonewconn(). Socket layer private. * * pru_attach() associates protocol layer state with an allocated socket; * called only once, may fail, aborting socket allocation. This is called * from socreate() and sonewconn(). Socket layer private. * * pru_detach() disassociates protocol layer state from an attached socket, * and will be called exactly once for sockets in which pru_attach() has * been successfully called. If pru_attach() returned an error, * pru_detach() will not be called. Socket layer private. * * pru_abort() and pru_close() notify the protocol layer that the last * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pru_abort() also * detached protocol state from the socket state, but this is no longer the * case. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new * sockets. * * sonewconn() creates a socket and attaches protocol state. This is a * public interface that may be used by protocols to create new sockets when * a new connection is received and will be available for accept() on a * listen socket. * * soclose() destroys a socket after possibly waiting for it to disconnect. * This is a public interface that socket consumers should use to close and * release a socket when done with it. * * soabort() destroys a socket without waiting for it to disconnect (used * only for incoming connections that are already partially or fully * connected). This is used internally by the socket layer when clearing * listen socket queues (due to overflow or close on the listen socket), but * is also a public interface protocols may use to abort connections in * their incomplete listen queues should they no longer be required. Sockets * placed in completed connection listen queues should not be aborted for * reasons described in the comment above the soclose() implementation. This * is not a general purpose close routine, and except in the specific * circumstances described here, should not be used. * * sofree() will free a socket and its protocol state if all references on * the socket have been released, and is the public interface to attempt to * free a socket when a reference is removed. This is a socket layer private * interface. * * NOTE: In addition to socreate() and soclose(), which provide a single * socket reference to the consumer to be managed as required, there are two * calls to explicitly manage socket references, soref(), and sorele(). * Currently, these are generally required only when transitioning a socket * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. * * NOTE: With regard to VNETs the general rule is that callers do not set * curvnet. Exceptions to this rule include soabort(), sodisconnect(), * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() * and sorflush(), which are usually called from a pre-set VNET context. * sopoll() currently does not need a VNET context to be set. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include /* for struct knote */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #include #endif static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_solisten(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); fo_kqfilter_t soo_kqfilter; static struct filterops solisten_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_solisten, }; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, }; static struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define VNET_SO_ASSERT(so) \ VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]); #define V_socket_hhh VNET(socket_hhh) /* * Limit on the number of connections in the listen queue waiting * for accept(2). * NB: The orginal sysctl somaxconn is still available but hidden * to prevent confusion about the actual purpose of this number. */ -static int somaxconn = SOMAXCONN; +static u_int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS) { int error; int val; val = somaxconn; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr ) return (error); - if (val < 1 || val > USHRT_MAX) + /* + * The purpose of the UINT_MAX / 3 limit, is so that the formula + * 3 * so_qlimit / 2 + * below, will not overflow. + */ + + if (val < 1 || val > UINT_MAX / 3) return (EINVAL); somaxconn = val; return (0); } SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size"); SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size (compat)"); static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); /* * accept_mtx locks down per-socket fields relating to accept queues. See * socketvar.h for an annotation of the protected fields of struct socket. */ struct mtx accept_mtx; MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. */ static struct mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); /* * General IPC sysctl name space, used by sockets and a variety of other IPC * types. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* * Initialize the socket subsystem and set up the socket * memory allocator. */ static uma_zone_t socket_zone; int maxsockets; static void socket_zone_change(void *tag) { maxsockets = uma_zone_set_max(socket_zone, maxsockets); } static void socket_hhook_register(int subtype) { if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype, &V_socket_hhh[subtype], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register hook\n", __func__); } static void socket_hhook_deregister(int subtype) { if (hhook_head_deregister(V_socket_hhh[subtype]) != 0) printf("%s: WARNING: unable to deregister hook\n", __func__); } static void socket_init(void *tag) { socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); maxsockets = uma_zone_set_max(socket_zone, maxsockets); uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, EVENTHANDLER_PRI_FIRST); } SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); static void socket_vnet_init(const void *unused __unused) { int i; /* We expect a contiguous range */ for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_register(i); } VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_init, NULL); static void socket_vnet_uninit(const void *unused __unused) { int i; for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_deregister(i); } VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_uninit, NULL); /* * Initialise maxsockets. This SYSINIT must be run after * tunable_mbinit(). */ static void init_maxsockets(void *ignored) { TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); maxsockets = imax(maxsockets, maxfiles); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); /* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ static int sysctl_maxsockets(SYSCTL_HANDLER_ARGS) { int error, newmaxsockets; newmaxsockets = maxsockets; error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); if (error == 0 && req->newptr) { if (newmaxsockets > maxsockets && newmaxsockets <= maxfiles) { maxsockets = newmaxsockets; EVENTHANDLER_INVOKE(maxsockets_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets avaliable"); /* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. Note that it * would probably be better to allocate socket and PCB at the same time, but * I'm not convinced that all the protocols can be easily modified to do * this. * * soalloc() returns a socket with a ref count of 0. */ static struct socket * soalloc(struct vnet *vnet) { struct socket *so; so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); if (so == NULL) return (NULL); #ifdef MAC if (mac_socket_init(so, M_NOWAIT) != 0) { uma_zfree(socket_zone, so); return (NULL); } #endif if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) { uma_zfree(socket_zone, so); return (NULL); } SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); sx_init(&so->so_snd.sb_sx, "so_snd_sx"); sx_init(&so->so_rcv.sb_sx, "so_rcv_sx"); TAILQ_INIT(&so->so_aiojobq); #ifdef VIMAGE VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet = vnet; #endif /* We shouldn't need the so_global_mtx */ if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) { /* Do we need more comprehensive error returns? */ uma_zfree(socket_zone, so); return (NULL); } mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE vnet->vnet_sockcnt++; #endif mtx_unlock(&so_global_mtx); return (so); } /* * Free the storage associated with a socket at the socket layer, tear down * locks, labels, etc. All protocol state is assumed already to have been * torn down (and possibly never set up) by the caller. */ static void sodealloc(struct socket *so) { KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); if (so->so_rcv.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); if (so->so_snd.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); /* remove accept filter if one is present. */ if (so->so_accf != NULL) do_setopt_accept_filter(so, NULL); #ifdef MAC mac_socket_destroy(so); #endif hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE); crfree(so->so_cred); khelp_destroy_osd(&so->osd); sx_destroy(&so->so_snd.sb_sx); sx_destroy(&so->so_rcv.sb_sx); SOCKBUF_LOCK_DESTROY(&so->so_snd); SOCKBUF_LOCK_DESTROY(&so->so_rcv); uma_zfree(socket_zone, so); } /* * socreate returns a socket with a ref count of 1. The socket should be * closed with soclose(). */ int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td) { struct protosw *prp; struct socket *so; int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == NULL) { /* No support for domain. */ if (pffinddomain(dom) == NULL) return (EAFNOSUPPORT); /* No support for socket type. */ if (proto == 0 && type != 0) return (EPROTOTYPE); return (EPROTONOSUPPORT); } if (prp->pr_usrreqs->pru_attach == NULL || prp->pr_usrreqs->pru_attach == pru_attach_notsupp) return (EPROTONOSUPPORT); if (prison_check_af(cred, prp->pr_domain->dom_family) != 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); so = soalloc(CRED_TO_VNET(cred)); if (so == NULL) return (ENOBUFS); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) so->so_fibnum = td->td_proc->p_fibnum; else so->so_fibnum = 0; so->so_proto = prp; #ifdef MAC mac_socket_create(cred, so); #endif knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); so->so_count = 1; /* * Auto-sizing of socket buffers is managed by the protocols and * the appropriate flags must be set in the pru_attach function. */ CURVNET_SET(so->so_vnet); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); CURVNET_RESTORE(); if (error) { KASSERT(so->so_count == 1, ("socreate: so_count %d", so->so_count)); so->so_count = 0; sodealloc(so); return (error); } *aso = so; return (0); } #ifdef REGRESSION static int regression_sonewconn_earlytest = 1; SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW, ®ression_sonewconn_earlytest, 0, "Perform early sonewconn limit test"); #endif /* * When an attempt at a new connection is noted on a socket which accepts * connections, sonewconn is called. If the connection is possible (subject * to space constraints, etc.) then we allocate a new structure, propoerly * linked into the data structure of the original socket, and return this. * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED. * * Note: the ref count on the socket is 0 on return. */ struct socket * sonewconn(struct socket *head, int connstatus) { static struct timeval lastover; static struct timeval overinterval = { 60, 0 }; static int overcount; struct socket *so; int over; ACCEPT_LOCK(); over = (head->so_qlen > 3 * head->so_qlimit / 2); ACCEPT_UNLOCK(); #ifdef REGRESSION if (regression_sonewconn_earlytest && over) { #else if (over) { #endif overcount++; if (ratecheck(&lastover, &overinterval)) { log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " "%i already in queue awaiting acceptance " "(%d occurrences)\n", __func__, head->so_pcb, head->so_qlen, overcount); overcount = 0; } return (NULL); } VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } if ((head->so_options & SO_ACCEPTFILTER) != 0) connstatus = 0; so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; so->so_snd.sb_lowat = head->so_snd.sb_lowat; so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; so->so_state |= connstatus; ACCEPT_LOCK(); /* * The accept socket may be tearing down but we just * won a race on the ACCEPT_LOCK. * However, if sctp_peeloff() is called on a 1-to-many * style socket, the SO_ACCEPTCONN doesn't need to be set. */ if (!(head->so_options & SO_ACCEPTCONN) && ((head->so_proto->pr_protocol != IPPROTO_SCTP) || (head->so_type != SOCK_SEQPACKET))) { SOCK_LOCK(so); so->so_head = NULL; sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */ return (NULL); } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_qstate |= SQ_COMP; head->so_qlen++; } else { /* * Keep removing sockets from the head until there's room for * us to insert on the tail. In pre-locking revisions, this * was a simple if(), but as we could be racing with other * threads and soabort() requires dropping locks, we must * loop waiting for the condition to be true. */ while (head->so_incqlen > head->so_qlimit) { struct socket *sp; sp = TAILQ_FIRST(&head->so_incomp); TAILQ_REMOVE(&head->so_incomp, sp, so_list); head->so_incqlen--; sp->so_qstate &= ~SQ_INCOMP; sp->so_head = NULL; ACCEPT_UNLOCK(); soabort(sp); ACCEPT_LOCK(); } TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); so->so_qstate |= SQ_INCOMP; head->so_incqlen++; } ACCEPT_UNLOCK(); if (connstatus) { sorwakeup(head); wakeup_one(&head->so_timeo); } return (so); } int sobind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); CURVNET_RESTORE(); return (error); } int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td); CURVNET_RESTORE(); return (error); } /* * solisten() transitions a socket from a non-listening state to a listening * state, but can also be used to update the listen queue depth on an * existing listen socket. The protocol will call back into the sockets * layer using solisten_proto_check() and solisten_proto() to check and set * socket-layer listen state. Call backs are used so that the protocol can * acquire both protocol and socket layer locks in whatever order is required * by the protocol. * * Protocol implementors are advised to hold the socket lock across the * socket-layer test and set to avoid races at the socket layer. */ int solisten(struct socket *so, int backlog, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); CURVNET_RESTORE(); return (error); } int solisten_proto_check(struct socket *so) { SOCK_LOCK_ASSERT(so); if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) return (EINVAL); return (0); } void solisten_proto(struct socket *so, int backlog) { SOCK_LOCK_ASSERT(so); if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; so->so_options |= SO_ACCEPTCONN; } /* * Evaluate the reference count and named references on a socket; if no * references remain, free it. This should be called whenever a reference is * released, such as in sorele(), but also when named reference flags are * cleared in socket or protocol code. * * sofree() will free the socket if: * * - There are no outstanding file descriptor references or related consumers * (so_count == 0). * * - The socket has been closed by user space, if ever open (SS_NOFDREF). * * - The protocol does not have an outstanding strong reference on the socket * (SS_PROTOREF). * * - The socket is not in a completed connection queue, so a process has been * notified that it is present. If it is removed, the user process may * block in accept() despite select() saying the socket was ready. */ void sofree(struct socket *so) { struct protosw *pr = so->so_proto; struct socket *head; ACCEPT_LOCK_ASSERT(); SOCK_LOCK_ASSERT(so); if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) { SOCK_UNLOCK(so); ACCEPT_UNLOCK(); return; } head = so->so_head; if (head != NULL) { KASSERT((so->so_qstate & SQ_COMP) != 0 || (so->so_qstate & SQ_INCOMP) != 0, ("sofree: so_head != NULL, but neither SQ_COMP nor " "SQ_INCOMP")); KASSERT((so->so_qstate & SQ_COMP) == 0 || (so->so_qstate & SQ_INCOMP) == 0, ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_qstate &= ~SQ_INCOMP; so->so_head = NULL; } KASSERT((so->so_qstate & SQ_COMP) == 0 && (so->so_qstate & SQ_INCOMP) == 0, ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); if (so->so_options & SO_ACCEPTCONN) { KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated")); } SOCK_UNLOCK(so); ACCEPT_UNLOCK(); VNET_SO_ASSERT(so); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(so); if (pr->pr_usrreqs->pru_detach != NULL) (*pr->pr_usrreqs->pru_detach)(so); /* * From this point on, we assume that no other references to this * socket exist anywhere else in the stack. Therefore, no locks need * to be acquired or held. * * We used to do a lot of socket buffer and socket locking here, as * well as invoke sorflush() and perform wakeups. The direct call to * dom_dispose() and sbrelease_internal() are an inlining of what was * necessary from sorflush(). * * Notice that the socket buffer and kqueue state are torn down * before calling pru_detach. This means that protocols shold not * assume they can perform socket wakeups, etc, in their detach code. */ sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); seldrain(&so->so_snd.sb_sel); seldrain(&so->so_rcv.sb_sel); knlist_destroy(&so->so_rcv.sb_sel.si_note); knlist_destroy(&so->so_snd.sb_sel.si_note); sodealloc(so); } /* * Close a socket on last file table reference removal. Initiate disconnect * if connected. Free socket when disconnect complete. * * This function will sorele() the socket. Note that soclose() may be called * prior to the ref count reaching zero. The actual socket structure will * not be freed until the ref count reaches zero. */ int soclose(struct socket *so) { int error = 0; KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); CURVNET_SET(so->so_vnet); funsetown(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) { if (error == ENOTCONN) error = 0; goto drop; } } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep(&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; } } } drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); ACCEPT_LOCK(); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp; /* * Prevent new additions to the accept queues due * to ACCEPT_LOCK races while we are draining them. */ so->so_options &= ~SO_ACCEPTCONN; while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { TAILQ_REMOVE(&so->so_incomp, sp, so_list); so->so_incqlen--; sp->so_qstate &= ~SQ_INCOMP; sp->so_head = NULL; ACCEPT_UNLOCK(); soabort(sp); ACCEPT_LOCK(); } while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { TAILQ_REMOVE(&so->so_comp, sp, so_list); so->so_qlen--; sp->so_qstate &= ~SQ_COMP; sp->so_head = NULL; ACCEPT_UNLOCK(); soabort(sp); ACCEPT_LOCK(); } KASSERT((TAILQ_EMPTY(&so->so_comp)), ("%s: so_comp populated", __func__)); KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("%s: so_incomp populated", __func__)); } SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */ CURVNET_RESTORE(); return (error); } /* * soabort() is used to abruptly tear down a connection, such as when a * resource limit is reached (listen queue depth exceeded), or if a listen * socket is closed while there are sockets waiting to be accepted. * * This interface is tricky, because it is called on an unreferenced socket, * and must be called only by a thread that has actually removed the socket * from the listen queue it was on, or races with other threads are risked. * * This interface will call into the protocol code, so must not be called * with any socket locks held. Protocols do call it while holding their own * recursible protocol mutexes, but this is something that should be subject * to review in the future. */ void soabort(struct socket *so) { /* * In as much as is possible, assert that no references to this * socket are held. This is not quite the same as asserting that the * current thread is responsible for arranging for no references, but * is as close as we can get for now. */ KASSERT(so->so_count == 0, ("soabort: so_count")); KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); VNET_SO_ASSERT(so); if (so->so_proto->pr_usrreqs->pru_abort != NULL) (*so->so_proto->pr_usrreqs->pru_abort)(so); ACCEPT_LOCK(); SOCK_LOCK(so); sofree(so); } int soaccept(struct socket *so, struct sockaddr **nam) { int error; SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); so->so_state &= ~SS_NOFDREF; SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); CURVNET_RESTORE(); return (error); } int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (soconnectat(AT_FDCWD, so, nam, td)); } int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); CURVNET_SET(so->so_vnet); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. This allows * user to disconnect by connecting to, e.g., a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) { error = EISCONN; } else { /* * Prevent accumulated error from previous connection from * biting us. */ so->so_error = 0; if (fd == AT_FDCWD) { error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); } else { error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd, so, nam, td); } } CURVNET_RESTORE(); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { int error; CURVNET_SET(so1->so_vnet); error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); CURVNET_RESTORE(); return (error); } int sodisconnect(struct socket *so) { int error; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM")); KASSERT(so->so_proto->pr_flags & PR_ATOMIC, ("sosend_dgram: !PR_ATOMIC")); if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. */ if (resid < 0) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection-based * socket if it supports implied connect. Return ENOTCONN if * not connected and no address is supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto out; } } else if (addr == NULL) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; SOCKBUF_UNLOCK(&so->so_snd); goto out; } } /* * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a * problem and need fixing. */ space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; space -= clen; SOCKBUF_UNLOCK(&so->so_snd); if (resid > space) { error = EMSGSIZE; goto out; } if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf chain. * If no data is to be copied in, a single empty mbuf * is returned. */ top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); if (top == NULL) { error = EFAULT; /* only possible error */ goto out; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } KASSERT(resid == 0, ("sosend_dgram: resid != 0")); /* * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock * than with. */ if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously done could be out * of date. We could have recieved a reset packet in an interrupt or * maybe we slept while doing page faults in uiomove() etc. We could * probably recheck again inside the locking protection here, but * there are probably other places that this also happens. We must * rethink this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands this flag and * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } /* * Send on a socket. If send must go all at once and message is larger than * send buffering, then hard error. Lock against other senders. If must go * all at once and not enough room now, then inform user that this would * block and do nothing. Otherwise, if nonblocking, send as much as * possible. The data to be sent is described by "uio" if nonzero, otherwise * by the mbuf chain "top" (which must be null if uio is not). Data provided * in mbuf chain must be small enough to send all at once. * * Returns nonzero on error, timeout or signal; callers must check for short * counts if EINTR/ERESTART are returned. Data and control buffers are freed * on return. */ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; restart: do { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto release; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto release; } } else if (addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; goto release; } } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; goto release; } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; goto release; } error = sbwait(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); space -= clen; do { if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf * chain. If resid is 0, which can happen * only if we have control to send, then * a single empty mbuf is returned. This * is a workaround to prevent protocol send * methods to panic. */ top = m_uiotombuf(uio, M_WAITOK, space, (atomic ? max_hdr : 0), (atomic ? M_PKTHDR : 0) | ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ goto release; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously * done could be out of date. We could have recieved * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We * could probably recheck again inside the locking * protection here, but there are probably other * places that this also happens. We must rethink * this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands * this flag and nothing left to send then use * PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top, control, flags, td); CURVNET_RESTORE(); return (error); } /* * The part of soreceive() that implements reading non-inline out-of-band * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is * unable to return an mbuf chain to the caller. */ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { struct protosw *pr = so->so_proto; struct mbuf *m; int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); VNET_SO_ASSERT(so); m = m_get(M_WAITOK, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m != NULL) m_freem(m); return (error); } /* * Following replacement or removal of the first mbuf on the first mbuf chain * of a socket buffer, push necessary state changes back into the socket * buffer so that other consumers see the values consistently. 'nextrecord' * is the callers locally stored value of the original value of * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. * NOTE: 'nextrecord' may be NULL. */ static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { SOCKBUF_LOCK_ASSERT(sb); /* * First, update for the new value of nextrecord. If necessary, make * it the first record. */ if (sb->sb_mb != NULL) sb->sb_mb->m_nextpkt = nextrecord; else sb->sb_mb = nextrecord; /* * Now update any dependent socket buffer fields to reflect the new * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the * addition of a second clause that takes care of the case where * sb_mb has been updated, but remains the last record. */ if (sb->sb_mb == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (sb->sb_mb->m_nextpkt == NULL) sb->sb_lastrecord = sb->sb_mb; } /* * Implement receive operations on a socket. We depend on the way that * records are added to the sockbuf by sbappend. In particular, each record * (mbufs linked through m_next) must begin with an address if the protocol * so specifies, followed by an optional mbuf or mbufs containing ancillary * data, and then zero or more mbufs of data. In order to allow parallelism * between network receive and copying to user space, as well as avoid * sleeping with a mutex held, we release the socket buffer mutex during the * user space copy. Although the sockbuf is locked, new data may still be * appended, and thus we must maintain consistency of the sockbuf during that * time. * * The caller may receive the data as a single mbuf chain by supplying an * mbuf **mp0 for use in returning the chain. The uio is then used only for * the count in uio_resid. */ int soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; int flags, error, offset; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; ssize_t orig_resid = uio->uio_resid; mp = mp0; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) && uio->uio_resid) { VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, 0); } error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); restart: SOCKBUF_LOCK(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or * 2. MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && sbavail(&so->so_rcv) < uio->uio_resid) && sbavail(&so->so_rcv) < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !sbavail(&so->so_rcv), ("receive: m == %p sbavail == %u", m, sbavail(&so->so_rcv))); if (so->so_error) { if (m != NULL) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); goto release; } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { if (m == NULL) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } else goto dontblock; } for (; m != NULL; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENOTCONN; goto release; } if (uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (error) goto release; goto restart; } dontblock: /* * From this point onward, we maintain 'nextrecord' as a cache of the * pointer to the next record in the socket buffer. We must keep the * various socket buffer pointers and local stack versions of the * pointers in sync, pushing out modifications before dropping the * socket buffer mutex, and re-reading them when picking it up. * * Otherwise, we will race with the network stack appending new data * or records onto the socket buffer by using inconsistent/stale * versions of the field, possibly resulting in socket buffer * corruption. * * By holding the high-level sblock(), we prevent simultaneous * readers from pulling off the front of the socket buffer. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb")); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); orig_resid = 0; if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sockbuf_pushsync(&so->so_rcv, nextrecord); } } /* * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. If MSG_PEEK, we * just copy the data; if !MSG_PEEK, we call into the protocol to * perform externalization (or freeing if controlp == NULL). */ if (m != NULL && m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { if (flags & MSG_PEEK) { if (controlp != NULL) { *controlp = m_copy(m, 0, m->m_len); controlp = &(*controlp)->m_next; } m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = so->so_rcv.sb_mb; } } while (m != NULL && m->m_type == MT_CONTROL); if ((flags & MSG_PEEK) == 0) sockbuf_pushsync(&so->so_rcv, nextrecord); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); SOCKBUF_LOCK(&so->so_rcv); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { orig_resid = 0; while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } if (m != NULL) nextrecord = so->so_rcv.sb_mb->m_nextpkt; else nextrecord = so->so_rcv.sb_mb; orig_resid = 0; } if (m != NULL) { if ((flags & MSG_PEEK) == 0) { KASSERT(m->m_nextpkt == nextrecord, ("soreceive: post-control, nextrecord !sync")); if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_mb == m, ("soreceive: post-control, sb_mb!=m")); KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive: post-control, lastrecord!=m")); } } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } else { if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == nextrecord, ("soreceive: sb_mb != nextrecord")); if (so->so_rcv.sb_mb == NULL) { KASSERT(so->so_rcv.sb_lastrecord == NULL, ("soreceive: sb_lastercord != NULL")); } } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * Now continue to read any data mbufs off of the head of the socket * buffer until the read request is satisfied. Note that 'type' is * used to store the type of any mbuf reads that have happened so far * such that soreceive() can stop reading if the type changes, which * causes soreceive() to return only one of regular data and inline * out-of-band data in a single socket receive operation. */ moff = 0; offset = 0; while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0 && error == 0) { /* * If the type of mbuf has changed since the last mbuf * examined ('type'), end the receive operation. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) { if (type != m->m_type) break; } else if (type == MT_OOBDATA) break; else KASSERT(m->m_type == MT_DATA, ("m->m_type == %d", m->m_type)); so->so_rcv.sb_state &= ~SBS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. Otherwise copy * them out via the uio, then free. Sockbuf must be * consistent here (points to current mbuf, it points to next * record) when we drop priority; we must note any additions * to the sockbuf when we block interrupts again. */ if (mp == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); error = uiomove(mtod(m, char *) + moff, (int)len, uio); SOCKBUF_LOCK(&so->so_rcv); if (error) { /* * The MT_SONAME mbuf has already been removed * from the record, so it is necessary to * remove the data mbufs, if any, to preserve * the invariant in the case of PR_ADDR that * requires MT_SONAME mbufs at the head of * each record. */ if (m && pr->pr_flags & PR_ATOMIC && ((flags & MSG_PEEK) == 0)) (void)sbdroprecord_locked(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } else uio->uio_resid -= len; SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp != NULL) { m->m_nextpkt = NULL; *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } sockbuf_pushsync(&so->so_rcv, nextrecord); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) moff += len; else { if (mp != NULL) { if (flags & MSG_DONTWAIT) { *mp = m_copym(m, 0, len, M_NOWAIT); if (*mp == NULL) { /* * m_copym() couldn't * allocate an mbuf. * Adjust uio_resid back * (it was adjusted * down by len bytes, * which we didn't end * up "copying" over). */ uio->uio_resid += len; break; } } else { SOCKBUF_UNLOCK(&so->so_rcv); *mp = m_copym(m, 0, len, M_WAITOK); SOCKBUF_LOCK(&so->so_rcv); } } sbcut_locked(&so->so_rcv, len); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_rcv.sb_state |= SBS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), we * must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return with a * short count but without error. Keep sockbuf locked * against other readers. */ while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && !sosendallatonce(so) && nextrecord == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE) break; /* * Notify the protocol that some data has been * drained before blocking. */ if (pr->pr_flags & PR_WANTRCVD) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * We could receive some data while was notifying * the protocol. Skip blocking in this case. */ if (so->so_rcv.sb_mb == NULL) { error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } m = so->so_rcv.sb_mb; if (m != NULL) nextrecord = m->m_nextpkt; } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m != NULL && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord_locked(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an inline SB_EMPTY_FIXUP(). Second * part makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. */ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * If soreceive() is being done from the socket callback, * then don't need to generate ACK to peer to update window, * since ACK will be generated on return to TCP. */ if (!(flags & MSG_SOCALLBCK) && (pr->pr_flags & PR_WANTRCVD)) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto restart; } SOCKBUF_UNLOCK(&so->so_rcv); if (flagsp != NULL) *flagsp |= flags; release: sbunlock(&so->so_rcv); return (error); } /* * Optimized version of soreceive() for stream (TCP) sockets. * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid; struct sockbuf *sb; struct mbuf *m, *n = NULL; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; if (controlp != NULL) return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; /* Prevent other readers from entering the socket. */ error = sblock(sb, SBLOCKWAIT(flags)); if (error) goto out; SOCKBUF_LOCK(sb); /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); /* Abort if socket has reported problems. */ if (so->so_error) { if (sbavail(sb) > 0) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb) > 0) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. */ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(sb); if (error) goto out; goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { if (*mp0 == NULL) *mp0 = sb->sb_mb; else m_cat(*mp0, sb->sb_mb); for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p not available", __func__, m)); len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } n->m_next = NULL; sb->sb_mb = m; sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= len; if (*mp0 != NULL) m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ if ((so->so_proto->pr_flags & PR_WANTRCVD) && (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); VNET_SO_ASSERT(so); (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(sb); } } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. */ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SOCKBUF_LOCK_ASSERT(sb); SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); sbunlock(sb); return (error); } /* * Optimized version of soreceive() for simple datagram cases from userspace. * Unlike in the stream case, we're able to drop a datagram if copyout() * fails, and because we handle datagrams atomically, we don't need to use a * sleep lock to prevent I/O interlacing. */ int soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; int flags, error; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; /* * For any complicated cases, fall back to the full * soreceive_generic(). */ if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); /* * Enforce restrictions on use. */ KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, ("soreceive_dgram: wantrcvd")); KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, ("soreceive_dgram: SBS_RCVATMARK")); KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, ("soreceive_dgram: P_CONNREQUIRED")); /* * Loop blocking while waiting for a datagram. */ SOCKBUF_LOCK(&so->so_rcv); while ((m = so->so_rcv.sb_mb) == NULL) { KASSERT(sbavail(&so->so_rcv) == 0, ("soreceive_dgram: sb_mb NULL but sbavail %u", sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); return (0); } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); return (EWOULDBLOCK); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); return (error); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive_dgram: lastrecord != m")); } KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, ("soreceive_dgram: m_nextpkt != nextrecord")); /* * Pull 'm' and its chain off the front of the packet queue. */ so->so_rcv.sb_mb = NULL; sockbuf_pushsync(&so->so_rcv, nextrecord); /* * Walk 'm's chain and free that many bytes from the socket buffer. */ for (m2 = m; m2 != NULL; m2 = m2->m_next) sbfree(&so->so_rcv, m2); /* * Do a few last checks before we let go of the lock. */ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); m = m_free(m); } if (m == NULL) { /* XXXRW: Can this happen? */ return (0); } /* * Packet to copyout() is now in 'm' and it is disconnected from the * queue. * * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. We call into the * protocol to perform externalization (or freeing if controlp == * NULL). In some cases there can be only MT_CONTROL mbufs without * MT_DATA mbufs. */ if (m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { m2 = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = m2; } while (m != NULL && m->m_type == MT_CONTROL); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } } KASSERT(m == NULL || m->m_type == MT_DATA, ("soreceive_dgram: !data")); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { m_freem(m); return (error); } if (len == m->m_len) m = m_free(m); else { m->m_data += len; m->m_len -= len; } } if (m != NULL) { flags |= MSG_TRUNC; m_freem(m); } if (flagsp != NULL) *flagsp |= flags; return (0); } int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int error; CURVNET_SET(so->so_vnet); error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, controlp, flagsp)); CURVNET_RESTORE(); return (error); } int soshutdown(struct socket *so, int how) { struct protosw *pr = so->so_proto; int error; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) return (ENOTCONN); CURVNET_SET(so->so_vnet); if (pr->pr_usrreqs->pru_flush != NULL) (*pr->pr_usrreqs->pru_flush)(so, how); if (how != SHUT_WR) sorflush(so); if (how != SHUT_RD) { error = (*pr->pr_usrreqs->pru_shutdown)(so); wakeup(&so->so_timeo); CURVNET_RESTORE(); return (error); } wakeup(&so->so_timeo); CURVNET_RESTORE(); return (0); } void sorflush(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; struct socket aso; VNET_SO_ASSERT(so); /* * In order to avoid calling dom_dispose with the socket buffer mutex * held, and in order to generally avoid holding the lock for a long * time, we make a copy of the socket buffer and clear the original * (except locks, state). The new socket buffer copy won't have * initialized locks so we can only call routines that won't use or * assert those locks. * * Dislodge threads currently blocked in receive and wait to acquire * a lock against other simultaneous readers before clearing the * socket buffer. Don't let our acquire be interrupted by a signal * despite any existing socket disposition on interruptable waiting. */ socantrcvmore(so); (void) sblock(sb, SBL_WAIT | SBL_NOINTR); /* * Invalidate/clear most of the sockbuf structure, but leave selinfo * and mutex data unchanged. */ SOCKBUF_LOCK(sb); bzero(&aso, sizeof(aso)); aso.so_pcb = so->so_pcb; bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); SOCKBUF_UNLOCK(sb); sbunlock(sb); /* * Dispose of special rights and flush the copied socket. Don't call * any unsafe routines (that rely on locks being initialized) on aso. */ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(&aso); sbrelease_internal(&aso.so_rcv, so); } /* * Wrapper for Socket established helper hook. * Parameters: socket, context of the hook point, hook id. */ static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id) { struct socket_hhook_data hhook_data = { .so = so, .hctx = hctx, .m = NULL, .status = 0 }; CURVNET_SET(so->so_vnet); HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd); CURVNET_RESTORE(); /* Ugly but needed, since hhooks return void for now */ return (hhook_data.status); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in an * additional variant to handle the case where the option value needs to be * some kind of integer, but not a specific size. In addition to their use * here, these functions are also called by the protocol-level pr_ctloutput() * routines. */ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, but if we * don't get the minimum length the caller wants, we return EINVAL. * On success, sopt->sopt_valsize is set to however much we actually * retrieved. */ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_td != NULL) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return (0); } /* * Kernel version of setsockopt(2). * * XXX: optlen is size_t, not socklen_t */ int so_setsockopt(struct socket *so, int level, int optname, void *optval, size_t optlen) { struct sockopt sopt; sopt.sopt_level = level; sopt.sopt_name = optname; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = optval; sopt.sopt_valsize = optlen; sopt.sopt_td = NULL; return (sosetopt(so, &sopt)); } int sosetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; sbintime_t val; uint32_t val32; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) { error = (*so->so_proto->pr_ctloutput)(so, sopt); CURVNET_RESTORE(); return (error); } error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = do_setopt_accept_filter(so, sopt); if (error) goto bad; break; case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; SOCK_LOCK(so); so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; SOCK_UNLOCK(so); break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; SOCK_LOCK(so); if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; SOCK_UNLOCK(so); break; case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; else so->so_fibnum = 0; break; case SO_USER_COOKIE: error = sooptcopyin(sopt, &val32, sizeof val32, sizeof val32); if (error) goto bad; so->so_user_cookie = val32; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for any of these options, * so disallow them. */ if (optval < 1) { error = EINVAL; goto bad; } switch (sopt->sopt_name) { case SO_SNDBUF: case SO_RCVBUF: if (sbreserve(sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv, (u_long)optval, so, curthread) == 0) { error = ENOBUFS; goto bad; } (sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE; break; /* * Make sure the low-water is never greater than the * high-water. */ case SO_SNDLOWAT: SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_lowat = (optval > so->so_snd.sb_hiwat) ? so->so_snd.sb_hiwat : optval; SOCKBUF_UNLOCK(&so->so_snd); break; case SO_RCVLOWAT: SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; SOCKBUF_UNLOCK(&so->so_rcv); break; } break; case SO_SNDTIMEO: case SO_RCVTIMEO: #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; error = sooptcopyin(sopt, &tv32, sizeof tv32, sizeof tv32); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); } else #endif error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } if (tv.tv_sec > INT32_MAX) val = SBT_MAX; else val = tvtosbt(tv); switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof extmac, sizeof extmac); if (error) goto bad; error = mac_setsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); #else error = EOPNOTSUPP; #endif break; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto->pr_ctloutput != NULL) (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: CURVNET_RESTORE(); return (error); } /* * Helper routine for getsockopt. */ int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, possibly * truncated to fit in the user's buffer. Traditional behavior is * that we always tell the user precisely how much we copied, rather * than something useful like the total amount we had available for * her. Note that this interface is not idempotent; the entire * answer must generated ahead of time. */ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != NULL) { if (sopt->sopt_td != NULL) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return (error); } int sogetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; CURVNET_RESTORE(); return (error); } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = do_getopt_accept_filter(so, sopt); break; case SO_LINGER: SOCK_LOCK(so); l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; SOCK_UNLOCK(so); error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_TYPE: optval = so->so_type; goto integer; case SO_PROTOCOL: optval = so->so_proto->pr_protocol; goto integer; case SO_ERROR: SOCK_LOCK(so); optval = so->so_error; so->so_error = 0; SOCK_UNLOCK(so); goto integer; case SO_SNDBUF: optval = so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; CP(tv, tv32, tv_sec); CP(tv, tv32, tv_usec); error = sooptcopyout(sopt, &tv32, sizeof tv32); } else #endif error = sooptcopyout(sopt, &tv, sizeof tv); break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_PEERLABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_LISTENQLIMIT: optval = so->so_qlimit; goto integer; case SO_LISTENQLEN: optval = so->so_qlen; goto integer; case SO_LISTENINCQLEN: optval = so->so_incqlen; goto integer; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } } #ifdef MAC bad: #endif CURVNET_RESTORE(); return (error); } int soopt_getm(struct sockopt *sopt, struct mbuf **mp) { struct mbuf *m, *m_prev; int sopt_size = sopt->sopt_valsize; MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) return ENOBUFS; if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; *mp = m; m_prev = m; while (sopt_size) { MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { m_freem(*mp); return ENOBUFS; } if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); m_freem(*mp); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; m_prev->m_next = m; m_prev = m; } return (0); } int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; m = m->m_next; } if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ panic("ip6_sooptmcopyin"); return (0); } int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; size_t valsize = 0; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; valsize += m->m_len; m = m->m_next; } if (m != NULL) { /* enough soopt buffer should be given from user-land */ m_freem(m0); return(EINVAL); } sopt->sopt_valsize = valsize; return (0); } /* * sohasoutofband(): protocol notifies socket layer of the arrival of new * out-of-band data, which will then notify socket consumers. */ void sohasoutofband(struct socket *so) { if (so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGURG, 0); selwakeuppri(&so->so_rcv.sb_sel, PSOCK); } int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { /* * We do not need to set or assert curvnet as long as everyone uses * sopoll_generic(). */ return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, td)); } int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { int revents = 0; SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (events & (POLLIN | POLLRDNORM)) if (soreadabledata(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if ((events & POLLINIGNEOF) == 0) { if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { revents |= events & (POLLIN | POLLRDNORM); if (so->so_snd.sb_state & SBS_CANTSENDMORE) revents |= POLLHUP; } } if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(td, &so->so_rcv.sb_sel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(td, &so->so_snd.sb_sel); so->so_snd.sb_flags |= SB_SEL; } } SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); return (revents); } int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; switch (kn->kn_filter) { case EVFILT_READ: if (so->so_options & SO_ACCEPTCONN) kn->kn_fop = &solisten_filtops; else kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; sb = &so->so_snd; break; default: return (EINVAL); } SOCKBUF_LOCK(sb); knlist_add(&sb->sb_sel.si_note, kn, 1); sb->sb_flags |= SB_KNOTE; SOCKBUF_UNLOCK(sb); return (0); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. */ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_attach_notsupp(struct socket *so, int proto, struct thread *td) { return EOPNOTSUPP; } int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { return EOPNOTSUPP; } int pru_disconnect_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) { return EOPNOTSUPP; } int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { return EOPNOTSUPP; } int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count) { return (EOPNOTSUPP); } /* * This isn't really a ``null'' operation, but it's the default one and * doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } int pru_shutdown_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { return EOPNOTSUPP; } int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { return EOPNOTSUPP; } int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td) { return EOPNOTSUPP; } static void filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; SOCKBUF_LOCK(&so->so_rcv); knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1); if (knlist_empty(&so->so_rcv.sb_sel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; SOCKBUF_UNLOCK(&so->so_rcv); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; SOCKBUF_LOCK_ASSERT(&so->so_rcv); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) return 1; } else { if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) return 1; } /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); } static void filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; SOCKBUF_LOCK(&so->so_snd); knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1); if (knlist_empty(&so->so_snd.sb_sel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; SOCKBUF_UNLOCK(&so->so_snd); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; SOCKBUF_LOCK_ASSERT(&so->so_snd); kn->kn_data = sbspace(&so->so_snd); hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); else if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (0); else if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); else return (kn->kn_data >= so->so_snd.sb_lowat); } /*ARGSUSED*/ static int filt_solisten(struct knote *kn, long hint) { struct socket *so = kn->kn_fp->f_data; kn->kn_data = so->so_qlen; return (!TAILQ_EMPTY(&so->so_comp)); } int socheckuid(struct socket *so, uid_t uid) { if (so == NULL) return (EPERM); if (so->so_cred->cr_uid != uid) return (EPERM); return (0); } /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. */ /* * Procedures to manipulate state flags of socket and do appropriate wakeups. * * Normal sequence from the active (originating) side is that * soisconnecting() is called during processing of connect() call, resulting * in an eventual call to soisconnected() if/when the connection is * established. When the connection is torn down soisdisconnecting() is * called during processing of disconnect() call, and soisdisconnected() is * called when the connection to the peer is totally severed. The semantics * of these routines are such that connectionless protocols can call * soisconnected() and soisdisconnected() only, bypassing the in-progress * calls when setting up a ``connection'' takes no time. * * From the passive side, a socket is created with two queues of sockets: * so_incomp for connections in progress and so_comp for connections already * made and awaiting user acceptance. As a protocol is preparing incoming * connections, it creates a socket structure queued on so_incomp by calling * sonewconn(). When the connection is established, soisconnected() is * called, and transfers the socket structure to so_comp, making it available * to accept(). * * If a socket is closed with sockets on either so_incomp or so_comp, these * sockets are dropped. * * If higher-level protocols are implemented in the kernel, the wakeups done * here will sometimes cause software-interrupt process scheduling. */ void soisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; SOCK_UNLOCK(so); } void soisconnected(struct socket *so) { struct socket *head; int ret; restart: ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; head = so->so_head; if (head != NULL && (so->so_qstate & SQ_INCOMP)) { if ((so->so_options & SO_ACCEPTFILTER) == 0) { SOCK_UNLOCK(so); TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_qstate &= ~SQ_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); head->so_qlen++; so->so_qstate |= SQ_COMP; ACCEPT_UNLOCK(); sorwakeup(head); wakeup_one(&head->so_timeo); } else { ACCEPT_UNLOCK(); soupcall_set(so, SO_RCV, head->so_accf->so_accept_filter->accf_callback, head->so_accf->so_accept_filter_arg); so->so_options &= ~SO_ACCEPTFILTER; ret = head->so_accf->so_accept_filter->accf_callback(so, head->so_accf->so_accept_filter_arg, M_NOWAIT); if (ret == SU_ISCONNECTED) soupcall_clear(so, SO_RCV); SOCK_UNLOCK(so); if (ret == SU_ISCONNECTED) goto restart; } return; } SOCK_UNLOCK(so); ACCEPT_UNLOCK(); wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } void soisdisconnecting(struct socket *so) { /* * Note: This code assumes that SOCK_LOCK(so) and * SOCKBUF_LOCK(&so->so_rcv) are the same. */ SOCKBUF_LOCK(&so->so_rcv); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); wakeup(&so->so_timeo); } void soisdisconnected(struct socket *so) { /* * Note: This code assumes that SOCK_LOCK(so) and * SOCKBUF_LOCK(&so->so_rcv) are the same. */ SOCKBUF_LOCK(&so->so_rcv); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISDISCONNECTED; socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); socantsendmore_locked(so); wakeup(&so->so_timeo); } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * sodupsockaddr(const struct sockaddr *sa, int mflags) { struct sockaddr *sa2; sa2 = malloc(sa->sa_len, M_SONAME, mflags); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Register per-socket buffer upcalls. */ void soupcall_set(struct socket *so, int which, int (*func)(struct socket *, void *, int), void *arg) { struct sockbuf *sb; switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_set: bad which"); } SOCKBUF_LOCK_ASSERT(sb); #if 0 /* XXX: accf_http actually wants to do this on purpose. */ KASSERT(sb->sb_upcall == NULL, ("soupcall_set: overwriting upcall")); #endif sb->sb_upcall = func; sb->sb_upcallarg = arg; sb->sb_flags |= SB_UPCALL; } void soupcall_clear(struct socket *so, int which) { struct sockbuf *sb; switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_clear: bad which"); } SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_upcall != NULL, ("soupcall_clear: no upcall to clear")); sb->sb_upcall = NULL; sb->sb_upcallarg = NULL; sb->sb_flags &= ~SB_UPCALL; } /* * Create an external-format (``xsocket'') structure using the information in * the kernel-format socket structure pointed to by so. This is done to * reduce the spew of irrelevant information over this interface, to isolate * user code from changes in the kernel structure, and potentially to provide * information-hiding if we decide that some of this information should be * hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_uid = so->so_cred->cr_uid; } /* * Socket accessor functions to provide external consumers with * a safe interface to socket state * */ void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg) { TAILQ_FOREACH(so, &so->so_comp, so_list) func(so, arg); } struct sockbuf * so_sockbuf_rcv(struct socket *so) { return (&so->so_rcv); } struct sockbuf * so_sockbuf_snd(struct socket *so) { return (&so->so_snd); } int so_state_get(const struct socket *so) { return (so->so_state); } void so_state_set(struct socket *so, int val) { so->so_state = val; } int so_options_get(const struct socket *so) { return (so->so_options); } void so_options_set(struct socket *so, int val) { so->so_options = val; } int so_error_get(const struct socket *so) { return (so->so_error); } void so_error_set(struct socket *so, int val) { so->so_error = val; } int so_linger_get(const struct socket *so) { return (so->so_linger); } void so_linger_set(struct socket *so, int val) { so->so_linger = val; } struct protosw * so_protosw_get(const struct socket *so) { return (so->so_proto); } void so_protosw_set(struct socket *so, struct protosw *val) { so->so_proto = val; } void so_sorwakeup(struct socket *so) { sorwakeup(so); } void so_sowwakeup(struct socket *so) { sowwakeup(so); } void so_sorwakeup_locked(struct socket *so) { sorwakeup_locked(so); } void so_sowwakeup_locked(struct socket *so) { sowwakeup_locked(so); } void so_lock(struct socket *so) { SOCK_LOCK(so); } void so_unlock(struct socket *so) { SOCK_UNLOCK(so); } Index: head/sys/netinet/sctp_sysctl.c =================================================================== --- head/sys/netinet/sctp_sysctl.c (revision 295135) +++ head/sys/netinet/sctp_sysctl.c (revision 295136) @@ -1,956 +1,960 @@ /*- * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include FEATURE(sctp, "Stream Control Transmission Protocol"); /* * sysctl tunable variables */ void sctp_init_sysctls() { SCTP_BASE_SYSCTL(sctp_sendspace) = SCTPCTL_MAXDGRAM_DEFAULT; SCTP_BASE_SYSCTL(sctp_recvspace) = SCTPCTL_RECVSPACE_DEFAULT; SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT; SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT; SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_pr_enable) = SCTPCTL_PR_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_auth_enable) = SCTPCTL_AUTH_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_asconf_enable) = SCTPCTL_ASCONF_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_reconfig_enable) = SCTPCTL_RECONFIG_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_nrsack_enable) = SCTPCTL_NRSACK_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_pktdrop_enable) = SCTPCTL_PKTDROP_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_strict_sacks) = SCTPCTL_STRICT_SACKS_DEFAULT; SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT; SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT; SCTP_BASE_SYSCTL(sctp_fr_max_burst_default) = SCTPCTL_FRMAXBURST_DEFAULT; SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = SCTPCTL_MAXCHUNKS_DEFAULT; SCTP_BASE_SYSCTL(sctp_hashtblsize) = SCTPCTL_TCBHASHSIZE_DEFAULT; SCTP_BASE_SYSCTL(sctp_pcbtblsize) = SCTPCTL_PCBHASHSIZE_DEFAULT; SCTP_BASE_SYSCTL(sctp_min_split_point) = SCTPCTL_MIN_SPLIT_POINT_DEFAULT; SCTP_BASE_SYSCTL(sctp_chunkscale) = SCTPCTL_CHUNKSCALE_DEFAULT; SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default) = SCTPCTL_DELAYED_SACK_TIME_DEFAULT; SCTP_BASE_SYSCTL(sctp_sack_freq_default) = SCTPCTL_SACK_FREQ_DEFAULT; SCTP_BASE_SYSCTL(sctp_system_free_resc_limit) = SCTPCTL_SYS_RESOURCE_DEFAULT; SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit) = SCTPCTL_ASOC_RESOURCE_DEFAULT; SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default) = SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT; SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default) = SCTPCTL_PMTU_RAISE_TIME_DEFAULT; SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default) = SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT; SCTP_BASE_SYSCTL(sctp_secret_lifetime_default) = SCTPCTL_SECRET_LIFETIME_DEFAULT; SCTP_BASE_SYSCTL(sctp_rto_max_default) = SCTPCTL_RTO_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_rto_min_default) = SCTPCTL_RTO_MIN_DEFAULT; SCTP_BASE_SYSCTL(sctp_rto_initial_default) = SCTPCTL_RTO_INITIAL_DEFAULT; SCTP_BASE_SYSCTL(sctp_init_rto_max_default) = SCTPCTL_INIT_RTO_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default) = SCTPCTL_VALID_COOKIE_LIFE_DEFAULT; SCTP_BASE_SYSCTL(sctp_init_rtx_max_default) = SCTPCTL_INIT_RTX_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default) = SCTPCTL_ASSOC_RTX_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT; SCTP_BASE_SYSCTL(sctp_path_pf_threshold) = SCTPCTL_PATH_PF_THRESHOLD_DEFAULT; SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT; SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default) = SCTPCTL_INCOMING_STREAMS_DEFAULT; SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT; SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT; SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT; SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT; SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT; SCTP_BASE_SYSCTL(sctp_L2_abc_variable) = SCTPCTL_ABC_L_VAR_DEFAULT; SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT; SCTP_BASE_SYSCTL(sctp_do_drain) = SCTPCTL_DO_SCTP_DRAIN_DEFAULT; SCTP_BASE_SYSCTL(sctp_hb_maxburst) = SCTPCTL_HB_MAX_BURST_DEFAULT; SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit) = SCTPCTL_ABORT_AT_LIMIT_DEFAULT; SCTP_BASE_SYSCTL(sctp_strict_data_order) = SCTPCTL_STRICT_DATA_ORDER_DEFAULT; SCTP_BASE_SYSCTL(sctp_min_residual) = SCTPCTL_MIN_RESIDUAL_DEFAULT; SCTP_BASE_SYSCTL(sctp_max_retran_chunk) = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT; SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT; SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT; SCTP_BASE_SYSCTL(sctp_default_ss_module) = SCTPCTL_DEFAULT_SS_MODULE_DEFAULT; SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT; SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT; SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) = SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT; SCTP_BASE_SYSCTL(sctp_vtag_time_wait) = SCTPCTL_TIME_WAIT_DEFAULT; SCTP_BASE_SYSCTL(sctp_buffer_splitting) = SCTPCTL_BUFFER_SPLITTING_DEFAULT; SCTP_BASE_SYSCTL(sctp_initial_cwnd) = SCTPCTL_INITIAL_CWND_DEFAULT; SCTP_BASE_SYSCTL(sctp_rttvar_bw) = SCTPCTL_RTTVAR_BW_DEFAULT; SCTP_BASE_SYSCTL(sctp_rttvar_rtt) = SCTPCTL_RTTVAR_RTT_DEFAULT; SCTP_BASE_SYSCTL(sctp_rttvar_eqret) = SCTPCTL_RTTVAR_EQRET_DEFAULT; SCTP_BASE_SYSCTL(sctp_steady_step) = SCTPCTL_RTTVAR_STEADYS_DEFAULT; SCTP_BASE_SYSCTL(sctp_use_dccc_ecn) = SCTPCTL_RTTVAR_DCCCECN_DEFAULT; SCTP_BASE_SYSCTL(sctp_blackhole) = SCTPCTL_BLACKHOLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_diag_info_code) = SCTPCTL_DIAG_INFO_CODE_DEFAULT; #if defined(SCTP_LOCAL_TRACE_BUF) memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log)); #endif SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = SCTPCTL_UDP_TUNNELING_PORT_DEFAULT; SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) = SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT; SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly) = SCTPCTL_NAT_FRIENDLY_INITS_DEFAULT; #if defined(SCTP_DEBUG) SCTP_BASE_SYSCTL(sctp_debug_on) = SCTPCTL_DEBUG_DEFAULT; #endif #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_BASE_SYSCTL(sctp_output_unlocked) = SCTPCTL_OUTPUT_UNLOCKED_DEFAULT; #endif } /* It returns an upper limit. No filtering is done here */ static unsigned int sctp_sysctl_number_of_addresses(struct sctp_inpcb *inp) { unsigned int cnt; struct sctp_vrf *vrf; struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa; struct sctp_laddr *laddr; cnt = 0; /* neither Mac OS X nor FreeBSD support mulitple routing functions */ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) { return (0); } if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { switch (sctp_ifa->address.sa.sa_family) { #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: #endif cnt++; break; default: break; } } } } else { LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { switch (laddr->ifa->address.sa.sa_family) { #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: #endif cnt++; break; default: break; } } } return (cnt); } static int sctp_sysctl_copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req) { struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa; int loopback_scope, ipv4_local_scope, local_scope, site_scope; int ipv4_addr_legal, ipv6_addr_legal; struct sctp_vrf *vrf; struct xsctp_laddr xladdr; struct sctp_laddr *laddr; int error; /* Turn on all the appropriate scope */ if (stcb) { /* use association specific values */ loopback_scope = stcb->asoc.scope.loopback_scope; ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope; local_scope = stcb->asoc.scope.local_scope; site_scope = stcb->asoc.scope.site_scope; ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal; ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal; } else { /* Use generic values for endpoints. */ loopback_scope = 1; ipv4_local_scope = 1; local_scope = 1; site_scope = 1; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { ipv6_addr_legal = 1; if (SCTP_IPV6_V6ONLY(inp)) { ipv4_addr_legal = 0; } else { ipv4_addr_legal = 1; } } else { ipv6_addr_legal = 0; ipv4_addr_legal = 1; } } /* neither Mac OS X nor FreeBSD support mulitple routing functions */ if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) { SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); return (-1); } if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) /* Skip loopback if loopback_scope not set */ continue; LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { if (stcb) { /* * ignore if blacklisted at * association level */ if (sctp_is_addr_restricted(stcb, sctp_ifa)) continue; } switch (sctp_ifa->address.sa.sa_family) { #ifdef INET case AF_INET: if (ipv4_addr_legal) { struct sockaddr_in *sin; sin = &sctp_ifa->address.sin; if (sin->sin_addr.s_addr == 0) continue; if (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sin->sin_addr) != 0) { continue; } if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) continue; } else { continue; } break; #endif #ifdef INET6 case AF_INET6: if (ipv6_addr_legal) { struct sockaddr_in6 *sin6; sin6 = &sctp_ifa->address.sin6; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) continue; if (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sin6->sin6_addr) != 0) { continue; } if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { if (local_scope == 0) continue; if (sin6->sin6_scope_id == 0) { /* * bad link * local * address */ if (sa6_recoverscope(sin6) != 0) continue; } } if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) continue; } else { continue; } break; #endif default: continue; } memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr)); memcpy((void *)&xladdr.address, (const void *)&sctp_ifa->address, sizeof(union sctp_sockstore)); SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr)); if (error) { return (error); } else { SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); } } } } else { LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { /* ignore if blacklisted at association level */ if (stcb && sctp_is_addr_restricted(stcb, laddr->ifa)) continue; memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr)); memcpy((void *)&xladdr.address, (const void *)&laddr->ifa->address, sizeof(union sctp_sockstore)); xladdr.start_time.tv_sec = (uint32_t) laddr->start_time.tv_sec; xladdr.start_time.tv_usec = (uint32_t) laddr->start_time.tv_usec; SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr)); if (error) { return (error); } else { SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); } } } memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr)); xladdr.last = 1; SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr)); if (error) { return (error); } else { SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); return (0); } } /* * sysctl functions */ static int sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS) { unsigned int number_of_endpoints; unsigned int number_of_local_addresses; unsigned int number_of_associations; unsigned int number_of_remote_addresses; unsigned int n; int error; struct sctp_inpcb *inp; struct sctp_tcb *stcb; struct sctp_nets *net; struct xsctp_inpcb xinpcb; struct xsctp_tcb xstcb; struct xsctp_raddr xraddr; struct socket *so; number_of_endpoints = 0; number_of_local_addresses = 0; number_of_associations = 0; number_of_remote_addresses = 0; SCTP_INP_INFO_RLOCK(); if (req->oldptr == NULL) { LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) { SCTP_INP_RLOCK(inp); number_of_endpoints++; number_of_local_addresses += sctp_sysctl_number_of_addresses(inp); LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { number_of_associations++; number_of_local_addresses += sctp_sysctl_number_of_addresses(inp); TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { number_of_remote_addresses++; } } SCTP_INP_RUNLOCK(inp); } SCTP_INP_INFO_RUNLOCK(); n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) + (number_of_local_addresses + number_of_endpoints + number_of_associations) * sizeof(struct xsctp_laddr) + (number_of_associations + number_of_endpoints) * sizeof(struct xsctp_tcb) + (number_of_remote_addresses + number_of_associations) * sizeof(struct xsctp_raddr); /* request some more memory than needed */ req->oldidx = (n + n / 8); return (0); } if (req->newptr != NULL) { SCTP_INP_INFO_RUNLOCK(); SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM); return (EPERM); } LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) { SCTP_INP_RLOCK(inp); if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) { /* if its allgone it is being freed - skip it */ goto skip; } xinpcb.last = 0; xinpcb.local_port = ntohs(inp->sctp_lport); xinpcb.flags = inp->sctp_flags; xinpcb.features = inp->sctp_features; xinpcb.total_sends = inp->total_sends; xinpcb.total_recvs = inp->total_recvs; xinpcb.total_nospaces = inp->total_nospaces; xinpcb.fragmentation_point = inp->sctp_frag_point; xinpcb.socket = inp->sctp_socket; so = inp->sctp_socket; if ((so == NULL) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) { xinpcb.qlen = 0; xinpcb.maxqlen = 0; } else { xinpcb.qlen = so->so_qlen; + xinpcb.qlen_old = so->so_qlen > USHRT_MAX ? + USHRT_MAX : (uint16_t) so->so_qlen; xinpcb.maxqlen = so->so_qlimit; + xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ? + USHRT_MAX : (uint16_t) so->so_qlimit; } SCTP_INP_INCR_REF(inp); SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); if (error) { SCTP_INP_DECR_REF(inp); return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); error = sctp_sysctl_copy_out_local_addresses(inp, NULL, req); if (error) { SCTP_INP_DECR_REF(inp); return (error); } LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) { SCTP_TCB_LOCK(stcb); atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); xstcb.last = 0; xstcb.local_port = ntohs(inp->sctp_lport); xstcb.remote_port = ntohs(stcb->rport); if (stcb->asoc.primary_destination != NULL) xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr; xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay; xstcb.state = (uint32_t) sctp_map_assoc_state(stcb->asoc.state); /* 7.0 does not support these */ xstcb.assoc_id = sctp_get_associd(stcb); xstcb.peers_rwnd = stcb->asoc.peers_rwnd; xstcb.in_streams = stcb->asoc.streamincnt; xstcb.out_streams = stcb->asoc.streamoutcnt; xstcb.max_nr_retrans = stcb->asoc.overall_error_count; xstcb.primary_process = 0; /* not really supported * yet */ xstcb.T1_expireries = stcb->asoc.timoinit + stcb->asoc.timocookie; xstcb.T2_expireries = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack; xstcb.retransmitted_tsns = stcb->asoc.marked_retrans; xstcb.start_time.tv_sec = (uint32_t) stcb->asoc.start_time.tv_sec; xstcb.start_time.tv_usec = (uint32_t) stcb->asoc.start_time.tv_usec; xstcb.discontinuity_time.tv_sec = (uint32_t) stcb->asoc.discontinuity_time.tv_sec; xstcb.discontinuity_time.tv_usec = (uint32_t) stcb->asoc.discontinuity_time.tv_usec; xstcb.total_sends = stcb->total_sends; xstcb.total_recvs = stcb->total_recvs; xstcb.local_tag = stcb->asoc.my_vtag; xstcb.remote_tag = stcb->asoc.peer_vtag; xstcb.initial_tsn = stcb->asoc.init_seq_number; xstcb.highest_tsn = stcb->asoc.sending_seq - 1; xstcb.cumulative_tsn = stcb->asoc.last_acked_seq; xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn; xstcb.mtu = stcb->asoc.smallest_mtu; xstcb.refcnt = stcb->asoc.refcnt; SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb)); if (error) { SCTP_INP_DECR_REF(inp); atomic_subtract_int(&stcb->asoc.refcnt, 1); return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); error = sctp_sysctl_copy_out_local_addresses(inp, stcb, req); if (error) { SCTP_INP_DECR_REF(inp); atomic_subtract_int(&stcb->asoc.refcnt, 1); return (error); } TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { xraddr.last = 0; xraddr.address = net->ro._l_addr; xraddr.active = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE); xraddr.confirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0); xraddr.heartbeat_enabled = ((net->dest_state & SCTP_ADDR_NOHB) == 0); xraddr.potentially_failed = ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF); xraddr.rto = net->RTO; xraddr.max_path_rtx = net->failure_threshold; xraddr.rtx = net->marked_retrans; xraddr.error_counter = net->error_count; xraddr.cwnd = net->cwnd; xraddr.flight_size = net->flight_size; xraddr.mtu = net->mtu; xraddr.rtt = net->rtt / 1000; xraddr.heartbeat_interval = net->heart_beat_delay; xraddr.ssthresh = net->ssthresh; xraddr.start_time.tv_sec = (uint32_t) net->start_time.tv_sec; xraddr.start_time.tv_usec = (uint32_t) net->start_time.tv_usec; SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr)); if (error) { SCTP_INP_DECR_REF(inp); atomic_subtract_int(&stcb->asoc.refcnt, 1); return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); } atomic_subtract_int(&stcb->asoc.refcnt, 1); memset((void *)&xraddr, 0, sizeof(struct xsctp_raddr)); xraddr.last = 1; SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr)); if (error) { SCTP_INP_DECR_REF(inp); return (error); } SCTP_INP_INFO_RLOCK(); SCTP_INP_RLOCK(inp); } SCTP_INP_DECR_REF(inp); SCTP_INP_RUNLOCK(inp); SCTP_INP_INFO_RUNLOCK(); memset((void *)&xstcb, 0, sizeof(struct xsctp_tcb)); xstcb.last = 1; error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb)); if (error) { return (error); } skip: SCTP_INP_INFO_RLOCK(); } SCTP_INP_INFO_RUNLOCK(); memset((void *)&xinpcb, 0, sizeof(struct xsctp_inpcb)); xinpcb.last = 1; error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb)); return (error); } static int sctp_sysctl_handle_udp_tunneling(SYSCTL_HANDLER_ARGS) { int error; uint32_t old, new; SCTP_INP_INFO_RLOCK(); old = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port); SCTP_INP_INFO_RUNLOCK(); new = old; error = sysctl_handle_int(oidp, &new, 0, req); if ((error == 0) && (req->newptr != NULL)) { #if (SCTPCTL_UDP_TUNNELING_PORT_MIN == 0) if (new > SCTPCTL_UDP_TUNNELING_PORT_MAX) { #else if ((new < SCTPCTL_UDP_TUNNELING_PORT_MIN) || (new > SCTPCTL_UDP_TUNNELING_PORT_MAX)) { #endif error = EINVAL; } else { SCTP_INP_INFO_WLOCK(); SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = new; if (old != 0) { sctp_over_udp_stop(); } if (new != 0) { error = sctp_over_udp_start(); } SCTP_INP_INFO_WUNLOCK(); } } return (error); } static int sctp_sysctl_handle_auth(SYSCTL_HANDLER_ARGS) { int error; uint32_t new; new = SCTP_BASE_SYSCTL(sctp_auth_enable); error = sysctl_handle_int(oidp, &new, 0, req); if ((error == 0) && (req->newptr != NULL)) { #if (SCTPCTL_AUTH_ENABLE_MIN == 0) if ((new > SCTPCTL_AUTH_ENABLE_MAX) || ((new == 0) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) { #else if ((new < SCTPCTL_AUTH_ENABLE_MIN) || (new > SCTPCTL_AUTH_ENABLE_MAX) || ((new == 0) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) { #endif error = EINVAL; } else { SCTP_BASE_SYSCTL(sctp_auth_enable) = new; } } return (error); } static int sctp_sysctl_handle_asconf(SYSCTL_HANDLER_ARGS) { int error; uint32_t new; new = SCTP_BASE_SYSCTL(sctp_asconf_enable); error = sysctl_handle_int(oidp, &new, 0, req); if ((error == 0) && (req->newptr != NULL)) { #if (SCTPCTL_ASCONF_ENABLE_MIN == 0) if ((new > SCTPCTL_ASCONF_ENABLE_MAX) || ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_enable) == 0))) { #else if ((new < SCTPCTL_ASCONF_ENABLE_MIN) || (new > SCTPCTL_ASCONF_ENABLE_MAX) || ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_enable) == 0))) { #endif error = EINVAL; } else { SCTP_BASE_SYSCTL(sctp_asconf_enable) = new; } } return (error); } static int sctp_sysctl_handle_stats(SYSCTL_HANDLER_ARGS) { int error; #if defined(SMP) && defined(SCTP_USE_PERCPU_STAT) struct sctpstat *sarry; struct sctpstat sb; int cpu; #endif struct sctpstat sb_temp; if ((req->newptr != NULL) && (req->newlen != sizeof(struct sctpstat))) { return (EINVAL); } memset(&sb_temp, 0, sizeof(struct sctpstat)); if (req->newptr != NULL) { error = SYSCTL_IN(req, &sb_temp, sizeof(struct sctpstat)); if (error != 0) { return (error); } } #if defined(SMP) && defined(SCTP_USE_PERCPU_STAT) memset(&sb, 0, sizeof(sb)); for (cpu = 0; cpu < mp_maxid; cpu++) { sarry = &SCTP_BASE_STATS[cpu]; if (sarry->sctps_discontinuitytime.tv_sec > sb.sctps_discontinuitytime.tv_sec) { sb.sctps_discontinuitytime.tv_sec = sarry->sctps_discontinuitytime.tv_sec; sb.sctps_discontinuitytime.tv_usec = sarry->sctps_discontinuitytime.tv_usec; } sb.sctps_currestab += sarry->sctps_currestab; sb.sctps_activeestab += sarry->sctps_activeestab; sb.sctps_restartestab += sarry->sctps_restartestab; sb.sctps_collisionestab += sarry->sctps_collisionestab; sb.sctps_passiveestab += sarry->sctps_passiveestab; sb.sctps_aborted += sarry->sctps_aborted; sb.sctps_shutdown += sarry->sctps_shutdown; sb.sctps_outoftheblue += sarry->sctps_outoftheblue; sb.sctps_checksumerrors += sarry->sctps_checksumerrors; sb.sctps_outcontrolchunks += sarry->sctps_outcontrolchunks; sb.sctps_outorderchunks += sarry->sctps_outorderchunks; sb.sctps_outunorderchunks += sarry->sctps_outunorderchunks; sb.sctps_incontrolchunks += sarry->sctps_incontrolchunks; sb.sctps_inorderchunks += sarry->sctps_inorderchunks; sb.sctps_inunorderchunks += sarry->sctps_inunorderchunks; sb.sctps_fragusrmsgs += sarry->sctps_fragusrmsgs; sb.sctps_reasmusrmsgs += sarry->sctps_reasmusrmsgs; sb.sctps_outpackets += sarry->sctps_outpackets; sb.sctps_inpackets += sarry->sctps_inpackets; sb.sctps_recvpackets += sarry->sctps_recvpackets; sb.sctps_recvdatagrams += sarry->sctps_recvdatagrams; sb.sctps_recvpktwithdata += sarry->sctps_recvpktwithdata; sb.sctps_recvsacks += sarry->sctps_recvsacks; sb.sctps_recvdata += sarry->sctps_recvdata; sb.sctps_recvdupdata += sarry->sctps_recvdupdata; sb.sctps_recvheartbeat += sarry->sctps_recvheartbeat; sb.sctps_recvheartbeatack += sarry->sctps_recvheartbeatack; sb.sctps_recvecne += sarry->sctps_recvecne; sb.sctps_recvauth += sarry->sctps_recvauth; sb.sctps_recvauthmissing += sarry->sctps_recvauthmissing; sb.sctps_recvivalhmacid += sarry->sctps_recvivalhmacid; sb.sctps_recvivalkeyid += sarry->sctps_recvivalkeyid; sb.sctps_recvauthfailed += sarry->sctps_recvauthfailed; sb.sctps_recvexpress += sarry->sctps_recvexpress; sb.sctps_recvexpressm += sarry->sctps_recvexpressm; sb.sctps_recvnocrc += sarry->sctps_recvnocrc; sb.sctps_recvswcrc += sarry->sctps_recvswcrc; sb.sctps_recvhwcrc += sarry->sctps_recvhwcrc; sb.sctps_sendpackets += sarry->sctps_sendpackets; sb.sctps_sendsacks += sarry->sctps_sendsacks; sb.sctps_senddata += sarry->sctps_senddata; sb.sctps_sendretransdata += sarry->sctps_sendretransdata; sb.sctps_sendfastretrans += sarry->sctps_sendfastretrans; sb.sctps_sendmultfastretrans += sarry->sctps_sendmultfastretrans; sb.sctps_sendheartbeat += sarry->sctps_sendheartbeat; sb.sctps_sendecne += sarry->sctps_sendecne; sb.sctps_sendauth += sarry->sctps_sendauth; sb.sctps_senderrors += sarry->sctps_senderrors; sb.sctps_sendnocrc += sarry->sctps_sendnocrc; sb.sctps_sendswcrc += sarry->sctps_sendswcrc; sb.sctps_sendhwcrc += sarry->sctps_sendhwcrc; sb.sctps_pdrpfmbox += sarry->sctps_pdrpfmbox; sb.sctps_pdrpfehos += sarry->sctps_pdrpfehos; sb.sctps_pdrpmbda += sarry->sctps_pdrpmbda; sb.sctps_pdrpmbct += sarry->sctps_pdrpmbct; sb.sctps_pdrpbwrpt += sarry->sctps_pdrpbwrpt; sb.sctps_pdrpcrupt += sarry->sctps_pdrpcrupt; sb.sctps_pdrpnedat += sarry->sctps_pdrpnedat; sb.sctps_pdrppdbrk += sarry->sctps_pdrppdbrk; sb.sctps_pdrptsnnf += sarry->sctps_pdrptsnnf; sb.sctps_pdrpdnfnd += sarry->sctps_pdrpdnfnd; sb.sctps_pdrpdiwnp += sarry->sctps_pdrpdiwnp; sb.sctps_pdrpdizrw += sarry->sctps_pdrpdizrw; sb.sctps_pdrpbadd += sarry->sctps_pdrpbadd; sb.sctps_pdrpmark += sarry->sctps_pdrpmark; sb.sctps_timoiterator += sarry->sctps_timoiterator; sb.sctps_timodata += sarry->sctps_timodata; sb.sctps_timowindowprobe += sarry->sctps_timowindowprobe; sb.sctps_timoinit += sarry->sctps_timoinit; sb.sctps_timosack += sarry->sctps_timosack; sb.sctps_timoshutdown += sarry->sctps_timoshutdown; sb.sctps_timoheartbeat += sarry->sctps_timoheartbeat; sb.sctps_timocookie += sarry->sctps_timocookie; sb.sctps_timosecret += sarry->sctps_timosecret; sb.sctps_timopathmtu += sarry->sctps_timopathmtu; sb.sctps_timoshutdownack += sarry->sctps_timoshutdownack; sb.sctps_timoshutdownguard += sarry->sctps_timoshutdownguard; sb.sctps_timostrmrst += sarry->sctps_timostrmrst; sb.sctps_timoearlyfr += sarry->sctps_timoearlyfr; sb.sctps_timoasconf += sarry->sctps_timoasconf; sb.sctps_timodelprim += sarry->sctps_timodelprim; sb.sctps_timoautoclose += sarry->sctps_timoautoclose; sb.sctps_timoassockill += sarry->sctps_timoassockill; sb.sctps_timoinpkill += sarry->sctps_timoinpkill; sb.sctps_hdrops += sarry->sctps_hdrops; sb.sctps_badsum += sarry->sctps_badsum; sb.sctps_noport += sarry->sctps_noport; sb.sctps_badvtag += sarry->sctps_badvtag; sb.sctps_badsid += sarry->sctps_badsid; sb.sctps_nomem += sarry->sctps_nomem; sb.sctps_fastretransinrtt += sarry->sctps_fastretransinrtt; sb.sctps_markedretrans += sarry->sctps_markedretrans; sb.sctps_naglesent += sarry->sctps_naglesent; sb.sctps_naglequeued += sarry->sctps_naglequeued; sb.sctps_maxburstqueued += sarry->sctps_maxburstqueued; sb.sctps_ifnomemqueued += sarry->sctps_ifnomemqueued; sb.sctps_windowprobed += sarry->sctps_windowprobed; sb.sctps_lowlevelerr += sarry->sctps_lowlevelerr; sb.sctps_lowlevelerrusr += sarry->sctps_lowlevelerrusr; sb.sctps_datadropchklmt += sarry->sctps_datadropchklmt; sb.sctps_datadroprwnd += sarry->sctps_datadroprwnd; sb.sctps_ecnereducedcwnd += sarry->sctps_ecnereducedcwnd; sb.sctps_vtagexpress += sarry->sctps_vtagexpress; sb.sctps_vtagbogus += sarry->sctps_vtagbogus; sb.sctps_primary_randry += sarry->sctps_primary_randry; sb.sctps_cmt_randry += sarry->sctps_cmt_randry; sb.sctps_slowpath_sack += sarry->sctps_slowpath_sack; sb.sctps_wu_sacks_sent += sarry->sctps_wu_sacks_sent; sb.sctps_sends_with_flags += sarry->sctps_sends_with_flags; sb.sctps_sends_with_unord += sarry->sctps_sends_with_unord; sb.sctps_sends_with_eof += sarry->sctps_sends_with_eof; sb.sctps_sends_with_abort += sarry->sctps_sends_with_abort; sb.sctps_protocol_drain_calls += sarry->sctps_protocol_drain_calls; sb.sctps_protocol_drains_done += sarry->sctps_protocol_drains_done; sb.sctps_read_peeks += sarry->sctps_read_peeks; sb.sctps_cached_chk += sarry->sctps_cached_chk; sb.sctps_cached_strmoq += sarry->sctps_cached_strmoq; sb.sctps_left_abandon += sarry->sctps_left_abandon; sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid; sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid; sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over; if (req->newptr != NULL) { memcpy(sarry, &sb_temp, sizeof(struct sctpstat)); } } error = SYSCTL_OUT(req, &sb, sizeof(struct sctpstat)); #else error = SYSCTL_OUT(req, &SCTP_BASE_STATS, sizeof(struct sctpstat)); if (error != 0) { return (error); } if (req->newptr != NULL) { memcpy(&SCTP_BASE_STATS, &sb_temp, sizeof(struct sctpstat)); } #endif return (error); } #if defined(SCTP_LOCAL_TRACE_BUF) static int sctp_sysctl_handle_trace_log(SYSCTL_HANDLER_ARGS) { int error; error = SYSCTL_OUT(req, &SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log)); return (error); } static int sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS) { int error = 0; memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log)); return (error); } #endif #define SCTP_UINT_SYSCTL(mib_name, var_name, prefix) \ static int \ sctp_sysctl_handle_##mib_name(SYSCTL_HANDLER_ARGS) \ { \ int error; \ uint32_t new; \ \ new = SCTP_BASE_SYSCTL(var_name); \ error = sysctl_handle_int(oidp, &new, 0, req); \ if ((error == 0) && (req->newptr != NULL)) { \ if ((new < prefix##_MIN) || \ (new > prefix##_MAX)) { \ error = EINVAL; \ } else { \ SCTP_BASE_SYSCTL(var_name) = new; \ } \ } \ return (error); \ } \ SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name, \ CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, NULL, 0, \ sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC); /* * sysctl definitions */ SCTP_UINT_SYSCTL(sendspace, sctp_sendspace, SCTPCTL_MAXDGRAM) SCTP_UINT_SYSCTL(recvspace, sctp_recvspace, SCTPCTL_RECVSPACE) SCTP_UINT_SYSCTL(auto_asconf, sctp_auto_asconf, SCTPCTL_AUTOASCONF) SCTP_UINT_SYSCTL(ecn_enable, sctp_ecn_enable, SCTPCTL_ECN_ENABLE) SCTP_UINT_SYSCTL(pr_enable, sctp_pr_enable, SCTPCTL_PR_ENABLE) SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, sctp_sysctl_handle_auth, "IU", SCTPCTL_AUTH_ENABLE_DESC); SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, sctp_sysctl_handle_asconf, "IU", SCTPCTL_ASCONF_ENABLE_DESC); SCTP_UINT_SYSCTL(reconfig_enable, sctp_reconfig_enable, SCTPCTL_RECONFIG_ENABLE) SCTP_UINT_SYSCTL(nrsack_enable, sctp_nrsack_enable, SCTPCTL_NRSACK_ENABLE) SCTP_UINT_SYSCTL(pktdrop_enable, sctp_pktdrop_enable, SCTPCTL_PKTDROP_ENABLE) SCTP_UINT_SYSCTL(strict_sacks, sctp_strict_sacks, SCTPCTL_STRICT_SACKS) SCTP_UINT_SYSCTL(peer_chkoh, sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH) SCTP_UINT_SYSCTL(maxburst, sctp_max_burst_default, SCTPCTL_MAXBURST) SCTP_UINT_SYSCTL(fr_maxburst, sctp_fr_max_burst_default, SCTPCTL_FRMAXBURST) SCTP_UINT_SYSCTL(maxchunks, sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS) SCTP_UINT_SYSCTL(tcbhashsize, sctp_hashtblsize, SCTPCTL_TCBHASHSIZE) SCTP_UINT_SYSCTL(pcbhashsize, sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE) SCTP_UINT_SYSCTL(min_split_point, sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT) SCTP_UINT_SYSCTL(chunkscale, sctp_chunkscale, SCTPCTL_CHUNKSCALE) SCTP_UINT_SYSCTL(delayed_sack_time, sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME) SCTP_UINT_SYSCTL(sack_freq, sctp_sack_freq_default, SCTPCTL_SACK_FREQ) SCTP_UINT_SYSCTL(sys_resource, sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE) SCTP_UINT_SYSCTL(asoc_resource, sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE) SCTP_UINT_SYSCTL(heartbeat_interval, sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL) SCTP_UINT_SYSCTL(pmtu_raise_time, sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME) SCTP_UINT_SYSCTL(shutdown_guard_time, sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME) SCTP_UINT_SYSCTL(secret_lifetime, sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME) SCTP_UINT_SYSCTL(rto_max, sctp_rto_max_default, SCTPCTL_RTO_MAX) SCTP_UINT_SYSCTL(rto_min, sctp_rto_min_default, SCTPCTL_RTO_MIN) SCTP_UINT_SYSCTL(rto_initial, sctp_rto_initial_default, SCTPCTL_RTO_INITIAL) SCTP_UINT_SYSCTL(init_rto_max, sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX) SCTP_UINT_SYSCTL(valid_cookie_life, sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE) SCTP_UINT_SYSCTL(init_rtx_max, sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX) SCTP_UINT_SYSCTL(assoc_rtx_max, sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX) SCTP_UINT_SYSCTL(path_rtx_max, sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX) SCTP_UINT_SYSCTL(path_pf_threshold, sctp_path_pf_threshold, SCTPCTL_PATH_PF_THRESHOLD) SCTP_UINT_SYSCTL(add_more_on_output, sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT) SCTP_UINT_SYSCTL(incoming_streams, sctp_nr_incoming_streams_default, SCTPCTL_INCOMING_STREAMS) SCTP_UINT_SYSCTL(outgoing_streams, sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS) SCTP_UINT_SYSCTL(cmt_on_off, sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF) SCTP_UINT_SYSCTL(cmt_use_dac, sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC) SCTP_UINT_SYSCTL(cwnd_maxburst, sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST) SCTP_UINT_SYSCTL(nat_friendly, sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY) SCTP_UINT_SYSCTL(abc_l_var, sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR) SCTP_UINT_SYSCTL(max_chained_mbufs, sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS) SCTP_UINT_SYSCTL(do_sctp_drain, sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN) SCTP_UINT_SYSCTL(hb_max_burst, sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST) SCTP_UINT_SYSCTL(abort_at_limit, sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT) SCTP_UINT_SYSCTL(strict_data_order, sctp_strict_data_order, SCTPCTL_STRICT_DATA_ORDER) SCTP_UINT_SYSCTL(min_residual, sctp_min_residual, SCTPCTL_MIN_RESIDUAL) SCTP_UINT_SYSCTL(max_retran_chunk, sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK) SCTP_UINT_SYSCTL(log_level, sctp_logging_level, SCTPCTL_LOGGING_LEVEL) SCTP_UINT_SYSCTL(default_cc_module, sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE) SCTP_UINT_SYSCTL(default_ss_module, sctp_default_ss_module, SCTPCTL_DEFAULT_SS_MODULE) SCTP_UINT_SYSCTL(default_frag_interleave, sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE) SCTP_UINT_SYSCTL(mobility_base, sctp_mobility_base, SCTPCTL_MOBILITY_BASE) SCTP_UINT_SYSCTL(mobility_fasthandoff, sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF) #if defined(SCTP_LOCAL_TRACE_BUF) SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD, NULL, 0, sctp_sysctl_handle_trace_log, "S,sctplog", "SCTP logging (struct sctp_log)"); SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, sctp_sysctl_handle_trace_log_clear, "IU", "Clear SCTP Logging buffer"); #endif SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, sctp_sysctl_handle_udp_tunneling, "IU", SCTPCTL_UDP_TUNNELING_PORT_DESC); SCTP_UINT_SYSCTL(enable_sack_immediately, sctp_enable_sack_immediately, SCTPCTL_SACK_IMMEDIATELY_ENABLE) SCTP_UINT_SYSCTL(nat_friendly_init, sctp_inits_include_nat_friendly, SCTPCTL_NAT_FRIENDLY_INITS) SCTP_UINT_SYSCTL(vtag_time_wait, sctp_vtag_time_wait, SCTPCTL_TIME_WAIT) SCTP_UINT_SYSCTL(buffer_splitting, sctp_buffer_splitting, SCTPCTL_BUFFER_SPLITTING) SCTP_UINT_SYSCTL(initial_cwnd, sctp_initial_cwnd, SCTPCTL_INITIAL_CWND) SCTP_UINT_SYSCTL(rttvar_bw, sctp_rttvar_bw, SCTPCTL_RTTVAR_BW) SCTP_UINT_SYSCTL(rttvar_rtt, sctp_rttvar_rtt, SCTPCTL_RTTVAR_RTT) SCTP_UINT_SYSCTL(rttvar_eqret, sctp_rttvar_eqret, SCTPCTL_RTTVAR_EQRET) SCTP_UINT_SYSCTL(rttvar_steady_step, sctp_steady_step, SCTPCTL_RTTVAR_STEADYS) SCTP_UINT_SYSCTL(use_dcccecn, sctp_use_dccc_ecn, SCTPCTL_RTTVAR_DCCCECN) SCTP_UINT_SYSCTL(blackhole, sctp_blackhole, SCTPCTL_BLACKHOLE) SCTP_UINT_SYSCTL(diag_info_code, sctp_diag_info_code, SCTPCTL_DIAG_INFO_CODE) #ifdef SCTP_DEBUG SCTP_UINT_SYSCTL(debug, sctp_debug_on, SCTPCTL_DEBUG) #endif #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) SCTP_UINT_SYSCTL(output_unlocked, sctp_output_unlocked, SCTPCTL_OUTPUT_UNLOCKED) #endif SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW, NULL, 0, sctp_sysctl_handle_stats, "S,sctpstat", "SCTP statistics (struct sctp_stat)"); SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, sctp_sysctl_handle_assoclist, "S,xassoc", "List of active SCTP associations"); Index: head/sys/netinet/sctp_uio.h =================================================================== --- head/sys/netinet/sctp_uio.h (revision 295135) +++ head/sys/netinet/sctp_uio.h (revision 295136) @@ -1,1339 +1,1341 @@ /*- * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef _NETINET_SCTP_UIO_H_ #define _NETINET_SCTP_UIO_H_ #if ! defined(_KERNEL) #include #endif #include #include #include typedef uint32_t sctp_assoc_t; #define SCTP_FUTURE_ASSOC 0 #define SCTP_CURRENT_ASSOC 1 #define SCTP_ALL_ASSOC 2 struct sctp_event { sctp_assoc_t se_assoc_id; uint16_t se_type; uint8_t se_on; }; /* Compatibility to previous define's */ #define sctp_stream_reset_events sctp_stream_reset_event /* On/Off setup for subscription to events */ struct sctp_event_subscribe { uint8_t sctp_data_io_event; uint8_t sctp_association_event; uint8_t sctp_address_event; uint8_t sctp_send_failure_event; uint8_t sctp_peer_error_event; uint8_t sctp_shutdown_event; uint8_t sctp_partial_delivery_event; uint8_t sctp_adaptation_layer_event; uint8_t sctp_authentication_event; uint8_t sctp_sender_dry_event; uint8_t sctp_stream_reset_event; }; /* ancillary data types */ #define SCTP_INIT 0x0001 #define SCTP_SNDRCV 0x0002 #define SCTP_EXTRCV 0x0003 #define SCTP_SNDINFO 0x0004 #define SCTP_RCVINFO 0x0005 #define SCTP_NXTINFO 0x0006 #define SCTP_PRINFO 0x0007 #define SCTP_AUTHINFO 0x0008 #define SCTP_DSTADDRV4 0x0009 #define SCTP_DSTADDRV6 0x000a /* * ancillary data structures */ struct sctp_initmsg { uint16_t sinit_num_ostreams; uint16_t sinit_max_instreams; uint16_t sinit_max_attempts; uint16_t sinit_max_init_timeo; }; /* We add 96 bytes to the size of sctp_sndrcvinfo. * This makes the current structure 128 bytes long * which is nicely 64 bit aligned but also has room * for us to add more and keep ABI compatibility. * For example, already we have the sctp_extrcvinfo * when enabled which is 48 bytes. */ /* * The assoc up needs a verfid * all sendrcvinfo's need a verfid for SENDING only. */ #define SCTP_ALIGN_RESV_PAD 92 #define SCTP_ALIGN_RESV_PAD_SHORT 76 struct sctp_sndrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; uint16_t sinfo_keynumber; uint16_t sinfo_keynumber_valid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD]; }; struct sctp_extrcvinfo { uint16_t sinfo_stream; uint16_t sinfo_ssn; uint16_t sinfo_flags; uint32_t sinfo_ppid; uint32_t sinfo_context; uint32_t sinfo_timetolive; /* should have been sinfo_pr_value */ uint32_t sinfo_tsn; uint32_t sinfo_cumtsn; sctp_assoc_t sinfo_assoc_id; uint16_t serinfo_next_flags; uint16_t serinfo_next_stream; uint32_t serinfo_next_aid; uint32_t serinfo_next_length; uint32_t serinfo_next_ppid; uint16_t sinfo_keynumber; uint16_t sinfo_keynumber_valid; uint8_t __reserve_pad[SCTP_ALIGN_RESV_PAD_SHORT]; }; #define sinfo_pr_value sinfo_timetolive #define sreinfo_next_flags serinfo_next_flags #define sreinfo_next_stream serinfo_next_stream #define sreinfo_next_aid serinfo_next_aid #define sreinfo_next_length serinfo_next_length #define sreinfo_next_ppid serinfo_next_ppid struct sctp_sndinfo { uint16_t snd_sid; uint16_t snd_flags; uint32_t snd_ppid; uint32_t snd_context; sctp_assoc_t snd_assoc_id; }; struct sctp_prinfo { uint16_t pr_policy; uint32_t pr_value; }; struct sctp_default_prinfo { uint16_t pr_policy; uint32_t pr_value; sctp_assoc_t pr_assoc_id; }; struct sctp_authinfo { uint16_t auth_keynumber; }; struct sctp_rcvinfo { uint16_t rcv_sid; uint16_t rcv_ssn; uint16_t rcv_flags; uint32_t rcv_ppid; uint32_t rcv_tsn; uint32_t rcv_cumtsn; uint32_t rcv_context; sctp_assoc_t rcv_assoc_id; }; struct sctp_nxtinfo { uint16_t nxt_sid; uint16_t nxt_flags; uint32_t nxt_ppid; uint32_t nxt_length; sctp_assoc_t nxt_assoc_id; }; #define SCTP_NO_NEXT_MSG 0x0000 #define SCTP_NEXT_MSG_AVAIL 0x0001 #define SCTP_NEXT_MSG_ISCOMPLETE 0x0002 #define SCTP_NEXT_MSG_IS_UNORDERED 0x0004 #define SCTP_NEXT_MSG_IS_NOTIFICATION 0x0008 struct sctp_recvv_rn { struct sctp_rcvinfo recvv_rcvinfo; struct sctp_nxtinfo recvv_nxtinfo; }; #define SCTP_RECVV_NOINFO 0 #define SCTP_RECVV_RCVINFO 1 #define SCTP_RECVV_NXTINFO 2 #define SCTP_RECVV_RN 3 #define SCTP_SENDV_NOINFO 0 #define SCTP_SENDV_SNDINFO 1 #define SCTP_SENDV_PRINFO 2 #define SCTP_SENDV_AUTHINFO 3 #define SCTP_SENDV_SPA 4 struct sctp_sendv_spa { uint32_t sendv_flags; struct sctp_sndinfo sendv_sndinfo; struct sctp_prinfo sendv_prinfo; struct sctp_authinfo sendv_authinfo; }; #define SCTP_SEND_SNDINFO_VALID 0x00000001 #define SCTP_SEND_PRINFO_VALID 0x00000002 #define SCTP_SEND_AUTHINFO_VALID 0x00000004 struct sctp_snd_all_completes { uint16_t sall_stream; uint16_t sall_flags; uint32_t sall_ppid; uint32_t sall_context; uint32_t sall_num_sent; uint32_t sall_num_failed; }; /* Flags that go into the sinfo->sinfo_flags field */ #define SCTP_NOTIFICATION 0x0010 /* next message is a notification */ #define SCTP_COMPLETE 0x0020 /* next message is complete */ #define SCTP_EOF 0x0100 /* Start shutdown procedures */ #define SCTP_ABORT 0x0200 /* Send an ABORT to peer */ #define SCTP_UNORDERED 0x0400 /* Message is un-ordered */ #define SCTP_ADDR_OVER 0x0800 /* Override the primary-address */ #define SCTP_SENDALL 0x1000 /* Send this on all associations */ #define SCTP_EOR 0x2000 /* end of message signal */ #define SCTP_SACK_IMMEDIATELY 0x4000 /* Set I-Bit */ #define INVALID_SINFO_FLAG(x) (((x) & 0xfffffff0 \ & ~(SCTP_EOF | SCTP_ABORT | SCTP_UNORDERED |\ SCTP_ADDR_OVER | SCTP_SENDALL | SCTP_EOR |\ SCTP_SACK_IMMEDIATELY)) != 0) /* for the endpoint */ /* The lower four bits is an enumeration of PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000/* Reliable transfer */ #define SCTP_PR_SCTP_TTL 0x0001/* Time based PR-SCTP */ #define SCTP_PR_SCTP_BUF 0x0002/* Buffer based PR-SCTP */ #define SCTP_PR_SCTP_RTX 0x0003/* Number of retransmissions based PR-SCTP */ #define SCTP_PR_SCTP_MAX SCTP_PR_SCTP_RTX #define SCTP_PR_SCTP_ALL 0x000f/* Used for aggregated stats */ #define PR_SCTP_POLICY(x) ((x) & 0x0f) #define PR_SCTP_ENABLED(x) ((PR_SCTP_POLICY(x) != SCTP_PR_SCTP_NONE) && \ (PR_SCTP_POLICY(x) != SCTP_PR_SCTP_ALL)) #define PR_SCTP_TTL_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_TTL) #define PR_SCTP_BUF_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_BUF) #define PR_SCTP_RTX_ENABLED(x) (PR_SCTP_POLICY(x) == SCTP_PR_SCTP_RTX) #define PR_SCTP_INVALID_POLICY(x) (PR_SCTP_POLICY(x) > SCTP_PR_SCTP_MAX) #define PR_SCTP_VALID_POLICY(x) (PR_SCTP_POLICY(x) <= SCTP_PR_SCTP_MAX) /* Stat's */ struct sctp_pcbinfo { uint32_t ep_count; uint32_t asoc_count; uint32_t laddr_count; uint32_t raddr_count; uint32_t chk_count; uint32_t readq_count; uint32_t free_chunks; uint32_t stream_oque; }; struct sctp_sockstat { sctp_assoc_t ss_assoc_id; uint32_t ss_total_sndbuf; uint32_t ss_total_recv_buf; }; /* * notification event structures */ /* * association change event */ struct sctp_assoc_change { uint16_t sac_type; uint16_t sac_flags; uint32_t sac_length; uint16_t sac_state; uint16_t sac_error; uint16_t sac_outbound_streams; uint16_t sac_inbound_streams; sctp_assoc_t sac_assoc_id; uint8_t sac_info[]; }; /* sac_state values */ #define SCTP_COMM_UP 0x0001 #define SCTP_COMM_LOST 0x0002 #define SCTP_RESTART 0x0003 #define SCTP_SHUTDOWN_COMP 0x0004 #define SCTP_CANT_STR_ASSOC 0x0005 /* sac_info values */ #define SCTP_ASSOC_SUPPORTS_PR 0x01 #define SCTP_ASSOC_SUPPORTS_AUTH 0x02 #define SCTP_ASSOC_SUPPORTS_ASCONF 0x03 #define SCTP_ASSOC_SUPPORTS_MULTIBUF 0x04 #define SCTP_ASSOC_SUPPORTS_RE_CONFIG 0x05 #define SCTP_ASSOC_SUPPORTS_MAX 0x05 /* * Address event */ struct sctp_paddr_change { uint16_t spc_type; uint16_t spc_flags; uint32_t spc_length; struct sockaddr_storage spc_aaddr; uint32_t spc_state; uint32_t spc_error; sctp_assoc_t spc_assoc_id; }; /* paddr state values */ #define SCTP_ADDR_AVAILABLE 0x0001 #define SCTP_ADDR_UNREACHABLE 0x0002 #define SCTP_ADDR_REMOVED 0x0003 #define SCTP_ADDR_ADDED 0x0004 #define SCTP_ADDR_MADE_PRIM 0x0005 #define SCTP_ADDR_CONFIRMED 0x0006 #define SCTP_ACTIVE 0x0001 /* SCTP_ADDR_REACHABLE */ #define SCTP_INACTIVE 0x0002 /* neither SCTP_ADDR_REACHABLE nor * SCTP_ADDR_UNCONFIRMED */ #define SCTP_UNCONFIRMED 0x0200 /* SCTP_ADDR_UNCONFIRMED */ /* remote error events */ struct sctp_remote_error { uint16_t sre_type; uint16_t sre_flags; uint32_t sre_length; uint16_t sre_error; sctp_assoc_t sre_assoc_id; uint8_t sre_data[]; }; /* data send failure event (deprecated) */ struct sctp_send_failed { uint16_t ssf_type; uint16_t ssf_flags; uint32_t ssf_length; uint32_t ssf_error; struct sctp_sndrcvinfo ssf_info; sctp_assoc_t ssf_assoc_id; uint8_t ssf_data[]; }; /* data send failure event (not deprecated) */ struct sctp_send_failed_event { uint16_t ssfe_type; uint16_t ssfe_flags; uint32_t ssfe_length; uint32_t ssfe_error; struct sctp_sndinfo ssfe_info; sctp_assoc_t ssfe_assoc_id; uint8_t ssfe_data[]; }; /* flag that indicates state of data */ #define SCTP_DATA_UNSENT 0x0001 /* inqueue never on wire */ #define SCTP_DATA_SENT 0x0002 /* on wire at failure */ /* shutdown event */ struct sctp_shutdown_event { uint16_t sse_type; uint16_t sse_flags; uint32_t sse_length; sctp_assoc_t sse_assoc_id; }; /* Adaptation layer indication stuff */ struct sctp_adaptation_event { uint16_t sai_type; uint16_t sai_flags; uint32_t sai_length; uint32_t sai_adaptation_ind; sctp_assoc_t sai_assoc_id; }; struct sctp_setadaptation { uint32_t ssb_adaptation_ind; }; /* compatible old spelling */ struct sctp_adaption_event { uint16_t sai_type; uint16_t sai_flags; uint32_t sai_length; uint32_t sai_adaption_ind; sctp_assoc_t sai_assoc_id; }; struct sctp_setadaption { uint32_t ssb_adaption_ind; }; /* * Partial Delivery API event */ struct sctp_pdapi_event { uint16_t pdapi_type; uint16_t pdapi_flags; uint32_t pdapi_length; uint32_t pdapi_indication; uint16_t pdapi_stream; uint16_t pdapi_seq; sctp_assoc_t pdapi_assoc_id; }; /* indication values */ #define SCTP_PARTIAL_DELIVERY_ABORTED 0x0001 /* * authentication key event */ struct sctp_authkey_event { uint16_t auth_type; uint16_t auth_flags; uint32_t auth_length; uint16_t auth_keynumber; uint16_t auth_altkeynumber; uint32_t auth_indication; sctp_assoc_t auth_assoc_id; }; /* indication values */ #define SCTP_AUTH_NEW_KEY 0x0001 #define SCTP_AUTH_NEWKEY SCTP_AUTH_NEW_KEY #define SCTP_AUTH_NO_AUTH 0x0002 #define SCTP_AUTH_FREE_KEY 0x0003 struct sctp_sender_dry_event { uint16_t sender_dry_type; uint16_t sender_dry_flags; uint32_t sender_dry_length; sctp_assoc_t sender_dry_assoc_id; }; /* * Stream reset event - subscribe to SCTP_STREAM_RESET_EVENT */ struct sctp_stream_reset_event { uint16_t strreset_type; uint16_t strreset_flags; uint32_t strreset_length; sctp_assoc_t strreset_assoc_id; uint16_t strreset_stream_list[]; }; /* flags in stream_reset_event (strreset_flags) */ #define SCTP_STREAM_RESET_INCOMING_SSN 0x0001 #define SCTP_STREAM_RESET_OUTGOING_SSN 0x0002 #define SCTP_STREAM_RESET_DENIED 0x0004 #define SCTP_STREAM_RESET_FAILED 0x0008 /* * Assoc reset event - subscribe to SCTP_ASSOC_RESET_EVENT */ struct sctp_assoc_reset_event { uint16_t assocreset_type; uint16_t assocreset_flags; uint32_t assocreset_length; sctp_assoc_t assocreset_assoc_id; uint32_t assocreset_local_tsn; uint32_t assocreset_remote_tsn; }; #define SCTP_ASSOC_RESET_DENIED 0x0004 #define SCTP_ASSOC_RESET_FAILED 0x0008 /* * Stream change event - subscribe to SCTP_STREAM_CHANGE_EVENT */ struct sctp_stream_change_event { uint16_t strchange_type; uint16_t strchange_flags; uint32_t strchange_length; sctp_assoc_t strchange_assoc_id; uint16_t strchange_instrms; uint16_t strchange_outstrms; }; #define SCTP_STREAM_CHANGE_DENIED 0x0004 #define SCTP_STREAM_CHANGE_FAILED 0x0008 /* SCTP notification event */ struct sctp_tlv { uint16_t sn_type; uint16_t sn_flags; uint32_t sn_length; }; union sctp_notification { struct sctp_tlv sn_header; struct sctp_assoc_change sn_assoc_change; struct sctp_paddr_change sn_paddr_change; struct sctp_remote_error sn_remote_error; struct sctp_send_failed sn_send_failed; struct sctp_shutdown_event sn_shutdown_event; struct sctp_adaptation_event sn_adaptation_event; /* compatibility same as above */ struct sctp_adaption_event sn_adaption_event; struct sctp_pdapi_event sn_pdapi_event; struct sctp_authkey_event sn_auth_event; struct sctp_sender_dry_event sn_sender_dry_event; struct sctp_send_failed_event sn_send_failed_event; struct sctp_stream_reset_event sn_strreset_event; struct sctp_assoc_reset_event sn_assocreset_event; struct sctp_stream_change_event sn_strchange_event; }; /* notification types */ #define SCTP_ASSOC_CHANGE 0x0001 #define SCTP_PEER_ADDR_CHANGE 0x0002 #define SCTP_REMOTE_ERROR 0x0003 #define SCTP_SEND_FAILED 0x0004 #define SCTP_SHUTDOWN_EVENT 0x0005 #define SCTP_ADAPTATION_INDICATION 0x0006 /* same as above */ #define SCTP_ADAPTION_INDICATION 0x0006 #define SCTP_PARTIAL_DELIVERY_EVENT 0x0007 #define SCTP_AUTHENTICATION_EVENT 0x0008 #define SCTP_STREAM_RESET_EVENT 0x0009 #define SCTP_SENDER_DRY_EVENT 0x000a #define SCTP_NOTIFICATIONS_STOPPED_EVENT 0x000b /* we don't send this */ #define SCTP_ASSOC_RESET_EVENT 0x000c #define SCTP_STREAM_CHANGE_EVENT 0x000d #define SCTP_SEND_FAILED_EVENT 0x000e /* * socket option structs */ struct sctp_paddrparams { struct sockaddr_storage spp_address; sctp_assoc_t spp_assoc_id; uint32_t spp_hbinterval; uint32_t spp_pathmtu; uint32_t spp_flags; uint32_t spp_ipv6_flowlabel; uint16_t spp_pathmaxrxt; uint8_t spp_dscp; }; #define spp_ipv4_tos spp_dscp #define SPP_HB_ENABLE 0x00000001 #define SPP_HB_DISABLE 0x00000002 #define SPP_HB_DEMAND 0x00000004 #define SPP_PMTUD_ENABLE 0x00000008 #define SPP_PMTUD_DISABLE 0x00000010 #define SPP_HB_TIME_IS_ZERO 0x00000080 #define SPP_IPV6_FLOWLABEL 0x00000100 #define SPP_DSCP 0x00000200 #define SPP_IPV4_TOS SPP_DSCP struct sctp_paddrthlds { struct sockaddr_storage spt_address; sctp_assoc_t spt_assoc_id; uint16_t spt_pathmaxrxt; uint16_t spt_pathpfthld; }; struct sctp_paddrinfo { struct sockaddr_storage spinfo_address; sctp_assoc_t spinfo_assoc_id; int32_t spinfo_state; uint32_t spinfo_cwnd; uint32_t spinfo_srtt; uint32_t spinfo_rto; uint32_t spinfo_mtu; }; struct sctp_rtoinfo { sctp_assoc_t srto_assoc_id; uint32_t srto_initial; uint32_t srto_max; uint32_t srto_min; }; struct sctp_assocparams { sctp_assoc_t sasoc_assoc_id; uint32_t sasoc_peer_rwnd; uint32_t sasoc_local_rwnd; uint32_t sasoc_cookie_life; uint16_t sasoc_asocmaxrxt; uint16_t sasoc_number_peer_destinations; }; struct sctp_setprim { struct sockaddr_storage ssp_addr; sctp_assoc_t ssp_assoc_id; uint8_t ssp_padding[4]; }; struct sctp_setpeerprim { struct sockaddr_storage sspp_addr; sctp_assoc_t sspp_assoc_id; uint8_t sspp_padding[4]; }; struct sctp_getaddresses { sctp_assoc_t sget_assoc_id; /* addr is filled in for N * sockaddr_storage */ struct sockaddr addr[1]; }; struct sctp_status { sctp_assoc_t sstat_assoc_id; int32_t sstat_state; uint32_t sstat_rwnd; uint16_t sstat_unackdata; uint16_t sstat_penddata; uint16_t sstat_instrms; uint16_t sstat_outstrms; uint32_t sstat_fragmentation_point; struct sctp_paddrinfo sstat_primary; }; /* * AUTHENTICATION support */ /* SCTP_AUTH_CHUNK */ struct sctp_authchunk { uint8_t sauth_chunk; }; /* SCTP_AUTH_KEY */ struct sctp_authkey { sctp_assoc_t sca_assoc_id; uint16_t sca_keynumber; uint16_t sca_keylength; uint8_t sca_key[]; }; /* SCTP_HMAC_IDENT */ struct sctp_hmacalgo { uint32_t shmac_number_of_idents; uint16_t shmac_idents[]; }; /* AUTH hmac_id */ #define SCTP_AUTH_HMAC_ID_RSVD 0x0000 #define SCTP_AUTH_HMAC_ID_SHA1 0x0001 /* default, mandatory */ #define SCTP_AUTH_HMAC_ID_SHA256 0x0003 /* SCTP_AUTH_ACTIVE_KEY / SCTP_AUTH_DELETE_KEY */ struct sctp_authkeyid { sctp_assoc_t scact_assoc_id; uint16_t scact_keynumber; }; /* SCTP_PEER_AUTH_CHUNKS / SCTP_LOCAL_AUTH_CHUNKS */ struct sctp_authchunks { sctp_assoc_t gauth_assoc_id; uint32_t gauth_number_of_chunks; uint8_t gauth_chunks[]; }; struct sctp_assoc_value { sctp_assoc_t assoc_id; uint32_t assoc_value; }; struct sctp_cc_option { int option; struct sctp_assoc_value aid_value; }; struct sctp_stream_value { sctp_assoc_t assoc_id; uint16_t stream_id; uint16_t stream_value; }; struct sctp_assoc_ids { uint32_t gaids_number_of_ids; sctp_assoc_t gaids_assoc_id[]; }; struct sctp_sack_info { sctp_assoc_t sack_assoc_id; uint32_t sack_delay; uint32_t sack_freq; }; struct sctp_timeouts { sctp_assoc_t stimo_assoc_id; uint32_t stimo_init; uint32_t stimo_data; uint32_t stimo_sack; uint32_t stimo_shutdown; uint32_t stimo_heartbeat; uint32_t stimo_cookie; uint32_t stimo_shutdownack; }; struct sctp_udpencaps { struct sockaddr_storage sue_address; sctp_assoc_t sue_assoc_id; uint16_t sue_port; }; struct sctp_prstatus { sctp_assoc_t sprstat_assoc_id; uint16_t sprstat_sid; uint16_t sprstat_policy; uint64_t sprstat_abandoned_unsent; uint64_t sprstat_abandoned_sent; }; struct sctp_cwnd_args { struct sctp_nets *net; /* network to *//* FIXME: LP64 issue */ uint32_t cwnd_new_value;/* cwnd in k */ uint32_t pseudo_cumack; uint16_t inflight; /* flightsize in k */ uint16_t cwnd_augment; /* increment to it */ uint8_t meets_pseudo_cumack; uint8_t need_new_pseudo_cumack; uint8_t cnt_in_send; uint8_t cnt_in_str; }; struct sctp_blk_args { uint32_t onsb; /* in 1k bytes */ uint32_t sndlen; /* len of send being attempted */ uint32_t peer_rwnd; /* rwnd of peer */ uint16_t send_sent_qcnt;/* chnk cnt */ uint16_t stream_qcnt; /* chnk cnt */ uint16_t chunks_on_oque;/* chunks out */ uint16_t flight_size; /* flight size in k */ }; /* * Max we can reset in one setting, note this is dictated not by the define * but the size of a mbuf cluster so don't change this define and think you * can specify more. You must do multiple resets if you want to reset more * than SCTP_MAX_EXPLICIT_STR_RESET. */ #define SCTP_MAX_EXPLICT_STR_RESET 1000 struct sctp_reset_streams { sctp_assoc_t srs_assoc_id; uint16_t srs_flags; uint16_t srs_number_streams; /* 0 == ALL */ uint16_t srs_stream_list[]; /* list if strrst_num_streams is not 0 */ }; struct sctp_add_streams { sctp_assoc_t sas_assoc_id; uint16_t sas_instrms; uint16_t sas_outstrms; }; struct sctp_get_nonce_values { sctp_assoc_t gn_assoc_id; uint32_t gn_peers_tag; uint32_t gn_local_tag; }; /* Debugging logs */ struct sctp_str_log { void *stcb; /* FIXME: LP64 issue */ uint32_t n_tsn; uint32_t e_tsn; uint16_t n_sseq; uint16_t e_sseq; uint16_t strm; }; struct sctp_sb_log { void *stcb; /* FIXME: LP64 issue */ uint32_t so_sbcc; uint32_t stcb_sbcc; uint32_t incr; }; struct sctp_fr_log { uint32_t largest_tsn; uint32_t largest_new_tsn; uint32_t tsn; }; struct sctp_fr_map { uint32_t base; uint32_t cum; uint32_t high; }; struct sctp_rwnd_log { uint32_t rwnd; uint32_t send_size; uint32_t overhead; uint32_t new_rwnd; }; struct sctp_mbcnt_log { uint32_t total_queue_size; uint32_t size_change; uint32_t total_queue_mb_size; uint32_t mbcnt_change; }; struct sctp_sack_log { uint32_t cumack; uint32_t oldcumack; uint32_t tsn; uint16_t numGaps; uint16_t numDups; }; struct sctp_lock_log { void *sock; /* FIXME: LP64 issue */ void *inp; /* FIXME: LP64 issue */ uint8_t tcb_lock; uint8_t inp_lock; uint8_t info_lock; uint8_t sock_lock; uint8_t sockrcvbuf_lock; uint8_t socksndbuf_lock; uint8_t create_lock; uint8_t resv; }; struct sctp_rto_log { void *net; /* FIXME: LP64 issue */ uint32_t rtt; }; struct sctp_nagle_log { void *stcb; /* FIXME: LP64 issue */ uint32_t total_flight; uint32_t total_in_queue; uint16_t count_in_queue; uint16_t count_in_flight; }; struct sctp_sbwake_log { void *stcb; /* FIXME: LP64 issue */ uint16_t send_q; uint16_t sent_q; uint16_t flight; uint16_t wake_cnt; uint8_t stream_qcnt; /* chnk cnt */ uint8_t chunks_on_oque; /* chunks out */ uint8_t sbflags; uint8_t sctpflags; }; struct sctp_misc_info { uint32_t log1; uint32_t log2; uint32_t log3; uint32_t log4; }; struct sctp_log_closing { void *inp; /* FIXME: LP64 issue */ void *stcb; /* FIXME: LP64 issue */ uint32_t sctp_flags; uint16_t state; int16_t loc; }; struct sctp_mbuf_log { struct mbuf *mp; /* FIXME: LP64 issue */ caddr_t ext; caddr_t data; uint16_t size; uint8_t refcnt; uint8_t mbuf_flags; }; struct sctp_cwnd_log { uint64_t time_event; uint8_t from; uint8_t event_type; uint8_t resv[2]; union { struct sctp_log_closing close; struct sctp_blk_args blk; struct sctp_cwnd_args cwnd; struct sctp_str_log strlog; struct sctp_fr_log fr; struct sctp_fr_map map; struct sctp_rwnd_log rwnd; struct sctp_mbcnt_log mbcnt; struct sctp_sack_log sack; struct sctp_lock_log lock; struct sctp_rto_log rto; struct sctp_sb_log sb; struct sctp_nagle_log nagle; struct sctp_sbwake_log wake; struct sctp_mbuf_log mb; struct sctp_misc_info misc; } x; }; struct sctp_cwnd_log_req { int32_t num_in_log; /* Number in log */ int32_t num_ret; /* Number returned */ int32_t start_at; /* start at this one */ int32_t end_at; /* end at this one */ struct sctp_cwnd_log log[]; }; struct sctp_timeval { uint32_t tv_sec; uint32_t tv_usec; }; struct sctpstat { struct sctp_timeval sctps_discontinuitytime; /* sctpStats 18 * (TimeStamp) */ /* MIB according to RFC 3873 */ uint32_t sctps_currestab; /* sctpStats 1 (Gauge32) */ uint32_t sctps_activeestab; /* sctpStats 2 (Counter32) */ uint32_t sctps_restartestab; uint32_t sctps_collisionestab; uint32_t sctps_passiveestab; /* sctpStats 3 (Counter32) */ uint32_t sctps_aborted; /* sctpStats 4 (Counter32) */ uint32_t sctps_shutdown;/* sctpStats 5 (Counter32) */ uint32_t sctps_outoftheblue; /* sctpStats 6 (Counter32) */ uint32_t sctps_checksumerrors; /* sctpStats 7 (Counter32) */ uint32_t sctps_outcontrolchunks; /* sctpStats 8 (Counter64) */ uint32_t sctps_outorderchunks; /* sctpStats 9 (Counter64) */ uint32_t sctps_outunorderchunks; /* sctpStats 10 (Counter64) */ uint32_t sctps_incontrolchunks; /* sctpStats 11 (Counter64) */ uint32_t sctps_inorderchunks; /* sctpStats 12 (Counter64) */ uint32_t sctps_inunorderchunks; /* sctpStats 13 (Counter64) */ uint32_t sctps_fragusrmsgs; /* sctpStats 14 (Counter64) */ uint32_t sctps_reasmusrmsgs; /* sctpStats 15 (Counter64) */ uint32_t sctps_outpackets; /* sctpStats 16 (Counter64) */ uint32_t sctps_inpackets; /* sctpStats 17 (Counter64) */ /* input statistics: */ uint32_t sctps_recvpackets; /* total input packets */ uint32_t sctps_recvdatagrams; /* total input datagrams */ uint32_t sctps_recvpktwithdata; /* total packets that had data */ uint32_t sctps_recvsacks; /* total input SACK chunks */ uint32_t sctps_recvdata;/* total input DATA chunks */ uint32_t sctps_recvdupdata; /* total input duplicate DATA chunks */ uint32_t sctps_recvheartbeat; /* total input HB chunks */ uint32_t sctps_recvheartbeatack; /* total input HB-ACK chunks */ uint32_t sctps_recvecne;/* total input ECNE chunks */ uint32_t sctps_recvauth;/* total input AUTH chunks */ uint32_t sctps_recvauthmissing; /* total input chunks missing AUTH */ uint32_t sctps_recvivalhmacid; /* total number of invalid HMAC ids * received */ uint32_t sctps_recvivalkeyid; /* total number of invalid secret ids * received */ uint32_t sctps_recvauthfailed; /* total number of auth failed */ uint32_t sctps_recvexpress; /* total fast path receives all one * chunk */ uint32_t sctps_recvexpressm; /* total fast path multi-part data */ uint32_t sctps_recvnocrc; uint32_t sctps_recvswcrc; uint32_t sctps_recvhwcrc; /* output statistics: */ uint32_t sctps_sendpackets; /* total output packets */ uint32_t sctps_sendsacks; /* total output SACKs */ uint32_t sctps_senddata;/* total output DATA chunks */ uint32_t sctps_sendretransdata; /* total output retransmitted DATA * chunks */ uint32_t sctps_sendfastretrans; /* total output fast retransmitted * DATA chunks */ uint32_t sctps_sendmultfastretrans; /* total FR's that happened * more than once to same * chunk (u-del multi-fr * algo). */ uint32_t sctps_sendheartbeat; /* total output HB chunks */ uint32_t sctps_sendecne;/* total output ECNE chunks */ uint32_t sctps_sendauth;/* total output AUTH chunks FIXME */ uint32_t sctps_senderrors; /* ip_output error counter */ uint32_t sctps_sendnocrc; uint32_t sctps_sendswcrc; uint32_t sctps_sendhwcrc; /* PCKDROPREP statistics: */ uint32_t sctps_pdrpfmbox; /* Packet drop from middle box */ uint32_t sctps_pdrpfehos; /* P-drop from end host */ uint32_t sctps_pdrpmbda;/* P-drops with data */ uint32_t sctps_pdrpmbct;/* P-drops, non-data, non-endhost */ uint32_t sctps_pdrpbwrpt; /* P-drop, non-endhost, bandwidth rep * only */ uint32_t sctps_pdrpcrupt; /* P-drop, not enough for chunk header */ uint32_t sctps_pdrpnedat; /* P-drop, not enough data to confirm */ uint32_t sctps_pdrppdbrk; /* P-drop, where process_chunk_drop * said break */ uint32_t sctps_pdrptsnnf; /* P-drop, could not find TSN */ uint32_t sctps_pdrpdnfnd; /* P-drop, attempt reverse TSN lookup */ uint32_t sctps_pdrpdiwnp; /* P-drop, e-host confirms zero-rwnd */ uint32_t sctps_pdrpdizrw; /* P-drop, midbox confirms no space */ uint32_t sctps_pdrpbadd;/* P-drop, data did not match TSN */ uint32_t sctps_pdrpmark;/* P-drop, TSN's marked for Fast Retran */ /* timeouts */ uint32_t sctps_timoiterator; /* Number of iterator timers that * fired */ uint32_t sctps_timodata;/* Number of T3 data time outs */ uint32_t sctps_timowindowprobe; /* Number of window probe (T3) timers * that fired */ uint32_t sctps_timoinit;/* Number of INIT timers that fired */ uint32_t sctps_timosack;/* Number of sack timers that fired */ uint32_t sctps_timoshutdown; /* Number of shutdown timers that * fired */ uint32_t sctps_timoheartbeat; /* Number of heartbeat timers that * fired */ uint32_t sctps_timocookie; /* Number of times a cookie timeout * fired */ uint32_t sctps_timosecret; /* Number of times an endpoint changed * its cookie secret */ uint32_t sctps_timopathmtu; /* Number of PMTU timers that fired */ uint32_t sctps_timoshutdownack; /* Number of shutdown ack timers that * fired */ uint32_t sctps_timoshutdownguard; /* Number of shutdown guard * timers that fired */ uint32_t sctps_timostrmrst; /* Number of stream reset timers that * fired */ uint32_t sctps_timoearlyfr; /* Number of early FR timers that * fired */ uint32_t sctps_timoasconf; /* Number of times an asconf timer * fired */ uint32_t sctps_timodelprim; /* Number of times a prim_deleted * timer fired */ uint32_t sctps_timoautoclose; /* Number of times auto close timer * fired */ uint32_t sctps_timoassockill; /* Number of asoc free timers expired */ uint32_t sctps_timoinpkill; /* Number of inp free timers expired */ /* former early FR counters */ uint32_t sctps_spare[11]; /* others */ uint32_t sctps_hdrops; /* packet shorter than header */ uint32_t sctps_badsum; /* checksum error */ uint32_t sctps_noport; /* no endpoint for port */ uint32_t sctps_badvtag; /* bad v-tag */ uint32_t sctps_badsid; /* bad SID */ uint32_t sctps_nomem; /* no memory */ uint32_t sctps_fastretransinrtt; /* number of multiple FR in a * RTT window */ uint32_t sctps_markedretrans; uint32_t sctps_naglesent; /* nagle allowed sending */ uint32_t sctps_naglequeued; /* nagle doesn't allow sending */ uint32_t sctps_maxburstqueued; /* max burst doesn't allow sending */ uint32_t sctps_ifnomemqueued; /* look ahead tells us no memory in * interface ring buffer OR we had a * send error and are queuing one * send. */ uint32_t sctps_windowprobed; /* total number of window probes sent */ uint32_t sctps_lowlevelerr; /* total times an output error causes * us to clamp down on next user send. */ uint32_t sctps_lowlevelerrusr; /* total times sctp_senderrors were * caused from a user send from a user * invoked send not a sack response */ uint32_t sctps_datadropchklmt; /* Number of in data drops due to * chunk limit reached */ uint32_t sctps_datadroprwnd; /* Number of in data drops due to rwnd * limit reached */ uint32_t sctps_ecnereducedcwnd; /* Number of times a ECN reduced the * cwnd */ uint32_t sctps_vtagexpress; /* Used express lookup via vtag */ uint32_t sctps_vtagbogus; /* Collision in express lookup. */ uint32_t sctps_primary_randry; /* Number of times the sender ran dry * of user data on primary */ uint32_t sctps_cmt_randry; /* Same for above */ uint32_t sctps_slowpath_sack; /* Sacks the slow way */ uint32_t sctps_wu_sacks_sent; /* Window Update only sacks sent */ uint32_t sctps_sends_with_flags; /* number of sends with * sinfo_flags !=0 */ uint32_t sctps_sends_with_unord; /* number of unordered sends */ uint32_t sctps_sends_with_eof; /* number of sends with EOF flag set */ uint32_t sctps_sends_with_abort; /* number of sends with ABORT * flag set */ uint32_t sctps_protocol_drain_calls; /* number of times protocol * drain called */ uint32_t sctps_protocol_drains_done; /* number of times we did a * protocol drain */ uint32_t sctps_read_peeks; /* Number of times recv was called * with peek */ uint32_t sctps_cached_chk; /* Number of cached chunks used */ uint32_t sctps_cached_strmoq; /* Number of cached stream oq's used */ uint32_t sctps_left_abandon; /* Number of unread messages abandoned * by close */ uint32_t sctps_send_burst_avoid; /* Unused */ uint32_t sctps_send_cwnd_avoid; /* Send cwnd full avoidance, already * max burst inflight to net */ uint32_t sctps_fwdtsn_map_over; /* number of map array over-runs via * fwd-tsn's */ uint32_t sctps_queue_upd_ecne; /* Number of times we queued or * updated an ECN chunk on send queue */ uint32_t sctps_reserved[31]; /* Future ABI compat - remove int's * from here when adding new */ }; #define SCTP_STAT_INCR(_x) SCTP_STAT_INCR_BY(_x,1) #define SCTP_STAT_DECR(_x) SCTP_STAT_DECR_BY(_x,1) #if defined(__FreeBSD__) && defined(SMP) && defined(SCTP_USE_PERCPU_STAT) #define SCTP_STAT_INCR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x += _d) #define SCTP_STAT_DECR_BY(_x,_d) (SCTP_BASE_STATS[PCPU_GET(cpuid)]._x -= _d) #else #define SCTP_STAT_INCR_BY(_x,_d) atomic_add_int(&SCTP_BASE_STAT(_x), _d) #define SCTP_STAT_DECR_BY(_x,_d) atomic_subtract_int(&SCTP_BASE_STAT(_x), _d) #endif /* The following macros are for handling MIB values, */ #define SCTP_STAT_INCR_COUNTER32(_x) SCTP_STAT_INCR(_x) #define SCTP_STAT_INCR_COUNTER64(_x) SCTP_STAT_INCR(_x) #define SCTP_STAT_INCR_GAUGE32(_x) SCTP_STAT_INCR(_x) #define SCTP_STAT_DECR_COUNTER32(_x) SCTP_STAT_DECR(_x) #define SCTP_STAT_DECR_COUNTER64(_x) SCTP_STAT_DECR(_x) #define SCTP_STAT_DECR_GAUGE32(_x) SCTP_STAT_DECR(_x) union sctp_sockstore { struct sockaddr_in sin; struct sockaddr_in6 sin6; struct sockaddr sa; }; /***********************************/ /* And something for us old timers */ /***********************************/ #ifndef ntohll #include #define ntohll(x) be64toh(x) #endif #ifndef htonll #include #define htonll(x) htobe64(x) #endif /***********************************/ struct xsctp_inpcb { uint32_t last; uint32_t flags; uint64_t features; uint32_t total_sends; uint32_t total_recvs; uint32_t total_nospaces; uint32_t fragmentation_point; uint16_t local_port; - uint16_t qlen; - uint16_t maxqlen; + uint16_t qlen_old; + uint16_t maxqlen_old; void *socket; + uint32_t qlen; + uint32_t maxqlen; #if defined(__LP64__) - uint32_t extra_padding[29]; /* future */ + uint32_t extra_padding[27]; /* future */ #else - uint32_t extra_padding[30]; /* future */ + uint32_t extra_padding[28]; /* future */ #endif }; struct xsctp_tcb { union sctp_sockstore primary_addr; /* sctpAssocEntry 5/6 */ uint32_t last; uint32_t heartbeat_interval; /* sctpAssocEntry 7 */ uint32_t state; /* sctpAssocEntry 8 */ uint32_t in_streams; /* sctpAssocEntry 9 */ uint32_t out_streams; /* sctpAssocEntry 10 */ uint32_t max_nr_retrans;/* sctpAssocEntry 11 */ uint32_t primary_process; /* sctpAssocEntry 12 */ uint32_t T1_expireries; /* sctpAssocEntry 13 */ uint32_t T2_expireries; /* sctpAssocEntry 14 */ uint32_t retransmitted_tsns; /* sctpAssocEntry 15 */ uint32_t total_sends; uint32_t total_recvs; uint32_t local_tag; uint32_t remote_tag; uint32_t initial_tsn; uint32_t highest_tsn; uint32_t cumulative_tsn; uint32_t cumulative_tsn_ack; uint32_t mtu; uint32_t refcnt; uint16_t local_port; /* sctpAssocEntry 3 */ uint16_t remote_port; /* sctpAssocEntry 4 */ struct sctp_timeval start_time; /* sctpAssocEntry 16 */ struct sctp_timeval discontinuity_time; /* sctpAssocEntry 17 */ uint32_t peers_rwnd; sctp_assoc_t assoc_id; /* sctpAssocEntry 1 */ uint32_t extra_padding[32]; /* future */ }; struct xsctp_laddr { union sctp_sockstore address; /* sctpAssocLocalAddrEntry 1/2 */ uint32_t last; struct sctp_timeval start_time; /* sctpAssocLocalAddrEntry 3 */ uint32_t extra_padding[32]; /* future */ }; struct xsctp_raddr { union sctp_sockstore address; /* sctpAssocLocalRemEntry 1/2 */ uint32_t last; uint32_t rto; /* sctpAssocLocalRemEntry 5 */ uint32_t max_path_rtx; /* sctpAssocLocalRemEntry 6 */ uint32_t rtx; /* sctpAssocLocalRemEntry 7 */ uint32_t error_counter; /* */ uint32_t cwnd; /* */ uint32_t flight_size; /* */ uint32_t mtu; /* */ uint8_t active; /* sctpAssocLocalRemEntry 3 */ uint8_t confirmed; /* */ uint8_t heartbeat_enabled; /* sctpAssocLocalRemEntry 4 */ uint8_t potentially_failed; struct sctp_timeval start_time; /* sctpAssocLocalRemEntry 8 */ uint32_t rtt; uint32_t heartbeat_interval; uint32_t ssthresh; uint32_t extra_padding[30]; /* future */ }; #define SCTP_MAX_LOGGING_SIZE 30000 #define SCTP_TRACE_PARAMS 6 /* This number MUST be even */ struct sctp_log_entry { uint64_t timestamp; uint32_t subsys; uint32_t padding; uint32_t params[SCTP_TRACE_PARAMS]; }; struct sctp_log { struct sctp_log_entry entry[SCTP_MAX_LOGGING_SIZE]; uint32_t index; uint32_t padding; }; /* * Kernel defined for sctp_send */ #if defined(_KERNEL) || defined(__Userspace__) int sctp_lower_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *i_pak, struct mbuf *control, int flags, struct sctp_sndrcvinfo *srcv ,struct thread *p ); int sctp_sorecvmsg(struct socket *so, struct uio *uio, struct mbuf **mp, struct sockaddr *from, int fromlen, int *msg_flags, struct sctp_sndrcvinfo *sinfo, int filling_sinfo); #endif /* * API system calls */ #if !(defined(_KERNEL)) && !(defined(__Userspace__)) __BEGIN_DECLS int sctp_peeloff(int, sctp_assoc_t); int sctp_bindx(int, struct sockaddr *, int, int); int sctp_connectx(int, const struct sockaddr *, int, sctp_assoc_t *); int sctp_getaddrlen(sa_family_t); int sctp_getpaddrs(int, sctp_assoc_t, struct sockaddr **); void sctp_freepaddrs(struct sockaddr *); int sctp_getladdrs(int, sctp_assoc_t, struct sockaddr **); void sctp_freeladdrs(struct sockaddr *); int sctp_opt_info(int, sctp_assoc_t, int, void *, socklen_t *); /* deprecated */ ssize_t sctp_sendmsg(int, const void *, size_t, const struct sockaddr *, socklen_t, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t); /* deprecated */ ssize_t sctp_send(int, const void *, size_t, const struct sctp_sndrcvinfo *, int); /* deprecated */ ssize_t sctp_sendx(int, const void *, size_t, struct sockaddr *, int, struct sctp_sndrcvinfo *, int); /* deprecated */ ssize_t sctp_sendmsgx(int sd, const void *, size_t, struct sockaddr *, int, uint32_t, uint32_t, uint16_t, uint32_t, uint32_t); sctp_assoc_t sctp_getassocid(int, struct sockaddr *); /* deprecated */ ssize_t sctp_recvmsg(int, void *, size_t, struct sockaddr *, socklen_t *, struct sctp_sndrcvinfo *, int *); ssize_t sctp_sendv(int, const struct iovec *, int, struct sockaddr *, int, void *, socklen_t, unsigned int, int); ssize_t sctp_recvv(int, const struct iovec *, int, struct sockaddr *, socklen_t *, void *, socklen_t *, unsigned int *, int *); __END_DECLS #endif /* !_KERNEL */ #endif /* !__sctp_uio_h__ */ Index: head/sys/sys/socketvar.h =================================================================== --- head/sys/sys/socketvar.h (revision 295135) +++ head/sys/sys/socketvar.h (revision 295136) @@ -1,418 +1,418 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 * * $FreeBSD$ */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ #include /* for TAILQ macros */ #include /* for struct selinfo */ #include #include #include #include #include #include #ifdef _KERNEL #include #include #endif struct vnet; /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ typedef u_quad_t so_gen_t; struct socket; /*- * Locking key to struct socket: * (a) constant after allocation, no locking required. * (b) locked by SOCK_LOCK(so). * (c) locked by SOCKBUF_LOCK(&so->so_rcv). * (d) locked by SOCKBUF_LOCK(&so->so_snd). * (e) locked by ACCEPT_LOCK(). * (f) not locked since integer reads/writes are atomic. * (g) used only as a sleep/wakeup address, no value. * (h) locked by global mutex so_global_mtx. */ struct socket { int so_count; /* (b) reference count */ short so_type; /* (a) generic type, see socket.h */ short so_options; /* from socket call, see socket.h */ short so_linger; /* time to linger while closing */ short so_state; /* (b) internal state flags SS_* */ int so_qstate; /* (e) internal state flags SQ_* */ void *so_pcb; /* protocol control block */ struct vnet *so_vnet; /* (a) network stack instance */ struct protosw *so_proto; /* (a) protocol handle */ /* * Variables for connection queuing. * Socket where accepts occur is so_head in all subsidiary sockets. * If so_head is 0, socket is not related to an accept. * For head socket so_incomp queues partially completed connections, * while so_comp is a queue of connections ready to be accepted. * If a connection is aborted and it has so_head set, then * it has to be pulled out of either so_incomp or so_comp. * We allow connections to queue up based on current queue lengths * and limit on number of queued connections for this socket. */ struct socket *so_head; /* (e) back pointer to listen socket */ TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */ TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */ TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */ - u_short so_qlen; /* (e) number of unaccepted connections */ - u_short so_incqlen; /* (e) number of unaccepted incomplete + u_int so_qlen; /* (e) number of unaccepted connections */ + u_int so_incqlen; /* (e) number of unaccepted incomplete connections */ - u_short so_qlimit; /* (e) max number queued connections */ + u_int so_qlimit; /* (e) max number queued connections */ short so_timeo; /* (g) connection timeout */ u_short so_error; /* (f) error affecting connection */ struct sigio *so_sigio; /* [sg] information for async I/O or out of band data (SIGURG) */ u_long so_oobmark; /* (c) chars to oob mark */ TAILQ_HEAD(, aiocblist) so_aiojobq; /* AIO ops waiting on socket */ struct sockbuf so_rcv, so_snd; struct ucred *so_cred; /* (a) user credentials */ struct label *so_label; /* (b) MAC label for socket */ struct label *so_peerlabel; /* (b) cached MAC label for peer */ /* NB: generation count must not be first. */ so_gen_t so_gencnt; /* (h) generation count */ void *so_emuldata; /* (b) private data for emulators */ struct so_accf { struct accept_filter *so_accept_filter; void *so_accept_filter_arg; /* saved filter args */ char *so_accept_filter_str; /* saved user args */ } *so_accf; struct osd osd; /* Object Specific extensions */ /* * so_fibnum, so_user_cookie and friends can be used to attach * some user-specified metadata to a socket, which then can be * used by the kernel for various actions. * so_user_cookie is used by ipfw/dummynet. */ int so_fibnum; /* routing domain for this socket */ uint32_t so_user_cookie; }; /* * Global accept mutex to serialize access to accept queues and * fields associated with multiple sockets. This allows us to * avoid defining a lock order between listen and accept sockets * until such time as it proves to be a good idea. */ extern struct mtx accept_mtx; #define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED) #define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED) #define ACCEPT_LOCK() mtx_lock(&accept_mtx) #define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx) /* * Per-socket mutex: we reuse the receive socket buffer mutex for space * efficiency. This decision should probably be revisited as we optimize * locking for the socket code. */ #define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv) #define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv) #define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv) #define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv) #define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv) /* * Socket state bits stored in so_qstate. */ #define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */ #define SQ_COMP 0x1000 /* unaccepted, complete connection */ /* * Externalized form of struct socket used by the sysctl(3) interface. */ struct xsocket { size_t xso_len; /* length of this structure */ struct socket *xso_so; /* makes a convenient handle sometimes */ short so_type; short so_options; short so_linger; short so_state; caddr_t so_pcb; /* another convenient handle */ int xso_protocol; int xso_family; - u_short so_qlen; - u_short so_incqlen; - u_short so_qlimit; + u_int so_qlen; + u_int so_incqlen; + u_int so_qlimit; short so_timeo; u_short so_error; pid_t so_pgid; u_long so_oobmark; struct xsockbuf so_rcv, so_snd; uid_t so_uid; /* XXX */ }; #ifdef _KERNEL /* * Macros for sockets and socket buffering. */ /* * Flags to sblock(). */ #define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ #define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */ #define SBL_VALID (SBL_WAIT | SBL_NOINTR) /* * Do we need to notify the other side when I/O is possible? */ #define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \ SB_UPCALL | SB_AIO | SB_KNOTE)) != 0) /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) /* can we read something from so? */ #define soreadabledata(so) \ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \ !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error) #define soreadable(so) \ (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) /* can we write something to so? */ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ (so)->so_error) /* * soref()/sorele() ref-count the socket structure. Note that you must * still explicitly close the socket, but the last ref count will free * the structure. */ #define soref(so) do { \ SOCK_LOCK_ASSERT(so); \ ++(so)->so_count; \ } while (0) #define sorele(so) do { \ ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count <= 0) \ panic("sorele"); \ if (--(so)->so_count == 0) \ sofree(so); \ else { \ SOCK_UNLOCK(so); \ ACCEPT_UNLOCK(); \ } \ } while (0) /* * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to * avoid a non-atomic test-and-wakeup. However, sowakeup is * responsible for releasing the lock if it is called. We unlock only * if we don't call into sowakeup. If any code is introduced that * directly invokes the underlying sowakeup() primitives, it must * maintain the same semantics. */ #define sorwakeup_locked(so) do { \ SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ if (sb_notify(&(so)->so_rcv)) \ sowakeup((so), &(so)->so_rcv); \ else \ SOCKBUF_UNLOCK(&(so)->so_rcv); \ } while (0) #define sorwakeup(so) do { \ SOCKBUF_LOCK(&(so)->so_rcv); \ sorwakeup_locked(so); \ } while (0) #define sowwakeup_locked(so) do { \ SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \ if (sb_notify(&(so)->so_snd)) \ sowakeup((so), &(so)->so_snd); \ else \ SOCKBUF_UNLOCK(&(so)->so_snd); \ } while (0) #define sowwakeup(so) do { \ SOCKBUF_LOCK(&(so)->so_snd); \ sowwakeup_locked(so); \ } while (0) struct accept_filter { char accf_name[16]; int (*accf_callback) (struct socket *so, void *arg, int waitflag); void * (*accf_create) (struct socket *so, char *arg); void (*accf_destroy) (struct socket *so); SLIST_ENTRY(accept_filter) accf_next; }; #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_ACCF); MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_SONAME); #endif /* * Socket specific helper hook point identifiers * Do not leave holes in the sequence, hook registration is a loop. */ #define HHOOK_SOCKET_OPT 0 #define HHOOK_SOCKET_CREATE 1 #define HHOOK_SOCKET_RCV 2 #define HHOOK_SOCKET_SND 3 #define HHOOK_FILT_SOREAD 4 #define HHOOK_FILT_SOWRITE 5 #define HHOOK_SOCKET_CLOSE 6 #define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE struct socket_hhook_data { struct socket *so; struct mbuf *m; void *hctx; /* hook point specific data*/ int status; }; extern int maxsockets; extern u_long sb_max; extern so_gen_t so_gencnt; struct file; struct filedesc; struct mbuf; struct sockaddr; struct ucred; struct uio; /* 'which' values for socket upcalls. */ #define SO_RCV 1 #define SO_SND 2 /* Return values for socket upcalls. */ #define SU_OK 0 #define SU_ISCONNECTED 1 /* * From uipc_socket and friends */ int sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type); int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, u_int *fflagp); void soabort(struct socket *so); int soaccept(struct socket *so, struct sockaddr **nam); int socheckuid(struct socket *so, uid_t uid); int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soclose(struct socket *so); int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soconnect2(struct socket *so1, struct socket *so2); int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td); int sodisconnect(struct socket *so); struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); void sofree(struct socket *so); void sohasoutofband(struct socket *so); int solisten(struct socket *so, int backlog, struct thread *td); void solisten_proto(struct socket *so, int backlog); int solisten_proto_check(struct socket *so); struct socket * sonewconn(struct socket *head, int connstatus); int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_stream(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_dgram(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_generic(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); void sorflush(struct socket *so); int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int soshutdown(struct socket *so, int how); void sotoxsocket(struct socket *so, struct xsocket *xso); void soupcall_clear(struct socket *so, int which); void soupcall_set(struct socket *so, int which, int (*func)(struct socket *, void *, int), void *arg); void sowakeup(struct socket *so, struct sockbuf *sb); int selsocket(struct socket *so, int events, struct timeval *tv, struct thread *td); /* * Accept filter functions (duh). */ int accept_filt_add(struct accept_filter *filt); int accept_filt_del(char *name); struct accept_filter *accept_filt_get(char *name); #ifdef ACCEPT_FILTER_MOD #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_accf); #endif int accept_filt_generic_mod_event(module_t mod, int event, void *data); #endif #endif /* _KERNEL */ #endif /* !_SYS_SOCKETVAR_H_ */ Index: head/usr.bin/netstat/inet.c =================================================================== --- head/usr.bin/netstat/inet.c (revision 295135) +++ head/usr.bin/netstat/inet.c (revision 295136) @@ -1,1454 +1,1454 @@ /*- * Copyright (c) 1983, 1988, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" char *inetname(struct in_addr *); void inetprint(const char *, struct in_addr *, int, const char *, int, const int); #ifdef INET6 static int udp_done, tcp_done, sdp_done; #endif /* INET6 */ static int pcblist_sysctl(int proto, const char *name, char **bufp, int istcp __unused) { const char *mibvar; char *buf; size_t len; switch (proto) { case IPPROTO_TCP: mibvar = "net.inet.tcp.pcblist"; break; case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; case IPPROTO_DIVERT: mibvar = "net.inet.divert.pcblist"; break; default: mibvar = "net.inet.raw.pcblist"; break; } if (strncmp(name, "sdp", 3) == 0) mibvar = "net.inet.sdp.pcblist"; len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) xo_warn("sysctl: %s", mibvar); return (0); } if ((buf = malloc(len)) == 0) { xo_warnx("malloc %lu bytes", (u_long)len); return (0); } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { xo_warn("sysctl: %s", mibvar); free(buf); return (0); } *bufp = buf; return (1); } /* * Copied directly from uipc_socket2.c. We leave out some fields that are in * nested structures that aren't used to avoid extra work. */ static void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } int sotoxsocket(struct socket *so, struct xsocket *xso) { struct protosw proto; struct domain domain; bzero(xso, sizeof *xso); xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0) return (-1); xso->xso_protocol = proto.pr_protocol; if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0) return (-1); xso->xso_family = domain.dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); return (0); } static int pcblist_kvm(u_long off, char **bufp, int istcp) { struct inpcbinfo pcbinfo; struct inpcbhead listhead; struct inpcb *inp; struct xinpcb xi; struct xinpgen xig; struct xtcpcb xt; struct socket so; struct xsocket *xso; char *buf, *p; size_t len; if (off == 0) return (0); kread(off, &pcbinfo, sizeof(pcbinfo)); if (istcp) len = 2 * sizeof(xig) + (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * sizeof(struct xtcpcb); else len = 2 * sizeof(xig) + (pcbinfo.ipi_count + pcbinfo.ipi_count / 8) * sizeof(struct xinpcb); if ((buf = malloc(len)) == 0) { xo_warnx("malloc %lu bytes", (u_long)len); return (0); } p = buf; #define COPYOUT(obj, size) do { \ if (len < (size)) { \ xo_warnx("buffer size exceeded"); \ goto fail; \ } \ bcopy((obj), p, (size)); \ len -= (size); \ p += (size); \ } while (0) #define KREAD(off, buf, len) do { \ if (kread((uintptr_t)(off), (buf), (len)) != 0) \ goto fail; \ } while (0) /* Write out header. */ xig.xig_len = sizeof xig; xig.xig_count = pcbinfo.ipi_count; xig.xig_gen = pcbinfo.ipi_gencnt; xig.xig_sogen = 0; COPYOUT(&xig, sizeof xig); /* Walk the PCB list. */ xt.xt_len = sizeof xt; xi.xi_len = sizeof xi; if (istcp) xso = &xt.xt_socket; else xso = &xi.xi_socket; KREAD(pcbinfo.ipi_listhead, &listhead, sizeof(listhead)); LIST_FOREACH(inp, &listhead, inp_list) { if (istcp) { KREAD(inp, &xt.xt_inp, sizeof(*inp)); inp = &xt.xt_inp; } else { KREAD(inp, &xi.xi_inp, sizeof(*inp)); inp = &xi.xi_inp; } if (inp->inp_gencnt > pcbinfo.ipi_gencnt) continue; if (istcp) { if (inp->inp_ppcb == NULL) bzero(&xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero(&xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else KREAD(inp->inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); } if (inp->inp_socket) { KREAD(inp->inp_socket, &so, sizeof(so)); if (sotoxsocket(&so, xso) != 0) goto fail; } else { bzero(xso, sizeof(*xso)); if (istcp) xso->xso_protocol = IPPROTO_TCP; } if (istcp) COPYOUT(&xt, sizeof xt); else COPYOUT(&xi, sizeof xi); } /* Reread the pcbinfo and write out the footer. */ kread(off, &pcbinfo, sizeof(pcbinfo)); xig.xig_count = pcbinfo.ipi_count; xig.xig_gen = pcbinfo.ipi_gencnt; COPYOUT(&xig, sizeof xig); *bufp = buf; return (1); fail: free(buf); return (0); #undef COPYOUT #undef KREAD } /* * Print a summary of connections related to an Internet * protocol. For TCP, also give state of connection. * Listening processes (aflag) are suppressed unless the * -a (all) flag is specified. */ void protopr(u_long off, const char *name, int af1, int proto) { int istcp; static int first = 1; char *buf; const char *vchar; struct tcpcb *tp = NULL; struct inpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; struct xtcp_timer *timer; istcp = 0; switch (proto) { case IPPROTO_TCP: #ifdef INET6 if (strncmp(name, "sdp", 3) != 0) { if (tcp_done != 0) return; else tcp_done = 1; } else { if (sdp_done != 0) return; else sdp_done = 1; } #endif istcp = 1; break; case IPPROTO_UDP: #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif break; } if (live) { if (!pcblist_sysctl(proto, name, &buf, istcp)) return; } else { if (!pcblist_kvm(off, &buf, istcp)) return; } oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { timer = &((struct xtcpcb *)xig)->xt_timer; tp = &((struct xtcpcb *)xig)->xt_tp; inp = &((struct xtcpcb *)xig)->xt_inp; so = &((struct xtcpcb *)xig)->xt_socket; } else { inp = &((struct xinpcb *)xig)->xi_inp; so = &((struct xinpcb *)xig)->xi_socket; timer = NULL; } /* Ignore sockets for protocols other than the desired one. */ if (so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. */ if (inp->inp_gencnt > oxig->xig_gen) continue; if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0) #ifdef INET6 || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0) #endif /* INET6 */ || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0 #ifdef INET6 && (inp->inp_vflag & INP_IPV6) == 0 #endif /* INET6 */ )) ) continue; if (!aflag && ( (istcp && tp->t_state == TCPS_LISTEN) || (af1 == AF_INET && inet_lnaof(inp->inp_laddr) == INADDR_ANY) #ifdef INET6 || (af1 == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif /* INET6 */ || (af1 == AF_UNSPEC && (((inp->inp_vflag & INP_IPV4) != 0 && inet_lnaof(inp->inp_laddr) == INADDR_ANY) #ifdef INET6 || ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif )) )) continue; if (first) { if (!Lflag) { xo_emit("Active Internet connections"); if (aflag) xo_emit(" (including servers)"); } else xo_emit( "Current listen queue sizes (qlen/incqlen/maxqlen)"); xo_emit("\n"); if (Aflag) xo_emit("{T:/%-*s} ", 2 * (int)sizeof(void *), "Tcpcb"); if (Lflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-14.14s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-14.14s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-14.14s} {T:/%-45.45s}"), "Proto", "Listen", "Local Address"); else if (Tflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%s}"), "Proto", "Rexmit", "OOORcv", "0-win", "Local Address", "Foreign Address"); else { xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%-45.45s}"), "Proto", "Recv-Q", "Send-Q", "Local Address", "Foreign Address"); if (!xflag && !Rflag) xo_emit(" (state)"); } if (xflag) { xo_emit(" {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s}", "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS", "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA", "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX"); xo_emit(" {T:/%7.7s} {T:/%7.7s} {T:/%7.7s} " "{T:/%7.7s} {T:/%7.7s} {T:/%7.7s}", "rexmt", "persist", "keep", "2msl", "delack", "rcvtime"); } else if (Rflag) { xo_emit(" {T:/%8.8s} {T:/%5.5s}", "flowid", "ftype"); } xo_emit("\n"); first = 0; } if (Lflag && so->so_qlimit == 0) continue; xo_open_instance("socket"); if (Aflag) { if (istcp) xo_emit("{q:address/%*lx} ", 2 * (int)sizeof(void *), (u_long)inp->inp_ppcb); else xo_emit("{q:adddress/%*lx} ", 2 * (int)sizeof(void *), (u_long)so->so_pcb); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "46" : "6"; else #endif vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "4" : ""; if (istcp && (tp->t_flags & TF_TOE) != 0) xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", "toe", vchar); else xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", name, vchar); if (Lflag) { - char buf1[15]; + char buf1[33]; - snprintf(buf1, 15, "%d/%d/%d", so->so_qlen, + snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen, so->so_incqlen, so->so_qlimit); - xo_emit("{:listen-queue-sizes/%-14.14s} ", buf1); + xo_emit("{:listen-queue-sizes/%-32.32s} ", buf1); } else if (Tflag) { if (istcp) xo_emit("{:sent-retransmit-packets/%6u} " "{:received-out-of-order-packets/%6u} " "{:sent-zero-window/%6u} ", tp->t_sndrexmitpack, tp->t_rcvoopack, tp->t_sndzerowin); else xo_emit("{P:/%21s}", ""); } else { xo_emit("{:receive-bytes-waiting/%6u} " "{:send-bytes-waiting/%6u} ", so->so_rcv.sb_cc, so->so_snd.sb_cc); } if (numeric_port) { if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 1, af1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 1); } /* else nothing printed now */ #endif /* INET6 */ } else if (inp->inp_flags & INP_ANONPORT) { if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 0, af1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 0); } /* else nothing printed now */ #endif /* INET6 */ } else { if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 0, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport, af1); } #ifdef INET6 else if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 0); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport); } /* else nothing printed now */ #endif /* INET6 */ } if (xflag) { xo_emit("{:receive-mbufs/%6u} {:send-mbufs/%6u} " "{:receive-clusters/%6u} {:send-clusters/%6u} " "{:receive-high-water/%6u} {:send-high-water/%6u} " "{:receive-low-water/%6u} {:send-low-water/%6u} " "{:receive-mbuf-bytes/%6u} {:send-mbuf-bytes/%6u} " "{:receive-mbuf-bytes-max/%6u} " "{:send-mbuf-bytes-max/%6u}", so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt, so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt, so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat, so->so_rcv.sb_lowat, so->so_snd.sb_lowat, so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt, so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax); if (timer != NULL) xo_emit(" {:retransmit-timer/%4d.%02d} " "{:persist-timer/%4d.%02d} " "{:keepalive-timer/%4d.%02d} " "{:msl2-timer/%4d.%02d} " "{:delay-ack-timer/%4d.%02d} " "{:inactivity-timer/%4d.%02d}", timer->tt_rexmt / 1000, (timer->tt_rexmt % 1000) / 10, timer->tt_persist / 1000, (timer->tt_persist % 1000) / 10, timer->tt_keep / 1000, (timer->tt_keep % 1000) / 10, timer->tt_2msl / 1000, (timer->tt_2msl % 1000) / 10, timer->tt_delack / 1000, (timer->tt_delack % 1000) / 10, timer->t_rcvtime / 1000, (timer->t_rcvtime % 1000) / 10); } if (istcp && !Lflag && !xflag && !Tflag && !Rflag) { if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES) xo_emit("{:tcp-state/%d}", tp->t_state); else { xo_emit("{:tcp-state/%s}", tcpstates[tp->t_state]); #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN) /* Show T/TCP `hidden state' */ if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) xo_emit("{:need-syn-or-fin/*}"); #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */ } } if (Rflag) { /* XXX: is this right Alfred */ xo_emit(" {:flow-id/%08x} {:flow-type/%5d}", inp->inp_flowid, inp->inp_flowtype); } xo_emit("\n"); xo_close_instance("socket"); } if (xig != oxig && xig->xig_gen != oxig->xig_gen) { if (oxig->xig_count > xig->xig_count) { xo_emit("Some {d:lost/%s} sockets may have been " "deleted.\n", name); } else if (oxig->xig_count < xig->xig_count) { xo_emit("Some {d:created/%s} sockets may have been " "created.\n", name); } else { xo_emit("Some {d:changed/%s} sockets may have been " "created or deleted.\n", name); } } free(buf); } /* * Dump TCP statistics structure. */ void tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct tcpstat tcpstat; #ifdef INET6 if (tcp_done != 0) return; else tcp_done = 1; #endif if (fetch_stats("net.inet.tcp.stats", off, &tcpstat, sizeof(tcpstat), kread_counters) != 0) return; xo_open_container("tcp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, plural(tcpstat.f)) #define p1a(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f) #define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2, plural(tcpstat.f2)) #define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2) #define p3(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, pluralies(tcpstat.f)) p(tcps_sndtotal, "\t{:sent-packets/%ju} {N:/packet%s sent}\n"); p2(tcps_sndpack,tcps_sndbyte, "\t\t{:sent-data-packets/%ju} " "{N:/data packet%s} ({:sent-data-bytes/%ju} {N:/byte%s})\n"); p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t" "{:sent-retransmitted-packets/%ju} {N:/data packet%s} " "({:sent-retransmitted-bytes/%ju} {N:/byte%s}) " "{N:retransmitted}\n"); p(tcps_sndrexmitbad, "\t\t" "{:sent-unnecessary-retransmitted-packets/%ju} " "{N:/data packet%s unnecessarily retransmitted}\n"); p(tcps_mturesent, "\t\t{:sent-resends-by-mtu-discovery/%ju} " "{N:/resend%s initiated by MTU discovery}\n"); p2a(tcps_sndacks, tcps_delack, "\t\t{:sent-ack-only-packets/%ju} " "{N:/ack-only packet%s/} ({:sent-packets-delayed/%ju} " "{N:delayed})\n"); p(tcps_sndurg, "\t\t{:sent-urg-only-packets/%ju} " "{N:/URG only packet%s}\n"); p(tcps_sndprobe, "\t\t{:sent-window-probe-packets/%ju} " "{N:/window probe packet%s}\n"); p(tcps_sndwinup, "\t\t{:sent-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_sndctrl, "\t\t{:sent-control-packets/%ju} " "{N:/control packet%s}\n"); p(tcps_rcvtotal, "\t{:received-packets/%ju} " "{N:/packet%s received}\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t" "{:received-ack-packets/%ju} {N:/ack%s} " "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n"); p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} " "{N:/duplicate ack%s}\n"); p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} " "{N:/ack%s for unsent data}\n"); p2(tcps_rcvpack, tcps_rcvbyte, "\t\t" "{:received-in-sequence-packets/%ju} {N:/packet%s} " "({:received-in-sequence-bytes/%ju} {N:/byte%s}) " "{N:received in-sequence}\n"); p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t" "{:received-completely-duplicate-packets/%ju} " "{N:/completely duplicate packet%s} " "({:received-completely-duplicate-bytes/%ju} {N:/byte%s})\n"); p(tcps_pawsdrop, "\t\t{:received-old-duplicate-packets/%ju} " "{N:/old duplicate packet%s}\n"); p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t" "{:received-some-duplicate-packets/%ju} " "{N:/packet%s with some dup. data} " "({:received-some-duplicate-bytes/%ju} {N:/byte%s duped/})\n"); p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t{:received-out-of-order/%ju} " "{N:/out-of-order packet%s} " "({:received-out-of-order-bytes/%ju} {N:/byte%s})\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t" "{:received-after-window-packets/%ju} {N:/packet%s} " "({:received-after-window-bytes/%ju} {N:/byte%s}) " "{N:of data after window}\n"); p(tcps_rcvwinprobe, "\t\t{:received-window-probes/%ju} " "{N:/window probe%s}\n"); p(tcps_rcvwinupd, "\t\t{:receive-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_rcvafterclose, "\t\t{:received-after-close-packets/%ju} " "{N:/packet%s received after close}\n"); p(tcps_rcvbadsum, "\t\t{:discard-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(tcps_rcvbadoff, "\t\t{:discard-bad-header-offset/%ju} " "{N:/discarded for bad header offset field%s}\n"); p1a(tcps_rcvshort, "\t\t{:discard-too-short/%ju} " "{N:discarded because packet too short}\n"); p1a(tcps_rcvmemdrop, "\t\t{:discard-memory-problems/%ju} " "{N:discarded due to memory problems}\n"); p(tcps_connattempt, "\t{:connection-requests/%ju} " "{N:/connection request%s}\n"); p(tcps_accepts, "\t{:connections-accepts/%ju} " "{N:/connection accept%s}\n"); p(tcps_badsyn, "\t{:bad-connection-attempts/%ju} " "{N:/bad connection attempt%s}\n"); p(tcps_listendrop, "\t{:listen-queue-overflows/%ju} " "{N:/listen queue overflow%s}\n"); p(tcps_badrst, "\t{:ignored-in-window-resets/%ju} " "{N:/ignored RSTs in the window%s}\n"); p(tcps_connects, "\t{:connections-established/%ju} " "{N:/connection%s established (including accepts)}\n"); p2(tcps_closed, tcps_drops, "\t{:connections-closed/%ju} " "{N:/connection%s closed (including} " "{:connection-drops/%ju} {N:/drop%s})\n"); p(tcps_cachedrtt, "\t\t{:connections-updated-rtt-on-close/%ju} " "{N:/connection%s updated cached RTT on close}\n"); p(tcps_cachedrttvar, "\t\t" "{:connections-updated-variance-on-close/%ju} " "{N:/connection%s updated cached RTT variance on close}\n"); p(tcps_cachedssthresh, "\t\t" "{:connections-updated-ssthresh-on-close/%ju} " "{N:/connection%s updated cached ssthresh on close}\n"); p(tcps_conndrops, "\t{:embryonic-connections-dropped/%ju} " "{N:/embryonic connection%s dropped}\n"); p2(tcps_rttupdated, tcps_segstimed, "\t{:segments-updated-rtt/%ju} " "{N:/segment%s updated rtt (of} " "{:segment-update-attempts/%ju} {N:/attempt%s})\n"); p(tcps_rexmttimeo, "\t{:retransmit-timeouts/%ju} " "{N:/retransmit timeout%s}\n"); p(tcps_timeoutdrop, "\t\t" "{:connections-dropped-by-retransmit-timeout/%ju} " "{N:/connection%s dropped by rexmit timeout}\n"); p(tcps_persisttimeo, "\t{:persist-timeout/%ju} " "{N:/persist timeout%s}\n"); p(tcps_persistdrop, "\t\t" "{:connections-dropped-by-persist-timeout/%ju} " "{N:/connection%s dropped by persist timeout}\n"); p(tcps_finwait2_drops, "\t" "{:connections-dropped-by-finwait2-timeout/%ju} " "{N:/Connection%s (fin_wait_2) dropped because of timeout}\n"); p(tcps_keeptimeo, "\t{:keepalive-timeout/%ju} " "{N:/keepalive timeout%s}\n"); p(tcps_keepprobe, "\t\t{:keepalive-probes/%ju} " "{N:/keepalive probe%s sent}\n"); p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} " "{N:/connection%s dropped by keepalive}\n"); p(tcps_predack, "\t{:ack-header-predictions/%ju} " "{N:/correct ACK header prediction%s}\n"); p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} " "{N:/correct data packet header prediction%s}\n"); xo_open_container("syncache"); p3(tcps_sc_added, "\t{:entries-added/%ju} " "{N:/syncache entr%s added}\n"); p1a(tcps_sc_retransmitted, "\t\t{:retransmitted/%ju} " "{N:/retransmitted}\n"); p1a(tcps_sc_dupsyn, "\t\t{:duplicates/%ju} {N:/dupsyn}\n"); p1a(tcps_sc_dropped, "\t\t{:dropped/%ju} {N:/dropped}\n"); p1a(tcps_sc_completed, "\t\t{:completed/%ju} {N:/completed}\n"); p1a(tcps_sc_bucketoverflow, "\t\t{:bucket-overflow/%ju} " "{N:/bucket overflow}\n"); p1a(tcps_sc_cacheoverflow, "\t\t{:cache-overflow/%ju} " "{N:/cache overflow}\n"); p1a(tcps_sc_reset, "\t\t{:reset/%ju} {N:/reset}\n"); p1a(tcps_sc_stale, "\t\t{:stale/%ju} {N:/stale}\n"); p1a(tcps_sc_aborted, "\t\t{:aborted/%ju} {N:/aborted}\n"); p1a(tcps_sc_badack, "\t\t{:bad-ack/%ju} {N:/badack}\n"); p1a(tcps_sc_unreach, "\t\t{:unreachable/%ju} {N:/unreach}\n"); p(tcps_sc_zonefail, "\t\t{:zone-failures/%ju} {N:/zone failure%s}\n"); p(tcps_sc_sendcookie, "\t{:sent-cookies/%ju} {N:/cookie%s sent}\n"); p(tcps_sc_recvcookie, "\t{:receivd-cookies/%ju} " "{N:/cookie%s received}\n"); xo_close_container("syncache"); xo_open_container("hostcache"); p3(tcps_hc_added, "\t{:entries-added/%ju} " "{N:/hostcache entr%s added}\n"); p1a(tcps_hc_bucketoverflow, "\t\t{:buffer-overflows/%ju} " "{N:/bucket overflow}\n"); xo_close_container("hostcache"); xo_open_container("sack"); p(tcps_sack_recovery_episode, "\t{:recovery-episodes/%ju} " "{N:/SACK recovery episode%s}\n"); p(tcps_sack_rexmits, "\t{:segment-retransmits/%ju} " "{N:/segment rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rexmit_bytes, "\t{:byte-retransmits/%ju} " "{N:/byte rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rcv_blocks, "\t{:received-blocks/%ju} " "{N:/SACK option%s (SACK blocks) received}\n"); p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} " "{N:/SACK option%s (SACK blocks) sent}\n"); p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} " "{N:/SACK scoreboard overflow}\n"); xo_close_container("sack"); xo_open_container("ecn"); p(tcps_ecn_ce, "\t{:ce-packets/%ju} " "{N:/packet%s with ECN CE bit set}\n"); p(tcps_ecn_ect0, "\t{:ect0-packets/%ju} " "{N:/packet%s with ECN ECT(0) bit set}\n"); p(tcps_ecn_ect1, "\t{:ect1-packets/%ju} " "{N:/packet%s with ECN ECT(1) bit set}\n"); p(tcps_ecn_shs, "\t{:handshakes/%ju} " "{N:/successful ECN handshake%s}\n"); p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} " "{N:/time%s ECN reduced the congestion window}\n"); #undef p #undef p1a #undef p2 #undef p2a #undef p3 xo_close_container("ecn"); xo_close_container("tcp"); } /* * Dump UDP statistics structure. */ void udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct udpstat udpstat; uint64_t delivered; #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif if (fetch_stats("net.inet.udp.stats", off, &udpstat, sizeof(udpstat), kread_counters) != 0) return; xo_open_container("udp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f, plural(udpstat.f)) #define p1a(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f) p(udps_ipackets, "{:received-datagrams/%ju} " "{N:/datagram%s received}\n"); p1a(udps_hdrops, "{:dropped-incomplete-headers/%ju} " "{N:/with incomplete header}\n"); p1a(udps_badlen, "{:dropped-bad-data-length/%ju} " "{N:/with bad data length field}\n"); p1a(udps_badsum, "{:dropped-bad-checksum/%ju} " "{N:/with bad checksum}\n"); p1a(udps_nosum, "{:dropped-no-checksum/%ju} " "{N:/with no checksum}\n"); p1a(udps_noport, "{:dropped-no-socket/%ju} " "{N:/dropped due to no socket}\n"); p(udps_noportbcast, "{:dropped-broadcast-multicast/%ju} " "{N:/broadcast\\/multicast datagram%s undelivered}\n"); p1a(udps_fullsock, "{:dropped-full-socket-buffer/%ju} " "{N:/dropped due to full socket buffers}\n"); p1a(udpps_pcbhashmiss, "{:not-for-hashed-pcb/%ju} " "{N:/not for hashed pcb}\n"); delivered = udpstat.udps_ipackets - udpstat.udps_hdrops - udpstat.udps_badlen - udpstat.udps_badsum - udpstat.udps_noport - udpstat.udps_noportbcast - udpstat.udps_fullsock; if (delivered || sflag <= 1) xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n", (uint64_t)delivered); p(udps_opackets, "{:output-packets/%ju} {N:/datagram%s output}\n"); /* the next statistic is cumulative in udps_noportbcast */ p(udps_filtermcast, "{:multicast-source-filter-matches/%ju} " "{N:/time%s multicast source filter matched}\n"); #undef p #undef p1a xo_close_container("udp"); } /* * Dump CARP statistics structure. */ void carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct carpstats carpstat; if (fetch_stats("net.inet.carp.stats", off, &carpstat, sizeof(carpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f, plural(carpstat.f)) #define p2(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f) p(carps_ipackets, "\t{:received-inet-packets/%ju} " "{N:/packet%s received (IPv4)}\n"); p(carps_ipackets6, "\t{:received-inet6-packets/%ju} " "{N:/packet%s received (IPv6)}\n"); p(carps_badttl, "\t\t{:dropped-wrong-ttl/%ju} " "{N:/packet%s discarded for wrong TTL}\n"); p(carps_hdrops, "\t\t{:dropped-short-header/%ju} " "{N:/packet%s shorter than header}\n"); p(carps_badsum, "\t\t{:dropped-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(carps_badver, "\t\t{:dropped-bad-version/%ju} " "{N:/discarded packet%s with a bad version}\n"); p2(carps_badlen, "\t\t{:dropped-short-packet/%ju} " "{N:/discarded because packet too short}\n"); p2(carps_badauth, "\t\t{:dropped-bad-authentication/%ju} " "{N:/discarded for bad authentication}\n"); p2(carps_badvhid, "\t\t{:dropped-bad-vhid/%ju} " "{N:/discarded for bad vhid}\n"); p2(carps_badaddrs, "\t\t{:dropped-bad-address-list/%ju} " "{N:/discarded because of a bad address list}\n"); p(carps_opackets, "\t{:sent-inet-packets/%ju} " "{N:/packet%s sent (IPv4)}\n"); p(carps_opackets6, "\t{:sent-inet6-packets/%ju} " "{N:/packet%s sent (IPv6)}\n"); p2(carps_onomem, "\t\t{:send-failed-memory-error/%ju} " "{N:/send failed due to mbuf memory error}\n"); #if notyet p(carps_ostates, "\t\t{:send-state-updates/%s} " "{N:/state update%s sent}\n"); #endif #undef p #undef p2 xo_close_container(name); } /* * Dump IP statistics structure. */ void ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct ipstat ipstat; if (fetch_stats("net.inet.ip.stats", off, &ipstat, sizeof(ipstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f, plural(ipstat.f)) #define p1a(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f) p(ips_total, "\t{:received-packets/%ju} " "{N:/total packet%s received}\n"); p(ips_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/bad header checksum%s}\n"); p1a(ips_toosmall, "\t{:dropped-below-minimum-size/%ju} " "{N:/with size smaller than minimum}\n"); p1a(ips_tooshort, "\t{:dropped-short-packets/%ju} " "{N:/with data size < data length}\n"); p1a(ips_toolong, "\t{:dropped-too-long/%ju} " "{N:/with ip length > max ip packet size}\n"); p1a(ips_badhlen, "\t{:dropped-short-header-length/%ju} " "{N:/with header length < data size}\n"); p1a(ips_badlen, "\t{:dropped-short-data/%ju} " "{N:/with data length < header length}\n"); p1a(ips_badoptions, "\t{:dropped-bad-options/%ju} " "{N:/with bad options}\n"); p1a(ips_badvers, "\t{:dropped-bad-version/%ju} " "{N:/with incorrect version number}\n"); p(ips_fragments, "\t{:received-fragments/%ju} " "{N:/fragment%s received}\n"); p(ips_fragdropped, "\t{:dropped-fragments/%ju} " "{N:/fragment%s dropped (dup or out of space)}\n"); p(ips_fragtimeout, "\t{:dropped-fragments-after-timeout/%ju} " "{N:/fragment%s dropped after timeout}\n"); p(ips_reassembled, "\t{:reassembled-packets/%ju} " "{N:/packet%s reassembled ok}\n"); p(ips_delivered, "\t{:received-local-packets/%ju} " "{N:/packet%s for this host}\n"); p(ips_noproto, "\t{:dropped-unknown-protocol/%ju} " "{N:/packet%s for unknown\\/unsupported protocol}\n"); p(ips_forward, "\t{:forwarded-packets/%ju} " "{N:/packet%s forwarded}"); p(ips_fastforward, " ({:fast-forwarded-packets/%ju} " "{N:/packet%s fast forwarded})"); if (ipstat.ips_forward || sflag <= 1) xo_emit("\n"); p(ips_cantforward, "\t{:packets-cannot-forward/%ju} " "{N:/packet%s not forwardable}\n"); p(ips_notmember, "\t{:received-unknown-multicast-group/%ju} " "{N:/packet%s received for unknown multicast group}\n"); p(ips_redirectsent, "\t{:redirects-sent/%ju} " "{N:/redirect%s sent}\n"); p(ips_localout, "\t{:sent-packets/%ju} " "{N:/packet%s sent from this host}\n"); p(ips_rawout, "\t{:send-packets-fabricated-header/%ju} " "{N:/packet%s sent with fabricated ip header}\n"); p(ips_odropped, "\t{:discard-no-mbufs/%ju} " "{N:/output packet%s dropped due to no bufs, etc.}\n"); p(ips_noroute, "\t{:discard-no-route/%ju} " "{N:/output packet%s discarded due to no route}\n"); p(ips_fragmented, "\t{:sent-fragments/%ju} " "{N:/output datagram%s fragmented}\n"); p(ips_ofragments, "\t{:fragments-created/%ju} " "{N:/fragment%s created}\n"); p(ips_cantfrag, "\t{:discard-cannot-fragment/%ju} " "{N:/datagram%s that can't be fragmented}\n"); p(ips_nogif, "\t{:discard-tunnel-no-gif/%ju} " "{N:/tunneling packet%s that can't find gif}\n"); p(ips_badaddr, "\t{:discard-bad-address/%ju} " "{N:/datagram%s with bad address in header}\n"); #undef p #undef p1a xo_close_container(name); } /* * Dump ARP statistics structure. */ void arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct arpstat arpstat; if (fetch_stats("net.link.ether.arp.stats", off, &arpstat, sizeof(arpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, plural(arpstat.f)) #define p2(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, pluralies(arpstat.f)) p(txrequests, "{:sent-requests/%ju} {N:/ARP request%s sent}\n"); p2(txreplies, "{:sent-replies/%ju} {N:/ARP repl%s sent}\n"); p(rxrequests, "{:received-requests/%ju} " "{N:/ARP request%s received}\n"); p2(rxreplies, "{:received-replies/%ju} " "{N:/ARP repl%s received}\n"); p(received, "{:received-packers/%ju} " "{N:/ARP packet%s received}\n"); p(dropped, "{:dropped-no-entry/%ju} " "{N:/total packet%s dropped due to no ARP entry}\n"); p(timeouts, "{:entries-timeout/%ju} " "{N:/ARP entry%s timed out}\n"); p(dupips, "{:dropped-duplicate-address/%ju} " "{N:/Duplicate IP%s seen}\n"); #undef p #undef p2 xo_close_container(name); } static const char *icmpnames[ICMP_MAXTYPE + 1] = { "echo reply", /* RFC 792 */ "#1", "#2", "destination unreachable", /* RFC 792 */ "source quench", /* RFC 792 */ "routing redirect", /* RFC 792 */ "#6", "#7", "echo", /* RFC 792 */ "router advertisement", /* RFC 1256 */ "router solicitation", /* RFC 1256 */ "time exceeded", /* RFC 792 */ "parameter problem", /* RFC 792 */ "time stamp", /* RFC 792 */ "time stamp reply", /* RFC 792 */ "information request", /* RFC 792 */ "information request reply", /* RFC 792 */ "address mask request", /* RFC 950 */ "address mask reply", /* RFC 950 */ "#19", "#20", "#21", "#22", "#23", "#24", "#25", "#26", "#27", "#28", "#29", "icmp traceroute", /* RFC 1393 */ "datagram conversion error", /* RFC 1475 */ "mobile host redirect", "IPv6 where-are-you", "IPv6 i-am-here", "mobile registration req", "mobile registration reply", "domain name request", /* RFC 1788 */ "domain name reply", /* RFC 1788 */ "icmp SKIP", "icmp photuris", /* RFC 2521 */ }; /* * Dump ICMP statistics. */ void icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct icmpstat icmpstat; size_t len; int i, first; if (fetch_stats("net.inet.icmp.stats", off, &icmpstat, sizeof(icmpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plural(icmpstat.f)) #define p1a(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f) #define p2(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plurales(icmpstat.f)) p(icps_error, "\t{:icmp-calls/%lu} " "{N:/call%s to icmp_error}\n"); p(icps_oldicmp, "\t{:errors-not-from-message/%lu} " "{N:/error%s not generated in response to an icmp message}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_outhist[i] != 0) { if (first) { xo_open_list("output-histogram"); xo_emit("\tOutput histogram:\n"); first = 0; } xo_open_instance("output-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_outhist[i]); else xo_emit("\t\tunknown ICMP #{k:name/%d}: " "{:count/%lu}\n", i, icmpstat.icps_outhist[i]); xo_close_instance("output-histogram"); } } if (!first) xo_close_list("output-histogram"); p(icps_badcode, "\t{:dropped-bad-code/%lu} " "{N:/message%s with bad code fields}\n"); p(icps_tooshort, "\t{:dropped-too-short/%lu} " "{N:/message%s less than the minimum length}\n"); p(icps_checksum, "\t{:dropped-bad-checksum/%lu} " "{N:/message%s with bad checksum}\n"); p(icps_badlen, "\t{:dropped-bad-length/%lu} " "{N:/message%s with bad length}\n"); p1a(icps_bmcastecho, "\t{:dropped-multicast-echo/%lu} " "{N:/multicast echo requests ignored}\n"); p1a(icps_bmcasttstamp, "\t{:dropped-multicast-timestamp/%lu} " "{N:/multicast timestamp requests ignored}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_inhist[i] != 0) { if (first) { xo_open_list("input-histogram"); xo_emit("\tInput histogram:\n"); first = 0; } xo_open_instance("input-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_inhist[i]); else xo_emit( "\t\tunknown ICMP #{k:name/%d}: {:count/%lu}\n", i, icmpstat.icps_inhist[i]); xo_close_instance("input-histogram"); } } if (!first) xo_close_list("input-histogram"); p(icps_reflect, "\t{:sent-packets/%lu} " "{N:/message response%s generated}\n"); p2(icps_badaddr, "\t{:discard-invalid-return-address/%lu} " "{N:/invalid return address%s}\n"); p(icps_noroute, "\t{:discard-no-route/%lu} " "{N:/no return route%s}\n"); #undef p #undef p1a #undef p2 if (live) { len = sizeof i; if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) < 0) return; xo_emit("\tICMP address mask responses are " "{q:icmp-address-responses/%sabled}\n", i ? "en" : "dis"); } xo_close_container(name); } /* * Dump IGMP statistics structure. */ void igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct igmpstat igmpstat; if (fetch_stats("net.inet.igmp.stats", 0, &igmpstat, sizeof(igmpstat), kread) != 0) return; if (igmpstat.igps_version != IGPS_VERSION_3) { xo_warnx("%s: version mismatch (%d != %d)", __func__, igmpstat.igps_version, IGPS_VERSION_3); } if (igmpstat.igps_len != IGPS_VERSION3_LEN) { xo_warnx("%s: size mismatch (%d != %d)", __func__, igmpstat.igps_len, IGPS_VERSION3_LEN); } xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, plural(igmpstat.f)) #define py64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f)) p64(igps_rcv_total, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p64(igps_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p64(igps_rcv_badttl, "\t{:dropped-wrong-ttl/%ju} " "{N:/message%s received with wrong TTL}\n"); p64(igps_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); py64(igps_rcv_v1v2_queries, "\t{:received-membership-queries/%ju} " "{N:/V1\\/V2 membership quer%s received}\n"); py64(igps_rcv_v3_queries, "\t{:received-v3-membership-queries/%ju} " "{N:/V3 membership quer%s received}\n"); py64(igps_rcv_badqueries, "\t{:dropped-membership-queries/%ju} " "{N:/membership quer%s received with invalid field(s)}\n"); py64(igps_rcv_gen_queries, "\t{:received-general-queries/%ju} " "{N:/general quer%s received}\n"); py64(igps_rcv_group_queries, "\t{:received-group-queries/%ju} " "{N:/group quer%s received}\n"); py64(igps_rcv_gsr_queries, "\t{:received-group-source-queries/%ju} " "{N:/group-source quer%s received}\n"); py64(igps_drop_gsr_queries, "\t{:dropped-group-source-queries/%ju} " "{N:/group-source quer%s dropped}\n"); p64(igps_rcv_reports, "\t{:received-membership-requests/%ju} " "{N:/membership report%s received}\n"); p64(igps_rcv_badreports, "\t{:dropped-membership-reports/%ju} " "{N:/membership report%s received with invalid field(s)}\n"); p64(igps_rcv_ourreports, "\t" "{:received-membership-reports-matching/%ju} " "{N:/membership report%s received for groups to which we belong}" "\n"); p64(igps_rcv_nora, "\t{:received-v3-reports-no-router-alert/%ju} " "{N:/V3 report%s received without Router Alert}\n"); p64(igps_snd_reports, "\t{:sent-membership-reports/%ju} " "{N:/membership report%s sent}\n"); #undef p64 #undef py64 xo_close_container(name); } /* * Dump PIM statistics structure. */ void pim_stats(u_long off __unused, const char *name, int af1 __unused, int proto __unused) { struct pimstat pimstat; if (fetch_stats("net.inet.pim.stats", off, &pimstat, sizeof(pimstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, plural(pimstat.f)) #define py(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y") p(pims_rcv_total_msgs, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p(pims_rcv_total_bytes, "\t{:received-bytes/%ju} " "{N:/byte%s received}\n"); p(pims_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p(pims_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); p(pims_rcv_badversion, "\t{:dropped-bad-version/%ju} " "{N:/message%s received with bad version}\n"); p(pims_rcv_registers_msgs, "\t{:received-data-register-messages/%ju} " "{N:/data register message%s received}\n"); p(pims_rcv_registers_bytes, "\t{:received-data-register-bytes/%ju} " "{N:/data register byte%s received}\n"); p(pims_rcv_registers_wrongiif, "\t" "{:received-data-register-wrong-interface/%ju} " "{N:/data register message%s received on wrong iif}\n"); p(pims_rcv_badregisters, "\t{:received-bad-registers/%ju} " "{N:/bad register%s received}\n"); p(pims_snd_registers_msgs, "\t{:sent-data-register-messages/%ju} " "{N:/data register message%s sent}\n"); p(pims_snd_registers_bytes, "\t{:sent-data-register-bytes/%ju} " "{N:/data register byte%s sent}\n"); #undef p #undef py xo_close_container(name); } /* * Pretty print an Internet address (net address + port). */ void inetprint(const char *container, struct in_addr *in, int port, const char *proto, int num_port, const int af1) { struct servent *sp = 0; char line[80], *cp; int width; if (container) xo_open_container(container); if (Wflag) sprintf(line, "%s.", inetname(in)); else sprintf(line, "%.*s.", (Aflag && !num_port) ? 12 : 16, inetname(in)); cp = strchr(line, '\0'); if (!num_port && port) sp = getservbyport((int)port, proto); if (sp || port == 0) sprintf(cp, "%.15s ", sp ? sp->s_name : "*"); else sprintf(cp, "%d ", ntohs((u_short)port)); width = (Aflag && !Wflag) ? 18 : ((!Wflag || af1 == AF_INET) ? 22 : 45); if (Wflag) xo_emit("{d:target/%-*s} ", width, line); else xo_emit("{d:target/%-*.*s} ", width, width, line); int alen = cp - line - 1, plen = strlen(cp) - 1; xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen, plen, cp); if (container) xo_close_container(container); } /* * Construct an Internet address representation. * If numeric_addr has been supplied, give * numeric value, otherwise try for symbolic name. */ char * inetname(struct in_addr *inp) { char *cp; static char line[MAXHOSTNAMELEN]; struct hostent *hp; struct netent *np; cp = 0; if (!numeric_addr && inp->s_addr != INADDR_ANY) { int net = inet_netof(*inp); int lna = inet_lnaof(*inp); if (lna == INADDR_ANY) { np = getnetbyaddr(net, AF_INET); if (np) cp = np->n_name; } if (cp == 0) { hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET); if (hp) { cp = hp->h_name; trimdomain(cp, strlen(cp)); } } } if (inp->s_addr == INADDR_ANY) strcpy(line, "*"); else if (cp) { strlcpy(line, cp, sizeof(line)); } else { inp->s_addr = ntohl(inp->s_addr); #define C(x) ((u_int)((x) & 0xff)) sprintf(line, "%u.%u.%u.%u", C(inp->s_addr >> 24), C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr)); } return (line); } Index: head/usr.bin/netstat/sctp.c =================================================================== --- head/usr.bin/netstat/sctp.c (revision 295135) +++ head/usr.bin/netstat/sctp.c (revision 295136) @@ -1,918 +1,919 @@ /*- * Copyright (c) 2001-2007, by Weongyo Jeong. All rights reserved. * Copyright (c) 2011, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)sctp.c 0.1 (Berkeley) 4/18/2007"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" #include #ifdef SCTP static void sctp_statesprint(uint32_t state); #define NETSTAT_SCTP_STATES_CLOSED 0x0 #define NETSTAT_SCTP_STATES_BOUND 0x1 #define NETSTAT_SCTP_STATES_LISTEN 0x2 #define NETSTAT_SCTP_STATES_COOKIE_WAIT 0x3 #define NETSTAT_SCTP_STATES_COOKIE_ECHOED 0x4 #define NETSTAT_SCTP_STATES_ESTABLISHED 0x5 #define NETSTAT_SCTP_STATES_SHUTDOWN_SENT 0x6 #define NETSTAT_SCTP_STATES_SHUTDOWN_RECEIVED 0x7 #define NETSTAT_SCTP_STATES_SHUTDOWN_ACK_SENT 0x8 #define NETSTAT_SCTP_STATES_SHUTDOWN_PENDING 0x9 static const char *sctpstates[] = { "CLOSED", "BOUND", "LISTEN", "COOKIE_WAIT", "COOKIE_ECHOED", "ESTABLISHED", "SHUTDOWN_SENT", "SHUTDOWN_RECEIVED", "SHUTDOWN_ACK_SENT", "SHUTDOWN_PENDING" }; static LIST_HEAD(xladdr_list, xladdr_entry) xladdr_head; struct xladdr_entry { struct xsctp_laddr *xladdr; LIST_ENTRY(xladdr_entry) xladdr_entries; }; static LIST_HEAD(xraddr_list, xraddr_entry) xraddr_head; struct xraddr_entry { struct xsctp_raddr *xraddr; LIST_ENTRY(xraddr_entry) xraddr_entries; }; /* * Construct an Internet address representation. * If numeric_addr has been supplied, give * numeric value, otherwise try for symbolic name. */ #ifdef INET static char * inetname(struct in_addr *inp) { char *cp; static char line[MAXHOSTNAMELEN]; struct hostent *hp; struct netent *np; cp = 0; if (!numeric_addr && inp->s_addr != INADDR_ANY) { int net = inet_netof(*inp); int lna = inet_lnaof(*inp); if (lna == INADDR_ANY) { np = getnetbyaddr(net, AF_INET); if (np) cp = np->n_name; } if (cp == 0) { hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET); if (hp) { cp = hp->h_name; trimdomain(cp, strlen(cp)); } } } if (inp->s_addr == INADDR_ANY) strcpy(line, "*"); else if (cp) { strlcpy(line, cp, sizeof(line)); } else { inp->s_addr = ntohl(inp->s_addr); #define C(x) ((u_int)((x) & 0xff)) sprintf(line, "%u.%u.%u.%u", C(inp->s_addr >> 24), C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr)); inp->s_addr = htonl(inp->s_addr); } return (line); } #endif #ifdef INET6 static char ntop_buf[INET6_ADDRSTRLEN]; static char * inet6name(struct in6_addr *in6p) { char *cp; static char line[50]; struct hostent *hp; static char domain[MAXHOSTNAMELEN]; static int first = 1; if (first && !numeric_addr) { first = 0; if (gethostname(domain, MAXHOSTNAMELEN) == 0 && (cp = strchr(domain, '.'))) (void) strcpy(domain, cp + 1); else domain[0] = 0; } cp = 0; if (!numeric_addr && !IN6_IS_ADDR_UNSPECIFIED(in6p)) { hp = gethostbyaddr((char *)in6p, sizeof(*in6p), AF_INET6); if (hp) { if ((cp = strchr(hp->h_name, '.')) && !strcmp(cp + 1, domain)) *cp = 0; cp = hp->h_name; } } if (IN6_IS_ADDR_UNSPECIFIED(in6p)) strcpy(line, "*"); else if (cp) strcpy(line, cp); else sprintf(line, "%s", inet_ntop(AF_INET6, (void *)in6p, ntop_buf, sizeof(ntop_buf))); return (line); } #endif static void sctp_print_address(const char *container, union sctp_sockstore *address, int port, int num_port) { struct servent *sp = 0; char line[80], *cp; int width; if (container) xo_open_container(container); switch (address->sa.sa_family) { #ifdef INET case AF_INET: sprintf(line, "%.*s.", Wflag ? 39 : 16, inetname(&address->sin.sin_addr)); break; #endif #ifdef INET6 case AF_INET6: sprintf(line, "%.*s.", Wflag ? 39 : 16, inet6name(&address->sin6.sin6_addr)); break; #endif default: sprintf(line, "%.*s.", Wflag ? 39 : 16, ""); break; } cp = strchr(line, '\0'); if (!num_port && port) sp = getservbyport((int)port, "sctp"); if (sp || port == 0) sprintf(cp, "%.15s ", sp ? sp->s_name : "*"); else sprintf(cp, "%d ", ntohs((u_short)port)); width = Wflag ? 45 : 22; xo_emit("{d:target/%-*.*s} ", width, width, line); int alen = cp - line - 1, plen = strlen(cp) - 1; xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen, plen, cp); if (container) xo_close_container(container); } static int sctp_skip_xinpcb_ifneed(char *buf, const size_t buflen, size_t *offset) { int exist_tcb = 0; struct xsctp_tcb *xstcb; struct xsctp_raddr *xraddr; struct xsctp_laddr *xladdr; while (*offset < buflen) { xladdr = (struct xsctp_laddr *)(buf + *offset); *offset += sizeof(struct xsctp_laddr); if (xladdr->last == 1) break; } while (*offset < buflen) { xstcb = (struct xsctp_tcb *)(buf + *offset); *offset += sizeof(struct xsctp_tcb); if (xstcb->last == 1) break; exist_tcb = 1; while (*offset < buflen) { xladdr = (struct xsctp_laddr *)(buf + *offset); *offset += sizeof(struct xsctp_laddr); if (xladdr->last == 1) break; } while (*offset < buflen) { xraddr = (struct xsctp_raddr *)(buf + *offset); *offset += sizeof(struct xsctp_raddr); if (xraddr->last == 1) break; } } /* * If Lflag is set, we don't care about the return value. */ if (Lflag) return 0; return exist_tcb; } static void sctp_process_tcb(struct xsctp_tcb *xstcb, char *buf, const size_t buflen, size_t *offset, int *indent) { int i, xl_total = 0, xr_total = 0, x_max; struct xsctp_raddr *xraddr; struct xsctp_laddr *xladdr; struct xladdr_entry *prev_xl = NULL, *xl = NULL, *xl_tmp; struct xraddr_entry *prev_xr = NULL, *xr = NULL, *xr_tmp; LIST_INIT(&xladdr_head); LIST_INIT(&xraddr_head); /* * Make `struct xladdr_list' list and `struct xraddr_list' list * to handle the address flexibly. */ while (*offset < buflen) { xladdr = (struct xsctp_laddr *)(buf + *offset); *offset += sizeof(struct xsctp_laddr); if (xladdr->last == 1) break; prev_xl = xl; xl = malloc(sizeof(struct xladdr_entry)); if (xl == NULL) { xo_warnx("malloc %lu bytes", (u_long)sizeof(struct xladdr_entry)); goto out; } xl->xladdr = xladdr; if (prev_xl == NULL) LIST_INSERT_HEAD(&xladdr_head, xl, xladdr_entries); else LIST_INSERT_AFTER(prev_xl, xl, xladdr_entries); xl_total++; } while (*offset < buflen) { xraddr = (struct xsctp_raddr *)(buf + *offset); *offset += sizeof(struct xsctp_raddr); if (xraddr->last == 1) break; prev_xr = xr; xr = malloc(sizeof(struct xraddr_entry)); if (xr == NULL) { xo_warnx("malloc %lu bytes", (u_long)sizeof(struct xraddr_entry)); goto out; } xr->xraddr = xraddr; if (prev_xr == NULL) LIST_INSERT_HEAD(&xraddr_head, xr, xraddr_entries); else LIST_INSERT_AFTER(prev_xr, xr, xraddr_entries); xr_total++; } /* * Let's print the address infos. */ xo_open_list("address"); xl = LIST_FIRST(&xladdr_head); xr = LIST_FIRST(&xraddr_head); x_max = (xl_total > xr_total) ? xl_total : xr_total; for (i = 0; i < x_max; i++) { xo_open_instance("address"); if (((*indent == 0) && i > 0) || *indent > 0) xo_emit("{P:/%-12s} ", " "); if (xl != NULL) { sctp_print_address("local", &(xl->xladdr->address), htons(xstcb->local_port), numeric_port); } else { if (Wflag) { xo_emit("{P:/%-45s} ", " "); } else { xo_emit("{P:/%-22s} ", " "); } } if (xr != NULL && !Lflag) { sctp_print_address("remote", &(xr->xraddr->address), htons(xstcb->remote_port), numeric_port); } if (xl != NULL) xl = LIST_NEXT(xl, xladdr_entries); if (xr != NULL) xr = LIST_NEXT(xr, xraddr_entries); if (i == 0 && !Lflag) sctp_statesprint(xstcb->state); if (i < x_max) xo_emit("\n"); xo_close_instance("address"); } out: /* * Free the list which be used to handle the address. */ xl = LIST_FIRST(&xladdr_head); while (xl != NULL) { xl_tmp = LIST_NEXT(xl, xladdr_entries); free(xl); xl = xl_tmp; } xr = LIST_FIRST(&xraddr_head); while (xr != NULL) { xr_tmp = LIST_NEXT(xr, xraddr_entries); free(xr); xr = xr_tmp; } } static void sctp_process_inpcb(struct xsctp_inpcb *xinpcb, char *buf, const size_t buflen, size_t *offset) { int indent = 0, xladdr_total = 0, is_listening = 0; static int first = 1; const char *tname, *pname; struct xsctp_tcb *xstcb; struct xsctp_laddr *xladdr; size_t offset_laddr; int process_closed; if (xinpcb->maxqlen > 0) is_listening = 1; if (first) { if (!Lflag) { xo_emit("Active SCTP associations"); if (aflag) xo_emit(" (including servers)"); } else xo_emit("Current listen queue sizes (qlen/maxqlen)"); xo_emit("\n"); if (Lflag) xo_emit("{T:/%-6.6s} {T:/%-5.5s} {T:/%-8.8s} " "{T:/%-22.22s}\n", "Proto", "Type", "Listen", "Local Address"); else if (Wflag) xo_emit("{T:/%-6.6s} {T:/%-5.5s} {T:/%-45.45s} " "{T:/%-45.45s} {T:/%s}\n", "Proto", "Type", "Local Address", "Foreign Address", "(state)"); else xo_emit("{T:/%-6.6s} {T:/%-5.5s} {T:/%-22.22s} " "{T:/%-22.22s} {T:/%s}\n", "Proto", "Type", "Local Address", "Foreign Address", "(state)"); first = 0; } xladdr = (struct xsctp_laddr *)(buf + *offset); if (Lflag && !is_listening) { sctp_skip_xinpcb_ifneed(buf, buflen, offset); return; } if (xinpcb->flags & SCTP_PCB_FLAGS_BOUND_V6) { /* Can't distinguish between sctp46 and sctp6 */ pname = "sctp46"; } else { pname = "sctp4"; } if (xinpcb->flags & SCTP_PCB_FLAGS_TCPTYPE) tname = "1to1"; else if (xinpcb->flags & SCTP_PCB_FLAGS_UDPTYPE) tname = "1toN"; else tname = "????"; if (Lflag) { - char buf1[9]; + char buf1[22]; - snprintf(buf1, 9, "%hu/%hu", xinpcb->qlen, xinpcb->maxqlen); + snprintf(buf1, sizeof buf1, "%u/%u", + xinpcb->qlen, xinpcb->maxqlen); xo_emit("{:protocol/%-6.6s/%s} {:type/%-5.5s/%s} ", pname, tname); xo_emit("{d:queues/%-8.8s}{e:queue-len/%hu}" "{e:max-queue-len/%hu} ", buf1, xinpcb->qlen, xinpcb->maxqlen); } offset_laddr = *offset; process_closed = 0; xo_open_list("local-address"); retry: while (*offset < buflen) { xladdr = (struct xsctp_laddr *)(buf + *offset); *offset += sizeof(struct xsctp_laddr); if (xladdr->last) { if (aflag && !Lflag && (xladdr_total == 0) && process_closed) { xo_open_instance("local-address"); xo_emit("{:protocol/%-6.6s/%s} " "{:type/%-5.5s/%s} ", pname, tname); if (Wflag) { xo_emit("{P:/%-91.91s/%s} " "{:state/CLOSED}", " "); } else { xo_emit("{P:/%-45.45s/%s} " "{:state/CLOSED}", " "); } xo_close_instance("local-address"); } if (process_closed || is_listening) { xo_emit("\n"); } break; } if (!Lflag && !is_listening && !process_closed) continue; xo_open_instance("local-address"); if (xladdr_total == 0) { xo_emit("{:protocol/%-6.6s/%s} {:type/%-5.5s/%s} ", pname, tname); } else { xo_emit("\n"); xo_emit(Lflag ? "{P:/%-21.21s} " : "{P:/%-12.12s} ", " "); } sctp_print_address("local", &(xladdr->address), htons(xinpcb->local_port), numeric_port); if (aflag && !Lflag && xladdr_total == 0) { if (Wflag) { if (process_closed) { xo_emit("{P:/%-45.45s} " "{:state/CLOSED}", " "); } else { xo_emit("{P:/%-45.45s} " "{:state:LISTEN}", " "); } } else { if (process_closed) { xo_emit("{P:/%-22.22s} " "{:state/CLOSED}", " "); } else { xo_emit("{P:/%-22.22s} " "{:state/LISTEN}", " "); } } } xladdr_total++; xo_close_instance("local-address"); } xstcb = (struct xsctp_tcb *)(buf + *offset); *offset += sizeof(struct xsctp_tcb); if (aflag && (xladdr_total == 0) && xstcb->last && !process_closed) { process_closed = 1; *offset = offset_laddr; goto retry; } while (xstcb->last == 0 && *offset < buflen) { xo_emit("{:protocol/%-6.6s/%s} {:type/%-5.5s/%s} ", pname, tname); sctp_process_tcb(xstcb, buf, buflen, offset, &indent); indent++; xstcb = (struct xsctp_tcb *)(buf + *offset); *offset += sizeof(struct xsctp_tcb); } xo_close_list("local-address"); } /* * Print a summary of SCTP connections related to an Internet * protocol. */ void sctp_protopr(u_long off __unused, const char *name __unused, int af1 __unused, int proto) { char *buf; const char *mibvar = "net.inet.sctp.assoclist"; size_t offset = 0; size_t len = 0; struct xsctp_inpcb *xinpcb; if (proto != IPPROTO_SCTP) return; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) xo_warn("sysctl: %s", mibvar); return; } if ((buf = malloc(len)) == 0) { xo_warnx("malloc %lu bytes", (u_long)len); return; } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { xo_warn("sysctl: %s", mibvar); free(buf); return; } xinpcb = (struct xsctp_inpcb *)(buf + offset); offset += sizeof(struct xsctp_inpcb); while (xinpcb->last == 0 && offset < len) { sctp_process_inpcb(xinpcb, buf, (const size_t)len, &offset); xinpcb = (struct xsctp_inpcb *)(buf + offset); offset += sizeof(struct xsctp_inpcb); } free(buf); } static void sctp_statesprint(uint32_t state) { int idx; switch (state) { case SCTP_CLOSED: idx = NETSTAT_SCTP_STATES_CLOSED; break; case SCTP_BOUND: idx = NETSTAT_SCTP_STATES_BOUND; break; case SCTP_LISTEN: idx = NETSTAT_SCTP_STATES_LISTEN; break; case SCTP_COOKIE_WAIT: idx = NETSTAT_SCTP_STATES_COOKIE_WAIT; break; case SCTP_COOKIE_ECHOED: idx = NETSTAT_SCTP_STATES_COOKIE_ECHOED; break; case SCTP_ESTABLISHED: idx = NETSTAT_SCTP_STATES_ESTABLISHED; break; case SCTP_SHUTDOWN_SENT: idx = NETSTAT_SCTP_STATES_SHUTDOWN_SENT; break; case SCTP_SHUTDOWN_RECEIVED: idx = NETSTAT_SCTP_STATES_SHUTDOWN_RECEIVED; break; case SCTP_SHUTDOWN_ACK_SENT: idx = NETSTAT_SCTP_STATES_SHUTDOWN_ACK_SENT; break; case SCTP_SHUTDOWN_PENDING: idx = NETSTAT_SCTP_STATES_SHUTDOWN_PENDING; break; default: xo_emit("UNKNOWN {:state/0x%08x}", state); return; } xo_emit("{:state/%s}", sctpstates[idx]); } /* * Dump SCTP statistics structure. */ void sctp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct sctpstat sctpstat; if (fetch_stats("net.inet.sctp.stats", off, &sctpstat, sizeof(sctpstat), kread) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (sctpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)sctpstat.f, plural(sctpstat.f)) #define p1a(f, m) if (sctpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)sctpstat.f) /* * input statistics */ p(sctps_recvpackets, "\t{:received-packets/%ju} " "{N:/input packet%s}\n"); p(sctps_recvdatagrams, "\t\t{:received-datagrams/%ju} " "{N:/datagram%s}\n"); p(sctps_recvpktwithdata, "\t\t{:received-with-data/%ju} " "{N:/packet%s that had data}\n"); p(sctps_recvsacks, "\t\t{:received-sack-chunks/%ju} " "{N:/input SACK chunk%s}\n"); p(sctps_recvdata, "\t\t{:received-data-chunks/%ju} " "{N:/input DATA chunk%s}\n"); p(sctps_recvdupdata, "\t\t{:received-duplicate-data-chunks/%ju} " "{N:/duplicate DATA chunk%s}\n"); p(sctps_recvheartbeat, "\t\t{:received-hb-chunks/%ju} " "{N:/input HB chunk%s}\n"); p(sctps_recvheartbeatack, "\t\t{:received-hb-ack-chunks/%ju} " "{N:/HB-ACK chunk%s}\n"); p(sctps_recvecne, "\t\t{:received-ecne-chunks/%ju} " "{N:/input ECNE chunk%s}\n"); p(sctps_recvauth, "\t\t{:received-auth-chunks/%ju} " "{N:/input AUTH chunk%s}\n"); p(sctps_recvauthmissing, "\t\t{:dropped-missing-auth/%ju} " "{N:/chunk%s missing AUTH}\n"); p(sctps_recvivalhmacid, "\t\t{:dropped-invalid-hmac/%ju} " "{N:/invalid HMAC id%s received}\n"); p(sctps_recvivalkeyid, "\t\t{:dropped-invalid-secret/%ju} " "{N:/invalid secret id%s received}\n"); p1a(sctps_recvauthfailed, "\t\t{:dropped-auth-failed/%ju} " "{N:/auth failed}\n"); p1a(sctps_recvexpress, "\t\t{:received-fast-path/%ju} " "{N:/fast path receives all one chunk}\n"); p1a(sctps_recvexpressm, "\t\t{:receives-fast-path-multipart/%ju} " "{N:/fast path multi-part data}\n"); /* * output statistics */ p(sctps_sendpackets, "\t{:sent-packets/%ju} " "{N:/output packet%s}\n"); p(sctps_sendsacks, "\t\t{:sent-sacks/%ju} " "{N:/output SACK%s}\n"); p(sctps_senddata, "\t\t{:sent-data-chunks/%ju} " "{N:/output DATA chunk%s}\n"); p(sctps_sendretransdata, "\t\t{:sent-retransmitted-data-chunks/%ju} " "{N:/retransmitted DATA chunk%s}\n"); p(sctps_sendfastretrans, "\t\t" "{:sent-fast-retransmitted-data-chunks/%ju} " "{N:/fast retransmitted DATA chunk%s}\n"); p(sctps_sendmultfastretrans, "\t\t" "{:sent-fast-retransmitted-data-chunk-multiple-times/%ju} " "{N:/FR'%s that happened more than once to same chunk}\n"); p(sctps_sendheartbeat, "\t\t{:sent-hb-chunks/%ju} " "{N:/output HB chunk%s}\n"); p(sctps_sendecne, "\t\t{:sent-ecne-chunks/%ju} " "{N:/output ECNE chunk%s}\n"); p(sctps_sendauth, "\t\t{:sent-auth-chunks/%ju} " "{N:/output AUTH chunk%s}\n"); p1a(sctps_senderrors, "\t\t{:send-errors/%ju} " "{N:/ip_output error counter}\n"); /* * PCKDROPREP statistics */ xo_emit("\t{T:Packet drop statistics}:\n"); xo_open_container("drop-statistics"); p1a(sctps_pdrpfmbox, "\t\t{:middle-box/%ju} " "{N:/from middle box}\n"); p1a(sctps_pdrpfehos, "\t\t{:end-host/%ju} " "{N:/from end host}\n"); p1a(sctps_pdrpmbda, "\t\t{:with-data/%ju} " "{N:/with data}\n"); p1a(sctps_pdrpmbct, "\t\t{:non-data/%ju} " "{N:/non-data, non-endhost}\n"); p1a(sctps_pdrpbwrpt, "\t\t{:non-endhost/%ju} " "{N:/non-endhost, bandwidth rep only}\n"); p1a(sctps_pdrpcrupt, "\t\t{:short-header/%ju} " "{N:/not enough for chunk header}\n"); p1a(sctps_pdrpnedat, "\t\t{:short-data/%ju} " "{N:/not enough data to confirm}\n"); p1a(sctps_pdrppdbrk, "\t\t{:chunk-break/%ju} " "{N:/where process_chunk_drop said break}\n"); p1a(sctps_pdrptsnnf, "\t\t{:tsn-not-found/%ju} " "{N:/failed to find TSN}\n"); p1a(sctps_pdrpdnfnd, "\t\t{:reverse-tsn/%ju} " "{N:/attempt reverse TSN lookup}\n"); p1a(sctps_pdrpdiwnp, "\t\t{:confirmed-zero-window/%ju} " "{N:/e-host confirms zero-rwnd}\n"); p1a(sctps_pdrpdizrw, "\t\t{:middle-box-no-space/%ju} " "{N:/midbox confirms no space}\n"); p1a(sctps_pdrpbadd, "\t\t{:bad-data/%ju} " "{N:/data did not match TSN}\n"); p(sctps_pdrpmark, "\t\t{:tsn-marked-fast-retransmission/%ju} " "{N:/TSN'%s marked for Fast Retran}\n"); xo_close_container("drop-statistics"); /* * Timeouts */ xo_emit("\t{T:Timeouts}:\n"); xo_open_container("timeouts"); p(sctps_timoiterator, "\t\t{:iterator/%ju} " "{N:/iterator timer%s fired}\n"); p(sctps_timodata, "\t\t{:t3-data/%ju} " "{N:/T3 data time out%s}\n"); p(sctps_timowindowprobe, "\t\t{:window-probe/%ju} " "{N:/window probe (T3) timer%s fired}\n"); p(sctps_timoinit, "\t\t{:init-timer/%ju} " "{N:/INIT timer%s fired}\n"); p(sctps_timosack, "\t\t{:sack-timer/%ju} " "{N:/sack timer%s fired}\n"); p(sctps_timoshutdown, "\t\t{:shutdown-timer/%ju} " "{N:/shutdown timer%s fired}\n"); p(sctps_timoheartbeat, "\t\t{:heartbeat-timer/%ju} " "{N:/heartbeat timer%s fired}\n"); p1a(sctps_timocookie, "\t\t{:cookie-timer/%ju} " "{N:/a cookie timeout fired}\n"); p1a(sctps_timosecret, "\t\t{:endpoint-changed-cookie/%ju} " "{N:/an endpoint changed its cook}ie" "secret\n"); p(sctps_timopathmtu, "\t\t{:pmtu-timer/%ju} " "{N:/PMTU timer%s fired}\n"); p(sctps_timoshutdownack, "\t\t{:shutdown-timer/%ju} " "{N:/shutdown ack timer%s fired}\n"); p(sctps_timoshutdownguard, "\t\t{:shutdown-guard-timer/%ju} " "{N:/shutdown guard timer%s fired}\n"); p(sctps_timostrmrst, "\t\t{:stream-reset-timer/%ju} " "{N:/stream reset timer%s fired}\n"); p(sctps_timoearlyfr, "\t\t{:early-fast-retransmission-timer/%ju} " "{N:/early FR timer%s fired}\n"); p1a(sctps_timoasconf, "\t\t{:asconf-timer/%ju} " "{N:/an asconf timer fired}\n"); p1a(sctps_timoautoclose, "\t\t{:auto-close-timer/%ju} " "{N:/auto close timer fired}\n"); p(sctps_timoassockill, "\t\t{:asoc-free-timer/%ju} " "{N:/asoc free timer%s expired}\n"); p(sctps_timoinpkill, "\t\t{:input-free-timer/%ju} " "{N:/inp free timer%s expired}\n"); xo_close_container("timeouts"); #if 0 /* * Early fast retransmission counters */ p(sctps_earlyfrstart, "\t%ju TODO:sctps_earlyfrstart\n"); p(sctps_earlyfrstop, "\t%ju TODO:sctps_earlyfrstop\n"); p(sctps_earlyfrmrkretrans, "\t%ju TODO:sctps_earlyfrmrkretrans\n"); p(sctps_earlyfrstpout, "\t%ju TODO:sctps_earlyfrstpout\n"); p(sctps_earlyfrstpidsck1, "\t%ju TODO:sctps_earlyfrstpidsck1\n"); p(sctps_earlyfrstpidsck2, "\t%ju TODO:sctps_earlyfrstpidsck2\n"); p(sctps_earlyfrstpidsck3, "\t%ju TODO:sctps_earlyfrstpidsck3\n"); p(sctps_earlyfrstpidsck4, "\t%ju TODO:sctps_earlyfrstpidsck4\n"); p(sctps_earlyfrstrid, "\t%ju TODO:sctps_earlyfrstrid\n"); p(sctps_earlyfrstrout, "\t%ju TODO:sctps_earlyfrstrout\n"); p(sctps_earlyfrstrtmr, "\t%ju TODO:sctps_earlyfrstrtmr\n"); #endif /* * Others */ p1a(sctps_hdrops, "\t{:dropped-too-short/%ju} " "{N:/packet shorter than header}\n"); p1a(sctps_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/checksum error}\n"); p1a(sctps_noport, "\t{:dropped-no-endpoint/%ju} " "{N:/no endpoint for port}\n"); p1a(sctps_badvtag, "\t{:dropped-bad-v-tag/%ju} " "{N:/bad v-tag}\n"); p1a(sctps_badsid, "\t{:dropped-bad-sid/%ju} " "{N:/bad SID}\n"); p1a(sctps_nomem, "\t{:dropped-no-memory/%ju} " "{N:/no memory}\n"); p1a(sctps_fastretransinrtt, "\t{:multiple-fast-retransmits-in-rtt/%ju} " "{N:/number of multiple FR in a RT}T window\n"); #if 0 p(sctps_markedretrans, "\t%ju TODO:sctps_markedretrans\n"); #endif p1a(sctps_naglesent, "\t{:rfc813-sent/%ju} " "{N:/RFC813 allowed sending}\n"); p1a(sctps_naglequeued, "\t{:rfc813-queued/%ju} " "{N:/RFC813 does not allow sending}\n"); p1a(sctps_maxburstqueued, "\t{:max-burst-queued/%ju} " "{N:/times max burst prohibited sending}\n"); p1a(sctps_ifnomemqueued, "\t{:no-memory-in-interface/%ju} " "{N:/look ahead tells us no memory in interface}\n"); p(sctps_windowprobed, "\t{:sent-window-probes/%ju} " "{N:/number%s of window probes sent}\n"); p(sctps_lowlevelerr, "\t{:low-level-err/%ju} " "{N:/time%s an output error to clamp down on next user send}\n"); p(sctps_lowlevelerrusr, "\t{:low-level-user-error/%ju} " "{N:/time%s sctp_senderrors were caused from a user}\n"); p(sctps_datadropchklmt, "\t{:dropped-chunk-limit/%ju} " "{N:/number of in data drop%s due to chunk limit reached}\n"); p(sctps_datadroprwnd, "\t{:dropped-rwnd-limit/%ju} " "{N:/number of in data drop%s due to rwnd limit reached}\n"); p(sctps_ecnereducedcwnd, "\t{:ecn-reduced-cwnd/%ju} " "{N:/time%s a ECN reduced the cwnd}\n"); p1a(sctps_vtagexpress, "\t{:v-tag-express-lookup/%ju} " "{N:/used express lookup via vtag}\n"); p1a(sctps_vtagbogus, "\t{:v-tag-collision/%ju} " "{N:/collision in express lookup}\n"); p(sctps_primary_randry, "\t{:sender-ran-dry/%ju} " "{N:/time%s the sender ran dry of user data on primary}\n"); p1a(sctps_cmt_randry, "\t{:cmt-ran-dry/%ju} " "{N:/same for above}\n"); p(sctps_slowpath_sack, "\t{:slow-path-sack/%ju} " "{N:/sack%s the slow way}\n"); p(sctps_wu_sacks_sent, "\t{:sent-window-update-only-sack/%ju} " "{N:/window update only sack%s sent}\n"); p(sctps_sends_with_flags, "\t{:sent-with-sinfo/%ju} " "{N:/send%s with sinfo_flags !=0}\n"); p(sctps_sends_with_unord, "\t{:sent-with-unordered/%ju} " "{N:/unordered send%s}\n"); p(sctps_sends_with_eof, "\t{:sent-with-eof/%ju} " "{N:/send%s with EOF flag set}\n"); p(sctps_sends_with_abort, "\t{:sent-with-abort/%ju} " "{N:/send%s with ABORT flag set}\n"); p(sctps_protocol_drain_calls, "\t{:protocol-drain-called/%ju} " "{N:/time%s protocol drain called}\n"); p(sctps_protocol_drains_done, "\t{:protocol-drain/%ju} " "{N:/time%s we did a protocol drain}\n"); p(sctps_read_peeks, "\t{:read-with-peek/%ju} " "{N:/time%s recv was called with peek}\n"); p(sctps_cached_chk, "\t{:cached-chunks/%ju} " "{N:/cached chunk%s used}\n"); p1a(sctps_cached_strmoq, "\t{:cached-output-queue-used/%ju} " "{N:/cached stream oq's used}\n"); p(sctps_left_abandon, "\t{:messages-abandoned/%ju} " "{N:/unread message%s abandonded by close}\n"); p1a(sctps_send_burst_avoid, "\t{:send-burst-avoidance/%ju} " "{N:/send burst avoidance, already max burst inflight to net}\n"); p1a(sctps_send_cwnd_avoid, "\t{:send-cwnd-avoidance/%ju} " "{N:/send cwnd full avoidance, already max burst inflight " "to net}\n"); p(sctps_fwdtsn_map_over, "\t{:tsn-map-overruns/%ju} " "{N:/number of map array over-run%s via fwd-tsn's}\n"); #undef p #undef p1a xo_close_container(name); } #endif /* SCTP */ Index: head/usr.bin/netstat/unix.c =================================================================== --- head/usr.bin/netstat/unix.c (revision 295135) +++ head/usr.bin/netstat/unix.c (revision 295136) @@ -1,331 +1,331 @@ /*- * Copyright (c) 1983, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)unix.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Display protocol blocks in the unix domain. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" static void unixdomainpr(struct xunpcb *, struct xsocket *); static const char *const socktype[] = { "#0", "stream", "dgram", "raw", "rdm", "seqpacket" }; static int pcblist_sysctl(int type, char **bufp) { char *buf; size_t len; char mibvar[sizeof "net.local.seqpacket.pcblist"]; sprintf(mibvar, "net.local.%s.pcblist", socktype[type]); len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) xo_warn("sysctl: %s", mibvar); return (-1); } if ((buf = malloc(len)) == 0) { xo_warnx("malloc %lu bytes", (u_long)len); return (-2); } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { xo_warn("sysctl: %s", mibvar); free(buf); return (-2); } *bufp = buf; return (0); } static int pcblist_kvm(u_long count_off, u_long gencnt_off, u_long head_off, char **bufp) { struct unp_head head; struct unpcb *unp, unp_conn; u_char sun_len; struct socket so; struct xunpgen xug; struct xunpcb xu; unp_gen_t unp_gencnt; u_int unp_count; char *buf, *p; size_t len; if (count_off == 0 || gencnt_off == 0) return (-2); if (head_off == 0) return (-1); kread(count_off, &unp_count, sizeof(unp_count)); len = 2 * sizeof(xug) + (unp_count + unp_count / 8) * sizeof(xu); if ((buf = malloc(len)) == 0) { xo_warnx("malloc %lu bytes", (u_long)len); return (-2); } p = buf; #define COPYOUT(obj, size) do { \ if (len < (size)) { \ xo_warnx("buffer size exceeded"); \ goto fail; \ } \ bcopy((obj), p, (size)); \ len -= (size); \ p += (size); \ } while (0) #define KREAD(off, buf, len) do { \ if (kread((uintptr_t)(off), (buf), (len)) != 0) \ goto fail; \ } while (0) /* Write out header. */ kread(gencnt_off, &unp_gencnt, sizeof(unp_gencnt)); xug.xug_len = sizeof xug; xug.xug_count = unp_count; xug.xug_gen = unp_gencnt; xug.xug_sogen = 0; COPYOUT(&xug, sizeof xug); /* Walk the PCB list. */ xu.xu_len = sizeof xu; KREAD(head_off, &head, sizeof(head)); LIST_FOREACH(unp, &head, unp_link) { xu.xu_unpp = unp; KREAD(unp, &xu.xu_unp, sizeof (*unp)); unp = &xu.xu_unp; if (unp->unp_gencnt > unp_gencnt) continue; if (unp->unp_addr != NULL) { KREAD(unp->unp_addr, &sun_len, sizeof(sun_len)); KREAD(unp->unp_addr, &xu.xu_addr, sun_len); } if (unp->unp_conn != NULL) { KREAD(unp->unp_conn, &unp_conn, sizeof(unp_conn)); if (unp_conn.unp_addr != NULL) { KREAD(unp_conn.unp_addr, &sun_len, sizeof(sun_len)); KREAD(unp_conn.unp_addr, &xu.xu_caddr, sun_len); } } KREAD(unp->unp_socket, &so, sizeof(so)); if (sotoxsocket(&so, &xu.xu_socket) != 0) goto fail; COPYOUT(&xu, sizeof(xu)); } /* Reread the counts and write the footer. */ kread(count_off, &unp_count, sizeof(unp_count)); kread(gencnt_off, &unp_gencnt, sizeof(unp_gencnt)); xug.xug_count = unp_count; xug.xug_gen = unp_gencnt; COPYOUT(&xug, sizeof xug); *bufp = buf; return (0); fail: free(buf); return (-1); #undef COPYOUT #undef KREAD } void unixpr(u_long count_off, u_long gencnt_off, u_long dhead_off, u_long shead_off, u_long sphead_off, bool *first) { char *buf; int ret, type; struct xsocket *so; struct xunpgen *xug, *oxug; struct xunpcb *xunp; u_long head_off; buf = NULL; for (type = SOCK_STREAM; type <= SOCK_SEQPACKET; type++) { if (live) ret = pcblist_sysctl(type, &buf); else { head_off = 0; switch (type) { case SOCK_STREAM: head_off = shead_off; break; case SOCK_DGRAM: head_off = dhead_off; break; case SOCK_SEQPACKET: head_off = sphead_off; break; } ret = pcblist_kvm(count_off, gencnt_off, head_off, &buf); } if (ret == -1) continue; if (ret < 0) return; oxug = xug = (struct xunpgen *)buf; for (xug = (struct xunpgen *)((char *)xug + xug->xug_len); xug->xug_len > sizeof(struct xunpgen); xug = (struct xunpgen *)((char *)xug + xug->xug_len)) { xunp = (struct xunpcb *)xug; so = &xunp->xu_socket; /* Ignore PCBs which were freed during copyout. */ if (xunp->xu_unp.unp_gencnt > oxug->xug_gen) continue; if (*first) { xo_open_list("socket"); *first = false; } xo_open_instance("socket"); unixdomainpr(xunp, so); xo_close_instance("socket"); } if (xug != oxug && xug->xug_gen != oxug->xug_gen) { if (oxug->xug_count > xug->xug_count) { xo_emit("Some {:type/%s} sockets may have " "been {:action/deleted}.\n", socktype[type]); } else if (oxug->xug_count < xug->xug_count) { xo_emit("Some {:type/%s} sockets may have " "been {:action/created}.\n", socktype[type]); } else { xo_emit("Some {:type/%s} sockets may have " "been {:action/created or deleted}", socktype[type]); } } free(buf); } } static void unixdomainpr(struct xunpcb *xunp, struct xsocket *so) { struct unpcb *unp; struct sockaddr_un *sa; static int first = 1; - char buf1[15]; + char buf1[33]; static const char *titles[2] = { "{T:/%-8.8s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%8.8s} " "{T:/%8.8s} {T:/%8.8s} {T:/%8.8s} {T:Addr}\n", "{T:/%-16.16s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%16.16s} " "{T:/%16.16s} {T:/%16.16s} {T:/%16.16s} {T:Addr}\n" }; static const char *format[2] = { "{q:address/%8lx} {t:type/%-6.6s} " "{:receive-bytes-waiting/%6u} " "{:send-bytes-waiting/%6u} " "{q:vnode/%8lx} {q:connection/%8lx} " "{q:first-reference/%8lx} {q:next-reference/%8lx}", "{q:address/%16lx} {t:type/%-6.6s} " "{:receive-bytes-waiting/%6u} " "{:send-bytes-waiting/%6u} " "{q:vnode/%16lx} {q:connection/%16lx} " "{q:first-reference/%16lx} {q:next-reference/%16lx}" }; int fmt = (sizeof(void *) == 8) ? 1 : 0; unp = &xunp->xu_unp; if (unp->unp_addr) sa = &xunp->xu_addr; else sa = (struct sockaddr_un *)0; if (first && !Lflag) { xo_emit("{T:Active UNIX domain sockets}\n"); xo_emit(titles[fmt], "Address", "Type", "Recv-Q", "Send-Q", "Inode", "Conn", "Refs", "Nextref"); first = 0; } if (Lflag && so->so_qlimit == 0) return; if (Lflag) { - snprintf(buf1, 15, "%d/%d/%d", so->so_qlen, + snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen, so->so_incqlen, so->so_qlimit); - xo_emit("unix {d:socket/%-14.14s}{e:queue-length/%d}" - "{e:incomplete-queue-length/%d}{e:queue-limit/%d}", + xo_emit("unix {d:socket/%-32.32s}{e:queue-length/%u}" + "{e:incomplete-queue-length/%u}{e:queue-limit/%u}", buf1, so->so_qlen, so->so_incqlen, so->so_qlimit); } else { xo_emit(format[fmt], (long)so->so_pcb, socktype[so->so_type], so->so_rcv.sb_cc, so->so_snd.sb_cc, (long)unp->unp_vnode, (long)unp->unp_conn, (long)LIST_FIRST(&unp->unp_refs), (long)LIST_NEXT(unp, unp_reflink)); } if (sa) xo_emit(" {:path/%.*s}", (int)(sa->sun_len - offsetof(struct sockaddr_un, sun_path)), sa->sun_path); xo_emit("\n"); }