diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index f0ab77100336..999328df8392 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -1,843 +1,846 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include #include #include #ifndef INVARIANTS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #include #include #include #endif #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #include #include #include #include VNET_DEFINE_STATIC(uma_zone_t, tcptw_zone); #define V_tcptw_zone VNET(tcptw_zone) static int maxtcptw; /* * The timed wait queue contains references to each of the TCP sessions * currently in the TIME_WAIT state. The queue pointers, including the * queue pointers in each tcptw structure, are protected using the global * timewait lock, which must be held over queue iteration and modification. * * Rules on tcptw usage: * - a inpcb is always freed _after_ its tcptw * - a tcptw relies on its inpcb reference counting for memory stability * - a tcptw is dereferenceable only while its inpcb is locked */ VNET_DEFINE_STATIC(TAILQ_HEAD(, tcptw), twq_2msl); #define V_twq_2msl VNET(twq_2msl) /* Global timewait lock */ VNET_DEFINE_STATIC(struct rwlock, tw_lock); #define V_tw_lock VNET(tw_lock) #define TW_LOCK_INIT(tw, d) rw_init_flags(&(tw), (d), 0) #define TW_LOCK_DESTROY(tw) rw_destroy(&(tw)) #define TW_RLOCK(tw) rw_rlock(&(tw)) #define TW_WLOCK(tw) rw_wlock(&(tw)) #define TW_RUNLOCK(tw) rw_runlock(&(tw)) #define TW_WUNLOCK(tw) rw_wunlock(&(tw)) #define TW_LOCK_ASSERT(tw) rw_assert(&(tw), RA_LOCKED) #define TW_RLOCK_ASSERT(tw) rw_assert(&(tw), RA_RLOCKED) #define TW_WLOCK_ASSERT(tw) rw_assert(&(tw), RA_WLOCKED) #define TW_UNLOCK_ASSERT(tw) rw_assert(&(tw), RA_UNLOCKED) static void tcp_tw_2msl_reset(struct tcptw *, int); static void tcp_tw_2msl_stop(struct tcptw *, int); static int tcp_twrespond(struct tcptw *, int); static int tcptw_auto_size(void) { int halfrange; /* * Max out at half the ephemeral port range so that TIME_WAIT * sockets don't tie up too many ephemeral ports. */ if (V_ipport_lastauto > V_ipport_firstauto) halfrange = (V_ipport_lastauto - V_ipport_firstauto) / 2; else halfrange = (V_ipport_firstauto - V_ipport_lastauto) / 2; /* Protect against goofy port ranges smaller than 32. */ return (imin(imax(halfrange, 32), maxsockets / 5)); } static int sysctl_maxtcptw(SYSCTL_HANDLER_ARGS) { int error, new; if (maxtcptw == 0) new = tcptw_auto_size(); else new = maxtcptw; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) if (new >= 32) { maxtcptw = new; uma_zone_set_max(V_tcptw_zone, maxtcptw); } return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &maxtcptw, 0, sysctl_maxtcptw, "IU", "Maximum number of compressed TCP TIME_WAIT entries"); VNET_DEFINE_STATIC(bool, nolocaltimewait) = true; #define V_nolocaltimewait VNET(nolocaltimewait) SYSCTL_BOOL(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nolocaltimewait), true, "Do not create compressed TCP TIME_WAIT entries for local connections"); void tcp_tw_zone_change(void) { if (maxtcptw == 0) uma_zone_set_max(V_tcptw_zone, tcptw_auto_size()); } void tcp_tw_init(void) { V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw); if (maxtcptw == 0) uma_zone_set_max(V_tcptw_zone, tcptw_auto_size()); else uma_zone_set_max(V_tcptw_zone, maxtcptw); TAILQ_INIT(&V_twq_2msl); TW_LOCK_INIT(V_tw_lock, "tcptw"); } #ifdef VIMAGE void tcp_tw_destroy(void) { struct tcptw *tw; struct epoch_tracker et; NET_EPOCH_ENTER(et); while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL) tcp_twclose(tw, 0); NET_EPOCH_EXIT(et); TW_LOCK_DESTROY(V_tw_lock); uma_zdestroy(V_tcptw_zone); } #endif /* * Move a TCP connection into TIME_WAIT state. * tcbinfo is locked. * inp is locked, and is unlocked before returning. */ void tcp_twstart(struct tcpcb *tp) { struct tcptw twlocal, *tw; struct inpcb *inp = tp->t_inpcb; struct socket *so; uint32_t recwin; bool acknow, local; #ifdef INET6 bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6; #endif NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); /* A dropped inp should never transition to TIME_WAIT state. */ KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: " "(inp->inp_flags & INP_DROPPED) != 0")); if (V_nolocaltimewait) { #ifdef INET6 if (isipv6) local = in6_localaddr(&inp->in6p_faddr); else #endif #ifdef INET local = in_localip(inp->inp_faddr); #else local = false; #endif } else local = false; /* * For use only by DTrace. We do not reference the state * after this point so modifying it in place is not a problem. */ tcp_state_change(tp, TCPS_TIME_WAIT); if (local) tw = &twlocal; else tw = uma_zalloc(V_tcptw_zone, M_NOWAIT); if (tw == NULL) { /* * Reached limit on total number of TIMEWAIT connections * allowed. Remove a connection from TIMEWAIT queue in LRU * fashion to make room for this connection. * If that fails, use on stack tw at least to be able to * run through tcp_twrespond() and standard tcpcb discard * routine. * * XXX: Check if it possible to always have enough room * in advance based on guarantees provided by uma_zalloc(). */ tw = tcp_tw_2msl_scan(1); if (tw == NULL) { tw = &twlocal; local = true; } } /* * For !local case the tcptw will hold a reference on its inpcb * until tcp_twclose is called. */ tw->tw_inpcb = inp; /* * Recover last window size sent. */ so = inp->inp_socket; recwin = lmin(lmax(sbspace(&so->so_rcv), 0), (long)TCP_MAXWIN << tp->rcv_scale); if (recwin < (so->so_rcv.sb_hiwat / 4) && recwin < tp->t_maxseg) recwin = 0; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (tp->rcv_adv - tp->rcv_nxt)) recwin = (tp->rcv_adv - tp->rcv_nxt); tw->last_win = (u_short)(recwin >> tp->rcv_scale); /* * Set t_recent if timestamps are used on the connection. */ if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) == (TF_REQ_TSTMP|TF_RCVD_TSTMP)) { tw->t_recent = tp->ts_recent; tw->ts_offset = tp->ts_offset; } else { tw->t_recent = 0; tw->ts_offset = 0; } tw->snd_nxt = tp->snd_nxt; tw->t_port = tp->t_port; tw->rcv_nxt = tp->rcv_nxt; tw->tw_time = 0; /* XXX * If this code will * be used for fin-wait-2 state also, then we may need * a ts_recent from the last segment. */ acknow = tp->t_flags & TF_ACKNOW; /* * First, discard tcpcb state, which includes stopping its timers and * freeing it. tcp_discardcb() used to also release the inpcb, but * that work is now done in the caller. * * Note: soisdisconnected() call used to be made in tcp_discardcb(), * and might not be needed here any longer. */ #ifdef TCPHPTS tcp_hpts_remove(inp, HPTS_REMOVE_ALL); #endif tcp_discardcb(tp); soisdisconnected(so); tw->tw_so_options = so->so_options; inp->inp_flags |= INP_TIMEWAIT; if (acknow) tcp_twrespond(tw, TH_ACK); if (local) in_pcbdrop(inp); else { in_pcbref(inp); /* Reference from tw */ tw->tw_cred = crhold(so->so_cred); inp->inp_ppcb = tw; TCPSTATES_INC(TCPS_TIME_WAIT); tcp_tw_2msl_reset(tw, 0); } /* * If the inpcb owns the sole reference to the socket, then we can * detach and free the socket as it is not needed in time wait. */ if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_twstart: !SS_PROTOREF")); inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); } else INP_WUNLOCK(inp); } /* * Returns 1 if the TIME_WAIT state was killed and we should start over, * looking for a pcb in the listen state. Returns 0 otherwise. * * For pure SYN-segments the PCB shall be read-locked and the tcpopt pointer * may be NULL. For the rest write-lock and valid tcpopt. */ int tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, struct mbuf *m, int tlen) { struct tcptw *tw; int thflags; tcp_seq seq; NET_EPOCH_ASSERT(); INP_LOCK_ASSERT(inp); /* * XXXRW: Time wait state for inpcb has been recycled, but inpcb is * still present. This is undesirable, but temporarily necessary * until we work out how to handle inpcb's who's timewait state has * been removed. */ tw = intotw(inp); if (tw == NULL) goto drop; thflags = th->th_flags; #ifdef INVARIANTS if ((thflags & (TH_SYN | TH_ACK)) == TH_SYN) INP_RLOCK_ASSERT(inp); else { INP_WLOCK_ASSERT(inp); KASSERT(to != NULL, ("%s: called without options on a non-SYN segment", __func__)); } #endif /* * NOTE: for FIN_WAIT_2 (to be added later), * must validate sequence number before accepting RST */ /* * If the segment contains RST: * Drop the segment - see Stevens, vol. 2, p. 964 and * RFC 1337. */ if (thflags & TH_RST) goto drop; #if 0 /* PAWS not needed at the moment */ /* * RFC 1323 PAWS: If we have a timestamp reply on this segment * and it's less than ts_recent, drop it. */ if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent && TSTMP_LT(to.to_tsval, tp->ts_recent)) { if ((thflags & TH_ACK) == 0) goto drop; goto ack; } /* * ts_recent is never updated because we never accept new segments. */ #endif /* * If a new connection request is received * while in TIME_WAIT, drop the old connection * and start over if the sequence numbers * are above the previous ones. * Allow UDP port number changes in this case. */ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) { /* * In case we can't upgrade our lock just pretend we have * lost this packet. */ if (((thflags & (TH_SYN | TH_ACK)) == TH_SYN) && INP_TRY_UPGRADE(inp) == 0) goto drop; tcp_twclose(tw, 0); + TCPSTAT_INC(tcps_tw_recycles); return (1); } /* * Send RST if UDP port numbers don't match */ if (tw->t_port != m->m_pkthdr.tcp_tun_port) { if (th->th_flags & TH_ACK) { tcp_respond(NULL, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack, TH_RST); } else { if (th->th_flags & TH_SYN) tlen++; if (th->th_flags & TH_FIN) tlen++; tcp_respond(NULL, mtod(m, void *), th, m, th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK); } INP_UNLOCK(inp); + TCPSTAT_INC(tcps_tw_resets); return (0); } /* * Drop the segment if it does not contain an ACK. */ if ((thflags & TH_ACK) == 0) goto drop; INP_WLOCK_ASSERT(inp); /* * If timestamps were negotiated during SYN/ACK and a * segment without a timestamp is received, silently drop * the segment, unless the missing timestamps are tolerated. * See section 3.2 of RFC 7323. */ if (((to->to_flags & TOF_TS) == 0) && (tw->t_recent != 0) && (V_tcp_tolerate_missing_ts == 0)) { goto drop; } /* * Reset the 2MSL timer if this is a duplicate FIN. */ if (thflags & TH_FIN) { seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0); if (seq + 1 == tw->rcv_nxt) tcp_tw_2msl_reset(tw, 1); } /* * Acknowledge the segment if it has data or is not a duplicate ACK. */ if (thflags != TH_ACK || tlen != 0 || th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt) { TCP_PROBE5(receive, NULL, NULL, m, NULL, th); tcp_twrespond(tw, TH_ACK); + TCPSTAT_INC(tcps_tw_responds); goto dropnoprobe; } drop: TCP_PROBE5(receive, NULL, NULL, m, NULL, th); dropnoprobe: INP_UNLOCK(inp); m_freem(m); return (0); } void tcp_twclose(struct tcptw *tw, int reuse) { struct socket *so; struct inpcb *inp; /* * At this point, we are in one of two situations: * * (1) We have no socket, just an inpcb<->twtcp pair. We can free * all state. * * (2) We have a socket -- if we own a reference, release it and * notify the socket layer. */ inp = tw->tw_inpcb; KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait")); KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw")); NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(inp); tcp_tw_2msl_stop(tw, reuse); inp->inp_ppcb = NULL; in_pcbdrop(inp); so = inp->inp_socket; if (so != NULL) { /* * If there's a socket, handle two cases: first, we own a * strong reference, which we will now release, or we don't * in which case another reference exists (XXXRW: think * about this more), and we don't need to take action. */ if (inp->inp_flags & INP_SOCKREF) { inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); SOCK_LOCK(so); KASSERT(so->so_state & SS_PROTOREF, ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF")); so->so_state &= ~SS_PROTOREF; sofree(so); } else { /* * If we don't own the only reference, the socket and * inpcb need to be left around to be handled by * tcp_usr_detach() later. */ INP_WUNLOCK(inp); } } else { /* * The socket has been already cleaned-up for us, only free the * inpcb. */ in_pcbfree(inp); } TCPSTAT_INC(tcps_closed); } static int tcp_twrespond(struct tcptw *tw, int flags) { struct inpcb *inp = tw->tw_inpcb; #if defined(INET6) || defined(INET) struct tcphdr *th = NULL; #endif struct mbuf *m; #ifdef INET struct ip *ip = NULL; #endif u_int hdrlen, optlen, ulen; int error = 0; /* Keep compiler happy */ struct tcpopt to; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6; #endif struct udphdr *udp = NULL; hdrlen = 0; /* Keep compiler happy */ INP_WLOCK_ASSERT(inp); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_data += max_linkhdr; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif #ifdef INET6 if (isipv6) { hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); ip6 = mtod(m, struct ip6_hdr *); if (tw->t_port) { udp = (struct udphdr *)(ip6 + 1); hdrlen += sizeof(struct udphdr); udp->uh_sport = htons(V_tcp_udp_tunneling_port); udp->uh_dport = tw->t_port; ulen = (hdrlen - sizeof(struct ip6_hdr)); th = (struct tcphdr *)(udp + 1); } else th = (struct tcphdr *)(ip6 + 1); tcpip_fillheaders(inp, tw->t_port, ip6, th); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { hdrlen = sizeof(struct tcpiphdr); ip = mtod(m, struct ip *); if (tw->t_port) { udp = (struct udphdr *)(ip + 1); hdrlen += sizeof(struct udphdr); udp->uh_sport = htons(V_tcp_udp_tunneling_port); udp->uh_dport = tw->t_port; ulen = (hdrlen - sizeof(struct ip)); th = (struct tcphdr *)(udp + 1); } else th = (struct tcphdr *)(ip + 1); tcpip_fillheaders(inp, tw->t_port, ip, th); } #endif to.to_flags = 0; /* * Send a timestamp and echo-reply if both our side and our peer * have sent timestamps in our SYN's and this is not a RST. */ if (tw->t_recent && flags == TH_ACK) { to.to_flags |= TOF_TS; to.to_tsval = tcp_ts_getticks() + tw->ts_offset; to.to_tsecr = tw->t_recent; } optlen = tcp_addoptions(&to, (u_char *)(th + 1)); if (udp) { ulen += optlen; udp->uh_ulen = htons(ulen); } m->m_len = hdrlen + optlen; m->m_pkthdr.len = m->m_len; KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small")); th->th_seq = htonl(tw->snd_nxt); th->th_ack = htonl(tw->rcv_nxt); th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; th->th_flags = flags; th->th_win = htons(tw->last_win); #ifdef INET6 if (isipv6) { if (tw->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0); th->th_sum = htons(0); } else { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); } ip6->ip6_hlim = in6_selecthlim(inp, NULL); TCP_PROBE5(send, NULL, NULL, ip6, NULL, th); error = ip6_output(m, inp->in6p_outputopts, NULL, (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET { if (tw->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); th->th_sum = htons(0); } else { m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); } ip->ip_len = htons(m->m_pkthdr.len); if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); TCP_PROBE5(send, NULL, NULL, ip, NULL, th); error = ip_output(m, inp->inp_options, NULL, ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, inp); } #endif if (flags & TH_ACK) TCPSTAT_INC(tcps_sndacks); else TCPSTAT_INC(tcps_sndctrl); TCPSTAT_INC(tcps_sndtotal); return (error); } static void tcp_tw_2msl_reset(struct tcptw *tw, int rearm) { NET_EPOCH_ASSERT(); INP_WLOCK_ASSERT(tw->tw_inpcb); TW_WLOCK(V_tw_lock); if (rearm) TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl); tw->tw_time = ticks + 2 * tcp_msl; TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl); TW_WUNLOCK(V_tw_lock); } static void tcp_tw_2msl_stop(struct tcptw *tw, int reuse) { struct ucred *cred; struct inpcb *inp; int released __unused; NET_EPOCH_ASSERT(); TW_WLOCK(V_tw_lock); inp = tw->tw_inpcb; tw->tw_inpcb = NULL; TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl); cred = tw->tw_cred; tw->tw_cred = NULL; TW_WUNLOCK(V_tw_lock); if (cred != NULL) crfree(cred); released = in_pcbrele_wlocked(inp); KASSERT(!released, ("%s: inp should not be released here", __func__)); if (!reuse) uma_zfree(V_tcptw_zone, tw); TCPSTATES_DEC(TCPS_TIME_WAIT); } struct tcptw * tcp_tw_2msl_scan(int reuse) { struct tcptw *tw; struct inpcb *inp; NET_EPOCH_ASSERT(); for (;;) { TW_RLOCK(V_tw_lock); tw = TAILQ_FIRST(&V_twq_2msl); if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) { TW_RUNLOCK(V_tw_lock); break; } KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL", __func__)); inp = tw->tw_inpcb; in_pcbref(inp); TW_RUNLOCK(V_tw_lock); INP_WLOCK(inp); tw = intotw(inp); if (in_pcbrele_wlocked(inp)) { if (__predict_true(tw == NULL)) { continue; } else { /* This should not happen as in TIMEWAIT * state the inp should not be destroyed * before its tcptw. If INVARIANTS is * defined panic. */ #ifdef INVARIANTS panic("%s: Panic before an infinite " "loop: INP_TIMEWAIT && (INP_FREED " "|| inp last reference) && tw != " "NULL", __func__); #else log(LOG_ERR, "%s: Avoid an infinite " "loop: INP_TIMEWAIT && (INP_FREED " "|| inp last reference) && tw != " "NULL", __func__); #endif break; } } if (tw == NULL) { /* tcp_twclose() has already been called */ INP_WUNLOCK(inp); continue; } tcp_twclose(tw, reuse); if (reuse) return tw; } return NULL; } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 6b121a7fd311..c8e262ddf357 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1,1176 +1,1181 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 * $FreeBSD$ */ #ifndef _NETINET_TCP_VAR_H_ #define _NETINET_TCP_VAR_H_ #include #include #ifdef _KERNEL #include "opt_kern_tls.h" #include #include #include #endif #define TCP_END_BYTE_INFO 8 /* Bytes that makeup the "end information array" */ /* Types of ending byte info */ #define TCP_EI_EMPTY_SLOT 0 #define TCP_EI_STATUS_CLIENT_FIN 0x1 #define TCP_EI_STATUS_CLIENT_RST 0x2 #define TCP_EI_STATUS_SERVER_FIN 0x3 #define TCP_EI_STATUS_SERVER_RST 0x4 #define TCP_EI_STATUS_RETRAN 0x5 #define TCP_EI_STATUS_PROGRESS 0x6 #define TCP_EI_STATUS_PERSIST_MAX 0x7 #define TCP_EI_STATUS_KEEP_MAX 0x8 #define TCP_EI_STATUS_DATA_A_CLOSE 0x9 #define TCP_EI_STATUS_RST_IN_FRONT 0xa #define TCP_EI_STATUS_2MSL 0xb #define TCP_EI_STATUS_MAX_VALUE 0xb /************************************************/ /* Status bits we track to assure no duplicates, * the bits here are not used by the code but * for human representation. To check a bit we * take and shift over by 1 minus the value (1-8). */ /************************************************/ #define TCP_EI_BITS_CLIENT_FIN 0x001 #define TCP_EI_BITS_CLIENT_RST 0x002 #define TCP_EI_BITS_SERVER_FIN 0x004 #define TCP_EI_BITS_SERVER_RST 0x008 #define TCP_EI_BITS_RETRAN 0x010 #define TCP_EI_BITS_PROGRESS 0x020 #define TCP_EI_BITS_PRESIST_MAX 0x040 #define TCP_EI_BITS_KEEP_MAX 0x080 #define TCP_EI_BITS_DATA_A_CLO 0x100 #define TCP_EI_BITS_RST_IN_FR 0x200 /* a front state reset */ #define TCP_EI_BITS_2MS_TIMER 0x400 /* 2 MSL timer expired */ #if defined(_KERNEL) || defined(_WANT_TCPCB) /* TCP segment queue entry */ struct tseg_qent { TAILQ_ENTRY(tseg_qent) tqe_q; struct mbuf *tqe_m; /* mbuf contains packet */ struct mbuf *tqe_last; /* last mbuf in chain */ tcp_seq tqe_start; /* TCP Sequence number start */ int tqe_len; /* TCP segment data length */ uint32_t tqe_flags; /* The flags from the th->th_flags */ uint32_t tqe_mbuf_cnt; /* Count of mbuf overhead */ }; TAILQ_HEAD(tsegqe_head, tseg_qent); struct sackblk { tcp_seq start; /* start seq no. of sack block */ tcp_seq end; /* end seq no. */ }; struct sackhole { tcp_seq start; /* start seq no. of hole */ tcp_seq end; /* end seq no. */ tcp_seq rxmit; /* next seq. no in hole to be retransmitted */ TAILQ_ENTRY(sackhole) scblink; /* scoreboard linkage */ }; struct sackhint { struct sackhole *nexthole; int32_t sack_bytes_rexmit; tcp_seq last_sack_ack; /* Most recent/largest sacked ack */ int32_t delivered_data; /* Newly acked data from last SACK */ int32_t sacked_bytes; /* Total sacked bytes reported by the * receiver via sack option */ uint32_t recover_fs; /* Flight Size at the start of Loss recovery */ uint32_t prr_delivered; /* Total bytes delivered using PRR */ uint32_t prr_out; /* Bytes sent during IN_RECOVERY */ }; #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq) STAILQ_HEAD(tcp_log_stailq, tcp_log_mem); /* * Tcp control block, one per tcp; fields: * Organized for 64 byte cacheline efficiency based * on common tcp_input/tcp_output processing. */ struct tcpcb { /* Cache line 1 */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ uint32_t t_maxseg:24, /* maximum segment size */ t_logstate:8; /* State of "black box" logging */ uint32_t t_port:16, /* Tunneling (over udp) port */ t_state:4, /* state of this connection */ t_idle_reduce : 1, t_delayed_ack: 7, /* Delayed ack variable */ t_fin_is_rst: 1, /* Are fin's treated as resets */ t_log_state_set: 1, bits_spare : 2; u_int t_flags; tcp_seq snd_una; /* sent but unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; * used to recognize retransmits */ tcp_seq snd_nxt; /* send next */ tcp_seq snd_up; /* send urgent pointer */ uint32_t snd_wnd; /* send window */ uint32_t snd_cwnd; /* congestion-controlled window */ uint32_t t_peakrate_thr; /* pre-calculated peak rate threshold */ /* Cache line 2 */ u_int32_t ts_offset; /* our timestamp offset */ u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ int rcv_numsacks; /* # distinct sack blks present */ u_int t_tsomax; /* TSO total burst length limit in bytes */ u_int t_tsomaxsegcount; /* TSO maximum segment count */ u_int t_tsomaxsegsize; /* TSO maximum segment size in bytes */ tcp_seq rcv_nxt; /* receive next */ tcp_seq rcv_adv; /* advertised window */ uint32_t rcv_wnd; /* receive window */ u_int t_flags2; /* More tcpcb flags storage */ int t_srtt; /* smoothed round-trip time */ int t_rttvar; /* variance in round-trip time */ u_int32_t ts_recent; /* timestamp echo data */ u_char snd_scale; /* window scaling for send window */ u_char rcv_scale; /* window scaling for recv window */ u_char snd_limited; /* segments limited transmitted */ u_char request_r_scale; /* pending window scaling */ tcp_seq last_ack_sent; u_int t_rcvtime; /* inactivity time */ /* Cache line 3 */ tcp_seq rcv_up; /* receive urgent pointer */ int t_segqlen; /* segment reassembly queue length */ uint32_t t_segqmbuflen; /* Count of bytes mbufs on all entries */ struct tsegqe_head t_segq; /* segment reassembly queue */ struct mbuf *t_in_pkt; struct mbuf *t_tail_pkt; struct tcp_timer *t_timers; /* All the TCP timers in one struct */ struct vnet *t_vnet; /* back pointer to parent vnet */ uint32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch */ tcp_seq snd_wl1; /* window update seg seq number */ /* Cache line 4 */ tcp_seq snd_wl2; /* window update seg ack number */ tcp_seq irs; /* initial receive sequence number */ tcp_seq iss; /* initial send sequence number */ u_int t_acktime; /* RACK and BBR incoming new data was acked */ u_int t_sndtime; /* time last data was sent */ u_int ts_recent_age; /* when last updated */ tcp_seq snd_recover; /* for use in NewReno Fast Recovery */ uint16_t cl4_spare; /* Spare to adjust CL 4 */ char t_oobflags; /* have some */ char t_iobc; /* input character */ int t_rxtcur; /* current retransmit value (ticks) */ int t_rxtshift; /* log(2) of rexmt exp. backoff */ u_int t_rtttime; /* RTT measurement start time */ tcp_seq t_rtseq; /* sequence number being timed */ u_int t_starttime; /* time connection was established */ u_int t_fbyte_in; /* ticks time when first byte queued in */ u_int t_fbyte_out; /* ticks time when first byte queued out */ u_int t_pmtud_saved_maxseg; /* pre-blackhole MSS */ int t_blackhole_enter; /* when to enter blackhole detection */ int t_blackhole_exit; /* when to exit blackhole detection */ u_int t_rttmin; /* minimum rtt allowed */ u_int t_rttbest; /* best rtt we've seen */ int t_softerror; /* possible error not yet reported */ uint32_t max_sndwnd; /* largest window peer has offered */ /* Cache line 5 */ uint32_t snd_cwnd_prev; /* cwnd prior to retransmit */ uint32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */ int t_sndzerowin; /* zero-window updates sent */ u_long t_rttupdated; /* number of times rtt sampled */ int snd_numholes; /* number of holes seen by sender */ u_int t_badrxtwin; /* window for retransmit recovery */ TAILQ_HEAD(sackhole_head, sackhole) snd_holes; /* SACK scoreboard (sorted) */ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */ struct sackhint sackhint; /* SACK scoreboard hint */ int t_rttlow; /* smallest observerved RTT */ int rfbuf_cnt; /* recv buffer autoscaling byte count */ struct toedev *tod; /* toedev handling this connection */ int t_sndrexmitpack; /* retransmit packets sent */ int t_rcvoopack; /* out-of-order packets received */ void *t_toe; /* TOE pcb pointer */ struct cc_algo *cc_algo; /* congestion control algorithm */ struct cc_var *ccv; /* congestion control specific vars */ struct osd *osd; /* storage for Khelp module data */ int t_bytes_acked; /* # bytes acked during current RTT */ u_int t_maxunacktime; u_int t_keepinit; /* time to establish connection */ u_int t_keepidle; /* time before keepalive probes begin */ u_int t_keepintvl; /* interval between keepalives */ u_int t_keepcnt; /* number of keepalives before close */ int t_dupacks; /* consecutive dup acks recd */ int t_lognum; /* Number of log entries */ int t_loglimit; /* Maximum number of log entries */ int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */ struct tcp_log_stailq t_logs; /* Log buffer */ struct tcp_log_id_node *t_lin; struct tcp_log_id_bucket *t_lib; const char *t_output_caller; /* Function that called tcp_output */ struct statsblob *t_stats; /* Per-connection stats */ uint32_t t_logsn; /* Log "serial number" */ uint32_t gput_ts; /* Time goodput measurement started */ tcp_seq gput_seq; /* Outbound measurement seq */ tcp_seq gput_ack; /* Inbound measurement ack */ int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */ uint32_t t_maxpeakrate; /* max peak rate set by user, in bytes/s */ uint32_t t_sndtlppack; /* tail loss probe packets sent */ uint64_t t_sndtlpbyte; /* total tail loss probe bytes sent */ uint64_t t_sndbytes; /* total bytes sent */ uint64_t t_snd_rxt_bytes; /* total bytes retransmitted */ uint32_t t_dsack_bytes; /* Total number of dsack bytes we have received */ uint32_t t_dsack_tlp_bytes; /* Total number of dsack bytes we have received for TLPs sent */ uint32_t t_dsack_pack; /* Total dsack packets we have recieved */ uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */ uint32_t t_end_info_status; /* Status flag of end info */ unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */ union { uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]; uint64_t server; } t_tfo_cookie; /* TCP Fast Open cookie to send */ union { uint8_t t_end_info_bytes[TCP_END_BYTE_INFO]; uint64_t t_end_info; }; #ifdef TCPPCAP struct mbufq t_inpkts; /* List of saved input packets. */ struct mbufq t_outpkts; /* List of saved output packets. */ #endif }; #endif /* _KERNEL || _WANT_TCPCB */ #ifdef _KERNEL struct tcptemp { u_char tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */ struct tcphdr tt_t; }; /* Enable TCP/UDP tunneling port */ #define TCP_TUNNELING_PORT_MIN 0 #define TCP_TUNNELING_PORT_MAX 65535 #define TCP_TUNNELING_PORT_DEFAULT 0 /* Enable TCP/UDP tunneling port */ #define TCP_TUNNELING_OVERHEAD_MIN sizeof(struct udphdr) #define TCP_TUNNELING_OVERHEAD_MAX 1024 #define TCP_TUNNELING_OVERHEAD_DEFAULT TCP_TUNNELING_OVERHEAD_MIN /* Minimum map entries limit value, if set */ #define TCP_MIN_MAP_ENTRIES_LIMIT 128 /* * TODO: We yet need to brave plowing in * to tcp_input() and the pru_usrreq() block. * Right now these go to the old standards which * are somewhat ok, but in the long term may * need to be changed. If we do tackle tcp_input() * then we need to get rid of the tcp_do_segment() * function below. */ /* Flags for tcp functions */ #define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ /* * If defining the optional tcp_timers, in the * tfb_tcp_timer_stop call you must use the * callout_async_drain() function with the * tcp_timer_discard callback. You should check * the return of callout_async_drain() and if 0 * increment tt_draincnt. Since the timer sub-system * does not know your callbacks you must provide a * stop_all function that loops through and calls * tcp_timer_stop() with each of your defined timers. * Adding a tfb_tcp_handoff_ok function allows the socket * option to change stacks to query you even if the * connection is in a later stage. You return 0 to * say you can take over and run your stack, you return * non-zero (an error number) to say no you can't. * If the function is undefined you can only change * in the early states (before connect or listen). * tfb_tcp_fb_fini is changed to add a flag to tell * the old stack if the tcb is being destroyed or * not. A one in the flag means the TCB is being * destroyed, a zero indicates its transitioning to * another stack (via socket option). */ struct tcp_function_block { char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; int (*tfb_tcp_output)(struct tcpcb *); int (*tfb_tcp_output_wtime)(struct tcpcb *, const struct timeval *); void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t); int (*tfb_do_queued_segments)(struct socket *, struct tcpcb *, int); int (*tfb_do_segment_nounlock)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int, struct timeval *); void (*tfb_tcp_hpts_do_segment)(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t, int, struct timeval *); int (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); /* Optional memory allocation/free routine */ int (*tfb_tcp_fb_init)(struct tcpcb *); void (*tfb_tcp_fb_fini)(struct tcpcb *, int); /* Optional timers, must define all if you define one */ int (*tfb_tcp_timer_stop_all)(struct tcpcb *); void (*tfb_tcp_timer_activate)(struct tcpcb *, uint32_t, u_int); int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); int (*tfb_tcp_handoff_ok)(struct tcpcb *); void (*tfb_tcp_mtu_chg)(struct tcpcb *); int (*tfb_pru_options)(struct tcpcb *, int); void (*tfb_hwtls_change)(struct tcpcb *, int); volatile uint32_t tfb_refcnt; uint32_t tfb_flags; uint8_t tfb_id; }; struct tcp_function { TAILQ_ENTRY(tcp_function) tf_next; char tf_name[TCP_FUNCTION_NAME_LEN_MAX]; struct tcp_function_block *tf_fb; }; TAILQ_HEAD(tcp_funchead, tcp_function); #endif /* _KERNEL */ /* * Flags and utility macros for the t_flags field. */ #define TF_ACKNOW 0x00000001 /* ack peer immediately */ #define TF_DELACK 0x00000002 /* ack, but try to delay it */ #define TF_NODELAY 0x00000004 /* don't delay packets to coalesce */ #define TF_NOOPT 0x00000008 /* don't use tcp options */ #define TF_SENTFIN 0x00000010 /* have sent FIN */ #define TF_REQ_SCALE 0x00000020 /* have/will request window scaling */ #define TF_RCVD_SCALE 0x00000040 /* other side has requested scaling */ #define TF_REQ_TSTMP 0x00000080 /* have/will request timestamps */ #define TF_RCVD_TSTMP 0x00000100 /* a timestamp was received in SYN */ #define TF_SACK_PERMIT 0x00000200 /* other side said I could SACK */ #define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */ #define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */ #define TF_NOPUSH 0x00001000 /* don't push */ #define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */ #define TF_WAKESOR 0x00004000 /* wake up receive socket */ #define TF_GPUTINPROG 0x00008000 /* Goodput measurement in progress */ #define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */ #define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */ #define TF_LASTIDLE 0x00040000 /* connection was previously idle */ #define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */ #define TF_FASTRECOVERY 0x00100000 /* in NewReno Fast Recovery */ #define TF_WASFRECOVERY 0x00200000 /* was in NewReno Fast Recovery */ #define TF_SIGNATURE 0x00400000 /* require MD5 digests (RFC2385) */ #define TF_FORCEDATA 0x00800000 /* force out a byte */ #define TF_TSO 0x01000000 /* TSO enabled on this connection */ #define TF_TOE 0x02000000 /* this connection is offloaded */ #define TF_UNUSED0 0x04000000 /* unused */ #define TF_UNUSED1 0x08000000 /* unused */ #define TF_LRD 0x10000000 /* Lost Retransmission Detection */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ #define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) #define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY #define EXIT_FASTRECOVERY(t_flags) t_flags &= ~TF_FASTRECOVERY #define IN_CONGRECOVERY(t_flags) (t_flags & TF_CONGRECOVERY) #define ENTER_CONGRECOVERY(t_flags) t_flags |= TF_CONGRECOVERY #define EXIT_CONGRECOVERY(t_flags) t_flags &= ~TF_CONGRECOVERY #define IN_RECOVERY(t_flags) (t_flags & (TF_CONGRECOVERY | TF_FASTRECOVERY)) #define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY) #define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY) #if defined(_KERNEL) && !defined(TCP_RFC7413) #define IS_FASTOPEN(t_flags) (false) #else #define IS_FASTOPEN(t_flags) (t_flags & TF_FASTOPEN) #endif #define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una) /* * Flags for the t_oobflags field. */ #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 /* * Flags for the extended TCP flags field, t_flags2 */ #define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */ #define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */ #define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */ #define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. */ #define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */ #define TF2_ECN_PERMIT 0x00000020 /* connection ECN-ready */ #define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */ #define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */ #define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */ #define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */ /* * Structure to hold TCP options that are only used during segment * processing (in tcp_input), but not held in the tcpcb. * It's basically used to reduce the number of parameters * to tcp_dooptions and tcp_addoptions. * The binary order of the to_flags is relevant for packing of the * options in tcp_addoptions. */ struct tcpopt { u_int32_t to_flags; /* which options are present */ #define TOF_MSS 0x0001 /* maximum segment size */ #define TOF_SCALE 0x0002 /* window scaling */ #define TOF_SACKPERM 0x0004 /* SACK permitted */ #define TOF_TS 0x0010 /* timestamp */ #define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */ #define TOF_SACK 0x0080 /* Peer sent SACK option */ #define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */ #define TOF_MAXOPT 0x0200 u_int32_t to_tsval; /* new timestamp */ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ u_char *to_signature; /* pointer to the TCP-MD5 signature */ u_int8_t *to_tfo_cookie; /* pointer to the TFO cookie */ u_int16_t to_mss; /* maximum segment size */ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */ u_int8_t to_tfo_len; /* TFO cookie length */ u_int32_t to_spare; /* UTO */ }; /* * Flags for tcp_dooptions. */ #define TO_SYN 0x01 /* parse SYN-only options */ struct hc_metrics_lite { /* must stay in sync with hc_metrics */ uint32_t rmx_mtu; /* MTU for this path */ uint32_t rmx_ssthresh; /* outbound gateway buffer limit */ uint32_t rmx_rtt; /* estimated round trip time */ uint32_t rmx_rttvar; /* estimated rtt variance */ uint32_t rmx_cwnd; /* congestion window */ uint32_t rmx_sendpipe; /* outbound delay-bandwidth product */ uint32_t rmx_recvpipe; /* inbound delay-bandwidth product */ }; /* * Used by tcp_maxmtu() to communicate interface specific features * and limits at the time of connection setup. */ struct tcp_ifcap { int ifcap; u_int tsomax; u_int tsomaxsegcount; u_int tsomaxsegsize; }; #ifndef _NETINET_IN_PCB_H_ struct in_conninfo; #endif /* _NETINET_IN_PCB_H_ */ struct tcptw { struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */ uint32_t t_port:16, /* UDP port number if TCPoUDP */ t_unused:16; tcp_seq snd_nxt; tcp_seq rcv_nxt; u_short last_win; /* cached window value */ short tw_so_options; /* copy of so_options */ struct ucred *tw_cred; /* user credentials */ u_int32_t t_recent; u_int32_t ts_offset; /* our timestamp offset */ int tw_time; TAILQ_ENTRY(tcptw) tw_2msl; }; #define intotcpcb(ip) ((struct tcpcb *)(ip)->inp_ppcb) #define intotw(ip) ((struct tcptw *)(ip)->inp_ppcb) #define sototcpcb(so) (intotcpcb(sotoinpcb(so))) /* * The smoothed round-trip time and estimated variance * are stored as fixed point numbers scaled by the values below. * For convenience, these scales are also used in smoothing the average * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). * With these scales, srtt has 3 bits to the right of the binary point, * and thus an "ALPHA" of 0.875. rttvar has 2 bits to the right of the * binary point, and is smoothed with an ALPHA of 0.75. */ #define TCP_RTT_SCALE 32 /* multiplier for srtt; 3 bits frac. */ #define TCP_RTT_SHIFT 5 /* shift for srtt; 3 bits frac. */ #define TCP_RTTVAR_SCALE 16 /* multiplier for rttvar; 2 bits */ #define TCP_RTTVAR_SHIFT 4 /* shift for rttvar; 2 bits */ #define TCP_DELTA_SHIFT 2 /* see tcp_input.c */ /* * The initial retransmission should happen at rtt + 4 * rttvar. * Because of the way we do the smoothing, srtt and rttvar * will each average +1/2 tick of bias. When we compute * the retransmit timer, we want 1/2 tick of rounding and * 1 extra tick because of +-1/2 tick uncertainty in the * firing of the timer. The bias will give us exactly the * 1.5 tick we need. But, because the bias is * statistical, we have to test that we don't drop below * the minimum feasible timer (which is 2 ticks). * This version of the macro adapted from a paper by Lawrence * Brakmo and Larry Peterson which outlines a problem caused * by insufficient precision in the original implementation, * which results in inappropriately large RTO values for very * fast networks. */ #define TCP_REXMTVAL(tp) \ max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) /* * TCP statistics. * Many of these should be kept per connection, * but that's inconvenient at the moment. */ struct tcpstat { uint64_t tcps_connattempt; /* connections initiated */ uint64_t tcps_accepts; /* connections accepted */ uint64_t tcps_connects; /* connections established */ uint64_t tcps_drops; /* connections dropped */ uint64_t tcps_conndrops; /* embryonic connections dropped */ uint64_t tcps_minmssdrops; /* average minmss too low drops */ uint64_t tcps_closed; /* conn. closed (includes drops) */ uint64_t tcps_segstimed; /* segs where we tried to get rtt */ uint64_t tcps_rttupdated; /* times we succeeded */ uint64_t tcps_delack; /* delayed acks sent */ uint64_t tcps_timeoutdrop; /* conn. dropped in rxmt timeout */ uint64_t tcps_rexmttimeo; /* retransmit timeouts */ uint64_t tcps_persisttimeo; /* persist timeouts */ uint64_t tcps_keeptimeo; /* keepalive timeouts */ uint64_t tcps_keepprobe; /* keepalive probes sent */ uint64_t tcps_keepdrops; /* connections dropped in keepalive */ uint64_t tcps_sndtotal; /* total packets sent */ uint64_t tcps_sndpack; /* data packets sent */ uint64_t tcps_sndbyte; /* data bytes sent */ uint64_t tcps_sndrexmitpack; /* data packets retransmitted */ uint64_t tcps_sndrexmitbyte; /* data bytes retransmitted */ uint64_t tcps_sndrexmitbad; /* unnecessary packet retransmissions */ uint64_t tcps_sndacks; /* ack-only packets sent */ uint64_t tcps_sndprobe; /* window probes sent */ uint64_t tcps_sndurg; /* packets sent with URG only */ uint64_t tcps_sndwinup; /* window update-only packets sent */ uint64_t tcps_sndctrl; /* control (SYN|FIN|RST) packets sent */ uint64_t tcps_rcvtotal; /* total packets received */ uint64_t tcps_rcvpack; /* packets received in sequence */ uint64_t tcps_rcvbyte; /* bytes received in sequence */ uint64_t tcps_rcvbadsum; /* packets received with ccksum errs */ uint64_t tcps_rcvbadoff; /* packets received with bad offset */ uint64_t tcps_rcvreassfull; /* packets dropped for no reass space */ uint64_t tcps_rcvshort; /* packets received too short */ uint64_t tcps_rcvduppack; /* duplicate-only packets received */ uint64_t tcps_rcvdupbyte; /* duplicate-only bytes received */ uint64_t tcps_rcvpartduppack; /* packets with some duplicate data */ uint64_t tcps_rcvpartdupbyte; /* dup. bytes in part-dup. packets */ uint64_t tcps_rcvoopack; /* out-of-order packets received */ uint64_t tcps_rcvoobyte; /* out-of-order bytes received */ uint64_t tcps_rcvpackafterwin; /* packets with data after window */ uint64_t tcps_rcvbyteafterwin; /* bytes rcvd after window */ uint64_t tcps_rcvafterclose; /* packets rcvd after "close" */ uint64_t tcps_rcvwinprobe; /* rcvd window probe packets */ uint64_t tcps_rcvdupack; /* rcvd duplicate acks */ uint64_t tcps_rcvacktoomuch; /* rcvd acks for unsent data */ uint64_t tcps_rcvackpack; /* rcvd ack packets */ uint64_t tcps_rcvackbyte; /* bytes acked by rcvd acks */ uint64_t tcps_rcvwinupd; /* rcvd window update packets */ uint64_t tcps_pawsdrop; /* segments dropped due to PAWS */ uint64_t tcps_predack; /* times hdr predict ok for acks */ uint64_t tcps_preddat; /* times hdr predict ok for data pkts */ uint64_t tcps_pcbcachemiss; uint64_t tcps_cachedrtt; /* times cached RTT in route updated */ uint64_t tcps_cachedrttvar; /* times cached rttvar updated */ uint64_t tcps_cachedssthresh; /* times cached ssthresh updated */ uint64_t tcps_usedrtt; /* times RTT initialized from route */ uint64_t tcps_usedrttvar; /* times RTTVAR initialized from rt */ uint64_t tcps_usedssthresh; /* times ssthresh initialized from rt*/ uint64_t tcps_persistdrop; /* timeout in persist state */ uint64_t tcps_badsyn; /* bogus SYN, e.g. premature ACK */ uint64_t tcps_mturesent; /* resends due to MTU discovery */ uint64_t tcps_listendrop; /* listen queue overflows */ uint64_t tcps_badrst; /* ignored RSTs in the window */ uint64_t tcps_sc_added; /* entry added to syncache */ uint64_t tcps_sc_retransmitted; /* syncache entry was retransmitted */ uint64_t tcps_sc_dupsyn; /* duplicate SYN packet */ uint64_t tcps_sc_dropped; /* could not reply to packet */ uint64_t tcps_sc_completed; /* successful extraction of entry */ uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */ uint64_t tcps_sc_cacheoverflow; /* syncache cache limit hit */ uint64_t tcps_sc_reset; /* RST removed entry from syncache */ uint64_t tcps_sc_stale; /* timed out or listen socket gone */ uint64_t tcps_sc_aborted; /* syncache entry aborted */ uint64_t tcps_sc_badack; /* removed due to bad ACK */ uint64_t tcps_sc_unreach; /* ICMP unreachable received */ uint64_t tcps_sc_zonefail; /* zalloc() failed */ uint64_t tcps_sc_sendcookie; /* SYN cookie sent */ uint64_t tcps_sc_recvcookie; /* SYN cookie received */ uint64_t tcps_hc_added; /* entry added to hostcache */ uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */ uint64_t tcps_finwait2_drops; /* Drop FIN_WAIT_2 connection after time limit */ /* SACK related stats */ uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */ uint64_t tcps_sack_rexmits; /* SACK rexmit segments */ uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */ uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */ uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */ uint64_t tcps_sack_lostrexmt; /* SACK lost retransmission recovered */ uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */ /* ECN related stats */ uint64_t tcps_ecn_ce; /* ECN Congestion Experienced */ uint64_t tcps_ecn_ect0; /* ECN Capable Transport */ uint64_t tcps_ecn_ect1; /* ECN Capable Transport */ uint64_t tcps_ecn_shs; /* ECN successful handshakes */ uint64_t tcps_ecn_rcwnd; /* # times ECN reduced the cwnd */ /* TCP_SIGNATURE related stats */ uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */ uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */ uint64_t tcps_sig_err_buildsig; /* Failed to make signature */ uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */ uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */ /* Path MTU Discovery Black Hole Detection related stats */ uint64_t tcps_pmtud_blackhole_activated; /* Black Hole Count */ uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */ uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */ uint64_t tcps_tunneled_pkts; /* Packets encap's in UDP received */ uint64_t tcps_tunneled_errs; /* Packets that had errors that were UDP encaped */ /* Dsack related stats */ uint64_t tcps_dsack_count; /* Number of ACKs arriving with DSACKs */ uint64_t tcps_dsack_bytes; /* Number of bytes DSACK'ed no TLP */ uint64_t tcps_dsack_tlp_bytes; /* Number of bytes DSACK'ed due to TLPs */ + /* TCPS_TIME_WAIT usage stats */ + uint64_t tcps_tw_recycles; /* Times time-wait was recycled. */ + uint64_t tcps_tw_resets; /* Times time-wait sent a reset. */ + uint64_t tcps_tw_responds; /* Times time-wait sent a valid ack. */ + uint64_t _pad[6]; /* 3 UTO, 3 TBD */ }; #define tcps_rcvmemdrop tcps_rcvreassfull /* compat */ #ifdef _KERNEL #define TI_UNLOCKED 1 #define TI_RLOCKED 2 #include VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat); /* tcp statistics */ /* * In-kernel consumers can use these accessor macros directly to update * stats. */ #define TCPSTAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val)) #define TCPSTAT_INC(name) TCPSTAT_ADD(name, 1) /* * Kernel module consumers must use this accessor macro. */ void kmod_tcpstat_add(int statnum, int val); #define KMOD_TCPSTAT_ADD(name, val) \ kmod_tcpstat_add(offsetof(struct tcpstat, name) / sizeof(uint64_t), val) #define KMOD_TCPSTAT_INC(name) KMOD_TCPSTAT_ADD(name, 1) /* * Running TCP connection count by state. */ VNET_DECLARE(counter_u64_t, tcps_states[TCP_NSTATES]); #define V_tcps_states VNET(tcps_states) #define TCPSTATES_INC(state) counter_u64_add(V_tcps_states[state], 1) #define TCPSTATES_DEC(state) counter_u64_add(V_tcps_states[state], -1) /* * TCP specific helper hook point identifiers. */ #define HHOOK_TCP_EST_IN 0 #define HHOOK_TCP_EST_OUT 1 #define HHOOK_TCP_LAST HHOOK_TCP_EST_OUT struct tcp_hhook_data { struct tcpcb *tp; struct tcphdr *th; struct tcpopt *to; uint32_t len; int tso; tcp_seq curack; }; #ifdef TCP_HHOOK void hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t len, int tso); #endif #endif /* * TCB structure exported to user-land via sysctl(3). * * Fields prefixed with "xt_" are unique to the export structure, and fields * with "t_" or other prefixes match corresponding fields of 'struct tcpcb'. * * Legend: * (s) - used by userland utilities in src * (p) - used by utilities in ports * (3) - is known to be used by third party software not in ports * (n) - no known usage * * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been * included. Not all of our clients do. */ #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_) struct xtcpcb { ksize_t xt_len; /* length of this structure */ struct xinpcb xt_inp; char xt_stack[TCP_FUNCTION_NAME_LEN_MAX]; /* (s) */ char xt_logid[TCP_LOG_ID_LEN]; /* (s) */ char xt_cc[TCP_CA_NAME_MAX]; /* (s) */ int64_t spare64[6]; int32_t t_state; /* (s,p) */ uint32_t t_flags; /* (s,p) */ int32_t t_sndzerowin; /* (s) */ int32_t t_sndrexmitpack; /* (s) */ int32_t t_rcvoopack; /* (s) */ int32_t t_rcvtime; /* (s) */ int32_t tt_rexmt; /* (s) */ int32_t tt_persist; /* (s) */ int32_t tt_keep; /* (s) */ int32_t tt_2msl; /* (s) */ int32_t tt_delack; /* (s) */ int32_t t_logstate; /* (3) */ uint32_t t_snd_cwnd; /* (s) */ uint32_t t_snd_ssthresh; /* (s) */ uint32_t t_maxseg; /* (s) */ uint32_t t_rcv_wnd; /* (s) */ uint32_t t_snd_wnd; /* (s) */ uint32_t xt_ecn; /* (s) */ uint32_t t_dsack_bytes; /* (n) */ uint32_t t_dsack_tlp_bytes; /* (n) */ uint32_t t_dsack_pack; /* (n) */ uint16_t xt_encaps_port; /* (s) */ int16_t spare16; int32_t spare32[22]; } __aligned(8); #ifdef _KERNEL void tcp_inptoxtp(const struct inpcb *, struct xtcpcb *); #endif #endif /* * TCP function information (name-to-id mapping, aliases, and refcnt) * exported to user-land via sysctl(3). */ struct tcp_function_info { uint32_t tfi_refcnt; uint8_t tfi_id; char tfi_name[TCP_FUNCTION_NAME_LEN_MAX]; char tfi_alias[TCP_FUNCTION_NAME_LEN_MAX]; }; /* * Identifiers for TCP sysctl nodes */ #define TCPCTL_DO_RFC1323 1 /* use RFC-1323 extensions */ #define TCPCTL_MSSDFLT 3 /* MSS default */ #define TCPCTL_STATS 4 /* statistics */ #define TCPCTL_RTTDFLT 5 /* default RTT estimate */ #define TCPCTL_KEEPIDLE 6 /* keepalive idle timer */ #define TCPCTL_KEEPINTVL 7 /* interval to send keepalives */ #define TCPCTL_SENDSPACE 8 /* send buffer space */ #define TCPCTL_RECVSPACE 9 /* receive buffer space */ #define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ #define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ #define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ #define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ #define TCPCTL_DROP 15 /* drop tcp connection */ #define TCPCTL_STATES 16 /* connection counts by TCP state */ #ifdef _KERNEL #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_tcp); SYSCTL_DECL(_net_inet_tcp_sack); MALLOC_DECLARE(M_TCPLOG); #endif VNET_DECLARE(int, tcp_log_in_vain); #define V_tcp_log_in_vain VNET(tcp_log_in_vain) /* * Global TCP tunables shared between different stacks. * Please keep the list sorted. */ VNET_DECLARE(int, drop_synfin); VNET_DECLARE(int, path_mtu_discovery); VNET_DECLARE(int, tcp_abc_l_var); VNET_DECLARE(int, tcp_autorcvbuf_max); VNET_DECLARE(int, tcp_autosndbuf_inc); VNET_DECLARE(int, tcp_autosndbuf_max); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_autorcvbuf); VNET_DECLARE(int, tcp_do_autosndbuf); VNET_DECLARE(int, tcp_do_ecn); VNET_DECLARE(int, tcp_do_lrd); VNET_DECLARE(int, tcp_do_prr); VNET_DECLARE(int, tcp_do_prr_conservative); VNET_DECLARE(int, tcp_do_newcwv); VNET_DECLARE(int, tcp_do_rfc1323); VNET_DECLARE(int, tcp_tolerate_missing_ts); VNET_DECLARE(int, tcp_do_rfc3042); VNET_DECLARE(int, tcp_do_rfc3390); VNET_DECLARE(int, tcp_do_rfc3465); VNET_DECLARE(int, tcp_do_newsack); VNET_DECLARE(int, tcp_do_sack); VNET_DECLARE(int, tcp_do_tso); VNET_DECLARE(int, tcp_ecn_maxretries); VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int, tcp_insecure_rst); VNET_DECLARE(int, tcp_insecure_syn); VNET_DECLARE(uint32_t, tcp_map_entries_limit); VNET_DECLARE(uint32_t, tcp_map_split_limit); VNET_DECLARE(int, tcp_minmss); VNET_DECLARE(int, tcp_mssdflt); #ifdef STATS VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl); VNET_DECLARE(int, tcp_perconn_stats_enable); #endif /* STATS */ VNET_DECLARE(int, tcp_recvspace); VNET_DECLARE(int, tcp_sack_globalholes); VNET_DECLARE(int, tcp_sack_globalmaxholes); VNET_DECLARE(int, tcp_sack_maxholes); VNET_DECLARE(int, tcp_sc_rst_sock_fail); VNET_DECLARE(int, tcp_sendspace); VNET_DECLARE(int, tcp_udp_tunneling_overhead); VNET_DECLARE(int, tcp_udp_tunneling_port); VNET_DECLARE(struct inpcbinfo, tcbinfo); #define V_tcp_do_lrd VNET(tcp_do_lrd) #define V_tcp_do_prr VNET(tcp_do_prr) #define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative) #define V_tcp_do_newcwv VNET(tcp_do_newcwv) #define V_drop_synfin VNET(drop_synfin) #define V_path_mtu_discovery VNET(path_mtu_discovery) #define V_tcbinfo VNET(tcbinfo) #define V_tcp_abc_l_var VNET(tcp_abc_l_var) #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) #define V_tcp_delack_enabled VNET(tcp_delack_enabled) #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) #define V_tcp_do_ecn VNET(tcp_do_ecn) #define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) #define V_tcp_tolerate_missing_ts VNET(tcp_tolerate_missing_ts) #define V_tcp_ts_offset_per_conn VNET(tcp_ts_offset_per_conn) #define V_tcp_do_rfc3042 VNET(tcp_do_rfc3042) #define V_tcp_do_rfc3390 VNET(tcp_do_rfc3390) #define V_tcp_do_rfc3465 VNET(tcp_do_rfc3465) #define V_tcp_do_newsack VNET(tcp_do_newsack) #define V_tcp_do_sack VNET(tcp_do_sack) #define V_tcp_do_tso VNET(tcp_do_tso) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) #define V_tcp_insecure_rst VNET(tcp_insecure_rst) #define V_tcp_insecure_syn VNET(tcp_insecure_syn) #define V_tcp_map_entries_limit VNET(tcp_map_entries_limit) #define V_tcp_map_split_limit VNET(tcp_map_split_limit) #define V_tcp_minmss VNET(tcp_minmss) #define V_tcp_mssdflt VNET(tcp_mssdflt) #ifdef STATS #define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl) #define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable) #endif /* STATS */ #define V_tcp_recvspace VNET(tcp_recvspace) #define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) #define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) #define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) #define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) #define V_tcp_sendspace VNET(tcp_sendspace) #define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead) #define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port) #ifdef TCP_HHOOK VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]); #define V_tcp_hhh VNET(tcp_hhh) #endif int tcp_addoptions(struct tcpopt *, u_char *); int tcp_ccalgounload(struct cc_algo *unload_algo); struct tcpcb * tcp_close(struct tcpcb *); void tcp_discardcb(struct tcpcb *); bool tcp_freecb(struct tcpcb *); void tcp_twstart(struct tcpcb *); void tcp_twclose(struct tcptw *, int); void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *); struct tcpcb * tcp_drop(struct tcpcb *, int); void tcp_drain(void); void tcp_init(void); void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *, const void *); char *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *, const void *); int tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct mbuf *); void tcp_reass_global_init(void); void tcp_reass_flush(struct tcpcb *); void tcp_dooptions(struct tcpopt *, u_char *, int, int); void tcp_dropwithreset(struct mbuf *, struct tcphdr *, struct tcpcb *, int, int); void tcp_pulloutofband(struct socket *, struct tcphdr *, struct mbuf *, int); void tcp_xmit_timer(struct tcpcb *, int); void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *); void cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs, uint16_t type); void cc_conn_init(struct tcpcb *tp); void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th); void cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos); void cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos); void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); #ifdef TCP_HHOOK void hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to); #endif int tcp_input(struct mbuf **, int *, int); int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int); int tcp_input_with_port(struct mbuf **, int *, int, uint16_t); void tcp_handle_wakeup(struct tcpcb *, struct socket *); void tcp_do_segment(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t); int register_tcp_functions(struct tcp_function_block *blk, int wait); int register_tcp_functions_as_names(struct tcp_function_block *blk, int wait, const char *names[], int *num_names); int register_tcp_functions_as_name(struct tcp_function_block *blk, const char *name, int wait); int deregister_tcp_functions(struct tcp_function_block *blk, bool quiesce, bool force); struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs); int find_tcp_function_alias(struct tcp_function_block *blk, struct tcp_function_set *fs); void tcp_switch_back_to_default(struct tcpcb *tp); struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *fs); int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); extern counter_u64_t tcp_inp_lro_direct_queue; extern counter_u64_t tcp_inp_lro_wokeup_queue; extern counter_u64_t tcp_inp_lro_compressed; extern counter_u64_t tcp_inp_lro_locks_taken; extern counter_u64_t tcp_extra_mbuf; extern counter_u64_t tcp_would_have_but; extern counter_u64_t tcp_comp_total; extern counter_u64_t tcp_uncomp_total; extern counter_u64_t tcp_bad_csums; #ifdef NETFLIX_EXP_DETECTION /* Various SACK attack thresholds */ extern int32_t tcp_force_detection; extern int32_t tcp_sack_to_ack_thresh; extern int32_t tcp_sack_to_move_thresh; extern int32_t tcp_restoral_thresh; extern int32_t tcp_sad_decay_val; extern int32_t tcp_sad_pacing_interval; extern int32_t tcp_sad_low_pps; extern int32_t tcp_map_minimum; extern int32_t tcp_attack_on_turns_on_logging; #endif extern uint32_t tcp_ack_war_time_window; extern uint32_t tcp_ack_war_cnt; uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); void tcp6_use_min_mtu(struct tcpcb *); u_int tcp_maxseg(const struct tcpcb *); u_int tcp_fixed_maxseg(const struct tcpcb *); void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, struct tcp_ifcap *); void tcp_mss(struct tcpcb *, int); int tcp_mssopt(struct in_conninfo *); struct inpcb * tcp_drop_syn_sent(struct inpcb *, int); struct tcpcb * tcp_newtcpcb(struct inpcb *); int tcp_output(struct tcpcb *); void tcp_state_change(struct tcpcb *, int); void tcp_respond(struct tcpcb *, void *, struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int); void tcp_tw_init(void); #ifdef VIMAGE void tcp_tw_destroy(void); #endif void tcp_tw_zone_change(void); int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, struct mbuf *, int); void tcp_setpersist(struct tcpcb *); void tcp_slowtimo(void); void tcp_record_dsack(struct tcpcb *tp, tcp_seq start, tcp_seq end, int tlp); struct tcptemp * tcpip_maketemplate(struct inpcb *); void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *); void tcp_timer_activate(struct tcpcb *, uint32_t, u_int); int tcp_timer_suspend(struct tcpcb *, uint32_t); void tcp_timers_unsuspend(struct tcpcb *, uint32_t); int tcp_timer_active(struct tcpcb *, uint32_t); void tcp_timer_stop(struct tcpcb *, uint32_t); void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int); int inp_to_cpuid(struct inpcb *inp); /* * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo) */ void tcp_hc_init(void); #ifdef VIMAGE void tcp_hc_destroy(void); #endif void tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *); uint32_t tcp_hc_getmtu(struct in_conninfo *); void tcp_hc_updatemtu(struct in_conninfo *, uint32_t); void tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *); extern struct pr_usrreqs tcp_usrreqs; uint32_t tcp_new_ts_offset(struct in_conninfo *); tcp_seq tcp_new_isn(struct in_conninfo *); int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); int tcp_dsack_block_exists(struct tcpcb *); void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq); void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); void tcp_clean_dsack_blocks(struct tcpcb *tp); void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *); void tcp_lost_retransmission(struct tcpcb *, struct tcphdr *); void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); void tcp_free_sackholes(struct tcpcb *tp); void tcp_sack_lost_retransmission(struct tcpcb *, struct tcphdr *); int tcp_newreno(struct tcpcb *, struct tcphdr *); int tcp_compute_pipe(struct tcpcb *); uint32_t tcp_compute_initwnd(uint32_t); void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t); int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes, size_t seed_len); int tcp_can_enable_pacing(void); void tcp_decrement_paced_conn(void); struct mbuf * tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls); int tcp_stats_init(void); void tcp_log_end_status(struct tcpcb *tp, uint8_t status); static inline void tcp_fields_to_host(struct tcphdr *th) { th->th_seq = ntohl(th->th_seq); th->th_ack = ntohl(th->th_ack); th->th_win = ntohs(th->th_win); th->th_urp = ntohs(th->th_urp); } static inline void tcp_fields_to_net(struct tcphdr *th) { th->th_seq = htonl(th->th_seq); th->th_ack = htonl(th->th_ack); th->th_win = htons(th->th_win); th->th_urp = htons(th->th_urp); } static inline void tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt, uint8_t is_tlp, int hw_tls) { if (is_tlp) { tp->t_sndtlppack++; tp->t_sndtlpbyte += len; } /* To get total bytes sent you must add t_snd_rxt_bytes to t_sndbytes */ if (is_rxt) tp->t_snd_rxt_bytes += len; else tp->t_sndbytes += len; #ifdef KERN_TLS if (hw_tls && is_rxt && len != 0) { uint64_t rexmit_percent = (1000ULL * tp->t_snd_rxt_bytes) / (10ULL * (tp->t_snd_rxt_bytes + tp->t_sndbytes)); if (rexmit_percent > ktls_ifnet_max_rexmit_pct) ktls_disable_ifnet(tp); } #endif } #endif /* _KERNEL */ #endif /* _NETINET_TCP_VAR_H_ */ diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index dc777b0be990..b77447575911 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -1,1504 +1,1514 @@ /*- * Copyright (c) 1983, 1988, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #define _WANT_SOCKET #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "netstat.h" #include "nl_defs.h" #define max(a, b) (((a) > (b)) ? (a) : (b)) #ifdef INET static void inetprint(const char *, struct in_addr *, int, const char *, int, const int); #endif #ifdef INET6 static int udp_done, tcp_done, sdp_done; #endif /* INET6 */ static int pcblist_sysctl(int proto, const char *name, char **bufp) { const char *mibvar; char *buf; size_t len; switch (proto) { case IPPROTO_TCP: mibvar = "net.inet.tcp.pcblist"; break; case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; case IPPROTO_DIVERT: mibvar = "net.inet.divert.pcblist"; break; default: mibvar = "net.inet.raw.pcblist"; break; } if (strncmp(name, "sdp", 3) == 0) mibvar = "net.inet.sdp.pcblist"; len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) xo_warn("sysctl: %s", mibvar); return (0); } if ((buf = malloc(len)) == NULL) { xo_warnx("malloc %lu bytes", (u_long)len); return (0); } if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) { xo_warn("sysctl: %s", mibvar); free(buf); return (0); } *bufp = buf; return (1); } /* * Copied directly from uipc_socket2.c. We leave out some fields that are in * nested structures that aren't used to avoid extra work. */ static void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_ccc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mcnt = sb->sb_mcnt; xsb->sb_ccnt = sb->sb_ccnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } int sotoxsocket(struct socket *so, struct xsocket *xso) { struct protosw proto; struct domain domain; bzero(xso, sizeof *xso); xso->xso_len = sizeof *xso; xso->xso_so = (uintptr_t)so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = (uintptr_t)so->so_pcb; if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0) return (-1); xso->xso_protocol = proto.pr_protocol; if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0) return (-1); xso->xso_family = domain.dom_family; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; if ((so->so_options & SO_ACCEPTCONN) != 0) { xso->so_qlen = so->sol_qlen; xso->so_incqlen = so->sol_incqlen; xso->so_qlimit = so->sol_qlimit; } else { sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_oobmark = so->so_oobmark; } return (0); } /* * Print a summary of connections related to an Internet * protocol. For TCP, also give state of connection. * Listening processes (aflag) are suppressed unless the * -a (all) flag is specified. */ void protopr(u_long off, const char *name, int af1, int proto) { static int first = 1; int istcp; char *buf; const char *vchar; struct xtcpcb *tp; struct xinpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; int fnamelen, cnamelen; istcp = 0; switch (proto) { case IPPROTO_TCP: #ifdef INET6 if (strncmp(name, "sdp", 3) != 0) { if (tcp_done != 0) return; else tcp_done = 1; } else { if (sdp_done != 0) return; else sdp_done = 1; } #endif istcp = 1; break; case IPPROTO_UDP: #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif break; } if (!pcblist_sysctl(proto, name, &buf)) return; if (cflag || Cflag) { fnamelen = strlen("Stack"); cnamelen = strlen("CC"); oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen*)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { tp = (struct xtcpcb *)xig; inp = &tp->xt_inp; } else { continue; } if (so->xso_protocol != proto) continue; if (inp->inp_gencnt > oxig->xig_gen) continue; fnamelen = max(fnamelen, (int)strlen(tp->xt_stack)); cnamelen = max(cnamelen, (int)strlen(tp->xt_cc)); } } oxig = xig = (struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { if (istcp) { tp = (struct xtcpcb *)xig; inp = &tp->xt_inp; } else { inp = (struct xinpcb *)xig; } so = &inp->xi_socket; /* Ignore sockets for protocols other than the desired one. */ if (so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. */ if (inp->inp_gencnt > oxig->xig_gen) continue; if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0) #ifdef INET6 || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0) #endif /* INET6 */ || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0 #ifdef INET6 && (inp->inp_vflag & INP_IPV6) == 0 #endif /* INET6 */ )) ) continue; if (!aflag && ( (istcp && tp->t_state == TCPS_LISTEN) || (af1 == AF_INET && inp->inp_laddr.s_addr == INADDR_ANY) #ifdef INET6 || (af1 == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif /* INET6 */ || (af1 == AF_UNSPEC && (((inp->inp_vflag & INP_IPV4) != 0 && inp->inp_laddr.s_addr == INADDR_ANY) #ifdef INET6 || ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) #endif )) )) continue; if (first) { if (!Lflag) { xo_emit("Active Internet connections"); if (aflag) xo_emit(" (including servers)"); } else xo_emit( "Current listen queue sizes (qlen/incqlen/maxqlen)"); xo_emit("\n"); if (Aflag) xo_emit("{T:/%-*s} ", 2 * (int)sizeof(void *), "Tcpcb"); if (Lflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-32.32s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-32.32s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-32.32s} {T:/%-45.45s}"), "Proto", "Listen", "Local Address"); else if (Tflag) xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%s}"), "Proto", "Rexmit", "OOORcv", "0-win", "Local Address", "Foreign Address"); else { xo_emit((Aflag && !Wflag) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%-18.18s}" : ((!Wflag || af1 == AF_INET) ? "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%-22.22s}" : "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%-45.45s}"), "Proto", "Recv-Q", "Send-Q", "Local Address", "Foreign Address"); if (!xflag && !Rflag) xo_emit(" {T:/%-11.11s}", "(state)"); } if (xflag) { xo_emit(" {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} " "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s}", "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS", "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA", "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX"); xo_emit(" {T:/%7.7s} {T:/%7.7s} {T:/%7.7s} " "{T:/%7.7s} {T:/%7.7s} {T:/%7.7s}", "rexmt", "persist", "keep", "2msl", "delack", "rcvtime"); } else if (Rflag) { xo_emit(" {T:/%8.8s} {T:/%5.5s}", "flowid", "ftype"); } if (cflag) { xo_emit(" {T:/%-*.*s}", fnamelen, fnamelen, "Stack"); } if (Cflag) xo_emit(" {T:/%-*.*s} {T:/%10.10s}" " {T:/%10.10s} {T:/%5.5s}" " {T:/%3.3s}", cnamelen, cnamelen, "CC", "cwin", "ssthresh", "MSS", "ECN"); if (Pflag) xo_emit(" {T:/%s}", "Log ID"); xo_emit("\n"); first = 0; } if (Lflag && so->so_qlimit == 0) continue; xo_open_instance("socket"); if (Aflag) { if (istcp) xo_emit("{q:address/%*lx} ", 2 * (int)sizeof(void *), (u_long)inp->inp_ppcb); else xo_emit("{q:address/%*lx} ", 2 * (int)sizeof(void *), (u_long)so->so_pcb); } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "46" : "6"; else #endif vchar = ((inp->inp_vflag & INP_IPV4) != 0) ? "4" : ""; if (istcp && (tp->t_flags & TF_TOE) != 0) xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", "toe", vchar); else xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", name, vchar); if (Lflag) { char buf1[33]; snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen, so->so_incqlen, so->so_qlimit); xo_emit("{:listen-queue-sizes/%-32.32s} ", buf1); } else if (Tflag) { if (istcp) xo_emit("{:sent-retransmit-packets/%6u} " "{:received-out-of-order-packets/%6u} " "{:sent-zero-window/%6u} ", tp->t_sndrexmitpack, tp->t_rcvoopack, tp->t_sndzerowin); else xo_emit("{P:/%21s}", ""); } else { xo_emit("{:receive-bytes-waiting/%6u} " "{:send-bytes-waiting/%6u} ", so->so_rcv.sb_cc, so->so_snd.sb_cc); } if (numeric_port) { #ifdef INET if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 1, af1); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 1); } /* else nothing printed now */ #endif /* INET6 */ } else if (inp->inp_flags & INP_ANONPORT) { #ifdef INET if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 1, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, 0, af1); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 1); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, 0); } /* else nothing printed now */ #endif /* INET6 */ } else { #ifdef INET if (inp->inp_vflag & INP_IPV4) { inetprint("local", &inp->inp_laddr, (int)inp->inp_lport, name, 0, af1); if (!Lflag) inetprint("remote", &inp->inp_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport, af1); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET6 if (inp->inp_vflag & INP_IPV6) { inet6print("local", &inp->in6p_laddr, (int)inp->inp_lport, name, 0); if (!Lflag) inet6print("remote", &inp->in6p_faddr, (int)inp->inp_fport, name, inp->inp_lport != inp->inp_fport); } /* else nothing printed now */ #endif /* INET6 */ } if (xflag) { xo_emit("{:receive-mbufs/%6u} {:send-mbufs/%6u} " "{:receive-clusters/%6u} {:send-clusters/%6u} " "{:receive-high-water/%6u} {:send-high-water/%6u} " "{:receive-low-water/%6u} {:send-low-water/%6u} " "{:receive-mbuf-bytes/%6u} {:send-mbuf-bytes/%6u} " "{:receive-mbuf-bytes-max/%6u} " "{:send-mbuf-bytes-max/%6u}", so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt, so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt, so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat, so->so_rcv.sb_lowat, so->so_snd.sb_lowat, so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt, so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax); if (istcp) xo_emit(" {:retransmit-timer/%4d.%02d} " "{:persist-timer/%4d.%02d} " "{:keepalive-timer/%4d.%02d} " "{:msl2-timer/%4d.%02d} " "{:delay-ack-timer/%4d.%02d} " "{:inactivity-timer/%4d.%02d}", tp->tt_rexmt / 1000, (tp->tt_rexmt % 1000) / 10, tp->tt_persist / 1000, (tp->tt_persist % 1000) / 10, tp->tt_keep / 1000, (tp->tt_keep % 1000) / 10, tp->tt_2msl / 1000, (tp->tt_2msl % 1000) / 10, tp->tt_delack / 1000, (tp->tt_delack % 1000) / 10, tp->t_rcvtime / 1000, (tp->t_rcvtime % 1000) / 10); } if (istcp && !Lflag && !xflag && !Tflag && !Rflag) { if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES) xo_emit("{:tcp-state/%-11d}", tp->t_state); else { xo_emit("{:tcp-state/%-11s}", tcpstates[tp->t_state]); #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN) /* Show T/TCP `hidden state' */ if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) xo_emit("{:need-syn-or-fin/*}"); #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */ } } if (Rflag) { /* XXX: is this right Alfred */ xo_emit(" {:flow-id/%08x} {:flow-type/%5d}", inp->inp_flowid, inp->inp_flowtype); } if (istcp) { if (cflag) xo_emit(" {:stack/%-*.*s}", fnamelen, fnamelen, tp->xt_stack); if (Cflag) xo_emit(" {:cc/%-*.*s}" " {:snd-cwnd/%10lu}" " {:snd-ssthresh/%10lu}" " {:t-maxseg/%5u} {:ecn/%3s}", cnamelen, cnamelen, tp->xt_cc, tp->t_snd_cwnd, tp->t_snd_ssthresh, tp->t_maxseg, (tp->t_state >= TCPS_ESTABLISHED ? (tp->xt_ecn > 0 ? (tp->xt_ecn == 1 ? "ecn" : "ace") : "off") : "n/a")); if (Pflag) xo_emit(" {:log-id/%s}", tp->xt_logid[0] == '\0' ? "-" : tp->xt_logid); } xo_emit("\n"); xo_close_instance("socket"); } if (xig != oxig && xig->xig_gen != oxig->xig_gen) { if (oxig->xig_count > xig->xig_count) { xo_emit("Some {d:lost/%s} sockets may have been " "deleted.\n", name); } else if (oxig->xig_count < xig->xig_count) { xo_emit("Some {d:created/%s} sockets may have been " "created.\n", name); } else { xo_emit("Some {d:changed/%s} sockets may have been " "created or deleted.\n", name); } } free(buf); } /* * Dump TCP statistics structure. */ void tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct tcpstat tcpstat; uint64_t tcps_states[TCP_NSTATES]; #ifdef INET6 if (tcp_done != 0) return; else tcp_done = 1; #endif if (fetch_stats("net.inet.tcp.stats", off, &tcpstat, sizeof(tcpstat), kread_counters) != 0) return; if (fetch_stats_ro("net.inet.tcp.states", nl[N_TCPS_STATES].n_value, &tcps_states, sizeof(tcps_states), kread_counters) != 0) return; xo_open_container("tcp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, plural(tcpstat.f)) #define p1a(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f) #define p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2, plural(tcpstat.f2)) #define p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1), \ (uintmax_t )tcpstat.f2) #define p3(f, m) if (tcpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )tcpstat.f, pluralies(tcpstat.f)) p(tcps_sndtotal, "\t{:sent-packets/%ju} {N:/packet%s sent}\n"); p2(tcps_sndpack,tcps_sndbyte, "\t\t{:sent-data-packets/%ju} " "{N:/data packet%s} ({:sent-data-bytes/%ju} {N:/byte%s})\n"); p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t" "{:sent-retransmitted-packets/%ju} {N:/data packet%s} " "({:sent-retransmitted-bytes/%ju} {N:/byte%s}) " "{N:retransmitted}\n"); p(tcps_sndrexmitbad, "\t\t" "{:sent-unnecessary-retransmitted-packets/%ju} " "{N:/data packet%s unnecessarily retransmitted}\n"); p(tcps_mturesent, "\t\t{:sent-resends-by-mtu-discovery/%ju} " "{N:/resend%s initiated by MTU discovery}\n"); p2a(tcps_sndacks, tcps_delack, "\t\t{:sent-ack-only-packets/%ju} " "{N:/ack-only packet%s/} ({:sent-packets-delayed/%ju} " "{N:delayed})\n"); p(tcps_sndurg, "\t\t{:sent-urg-only-packets/%ju} " "{N:/URG only packet%s}\n"); p(tcps_sndprobe, "\t\t{:sent-window-probe-packets/%ju} " "{N:/window probe packet%s}\n"); p(tcps_sndwinup, "\t\t{:sent-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_sndctrl, "\t\t{:sent-control-packets/%ju} " "{N:/control packet%s}\n"); p(tcps_rcvtotal, "\t{:received-packets/%ju} " "{N:/packet%s received}\n"); p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t" "{:received-ack-packets/%ju} {N:/ack%s} " "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n"); p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} " "{N:/duplicate ack%s}\n"); p(tcps_tunneled_pkts, "\t\t{:received-udp-tunneled-pkts/%ju} " "{N:/UDP tunneled pkt%s}\n"); p(tcps_tunneled_errs, "\t\t{:received-bad-udp-tunneled-pkts/%ju} " "{N:/UDP tunneled pkt cnt with error%s}\n"); p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} " "{N:/ack%s for unsent data}\n"); p2(tcps_rcvpack, tcps_rcvbyte, "\t\t" "{:received-in-sequence-packets/%ju} {N:/packet%s} " "({:received-in-sequence-bytes/%ju} {N:/byte%s}) " "{N:received in-sequence}\n"); p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t" "{:received-completely-duplicate-packets/%ju} " "{N:/completely duplicate packet%s} " "({:received-completely-duplicate-bytes/%ju} {N:/byte%s})\n"); p(tcps_pawsdrop, "\t\t{:received-old-duplicate-packets/%ju} " "{N:/old duplicate packet%s}\n"); p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t" "{:received-some-duplicate-packets/%ju} " "{N:/packet%s with some dup. data} " "({:received-some-duplicate-bytes/%ju} {N:/byte%s duped/})\n"); p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t{:received-out-of-order/%ju} " "{N:/out-of-order packet%s} " "({:received-out-of-order-bytes/%ju} {N:/byte%s})\n"); p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t" "{:received-after-window-packets/%ju} {N:/packet%s} " "({:received-after-window-bytes/%ju} {N:/byte%s}) " "{N:of data after window}\n"); p(tcps_rcvwinprobe, "\t\t{:received-window-probes/%ju} " "{N:/window probe%s}\n"); p(tcps_rcvwinupd, "\t\t{:receive-window-update-packets/%ju} " "{N:/window update packet%s}\n"); p(tcps_dsack_count, "\t\t{:received-with-dsack-packets/%ju} " "{N:/packet%s received with dsack}\n"); p(tcps_dsack_bytes, "\t\t{:received-with-dsack-bytes/%ju} " "{N:/dsack byte%s received (no TLP involved)}\n"); p(tcps_dsack_tlp_bytes, "\t\t{:received-with-dsack-bytes-tlp/%ju} " "{N:/dsack byte%s received (TLP responsible)}\n"); p(tcps_rcvafterclose, "\t\t{:received-after-close-packets/%ju} " "{N:/packet%s received after close}\n"); p(tcps_rcvbadsum, "\t\t{:discard-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(tcps_rcvbadoff, "\t\t{:discard-bad-header-offset/%ju} " "{N:/discarded for bad header offset field%s}\n"); p1a(tcps_rcvshort, "\t\t{:discard-too-short/%ju} " "{N:discarded because packet too short}\n"); p1a(tcps_rcvreassfull, "\t\t{:discard-reassembly-queue-full/%ju} " "{N:discarded due to full reassembly queue}\n"); p(tcps_connattempt, "\t{:connection-requests/%ju} " "{N:/connection request%s}\n"); p(tcps_accepts, "\t{:connections-accepts/%ju} " "{N:/connection accept%s}\n"); p(tcps_badsyn, "\t{:bad-connection-attempts/%ju} " "{N:/bad connection attempt%s}\n"); p(tcps_listendrop, "\t{:listen-queue-overflows/%ju} " "{N:/listen queue overflow%s}\n"); p(tcps_badrst, "\t{:ignored-in-window-resets/%ju} " "{N:/ignored RSTs in the window%s}\n"); p(tcps_connects, "\t{:connections-established/%ju} " "{N:/connection%s established (including accepts)}\n"); p(tcps_usedrtt, "\t\t{:connections-hostcache-rtt/%ju} " "{N:/time%s used RTT from hostcache}\n"); p(tcps_usedrttvar, "\t\t{:connections-hostcache-rttvar/%ju} " "{N:/time%s used RTT variance from hostcache}\n"); p(tcps_usedssthresh, "\t\t{:connections-hostcache-ssthresh/%ju} " "{N:/time%s used slow-start threshold from hostcache}\n"); p2(tcps_closed, tcps_drops, "\t{:connections-closed/%ju} " "{N:/connection%s closed (including} " "{:connection-drops/%ju} {N:/drop%s})\n"); p(tcps_cachedrtt, "\t\t{:connections-updated-rtt-on-close/%ju} " "{N:/connection%s updated cached RTT on close}\n"); p(tcps_cachedrttvar, "\t\t" "{:connections-updated-variance-on-close/%ju} " "{N:/connection%s updated cached RTT variance on close}\n"); p(tcps_cachedssthresh, "\t\t" "{:connections-updated-ssthresh-on-close/%ju} " "{N:/connection%s updated cached ssthresh on close}\n"); p(tcps_conndrops, "\t{:embryonic-connections-dropped/%ju} " "{N:/embryonic connection%s dropped}\n"); p2(tcps_rttupdated, tcps_segstimed, "\t{:segments-updated-rtt/%ju} " "{N:/segment%s updated rtt (of} " "{:segment-update-attempts/%ju} {N:/attempt%s})\n"); p(tcps_rexmttimeo, "\t{:retransmit-timeouts/%ju} " "{N:/retransmit timeout%s}\n"); p(tcps_timeoutdrop, "\t\t" "{:connections-dropped-by-retransmit-timeout/%ju} " "{N:/connection%s dropped by rexmit timeout}\n"); p(tcps_persisttimeo, "\t{:persist-timeout/%ju} " "{N:/persist timeout%s}\n"); p(tcps_persistdrop, "\t\t" "{:connections-dropped-by-persist-timeout/%ju} " "{N:/connection%s dropped by persist timeout}\n"); p(tcps_finwait2_drops, "\t" "{:connections-dropped-by-finwait2-timeout/%ju} " "{N:/Connection%s (fin_wait_2) dropped because of timeout}\n"); p(tcps_keeptimeo, "\t{:keepalive-timeout/%ju} " "{N:/keepalive timeout%s}\n"); p(tcps_keepprobe, "\t\t{:keepalive-probes/%ju} " "{N:/keepalive probe%s sent}\n"); p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} " "{N:/connection%s dropped by keepalive}\n"); p(tcps_predack, "\t{:ack-header-predictions/%ju} " "{N:/correct ACK header prediction%s}\n"); p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} " "{N:/correct data packet header prediction%s}\n"); xo_open_container("syncache"); p3(tcps_sc_added, "\t{:entries-added/%ju} " "{N:/syncache entr%s added}\n"); p1a(tcps_sc_retransmitted, "\t\t{:retransmitted/%ju} " "{N:/retransmitted}\n"); p1a(tcps_sc_dupsyn, "\t\t{:duplicates/%ju} {N:/dupsyn}\n"); p1a(tcps_sc_dropped, "\t\t{:dropped/%ju} {N:/dropped}\n"); p1a(tcps_sc_completed, "\t\t{:completed/%ju} {N:/completed}\n"); p1a(tcps_sc_bucketoverflow, "\t\t{:bucket-overflow/%ju} " "{N:/bucket overflow}\n"); p1a(tcps_sc_cacheoverflow, "\t\t{:cache-overflow/%ju} " "{N:/cache overflow}\n"); p1a(tcps_sc_reset, "\t\t{:reset/%ju} {N:/reset}\n"); p1a(tcps_sc_stale, "\t\t{:stale/%ju} {N:/stale}\n"); p1a(tcps_sc_aborted, "\t\t{:aborted/%ju} {N:/aborted}\n"); p1a(tcps_sc_badack, "\t\t{:bad-ack/%ju} {N:/badack}\n"); p1a(tcps_sc_unreach, "\t\t{:unreachable/%ju} {N:/unreach}\n"); p(tcps_sc_zonefail, "\t\t{:zone-failures/%ju} {N:/zone failure%s}\n"); p(tcps_sc_sendcookie, "\t{:sent-cookies/%ju} {N:/cookie%s sent}\n"); p(tcps_sc_recvcookie, "\t{:receivd-cookies/%ju} " "{N:/cookie%s received}\n"); xo_close_container("syncache"); xo_open_container("hostcache"); p3(tcps_hc_added, "\t{:entries-added/%ju} " "{N:/hostcache entr%s added}\n"); p1a(tcps_hc_bucketoverflow, "\t\t{:buffer-overflows/%ju} " "{N:/bucket overflow}\n"); xo_close_container("hostcache"); xo_open_container("sack"); p(tcps_sack_recovery_episode, "\t{:recovery-episodes/%ju} " "{N:/SACK recovery episode%s}\n"); p(tcps_sack_rexmits, "\t{:segment-retransmits/%ju} " "{N:/segment rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rexmit_bytes, "\t{:byte-retransmits/%ju} " "{N:/byte rexmit%s in SACK recovery episodes}\n"); p(tcps_sack_rcv_blocks, "\t{:received-blocks/%ju} " "{N:/SACK option%s (SACK blocks) received}\n"); p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} " "{N:/SACK option%s (SACK blocks) sent}\n"); p(tcps_sack_lostrexmt, "\t{:lost-retransmissions/%ju} " "{N:/SACK retransmission%s lost}\n"); p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} " "{N:/SACK scoreboard overflow}\n"); xo_close_container("sack"); xo_open_container("ecn"); p(tcps_ecn_ce, "\t{:ce-packets/%ju} " "{N:/packet%s with ECN CE bit set}\n"); p(tcps_ecn_ect0, "\t{:ect0-packets/%ju} " "{N:/packet%s with ECN ECT(0) bit set}\n"); p(tcps_ecn_ect1, "\t{:ect1-packets/%ju} " "{N:/packet%s with ECN ECT(1) bit set}\n"); p(tcps_ecn_shs, "\t{:handshakes/%ju} " "{N:/successful ECN handshake%s}\n"); p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} " "{N:/time%s ECN reduced the congestion window}\n"); xo_close_container("ecn"); xo_open_container("tcp-signature"); p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} " "{N:/packet%s with matching signature received}\n"); p(tcps_sig_rcvbadsig, "\t{:received-bad-signature/%ju} " "{N:/packet%s with bad signature received}\n"); p(tcps_sig_err_buildsig, "\t{:failed-make-signature/%ju} " "{N:/time%s failed to make signature due to no SA}\n"); p(tcps_sig_err_sigopt, "\t{:no-signature-expected/%ju} " "{N:/time%s unexpected signature received}\n"); p(tcps_sig_err_nosigopt, "\t{:no-signature-provided/%ju} " "{N:/time%s no signature provided by segment}\n"); xo_close_container("tcp-signature"); xo_open_container("pmtud"); p(tcps_pmtud_blackhole_activated, "\t{:pmtud-activated/%ju} " "{N:/Path MTU discovery black hole detection activation%s}\n"); p(tcps_pmtud_blackhole_activated_min_mss, "\t{:pmtud-activated-min-mss/%ju} " "{N:/Path MTU discovery black hole detection min MSS activation%s}\n"); p(tcps_pmtud_blackhole_failed, "\t{:pmtud-failed/%ju} " "{N:/Path MTU discovery black hole detection failure%s}\n"); + + xo_close_container("pmtud"); + xo_open_container("tw"); + + p(tcps_tw_responds, "\t{:tw_responds/%ju} " + "{N:/time%s connection in TIME-WAIT responded with ACK}\n"); + p(tcps_tw_recycles, "\t{:tw_recycles/%ju} " + "{N:/time%s connection in TIME-WAIT was actively recycled}\n"); + p(tcps_tw_resets, "\t{:tw_resets/%ju} " + "{N:/time%s connection in TIME-WAIT responded with RST}\n"); + + xo_close_container("tw"); #undef p #undef p1a #undef p2 #undef p2a #undef p3 - xo_close_container("pmtud"); - xo_open_container("TCP connection count by state"); xo_emit("{T:/TCP connection count by state}:\n"); for (int i = 0; i < TCP_NSTATES; i++) { /* * XXXGL: is there a way in libxo to use %s * in the "content string" of a format * string? I failed to do that, that's why * a temporary buffer is used to construct * format string for xo_emit(). */ char fmtbuf[80]; if (sflag > 1 && tcps_states[i] == 0) continue; snprintf(fmtbuf, sizeof(fmtbuf), "\t{:%s/%%ju} " "{Np:/connection ,connections} in %s state\n", tcpstates[i], tcpstates[i]); xo_emit(fmtbuf, (uintmax_t )tcps_states[i]); } xo_close_container("TCP connection count by state"); xo_close_container("tcp"); } /* * Dump UDP statistics structure. */ void udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct udpstat udpstat; uint64_t delivered; #ifdef INET6 if (udp_done != 0) return; else udp_done = 1; #endif if (fetch_stats("net.inet.udp.stats", off, &udpstat, sizeof(udpstat), kread_counters) != 0) return; xo_open_container("udp"); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f, plural(udpstat.f)) #define p1a(f, m) if (udpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)udpstat.f) p(udps_ipackets, "{:received-datagrams/%ju} " "{N:/datagram%s received}\n"); p1a(udps_hdrops, "{:dropped-incomplete-headers/%ju} " "{N:/with incomplete header}\n"); p1a(udps_badlen, "{:dropped-bad-data-length/%ju} " "{N:/with bad data length field}\n"); p1a(udps_badsum, "{:dropped-bad-checksum/%ju} " "{N:/with bad checksum}\n"); p1a(udps_nosum, "{:dropped-no-checksum/%ju} " "{N:/with no checksum}\n"); p1a(udps_noport, "{:dropped-no-socket/%ju} " "{N:/dropped due to no socket}\n"); p(udps_noportbcast, "{:dropped-broadcast-multicast/%ju} " "{N:/broadcast\\/multicast datagram%s undelivered}\n"); p1a(udps_fullsock, "{:dropped-full-socket-buffer/%ju} " "{N:/dropped due to full socket buffers}\n"); p1a(udpps_pcbhashmiss, "{:not-for-hashed-pcb/%ju} " "{N:/not for hashed pcb}\n"); delivered = udpstat.udps_ipackets - udpstat.udps_hdrops - udpstat.udps_badlen - udpstat.udps_badsum - udpstat.udps_noport - udpstat.udps_noportbcast - udpstat.udps_fullsock; if (delivered || sflag <= 1) xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n", (uint64_t)delivered); p(udps_opackets, "{:output-packets/%ju} {N:/datagram%s output}\n"); /* the next statistic is cumulative in udps_noportbcast */ p(udps_filtermcast, "{:multicast-source-filter-matches/%ju} " "{N:/time%s multicast source filter matched}\n"); #undef p #undef p1a xo_close_container("udp"); } /* * Dump CARP statistics structure. */ void carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct carpstats carpstat; if (fetch_stats("net.inet.carp.stats", off, &carpstat, sizeof(carpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f, plural(carpstat.f)) #define p2(f, m) if (carpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)carpstat.f) p(carps_ipackets, "\t{:received-inet-packets/%ju} " "{N:/packet%s received (IPv4)}\n"); p(carps_ipackets6, "\t{:received-inet6-packets/%ju} " "{N:/packet%s received (IPv6)}\n"); p(carps_badttl, "\t\t{:dropped-wrong-ttl/%ju} " "{N:/packet%s discarded for wrong TTL}\n"); p(carps_hdrops, "\t\t{:dropped-short-header/%ju} " "{N:/packet%s shorter than header}\n"); p(carps_badsum, "\t\t{:dropped-bad-checksum/%ju} " "{N:/discarded for bad checksum%s}\n"); p(carps_badver, "\t\t{:dropped-bad-version/%ju} " "{N:/discarded packet%s with a bad version}\n"); p2(carps_badlen, "\t\t{:dropped-short-packet/%ju} " "{N:/discarded because packet too short}\n"); p2(carps_badauth, "\t\t{:dropped-bad-authentication/%ju} " "{N:/discarded for bad authentication}\n"); p2(carps_badvhid, "\t\t{:dropped-bad-vhid/%ju} " "{N:/discarded for bad vhid}\n"); p2(carps_badaddrs, "\t\t{:dropped-bad-address-list/%ju} " "{N:/discarded because of a bad address list}\n"); p(carps_opackets, "\t{:sent-inet-packets/%ju} " "{N:/packet%s sent (IPv4)}\n"); p(carps_opackets6, "\t{:sent-inet6-packets/%ju} " "{N:/packet%s sent (IPv6)}\n"); p2(carps_onomem, "\t\t{:send-failed-memory-error/%ju} " "{N:/send failed due to mbuf memory error}\n"); #if notyet p(carps_ostates, "\t\t{:send-state-updates/%s} " "{N:/state update%s sent}\n"); #endif #undef p #undef p2 xo_close_container(name); } /* * Dump IP statistics structure. */ void ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct ipstat ipstat; if (fetch_stats("net.inet.ip.stats", off, &ipstat, sizeof(ipstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f, plural(ipstat.f)) #define p1a(f, m) if (ipstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t )ipstat.f) p(ips_total, "\t{:received-packets/%ju} " "{N:/total packet%s received}\n"); p(ips_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/bad header checksum%s}\n"); p1a(ips_toosmall, "\t{:dropped-below-minimum-size/%ju} " "{N:/with size smaller than minimum}\n"); p1a(ips_tooshort, "\t{:dropped-short-packets/%ju} " "{N:/with data size < data length}\n"); p1a(ips_toolong, "\t{:dropped-too-long/%ju} " "{N:/with ip length > max ip packet size}\n"); p1a(ips_badhlen, "\t{:dropped-short-header-length/%ju} " "{N:/with header length < data size}\n"); p1a(ips_badlen, "\t{:dropped-short-data/%ju} " "{N:/with data length < header length}\n"); p1a(ips_badoptions, "\t{:dropped-bad-options/%ju} " "{N:/with bad options}\n"); p1a(ips_badvers, "\t{:dropped-bad-version/%ju} " "{N:/with incorrect version number}\n"); p(ips_fragments, "\t{:received-fragments/%ju} " "{N:/fragment%s received}\n"); p(ips_fragdropped, "\t{:dropped-fragments/%ju} " "{N:/fragment%s dropped (dup or out of space)}\n"); p(ips_fragtimeout, "\t{:dropped-fragments-after-timeout/%ju} " "{N:/fragment%s dropped after timeout}\n"); p(ips_reassembled, "\t{:reassembled-packets/%ju} " "{N:/packet%s reassembled ok}\n"); p(ips_delivered, "\t{:received-local-packets/%ju} " "{N:/packet%s for this host}\n"); p(ips_noproto, "\t{:dropped-unknown-protocol/%ju} " "{N:/packet%s for unknown\\/unsupported protocol}\n"); p(ips_forward, "\t{:forwarded-packets/%ju} " "{N:/packet%s forwarded}"); p(ips_fastforward, " ({:fast-forwarded-packets/%ju} " "{N:/packet%s fast forwarded})"); if (ipstat.ips_forward || sflag <= 1) xo_emit("\n"); p(ips_cantforward, "\t{:packets-cannot-forward/%ju} " "{N:/packet%s not forwardable}\n"); p(ips_notmember, "\t{:received-unknown-multicast-group/%ju} " "{N:/packet%s received for unknown multicast group}\n"); p(ips_redirectsent, "\t{:redirects-sent/%ju} " "{N:/redirect%s sent}\n"); p(ips_localout, "\t{:sent-packets/%ju} " "{N:/packet%s sent from this host}\n"); p(ips_rawout, "\t{:send-packets-fabricated-header/%ju} " "{N:/packet%s sent with fabricated ip header}\n"); p(ips_odropped, "\t{:discard-no-mbufs/%ju} " "{N:/output packet%s dropped due to no bufs, etc.}\n"); p(ips_noroute, "\t{:discard-no-route/%ju} " "{N:/output packet%s discarded due to no route}\n"); p(ips_fragmented, "\t{:sent-fragments/%ju} " "{N:/output datagram%s fragmented}\n"); p(ips_ofragments, "\t{:fragments-created/%ju} " "{N:/fragment%s created}\n"); p(ips_cantfrag, "\t{:discard-cannot-fragment/%ju} " "{N:/datagram%s that can't be fragmented}\n"); p(ips_nogif, "\t{:discard-tunnel-no-gif/%ju} " "{N:/tunneling packet%s that can't find gif}\n"); p(ips_badaddr, "\t{:discard-bad-address/%ju} " "{N:/datagram%s with bad address in header}\n"); #undef p #undef p1a xo_close_container(name); } /* * Dump ARP statistics structure. */ void arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct arpstat arpstat; if (fetch_stats("net.link.ether.arp.stats", off, &arpstat, sizeof(arpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, plural(arpstat.f)) #define p2(f, m) if (arpstat.f || sflag <= 1) \ xo_emit("\t" m, (uintmax_t)arpstat.f, pluralies(arpstat.f)) p(txrequests, "{:sent-requests/%ju} {N:/ARP request%s sent}\n"); p(txerrors, "{:sent-failures/%ju} {N:/ARP request%s failed to sent}\n"); p2(txreplies, "{:sent-replies/%ju} {N:/ARP repl%s sent}\n"); p(rxrequests, "{:received-requests/%ju} " "{N:/ARP request%s received}\n"); p2(rxreplies, "{:received-replies/%ju} " "{N:/ARP repl%s received}\n"); p(received, "{:received-packets/%ju} " "{N:/ARP packet%s received}\n"); p(dropped, "{:dropped-no-entry/%ju} " "{N:/total packet%s dropped due to no ARP entry}\n"); p(timeouts, "{:entries-timeout/%ju} " "{N:/ARP entry%s timed out}\n"); p(dupips, "{:dropped-duplicate-address/%ju} " "{N:/Duplicate IP%s seen}\n"); #undef p #undef p2 xo_close_container(name); } static const char *icmpnames[ICMP_MAXTYPE + 1] = { "echo reply", /* RFC 792 */ "#1", "#2", "destination unreachable", /* RFC 792 */ "source quench", /* RFC 792 */ "routing redirect", /* RFC 792 */ "#6", "#7", "echo", /* RFC 792 */ "router advertisement", /* RFC 1256 */ "router solicitation", /* RFC 1256 */ "time exceeded", /* RFC 792 */ "parameter problem", /* RFC 792 */ "time stamp", /* RFC 792 */ "time stamp reply", /* RFC 792 */ "information request", /* RFC 792 */ "information request reply", /* RFC 792 */ "address mask request", /* RFC 950 */ "address mask reply", /* RFC 950 */ "#19", "#20", "#21", "#22", "#23", "#24", "#25", "#26", "#27", "#28", "#29", "icmp traceroute", /* RFC 1393 */ "datagram conversion error", /* RFC 1475 */ "mobile host redirect", "IPv6 where-are-you", "IPv6 i-am-here", "mobile registration req", "mobile registration reply", "domain name request", /* RFC 1788 */ "domain name reply", /* RFC 1788 */ "icmp SKIP", "icmp photuris", /* RFC 2521 */ }; /* * Dump ICMP statistics. */ void icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct icmpstat icmpstat; size_t len; int i, first; if (fetch_stats("net.inet.icmp.stats", off, &icmpstat, sizeof(icmpstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plural(icmpstat.f)) #define p1a(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f) #define p2(f, m) if (icmpstat.f || sflag <= 1) \ xo_emit(m, icmpstat.f, plurales(icmpstat.f)) p(icps_error, "\t{:icmp-calls/%lu} " "{N:/call%s to icmp_error}\n"); p(icps_oldicmp, "\t{:errors-not-from-message/%lu} " "{N:/error%s not generated in response to an icmp message}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_outhist[i] != 0) { if (first) { xo_open_list("output-histogram"); xo_emit("\tOutput histogram:\n"); first = 0; } xo_open_instance("output-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_outhist[i]); else xo_emit("\t\tunknown ICMP #{k:name/%d}: " "{:count/%lu}\n", i, icmpstat.icps_outhist[i]); xo_close_instance("output-histogram"); } } if (!first) xo_close_list("output-histogram"); p(icps_badcode, "\t{:dropped-bad-code/%lu} " "{N:/message%s with bad code fields}\n"); p(icps_tooshort, "\t{:dropped-too-short/%lu} " "{N:/message%s less than the minimum length}\n"); p(icps_checksum, "\t{:dropped-bad-checksum/%lu} " "{N:/message%s with bad checksum}\n"); p(icps_badlen, "\t{:dropped-bad-length/%lu} " "{N:/message%s with bad length}\n"); p1a(icps_bmcastecho, "\t{:dropped-multicast-echo/%lu} " "{N:/multicast echo requests ignored}\n"); p1a(icps_bmcasttstamp, "\t{:dropped-multicast-timestamp/%lu} " "{N:/multicast timestamp requests ignored}\n"); for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) { if (icmpstat.icps_inhist[i] != 0) { if (first) { xo_open_list("input-histogram"); xo_emit("\tInput histogram:\n"); first = 0; } xo_open_instance("input-histogram"); if (icmpnames[i] != NULL) xo_emit("\t\t{k:name/%s}: {:count/%lu}\n", icmpnames[i], icmpstat.icps_inhist[i]); else xo_emit( "\t\tunknown ICMP #{k:name/%d}: {:count/%lu}\n", i, icmpstat.icps_inhist[i]); xo_close_instance("input-histogram"); } } if (!first) xo_close_list("input-histogram"); p(icps_reflect, "\t{:sent-packets/%lu} " "{N:/message response%s generated}\n"); p2(icps_badaddr, "\t{:discard-invalid-return-address/%lu} " "{N:/invalid return address%s}\n"); p(icps_noroute, "\t{:discard-no-route/%lu} " "{N:/no return route%s}\n"); #undef p #undef p1a #undef p2 if (live) { len = sizeof i; if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL, 0) < 0) return; xo_emit("\tICMP address mask responses are " "{q:icmp-address-responses/%sabled}\n", i ? "en" : "dis"); } xo_close_container(name); } /* * Dump IGMP statistics structure. */ void igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct igmpstat igmpstat; int error, zflag0; if (fetch_stats("net.inet.igmp.stats", 0, &igmpstat, sizeof(igmpstat), kread) != 0) return; /* * Reread net.inet.igmp.stats when zflag == 1. * This is because this MIB contains version number and * length of the structure which are not set when clearing * the counters. */ zflag0 = zflag; if (zflag) { zflag = 0; error = fetch_stats("net.inet.igmp.stats", 0, &igmpstat, sizeof(igmpstat), kread); zflag = zflag0; if (error) return; } if (igmpstat.igps_version != IGPS_VERSION_3) { xo_warnx("%s: version mismatch (%d != %d)", __func__, igmpstat.igps_version, IGPS_VERSION_3); return; } if (igmpstat.igps_len != IGPS_VERSION3_LEN) { xo_warnx("%s: size mismatch (%d != %d)", __func__, igmpstat.igps_len, IGPS_VERSION3_LEN); return; } xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, plural(igmpstat.f)) #define py64(f, m) if (igmpstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t) igmpstat.f, pluralies(igmpstat.f)) p64(igps_rcv_total, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p64(igps_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p64(igps_rcv_badttl, "\t{:dropped-wrong-ttl/%ju} " "{N:/message%s received with wrong TTL}\n"); p64(igps_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); py64(igps_rcv_v1v2_queries, "\t{:received-membership-queries/%ju} " "{N:/V1\\/V2 membership quer%s received}\n"); py64(igps_rcv_v3_queries, "\t{:received-v3-membership-queries/%ju} " "{N:/V3 membership quer%s received}\n"); py64(igps_rcv_badqueries, "\t{:dropped-membership-queries/%ju} " "{N:/membership quer%s received with invalid field(s)}\n"); py64(igps_rcv_gen_queries, "\t{:received-general-queries/%ju} " "{N:/general quer%s received}\n"); py64(igps_rcv_group_queries, "\t{:received-group-queries/%ju} " "{N:/group quer%s received}\n"); py64(igps_rcv_gsr_queries, "\t{:received-group-source-queries/%ju} " "{N:/group-source quer%s received}\n"); py64(igps_drop_gsr_queries, "\t{:dropped-group-source-queries/%ju} " "{N:/group-source quer%s dropped}\n"); p64(igps_rcv_reports, "\t{:received-membership-requests/%ju} " "{N:/membership report%s received}\n"); p64(igps_rcv_badreports, "\t{:dropped-membership-reports/%ju} " "{N:/membership report%s received with invalid field(s)}\n"); p64(igps_rcv_ourreports, "\t" "{:received-membership-reports-matching/%ju} " "{N:/membership report%s received for groups to which we belong}" "\n"); p64(igps_rcv_nora, "\t{:received-v3-reports-no-router-alert/%ju} " "{N:/V3 report%s received without Router Alert}\n"); p64(igps_snd_reports, "\t{:sent-membership-reports/%ju} " "{N:/membership report%s sent}\n"); #undef p64 #undef py64 xo_close_container(name); } /* * Dump PIM statistics structure. */ void pim_stats(u_long off __unused, const char *name, int af1 __unused, int proto __unused) { struct pimstat pimstat; if (fetch_stats("net.inet.pim.stats", off, &pimstat, sizeof(pimstat), kread_counters) != 0) return; xo_open_container(name); xo_emit("{T:/%s}:\n", name); #define p(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, plural(pimstat.f)) #define py(f, m) if (pimstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pimstat.f, pimstat.f != 1 ? "ies" : "y") p(pims_rcv_total_msgs, "\t{:received-messages/%ju} " "{N:/message%s received}\n"); p(pims_rcv_total_bytes, "\t{:received-bytes/%ju} " "{N:/byte%s received}\n"); p(pims_rcv_tooshort, "\t{:dropped-too-short/%ju} " "{N:/message%s received with too few bytes}\n"); p(pims_rcv_badsum, "\t{:dropped-bad-checksum/%ju} " "{N:/message%s received with bad checksum}\n"); p(pims_rcv_badversion, "\t{:dropped-bad-version/%ju} " "{N:/message%s received with bad version}\n"); p(pims_rcv_registers_msgs, "\t{:received-data-register-messages/%ju} " "{N:/data register message%s received}\n"); p(pims_rcv_registers_bytes, "\t{:received-data-register-bytes/%ju} " "{N:/data register byte%s received}\n"); p(pims_rcv_registers_wrongiif, "\t" "{:received-data-register-wrong-interface/%ju} " "{N:/data register message%s received on wrong iif}\n"); p(pims_rcv_badregisters, "\t{:received-bad-registers/%ju} " "{N:/bad register%s received}\n"); p(pims_snd_registers_msgs, "\t{:sent-data-register-messages/%ju} " "{N:/data register message%s sent}\n"); p(pims_snd_registers_bytes, "\t{:sent-data-register-bytes/%ju} " "{N:/data register byte%s sent}\n"); #undef p #undef py xo_close_container(name); } #ifdef INET /* * Pretty print an Internet address (net address + port). */ static void inetprint(const char *container, struct in_addr *in, int port, const char *proto, int num_port, const int af1) { struct servent *sp = 0; char line[80], *cp; int width; size_t alen, plen; if (container) xo_open_container(container); if (Wflag) snprintf(line, sizeof(line), "%s.", inetname(in)); else snprintf(line, sizeof(line), "%.*s.", (Aflag && !num_port) ? 12 : 16, inetname(in)); alen = strlen(line); cp = line + alen; if (!num_port && port) sp = getservbyport((int)port, proto); if (sp || port == 0) snprintf(cp, sizeof(line) - alen, "%.15s ", sp ? sp->s_name : "*"); else snprintf(cp, sizeof(line) - alen, "%d ", ntohs((u_short)port)); width = (Aflag && !Wflag) ? 18 : ((!Wflag || af1 == AF_INET) ? 22 : 45); if (Wflag) xo_emit("{d:target/%-*s} ", width, line); else xo_emit("{d:target/%-*.*s} ", width, width, line); plen = strlen(cp) - 1; alen--; xo_emit("{e:address/%*.*s}{e:port/%*.*s}", alen, alen, line, plen, plen, cp); if (container) xo_close_container(container); } /* * Construct an Internet address representation. * If numeric_addr has been supplied, give * numeric value, otherwise try for symbolic name. */ char * inetname(struct in_addr *inp) { char *cp; static char line[MAXHOSTNAMELEN]; struct hostent *hp; cp = 0; if (!numeric_addr && inp->s_addr != INADDR_ANY) { hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET); if (hp) { cp = hp->h_name; trimdomain(cp, strlen(cp)); } } if (inp->s_addr == INADDR_ANY) strcpy(line, "*"); else if (cp) { strlcpy(line, cp, sizeof(line)); } else { inp->s_addr = ntohl(inp->s_addr); #define C(x) ((u_int)((x) & 0xff)) snprintf(line, sizeof(line), "%u.%u.%u.%u", C(inp->s_addr >> 24), C(inp->s_addr >> 16), C(inp->s_addr >> 8), C(inp->s_addr)); } return (line); } #endif