Page MenuHomeFreeBSD

D20655.id63550.diff
No OneTemporary

D20655.id63550.diff

Index: lib/libstats/Makefile
===================================================================
--- lib/libstats/Makefile
+++ lib/libstats/Makefile
@@ -3,12 +3,12 @@
LIB= stats
SHLIBDIR?= /lib
SHLIB_MAJOR= 0
-SRCS= subr_stats.c
+SRCS= subr_stats.c tcp_stats.c
# To debug, comment WITHOUT_ASSERT_DEBUG= and uncomment CFLAGS:=
WITHOUT_ASSERT_DEBUG=
#CFLAGS:=${CFLAGS:C/-O[0-9]/-O0 -g3/} -DDIAGNOSTIC
-.PATH: ${.CURDIR}/../../sys/kern
+.PATH: ${.CURDIR}/../../sys/kern ${.CURDIR}/../../sys/netinet
.include <bsd.lib.mk>
Index: share/man/man4/tcp.4
===================================================================
--- share/man/man4/tcp.4
+++ share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd August 26, 2019
+.Dd September 30, 2019
.Dt TCP 4
.Os
.Sh NAME
@@ -291,6 +291,10 @@
.Pp
If an SADB entry cannot be found for the destination,
the system does not send any outgoing segments and drops any inbound segments.
.Pp
Each dropped segment is taken into account in the TCP protocol statistics.
+.It Dv TCP_STATS
+Manage collection of connection level statistics using the
+.Xr stats 3
+framework.
.It Dv TCP_TXTLS_ENABLE
@@ -655,6 +659,17 @@
When initializing the TCP timestamps, use a per connection offset instead of a
per host pair offset.
Default is to use per connection offsets as recommended in RFC 7323.
+.It Va perconn_stats_enable
+Controls the default collection of statistics for all connections using the
+.Xr stats 3
+framework.
+0 disables, 1 enables, 2 enables random sampling across log id connection
+groups with all connections in a group receiving the same setting.
+.It Va perconn_stats_sample_rates
+A CSV list of template_spec=percent key-value pairs which controls the per
+template sampling rates when
+.Xr stats 3
+sampling is enabled.
.El
.Sh ERRORS
A socket operation may fail with one of the following errors returned:
@@ -694,6 +709,7 @@
.Sh SEE ALSO
.Xr getsockopt 2 ,
.Xr socket 2 ,
+.Xr stats 3 ,
.Xr sysctl 3 ,
.Xr blackhole 4 ,
.Xr inet 4 ,
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4286,6 +4286,7 @@
compile-with "${NORMAL_C} ${NO_WNONNULL}"
netinet/tcp_reass.c optional inet | inet6
netinet/tcp_sack.c optional inet | inet6
+netinet/tcp_stats.c optional stats inet | stats inet6
netinet/tcp_subr.c optional inet | inet6
netinet/tcp_syncache.c optional inet | inet6
netinet/tcp_timer.c optional inet | inet6
Index: sys/netinet/cc/cc.h
===================================================================
--- sys/netinet/cc/cc.h
+++ sys/netinet/cc/cc.h
@@ -51,9 +51,7 @@
#ifndef _NETINET_CC_CC_H_
#define _NETINET_CC_CC_H_
-#if !defined(_KERNEL)
-#error "no user-serviceable parts inside"
-#endif
+#ifdef _KERNEL
/* Global CC vars. */
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
@@ -108,6 +106,7 @@
#define CC_DUPACK 0x0002 /* Duplicate ACK. */
#define CC_PARTIALACK 0x0004 /* Not yet. */
#define CC_SACK 0x0008 /* Not yet. */
+#endif /* _KERNEL */
/*
* Congestion signal types passed to the cong_signal() hook. The highest order 8
@@ -121,6 +120,7 @@
#define CC_SIGPRIVMASK 0xFF000000 /* Mask to check if sig is private. */
+#ifdef _KERNEL
/*
* Structure to hold data and function pointers that together represent a
* congestion control algorithm.
@@ -184,4 +184,5 @@
#define CC_ALGOOPT_LIMIT 2048
+#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -168,6 +168,7 @@
#define TCP_NOOPT 8 /* don't use TCP options */
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
+#define TCP_STATS 33 /* retrieve stats blob structure */
#define TCP_LOG 34 /* configure event logging for connection */
#define TCP_LOGBUF 35 /* retrieve event log for connection */
#define TCP_LOGID 36 /* configure log ID to correlate connections */
@@ -363,5 +364,19 @@
* TCP Control message types
*/
#define TLS_SET_RECORD_TYPE 1
+
+/*
+ * TCP specific variables of interest for tp->t_stats stats(9) accounting.
+ */
+#define VOI_TCP_TXPB 0 /* Transmit payload bytes */
+#define VOI_TCP_RETXPB 1 /* Retransmit payload bytes */
+#define VOI_TCP_FRWIN 2 /* Foreign receive window */
+#define VOI_TCP_LCWIN 3 /* Local congestion window */
+#define VOI_TCP_RTT 4 /* Round trip time */
+#define VOI_TCP_CSIG 5 /* Congestion signal */
+#define VOI_TCP_GPUT 6 /* Goodput */
+#define VOI_TCP_CALCFRWINDIFF 7 /* Congestion avoidance LCWIN - FRWIN */
+#define VOI_TCP_GPUT_ND 8 /* Goodput normalised delta */
+#define VOI_TCP_ACKLEN 9 /* Average ACKed bytes per ACK */
#endif /* !_NETINET_TCP_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -58,6 +58,7 @@
#include "opt_tcpdebug.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
@@ -66,6 +67,7 @@
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/protosw.h>
+#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -73,6 +75,7 @@
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
+#include <sys/stats.h>
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
@@ -293,6 +296,10 @@
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
uint16_t type)
{
+#ifdef STATS
+ int32_t gput;
+#endif
+
INP_WLOCK_ASSERT(tp->t_inpcb);
tp->ccv->nsegs = nsegs;
@@ -303,6 +310,35 @@
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
+#ifdef STATS
+ stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
+ ((int32_t)tp->snd_cwnd) - tp->snd_wnd);
+ if (!IN_RECOVERY(tp->t_flags))
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
+ tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
+ if ((tp->t_flags & TF_GPUTINPROG) &&
+ SEQ_GEQ(th->th_ack, tp->gput_ack)) {
+ /*
+ * Compute goodput in bits per millisecond.
+ */
+ gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
+ max(1, tcp_ts_getticks() - tp->gput_ts);
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
+ gput);
+ /*
+ * XXXLAS: This is a temporary hack, and should be
+ * chained off VOI_TCP_GPUT when stats(9) grows an API
+ * to deal with chained VOIs.
+ */
+ if (tp->t_stats_gput_prev > 0)
+ stats_voi_update_abs_s32(tp->t_stats,
+ VOI_TCP_GPUT_ND,
+ ((gput - tp->t_stats_gput_prev) * 100) /
+ tp->t_stats_gput_prev);
+ tp->t_flags &= ~TF_GPUTINPROG;
+ tp->t_stats_gput_prev = gput;
+ }
+#endif /* STATS */
if (tp->snd_cwnd > tp->snd_ssthresh) {
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
@@ -321,6 +357,9 @@
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
+#ifdef STATS
+ stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
+#endif
}
void
@@ -386,6 +425,10 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
+#endif
+
switch(type) {
case CC_NDUPACK:
if (!IN_FASTRECOVERY(tp->t_flags)) {
@@ -1573,6 +1616,9 @@
* For the SYN_SENT state the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
+#ifdef STATS
+ stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
+#endif
/*
* TCP ECN processing.
@@ -3446,6 +3492,9 @@
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
+#endif
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
/*
* srtt is stored as fixed point with 5 bits after the
Index: sys/netinet/tcp_log_buf.c
===================================================================
--- sys/netinet/tcp_log_buf.c
+++ sys/netinet/tcp_log_buf.c
@@ -30,10 +30,12 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/qmath.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
@@ -41,6 +43,7 @@
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
+#include <sys/stats.h>
#include <sys/counter.h>
#include <dev/tcp_log/tcp_log_dev.h>
@@ -475,7 +478,7 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
-#ifdef NETFLIX
+#ifdef STATS
if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
#endif
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/arb.h>
#include <sys/domain.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
@@ -54,10 +55,12 @@
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
+#include <sys/stats.h>
#include <net/if.h>
#include <net/route.h>
@@ -991,15 +994,31 @@
struct sockbuf *msb;
u_int moff;
- if ((tp->t_flags & TF_FORCEDATA) && len == 1)
+ if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
- else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
+#ifdef STATS
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ stats_voi_update_abs_u32(tp->t_stats,
+ VOI_TCP_RETXPB, len);
+ else
+ stats_voi_update_abs_u64(tp->t_stats,
+ VOI_TCP_TXPB, len);
+#endif /* STATS */
+ } else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
+ len);
+#endif /* STATS */
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
+#ifdef STATS
+ stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
+ len);
+#endif /* STATS */
}
#ifdef INET6
if (MHLEN < hdrlen + max_linkhdr)
@@ -1471,6 +1490,13 @@
tp->t_rtttime = ticks;
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
+ }
+ if (!(tp->t_flags & TF_GPUTINPROG) && len) {
+ tp->t_flags |= TF_GPUTINPROG;
+ tp->gput_seq = startseq;
+ tp->gput_ack = startseq +
+ ulmin(sbavail(&so->so_snd) - off, sendwin);
+ tp->gput_ts = tcp_ts_getticks();
}
}
Index: sys/netinet/tcp_stats.c
===================================================================
--- /dev/null
+++ sys/netinet/tcp_stats.c
@@ -0,0 +1,274 @@
+/*-
+ * Copyright (c) 2016-2018 Netflix, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Author: Lawrence Stewart <lstewart@netflix.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/arb.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/qmath.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#ifdef _KERNEL
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/systm.h>
+#endif
+#include <sys/stats.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+
+#include <netinet/cc/cc.h>
+
+/*
+ * Template id of the default TCP stats template.  Starts at -1 and is set
+ * by tcp_stats_init() from stats_tpl_alloc("TCP_DEFAULT", 0).
+ */
+VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;
+
+#ifndef _KERNEL
+/*
+ * Non-kernel build: provide the V_ accessor macros locally.
+ * NOTE(review): presumably tcp_var.h does not expose them to userland --
+ * confirm.
+ */
+#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
+#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
+#else /* _KERNEL */
+
+/*
+ * Per-connection stats mode, exported below as the
+ * net.inet.tcp.perconn_stats_enable sysctl.  The initial value is 2
+ * (random sampling across log id connection groups).
+ */
+VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
+/*
+ * Current list of per-template sample rates and the number of entries in
+ * it.  Both are read and replaced under tcp_stats_tpl_sampling_lock (see
+ * tcp_stats_sample_rollthedice() and tcp_stats_tpl_sr_cb()).
+ */
+VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
+VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
+#define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
+#define V_tcp_stats_nrates VNET(tcp_stats_nrates)
+
+/* Read-mostly lock guarding the sample rates list and its count. */
+static struct rmlock tcp_stats_tpl_sampling_lock;
+static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
+ struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
+ "Enable per-connection TCP stats gathering; 1 enables for all connections, "
+ "2 enables random sampling across log id connection groups");
+/*
+ * The handler is stats_tpl_sample_rates(); it is given tcp_stats_tpl_sr_cb
+ * as arg1 and sizeof(struct rm_priotracker) as arg2.
+ * NOTE(review): arg2 is presumably the size of the ctx buffer the handler
+ * passes to the callback (the callback casts ctx to a struct
+ * rm_priotracker *) -- confirm against stats(9).
+ */
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
+ CTLTYPE_STRING | CTLFLAG_RW, tcp_stats_tpl_sr_cb,
+ sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
+ "TCP stats per template random sampling rates, in CSV tpl_spec=percent "
+ "key-value pairs (see stats(9) for template spec details)");
+#endif /* _KERNEL */
+
+/*
+ * Allocate the "TCP_DEFAULT" stats template and attach the TCP variables of
+ * interest (VOI_TCP_*) and their statistics to it.
+ *
+ * In the kernel this is an ordinary function; in non-kernel builds it is a
+ * static constructor so the template is also added to the userland template
+ * list.
+ *
+ * Returns 0 on success.  If the template cannot be allocated, the negated
+ * stats_tpl_alloc() result is returned and nothing else is attempted.
+ * Otherwise every VOI is attempted even after an earlier failure, and the
+ * most recent non-zero stats_tpl_add_voistats() result is returned.
+ */
+#ifdef _KERNEL
+int
+#else
+static int
+/* Ensure all templates are also added to the userland template list. */
+__attribute__ ((constructor))
+#endif
+tcp_stats_init(void)
+{
+	int err, lasterr;
+
+	lasterr = 0;
+
+	V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
+	if (V_tcp_perconn_stats_dflt_tpl < 0)
+		return (-V_tcp_perconn_stats_dflt_tpl);
+
+	/*
+	 * Each call's result is assigned (not OR-ed) into err so that lasterr
+	 * always holds a real error code rather than a bitwise mix of several.
+	 */
+
+	/* Byte counters: running sums. */
+	struct voistatspec vss_sum[] = {
+		STATS_VSS_SUM(),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
+	    NVSS(vss_sum), vss_sum, 0);
+	lasterr = err ? err : lasterr;
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
+	    NVSS(vss_sum), vss_sum, 0);
+	lasterr = err ? err : lasterr;
+
+	/* Window sizes: maximum observed value. */
+	struct voistatspec vss_max[] = {
+		STATS_VSS_MAX(),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
+	    NVSS(vss_max), vss_max, 0);
+	lasterr = err ? err : lasterr;
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
+	    NVSS(vss_max), vss_max, 0);
+	lasterr = err ? err : lasterr;
+
+	/* Round trip time: max, min and a t-digest. */
+	struct voistatspec vss_rtt[] = {
+		STATS_VSS_MAX(),
+		STATS_VSS_MIN(),
+		STATS_VSS_TDGSTCLUST32(20, 4),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
+	    NVSS(vss_rtt), vss_rtt, 0);
+	lasterr = err ? err : lasterr;
+
+	/* Congestion signals: one histogram bucket per listed CC_* value. */
+	struct voistatspec vss_congsig[] = {
+		STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
+		    DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
+	    NVSS(vss_congsig), vss_congsig, 0);
+	lasterr = err ? err : lasterr;
+
+	/* Goodput: max and a t-digest. */
+	struct voistatspec vss_gput[] = {
+		STATS_VSS_MAX(),
+		STATS_VSS_TDGSTCLUST32(20, 4),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
+	    NVSS(vss_gput), vss_gput, 0);
+	lasterr = err ? err : lasterr;
+
+	/* Goodput normalised delta: t-digest only. */
+	struct voistatspec vss_gput_nd[] = {
+		STATS_VSS_TDGSTCLUST32(10, 4),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
+	    NVSS(vss_gput_nd), vss_gput_nd, 0);
+	lasterr = err ? err : lasterr;
+
+	/* LCWIN - FRWIN difference: histogram with a single bucket bound at 0. */
+	struct voistatspec vss_windiff[] = {
+		STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
+	    NVSS(vss_windiff), vss_windiff, 0);
+	lasterr = err ? err : lasterr;
+
+	/* ACK length: max and a linear histogram. */
+	struct voistatspec vss_acklen[] = {
+		STATS_VSS_MAX(),
+		STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
+	    NVSS(vss_acklen), vss_acklen, 0);
+	lasterr = err ? err : lasterr;
+
+	return (lasterr);
+}
+
+#ifdef _KERNEL
+/*
+ * Pick a stats template for this connection by sampling the configured
+ * per-template rates with the supplied seed, and on a hit (re)allocate
+ * tp->t_stats from the chosen template.
+ *
+ * Returns the chosen template id (>= 0) when a blob was allocated, -ENOMEM
+ * when the blob allocation failed, -1 when no sample rates are configured,
+ * or the negative value returned by stats_tpl_sample_rollthedice().
+ */
+int
+tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
+    size_t seed_len)
+{
+	struct rm_priotracker rates_tracker;
+	int chosen_tpl;
+
+	/* Nothing to sample from. */
+	if (V_tcp_stats_nrates <= 0)
+		return (-1);
+
+	/* The rates list is only consulted while the read lock is held. */
+	rm_rlock(&tcp_stats_tpl_sampling_lock, &rates_tracker);
+	chosen_tpl = stats_tpl_sample_rollthedice(
+	    V_tcp_perconn_stats_sample_rates, V_tcp_stats_nrates,
+	    seed_bytes, seed_len);
+	rm_runlock(&tcp_stats_tpl_sampling_lock, &rates_tracker);
+
+	if (chosen_tpl < 0)
+		return (chosen_tpl);
+
+	/* Replace any existing blob with one built from the chosen template. */
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+	if (tp->t_stats != NULL)
+		stats_blob_destroy(tp->t_stats);
+	tp->t_stats = stats_blob_alloc(chosen_tpl, 0);
+
+	return (tp->t_stats != NULL ? chosen_tpl : -ENOMEM);
+}
+
+/*
+ * Callback function for stats_tpl_sample_rates() to interact with the TCP
+ * subsystem's stats template sample rates list.
+ *
+ * action selects the operation:
+ *   TPL_SR_RLOCKED_GET  - take the sampling read lock using ctx as the
+ *                         tracker, then report the current list and count
+ *                         through rates/nrates (each may be NULL).  The lock
+ *                         stays held until a TPL_SR_RUNLOCK call.
+ *   TPL_SR_UNLOCKED_GET - report the current list and count without taking
+ *                         the lock.
+ *   TPL_SR_RUNLOCK      - drop the read lock, using ctx as the tracker.
+ *   TPL_SR_PUT          - install *rates and *nrates under the write lock
+ *                         and hand the previous list and count back through
+ *                         the same pointers.
+ *                         NOTE(review): presumably the caller then frees
+ *                         the old list -- confirm against stats(9).
+ *
+ * Returns 0 on success, ENOMEM if ctx is NULL, or EINVAL for an unknown
+ * action or a PUT with a NULL rates/nrates pointer.
+ */
+static int
+tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
+    struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
+{
+	struct stats_tpl_sample_rate *old_rates;
+	int old_nrates;
+
+	/* ctx is required for every action, including those that ignore it. */
+	if (ctx == NULL)
+		return (ENOMEM);
+
+	switch (action) {
+	case TPL_SR_RLOCKED_GET:
+		/*
+		 * Return with rlock held i.e. this call must be paired with a
+		 * "action == TPL_SR_RUNLOCK" call.
+		 */
+		rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
+		rm_rlock(&tcp_stats_tpl_sampling_lock,
+		    (struct rm_priotracker *)ctx);
+		/* FALLTHROUGH */
+	case TPL_SR_UNLOCKED_GET:
+		if (rates != NULL)
+			*rates = V_tcp_perconn_stats_sample_rates;
+		if (nrates != NULL)
+			*nrates = V_tcp_stats_nrates;
+		break;
+	case TPL_SR_RUNLOCK:
+		rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
+		rm_runlock(&tcp_stats_tpl_sampling_lock,
+		    (struct rm_priotracker *)ctx);
+		break;
+	case TPL_SR_PUT:
+		KASSERT(rates != NULL && nrates != NULL,
+		    ("%s: PUT without new rates", __func__));
+		rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
+		/* Still checked explicitly after the KASSERT above. */
+		if (rates == NULL || nrates == NULL)
+			return (EINVAL);
+		/* Swap old and new under the write lock. */
+		rm_wlock(&tcp_stats_tpl_sampling_lock);
+		old_rates = V_tcp_perconn_stats_sample_rates;
+		old_nrates = V_tcp_stats_nrates;
+		V_tcp_perconn_stats_sample_rates = *rates;
+		V_tcp_stats_nrates = *nrates;
+		rm_wunlock(&tcp_stats_tpl_sampling_lock);
+		*rates = old_rates;
+		*nrates = old_nrates;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
+ "tcp_stats_tpl_sampling_lock");
+#endif /* _KERNEL */
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/arb.h>
#include <sys/callout.h>
#include <sys/eventhandler.h>
#ifdef TCP_HHOOK
@@ -54,6 +55,8 @@
#ifdef KERN_TLS
#include <sys/ktls.h>
#endif
+#include <sys/qmath.h>
+#include <sys/stats.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -1005,6 +1008,11 @@
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
#endif
+#ifdef STATS
+ if (tcp_stats_init())
+ printf("%s: WARNING: unable to initialise TCP stats\n",
+ __func__);
+#endif
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -1694,6 +1702,10 @@
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
+#ifdef STATS
+ if (V_tcp_perconn_stats_enable == 1)
+ tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
+#endif
return (tp); /* XXX */
}
@@ -1911,6 +1923,9 @@
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
+#endif
+#ifdef STATS
+ stats_blob_destroy(tp->t_stats);
#endif
CC_ALGO(tp) = NULL;
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -49,11 +49,13 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/arb.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/ktls.h>
+#include <sys/qmath.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#ifdef INET6
@@ -65,6 +67,7 @@
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/syslog.h>
+#include <sys/stats.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -108,6 +111,13 @@
#endif
#include <netipsec/ipsec_support.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
/*
* TCP protocol interface to socket abstraction.
*/
@@ -1762,6 +1772,9 @@
#endif
struct cc_algo *algo;
char *pbuf, buf[TCP_LOG_ID_LEN];
+#ifdef STATS
+ struct statsblob *sbp;
+#endif
size_t len;
/*
@@ -1879,6 +1892,36 @@
error = EINVAL;
break;
+ case TCP_STATS:
+#ifdef STATS
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ if (optval > 0)
+ sbp = stats_blob_alloc(
+ V_tcp_perconn_stats_dflt_tpl, 0);
+ else
+ sbp = NULL;
+
+ INP_WLOCK_RECHECK(inp);
+ if ((tp->t_stats != NULL && sbp == NULL) ||
+ (tp->t_stats == NULL && sbp != NULL)) {
+ struct statsblob *t = tp->t_stats;
+ tp->t_stats = sbp;
+ sbp = t;
+ }
+ INP_WUNLOCK(inp);
+
+ stats_blob_destroy(sbp);
+#else
+ INP_WUNLOCK(inp);
+ return (EOPNOTSUPP);
+#endif /* !STATS */
+ break;
+
case TCP_CONGESTION:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
@@ -2163,6 +2206,55 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
+ case TCP_STATS:
+ {
+#ifdef STATS
+ int nheld;
+ TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
+
+ error = 0;
+ socklen_t outsbsz = sopt->sopt_valsize;
+ if (tp->t_stats == NULL)
+ error = ENOENT;
+ else if (outsbsz >= tp->t_stats->cursz)
+ outsbsz = tp->t_stats->cursz;
+ else if (outsbsz >= sizeof(struct statsblob))
+ outsbsz = sizeof(struct statsblob);
+ else
+ error = EINVAL;
+ INP_WUNLOCK(inp);
+ if (error)
+ break;
+
+ sbp = sopt->sopt_val;
+ nheld = atop(round_page(((vm_offset_t)sbp) +
+ (vm_size_t)outsbsz) - trunc_page(sbp));
+ vm_page_t ma[nheld];
+ if (vm_fault_quick_hold_pages(
+ &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
+ outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
+ nheld) < 0) {
+ error = EFAULT;
+ break;
+ }
+
+ if ((error = copyin_nofault(&(sbp->flags), &sbflags,
+ SIZEOF_MEMBER(struct statsblob, flags))))
+ goto unhold;
+
+ INP_WLOCK_RECHECK(inp);
+ error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
+ sbflags | SB_CLONE_USRDSTNOFAULT);
+ INP_WUNLOCK(inp);
+ sopt->sopt_valsize = outsbsz;
+unhold:
+ vm_page_unhold_pages(ma, nheld);
+#else
+ INP_WUNLOCK(inp);
+ error = EOPNOTSUPP;
+#endif /* !STATS */
+ break;
+ }
case TCP_CONGESTION:
len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
INP_WUNLOCK(inp);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -210,7 +210,12 @@
struct tcp_log_id_node *t_lin;
struct tcp_log_id_bucket *t_lib;
const char *t_output_caller; /* Function that called tcp_output */
+ struct statsblob *t_stats; /* Per-connection stats */
uint32_t t_logsn; /* Log "serial number" */
+ uint32_t gput_ts; /* Time goodput measurement started */
+ tcp_seq gput_seq; /* Outbound measurement seq */
+ tcp_seq gput_ack; /* Inbound measurement ack */
+ int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */
union {
@@ -326,6 +331,7 @@
#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
#define TF_NOPUSH 0x001000 /* don't push */
#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */
+#define TF_GPUTINPROG 0x008000 /* Goodput measurement in progress */
#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
#define TF_LASTIDLE 0x040000 /* connection was previously idle */
@@ -780,6 +786,8 @@
VNET_DECLARE(int, tcp_insecure_syn);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
+VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
+VNET_DECLARE(int, tcp_perconn_stats_enable);
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, tcp_sack_globalholes);
VNET_DECLARE(int, tcp_sack_globalmaxholes);
@@ -815,6 +823,8 @@
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
#define V_tcp_minmss VNET(tcp_minmss)
#define V_tcp_mssdflt VNET(tcp_mssdflt)
+#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
+#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
#define V_tcp_recvspace VNET(tcp_recvspace)
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes)
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes)
@@ -958,10 +968,13 @@
int tcp_compute_pipe(struct tcpcb *);
uint32_t tcp_compute_initwnd(uint32_t);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
+int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
+ size_t seed_len);
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
+int tcp_stats_init(void);
static inline void
tcp_fields_to_host(struct tcphdr *th)

File Metadata

Mime Type
text/plain
Expires
Tue, Feb 11, 1:42 AM (6 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16587168
Default Alt Text
D20655.id63550.diff (26 KB)

Event Timeline