Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F109828781
D20655.id62720.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
26 KB
Referenced Files
None
Subscribers
None
D20655.id62720.diff
View Options
Index: lib/libstats/Makefile
===================================================================
--- /dev/null
+++ lib/libstats/Makefile
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+LIB= stats
+SHLIBDIR?= /lib
+SHLIB_MAJOR= 0
+SRCS= subr_stats.c tcp_stats.c
+WARNS?= 6
+
+# To debug, comment out WITHOUT_ASSERT_DEBUG= and uncomment the CFLAGS:= line.
+WITHOUT_ASSERT_DEBUG=
+#CFLAGS:=${CFLAGS:C/-O[0-9]/-O0 -g3/} -DDIAGNOSTIC
+
+.PATH: ${.CURDIR}/../../sys/kern ${.CURDIR}/../../sys/netinet
+
+.include <bsd.lib.mk>
Index: share/man/man4/tcp.4
===================================================================
--- share/man/man4/tcp.4
+++ share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd August 26, 2019
+.Dd September 30, 2019
.Dt TCP 4
.Os
.Sh NAME
@@ -291,6 +291,10 @@
.Pp
If an SADB entry cannot be found for the destination,
the system does not send any outgoing segments and drops any inbound segments.
.Pp
Each dropped segment is taken into account in the TCP protocol statistics.
+.It Dv TCP_STATS
+Manage collection of connection level statistics using the
+.Xr stats 3
+framework.
.It Dv TCP_TXTLS_ENABLE
@@ -655,6 +659,17 @@
When initializing the TCP timestamps, use a per connection offset instead of a
per host pair offset.
Default is to use per connection offsets as recommended in RFC 7323.
+.It Va perconn_stats_enable
+Controls the default collection of statistics for all connections using the
+.Xr stats 3
+framework.
+0 disables, 1 enables, 2 enables random sampling across log id connection
+groups with all connections in a group receiving the same setting.
+.It Va perconn_stats_sample_rates
+A CSV list of template_spec=percent key-value pairs which controls the per
+template sampling rates when
+.Xr stats 3
+sampling is enabled.
.El
.Sh ERRORS
A socket operation may fail with one of the following errors returned:
@@ -694,6 +709,7 @@
.Sh SEE ALSO
.Xr getsockopt 2 ,
.Xr socket 2 ,
+.Xr stats 3 ,
.Xr sysctl 3 ,
.Xr blackhole 4 ,
.Xr inet 4 ,
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3835,6 +3835,7 @@
kern/subr_sleepqueue.c standard
kern/subr_smp.c standard
kern/subr_stack.c optional ddb | stack | ktr
+kern/subr_stats.c optional stats
kern/subr_taskqueue.c standard
kern/subr_terminal.c optional vt
kern/subr_trap.c standard
@@ -4285,6 +4286,7 @@
compile-with "${NORMAL_C} ${NO_WNONNULL}"
netinet/tcp_reass.c optional inet | inet6
netinet/tcp_sack.c optional inet | inet6
+netinet/tcp_stats.c optional stats inet | stats inet6
netinet/tcp_subr.c optional inet | inet6
netinet/tcp_syncache.c optional inet | inet6
netinet/tcp_timer.c optional inet | inet6
Index: sys/netinet/cc/cc.h
===================================================================
--- sys/netinet/cc/cc.h
+++ sys/netinet/cc/cc.h
@@ -51,9 +51,7 @@
#ifndef _NETINET_CC_CC_H_
#define _NETINET_CC_CC_H_
-#if !defined(_KERNEL)
-#error "no user-serviceable parts inside"
-#endif
+#ifdef _KERNEL
/* Global CC vars. */
extern STAILQ_HEAD(cc_head, cc_algo) cc_list;
@@ -108,6 +106,7 @@
#define CC_DUPACK 0x0002 /* Duplicate ACK. */
#define CC_PARTIALACK 0x0004 /* Not yet. */
#define CC_SACK 0x0008 /* Not yet. */
+#endif /* _KERNEL */
/*
* Congestion signal types passed to the cong_signal() hook. The highest order 8
@@ -121,6 +120,7 @@
#define CC_SIGPRIVMASK 0xFF000000 /* Mask to check if sig is private. */
+#ifdef _KERNEL
/*
* Structure to hold data and function pointers that together represent a
* congestion control algorithm.
@@ -184,4 +184,5 @@
#define CC_ALGOOPT_LIMIT 2048
+#endif /* _KERNEL */
#endif /* _NETINET_CC_CC_H_ */
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -168,6 +168,7 @@
#define TCP_NOOPT 8 /* don't use TCP options */
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
+#define TCP_STATS 33 /* retrieve stats blob structure */
#define TCP_LOG 34 /* configure event logging for connection */
#define TCP_LOGBUF 35 /* retrieve event log for connection */
#define TCP_LOGID 36 /* configure log ID to correlate connections */
@@ -362,5 +363,19 @@
* TCP Control message types
*/
#define TLS_SET_RECORD_TYPE 1
+
+/*
+ * TCP specific variables of interest for tp->t_stats stats(9) accounting.
+ */
+#define VOI_TCP_TXPB 0 /* Transmit payload bytes */
+#define VOI_TCP_RETXPB 1 /* Retransmit payload bytes */
+#define VOI_TCP_FRWIN 2 /* Foreign receive window */
+#define VOI_TCP_LCWIN 3 /* Local congestion window */
+#define VOI_TCP_RTT 4 /* Round trip time */
+#define VOI_TCP_CSIG 5 /* Congestion signal */
+#define VOI_TCP_GPUT 6 /* Goodput */
+#define VOI_TCP_CALCFRWINDIFF 7 /* Congestion avoidance LCWIN - FRWIN */
+#define VOI_TCP_GPUT_ND 8 /* Goodput normalised delta */
+#define VOI_TCP_ACKLEN 9 /* Average ACKed bytes per ACK */
#endif /* !_NETINET_TCP_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -58,6 +58,7 @@
#include "opt_tcpdebug.h"
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/kernel.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
@@ -66,6 +67,7 @@
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/protosw.h>
+#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -73,6 +75,7 @@
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
+#include <sys/stats.h>
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
@@ -293,6 +296,10 @@
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
uint16_t type)
{
+#ifdef STATS
+ int32_t gput;
+#endif
+
INP_WLOCK_ASSERT(tp->t_inpcb);
tp->ccv->nsegs = nsegs;
@@ -303,6 +310,35 @@
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
+#ifdef STATS
+ stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
+ ((int32_t)tp->snd_cwnd) - tp->snd_wnd);
+ if (!IN_RECOVERY(tp->t_flags))
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
+ tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
+ if ((tp->t_flags & TF_GPUTINPROG) &&
+ SEQ_GEQ(th->th_ack, tp->gput_ack)) {
+ /*
+ * Compute goodput in bits per millisecond.
+ */
+ gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
+ max(1, tcp_ts_getticks() - tp->gput_ts);
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
+ gput);
+ /*
+ * XXXLAS: This is a temporary hack, and should be
+ * chained off VOI_TCP_GPUT when stats(9) grows an API
+ * to deal with chained VOIs.
+ */
+ if (tp->t_stats_gput_prev > 0)
+ stats_voi_update_abs_s32(tp->t_stats,
+ VOI_TCP_GPUT_ND,
+ ((gput - tp->t_stats_gput_prev) * 100) /
+ tp->t_stats_gput_prev);
+ tp->t_flags &= ~TF_GPUTINPROG;
+ tp->t_stats_gput_prev = gput;
+ }
+#endif /* STATS */
if (tp->snd_cwnd > tp->snd_ssthresh) {
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
@@ -321,6 +357,9 @@
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
+#ifdef STATS
+ stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
+#endif
}
void
@@ -386,6 +425,10 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
+#endif
+
switch(type) {
case CC_NDUPACK:
if (!IN_FASTRECOVERY(tp->t_flags)) {
@@ -1573,6 +1616,9 @@
* For the SYN_SENT state the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
+#ifdef STATS
+ stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
+#endif
/*
* TCP ECN processing.
@@ -3446,6 +3492,9 @@
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
+#endif
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
/*
* srtt is stored as fixed point with 5 bits after the
Index: sys/netinet/tcp_log_buf.c
===================================================================
--- sys/netinet/tcp_log_buf.c
+++ sys/netinet/tcp_log_buf.c
@@ -30,10 +30,12 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/arb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/qmath.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
@@ -41,6 +43,7 @@
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
+#include <sys/stats.h>
#include <sys/counter.h>
#include <dev/tcp_log/tcp_log_dev.h>
@@ -475,7 +478,7 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
-#ifdef NETFLIX
+#ifdef STATS
if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
#endif
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/arb.h>
#include <sys/domain.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
@@ -54,10 +55,12 @@
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
+#include <sys/stats.h>
#include <net/if.h>
#include <net/route.h>
@@ -991,15 +994,31 @@
struct sockbuf *msb;
u_int moff;
- if ((tp->t_flags & TF_FORCEDATA) && len == 1)
+ if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
- else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
+#ifdef STATS
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ stats_voi_update_abs_u32(tp->t_stats,
+ VOI_TCP_RETXPB, len);
+ else
+ stats_voi_update_abs_u64(tp->t_stats,
+ VOI_TCP_TXPB, len);
+#endif /* STATS */
+ } else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
+ len);
+#endif /* STATS */
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
+#ifdef STATS
+ stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
+ len);
+#endif /* STATS */
}
#ifdef INET6
if (MHLEN < hdrlen + max_linkhdr)
@@ -1471,6 +1490,13 @@
tp->t_rtttime = ticks;
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
+ }
+ if (!(tp->t_flags & TF_GPUTINPROG) && len) {
+ tp->t_flags |= TF_GPUTINPROG;
+ tp->gput_seq = startseq;
+ tp->gput_ack = startseq +
+ ulmin(sbavail(&so->so_snd) - off, sendwin);
+ tp->gput_ts = tcp_ts_getticks();
}
}
Index: sys/netinet/tcp_stats.c
===================================================================
--- /dev/null
+++ sys/netinet/tcp_stats.c
@@ -0,0 +1,310 @@
+/*-
+ * Copyright (c) 2016-2018 Netflix, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Author: Lawrence Stewart <lstewart@netflix.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/arb.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/qmath.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#ifdef _KERNEL
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/systm.h>
+#endif
+#include <sys/stats.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+
+#include <netinet/cc/cc.h>
+
+/*
+ * Template ID of the "TCP_DEFAULT" stats template.  Initially -1; assigned by
+ * tcp_stats_init().
+ */
+VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;
+
+#ifdef _KERNEL
+
+/* Backs the net.inet.tcp.perconn_stats_enable sysctl; defaults to 2. */
+VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
+/* Sample rate list and its length; replaced under the sampling rmlock. */
+VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
+VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
+#define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
+#define V_tcp_stats_nrates VNET(tcp_stats_nrates)
+
+static struct rmlock tcp_stats_tpl_sampling_lock;
+static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
+    struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
+    "Enable per-connection TCP stats gathering; 1 enables for all connections, "
+    "2 enables random sampling across log id connection groups");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
+    CTLTYPE_STRING | CTLFLAG_RW, tcp_stats_tpl_sr_cb,
+    sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
+    "TCP stats per template random sampling rates, in CSV tpl_spec=percent "
+    "key-value pairs (see stats(9) for template spec details)");
+#endif /* _KERNEL */
+
+/*
+ * Create the "TCP_DEFAULT" stats template and add the VOI_TCP_* variables of
+ * interest to it.
+ *
+ * Returns 0 on success.  If the template cannot be allocated, the negated
+ * stats_tpl_alloc() result is returned immediately.  Otherwise every VOI is
+ * attempted and the error of the last stats_tpl_add_voistats() call that
+ * failed is returned.
+ */
+int
+#ifndef _KERNEL
+/* Ensure all templates are also added to the userland template list. */
+__attribute__ ((constructor))
+#endif
+tcp_stats_init(void)
+{
+	int err, lasterr;
+
+	err = lasterr = 0;
+
+	V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
+	if (V_tcp_perconn_stats_dflt_tpl < 0)
+		return (-V_tcp_perconn_stats_dflt_tpl);
+
+	/*
+	 * Keep going after a failure so the remaining VOIs are still added;
+	 * lasterr remembers the most recent error for the caller.
+	 */
+	struct voistatspec vss_sum[] = {
+		STATS_VSS_SUM(),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
+	    NVSS(vss_sum), vss_sum, 0);
+	lasterr = err ? err : lasterr;
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
+	    NVSS(vss_sum), vss_sum, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_max[] = {
+		STATS_VSS_MAX(),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
+	    NVSS(vss_max), vss_max, 0);
+	lasterr = err ? err : lasterr;
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
+	    NVSS(vss_max), vss_max, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_rtt[] = {
+		STATS_VSS_MAX(),
+		STATS_VSS_MIN(),
+		STATS_VSS_TDGSTCLUST32(20, 4),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
+	    NVSS(vss_rtt), vss_rtt, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_congsig[] = {
+		STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
+		    DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
+	    NVSS(vss_congsig), vss_congsig, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_gput[] = {
+		STATS_VSS_MAX(),
+		STATS_VSS_TDGSTCLUST32(20, 4),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
+	    NVSS(vss_gput), vss_gput, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_gput_nd[] = {
+		STATS_VSS_TDGSTCLUST32(10, 4),
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
+	    NVSS(vss_gput_nd), vss_gput_nd, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_windiff[] = {
+		STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
+	    NVSS(vss_windiff), vss_windiff, 0);
+	lasterr = err ? err : lasterr;
+
+	struct voistatspec vss_acklen[] = {
+		STATS_VSS_MAX(),
+		STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
+	};
+	err = stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+	    VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
+	    NVSS(vss_acklen), vss_acklen, 0);
+	lasterr = err ? err : lasterr;
+
+	return (lasterr);
+}
+
+#ifdef _KERNEL
+/*
+ * Roll the dice against the configured per-template sample rates and, if a
+ * template is selected, attach a freshly allocated stats blob of that template
+ * to the connection, destroying any blob it already had.
+ *
+ * The inpcb write lock must be held when a template is selected.  Returns the
+ * stats_tpl_sample_rollthedice() result (-1 when no sample rates are
+ * configured, in which case the dice are not rolled), or -ENOMEM if the blob
+ * allocation fails.
+ */
+int
+tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
+    size_t seed_len)
+{
+	struct rm_priotracker tracker;
+	int tpl;
+
+	tpl = -1;
+
+	if (V_tcp_stats_nrates > 0) {
+		rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker);
+		tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates,
+		    V_tcp_stats_nrates, seed_bytes, seed_len);
+		rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker);
+
+		if (tpl >= 0) {
+			INP_WLOCK_ASSERT(tp->t_inpcb);
+			if (tp->t_stats != NULL)
+				stats_blob_destroy(tp->t_stats);
+			tp->t_stats = stats_blob_alloc(tpl, 0);
+			if (tp->t_stats == NULL)
+				tpl = -ENOMEM;
+		}
+	}
+
+	return (tpl);
+}
+
+/*
+ * Callback function for stats_tpl_sample_rates() to interact with the TCP
+ * subsystem's stats template sample rates list.
+ *
+ * ctx must point to a struct rm_priotracker; it is only used by the read
+ * lock/unlock actions, but a NULL ctx is rejected with ENOMEM for every
+ * action.
+ *
+ * TPL_SR_RLOCKED_GET	read-lock the list, then behave as UNLOCKED_GET.
+ * TPL_SR_UNLOCKED_GET	copy the current list/length out via rates/nrates.
+ * TPL_SR_RUNLOCK	drop the read lock taken by RLOCKED_GET.
+ * TPL_SR_PUT		install *rates/*nrates under the write lock and hand
+ *			the previous list/length back through the same pointers.
+ *
+ * Returns 0 on success and EINVAL for an unknown action or a PUT without
+ * rates/nrates.
+ */
+static int
+tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
+    struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
+{
+	struct stats_tpl_sample_rate *old_rates;
+	int old_nrates;
+
+	if (ctx == NULL)
+		return (ENOMEM);
+
+	switch (action) {
+	case TPL_SR_RLOCKED_GET:
+		/*
+		 * Return with rlock held i.e. this call must be paired with a
+		 * "action == TPL_SR_RUNLOCK" call.
+		 */
+		rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
+		rm_rlock(&tcp_stats_tpl_sampling_lock,
+		    (struct rm_priotracker *)ctx);
+		/* FALLTHROUGH */
+	case TPL_SR_UNLOCKED_GET:
+		if (rates != NULL)
+			*rates = V_tcp_perconn_stats_sample_rates;
+		if (nrates != NULL)
+			*nrates = V_tcp_stats_nrates;
+		break;
+	case TPL_SR_RUNLOCK:
+		rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
+		rm_runlock(&tcp_stats_tpl_sampling_lock,
+		    (struct rm_priotracker *)ctx);
+		break;
+	case TPL_SR_PUT:
+		KASSERT(rates != NULL && nrates != NULL,
+		    ("%s: PUT without new rates", __func__));
+		rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
+		if (rates == NULL || nrates == NULL)
+			return (EINVAL);
+		rm_wlock(&tcp_stats_tpl_sampling_lock);
+		old_rates = V_tcp_perconn_stats_sample_rates;
+		old_nrates = V_tcp_stats_nrates;
+		V_tcp_perconn_stats_sample_rates = *rates;
+		V_tcp_stats_nrates = *nrates;
+		rm_wunlock(&tcp_stats_tpl_sampling_lock);
+		*rates = old_rates;
+		*nrates = old_nrates;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
+    "tcp_stats_tpl_sampling_lock");
+#endif /* _KERNEL */
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/arb.h>
#include <sys/callout.h>
#include <sys/eventhandler.h>
#ifdef TCP_HHOOK
@@ -54,6 +55,8 @@
#ifdef KERN_TLS
#include <sys/ktls.h>
#endif
+#include <sys/qmath.h>
+#include <sys/stats.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -1005,6 +1008,11 @@
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
#endif
+#ifdef STATS
+ if (tcp_stats_init())
+ printf("%s: WARNING: unable to initialise TCP stats\n",
+ __func__);
+#endif
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -1694,6 +1702,10 @@
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
+#ifdef STATS
+ if (V_tcp_perconn_stats_enable == 1)
+ tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
+#endif
return (tp); /* XXX */
}
@@ -1911,6 +1923,9 @@
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
+#endif
+#ifdef STATS
+ stats_blob_destroy(tp->t_stats);
#endif
CC_ALGO(tp) = NULL;
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -49,11 +49,13 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/arb.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/kernel.h>
#include <sys/ktls.h>
+#include <sys/qmath.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#ifdef INET6
@@ -65,6 +67,7 @@
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/syslog.h>
+#include <sys/stats.h>
#ifdef DDB
#include <ddb/ddb.h>
@@ -108,6 +111,13 @@
#endif
#include <netipsec/ipsec_support.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
/*
* TCP protocol interface to socket abstraction.
*/
@@ -1762,6 +1772,9 @@
#endif
struct cc_algo *algo;
char *pbuf, buf[TCP_LOG_ID_LEN];
+#ifdef STATS
+ struct statsblob *sbp;
+#endif
size_t len;
/*
@@ -1879,6 +1892,36 @@
error = EINVAL;
break;
+ case TCP_STATS:
+#ifdef STATS
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ if (optval > 0)
+ sbp = stats_blob_alloc(
+ V_tcp_perconn_stats_dflt_tpl, 0);
+ else
+ sbp = NULL;
+
+ INP_WLOCK_RECHECK(inp);
+ if ((tp->t_stats != NULL && sbp == NULL) ||
+ (tp->t_stats == NULL && sbp != NULL)) {
+ struct statsblob *t = tp->t_stats;
+ tp->t_stats = sbp;
+ sbp = t;
+ }
+ INP_WUNLOCK(inp);
+
+ stats_blob_destroy(sbp);
+#else
+ INP_WUNLOCK(inp);
+ return (EOPNOTSUPP);
+#endif /* !STATS */
+ break;
+
case TCP_CONGESTION:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
@@ -2165,6 +2208,55 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
+ case TCP_STATS:
+ {
+#ifdef STATS
+ int nheld;
+ TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
+
+ error = 0;
+ socklen_t outsbsz = sopt->sopt_valsize;
+ if (tp->t_stats == NULL)
+ error = ENOENT;
+ else if (outsbsz >= tp->t_stats->cursz)
+ outsbsz = tp->t_stats->cursz;
+ else if (outsbsz >= sizeof(struct statsblob))
+ outsbsz = sizeof(struct statsblob);
+ else
+ error = EINVAL;
+ INP_WUNLOCK(inp);
+ if (error)
+ break;
+
+ sbp = sopt->sopt_val;
+ nheld = atop(round_page(((vm_offset_t)sbp) +
+ (vm_size_t)outsbsz) - trunc_page(sbp));
+ vm_page_t ma[nheld];
+ if (vm_fault_quick_hold_pages(
+ &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
+ outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
+ nheld) < 0) {
+ error = EFAULT;
+ break;
+ }
+
+ if ((error = copyin_nofault(&(sbp->flags), &sbflags,
+ SIZEOF_MEMBER(struct statsblob, flags))))
+ goto unhold;
+
+ INP_WLOCK_RECHECK(inp);
+ error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
+ sbflags | SB_CLONE_USRDSTNOFAULT);
+ INP_WUNLOCK(inp);
+ sopt->sopt_valsize = outsbsz;
+unhold:
+ vm_page_unhold_pages(ma, nheld);
+#else
+ INP_WUNLOCK(inp);
+ error = EOPNOTSUPP;
+#endif /* !STATS */
+ break;
+ }
case TCP_CONGESTION:
len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
INP_WUNLOCK(inp);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -210,7 +210,12 @@
struct tcp_log_id_node *t_lin;
struct tcp_log_id_bucket *t_lib;
const char *t_output_caller; /* Function that called tcp_output */
+ struct statsblob *t_stats; /* Per-connection stats */
uint32_t t_logsn; /* Log "serial number" */
+ uint32_t gput_ts; /* Time goodput measurement started */
+ tcp_seq gput_seq; /* Outbound measurement seq */
+ tcp_seq gput_ack; /* Inbound measurement ack */
+ int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */
union {
@@ -326,6 +331,7 @@
#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
#define TF_NOPUSH 0x001000 /* don't push */
#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */
+#define TF_GPUTINPROG 0x008000 /* Goodput measurement in progress */
#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
#define TF_LASTIDLE 0x040000 /* connection was previously idle */
@@ -744,6 +750,11 @@
#define TCPCTL_DROP 15 /* drop tcp connection */
#define TCPCTL_STATES 16 /* connection counts by TCP state */
+/* These stats(9) related bits need to be visible to userland code. */
+int tcp_stats_init(void);
+#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
+#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
+
#ifdef _KERNEL
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
@@ -780,6 +791,8 @@
VNET_DECLARE(int, tcp_insecure_syn);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
+VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
+VNET_DECLARE(int, tcp_perconn_stats_enable);
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, tcp_sack_globalholes);
VNET_DECLARE(int, tcp_sack_globalmaxholes);
@@ -958,6 +971,8 @@
int tcp_compute_pipe(struct tcpcb *);
uint32_t tcp_compute_initwnd(uint32_t);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
+int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
+ size_t seed_len);
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Feb 11, 1:32 AM (5 h, 58 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16587041
Default Alt Text
D20655.id62720.diff (26 KB)
Attached To
Mode
D20655: Make use of stats(3) in the TCP stack
Attached
Detach File
Event Timeline
Log In to Comment