Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F112051643
D20655.id58671.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
27 KB
Referenced Files
None
Subscribers
None
D20655.id58671.diff
View Options
Index: share/man/man4/tcp.4
===================================================================
--- share/man/man4/tcp.4
+++ share/man/man4/tcp.4
@@ -291,6 +291,10 @@
.Pp
If an SADB entry cannot be found for the destination,
the system does not send any outgoing segments and drops any inbound segments.
+.It Dv TCP_STATS
+Manage collection of connection level statistics using the
+.Xr stats 3
+framework.
.Pp
Each dropped segment is taken into account in the TCP protocol statistics.
.El
@@ -606,6 +610,17 @@
.It Va insecure_syn
Use criteria defined in RFC793 instead of RFC5961 for accepting SYN segments.
Default is false.
+.It Va perconn_stats_enable
+Controls the default collection of statistics for all connections using the
+.Xr stats 3
+framework.
+0 disables, 1 enables, 2 enables random sampling across log id connection
+groups with all connections in a group receiving the same setting.
+.It Va perconn_stats_sample_rates
+A CSV list of template_spec=percent key-value pairs which controls the per
+template sampling rates when
+.Xr stats 3
+sampling is enabled.
.El
.Sh ERRORS
A socket operation may fail with one of the following errors returned:
@@ -645,6 +660,7 @@
.Sh SEE ALSO
.Xr getsockopt 2 ,
.Xr socket 2 ,
+.Xr stats 3 ,
.Xr sysctl 3 ,
.Xr blackhole 4 ,
.Xr inet 4 ,
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4271,6 +4271,7 @@
netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap
netinet/tcp_reass.c optional inet | inet6
netinet/tcp_sack.c optional inet | inet6
+netinet/tcp_stats.c optional stats inet | stats inet6
netinet/tcp_subr.c optional inet | inet6
netinet/tcp_syncache.c optional inet | inet6
netinet/tcp_timer.c optional inet | inet6
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -168,6 +168,7 @@
#define TCP_NOOPT 8 /* don't use TCP options */
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
+#define TCP_STATS 33 /* configure and retrieve stats blob structure */
#define TCP_LOG 34 /* configure event logging for connection */
#define TCP_LOGBUF 35 /* retrieve event log for connection */
#define TCP_LOGID 36 /* configure log ID to correlate connections */
@@ -250,7 +251,20 @@
#define TCPI_OPT_WSCALE 0x04
#define TCPI_OPT_ECN 0x08
#define TCPI_OPT_TOE 0x10
+#define TCPI_OPT_TFO 0x20
+/*
+ * Storage for details exchanged during the handshake, regardless of whether
+ * they are in fact successfully negotiated or not. This is particularly useful
+ * for servers to gain insight into the options their clients try to use.
+ */
+struct tcpsyninfo {
+ uint8_t to_synopts[13]; /* opts recorded on SYN (in order) */
+ uint8_t to_nrsynopts; /* # opts recorded on SYN */
+ uint8_t to_npsynopts; /* # opts present on SYN */
+ uint8_t th_flags; /* flags present on SYN */
+};
+
/* Maximum length of log ID. */
#define TCP_LOG_ID_LEN 64
@@ -315,9 +329,10 @@
u_int32_t tcpi_snd_rexmitpack; /* Retransmitted packets */
u_int32_t tcpi_rcv_ooopack; /* Out-of-order packets */
u_int32_t tcpi_snd_zerowin; /* Zero-sized windows sent */
-
+ struct tcpsyninfo tcpi_rxsyninfo; /* Peer's SYN info */
+
/* Padding to grow without breaking ABI. */
- u_int32_t __tcpi_pad[26]; /* Padding. */
+ u_int32_t __tcpi_pad[22]; /* Padding. */
};
/*
@@ -337,4 +352,17 @@
uint32_t pcbcnt;
};
+/*
+ * TCP specific variables of interest for tp->t_stats stats(9) accounting.
+ */
+#define VOI_TCP_TXPB 0 /* Transmit payload bytes */
+#define VOI_TCP_RETXPB 1 /* Retransmit payload bytes */
+#define VOI_TCP_FRWIN 2 /* Foreign receive window */
+#define VOI_TCP_LCWIN 3 /* Local congestion window */
+#define VOI_TCP_RTT 4 /* Round trip time */
+#define VOI_TCP_CSIG 5 /* Congestion signal */
+#define VOI_TCP_GPUT 6 /* Goodput */
+#define VOI_TCP_CALCFRWINDIFF 7 /* Congestion avoidance LCWIN - FRWIN */
+#define VOI_TCP_GPUT_ND 8 /* Goodput normalised delta */
+#define VOI_TCP_ACKLEN 9 /* Average ACKed bytes per ACK */
#endif /* !_NETINET_TCP_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -66,6 +66,7 @@
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
#include <sys/protosw.h>
+#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
@@ -73,6 +74,8 @@
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
+#include <sys/tree.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
@@ -293,6 +296,10 @@
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
uint16_t type)
{
+#ifdef STATS
+ int32_t gput;
+#endif
+
INP_WLOCK_ASSERT(tp->t_inpcb);
tp->ccv->nsegs = nsegs;
@@ -303,6 +310,32 @@
tp->ccv->flags &= ~CCF_CWND_LIMITED;
if (type == CC_ACK) {
+#ifdef STATS
+ stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF,
+ ((int32_t)tp->snd_cwnd) - tp->snd_wnd);
+ if (!IN_RECOVERY(tp->t_flags))
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN,
+ tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs));
+ if ((tp->t_flags & TF_GPUTINPROG) &&
+ SEQ_GEQ(th->th_ack, tp->gput_ack)) {
+ gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) /
+ max(1, tcp_ts_getticks() - tp->gput_ts);
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT,
+ gput);
+ /*
+ * XXXLAS: This is a temporary hack, and should be
+ * chained off VOI_TCP_GPUT when stats(9) grows an API
+ * to deal with chained VOIs.
+ */
+ if (tp->t_stats_gput_prev > 0)
+ stats_voi_update_abs_s32(tp->t_stats,
+ VOI_TCP_GPUT_ND,
+ ((gput - tp->t_stats_gput_prev) * 100) /
+ tp->t_stats_gput_prev);
+ tp->t_flags &= ~TF_GPUTINPROG;
+ tp->t_stats_gput_prev = gput;
+ }
+#endif /* STATS */
if (tp->snd_cwnd > tp->snd_ssthresh) {
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
@@ -321,6 +354,9 @@
tp->ccv->curack = th->th_ack;
CC_ALGO(tp)->ack_received(tp->ccv, type);
}
+#ifdef STATS
+ stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd);
+#endif
}
void
@@ -386,6 +422,10 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
+#endif
+
switch(type) {
case CC_NDUPACK:
if (!IN_FASTRECOVERY(tp->t_flags)) {
@@ -1571,6 +1611,9 @@
* For the SYN_SENT state the scale is zero.
*/
tiwin = th->th_win << tp->snd_scale;
+#ifdef STATS
+ stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin);
+#endif
/*
* TCP ECN processing.
@@ -3289,9 +3332,15 @@
void
tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
{
+ struct tcpsyninfo *syninfo;
int opt, optlen;
to->to_flags = 0;
+ if (flags & TO_SYN) {
+ syninfo = &to->to_syninfo;
+ syninfo->to_npsynopts = syninfo->to_nrsynopts =
+ syninfo->th_flags = 0;
+ }
for (; cnt > 0; cnt -= optlen, cp += optlen) {
opt = cp[0];
if (opt == TCPOPT_EOL)
@@ -3305,6 +3354,12 @@
if (optlen < 2 || optlen > cnt)
break;
}
+ if (flags & TO_SYN) {
+ syninfo->to_npsynopts++;
+ if (syninfo->to_nrsynopts < sizeof(syninfo->to_synopts))
+ syninfo->to_synopts[syninfo->to_nrsynopts++] =
+ cp[0];
+ }
switch (opt) {
case TCPOPT_MAXSEG:
if (optlen != TCPOLEN_MAXSEG)
@@ -3436,6 +3491,9 @@
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt));
+#endif
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) {
/*
* srtt is stored as fixed point with 5 bits after the
Index: sys/netinet/tcp_log_buf.c
===================================================================
--- sys/netinet/tcp_log_buf.c
+++ sys/netinet/tcp_log_buf.c
@@ -34,6 +34,7 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/qmath.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
@@ -41,6 +42,7 @@
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#include <sys/counter.h>
#include <dev/tcp_log/tcp_log_dev.h>
@@ -475,7 +477,7 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
-#ifdef NETFLIX
+#ifdef STATS
if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
#endif
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -50,10 +50,13 @@
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
+#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
+#include <sys/tree.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#include <net/if.h>
#include <net/route.h>
@@ -968,15 +971,31 @@
struct sockbuf *msb;
u_int moff;
- if ((tp->t_flags & TF_FORCEDATA) && len == 1)
+ if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
- else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
+#ifdef STATS
+ if (SEQ_LT(tp->snd_nxt, tp->snd_max))
+ stats_voi_update_abs_u32(tp->t_stats,
+ VOI_TCP_RETXPB, len);
+ else
+ stats_voi_update_abs_u64(tp->t_stats,
+ VOI_TCP_TXPB, len);
+#endif /* STATS */
+ } else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
+ len);
+#endif /* STATS */
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
+#ifdef STATS
+ stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
+ len);
+#endif /* STATS */
}
#ifdef INET6
if (MHLEN < hdrlen + max_linkhdr)
@@ -1448,6 +1467,13 @@
tp->t_rtttime = ticks;
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
+ }
+ if (!(tp->t_flags & TF_GPUTINPROG) && len) {
+ tp->t_flags |= TF_GPUTINPROG;
+ tp->gput_seq = startseq;
+ tp->gput_ack = startseq +
+ ulmin(sbavail(&so->so_snd) - off, sendwin);
+ tp->gput_ts = tcp_ts_getticks();
}
}
Index: sys/netinet/tcp_reass.c
===================================================================
--- sys/netinet/tcp_reass.c
+++ sys/netinet/tcp_reass.c
@@ -47,11 +47,14 @@
#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/qmath.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
+#include <sys/tree.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#include <vm/uma.h>
Index: sys/netinet/tcp_stats.c
===================================================================
--- /dev/null
+++ sys/netinet/tcp_stats.c
@@ -0,0 +1,269 @@
+/*-
+ * Copyright (c) 2016-2018 Netflix, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Author: Lawrence Stewart <lstewart@netflix.com>
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/qmath.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/tree.h>
+#ifdef _KERNEL
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/systm.h>
+#endif
+#include <sys/stats.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_var.h>
+
+#include <netinet/cc/cc.h>
+
+VNET_DEFINE(int, tcp_perconn_stats_dflt_tpl) = -1;
+
+#ifdef _KERNEL
+
+VNET_DEFINE(int, tcp_perconn_stats_enable) = 2;
+VNET_DEFINE_STATIC(struct stats_tpl_sample_rate *, tcp_perconn_stats_sample_rates);
+VNET_DEFINE_STATIC(int, tcp_stats_nrates) = 0;
+#define V_tcp_perconn_stats_sample_rates VNET(tcp_perconn_stats_sample_rates)
+#define V_tcp_stats_nrates VNET(tcp_stats_nrates)
+
+static struct rmlock tcp_stats_tpl_sampling_lock;
+static int tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
+ struct stats_tpl_sample_rate **rates, int *nrates, void *ctx);
+
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, perconn_stats_enable,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_perconn_stats_enable), 0,
+ "Enable per-connection TCP stats gathering; 1 enables for all connections, "
+ "2 enables random sampling across log id connection groups");
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, perconn_stats_sample_rates,
+ CTLTYPE_STRING | CTLFLAG_RW, tcp_stats_tpl_sr_cb,
+ sizeof(struct rm_priotracker), stats_tpl_sample_rates, "A",
+ "TCP stats per template random sampling rates, in CSV tpl_spec=percent "
+ "key-value pairs (see stats(9) for template spec details)");
+#endif /* _KERNEL */
+
+int
+#ifndef _KERNEL
+/* Ensure all templates are also added to the userland template list. */
+__attribute__ ((constructor))
+#endif
+tcp_stats_init()
+{
+ int err, lasterr;
+
+ err = lasterr = 0;
+
+ V_tcp_perconn_stats_dflt_tpl = stats_tpl_alloc("TCP_DEFAULT", 0);
+ if (V_tcp_perconn_stats_dflt_tpl < 0)
+ return (-V_tcp_perconn_stats_dflt_tpl);
+
+ struct voistatspec vss_sum[] = {
+ STATS_VSS_SUM(),
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_TXPB, "TCP_TXPB", VSD_DTYPE_INT_U64,
+ NVSS(vss_sum), vss_sum, 0);
+ lasterr = err ? err : lasterr;
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_RETXPB, "TCP_RETXPB", VSD_DTYPE_INT_U32,
+ NVSS(vss_sum), vss_sum, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_max[] = {
+ STATS_VSS_MAX(),
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_FRWIN, "TCP_FRWIN", VSD_DTYPE_INT_ULONG,
+ NVSS(vss_max), vss_max, 0);
+ lasterr = err ? err : lasterr;
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_LCWIN, "TCP_LCWIN", VSD_DTYPE_INT_ULONG,
+ NVSS(vss_max), vss_max, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_rtt[] = {
+ STATS_VSS_MAX(),
+ STATS_VSS_MIN(),
+ STATS_VSS_TDGSTCLUST32(20, 4),
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_RTT, "TCP_RTT", VSD_DTYPE_INT_U32,
+ NVSS(vss_rtt), vss_rtt, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_congsig[] = {
+ STATS_VSS_DVHIST32_USR(HBKTS(DVBKT(CC_ECN), DVBKT(CC_RTO),
+ DVBKT(CC_RTO_ERR), DVBKT(CC_NDUPACK)), 0)
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_CSIG, "TCP_CSIG", VSD_DTYPE_INT_U32,
+ NVSS(vss_congsig), vss_congsig, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_gput[] = {
+ STATS_VSS_MAX(),
+ STATS_VSS_TDGSTCLUST32(20, 4),
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_GPUT, "TCP_GPUT", VSD_DTYPE_INT_U32,
+ NVSS(vss_gput), vss_gput, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_gput_nd[] = {
+ STATS_VSS_TDGSTCLUST32(10, 4),
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_GPUT_ND, "TCP_GPUT_ND", VSD_DTYPE_INT_S32,
+ NVSS(vss_gput_nd), vss_gput_nd, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_windiff[] = {
+ STATS_VSS_CRHIST32_USR(HBKTS(CRBKT(0)), VSD_HIST_LBOUND_INF)
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_CALCFRWINDIFF, "TCP_CALCFRWINDIFF", VSD_DTYPE_INT_S32,
+ NVSS(vss_windiff), vss_windiff, 0);
+ lasterr = err ? err : lasterr;
+
+ struct voistatspec vss_acklen[] = {
+ STATS_VSS_MAX(),
+ STATS_VSS_CRHIST32_LIN(0, 9, 1, VSD_HIST_UBOUND_INF)
+ };
+ err |= stats_tpl_add_voistats(V_tcp_perconn_stats_dflt_tpl,
+ VOI_TCP_ACKLEN, "TCP_ACKLEN", VSD_DTYPE_INT_U32,
+ NVSS(vss_acklen), vss_acklen, 0);
+ lasterr = err ? err : lasterr;
+
+ return (lasterr);
+}
+
+#ifdef _KERNEL
+int
+tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
+ size_t seed_len)
+{
+ struct rm_priotracker tracker;
+ int tpl;
+
+ tpl = -1;
+
+ if (V_tcp_stats_nrates > 0) {
+ rm_rlock(&tcp_stats_tpl_sampling_lock, &tracker);
+ tpl = stats_tpl_sample_rollthedice(V_tcp_perconn_stats_sample_rates,
+ V_tcp_stats_nrates, seed_bytes, seed_len);
+ rm_runlock(&tcp_stats_tpl_sampling_lock, &tracker);
+
+ if (tpl >= 0) {
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ if (tp->t_stats != NULL)
+ stats_blob_destroy(tp->t_stats);
+ tp->t_stats = stats_blob_alloc(tpl, 0);
+ if (tp->t_stats == NULL)
+ tpl = -ENOMEM;
+ }
+ }
+
+ return (tpl);
+}
+
+/*
+ * Callback function for stats_tpl_sample_rates() to interact with the TCP
+ * subsystem's stats template sample rates list.
+ */
+int
+tcp_stats_tpl_sr_cb(enum stats_tpl_sr_cb_action action,
+ struct stats_tpl_sample_rate **rates, int *nrates, void *ctx)
+{
+ struct stats_tpl_sample_rate *old_rates;
+ int old_nrates;
+
+ if (ctx == NULL)
+ return (ENOMEM);
+
+ switch (action) {
+ case TPL_SR_RLOCKED_GET:
+ /*
+ * Return with rlock held, i.e. this call must be paired with an
+ * "action == TPL_SR_RUNLOCK" call.
+ */
+ rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
+ rm_rlock(&tcp_stats_tpl_sampling_lock,
+ (struct rm_priotracker *)ctx);
+ /* FALLTHROUGH */
+ case TPL_SR_UNLOCKED_GET:
+ if (rates != NULL)
+ *rates = V_tcp_perconn_stats_sample_rates;
+ if (nrates != NULL)
+ *nrates = V_tcp_stats_nrates;
+ break;
+ case TPL_SR_RUNLOCK:
+ rm_assert(&tcp_stats_tpl_sampling_lock, RA_RLOCKED);
+ rm_runlock(&tcp_stats_tpl_sampling_lock,
+ (struct rm_priotracker *)ctx);
+ break;
+ case TPL_SR_PUT:
+ KASSERT(rates != NULL && nrates != NULL,
+ ("%s: PUT without new rates", __func__));
+ rm_assert(&tcp_stats_tpl_sampling_lock, RA_UNLOCKED);
+ if (rates == NULL || nrates == NULL)
+ return (EINVAL);
+ rm_wlock(&tcp_stats_tpl_sampling_lock);
+ old_rates = V_tcp_perconn_stats_sample_rates;
+ old_nrates = V_tcp_stats_nrates;
+ V_tcp_perconn_stats_sample_rates = *rates;
+ V_tcp_stats_nrates = *nrates;
+ rm_wunlock(&tcp_stats_tpl_sampling_lock);
+ *rates = old_rates;
+ *nrates = old_nrates;
+ break;
+ default:
+ return (EINVAL);
+ break;
+ }
+
+ return (0);
+}
+
+RM_SYSINIT(tcp_stats_tpl_sampling_lock, &tcp_stats_tpl_sampling_lock,
+ "tcp_stats_tpl_sampling_lock");
+#endif /* _KERNEL */
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -50,6 +50,9 @@
#ifdef TCP_HHOOK
#include <sys/khelp.h>
#endif
+#include <sys/qmath.h>
+#include <sys/tree.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
@@ -992,6 +995,13 @@
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
#endif
+
+#ifdef STATS
+ if (tcp_stats_init())
+ printf("%s: WARNING: unable to initialise TCP stats\n",
+ __func__);
+#endif
+
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -1674,6 +1684,11 @@
if (tp->t_fb->tfb_tcp_fb_init) {
(*tp->t_fb->tfb_tcp_fb_init)(tp);
}
+#ifdef STATS
+ if (V_tcp_perconn_stats_enable == 1)
+ tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
+#endif
+
return (tp); /* XXX */
}
@@ -1892,7 +1907,9 @@
#ifdef TCP_HHOOK
khelp_destroy_osd(tp->osd);
#endif
-
+#ifdef STATS
+ stats_blob_destroy(tp->t_stats);
+#endif
CC_ALGO(tp) = NULL;
inp->inp_ppcb = NULL;
if (tp->t_timers->tt_draincnt == 0) {
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -52,6 +52,7 @@
#include <sys/malloc.h>
#include <sys/refcount.h>
#include <sys/kernel.h>
+#include <sys/qmath.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#ifdef INET6
@@ -63,6 +64,8 @@
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/syslog.h>
+#include <sys/tree.h>
+#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#ifdef DDB
#include <ddb/ddb.h>
@@ -106,6 +109,13 @@
#endif
#include <netipsec/ipsec_support.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+
/*
* TCP protocol interface to socket abstraction.
*/
@@ -1520,6 +1530,9 @@
if (tp->t_flags & TF_ECN_PERMIT)
ti->tcpi_options |= TCPI_OPT_ECN;
+ if (tp->t_flags & TF_FASTOPEN)
+ ti->tcpi_options |= TCPI_OPT_TFO;
+
ti->tcpi_rto = tp->t_rxtcur * tick;
ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
@@ -1541,6 +1554,8 @@
ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
ti->tcpi_snd_zerowin = tp->t_sndzerowin;
+
+ memcpy(&ti->tcpi_rxsyninfo, &tp->t_rxsyninfo, sizeof(struct tcpsyninfo));
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE) {
ti->tcpi_options |= TCPI_OPT_TOE;
@@ -1758,6 +1773,9 @@
struct tcp_info ti;
struct cc_algo *algo;
char *pbuf, buf[TCP_LOG_ID_LEN];
+#ifdef STATS
+ struct statsblob *sbp;
+#endif
size_t len;
/*
@@ -1875,6 +1893,36 @@
error = EINVAL;
break;
+ case TCP_STATS:
+#ifdef STATS
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &optval, sizeof optval,
+ sizeof optval);
+ if (error)
+ return (error);
+
+ if (optval > 0)
+ sbp = stats_blob_alloc(
+ V_tcp_perconn_stats_dflt_tpl, 0);
+ else
+ sbp = NULL;
+
+ INP_WLOCK_RECHECK(inp);
+ if ((tp->t_stats != NULL && sbp == NULL) ||
+ (tp->t_stats == NULL && sbp != NULL)) {
+ struct statsblob *t = tp->t_stats;
+ tp->t_stats = sbp;
+ sbp = t;
+ }
+ INP_WUNLOCK(inp);
+
+ stats_blob_destroy(sbp);
+#else /* !STATS */
+ INP_WUNLOCK(inp);
+ return (EOPNOTSUPP);
+#endif /* !STATS */
+ break;
+
case TCP_CONGESTION:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
@@ -2138,6 +2186,55 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, &ti, sizeof ti);
break;
+ case TCP_STATS:
+ {
+#ifdef STATS
+ int nheld;
+ TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
+
+ error = 0;
+ socklen_t outsbsz = sopt->sopt_valsize;
+ if (tp->t_stats == NULL)
+ error = ENOENT;
+ else if (outsbsz >= tp->t_stats->cursz)
+ outsbsz = tp->t_stats->cursz;
+ else if (outsbsz >= sizeof(struct statsblob))
+ outsbsz = sizeof(struct statsblob);
+ else
+ error = EINVAL;
+ INP_WUNLOCK(inp);
+ if (error)
+ break;
+
+ sbp = sopt->sopt_val;
+ nheld = atop(round_page(((vm_offset_t)sbp) +
+ (vm_size_t)outsbsz) - trunc_page(sbp));
+ vm_page_t ma[nheld];
+ if (vm_fault_quick_hold_pages(
+ &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
+ outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
+ nheld) < 0) {
+ error = EFAULT;
+ break;
+ }
+
+ if ((error = copyin_nofault(&(sbp->flags), &sbflags,
+ SIZEOF_MEMBER(struct statsblob, flags))))
+ goto unhold;
+
+ INP_WLOCK_RECHECK(inp);
+ error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
+ sbflags | SB_CLONE_USRDSTNOFAULT);
+ INP_WUNLOCK(inp);
+ sopt->sopt_valsize = outsbsz;
+unhold:
+ vm_page_unhold_pages(ma, nheld);
+#else /* !STATS */
+ INP_WUNLOCK(inp);
+ error = EOPNOTSUPP;
+#endif /* !STATS */
+ break;
+ }
case TCP_CONGESTION:
len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
INP_WUNLOCK(inp);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -209,7 +209,14 @@
struct tcp_log_id_node *t_lin;
struct tcp_log_id_bucket *t_lib;
const char *t_output_caller; /* Function that called tcp_output */
+ struct statsblob *t_stats; /* Per-connection stats */
uint32_t t_logsn; /* Log "serial number" */
+ uint32_t gput_ts; /* Time goodput measurement started */
+ tcp_seq gput_seq; /* Outbound measurement seq */
+ tcp_seq gput_ack; /* Inbound measurement ack */
+ struct tcpsyninfo t_rxsyninfo; /* Peer's SYN details */
+ int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */
+
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */
unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */
union {
@@ -320,6 +327,7 @@
#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
#define TF_NOPUSH 0x001000 /* don't push */
#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */
+#define TF_GPUTINPROG 0x008000 /* Goodput measurement in progress */
#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
#define TF_LASTIDLE 0x040000 /* connection was previously idle */
@@ -399,7 +407,7 @@
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_int8_t to_tfo_len; /* TFO cookie length */
- u_int32_t to_spare; /* UTO */
+ struct tcpsyninfo to_syninfo; /* SYN specific opts data */
};
/*
@@ -738,6 +746,11 @@
#define TCPCTL_DROP 15 /* drop tcp connection */
#define TCPCTL_STATES 16 /* connection counts by TCP state */
+/* These stats(9) related bits need to be visible to userland code. */
+int tcp_stats_init(void);
+#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable)
+#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl)
+
#ifdef _KERNEL
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_tcp);
@@ -774,6 +787,8 @@
VNET_DECLARE(int, tcp_insecure_syn);
VNET_DECLARE(int, tcp_minmss);
VNET_DECLARE(int, tcp_mssdflt);
+VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl);
+VNET_DECLARE(int, tcp_perconn_stats_enable);
VNET_DECLARE(int, tcp_recvspace);
VNET_DECLARE(int, tcp_sack_globalholes);
VNET_DECLARE(int, tcp_sack_globalmaxholes);
@@ -942,6 +957,8 @@
int tcp_compute_pipe(struct tcpcb *);
uint32_t tcp_compute_initwnd(uint32_t);
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t);
+int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes,
+ size_t seed_len);
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
int32_t seglimit, int32_t segsize, struct sockbuf *sb);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Mar 13, 3:18 AM (14 h, 52 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17129343
Default Alt Text
D20655.id58671.diff (27 KB)
Attached To
Mode
D20655: Make use of stats(3) in the TCP stack
Attached
Detach File
Event Timeline
Log In to Comment