Changeset View
Standalone View
sys/netinet/tcp_var.h
Show First 20 Lines • Show All 204 Lines • ▼ Show 20 Lines | struct tcpcb { | ||||
u_int t_keepintvl; /* interval between keepalives */ | u_int t_keepintvl; /* interval between keepalives */ | ||||
u_int t_keepcnt; /* number of keepalives before close */ | u_int t_keepcnt; /* number of keepalives before close */ | ||||
int t_dupacks; /* consecutive dup acks recd */ | int t_dupacks; /* consecutive dup acks recd */ | ||||
int t_lognum; /* Number of log entries */ | int t_lognum; /* Number of log entries */ | ||||
struct tcp_log_stailq t_logs; /* Log buffer */ | struct tcp_log_stailq t_logs; /* Log buffer */ | ||||
struct tcp_log_id_node *t_lin; | struct tcp_log_id_node *t_lin; | ||||
struct tcp_log_id_bucket *t_lib; | struct tcp_log_id_bucket *t_lib; | ||||
const char *t_output_caller; /* Function that called tcp_output */ | const char *t_output_caller; /* Function that called tcp_output */ | ||||
struct statsblob *t_stats; /* Per-connection stats */ | |||||
uint32_t t_logsn; /* Log "serial number" */ | uint32_t t_logsn; /* Log "serial number" */ | ||||
uint32_t gput_ts; /* Time goodput measurement started */ | |||||
tcp_seq gput_seq; /* Outbound measurement seq */ | |||||
tcp_seq gput_ack; /* Inbound measurement ack */ | |||||
int32_t t_stats_gput_prev; /* XXXLAS: Prev gput measurement */ | |||||
lstewart: Should all these be under `#ifdef STATS`? May also want to consider moving somewhere else, e.g. down where `#ifdef TCPPCAP` is... | |||||
Not Done Inline Actions: Making them conditional could break the ABI for userspace. A quick grep for 'tcpcb' shows some uses in DTrace scripts, ipfilter, systat(1), and trpt(1). trasz: Making them conditional could break ABI for userspace. Quick grepping for 'tcpcb' shows some… | |||||
Not Done Inline Actions: Yes... there's no clear-cut choice, but default-off options (e.g. `TCPPCAP` a few lines below) tend to put their bits under `#ifdef` to avoid bloat for the majority who don't explicitly enable the feature. I don't have a strong opinion on this, but thought I'd flag it in case other opinions existed. lstewart: Yes... there's no clear cut choice, but default off options (e.g. `TCPPCAP` a few lines below)… | |||||
uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */ | uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */ | ||||
unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */ | unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */ | ||||
union { | union { | ||||
uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]; | uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]; | ||||
uint64_t server; | uint64_t server; | ||||
} t_tfo_cookie; /* TCP Fast Open cookie to send */ | } t_tfo_cookie; /* TCP Fast Open cookie to send */ | ||||
#ifdef TCPPCAP | #ifdef TCPPCAP | ||||
struct mbufq t_inpkts; /* List of saved input packets. */ | struct mbufq t_inpkts; /* List of saved input packets. */ | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | |||||
#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */ | #define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */ | ||||
#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */ | #define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */ | ||||
#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */ | #define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */ | ||||
#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */ | #define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */ | ||||
#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ | #define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ | ||||
#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ | #define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ | ||||
#define TF_NOPUSH 0x001000 /* don't push */ | #define TF_NOPUSH 0x001000 /* don't push */ | ||||
#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ | #define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ | ||||
#define TF_GPUTINPROG 0x008000 /* Goodput measurement in progress */ | |||||
#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ | #define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ | ||||
#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ | #define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ | ||||
#define TF_LASTIDLE 0x040000 /* connection was previously idle */ | #define TF_LASTIDLE 0x040000 /* connection was previously idle */ | ||||
#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ | #define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ | ||||
#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ | #define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ | ||||
#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ | #define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ | ||||
#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ | #define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ | ||||
#define TF_FORCEDATA 0x800000 /* force out a byte */ | #define TF_FORCEDATA 0x800000 /* force out a byte */ | ||||
▲ Show 20 Lines • Show All 402 Lines • ▼ Show 20 Lines | |||||
#define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ | #define TCPCTL_KEEPINIT 10 /* timeout for establishing syn */ | ||||
#define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ | #define TCPCTL_PCBLIST 11 /* list of all outstanding PCBs */ | ||||
#define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ | #define TCPCTL_DELACKTIME 12 /* time before sending delayed ACK */ | ||||
#define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ | #define TCPCTL_V6MSSDFLT 13 /* MSS default for IPv6 */ | ||||
#define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ | #define TCPCTL_SACK 14 /* Selective Acknowledgement,rfc 2018 */ | ||||
#define TCPCTL_DROP 15 /* drop tcp connection */ | #define TCPCTL_DROP 15 /* drop tcp connection */ | ||||
#define TCPCTL_STATES 16 /* connection counts by TCP state */ | #define TCPCTL_STATES 16 /* connection counts by TCP state */ | ||||
/* These stats(9) related bits need to be visible to userland code. */ | |||||
int tcp_stats_init(void); | |||||
#define V_tcp_perconn_stats_enable VNET(tcp_perconn_stats_enable) | |||||
#define V_tcp_perconn_stats_dflt_tpl VNET(tcp_perconn_stats_dflt_tpl) | |||||
Done Inline Actions: The comment sounds wrong to me. One variable looks like a sysctl and, as such, should not need to be visible to userland. bz: The comment sounds wrong to me. One variable looks like a sysctl and with that should not need… | |||||
Done Inline Actions: I agree. I don't think any of these three lines needs to be visible to userland. I think they can safely move within the `#ifdef _KERNEL` block. I suspect that this is a bit of cruft which managed to sneak in during the 3+ years of development on this project. jtl: I agree. I don't think any of these three lines need to be visible to userland. I think they… | |||||
Done Inline Actions: Actually, they do need to be visible to userland given the way things currently work, but the comment does possibly need to be expanded to explain why (or my arguably hacky way of doing things needs to be improved). The underlying issue here is the need for statsblob templates defined in the kernel or userland to be shared across both domains, such that a statsblob generated in one domain based on a particular template can be manipulated in the other domain. By shared, I don't mean in a direct memory access sense, but the statsblob template needs to exist or be discoverable in both domains (or in future, perhaps across machines). The only direction that matters in the current code being upstreamed is for templates defined in the kernel, and then statsblobs based on a kernel-defined template being exported to userspace, and manipulated in userspace e.g. for rendering to JSON for logging purposes. The "right" solution would be to have an API for exporting/importing statsblob templates. I punted on doing it the "right" way and cheated. Regarding the TCP-related stats code, tcp_stats_init() in tcp_stats.c defines and registers the "TCP_DEFAULT" statsblob template together with its associated variables of interest. tcp_stats.c is compiled into the kernel, and tcp_stats_init() is called from tcp_init() so that the kernel statsblob template registry contains the "TCP_DEFAULT" template for use by the TCP stack. kern/subr_stats.c is compiled both into the kernel and into a userland library (similar to the way sbuf(9) is built for both domains), and in order to have the stats templates match between the kernel and userland, I compile tcp_stats.c into the userland library too with tcp_stats_init() tagged as a library constructor so that the "TCP_DEFAULT" template is defined the same as what's in the kernel, and is registered on userland library load. 
As a consequence, tcp_stats.c has a bit of #ifdef goo to make compilation in userland work correctly, and these tcp_var.h bits need to be visible to make the userland library compile. This scheme does mean that the kernel and userland can get out of sync if a change is made to the template but only one of the kernel or userland library is recompiled against the new code. In such a situation, statsblobs generated e.g. in the kernel would have a template hash id which differs from the templates known in userland, and the userland code would fail any statsblob manipulation operations that required the template to be known (return an errno). In practice, this scheme works well and has not caused problems in the Netflix use case. It was always the plan to eventually implement the API for template export/import and clean this up when that work is done, but I don't think the API is a requirement for upstreaming. lstewart: Actually, they do need to be visible to userland in the current way things work, but the… | |||||
Done Inline Actions: Can you point me at the userland code which "reads" these things? Given that one is already a sysctl, I'd think it'd be easy to adjust that. I am only wondering if the code was originally written with VIMAGE in mind? bz: Can you point me at the userland code which "reads" these things? Given one is a sysctl… | |||||
Done Inline Actions: @bz Oops, sorry for being unclear. The userland library does not read or interact with any of these tcp_var.h variables in the kernel. If you look in tcp_stats.c you'll see where this all comes together. tcp_stats_init() is executed both in the kernel and, separately, in the userland library as a library constructor. V_tcp_perconn_stats_dflt_tpl is used in tcp_stats_init() and therefore needs to be resolvable for compilation of stats code in both kernel and userland. In the userland library it just resolves to a library-scope global integer - no relation/access to the kernel variable of the same name. V_tcp_perconn_stats_enable does not, strictly speaking, need to be visible to userland as it's only referenced from kernel code, but it doesn't hurt having it here and keeping these bits together (though I would also not object to it moving elsewhere). lstewart: @bz Oops, sorry for being unclear. The userland library does not read or interact with any of… | |||||
Done Inline Actions: FWIW, I think this dual-use (especially of the V_tcp_perconn_stats_dflt_tpl variable) makes things unclear. I think it would be nice to eliminate it, if possible, without adding code duplication. However, I understand that may not be possible. I *suspect* you may be able to make the constructor a static function in the userland library. This would avoid the need to expose it via headers. Also, as best I can tell, V_tcp_perconn_stats_dflt_tpl is not used in the userland library outside of tcp_stats_init. Is that true? If so, it might be possible to do a trivial rewrite of this function to use a local variable, and only update V_tcp_perconn_stats_dflt_tpl in the kernel. jtl: FWIW, I think this dual-use (especially of the V_tcp_perconn_stats_dflt_tpl variable) makes… | |||||
Done Inline Actions: I've moved it from a header into tcp_stats.c. Does that look ok? trasz: I've moved it from a header into tcp_stats.c. Does that look ok? | |||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
#ifdef SYSCTL_DECL | #ifdef SYSCTL_DECL | ||||
SYSCTL_DECL(_net_inet_tcp); | SYSCTL_DECL(_net_inet_tcp); | ||||
SYSCTL_DECL(_net_inet_tcp_sack); | SYSCTL_DECL(_net_inet_tcp_sack); | ||||
MALLOC_DECLARE(M_TCPLOG); | MALLOC_DECLARE(M_TCPLOG); | ||||
#endif | #endif | ||||
extern int tcp_log_in_vain; | extern int tcp_log_in_vain; | ||||
Show All 20 Lines | |||||
VNET_DECLARE(int, tcp_do_sack); | VNET_DECLARE(int, tcp_do_sack); | ||||
VNET_DECLARE(int, tcp_do_tso); | VNET_DECLARE(int, tcp_do_tso); | ||||
VNET_DECLARE(int, tcp_ecn_maxretries); | VNET_DECLARE(int, tcp_ecn_maxretries); | ||||
VNET_DECLARE(int, tcp_initcwnd_segments); | VNET_DECLARE(int, tcp_initcwnd_segments); | ||||
VNET_DECLARE(int, tcp_insecure_rst); | VNET_DECLARE(int, tcp_insecure_rst); | ||||
VNET_DECLARE(int, tcp_insecure_syn); | VNET_DECLARE(int, tcp_insecure_syn); | ||||
VNET_DECLARE(int, tcp_minmss); | VNET_DECLARE(int, tcp_minmss); | ||||
VNET_DECLARE(int, tcp_mssdflt); | VNET_DECLARE(int, tcp_mssdflt); | ||||
VNET_DECLARE(int, tcp_perconn_stats_dflt_tpl); | |||||
VNET_DECLARE(int, tcp_perconn_stats_enable); | |||||
Done Inline Actions: Wrap in `#ifdef STATS`? lstewart: Wrap in `#ifdef STATS`? | |||||
VNET_DECLARE(int, tcp_recvspace); | VNET_DECLARE(int, tcp_recvspace); | ||||
VNET_DECLARE(int, tcp_sack_globalholes); | VNET_DECLARE(int, tcp_sack_globalholes); | ||||
VNET_DECLARE(int, tcp_sack_globalmaxholes); | VNET_DECLARE(int, tcp_sack_globalmaxholes); | ||||
VNET_DECLARE(int, tcp_sack_maxholes); | VNET_DECLARE(int, tcp_sack_maxholes); | ||||
VNET_DECLARE(int, tcp_sc_rst_sock_fail); | VNET_DECLARE(int, tcp_sc_rst_sock_fail); | ||||
VNET_DECLARE(int, tcp_sendspace); | VNET_DECLARE(int, tcp_sendspace); | ||||
VNET_DECLARE(struct inpcbhead, tcb); | VNET_DECLARE(struct inpcbhead, tcb); | ||||
VNET_DECLARE(struct inpcbinfo, tcbinfo); | VNET_DECLARE(struct inpcbinfo, tcbinfo); | ||||
Show All 20 Lines | |||||
#define V_tcp_do_tso VNET(tcp_do_tso) | #define V_tcp_do_tso VNET(tcp_do_tso) | ||||
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) | #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) | ||||
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) | #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) | ||||
#define V_tcp_insecure_rst VNET(tcp_insecure_rst) | #define V_tcp_insecure_rst VNET(tcp_insecure_rst) | ||||
#define V_tcp_insecure_syn VNET(tcp_insecure_syn) | #define V_tcp_insecure_syn VNET(tcp_insecure_syn) | ||||
#define V_tcp_minmss VNET(tcp_minmss) | #define V_tcp_minmss VNET(tcp_minmss) | ||||
#define V_tcp_mssdflt VNET(tcp_mssdflt) | #define V_tcp_mssdflt VNET(tcp_mssdflt) | ||||
#define V_tcp_recvspace VNET(tcp_recvspace) | #define V_tcp_recvspace VNET(tcp_recvspace) | ||||
#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) | #define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) | ||||
Done Inline Actions: Wrap in `#ifdef STATS`? lstewart: Wrap in `#ifdef STATS`? | |||||
#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) | #define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) | ||||
#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) | #define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) | ||||
#define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) | #define V_tcp_sc_rst_sock_fail VNET(tcp_sc_rst_sock_fail) | ||||
#define V_tcp_sendspace VNET(tcp_sendspace) | #define V_tcp_sendspace VNET(tcp_sendspace) | ||||
#define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead) | #define V_tcp_udp_tunneling_overhead VNET(tcp_udp_tunneling_overhead) | ||||
#define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port) | #define V_tcp_udp_tunneling_port VNET(tcp_udp_tunneling_port) | ||||
▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | |||||
void tcp_sack_adjust(struct tcpcb *tp); | void tcp_sack_adjust(struct tcpcb *tp); | ||||
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); | struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); | ||||
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); | void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); | ||||
void tcp_free_sackholes(struct tcpcb *tp); | void tcp_free_sackholes(struct tcpcb *tp); | ||||
int tcp_newreno(struct tcpcb *, struct tcphdr *); | int tcp_newreno(struct tcpcb *, struct tcphdr *); | ||||
int tcp_compute_pipe(struct tcpcb *); | int tcp_compute_pipe(struct tcpcb *); | ||||
uint32_t tcp_compute_initwnd(uint32_t); | uint32_t tcp_compute_initwnd(uint32_t); | ||||
void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t); | void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t); | ||||
int tcp_stats_sample_rollthedice(struct tcpcb *tp, void *seed_bytes, | |||||
size_t seed_len); | |||||
struct mbuf * | struct mbuf * | ||||
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, | tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, | ||||
int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls); | int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls); | ||||
static inline void | static inline void | ||||
tcp_fields_to_host(struct tcphdr *th) | tcp_fields_to_host(struct tcphdr *th) | ||||
{ | { | ||||
Show All 19 Lines |
Should all these be under #ifdef STATS? May also want to consider moving somewhere else e.g. down where #ifdef TCPPCAP is...