Changeset View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | |||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_inet.h" | #include "opt_inet.h" | ||||
#include "opt_inet6.h" | #include "opt_inet6.h" | ||||
#include "opt_ipsec.h" | #include "opt_ipsec.h" | ||||
#include "opt_tcpdebug.h" | #include "opt_tcpdebug.h" | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/arb.h> | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#ifdef TCP_HHOOK | #ifdef TCP_HHOOK | ||||
#include <sys/hhook.h> | #include <sys/hhook.h> | ||||
#endif | #endif | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mbuf.h> | #include <sys/mbuf.h> | ||||
#include <sys/proc.h> /* for proc0 declaration */ | #include <sys/proc.h> /* for proc0 declaration */ | ||||
#include <sys/protosw.h> | #include <sys/protosw.h> | ||||
#include <sys/qmath.h> | |||||
#include <sys/sdt.h> | #include <sys/sdt.h> | ||||
#include <sys/signalvar.h> | #include <sys/signalvar.h> | ||||
#include <sys/socket.h> | #include <sys/socket.h> | ||||
#include <sys/socketvar.h> | #include <sys/socketvar.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/syslog.h> | #include <sys/syslog.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/stats.h> | |||||
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ | #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ | ||||
#include <vm/uma.h> | #include <vm/uma.h> | ||||
#include <net/if.h> | #include <net/if.h> | ||||
#include <net/if_var.h> | #include <net/if_var.h> | ||||
#include <net/route.h> | #include <net/route.h> | ||||
▲ Show 20 Lines • Show All 204 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* CC wrapper hook functions | * CC wrapper hook functions | ||||
*/ | */ | ||||
void | void | ||||
cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs, | cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs, | ||||
uint16_t type) | uint16_t type) | ||||
{ | { | ||||
#ifdef STATS | |||||
int32_t gput; | |||||
#endif | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
tp->ccv->nsegs = nsegs; | tp->ccv->nsegs = nsegs; | ||||
tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); | tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); | ||||
if (tp->snd_cwnd <= tp->snd_wnd) | if (tp->snd_cwnd <= tp->snd_wnd) | ||||
tp->ccv->flags |= CCF_CWND_LIMITED; | tp->ccv->flags |= CCF_CWND_LIMITED; | ||||
else | else | ||||
tp->ccv->flags &= ~CCF_CWND_LIMITED; | tp->ccv->flags &= ~CCF_CWND_LIMITED; | ||||
if (type == CC_ACK) { | if (type == CC_ACK) { | ||||
#ifdef STATS | |||||
stats_voi_update_abs_s32(tp->t_stats, VOI_TCP_CALCFRWINDIFF, | |||||
((int32_t)tp->snd_cwnd) - tp->snd_wnd); | |||||
if (!IN_RECOVERY(tp->t_flags)) | |||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN, | |||||
tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs)); | |||||
if ((tp->t_flags & TF_GPUTINPROG) && | |||||
SEQ_GEQ(th->th_ack, tp->gput_ack)) { | |||||
/* | |||||
thj: Is the goodput measurement time (i.e. number of ms for the measurement window) reported by… | |||||
Done Inline ActionsI don't think the raw measurement time is reported - but there's the digest computed for goodput, which I'm guessing is what you'd want the times for? Regarding rtt < 1ms - well, it's just 'capped from the bottom' with the max(), but I don't know how realistic times below 1ms are, given that the resolution of tcp_ts_getticks() seems to be 1ms? As for the units - that's a good suggestion; let me do a little experiment to make sure I get this right and I'll add it to the VOI declarations in tcp.h. trasz: I don't think the raw measurement time is reported - but there's the digest computed for… | |||||
Not Done Inline ActionsNot sure if this is relevant but at $PRIORJOB I implemented some similar facilities. It was used for tracking RTT for sessions but we were uninterested in 'local' sessions, so anything with an RTT of less than 1 ms (from memory) was treated as 0 and ignored. julian: Not sure if this is relevant but at $PRIORJOB I implemented some similar facilities. It was… | |||||
Not Done Inline ActionsDoes this address your feedback @thj? allanjude: Does this address your feedback @thj? | |||||
* Compute goodput in bits per millisecond. | |||||
*/ | |||||
gput = (((int64_t)(th->th_ack - tp->gput_seq)) << 3) / | |||||
max(1, tcp_ts_getticks() - tp->gput_ts); | |||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_GPUT, | |||||
gput); | |||||
/* | |||||
* XXXLAS: This is a temporary hack, and should be | |||||
* chained off VOI_TCP_GPUT when stats(9) grows an API | |||||
* to deal with chained VOIs. | |||||
*/ | |||||
if (tp->t_stats_gput_prev > 0) | |||||
stats_voi_update_abs_s32(tp->t_stats, | |||||
VOI_TCP_GPUT_ND, | |||||
((gput - tp->t_stats_gput_prev) * 100) / | |||||
tp->t_stats_gput_prev); | |||||
tp->t_flags &= ~TF_GPUTINPROG; | |||||
tp->t_stats_gput_prev = gput; | |||||
} | |||||
#endif /* STATS */ | |||||
if (tp->snd_cwnd > tp->snd_ssthresh) { | if (tp->snd_cwnd > tp->snd_ssthresh) { | ||||
tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, | tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, | ||||
nsegs * V_tcp_abc_l_var * tcp_maxseg(tp)); | nsegs * V_tcp_abc_l_var * tcp_maxseg(tp)); | ||||
if (tp->t_bytes_acked >= tp->snd_cwnd) { | if (tp->t_bytes_acked >= tp->snd_cwnd) { | ||||
tp->t_bytes_acked -= tp->snd_cwnd; | tp->t_bytes_acked -= tp->snd_cwnd; | ||||
tp->ccv->flags |= CCF_ABC_SENTAWND; | tp->ccv->flags |= CCF_ABC_SENTAWND; | ||||
} | } | ||||
} else { | } else { | ||||
tp->ccv->flags &= ~CCF_ABC_SENTAWND; | tp->ccv->flags &= ~CCF_ABC_SENTAWND; | ||||
tp->t_bytes_acked = 0; | tp->t_bytes_acked = 0; | ||||
} | } | ||||
} | } | ||||
if (CC_ALGO(tp)->ack_received != NULL) { | if (CC_ALGO(tp)->ack_received != NULL) { | ||||
/* XXXLAS: Find a way to live without this */ | /* XXXLAS: Find a way to live without this */ | ||||
tp->ccv->curack = th->th_ack; | tp->ccv->curack = th->th_ack; | ||||
CC_ALGO(tp)->ack_received(tp->ccv, type); | CC_ALGO(tp)->ack_received(tp->ccv, type); | ||||
} | } | ||||
#ifdef STATS | |||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd); | |||||
#endif | |||||
} | } | ||||
void | void | ||||
cc_conn_init(struct tcpcb *tp) | cc_conn_init(struct tcpcb *tp) | ||||
{ | { | ||||
struct hc_metrics_lite metrics; | struct hc_metrics_lite metrics; | ||||
struct inpcb *inp = tp->t_inpcb; | struct inpcb *inp = tp->t_inpcb; | ||||
u_int maxseg; | u_int maxseg; | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | |||||
void inline | void inline | ||||
cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) | cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type) | ||||
{ | { | ||||
u_int maxseg; | u_int maxseg; | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
#ifdef STATS | |||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type); | |||||
#endif | |||||
switch(type) { | switch(type) { | ||||
case CC_NDUPACK: | case CC_NDUPACK: | ||||
if (!IN_FASTRECOVERY(tp->t_flags)) { | if (!IN_FASTRECOVERY(tp->t_flags)) { | ||||
tp->snd_recover = tp->snd_max; | tp->snd_recover = tp->snd_max; | ||||
if (tp->t_flags & TF_ECN_PERMIT) | if (tp->t_flags & TF_ECN_PERMIT) | ||||
tp->t_flags |= TF_ECN_SND_CWR; | tp->t_flags |= TF_ECN_SND_CWR; | ||||
} | } | ||||
break; | break; | ||||
▲ Show 20 Lines • Show All 1,171 Lines • ▼ Show 20 Lines | #endif | ||||
*/ | */ | ||||
tp->t_rcvtime = ticks; | tp->t_rcvtime = ticks; | ||||
/* | /* | ||||
* Scale up the window into a 32-bit value. | * Scale up the window into a 32-bit value. | ||||
* For the SYN_SENT state the scale is zero. | * For the SYN_SENT state the scale is zero. | ||||
*/ | */ | ||||
tiwin = th->th_win << tp->snd_scale; | tiwin = th->th_win << tp->snd_scale; | ||||
#ifdef STATS | |||||
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_FRWIN, tiwin); | |||||
#endif | |||||
/* | /* | ||||
* TCP ECN processing. | * TCP ECN processing. | ||||
*/ | */ | ||||
if (tp->t_flags & TF_ECN_PERMIT) { | if (tp->t_flags & TF_ECN_PERMIT) { | ||||
if (thflags & TH_CWR) | if (thflags & TH_CWR) | ||||
tp->t_flags &= ~TF_ECN_SND_ECE; | tp->t_flags &= ~TF_ECN_SND_ECE; | ||||
switch (iptos & IPTOS_ECN_MASK) { | switch (iptos & IPTOS_ECN_MASK) { | ||||
▲ Show 20 Lines • Show All 1,857 Lines • ▼ Show 20 Lines | |||||
tcp_xmit_timer(struct tcpcb *tp, int rtt) | tcp_xmit_timer(struct tcpcb *tp, int rtt) | ||||
{ | { | ||||
int delta; | int delta; | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
TCPSTAT_INC(tcps_rttupdated); | TCPSTAT_INC(tcps_rttupdated); | ||||
tp->t_rttupdated++; | tp->t_rttupdated++; | ||||
#ifdef STATS | |||||
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt)); | |||||
#endif | |||||
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { | if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { | ||||
/* | /* | ||||
* srtt is stored as fixed point with 5 bits after the | * srtt is stored as fixed point with 5 bits after the | ||||
* binary point (i.e., scaled by 8). The following magic | * binary point (i.e., scaled by 8). The following magic | ||||
* is equivalent to the smoothing algorithm in rfc793 with | * is equivalent to the smoothing algorithm in rfc793 with | ||||
* an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed | * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed | ||||
* point). Adjust rtt to origin 0. | * point). Adjust rtt to origin 0. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 418 Lines • Show Last 20 Lines |
Is the goodput measurement time (i.e. number of ms for the measurement window) reported by stats somewhere?
Does the calculation break with an rtt <1ms?
I think a comment here might be helpful for future readers of the code, something along the lines of