Changeset View
Changeset View
Standalone View
Standalone View
bbr.c
/*- | /*- | ||||
* Copyright (c) 2016-9 | * Copyright (c) 2016-2020 Netflix, Inc. | ||||
* Netflix Inc. | |||||
* All rights reserved. | |||||
* | * | ||||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | ||||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | ||||
Context not available. | |||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/tim_filter.h> | #include <sys/tim_filter.h> | ||||
#include <sys/time.h> | #include <sys/time.h> | ||||
#include <sys/protosw.h> | |||||
#include <vm/uma.h> | #include <vm/uma.h> | ||||
#include <sys/kern_prefetch.h> | #include <sys/kern_prefetch.h> | ||||
Context not available. | |||||
&bbr_clear_lost, 0, sysctl_bbr_clear_lost, "IU", "Clear lost counters"); | &bbr_clear_lost, 0, sysctl_bbr_clear_lost, "IU", "Clear lost counters"); | ||||
} | } | ||||
/*
 * Check whether the connection has exceeded its progress timeout.
 *
 * Reads t_maxunacktime and t_acktime from bbr->rc_tp. Returns 1 when both
 * are non-zero, ticks is later than t_acktime (per TSTMP_GT), and the
 * number of ticks elapsed since t_acktime is at least t_maxunacktime.
 * Returns 0 in every other case.
 *
 * On the 1 path a PROGRESS_DROP event is logged and the bbr_progress_drops
 * counter is incremented (plus tcps_progdrops when NETFLIX_STATS is
 * defined). Nothing in this function tears the connection down; that is
 * left to the caller.
 */
static inline int32_t | |||||
bbr_progress_timeout_check(struct tcp_bbr *bbr) | |||||
{ | |||||
if (bbr->rc_tp->t_maxunacktime && bbr->rc_tp->t_acktime && | |||||
TSTMP_GT(ticks, bbr->rc_tp->t_acktime)) { | |||||
/* ticks is cast to uint32_t before subtracting to get the elapsed ticks. */
if ((((uint32_t)ticks - bbr->rc_tp->t_acktime)) >= bbr->rc_tp->t_maxunacktime) { | |||||
/* | |||||
* There is an assumption here that the caller will | |||||
* drop the connection, so we increment the | |||||
* statistics. | |||||
*/ | |||||
bbr_log_progress_event(bbr, bbr->rc_tp, ticks, PROGRESS_DROP, __LINE__); | |||||
BBR_STAT_INC(bbr_progress_drops); | |||||
#ifdef NETFLIX_STATS | |||||
KMOD_TCPSTAT_INC(tcps_progdrops); | |||||
#endif | |||||
return (1); | |||||
} | |||||
} | |||||
return (0); | |||||
} | |||||
static void | static void | ||||
bbr_counter_destroy(void) | bbr_counter_destroy(void) | ||||
{ | { | ||||
Context not available. | |||||
COUNTER_ARRAY_FREE(bbr_state_lost, BBR_MAX_STAT); | COUNTER_ARRAY_FREE(bbr_state_lost, BBR_MAX_STAT); | ||||
COUNTER_ARRAY_FREE(bbr_state_time, BBR_MAX_STAT); | COUNTER_ARRAY_FREE(bbr_state_time, BBR_MAX_STAT); | ||||
COUNTER_ARRAY_FREE(bbr_state_resend, BBR_MAX_STAT); | COUNTER_ARRAY_FREE(bbr_state_resend, BBR_MAX_STAT); | ||||
counter_u64_free(bbr_nohdwr_pacing_enobuf); | |||||
counter_u64_free(bbr_hdwr_pacing_enobuf); | |||||
counter_u64_free(bbr_flows_whdwr_pacing); | counter_u64_free(bbr_flows_whdwr_pacing); | ||||
counter_u64_free(bbr_flows_nohdwr_pacing); | counter_u64_free(bbr_flows_nohdwr_pacing); | ||||
Context not available. | |||||
/* Its not time yet */ | /* Its not time yet */ | ||||
return (0); | return (0); | ||||
} | } | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
} | } | ||||
/* | /* | ||||
* Persists timer, here we simply need to setup the | * Here we send a KEEP-ALIVE like probe to the | ||||
* FORCE-DATA flag the output routine will send | * peer, we do not send data. | ||||
* the one byte send. | |||||
* | * | ||||
* We only return 1, saying don't proceed, if all timers | * We only return 1, saying don't proceed, if all timers | ||||
* are stopped (destroyed PCB?). | * are stopped (destroyed PCB?). | ||||
Context not available. | |||||
/* | /* | ||||
* Have we exceeded the user specified progress time? | * Have we exceeded the user specified progress time? | ||||
*/ | */ | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | ||||
goto out; | goto out; | ||||
} | } | ||||
Context not available. | |||||
(ticks - tp->t_rcvtime >= tcp_maxpersistidle || | (ticks - tp->t_rcvtime >= tcp_maxpersistidle || | ||||
tuexen: I guess you want to keep `KMOD_TCPSTAT_INC` here, since this is a kernel module. | |||||
Done Inline Actions: yep this was probably not upstream when you did the update. rrs: yep this was probably not upstream when you did the update. | |||||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { | ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { | ||||
KMOD_TCPSTAT_INC(tcps_persistdrop); | KMOD_TCPSTAT_INC(tcps_persistdrop); | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | ||||
goto out; | goto out; | ||||
} | } | ||||
Context not available. | |||||
if (tp->t_state > TCPS_CLOSE_WAIT && | if (tp->t_state > TCPS_CLOSE_WAIT && | ||||
Not Done Inline Actions: I guess you want to keep KMOD_TCPSTAT_INC here, since this is a kernel module. tuexen: I guess you want to keep `KMOD_TCPSTAT_INC` here, since this is a kernel module. | |||||
Done Inline Actions: yep rrs: yep | |||||
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { | (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { | ||||
KMOD_TCPSTAT_INC(tcps_persistdrop); | KMOD_TCPSTAT_INC(tcps_persistdrop); | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | ||||
goto out; | goto out; | ||||
} | } | ||||
Context not available. | |||||
return (1); | return (1); | ||||
Not Done Inline Actions: I guess you want to keep KMOD_TCPSTAT_INC here, since this is a kernel module. tuexen: I guess you want to keep `KMOD_TCPSTAT_INC` here, since this is a kernel module. | |||||
Done Inline Actions: yep rrs: yep | |||||
dropit: | dropit: | ||||
KMOD_TCPSTAT_INC(tcps_keepdrops); | KMOD_TCPSTAT_INC(tcps_keepdrops); | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
* retransmit interval. Back off to a longer retransmit interval | * retransmit interval. Back off to a longer retransmit interval | ||||
* and retransmit one segment. | * and retransmit one segment. | ||||
*/ | */ | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
retval = 1; | retval = 1; | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); | ||||
goto out; | goto out; | ||||
} | } | ||||
Context not available. | |||||
tp->t_rxtshift = TCP_MAXRXTSHIFT; | tp->t_rxtshift = TCP_MAXRXTSHIFT; | ||||
KMOD_TCPSTAT_INC(tcps_timeoutdrop); | KMOD_TCPSTAT_INC(tcps_timeoutdrop); | ||||
retval = 1; | retval = 1; | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN); | |||||
tcp_set_inp_to_drop(bbr->rc_inp, | tcp_set_inp_to_drop(bbr->rc_inp, | ||||
(tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT)); | (tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT)); | ||||
goto out; | goto out; | ||||
Context not available. | |||||
* to reset him. | * to reset him. | ||||
*/ | */ | ||||
*ret_val = 1; | *ret_val = 1; | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_DATA_A_CLOSE); | |||||
/* tcp_close will kill the inp pre-log the Reset */ | |||||
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); | |||||
tp = tcp_close(tp); | tp = tcp_close(tp); | ||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen); | ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, tlen); | ||||
BBR_STAT_INC(bbr_dropped_af_data); | BBR_STAT_INC(bbr_dropped_af_data); | ||||
Context not available. | |||||
idle_time = bbr_calc_time(cts, bbr->r_ctl.rc_went_idle_time); | idle_time = bbr_calc_time(cts, bbr->r_ctl.rc_went_idle_time); | ||||
bbr->rc_in_persist = 0; | bbr->rc_in_persist = 0; | ||||
bbr->rc_hit_state_1 = 0; | bbr->rc_hit_state_1 = 0; | ||||
tp->t_flags &= ~TF_FORCEDATA; | |||||
bbr->r_ctl.rc_del_time = cts; | bbr->r_ctl.rc_del_time = cts; | ||||
/* | /* | ||||
* We invalidate the last ack here since we | * We invalidate the last ack here since we | ||||
Context not available. | |||||
return (0); | return (0); | ||||
} | } | ||||
/* | /* | ||||
* Process segments with URG. | * We don't support urgent data but | ||||
* drag along the up just to make sure | |||||
* if there is a stack switch no one | |||||
* is surprised. | |||||
*/ | */ | ||||
if ((thflags & TH_URG) && th->th_urp && | tp->rcv_up = tp->rcv_nxt; | ||||
TCPS_HAVERCVDFIN(tp->t_state) == 0) { | |||||
/* | |||||
* This is a kludge, but if we receive and accept random | |||||
* urgent pointers, we'll crash in soreceive. It's hard to | |||||
* imagine someone actually wanting to send this much urgent | |||||
* data. | |||||
*/ | |||||
SOCKBUF_LOCK(&so->so_rcv); | |||||
if (th->th_urp + sbavail(&so->so_rcv) > sb_max) { | |||||
th->th_urp = 0; /* XXX */ | |||||
thflags &= ~TH_URG; /* XXX */ | |||||
SOCKBUF_UNLOCK(&so->so_rcv); /* XXX */ | |||||
goto dodata; /* XXX */ | |||||
} | |||||
/* | |||||
* If this segment advances the known urgent pointer, then | |||||
* mark the data stream. This should not happen in | |||||
* CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since a | |||||
* FIN has been received from the remote side. In these | |||||
* states we ignore the URG. | |||||
* | |||||
* According to RFC961 (Assigned Protocols), the urgent | |||||
* pointer points to the last octet of urgent data. We | |||||
* continue, however, to consider it to indicate the first | |||||
* octet of data past the urgent section as the original | |||||
* spec states (in one of two places). | |||||
*/ | |||||
if (SEQ_GT(th->th_seq + th->th_urp, tp->rcv_up)) { | |||||
tp->rcv_up = th->th_seq + th->th_urp; | |||||
so->so_oobmark = sbavail(&so->so_rcv) + | |||||
(tp->rcv_up - tp->rcv_nxt) - 1; | |||||
if (so->so_oobmark == 0) | |||||
so->so_rcv.sb_state |= SBS_RCVATMARK; | |||||
sohasoutofband(so); | |||||
tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA); | |||||
} | |||||
SOCKBUF_UNLOCK(&so->so_rcv); | |||||
/* | |||||
* Remove out of band data so doesn't get presented to user. | |||||
* This can happen independent of advancing the URG pointer, | |||||
* but if two URG's are pending at once, some out-of-band | |||||
* data may creep in... ick. | |||||
*/ | |||||
if (th->th_urp <= (uint32_t)tlen && | |||||
!(so->so_options & SO_OOBINLINE)) { | |||||
/* hdr drop is delayed */ | |||||
tcp_pulloutofband(so, th, m, drop_hdrlen); | |||||
} | |||||
} else { | |||||
/* | |||||
* If no out of band data is expected, pull receive urgent | |||||
* pointer along with the receive window. | |||||
*/ | |||||
if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) | |||||
tp->rcv_up = tp->rcv_nxt; | |||||
} | |||||
dodata: /* XXX */ | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
/* | /* | ||||
Context not available. | |||||
static int | static int | ||||
bbr_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t nxt_pkt) | uint32_t tiwin, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t acked; | int32_t acked; | ||||
uint16_t nsegs; | uint16_t nsegs; | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t todrop; | int32_t todrop; | ||||
int32_t ourfinisacked = 0; | int32_t ourfinisacked = 0; | ||||
Context not available. | |||||
if ((thflags & TH_ACK) && | if ((thflags & TH_ACK) && | ||||
(SEQ_LEQ(th->th_ack, tp->iss) || | (SEQ_LEQ(th->th_ack, tp->iss) || | ||||
SEQ_GT(th->th_ack, tp->snd_max))) { | SEQ_GT(th->th_ack, tp->snd_max))) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | |||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t ourfinisacked = 0; | int32_t ourfinisacked = 0; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
if ((thflags & TH_ACK) && | if ((thflags & TH_ACK) && | ||||
(SEQ_LEQ(th->th_ack, tp->snd_una) || | (SEQ_LEQ(th->th_ack, tp->snd_una) || | ||||
SEQ_GT(th->th_ack, tp->snd_max))) { | SEQ_GT(th->th_ack, tp->snd_max))) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | |||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
* data), a valid ACK, a FIN, or a RST. | * data), a valid ACK, a FIN, or a RST. | ||||
*/ | */ | ||||
if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { | if ((thflags & (TH_SYN | TH_ACK)) == (TH_SYN | TH_ACK)) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | |||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} else if (thflags & TH_SYN) { | } else if (thflags & TH_SYN) { | ||||
Context not available. | |||||
* "LAND" DoS attack. | * "LAND" DoS attack. | ||||
*/ | */ | ||||
if (SEQ_LT(th->th_seq, tp->irs)) { | if (SEQ_LT(th->th_seq, tp->irs)) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | |||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
struct tcp_bbr *bbr; | struct tcp_bbr *bbr; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
__predict_true(th->th_seq == tp->rcv_nxt)) { | __predict_true(th->th_seq == tp->rcv_nxt)) { | ||||
if (tlen == 0) { | if (tlen == 0) { | ||||
if (bbr_fastack(m, th, so, tp, to, drop_hdrlen, tlen, | if (bbr_fastack(m, th, so, tp, to, drop_hdrlen, tlen, | ||||
tiwin, nxt_pkt)) { | tiwin, nxt_pkt, iptos)) { | ||||
return (0); | return (0); | ||||
} | } | ||||
} else { | } else { | ||||
Context not available. | |||||
return (ret_val); | return (ret_val); | ||||
} | } | ||||
if (sbavail(&so->so_snd)) { | if (sbavail(&so->so_snd)) { | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
struct tcp_bbr *bbr; | struct tcp_bbr *bbr; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
return (ret_val); | return (ret_val); | ||||
} | } | ||||
if (sbavail(&so->so_snd)) { | if (sbavail(&so->so_snd)) { | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
if (bbr->rc_allow_data_af_clo == 0) { | if (bbr->rc_allow_data_af_clo == 0) { | ||||
close_now: | close_now: | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_DATA_A_CLOSE); | |||||
/* tcp_close will kill the inp pre-log the Reset */ | |||||
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); | |||||
tp = tcp_close(tp); | tp = tcp_close(tp); | ||||
KMOD_TCPSTAT_INC(tcps_rcvafterclose); | KMOD_TCPSTAT_INC(tcps_rcvafterclose); | ||||
ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, (*tlen)); | ctf_do_dropwithreset(m, tp, th, BANDLIM_UNLIMITED, (*tlen)); | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t ourfinisacked = 0; | int32_t ourfinisacked = 0; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
tcp_state_change(tp, TCPS_FIN_WAIT_2); | tcp_state_change(tp, TCPS_FIN_WAIT_2); | ||||
} | } | ||||
if (sbavail(&so->so_snd)) { | if (sbavail(&so->so_snd)) { | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t ourfinisacked = 0; | int32_t ourfinisacked = 0; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
return (1); | return (1); | ||||
} | } | ||||
if (sbavail(&so->so_snd)) { | if (sbavail(&so->so_snd)) { | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t ourfinisacked = 0; | int32_t ourfinisacked = 0; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
return (1); | return (1); | ||||
} | } | ||||
if (sbavail(&so->so_snd)) { | if (sbavail(&so->so_snd)) { | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
static int | static int | ||||
bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, | bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen, | ||||
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt) | uint32_t tiwin, int32_t thflags, int32_t nxt_pkt, uint8_t iptos) | ||||
{ | { | ||||
int32_t ourfinisacked = 0; | int32_t ourfinisacked = 0; | ||||
int32_t ret_val; | int32_t ret_val; | ||||
Context not available. | |||||
return (ret_val); | return (ret_val); | ||||
} | } | ||||
if (sbavail(&so->so_snd)) { | if (sbavail(&so->so_snd)) { | ||||
if (bbr_progress_timeout_check(bbr)) { | if (ctf_progress_timeout_check(tp, true)) { | ||||
bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
* always. All other times (timers etc) we must have a rack-state | * always. All other times (timers etc) we must have a rack-state | ||||
* set (so we assure we have done the checks above for SACK). | * set (so we assure we have done the checks above for SACK). | ||||
*/ | */ | ||||
if (thflags & TH_FIN) | |||||
tcp_log_end_status(tp, TCP_EI_STATUS_CLIENT_FIN); | |||||
if (bbr->r_state != tp->t_state) | if (bbr->r_state != tp->t_state) | ||||
bbr_set_state(tp, bbr, tiwin); | bbr_set_state(tp, bbr, tiwin); | ||||
Context not available. | |||||
*/ | */ | ||||
if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && | if ((tp->t_state == TCPS_SYN_SENT) && (thflags & TH_ACK) && | ||||
(SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { | (SEQ_LEQ(th->th_ack, tp->iss) || SEQ_GT(th->th_ack, tp->snd_max))) { | ||||
tcp_log_end_status(tp, TCP_EI_STATUS_RST_IN_FRONT); | |||||
ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ctf_do_dropwithreset_conn(m, tp, th, BANDLIM_RST_OPENPORT, tlen); | ||||
return (1); | return (1); | ||||
} | } | ||||
Context not available. | |||||
} | } | ||||
retval = (*bbr->r_substate) (m, th, so, | retval = (*bbr->r_substate) (m, th, so, | ||||
tp, &to, drop_hdrlen, | tp, &to, drop_hdrlen, | ||||
tlen, tiwin, thflags, nxt_pkt); | tlen, tiwin, thflags, nxt_pkt, iptos); | ||||
#ifdef BBR_INVARIANTS | #ifdef BBR_INVARIANTS | ||||
if ((retval == 0) && | if ((retval == 0) && | ||||
(tp->t_inpcb == NULL)) { | (tp->t_inpcb == NULL)) { | ||||
Context not available. | |||||
bbr_do_error_accounting(tp, bbr, rsm, len, error); | bbr_do_error_accounting(tp, bbr, rsm, len, error); | ||||
return; | return; | ||||
} | } | ||||
if ((tp->t_flags & TF_FORCEDATA) && len == 1) { | if (rsm) { | ||||
/* Window probe */ | |||||
KMOD_TCPSTAT_INC(tcps_sndprobe); | |||||
#ifdef STATS | |||||
stats_voi_update_abs_u32(tp->t_stats, | |||||
VOI_TCP_RETXPB, len); | |||||
#endif | |||||
} else if (rsm) { | |||||
if (rsm->r_flags & BBR_TLP) { | if (rsm->r_flags & BBR_TLP) { | ||||
/* | /* | ||||
* TLP should not count in retran count, but in its | * TLP should not count in retran count, but in its | ||||
Context not available. | |||||
} | } | ||||
/* Mark that we have called bbr_output(). */ | /* Mark that we have called bbr_output(). */ | ||||
if ((bbr->r_timer_override) || | if ((bbr->r_timer_override) || | ||||
(tp->t_flags & TF_FORCEDATA) || | |||||
(tp->t_state < TCPS_ESTABLISHED)) { | (tp->t_state < TCPS_ESTABLISHED)) { | ||||
/* Timeouts or early states are exempt */ | /* Timeouts or early states are exempt */ | ||||
if (inp->inp_in_hpts) | if (inp->inp_in_hpts) | ||||
Context not available. | |||||
} | } | ||||
SOCKBUF_LOCK(sb); | SOCKBUF_LOCK(sb); | ||||
/* | /* | ||||
* If in persist timeout with window of 0, send 1 byte. Otherwise, | |||||
* if window is small but nonzero and time TF_SENTFIN expired, we | |||||
* will send what we can and go to transmit state. | |||||
*/ | |||||
if (tp->t_flags & TF_FORCEDATA) { | |||||
if ((sendwin == 0) || (sendwin <= (tp->snd_max - tp->snd_una))) { | |||||
/* | |||||
* If we still have some data to send, then clear | |||||
* the FIN bit. Usually this would happen below | |||||
* when it realizes that we aren't sending all the | |||||
* data. However, if we have exactly 1 byte of | |||||
* unsent data, then it won't clear the FIN bit | |||||
* below, and if we are in persist state, we wind up | |||||
* sending the packet without recording that we sent | |||||
* the FIN bit. | |||||
* | |||||
* We can't just blindly clear the FIN bit, because | |||||
* if we don't have any more data to send then the | |||||
* probe will be the FIN itself. | |||||
*/ | |||||
if (sb_offset < sbused(sb)) | |||||
flags &= ~TH_FIN; | |||||
sendwin = 1; | |||||
} else { | |||||
if ((bbr->rc_in_persist != 0) && | |||||
(tp->snd_wnd >= min((bbr->r_ctl.rc_high_rwnd/2), | |||||
bbr_minseg(bbr)))) { | |||||
/* Exit persists if there is space */ | |||||
bbr_exit_persist(tp, bbr, cts, __LINE__); | |||||
} | |||||
if (rsm == NULL) { | |||||
/* | |||||
* If we are dropping persist mode then we | |||||
* need to correct sb_offset if not a | |||||
* retransmit. | |||||
*/ | |||||
sb_offset = tp->snd_max - tp->snd_una; | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* If snd_nxt == snd_max and we have transmitted a FIN, the | * If snd_nxt == snd_max and we have transmitted a FIN, the | ||||
* sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a | * sb_offset will be > 0 even if so_snd.sb_cc is 0, resulting in a | ||||
* negative length. This can also occur when TCP opens up its | * negative length. This can also occur when TCP opens up its | ||||
Context not available. | |||||
*/ | */ | ||||
len = 0; | len = 0; | ||||
} | } | ||||
if ((tp->t_flags & TF_FORCEDATA) && (bbr->rc_in_persist)) { | if (bbr->rc_in_persist) { | ||||
/* | /* | ||||
* We are in persists, figure out if | * We are in persists, figure out if | ||||
* a retransmit is available (maybe the previous | * a retransmit is available (maybe the previous | ||||
Context not available. | |||||
if ((tp->snd_una == tp->snd_max) && len) { /* Nothing outstanding */ | if ((tp->snd_una == tp->snd_max) && len) { /* Nothing outstanding */ | ||||
goto send; | goto send; | ||||
} | } | ||||
if (tp->t_flags & TF_FORCEDATA) { /* typ. timeout case */ | |||||
goto send; | |||||
} | |||||
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) { | if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) { | ||||
goto send; | goto send; | ||||
} | } | ||||
Context not available. | |||||
goto send; | goto send; | ||||
} | } | ||||
/* | /* | ||||
* Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW | * Send if we owe the peer an ACK, RST, SYN. ACKNOW | ||||
* is also a catch-all for the retransmit timer timeout case. | * is also a catch-all for the retransmit timer timeout case. | ||||
*/ | */ | ||||
if (tp->t_flags & TF_ACKNOW) { | if (tp->t_flags & TF_ACKNOW) { | ||||
Context not available. | |||||
if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) { | if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) { | ||||
goto send; | goto send; | ||||
} | } | ||||
if (SEQ_GT(tp->snd_up, tp->snd_una)) { | |||||
goto send; | |||||
} | |||||
/* | /* | ||||
* If our state indicates that FIN should be sent and we have not | * If our state indicates that FIN should be sent and we have not | ||||
* yet done so, then we need to send. | * yet done so, then we need to send. | ||||
Context not available. | |||||
} | } | ||||
if (tot_len == 0) | if (tot_len == 0) | ||||
counter_u64_add(bbr_out_size[TCP_MSS_ACCT_JUSTRET], 1); | counter_u64_add(bbr_out_size[TCP_MSS_ACCT_JUSTRET], 1); | ||||
tp->t_flags &= ~TF_FORCEDATA; | |||||
/* Dont update the time if we did not send */ | /* Dont update the time if we did not send */ | ||||
bbr->r_ctl.rc_last_delay_val = 0; | bbr->r_ctl.rc_last_delay_val = 0; | ||||
bbr->rc_output_starts_timer = 1; | bbr->rc_output_starts_timer = 1; | ||||
Context not available. | |||||
KMOD_TCPSTAT_INC(tcps_sndacks); | KMOD_TCPSTAT_INC(tcps_sndacks); | ||||
else if (flags & (TH_SYN | TH_FIN | TH_RST)) | else if (flags & (TH_SYN | TH_FIN | TH_RST)) | ||||
KMOD_TCPSTAT_INC(tcps_sndctrl); | KMOD_TCPSTAT_INC(tcps_sndctrl); | ||||
else if (SEQ_GT(tp->snd_up, tp->snd_una)) | |||||
KMOD_TCPSTAT_INC(tcps_sndurg); | |||||
else | else | ||||
KMOD_TCPSTAT_INC(tcps_sndwinup); | KMOD_TCPSTAT_INC(tcps_sndwinup); | ||||
Context not available. | |||||
tp->t_flags |= TF_RXWIN0SENT; | tp->t_flags |= TF_RXWIN0SENT; | ||||
} else | } else | ||||
tp->t_flags &= ~TF_RXWIN0SENT; | tp->t_flags &= ~TF_RXWIN0SENT; | ||||
if (SEQ_GT(tp->snd_up, tp->snd_max)) { | /* | ||||
th->th_urp = htons((u_short)(tp->snd_up - tp->snd_max)); | * We don't support urgent data, but drag along | ||||
th->th_flags |= TH_URG; | * the pointer in case of a stack switch. | ||||
} else | */ | ||||
/* | tp->snd_up = tp->snd_una; | ||||
* If no urgent pointer to send, then we pull the urgent | |||||
* pointer to the left edge of the send window so that it | |||||
* doesn't drift into the send window on sequence number | |||||
* wraparound. | |||||
*/ | |||||
tp->snd_up = tp->snd_una; /* drag it along */ | |||||
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) | #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) | ||||
if (to.to_flags & TOF_SIGNATURE) { | if (to.to_flags & TOF_SIGNATURE) { | ||||
Context not available. | |||||
*/ | */ | ||||
return (0); | return (0); | ||||
} | } | ||||
if (((tp->t_flags & TF_FORCEDATA) == 0) || | if (bbr->rc_in_persist == 0) { | ||||
(bbr->rc_in_persist == 0)) { | |||||
/* | /* | ||||
* Advance snd_nxt over sequence space of this segment. | * Advance snd_nxt over sequence space of this segment. | ||||
*/ | */ | ||||
Context not available. | |||||
tp->t_maxseg = old_maxseg - 40; | tp->t_maxseg = old_maxseg - 40; | ||||
bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts); | bbr_log_msgsize_fail(bbr, tp, len, maxseg, mtu, 0, tso, cts); | ||||
} | } | ||||
tp->t_flags &= ~TF_FORCEDATA; | |||||
/* | /* | ||||
* Nuke all other things that can interfere | * Nuke all other things that can interfere | ||||
* with slot | * with slot | ||||
Context not available. | |||||
} | } | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
default: | default: | ||||
tp->t_flags &= ~TF_FORCEDATA; | |||||
slot = (bbr_error_base_paceout + 3) << bbr->oerror_cnt; | slot = (bbr_error_base_paceout + 3) << bbr->oerror_cnt; | ||||
bbr->rc_output_starts_timer = 1; | bbr->rc_output_starts_timer = 1; | ||||
bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0); | bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0); | ||||
Context not available. | |||||
((flags & TH_RST) == 0) && | ((flags & TH_RST) == 0) && | ||||
(IN_RECOVERY(tp->t_flags) == 0) && | (IN_RECOVERY(tp->t_flags) == 0) && | ||||
(bbr->rc_in_persist == 0) && | (bbr->rc_in_persist == 0) && | ||||
((tp->t_flags & TF_FORCEDATA) == 0) && | |||||
(tot_len < bbr->r_ctl.rc_pace_max_segs)) { | (tot_len < bbr->r_ctl.rc_pace_max_segs)) { | ||||
/* | /* | ||||
* For non-tso we need to goto again until we have sent out | * For non-tso we need to goto again until we have sent out | ||||
Context not available. | |||||
} | } | ||||
rsm = NULL; | rsm = NULL; | ||||
sack_rxmit = 0; | sack_rxmit = 0; | ||||
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK | TF_FORCEDATA); | tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); | ||||
goto again; | goto again; | ||||
} | } | ||||
skip_again: | skip_again: | ||||
if ((error == 0) && (flags & TH_FIN)) | |||||
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_FIN); | |||||
if ((error == 0) && (flags & TH_RST)) | |||||
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST); | |||||
if (((flags & (TH_RST | TH_SYN | TH_FIN)) == 0) && tot_len) { | if (((flags & (TH_RST | TH_SYN | TH_FIN)) == 0) && tot_len) { | ||||
/* | /* | ||||
* Calculate/Re-Calculate the hptsi slot in usecs based on | * Calculate/Re-Calculate the hptsi slot in usecs based on | ||||
Context not available. | |||||
if (bbr->rc_no_pacing) | if (bbr->rc_no_pacing) | ||||
slot = 0; | slot = 0; | ||||
} | } | ||||
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK | TF_FORCEDATA); | tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); | ||||
enobufs: | enobufs: | ||||
if (bbr->rc_use_google == 0) | if (bbr->rc_use_google == 0) | ||||
bbr_check_bbr_for_state(bbr, cts, __LINE__, 0); | bbr_check_bbr_for_state(bbr, cts, __LINE__, 0); | ||||
Context not available. | |||||
return (error); | return (error); | ||||
} | } | ||||
/*
 * Report whether this stack accepts the given PRUS_* request flags.
 *
 * Returns EOPNOTSUPP when PRUS_OOB is set in flags and 0 otherwise.
 * The tp argument is not examined.
 *
 * NOTE(review): presumably this is how out-of-band (urgent) sends are
 * refused for this stack -- confirm against the tfb_pru_options caller.
 */
static int | |||||
bbr_pru_options(struct tcpcb *tp, int flags) | |||||
{ | |||||
if (flags & PRUS_OOB) | |||||
return (EOPNOTSUPP); | |||||
return (0); | |||||
} | |||||
struct tcp_function_block __tcp_bbr = { | struct tcp_function_block __tcp_bbr = { | ||||
.tfb_tcp_block_name = __XSTRING(STACKNAME), | .tfb_tcp_block_name = __XSTRING(STACKNAME), | ||||
Context not available. | |||||
.tfb_tcp_timer_stop = bbr_timer_stop, | .tfb_tcp_timer_stop = bbr_timer_stop, | ||||
.tfb_tcp_rexmit_tmr = bbr_remxt_tmr, | .tfb_tcp_rexmit_tmr = bbr_remxt_tmr, | ||||
.tfb_tcp_handoff_ok = bbr_handoff_ok, | .tfb_tcp_handoff_ok = bbr_handoff_ok, | ||||
.tfb_tcp_mtu_chg = bbr_mtu_chg | .tfb_tcp_mtu_chg = bbr_mtu_chg, | ||||
.tfb_pru_options = bbr_pru_options, | |||||
}; | }; | ||||
static const char *bbr_stack_names[] = { | static const char *bbr_stack_names[] = { | ||||
Context not available. |
I guess you want to keep KMOD_TCPSTAT_INC here, since this is a kernel module.