Changeset View
Changeset View
Standalone View
Standalone View
head/sys/netinet/tcp_stacks/rack.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show All 26 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_inet.h" | #include "opt_inet.h" | ||||
#include "opt_inet6.h" | #include "opt_inet6.h" | ||||
#include "opt_ipsec.h" | #include "opt_ipsec.h" | ||||
#include "opt_tcpdebug.h" | #include "opt_tcpdebug.h" | ||||
#include "opt_ratelimit.h" | #include "opt_ratelimit.h" | ||||
#include "opt_kern_tls.h" | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/arb.h> | #include <sys/arb.h> | ||||
#include <sys/module.h> | #include <sys/module.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#ifdef TCP_HHOOK | #ifdef TCP_HHOOK | ||||
#include <sys/hhook.h> | #include <sys/hhook.h> | ||||
#endif | #endif | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/mbuf.h> | #include <sys/mbuf.h> | ||||
#include <sys/proc.h> /* for proc0 declaration */ | #include <sys/proc.h> /* for proc0 declaration */ | ||||
#include <sys/socket.h> | #include <sys/socket.h> | ||||
#include <sys/socketvar.h> | #include <sys/socketvar.h> | ||||
#ifdef KERN_TLS | |||||
#include <sys/ktls.h> | |||||
#endif | |||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#ifdef STATS | #ifdef STATS | ||||
#include <sys/qmath.h> | #include <sys/qmath.h> | ||||
#include <sys/tree.h> | #include <sys/tree.h> | ||||
#include <sys/stats.h> /* Must come after qmath.h and tree.h */ | #include <sys/stats.h> /* Must come after qmath.h and tree.h */ | ||||
#else | #else | ||||
#include <sys/tree.h> | #include <sys/tree.h> | ||||
▲ Show 20 Lines • Show All 133 Lines • ▼ Show 20 Lines | |||||
static int32_t rack_limits_scwnd = 1; | static int32_t rack_limits_scwnd = 1; | ||||
static int32_t rack_enable_mqueue_for_nonpaced = 0; | static int32_t rack_enable_mqueue_for_nonpaced = 0; | ||||
static int32_t rack_disable_prr = 0; | static int32_t rack_disable_prr = 0; | ||||
static int32_t use_rack_rr = 1; | static int32_t use_rack_rr = 1; | ||||
static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */ | static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */ | ||||
static int32_t rack_persist_min = 250; /* 250ms */ | static int32_t rack_persist_min = 250; /* 250ms */ | ||||
static int32_t rack_persist_max = 2000; /* 2 Second */ | static int32_t rack_persist_max = 2000; /* 2 Second */ | ||||
static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */ | static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */ | ||||
static int32_t rack_hw_tls_max_seg = 3; /* 3 means use hw-tls single segment */ | |||||
static int32_t rack_default_init_window = 0; /* Use system default */ | static int32_t rack_default_init_window = 0; /* Use system default */ | ||||
static int32_t rack_limit_time_with_srtt = 0; | static int32_t rack_limit_time_with_srtt = 0; | ||||
static int32_t rack_hw_pace_adjust = 0; | static int32_t rack_hw_pace_adjust = 0; | ||||
/* | /* | ||||
* Currently regular tcp has a rto_min of 30ms | * Currently regular tcp has a rto_min of 30ms | ||||
* the backoff goes 12 times so that ends up | * the backoff goes 12 times so that ends up | ||||
* being a total of 122.850 seconds before a | * being a total of 122.850 seconds before a | ||||
* connection is killed. | * connection is killed. | ||||
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines | |||||
counter_u64_t rack_used_tlpmethod; | counter_u64_t rack_used_tlpmethod; | ||||
counter_u64_t rack_used_tlpmethod2; | counter_u64_t rack_used_tlpmethod2; | ||||
counter_u64_t rack_enter_tlp_calc; | counter_u64_t rack_enter_tlp_calc; | ||||
counter_u64_t rack_input_idle_reduces; | counter_u64_t rack_input_idle_reduces; | ||||
counter_u64_t rack_collapsed_win; | counter_u64_t rack_collapsed_win; | ||||
counter_u64_t rack_tlp_does_nada; | counter_u64_t rack_tlp_does_nada; | ||||
counter_u64_t rack_try_scwnd; | counter_u64_t rack_try_scwnd; | ||||
/* Counters for HW TLS */ | |||||
counter_u64_t rack_tls_rwnd; | |||||
counter_u64_t rack_tls_cwnd; | |||||
counter_u64_t rack_tls_app; | |||||
counter_u64_t rack_tls_other; | |||||
counter_u64_t rack_tls_filled; | |||||
counter_u64_t rack_tls_rxt; | |||||
counter_u64_t rack_tls_tlp; | |||||
/* Temp CPU counters */ | /* Temp CPU counters */ | ||||
counter_u64_t rack_find_high; | counter_u64_t rack_find_high; | ||||
counter_u64_t rack_progress_drops; | counter_u64_t rack_progress_drops; | ||||
counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE]; | counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE]; | ||||
counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]; | counter_u64_t rack_opts_arry[RACK_OPTS_SIZE]; | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 190 Lines • ▼ Show 20 Lines | #endif | ||||
counter_u64_zero(rack_sack_proc_all); | counter_u64_zero(rack_sack_proc_all); | ||||
counter_u64_zero(rack_sack_proc_short); | counter_u64_zero(rack_sack_proc_short); | ||||
counter_u64_zero(rack_sack_proc_restart); | counter_u64_zero(rack_sack_proc_restart); | ||||
counter_u64_zero(rack_to_alloc); | counter_u64_zero(rack_to_alloc); | ||||
counter_u64_zero(rack_to_alloc_limited); | counter_u64_zero(rack_to_alloc_limited); | ||||
counter_u64_zero(rack_alloc_limited_conns); | counter_u64_zero(rack_alloc_limited_conns); | ||||
counter_u64_zero(rack_split_limited); | counter_u64_zero(rack_split_limited); | ||||
counter_u64_zero(rack_find_high); | counter_u64_zero(rack_find_high); | ||||
counter_u64_zero(rack_tls_rwnd); | |||||
counter_u64_zero(rack_tls_cwnd); | |||||
counter_u64_zero(rack_tls_app); | |||||
counter_u64_zero(rack_tls_other); | |||||
counter_u64_zero(rack_tls_filled); | |||||
counter_u64_zero(rack_tls_rxt); | |||||
counter_u64_zero(rack_tls_tlp); | |||||
counter_u64_zero(rack_sack_attacks_detected); | counter_u64_zero(rack_sack_attacks_detected); | ||||
counter_u64_zero(rack_sack_attacks_reversed); | counter_u64_zero(rack_sack_attacks_reversed); | ||||
counter_u64_zero(rack_sack_used_next_merge); | counter_u64_zero(rack_sack_used_next_merge); | ||||
counter_u64_zero(rack_sack_used_prev_merge); | counter_u64_zero(rack_sack_used_prev_merge); | ||||
counter_u64_zero(rack_sack_splits); | counter_u64_zero(rack_sack_splits); | ||||
counter_u64_zero(rack_sack_skipped_acked); | counter_u64_zero(rack_sack_skipped_acked); | ||||
counter_u64_zero(rack_ack_total); | counter_u64_zero(rack_ack_total); | ||||
counter_u64_zero(rack_express_sack); | counter_u64_zero(rack_express_sack); | ||||
Show All 40 Lines | rack_counters = SYSCTL_ADD_NODE(&rack_sysctl_ctx, | ||||
"stats", | "stats", | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"Rack Counters"); | "Rack Counters"); | ||||
SYSCTL_ADD_S32(&rack_sysctl_ctx, | SYSCTL_ADD_S32(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_sysctl_root), | SYSCTL_CHILDREN(rack_sysctl_root), | ||||
OID_AUTO, "rate_sample_method", CTLFLAG_RW, | OID_AUTO, "rate_sample_method", CTLFLAG_RW, | ||||
&rack_rate_sample_method , USE_RTT_LOW, | &rack_rate_sample_method , USE_RTT_LOW, | ||||
"What method should we use for rate sampling 0=high, 1=low "); | "What method should we use for rate sampling 0=high, 1=low "); | ||||
SYSCTL_ADD_S32(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_sysctl_root), | |||||
OID_AUTO, "hw_tlsmax", CTLFLAG_RW, | |||||
&rack_hw_tls_max_seg , 3, | |||||
"What is the maximum number of full TLS records that will be sent at once"); | |||||
/* Probe rtt related controls */ | /* Probe rtt related controls */ | ||||
rack_probertt = SYSCTL_ADD_NODE(&rack_sysctl_ctx, | rack_probertt = SYSCTL_ADD_NODE(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_sysctl_root), | SYSCTL_CHILDREN(rack_sysctl_root), | ||||
OID_AUTO, | OID_AUTO, | ||||
"probertt", | "probertt", | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"ProbeRTT related Controls"); | "ProbeRTT related Controls"); | ||||
SYSCTL_ADD_U16(&rack_sysctl_ctx, | SYSCTL_ADD_U16(&rack_sysctl_ctx, | ||||
▲ Show 20 Lines • Show All 801 Lines • ▼ Show 20 Lines | SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | ||||
"Total number of nada tlp calls"); | "Total number of nada tlp calls"); | ||||
rack_try_scwnd = counter_u64_alloc(M_WAITOK); | rack_try_scwnd = counter_u64_alloc(M_WAITOK); | ||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_counters), | SYSCTL_CHILDREN(rack_counters), | ||||
OID_AUTO, "tried_scwnd", CTLFLAG_RD, | OID_AUTO, "tried_scwnd", CTLFLAG_RD, | ||||
&rack_try_scwnd, | &rack_try_scwnd, | ||||
"Total number of scwnd attempts"); | "Total number of scwnd attempts"); | ||||
rack_tls_rwnd = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_rwnd", CTLFLAG_RD, | |||||
&rack_tls_rwnd, | |||||
"Total hdwr tls rwnd limited"); | |||||
rack_tls_cwnd = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_cwnd", CTLFLAG_RD, | |||||
&rack_tls_cwnd, | |||||
"Total hdwr tls cwnd limited"); | |||||
rack_tls_app = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_app", CTLFLAG_RD, | |||||
&rack_tls_app, | |||||
"Total hdwr tls app limited"); | |||||
rack_tls_other = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_other", CTLFLAG_RD, | |||||
&rack_tls_other, | |||||
"Total hdwr tls other limited"); | |||||
rack_tls_filled = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_filled", CTLFLAG_RD, | |||||
&rack_tls_filled, | |||||
"Total hdwr tls filled"); | |||||
rack_tls_rxt = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_rxt", CTLFLAG_RD, | |||||
&rack_tls_rxt, | |||||
"Total hdwr rxt"); | |||||
rack_tls_tlp = counter_u64_alloc(M_WAITOK); | |||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | |||||
SYSCTL_CHILDREN(rack_counters), | |||||
OID_AUTO, "tls_tlp", CTLFLAG_RD, | |||||
&rack_tls_tlp, | |||||
"Total hdwr tls tlp"); | |||||
rack_per_timer_hole = counter_u64_alloc(M_WAITOK); | rack_per_timer_hole = counter_u64_alloc(M_WAITOK); | ||||
SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | SYSCTL_ADD_COUNTER_U64(&rack_sysctl_ctx, | ||||
SYSCTL_CHILDREN(rack_counters), | SYSCTL_CHILDREN(rack_counters), | ||||
OID_AUTO, "timer_hole", CTLFLAG_RD, | OID_AUTO, "timer_hole", CTLFLAG_RD, | ||||
&rack_per_timer_hole, | &rack_per_timer_hole, | ||||
"Total persists start in timer hole"); | "Total persists start in timer hole"); | ||||
COUNTER_ARRAY_ALLOC(rack_out_size, TCP_MSS_ACCT_SIZE, M_WAITOK); | COUNTER_ARRAY_ALLOC(rack_out_size, TCP_MSS_ACCT_SIZE, M_WAITOK); | ||||
SYSCTL_ADD_COUNTER_U64_ARRAY(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), | SYSCTL_ADD_COUNTER_U64_ARRAY(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), | ||||
▲ Show 20 Lines • Show All 740 Lines • ▼ Show 20 Lines | rack_counter_destroy(void) | ||||
counter_u64_free(rack_used_tlpmethod2); | counter_u64_free(rack_used_tlpmethod2); | ||||
counter_u64_free(rack_sack_skipped_acked); | counter_u64_free(rack_sack_skipped_acked); | ||||
counter_u64_free(rack_sack_splits); | counter_u64_free(rack_sack_splits); | ||||
counter_u64_free(rack_progress_drops); | counter_u64_free(rack_progress_drops); | ||||
counter_u64_free(rack_input_idle_reduces); | counter_u64_free(rack_input_idle_reduces); | ||||
counter_u64_free(rack_collapsed_win); | counter_u64_free(rack_collapsed_win); | ||||
counter_u64_free(rack_tlp_does_nada); | counter_u64_free(rack_tlp_does_nada); | ||||
counter_u64_free(rack_try_scwnd); | counter_u64_free(rack_try_scwnd); | ||||
counter_u64_free(rack_tls_rwnd); | |||||
counter_u64_free(rack_tls_cwnd); | |||||
counter_u64_free(rack_tls_app); | |||||
counter_u64_free(rack_tls_other); | |||||
counter_u64_free(rack_tls_filled); | |||||
counter_u64_free(rack_tls_rxt); | |||||
counter_u64_free(rack_tls_tlp); | |||||
counter_u64_free(rack_per_timer_hole); | counter_u64_free(rack_per_timer_hole); | ||||
COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE); | COUNTER_ARRAY_FREE(rack_out_size, TCP_MSS_ACCT_SIZE); | ||||
COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE); | COUNTER_ARRAY_FREE(rack_opts_arry, RACK_OPTS_SIZE); | ||||
} | } | ||||
static struct rack_sendmap * | static struct rack_sendmap * | ||||
rack_alloc(struct tcp_rack *rack) | rack_alloc(struct tcp_rack *rack) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 3,003 Lines • ▼ Show 20 Lines | rack_timeout_tlp(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) | ||||
* A TLP timer has expired. We have been idle for 2 rtts. So we now | * A TLP timer has expired. We have been idle for 2 rtts. So we now | ||||
* need to figure out how to force a full MSS segment out. | * need to figure out how to force a full MSS segment out. | ||||
*/ | */ | ||||
rack_log_to_event(rack, RACK_TO_FRM_TLP, NULL); | rack_log_to_event(rack, RACK_TO_FRM_TLP, NULL); | ||||
counter_u64_add(rack_tlp_tot, 1); | counter_u64_add(rack_tlp_tot, 1); | ||||
if (rack->r_state && (rack->r_state != tp->t_state)) | if (rack->r_state && (rack->r_state != tp->t_state)) | ||||
rack_set_state(tp, rack); | rack_set_state(tp, rack); | ||||
so = tp->t_inpcb->inp_socket; | so = tp->t_inpcb->inp_socket; | ||||
#ifdef KERN_TLS | |||||
if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { | |||||
/* | |||||
* For hardware TLS we do *not* want to send | |||||
* new data, lets instead just do a retransmission. | |||||
*/ | |||||
goto need_retran; | |||||
} | |||||
#endif | |||||
avail = sbavail(&so->so_snd); | avail = sbavail(&so->so_snd); | ||||
out = tp->snd_max - tp->snd_una; | out = tp->snd_max - tp->snd_una; | ||||
if (out > tp->snd_wnd) { | if (out > tp->snd_wnd) { | ||||
/* special case, we need a retransmission */ | /* special case, we need a retransmission */ | ||||
collapsed_win = 1; | collapsed_win = 1; | ||||
goto need_retran; | goto need_retran; | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 2,908 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
rack_check_bottom_drag(struct tcpcb *tp, | rack_check_bottom_drag(struct tcpcb *tp, | ||||
struct tcp_rack *rack, | struct tcp_rack *rack, | ||||
struct socket *so, int32_t acked) | struct socket *so, int32_t acked) | ||||
{ | { | ||||
uint32_t segsiz, minseg; | uint32_t segsiz, minseg; | ||||
segsiz = ctf_fixed_maxseg(tp); | segsiz = ctf_fixed_maxseg(tp); | ||||
if (so->so_snd.sb_flags & SB_TLS_IFNET) { | |||||
minseg = rack->r_ctl.rc_pace_min_segs; | |||||
} else { | |||||
minseg = segsiz; | minseg = segsiz; | ||||
} | |||||
if (tp->snd_max == tp->snd_una) { | if (tp->snd_max == tp->snd_una) { | ||||
/* | /* | ||||
* We are doing dynamic pacing and we are way | * We are doing dynamic pacing and we are way | ||||
* under. Basically everything got acked while | * under. Basically everything got acked while | ||||
* we were still waiting on the pacer to expire. | * we were still waiting on the pacer to expire. | ||||
* | * | ||||
* This means we need to boost the b/w in | * This means we need to boost the b/w in | ||||
* addition to any earlier boosting of | * addition to any earlier boosting of | ||||
▲ Show 20 Lines • Show All 2,157 Lines • ▼ Show 20 Lines | rack_clear_rate_sample(struct tcp_rack *rack) | ||||
rack->r_ctl.rack_rs.rs_rtt_cnt = 0; | rack->r_ctl.rack_rs.rs_rtt_cnt = 0; | ||||
rack->r_ctl.rack_rs.rs_rtt_tot = 0; | rack->r_ctl.rack_rs.rs_rtt_tot = 0; | ||||
} | } | ||||
static void | static void | ||||
rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line) | rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line) | ||||
{ | { | ||||
uint64_t bw_est, rate_wanted; | uint64_t bw_est, rate_wanted; | ||||
uint32_t tls_seg = 0; | |||||
int chged = 0; | int chged = 0; | ||||
uint32_t user_max; | uint32_t user_max; | ||||
user_max = ctf_fixed_maxseg(tp) * rack->rc_user_set_max_segs; | user_max = ctf_fixed_maxseg(tp) * rack->rc_user_set_max_segs; | ||||
#ifdef KERN_TLS | |||||
if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { | |||||
tls_seg = ctf_get_opt_tls_size(rack->rc_inp->inp_socket, rack->rc_tp->snd_wnd); | |||||
if (tls_seg != rack->r_ctl.rc_pace_min_segs) | |||||
chged = 1; | |||||
rack->r_ctl.rc_pace_min_segs = tls_seg; | |||||
} else | |||||
#endif | |||||
{ | |||||
if (ctf_fixed_maxseg(tp) != rack->r_ctl.rc_pace_min_segs) | if (ctf_fixed_maxseg(tp) != rack->r_ctl.rc_pace_min_segs) | ||||
chged = 1; | chged = 1; | ||||
rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp); | rack->r_ctl.rc_pace_min_segs = ctf_fixed_maxseg(tp); | ||||
} | |||||
if (rack->use_fixed_rate || rack->rc_force_max_seg) { | if (rack->use_fixed_rate || rack->rc_force_max_seg) { | ||||
if (user_max != rack->r_ctl.rc_pace_max_segs) | if (user_max != rack->r_ctl.rc_pace_max_segs) | ||||
chged = 1; | chged = 1; | ||||
} | } | ||||
if (rack->rc_force_max_seg) { | if (rack->rc_force_max_seg) { | ||||
rack->r_ctl.rc_pace_max_segs = user_max; | rack->r_ctl.rc_pace_max_segs = user_max; | ||||
} else if (rack->use_fixed_rate) { | } else if (rack->use_fixed_rate) { | ||||
bw_est = rack_get_bw(rack); | bw_est = rack_get_bw(rack); | ||||
Show All 40 Lines | #endif | ||||
chged = 1; | chged = 1; | ||||
rack->r_ctl.rc_pace_max_segs = rc_init_window(rack); | rack->r_ctl.rc_pace_max_segs = rc_init_window(rack); | ||||
} | } | ||||
} | } | ||||
if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) { | if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) { | ||||
chged = 1; | chged = 1; | ||||
rack->r_ctl.rc_pace_max_segs = PACE_MAX_IP_BYTES; | rack->r_ctl.rc_pace_max_segs = PACE_MAX_IP_BYTES; | ||||
} | } | ||||
#ifdef KERN_TLS | |||||
uint32_t orig; | |||||
if (tls_seg != 0) { | |||||
orig = rack->r_ctl.rc_pace_max_segs; | |||||
if (rack_hw_tls_max_seg > 1) { | |||||
rack->r_ctl.rc_pace_max_segs /= tls_seg; | |||||
if (rack_hw_tls_max_seg > rack->r_ctl.rc_pace_max_segs) | |||||
rack->r_ctl.rc_pace_max_segs = rack_hw_tls_max_seg; | |||||
} else { | |||||
rack->r_ctl.rc_pace_max_segs = 1; | |||||
} | |||||
if (rack->r_ctl.rc_pace_max_segs == 0) | |||||
rack->r_ctl.rc_pace_max_segs = 1; | |||||
rack->r_ctl.rc_pace_max_segs *= tls_seg; | |||||
if (rack->r_ctl.rc_pace_max_segs > PACE_MAX_IP_BYTES) { | |||||
/* We can't go over the max bytes (usually 64k) */ | |||||
rack->r_ctl.rc_pace_max_segs = ((PACE_MAX_IP_BYTES / tls_seg) * tls_seg); | |||||
} | |||||
if (orig != rack->r_ctl.rc_pace_max_segs) | |||||
chged = 1; | |||||
} | |||||
#endif | |||||
if (chged) | if (chged) | ||||
rack_log_type_hrdwtso(tp, rack, tls_seg, rack->rc_inp->inp_socket->so_snd.sb_flags, line, 2); | rack_log_type_hrdwtso(tp, rack, 0, rack->rc_inp->inp_socket->so_snd.sb_flags, line, 2); | ||||
} | } | ||||
static int | static int | ||||
rack_init(struct tcpcb *tp) | rack_init(struct tcpcb *tp) | ||||
{ | { | ||||
struct tcp_rack *rack = NULL; | struct tcp_rack *rack = NULL; | ||||
struct rack_sendmap *insret; | struct rack_sendmap *insret; | ||||
uint32_t iwin, snt, us_cts; | uint32_t iwin, snt, us_cts; | ||||
▲ Show 20 Lines • Show All 1,170 Lines • ▼ Show 20 Lines | #endif | ||||
else | else | ||||
oh = 0; | oh = 0; | ||||
segs *= oh; | segs *= oh; | ||||
lentim = (uint64_t)(len + segs) * (uint64_t)HPTS_USEC_IN_SEC; | lentim = (uint64_t)(len + segs) * (uint64_t)HPTS_USEC_IN_SEC; | ||||
res = lentim / rate_wanted; | res = lentim / rate_wanted; | ||||
slot = (uint32_t)res; | slot = (uint32_t)res; | ||||
orig_val = rack->r_ctl.rc_pace_max_segs; | orig_val = rack->r_ctl.rc_pace_max_segs; | ||||
rack_set_pace_segments(rack->rc_tp, rack, __LINE__); | rack_set_pace_segments(rack->rc_tp, rack, __LINE__); | ||||
#ifdef KERN_TLS | |||||
/* For TLS we need to override this, possibly */ | |||||
if (rack->rc_inp->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { | |||||
rack_set_pace_segments(rack->rc_tp, rack, __LINE__); | |||||
} | |||||
#endif | |||||
/* Did we change the TSO size, if so log it */ | /* Did we change the TSO size, if so log it */ | ||||
if (rack->r_ctl.rc_pace_max_segs != orig_val) | if (rack->r_ctl.rc_pace_max_segs != orig_val) | ||||
rack_log_pacing_delay_calc(rack, len, slot, orig_val, 0, 0, 15, __LINE__, NULL); | rack_log_pacing_delay_calc(rack, len, slot, orig_val, 0, 0, 15, __LINE__, NULL); | ||||
if ((rack->rc_pace_to_cwnd) && | if ((rack->rc_pace_to_cwnd) && | ||||
(rack->in_probe_rtt == 0) && | (rack->in_probe_rtt == 0) && | ||||
(IN_RECOVERY(rack->rc_tp->t_flags) == 0)) { | (IN_RECOVERY(rack->rc_tp->t_flags) == 0)) { | ||||
/* | /* | ||||
* We want to pace at our rate *or* faster to | * We want to pace at our rate *or* faster to | ||||
▲ Show 20 Lines • Show All 349 Lines • ▼ Show 20 Lines | #endif | ||||
struct tcpopt to; | struct tcpopt to; | ||||
int32_t slot = 0; | int32_t slot = 0; | ||||
int32_t sup_rack = 0; | int32_t sup_rack = 0; | ||||
uint32_t cts, us_cts, delayed, early; | uint32_t cts, us_cts, delayed, early; | ||||
uint8_t hpts_calling, new_data_tlp = 0, doing_tlp = 0; | uint8_t hpts_calling, new_data_tlp = 0, doing_tlp = 0; | ||||
uint32_t cwnd_to_use; | uint32_t cwnd_to_use; | ||||
int32_t do_a_prefetch; | int32_t do_a_prefetch; | ||||
int32_t prefetch_rsm = 0; | int32_t prefetch_rsm = 0; | ||||
int force_tso = 0; | |||||
int32_t orig_len; | int32_t orig_len; | ||||
struct timeval tv; | struct timeval tv; | ||||
int32_t prefetch_so_done = 0; | int32_t prefetch_so_done = 0; | ||||
struct tcp_log_buffer *lgb = NULL; | struct tcp_log_buffer *lgb = NULL; | ||||
struct inpcb *inp; | struct inpcb *inp; | ||||
struct sockbuf *sb; | struct sockbuf *sb; | ||||
#ifdef INET6 | #ifdef INET6 | ||||
struct ip6_hdr *ip6 = NULL; | struct ip6_hdr *ip6 = NULL; | ||||
int32_t isipv6; | int32_t isipv6; | ||||
#endif | #endif | ||||
uint8_t filled_all = 0; | uint8_t filled_all = 0; | ||||
bool hw_tls = false; | bool hw_tls = false; | ||||
/* setup and take the cache hits here */ | /* setup and take the cache hits here */ | ||||
rack = (struct tcp_rack *)tp->t_fb_ptr; | rack = (struct tcp_rack *)tp->t_fb_ptr; | ||||
inp = rack->rc_inp; | inp = rack->rc_inp; | ||||
so = inp->inp_socket; | so = inp->inp_socket; | ||||
sb = &so->so_snd; | sb = &so->so_snd; | ||||
kern_prefetch(sb, &do_a_prefetch); | kern_prefetch(sb, &do_a_prefetch); | ||||
do_a_prefetch = 1; | do_a_prefetch = 1; | ||||
hpts_calling = inp->inp_hpts_calls; | hpts_calling = inp->inp_hpts_calls; | ||||
#ifdef KERN_TLS | |||||
hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0; | hw_tls = (so->so_snd.sb_flags & SB_TLS_IFNET) != 0; | ||||
#endif | |||||
NET_EPOCH_ASSERT(); | NET_EPOCH_ASSERT(); | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
#ifdef TCP_OFFLOAD | #ifdef TCP_OFFLOAD | ||||
if (tp->t_flags & TF_TOE) | if (tp->t_flags & TF_TOE) | ||||
return (tcp_offload_output(tp)); | return (tcp_offload_output(tp)); | ||||
#endif | #endif | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | again: | ||||
* resending already delivered data. Adjust snd_nxt accordingly. | * resending already delivered data. Adjust snd_nxt accordingly. | ||||
*/ | */ | ||||
sendalot = 0; | sendalot = 0; | ||||
us_cts = tcp_get_usecs(&tv); | us_cts = tcp_get_usecs(&tv); | ||||
cts = tcp_tv_to_mssectick(&tv); | cts = tcp_tv_to_mssectick(&tv); | ||||
tso = 0; | tso = 0; | ||||
mtu = 0; | mtu = 0; | ||||
segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); | segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs); | ||||
if (so->so_snd.sb_flags & SB_TLS_IFNET) { | |||||
minseg = rack->r_ctl.rc_pace_min_segs; | |||||
} else { | |||||
minseg = segsiz; | minseg = segsiz; | ||||
} | |||||
sb_offset = tp->snd_max - tp->snd_una; | sb_offset = tp->snd_max - tp->snd_una; | ||||
cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd; | cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd; | ||||
#ifdef NETFLIX_SHARED_CWND | #ifdef NETFLIX_SHARED_CWND | ||||
if ((tp->t_flags2 & TF2_TCP_SCWND_ALLOWED) && | if ((tp->t_flags2 & TF2_TCP_SCWND_ALLOWED) && | ||||
rack->rack_enable_scwnd) { | rack->rack_enable_scwnd) { | ||||
/* We are doing cwnd sharing */ | /* We are doing cwnd sharing */ | ||||
if (rack->rc_gp_filled && | if (rack->rc_gp_filled && | ||||
(rack->rack_attempted_scwnd == 0) && | (rack->rack_attempted_scwnd == 0) && | ||||
▲ Show 20 Lines • Show All 1,016 Lines • ▼ Show 20 Lines | if (tp->t_inpcb->inp_options) | ||||
ipoptlen = tp->t_inpcb->inp_options->m_len - | ipoptlen = tp->t_inpcb->inp_options->m_len - | ||||
offsetof(struct ipoption, ipopt_list); | offsetof(struct ipoption, ipopt_list); | ||||
else | else | ||||
ipoptlen = 0; | ipoptlen = 0; | ||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT) | #if defined(IPSEC) || defined(IPSEC_SUPPORT) | ||||
ipoptlen += ipsec_optlen; | ipoptlen += ipsec_optlen; | ||||
#endif | #endif | ||||
#ifdef KERN_TLS | |||||
/* force TSO for so TLS offload can get mss */ | |||||
if (sb->sb_flags & SB_TLS_IFNET) { | |||||
force_tso = 1; | |||||
} | |||||
#endif | |||||
/* | /* | ||||
* Adjust data length if insertion of options will bump the packet | * Adjust data length if insertion of options will bump the packet | ||||
* length beyond the t_maxseg length. Clear the FIN bit because we | * length beyond the t_maxseg length. Clear the FIN bit because we | ||||
* cut off the tail of the segment. | * cut off the tail of the segment. | ||||
*/ | */ | ||||
if (len + optlen + ipoptlen > tp->t_maxseg) { | if (len + optlen + ipoptlen > tp->t_maxseg) { | ||||
if (tso) { | if (tso) { | ||||
uint32_t if_hw_tsomax; | uint32_t if_hw_tsomax; | ||||
Show All 23 Lines | if (tso) { | ||||
mark = 2; | mark = 2; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Prevent the last segment from being fractional | * Prevent the last segment from being fractional | ||||
* unless the send sockbuf can be emptied: | * unless the send sockbuf can be emptied: | ||||
*/ | */ | ||||
max_len = (tp->t_maxseg - optlen); | max_len = (tp->t_maxseg - optlen); | ||||
if (((sb_offset + len) < sbavail(sb)) && | if ((sb_offset + len) < sbavail(sb)) { | ||||
(hw_tls == 0)) { | |||||
moff = len % (u_int)max_len; | moff = len % (u_int)max_len; | ||||
if (moff != 0) { | if (moff != 0) { | ||||
mark = 3; | mark = 3; | ||||
len -= moff; | len -= moff; | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* In case there are too many small fragments don't | * In case there are too many small fragments don't | ||||
▲ Show 20 Lines • Show All 455 Lines • ▼ Show 20 Lines | KASSERT(ip->ip_v == IPVERSION, | ||||
("%s: IP version incorrect: %d", __func__, ip->ip_v)); | ("%s: IP version incorrect: %d", __func__, ip->ip_v)); | ||||
} | } | ||||
#endif | #endif | ||||
/* | /* | ||||
* Enable TSO and specify the size of the segments. The TCP pseudo | * Enable TSO and specify the size of the segments. The TCP pseudo | ||||
* header checksum is always provided. XXX: Fixme: This is currently | * header checksum is always provided. XXX: Fixme: This is currently | ||||
* not the case for IPv6. | * not the case for IPv6. | ||||
*/ | */ | ||||
if (tso || force_tso) { | if (tso) { | ||||
KASSERT(force_tso || len > tp->t_maxseg - optlen, | KASSERT(len > tp->t_maxseg - optlen, | ||||
("%s: len <= tso_segsz", __func__)); | ("%s: len <= tso_segsz", __func__)); | ||||
m->m_pkthdr.csum_flags |= CSUM_TSO; | m->m_pkthdr.csum_flags |= CSUM_TSO; | ||||
m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; | m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; | ||||
} | } | ||||
KASSERT(len + hdrlen == m_length(m, NULL), | KASSERT(len + hdrlen == m_length(m, NULL), | ||||
("%s: mbuf chain different than expected: %d + %u != %u", | ("%s: mbuf chain different than expected: %d + %u != %u", | ||||
__func__, len, hdrlen, m_length(m, NULL))); | __func__, len, hdrlen, m_length(m, NULL))); | ||||
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines | if (error == 0) { | ||||
} else if (len > 1) { | } else if (len > 1) { | ||||
int idx; | int idx; | ||||
idx = (len / segsiz) + 3; | idx = (len / segsiz) + 3; | ||||
if (idx >= TCP_MSS_ACCT_ATIMER) | if (idx >= TCP_MSS_ACCT_ATIMER) | ||||
counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1); | counter_u64_add(rack_out_size[(TCP_MSS_ACCT_ATIMER-1)], 1); | ||||
else | else | ||||
counter_u64_add(rack_out_size[idx], 1); | counter_u64_add(rack_out_size[idx], 1); | ||||
} | |||||
if (hw_tls && len > 0) { | |||||
if (filled_all) { | |||||
counter_u64_add(rack_tls_filled, 1); | |||||
rack_log_type_hrdwtso(tp, rack, len, 0, orig_len, 1); | |||||
} else { | |||||
if (rsm) { | |||||
counter_u64_add(rack_tls_rxt, 1); | |||||
rack_log_type_hrdwtso(tp, rack, len, 2, orig_len, 1); | |||||
} else if (doing_tlp) { | |||||
counter_u64_add(rack_tls_tlp, 1); | |||||
rack_log_type_hrdwtso(tp, rack, len, 3, orig_len, 1); | |||||
} else if ( (ctf_outstanding(tp) + minseg) > sbavail(sb)) { | |||||
counter_u64_add(rack_tls_app, 1); | |||||
rack_log_type_hrdwtso(tp, rack, len, 4, orig_len, 1); | |||||
} else if ((ctf_flight_size(tp, rack->r_ctl.rc_sacked) + minseg) > cwnd_to_use) { | |||||
counter_u64_add(rack_tls_cwnd, 1); | |||||
rack_log_type_hrdwtso(tp, rack, len, 5, orig_len, 1); | |||||
} else if ((ctf_outstanding(tp) + minseg) > tp->snd_wnd) { | |||||
counter_u64_add(rack_tls_rwnd, 1); | |||||
rack_log_type_hrdwtso(tp, rack, len, 6, orig_len, 1); | |||||
} else { | |||||
rack_log_type_hrdwtso(tp, rack, len, 7, orig_len, 1); | |||||
counter_u64_add(rack_tls_other, 1); | |||||
} | |||||
} | |||||
} | } | ||||
} | } | ||||
if (rack->rack_no_prr == 0) { | if (rack->rack_no_prr == 0) { | ||||
if (sub_from_prr && (error == 0)) { | if (sub_from_prr && (error == 0)) { | ||||
if (rack->r_ctl.rc_prr_sndcnt >= len) | if (rack->r_ctl.rc_prr_sndcnt >= len) | ||||
rack->r_ctl.rc_prr_sndcnt -= len; | rack->r_ctl.rc_prr_sndcnt -= len; | ||||
else | else | ||||
rack->r_ctl.rc_prr_sndcnt = 0; | rack->r_ctl.rc_prr_sndcnt = 0; | ||||
▲ Show 20 Lines • Show All 1,142 Lines • Show Last 20 Lines |