Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_stacks/tcp_rack.h
Context not available. | |||||
#define _NETINET_TCP_RACK_H_ | #define _NETINET_TCP_RACK_H_ | ||||
#define RACK_ACKED 0x0001/* The remote endpoint acked this */ | #define RACK_ACKED 0x0001/* The remote endpoint acked this */ | ||||
#define RACK_TO_MIXED 0x0002/* A timeout occurred that mixed the send order - not used */ | #define RACK_TO_REXT 0x0002/* A timeout occured on this sendmap entry */ | ||||
#define RACK_DEFERRED 0x0004/* We can't use this for RTT calc - not used */ | #define RACK_DEFERRED 0x0004/* We can't use this for RTT calc - not used */ | ||||
#define RACK_OVERMAX 0x0008/* We have more retran's then we can fit */ | #define RACK_OVERMAX 0x0008/* We have more retran's then we can fit */ | ||||
#define RACK_SACK_PASSED 0x0010/* A sack was done above this block */ | #define RACK_SACK_PASSED 0x0010/* A sack was done above this block */ | ||||
Context not available. | |||||
#define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */ | #define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */ | ||||
#define RACK_APP_LIMITED 0x0200/* We went app limited after this send */ | #define RACK_APP_LIMITED 0x0200/* We went app limited after this send */ | ||||
#define RACK_WAS_ACKED 0x0400/* a RTO undid the ack, but it already had a rtt calc done */ | #define RACK_WAS_ACKED 0x0400/* a RTO undid the ack, but it already had a rtt calc done */ | ||||
#define RACK_HAS_SIN 0x0800/* SIN is on this guy */ | #define RACK_HAS_SYN 0x0800/* SYN is on this guy */ | ||||
#define RACK_SENT_W_DSACK 0x1000/* Sent with a dsack */ | |||||
#define RACK_SENT_SP 0x2000/* sent in slow path */ | |||||
#define RACK_SENT_FP 0x4000/* sent in fast path */ | |||||
#define RACK_HAD_PUSH 0x8000/* Push was sent on original send */ | |||||
#define RACK_NUM_OF_RETRANS 3 | #define RACK_NUM_OF_RETRANS 3 | ||||
#define RACK_INITIAL_RTO 1000 /* 1 second in milli seconds */ | #define RACK_INITIAL_RTO 1000000 /* 1 second in microseconds */ | ||||
#define RACK_REQ_AVG 4 /* Must be less than 256 */ | #define RACK_REQ_AVG 3 /* Must be less than 256 */ | ||||
struct rack_sendmap { | struct rack_sendmap { | ||||
TAILQ_ENTRY(rack_sendmap) r_tnext; /* Time of transmit based next */ | |||||
uint32_t r_start; /* Sequence number of the segment */ | uint32_t r_start; /* Sequence number of the segment */ | ||||
uint32_t r_end; /* End seq, this is 1 beyond actually */ | uint32_t r_end; /* End seq, this is 1 beyond actually */ | ||||
TAILQ_ENTRY(rack_sendmap) r_tnext; /* Time of transmit based next */ | |||||
RB_ENTRY(rack_sendmap) r_next; /* RB Tree next */ | |||||
uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ | uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ | ||||
uint16_t r_rtr_cnt; /* Retran count, index this -1 to get time | uint16_t r_rtr_cnt; /* Retran count, index this -1 to get time | ||||
* sent */ | * sent */ | ||||
uint16_t r_flags; /* Flags as defined above */ | uint16_t r_flags; /* Flags as defined above */ | ||||
uint32_t r_tim_lastsent[RACK_NUM_OF_RETRANS]; | struct mbuf *m; | ||||
uint32_t usec_orig_send; /* time of orginal send in useconds */ | uint32_t soff; | ||||
uint32_t orig_m_len; | |||||
uint32_t r_nseq_appl; /* If this one is app limited, this is the nxt seq limited */ | uint32_t r_nseq_appl; /* If this one is app limited, this is the nxt seq limited */ | ||||
uint32_t r_ack_arrival; /* This is the time of ack-arrival (if SACK'd) */ | |||||
uint8_t r_dupack; /* Dup ack count */ | uint8_t r_dupack; /* Dup ack count */ | ||||
uint8_t r_in_tmap; /* Flag to see if its in the r_tnext array */ | uint8_t r_in_tmap; /* Flag to see if its in the r_tnext array */ | ||||
uint8_t r_limit_type; /* is this entry counted against a limit? */ | uint8_t r_limit_type; /* is this entry counted against a limit? */ | ||||
uint8_t r_just_ret : 1, /* After sending, the next pkt was just returned, i.e. limited */ | uint8_t r_just_ret : 1, /* After sending, the next pkt was just returned, i.e. limited */ | ||||
r_one_out_nr : 1, /* Special case 1 outstanding and not in recovery */ | r_one_out_nr : 1, /* Special case 1 outstanding and not in recovery */ | ||||
r_avail : 6; | r_no_rtt_allowed : 1, /* No rtt measurement allowed */ | ||||
uint8_t r_resv[36]; | r_avail : 5; | ||||
uint64_t r_tim_lastsent[RACK_NUM_OF_RETRANS]; | |||||
uint64_t r_ack_arrival; /* This is the time of ack-arrival (if SACK'd) */ | |||||
RB_ENTRY(rack_sendmap) r_next; /* RB Tree next */ | |||||
}; | }; | ||||
struct deferred_opt_list { | |||||
TAILQ_ENTRY(deferred_opt_list) next; | |||||
int optname; | |||||
uint64_t optval; | |||||
}; | |||||
/* | |||||
* Timestamps in the rack sendmap are now moving to be | |||||
* uint64_t's. This means that if you want a uint32_t | |||||
* usec timestamp (the old usecond timestamp) you simply have | |||||
* to cast it to uint32_t. The reason we do this is not for | |||||
* wrap, but we need to get back, at times, to the millisecond | |||||
* timestamp that is used in the TSTMP option. To do this we | |||||
* can use the rack_ts_to_msec() inline below which can take | |||||
* the 64bit ts and make into the correct timestamp millisecond | |||||
* wise. Thats not possible with the 32bit usecond timestamp since | |||||
* the seconds wrap too quickly to cover all bases. | |||||
* | |||||
* There are quite a few places in rack where I simply cast | |||||
* back to uint32_t and then end up using the TSTMP_XX() | |||||
* macros. This is ok, but we could do simple compares if | |||||
* we ever decided to move all of those variables to 64 bits | |||||
* as well. | |||||
*/ | |||||
inline uint64_t | |||||
rack_to_usec_ts(struct timeval *tv) | |||||
{ | |||||
return ((tv->tv_sec * HPTS_USEC_IN_SEC) + tv->tv_usec); | |||||
} | |||||
inline uint32_t | |||||
rack_ts_to_msec(uint64_t ts) | |||||
{ | |||||
return((uint32_t)(ts / HPTS_MSEC_IN_SEC)); | |||||
} | |||||
RB_HEAD(rack_rb_tree_head, rack_sendmap); | RB_HEAD(rack_rb_tree_head, rack_sendmap); | ||||
TAILQ_HEAD(rack_head, rack_sendmap); | TAILQ_HEAD(rack_head, rack_sendmap); | ||||
TAILQ_HEAD(def_opt_head, deferred_opt_list); | |||||
/* Map change logging */ | |||||
#define MAP_MERGE 0x01 | |||||
#define MAP_SPLIT 0x02 | |||||
#define MAP_NEW 0x03 | |||||
#define MAP_SACK_M1 0x04 | |||||
#define MAP_SACK_M2 0x05 | |||||
#define MAP_SACK_M3 0x06 | |||||
#define MAP_SACK_M4 0x07 | |||||
#define MAP_SACK_M5 0x08 | |||||
#define MAP_FREE 0x09 | |||||
#define MAP_TRIM_HEAD 0x0a | |||||
#define RACK_LIMIT_TYPE_SPLIT 1 | #define RACK_LIMIT_TYPE_SPLIT 1 | ||||
Context not available. | |||||
#define RACK_TO_FRM_DELACK 6 | #define RACK_TO_FRM_DELACK 6 | ||||
struct rack_opts_stats { | struct rack_opts_stats { | ||||
uint64_t tcp_rack_prop_rate; | |||||
uint64_t tcp_rack_prop; | |||||
uint64_t tcp_rack_tlp_reduce; | uint64_t tcp_rack_tlp_reduce; | ||||
uint64_t tcp_rack_early_recov; | |||||
uint64_t tcp_rack_pace_always; | uint64_t tcp_rack_pace_always; | ||||
uint64_t tcp_rack_pace_reduce; | uint64_t tcp_rack_pace_reduce; | ||||
uint64_t tcp_rack_max_seg; | uint64_t tcp_rack_max_seg; | ||||
Context not available. | |||||
uint64_t tcp_npush; | uint64_t tcp_npush; | ||||
uint64_t tcp_lscwnd; | uint64_t tcp_lscwnd; | ||||
uint64_t tcp_profile; | uint64_t tcp_profile; | ||||
uint64_t tcp_hdwr_rate_cap; | |||||
uint64_t tcp_pacing_rate_cap; | |||||
uint64_t tcp_pacing_up_only; | |||||
uint64_t tcp_use_cmp_acks; | |||||
uint64_t tcp_rack_abc_val; | |||||
uint64_t tcp_rec_abc_val; | |||||
uint64_t tcp_rack_measure_cnt; | |||||
uint64_t tcp_rack_delayed_ack; | |||||
uint64_t tcp_rack_rtt_use; | |||||
uint64_t tcp_data_after_close; | |||||
uint64_t tcp_defer_opt; | |||||
uint64_t tcp_rack_fastrsm_hack; | |||||
uint64_t tcp_rack_beta; | |||||
uint64_t tcp_rack_beta_ecn; | |||||
}; | }; | ||||
/* RTT shrink reasons */ | /* RTT shrink reasons */ | ||||
Context not available. | |||||
*/ | */ | ||||
#define RACK_GP_HIST 4 /* How much goodput history do we maintain? */ | #define RACK_GP_HIST 4 /* How much goodput history do we maintain? */ | ||||
#define RACK_NUM_FSB_DEBUG 16 | |||||
struct rack_fast_send_blk { | |||||
uint32_t left_to_send; | |||||
uint16_t tcp_ip_hdr_len; | |||||
uint8_t tcp_flags; | |||||
uint8_t hoplimit; | |||||
uint8_t *tcp_ip_hdr; | |||||
uint32_t recwin; | |||||
uint32_t off; | |||||
struct tcphdr *th; | |||||
struct udphdr *udp; | |||||
struct mbuf *m; | |||||
uint32_t o_m_len; | |||||
uint32_t rfo_apply_push : 1, | |||||
unused : 31; | |||||
}; | |||||
struct rack_control { | struct rack_control { | ||||
/* Second cache line 0x40 from tcp_rack */ | /* Second cache line 0x40 from tcp_rack */ | ||||
struct rack_rb_tree_head rc_mtree; /* Tree of all segments Lock(a) */ | struct rack_rb_tree_head rc_mtree; /* Tree of all segments Lock(a) */ | ||||
Context not available. | |||||
* tlp_sending Lock(a) */ | * tlp_sending Lock(a) */ | ||||
struct rack_sendmap *rc_resend; /* something we have been asked to | struct rack_sendmap *rc_resend; /* something we have been asked to | ||||
* resend */ | * resend */ | ||||
struct rack_fast_send_blk fsb; /* The fast-send block */ | |||||
uint32_t input_pkt; | uint32_t input_pkt; | ||||
uint32_t saved_input_pkt; | uint32_t saved_input_pkt; | ||||
uint32_t rc_hpts_flags; | uint32_t rc_hpts_flags; | ||||
Context not available. | |||||
/* Third Cache line 0x80 */ | /* Third Cache line 0x80 */ | ||||
struct rack_head rc_free; /* Allocation array */ | struct rack_head rc_free; /* Allocation array */ | ||||
uint64_t last_hw_bw_req; | |||||
uint64_t crte_prev_rate; | |||||
uint64_t bw_rate_cap; | |||||
uint32_t rc_time_last_sent; /* Time we last sent some data and | uint32_t rc_time_last_sent; /* Time we last sent some data and | ||||
* logged it Lock(a). */ | * logged it Lock(a). */ | ||||
uint32_t rc_reorder_ts; /* Last time we saw reordering Lock(a) */ | uint32_t rc_reorder_ts; /* Last time we saw reordering Lock(a) */ | ||||
Context not available. | |||||
uint32_t rc_agg_delayed; | uint32_t rc_agg_delayed; | ||||
uint32_t rc_tlp_rxt_last_time; | uint32_t rc_tlp_rxt_last_time; | ||||
uint32_t rc_saved_cwnd; | uint32_t rc_saved_cwnd; | ||||
uint32_t rc_gp_output_ts; | uint64_t rc_gp_output_ts; /* chg*/ | ||||
uint32_t rc_gp_cumack_ts; | uint64_t rc_gp_cumack_ts; /* chg*/ | ||||
struct timeval act_rcv_time; | struct timeval act_rcv_time; | ||||
struct timeval rc_last_time_decay; /* SAD time decay happened here */ | struct timeval rc_last_time_decay; /* SAD time decay happened here */ | ||||
uint64_t gp_bw; | uint64_t gp_bw; | ||||
Context not available. | |||||
uint64_t last_gp_comp_bw; | uint64_t last_gp_comp_bw; | ||||
uint64_t last_max_bw; /* Our calculated max b/w last */ | uint64_t last_max_bw; /* Our calculated max b/w last */ | ||||
struct time_filter_small rc_gp_min_rtt; | struct time_filter_small rc_gp_min_rtt; | ||||
struct def_opt_head opt_list; | |||||
int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */ | int32_t rc_rtt_diff; /* Timely style rtt diff of our gp_srtt */ | ||||
uint32_t rc_gp_srtt; /* Current GP srtt */ | uint32_t rc_gp_srtt; /* Current GP srtt */ | ||||
uint32_t rc_prev_gp_srtt; /* Previous RTT */ | uint32_t rc_prev_gp_srtt; /* Previous RTT */ | ||||
Context not available. | |||||
uint32_t rc_time_of_last_probertt; | uint32_t rc_time_of_last_probertt; | ||||
uint32_t rc_target_probertt_flight; | uint32_t rc_target_probertt_flight; | ||||
uint32_t rc_probertt_sndmax_atexit; /* Highest sent to in probe-rtt */ | uint32_t rc_probertt_sndmax_atexit; /* Highest sent to in probe-rtt */ | ||||
uint32_t rc_cwnd_at_erec; | |||||
uint32_t rc_ssthresh_at_erec; | |||||
uint32_t dsack_byte_cnt; | |||||
uint32_t retran_during_recovery; | |||||
uint32_t rc_gp_lowrtt; /* Lowest rtt seen during GPUT measurement */ | uint32_t rc_gp_lowrtt; /* Lowest rtt seen during GPUT measurement */ | ||||
uint32_t rc_gp_high_rwnd; /* Highest rwnd seen during GPUT measurement */ | uint32_t rc_gp_high_rwnd; /* Highest rwnd seen during GPUT measurement */ | ||||
uint32_t rc_snd_max_at_rto; /* For non-sack when the RTO occured what was snd-max */ | |||||
uint32_t rc_out_at_rto; | |||||
int32_t rc_scw_index; | int32_t rc_scw_index; | ||||
uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */ | uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */ | ||||
uint32_t rc_last_timeout_snduna; | |||||
uint32_t challenge_ack_ts; | |||||
uint32_t challenge_ack_cnt; | |||||
uint32_t rc_min_to; /* Socket option value Lock(a) */ | |||||
uint32_t rc_pkt_delay; /* Socket option value Lock(a) */ | |||||
struct newreno rc_saved_beta; /* | |||||
* For newreno cc: | |||||
* rc_saved_cc are the values we have had | |||||
* set by the user, if pacing is not happening | |||||
* (i.e. its early and we have not turned on yet | |||||
* or it was turned off). The minute pacing | |||||
* is turned on we pull out the values currently | |||||
* being used by newreno and replace them with | |||||
* these values, then save off the old values here, | |||||
* we also set the flag (if ecn_beta is set) to make | |||||
* new_reno do less of a backoff for ecn (think abe). | |||||
*/ | |||||
uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ | uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ | ||||
uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ | uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ | ||||
uint16_t rc_pkt_delay; /* Socket option value Lock(a) */ | |||||
uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */ | uint8_t rc_no_push_at_mrtt; /* No push when we exceed max rtt */ | ||||
uint8_t num_avg; /* average count before we go to normal decay */ | uint8_t num_measurements; /* Number of measurements (up to 0xff, we freeze at 0xff) */ | ||||
uint8_t rc_prop_rate; /* Socket option value Lock(a) */ | uint8_t req_measurements; /* How many measurements are required? */ | ||||
uint8_t rc_prop_reduce; /* Socket option value Lock(a) */ | |||||
uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ | uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ | ||||
uint8_t rc_early_recovery; /* Socket option value Lock(a) */ | |||||
uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ | uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ | ||||
uint8_t rc_min_to; /* Socket option value Lock(a) */ | |||||
uint8_t rc_rate_sample_method; | uint8_t rc_rate_sample_method; | ||||
uint8_t rc_gp_hist_idx; | uint8_t rc_gp_hist_idx; | ||||
}; | }; | ||||
Context not available. | |||||
int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */ | int32_t, int32_t, uint32_t, int, int, uint8_t); /* Lock(a) */ | ||||
struct tcpcb *rc_tp; /* The tcpcb Lock(a) */ | struct tcpcb *rc_tp; /* The tcpcb Lock(a) */ | ||||
struct inpcb *rc_inp; /* The inpcb Lock(a) */ | struct inpcb *rc_inp; /* The inpcb Lock(a) */ | ||||
uint32_t rc_free_cnt; /* Number of free entries on the rc_free list | uint8_t rc_free_cnt; /* Number of free entries on the rc_free list | ||||
* Lock(a) */ | * Lock(a) */ | ||||
uint8_t client_bufferlvl; /* 0 - 5 normaly, less than or at 2 means its real low */ | |||||
uint8_t no_prr_addback : 1, | |||||
gp_ready : 1, | |||||
defer_options: 1, | |||||
fast_rsm_hack: 1, | |||||
rc_ack_can_sendout_data: 1, /* | |||||
* If set it will override pacing restrictions on not sending | |||||
* data when the pacing timer is running. I.e. you set this | |||||
* and an ACK will send data. Default is off and its only used | |||||
* without pacing when we are doing 5G speed up for there | |||||
* ack filtering. | |||||
*/ | |||||
rc_pacing_cc_set: 1, /* | |||||
* If we are pacing (pace_always=1) and we have reached the | |||||
* point where we start pacing (fixed or gp has reached its | |||||
* magic gp_ready state) this flag indicates we have set in | |||||
* values to effect CC's backoff's. If pacing is turned off | |||||
* then we must restore the values saved in rc_saved_beta, | |||||
* if its going to gp_ready we need to copy the values into | |||||
* the CC module and set our flags. | |||||
* | |||||
* Note this only happens if the cc name is newreno (CCALGONAME_NEWRENO). | |||||
*/ | |||||
avail :2; | |||||
uint8_t avail_bytes; | |||||
uint32_t rc_rack_rtt; /* RACK-RTT Lock(a) */ | uint32_t rc_rack_rtt; /* RACK-RTT Lock(a) */ | ||||
uint16_t r_mbuf_queue : 1, /* Do we do mbuf queue for non-paced */ | uint16_t r_mbuf_queue : 1, /* Do we do mbuf queue for non-paced */ | ||||
rtt_limit_mul : 4, /* muliply this by low rtt */ | rtt_limit_mul : 4, /* muliply this by low rtt */ | ||||
r_limit_scw : 1, | r_limit_scw : 1, | ||||
r_avail_bits : 10; /* Available */ | r_must_retran : 1, /* For non-sack customers we hit an RTO and new data should be resends */ | ||||
r_use_cmp_ack: 1, /* Do we use compressed acks */ | |||||
uint16_t rc_user_set_max_segs; /* Socket option value Lock(a) */ | r_ent_rec_ns: 1, /* We entered recovery and have not sent */ | ||||
r_might_revert: 1, /* Flag to find out if we might need to revert */ | |||||
r_fast_output: 1, /* Fast output is in progress we can skip the bulk of rack_output */ | |||||
r_fsb_inited: 1, | |||||
r_rack_hw_rate_caps: 1, | |||||
r_up_only: 1, | |||||
r_via_fill_cw : 1, | |||||
r_fill_less_agg : 1; | |||||
uint8_t rc_user_set_max_segs; /* Socket option value Lock(a) */ | |||||
uint8_t rc_labc; /* Appropriate Byte Counting Value */ | |||||
uint16_t forced_ack : 1, | uint16_t forced_ack : 1, | ||||
rc_gp_incr : 1, | rc_gp_incr : 1, | ||||
rc_gp_bwred : 1, | rc_gp_bwred : 1, | ||||
rc_gp_timely_inc_cnt : 3, | rc_gp_timely_inc_cnt : 3, | ||||
rc_gp_timely_dec_cnt : 3, | rc_gp_timely_dec_cnt : 3, | ||||
rc_not_backing_off: 1, | r_use_labc_for_rec: 1, | ||||
rc_highly_buffered: 1, /* The path is highly buffered */ | rc_highly_buffered: 1, /* The path is highly buffered */ | ||||
rc_dragged_bottom: 1, | rc_dragged_bottom: 1, | ||||
rc_dack_mode : 1, /* Mac O/S emulation of d-ack */ | rc_dack_mode : 1, /* Mac O/S emulation of d-ack */ | ||||
Context not available. | |||||
rc_always_pace : 1, /* Socket option value Lock(a) */ | rc_always_pace : 1, /* Socket option value Lock(a) */ | ||||
rc_pace_to_cwnd : 1, | rc_pace_to_cwnd : 1, | ||||
rc_pace_fill_if_rttin_range : 1, | rc_pace_fill_if_rttin_range : 1, | ||||
xxx_avail_bits : 1; | rc_srtt_measure_made : 1; | ||||
uint8_t app_limited_needs_set : 1, | uint8_t app_limited_needs_set : 1, | ||||
use_fixed_rate : 1, | use_fixed_rate : 1, | ||||
rc_has_collapsed : 1, | rc_has_collapsed : 1, | ||||
Context not available. |