Changeset View
Changeset View
Standalone View
Standalone View
head/sys/netinet/tcp_stacks/tcp_rack.h
/*- | /*- | ||||
* Copyright (c) 2016 Netflix, Inc. | * Copyright (c) 2016-9 Netflix, Inc. | ||||
* | * | ||||
* Redistribution and use in source and binary forms, with or without | * Redistribution and use in source and binary forms, with or without | ||||
* modification, are permitted provided that the following conditions | * modification, are permitted provided that the following conditions | ||||
* are met: | * are met: | ||||
* 1. Redistributions of source code must retain the above copyright | * 1. Redistributions of source code must retain the above copyright | ||||
* notice, this list of conditions and the following disclaimer. | * notice, this list of conditions and the following disclaimer. | ||||
* 2. Redistributions in binary form must reproduce the above copyright | * 2. Redistributions in binary form must reproduce the above copyright | ||||
* notice, this list of conditions and the following disclaimer in the | * notice, this list of conditions and the following disclaimer in the | ||||
Show All 12 Lines | |||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
* | * | ||||
* $FreeBSD$ | * $FreeBSD$ | ||||
*/ | */ | ||||
#ifndef _NETINET_TCP_RACK_H_ | #ifndef _NETINET_TCP_RACK_H_ | ||||
#define _NETINET_TCP_RACK_H_ | #define _NETINET_TCP_RACK_H_ | ||||
#define RACK_ACKED 0x0001/* The remote endpoint acked this */ | #define RACK_ACKED 0x0001/* The remote endpoint acked this */ | ||||
#define RACK_TO_MIXED 0x0002/* A timeout occurred that mixed the send order */ | #define RACK_TO_MIXED 0x0002/* A timeout occurred that mixed the send order - not used */ | ||||
#define RACK_DEFERRED 0x0004/* We can't use this for RTT calc */ | #define RACK_DEFERRED 0x0004/* We can't use this for RTT calc - not used */ | ||||
#define RACK_OVERMAX 0x0008/* We have more retransmissions than we can fit */ | #define RACK_OVERMAX 0x0008/* We have more retransmissions than we can fit */ | ||||
#define RACK_SACK_PASSED 0x0010/* A sack was done above this block */ | #define RACK_SACK_PASSED 0x0010/* A sack was done above this block */ | ||||
#define RACK_WAS_SACKPASS 0x0020/* We retransmitted due to SACK pass */ | #define RACK_WAS_SACKPASS 0x0020/* We retransmitted due to SACK pass */ | ||||
#define RACK_HAS_FIN 0x0040/* segment is sent with fin */ | #define RACK_HAS_FIN 0x0040/* segment is sent with fin */ | ||||
#define RACK_TLP 0x0080/* segment sent as tail-loss-probe */ | #define RACK_TLP 0x0080/* segment sent as tail-loss-probe */ | ||||
#define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */ | |||||
#define RACK_NUM_OF_RETRANS 3 | #define RACK_NUM_OF_RETRANS 3 | ||||
#define RACK_INITIAL_RTO 1000 /* 1 second in milliseconds */ | #define RACK_INITIAL_RTO 1000 /* 1 second in milliseconds */ | ||||
struct rack_sendmap { | struct rack_sendmap { | ||||
TAILQ_ENTRY(rack_sendmap) r_next; /* seq number arrayed next */ | |||||
TAILQ_ENTRY(rack_sendmap) r_tnext; /* Time of transmit based next */ | |||||
uint32_t r_tim_lastsent[RACK_NUM_OF_RETRANS]; | |||||
uint32_t r_start; /* Sequence number of the segment */ | uint32_t r_start; /* Sequence number of the segment */ | ||||
uint32_t r_end; /* End seq; actually 1 beyond the end */ | uint32_t r_end; /* End seq; actually 1 beyond the end */ | ||||
TAILQ_ENTRY(rack_sendmap) r_tnext; /* Time of transmit based next */ | |||||
RB_ENTRY(rack_sendmap) r_next; /* RB Tree next */ | |||||
uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ | uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ | ||||
uint16_t r_rtr_cnt; /* Retran count, index this -1 to get time | uint16_t r_rtr_cnt; /* Retran count, index this -1 to get time | ||||
* sent */ | * sent */ | ||||
uint8_t r_flags; /* Flags as defined above */ | uint16_t r_flags; /* Flags as defined above */ | ||||
uint8_t r_sndcnt; /* Retran count, not limited by | uint32_t r_tim_lastsent[RACK_NUM_OF_RETRANS]; | ||||
* RACK_NUM_OF_RETRANS */ | uint8_t r_dupack; /* Dup ack count */ | ||||
uint8_t r_in_tmap; /* Flag to see if it's in the r_tnext array */ | uint8_t r_in_tmap; /* Flag to see if it's in the r_tnext array */ | ||||
uint8_t r_limit_type; /* is this entry counted against a limit? */ | uint8_t r_limit_type; /* is this entry counted against a limit? */ | ||||
uint8_t r_resv[2]; | uint8_t r_resv[49]; | ||||
}; | }; | ||||
#define RACK_LIMIT_TYPE_SPLIT 1 | |||||
RB_HEAD(rack_rb_tree_head, rack_sendmap); | |||||
TAILQ_HEAD(rack_head, rack_sendmap); | TAILQ_HEAD(rack_head, rack_sendmap); | ||||
#define RACK_LIMIT_TYPE_SPLIT 1 | |||||
/* | /* | ||||
* We use the rate sample structure to | * We use the rate sample structure to | ||||
* assist in single sack/ack rate and rtt | * assist in single sack/ack rate and rtt | ||||
* calculation. In the future we will expand | * calculation. In the future we will expand | ||||
* this in BBR to do forward rate sample | * this in BBR to do forward rate sample | ||||
* b/w estimation. | * b/w estimation. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | struct rack_opts_stats { | ||||
uint64_t tcp_rack_pkt_delay; | uint64_t tcp_rack_pkt_delay; | ||||
uint64_t tcp_rack_tlp_inc_var; | uint64_t tcp_rack_tlp_inc_var; | ||||
uint64_t tcp_tlp_use; | uint64_t tcp_tlp_use; | ||||
uint64_t tcp_rack_idle_reduce; | uint64_t tcp_rack_idle_reduce; | ||||
uint64_t tcp_rack_idle_reduce_high; | uint64_t tcp_rack_idle_reduce_high; | ||||
uint64_t rack_no_timer_in_hpts; | uint64_t rack_no_timer_in_hpts; | ||||
uint64_t tcp_rack_min_pace_seg; | uint64_t tcp_rack_min_pace_seg; | ||||
uint64_t tcp_rack_min_pace; | uint64_t tcp_rack_min_pace; | ||||
uint64_t tcp_rack_cheat; | |||||
uint64_t tcp_rack_no_sack; | |||||
}; | }; | ||||
#define TLP_USE_ID 1 /* Internet draft behavior */ | #define TLP_USE_ID 1 /* Internet draft behavior */ | ||||
#define TLP_USE_TWO_ONE 2 /* Use 2.1 behavior */ | #define TLP_USE_TWO_ONE 2 /* Use 2.1 behavior */ | ||||
#define TLP_USE_TWO_TWO 3 /* Use 2.2 behavior */ | #define TLP_USE_TWO_TWO 3 /* Use 2.2 behavior */ | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
#define RACK_OPTS_SIZE (sizeof(struct rack_opts_stats)/sizeof(uint64_t)) | #define RACK_OPTS_SIZE (sizeof(struct rack_opts_stats)/sizeof(uint64_t)) | ||||
Show All 36 Lines | |||||
#define TT_RACK_FR_TMR 0x2000 | #define TT_RACK_FR_TMR 0x2000 | ||||
/* | /* | ||||
* Locking for the rack control block. | * Locking for the rack control block. | ||||
* a) Locked by INP_WLOCK | * a) Locked by INP_WLOCK | ||||
* b) Locked by the hpts-mutex | * b) Locked by the hpts-mutex | ||||
* | * | ||||
*/ | */ | ||||
#define RACK_GP_HIST 4 /* How much goodput history do we maintain? */ | |||||
struct rack_control { | struct rack_control { | ||||
/* Second cache line 0x40 from tcp_rack */ | /* Second cache line 0x40 from tcp_rack */ | ||||
struct rack_head rc_map;/* List of all segments Lock(a) */ | struct rack_rb_tree_head rc_mtree; /* Tree of all segments Lock(a) */ | ||||
struct rack_head rc_tmap; /* List in transmit order Lock(a) */ | struct rack_head rc_tmap; /* List in transmit order Lock(a) */ | ||||
struct rack_sendmap *rc_tlpsend; /* Remembered place for | struct rack_sendmap *rc_tlpsend; /* Remembered place for | ||||
* tlp_sending Lock(a) */ | * tlp_sending Lock(a) */ | ||||
struct rack_sendmap *rc_resend; /* something we have been asked to | struct rack_sendmap *rc_resend; /* something we have been asked to | ||||
* resend */ | * resend */ | ||||
struct timeval rc_last_time_decay; /* SAD time decay happened here */ | |||||
uint32_t input_pkt; | |||||
uint32_t saved_input_pkt; | |||||
uint32_t rc_hpts_flags; | uint32_t rc_hpts_flags; | ||||
uint32_t rc_timer_exp; /* If a timer ticks of expiry */ | uint32_t rc_timer_exp; /* If a timer ticks of expiry */ | ||||
uint32_t rc_rack_min_rtt; /* lowest RTT seen Lock(a) */ | uint32_t rc_rack_min_rtt; /* lowest RTT seen Lock(a) */ | ||||
uint32_t rc_rack_largest_cwnd; /* Largest CWND we have seen Lock(a) */ | uint32_t rc_rack_largest_cwnd; /* Largest CWND we have seen Lock(a) */ | ||||
/* Third Cache line 0x80 */ | /* Third Cache line 0x80 */ | ||||
struct rack_head rc_free; /* Allocation array */ | struct rack_head rc_free; /* Allocation array */ | ||||
uint32_t rc_time_last_sent; /* Time we last sent some data and | uint32_t rc_time_last_sent; /* Time we last sent some data and | ||||
Show All 31 Lines | struct rack_control { | ||||
uint32_t rc_rsm_start; /* RSM seq number we retransmitted Lock(a) */ | uint32_t rc_rsm_start; /* RSM seq number we retransmitted Lock(a) */ | ||||
uint32_t rc_cwnd_at; /* cwnd at the retransmit Lock(a) */ | uint32_t rc_cwnd_at; /* cwnd at the retransmit Lock(a) */ | ||||
uint32_t rc_ssthresh_at;/* ssthresh at the retransmit Lock(a) */ | uint32_t rc_ssthresh_at;/* ssthresh at the retransmit Lock(a) */ | ||||
uint32_t rc_num_maps_alloced; /* Number of map blocks (sacks) we | uint32_t rc_num_maps_alloced; /* Number of map blocks (sacks) we | ||||
* have allocated */ | * have allocated */ | ||||
uint32_t rc_rcvtime; /* When we last received data */ | uint32_t rc_rcvtime; /* When we last received data */ | ||||
uint32_t rc_num_split_allocs; /* num split map entries allocated */ | uint32_t rc_num_split_allocs; /* num split map entries allocated */ | ||||
uint32_t rc_last_output_to; | uint32_t rc_last_output_to; | ||||
uint32_t rc_went_idle_time; | uint32_t rc_went_idle_time; | ||||
struct rack_sendmap *rc_sacklast; /* sack remembered place | struct rack_sendmap *rc_sacklast; /* sack remembered place | ||||
* Lock(a) */ | * Lock(a) */ | ||||
struct rack_sendmap *rc_next; /* remembered place where we next | |||||
* retransmit at Lock(a) */ | |||||
struct rack_sendmap *rc_rsm_at_retran; /* Debug variable kept for | struct rack_sendmap *rc_rsm_at_retran; /* Debug variable kept for | ||||
* cache line alignment | * cache line alignment | ||||
* Lock(a) */ | * Lock(a) */ | ||||
struct timeval rc_last_ack; | |||||
/* Cache line split 0x100 */ | /* Cache line split 0x100 */ | ||||
struct sack_filter rack_sf; | struct sack_filter rack_sf; | ||||
/* Cache line split 0x140 */ | /* Cache line split 0x140 */ | ||||
/* Flags for various things */ | /* Flags for various things */ | ||||
uint32_t rc_pace_max_segs; | |||||
uint32_t rc_pace_min_segs; | |||||
uint32_t rc_high_rwnd; | |||||
uint32_t ack_count; | |||||
uint32_t sack_count; | |||||
uint32_t sack_noextra_move; | |||||
uint32_t sack_moved_extra; | |||||
struct rack_rtt_sample rack_rs; | struct rack_rtt_sample rack_rs; | ||||
uint32_t rc_tlp_rxt_last_time; | |||||
uint32_t rc_saved_cwnd; | |||||
uint32_t rc_gp_history[RACK_GP_HIST]; | |||||
uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */ | uint32_t rc_tlp_threshold; /* Socket option value Lock(a) */ | ||||
uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ | uint16_t rc_early_recovery_segs; /* Socket option value Lock(a) */ | ||||
uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ | uint16_t rc_reorder_shift; /* Socket option value Lock(a) */ | ||||
uint16_t rc_pkt_delay; /* Socket option value Lock(a) */ | uint16_t rc_pkt_delay; /* Socket option value Lock(a) */ | ||||
uint8_t rc_prop_rate; /* Socket option value Lock(a) */ | uint8_t rc_prop_rate; /* Socket option value Lock(a) */ | ||||
uint8_t rc_prop_reduce; /* Socket option value Lock(a) */ | uint8_t rc_prop_reduce; /* Socket option value Lock(a) */ | ||||
uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ | uint8_t rc_tlp_cwnd_reduce; /* Socket option value Lock(a) */ | ||||
uint8_t rc_early_recovery; /* Socket option value Lock(a) */ | uint8_t rc_early_recovery; /* Socket option value Lock(a) */ | ||||
uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ | uint8_t rc_prr_sendalot;/* Socket option value Lock(a) */ | ||||
uint8_t rc_min_to; /* Socket option value Lock(a) */ | uint8_t rc_min_to; /* Socket option value Lock(a) */ | ||||
uint8_t rc_prr_inc_var; /* Socket option value Lock(a) */ | |||||
uint8_t rc_tlp_rtx_out; /* This is TLPRtxOut in the draft */ | uint8_t rc_tlp_rtx_out; /* This is TLPRtxOut in the draft */ | ||||
uint8_t rc_rate_sample_method; | uint8_t rc_rate_sample_method; | ||||
uint8_t rc_gp_hist_idx: 7, | |||||
rc_gp_hist_filled: 1; | |||||
}; | }; | ||||
#ifdef _KERNEL | #ifdef _KERNEL | ||||
struct tcp_rack { | struct tcp_rack { | ||||
/* First cache line 0x00 */ | /* First cache line 0x00 */ | ||||
TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */ | TAILQ_ENTRY(tcp_rack) r_hpts; /* hptsi queue next Lock(b) */ | ||||
int32_t(*r_substate) (struct mbuf *, struct tcphdr *, | int32_t(*r_substate) (struct mbuf *, struct tcphdr *, | ||||
Show All 16 Lines | uint8_t rc_enobuf; /* count of enobufs on connection provides | ||||
* backoff Lock(a) */ | * backoff Lock(a) */ | ||||
uint8_t r_timer_override : 1, /* hpts override Lock(a) */ | uint8_t r_timer_override : 1, /* hpts override Lock(a) */ | ||||
r_tlp_running : 1, /* Running from a TLP timeout Lock(a) */ | r_tlp_running : 1, /* Running from a TLP timeout Lock(a) */ | ||||
r_is_v6 : 1, /* V6 pcb Lock(a) */ | r_is_v6 : 1, /* V6 pcb Lock(a) */ | ||||
rc_in_persist : 1, | rc_in_persist : 1, | ||||
rc_last_pto_set : 1, /* XXX not used */ | rc_last_pto_set : 1, /* XXX not used */ | ||||
rc_tlp_in_progress : 1, | rc_tlp_in_progress : 1, | ||||
rc_always_pace : 1, /* Socket option value Lock(a) */ | rc_always_pace : 1, /* Socket option value Lock(a) */ | ||||
rc_timer_up : 1; /* The rack timer is up flag Lock(a) */ | tlp_timer_up : 1; /* The tlp timer is up flag Lock(a) */ | ||||
uint8_t r_idle_reduce_largest : 1, | uint8_t r_enforce_min_pace : 2, | ||||
r_enforce_min_pace : 2, | rc_has_collapsed : 1, | ||||
r_min_pace_seg_thresh : 5; | r_rep_attack : 1, | ||||
r_rep_reverse : 1, | |||||
r_xxx_min_pace_seg_thresh : 3; | |||||
uint8_t rack_tlp_threshold_use; | uint8_t rack_tlp_threshold_use; | ||||
uint8_t rc_allow_data_af_clo: 1, | uint8_t rc_allow_data_af_clo: 1, | ||||
delayed_ack : 1, | delayed_ack : 1, | ||||
set_pacing_done_a_iw : 1, | |||||
use_rack_cheat : 1, | |||||
alloc_limit_reported : 1, | alloc_limit_reported : 1, | ||||
rc_avail : 5; | sack_attack_disable : 1, | ||||
uint8_t r_resv[2]; /* Fill to cache line boundary */ | do_detection : 1, | ||||
rc_avail : 1; | |||||
uint16_t rack_per_of_gp; | |||||
/* Cache line 2 0x40 */ | /* Cache line 2 0x40 */ | ||||
struct rack_control r_ctl; | struct rack_control r_ctl; | ||||
} __aligned(CACHE_LINE_SIZE); | } __aligned(CACHE_LINE_SIZE); | ||||
#endif | #endif | ||||
#endif | #endif |