diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -240,6 +240,7 @@
 static int32_t rack_enobuf_hw_max = 12000; /* 12 ms in usecs */
 static int32_t rack_enobuf_hw_min = 10000; /* 10 ms in usecs */
 static int32_t rack_hw_rwnd_factor = 2; /* How many max_segs the rwnd must be before we hold off sending */
+
 /*
  * Currently regular tcp has a rto_min of 30ms
  * the backoff goes 12 times so that ends up
@@ -326,6 +327,10 @@
 static int32_t rack_down_raise_thresh = 100;
 static int32_t rack_req_segs = 1;
 static uint64_t rack_bw_rate_cap = 0;
+static uint32_t rack_trace_point_config = 0;
+static uint32_t rack_trace_point_bb_mode = 4;
+static int32_t rack_trace_point_count = 0;
+
 
 /* Weird delayed ack mode */
 static int32_t rack_use_imac_dack = 0;
@@ -547,6 +552,34 @@
 
 int32_t rack_clear_counter=0;
 
+/*
+ * Turn on BB logging for this connection if it hit an enabled trace point.
+ *
+ * "num" is one of the RACK_TP_* values. Logging is switched on (t_logstate
+ * is set to rack_trace_point_bb_mode) only when the configured trace point
+ * equals num or is 0xffffffff (all), the bb mode is non-zero, the remaining
+ * count is positive and the connection is not already logging. Each
+ * activation consumes one unit of rack_trace_point_count.
+ */
+static inline void
+rack_trace_point(struct tcp_rack *rack, int num)
+{
+	/* Compare (==) against the "all" value; never assign to the sysctl. */
+	if (((rack_trace_point_config == num) ||
+	     (rack_trace_point_config == 0xffffffff)) &&
+	    (rack_trace_point_bb_mode != 0) &&
+	    (rack_trace_point_count > 0) &&
+	    (rack->rc_tp->t_logstate == 0)) {
+		int res;
+		res = atomic_fetchadd_int(&rack_trace_point_count, -1);
+		if (res > 0) {
+			rack->rc_tp->t_logstate = rack_trace_point_bb_mode;
+		} else {
+			/* Lost a race; make sure the count is zero now */
+			rack_trace_point_count = 0;
+		}
+	}
+}
+
 static void
 rack_set_cc_pacing(struct tcp_rack *rack)
 {
@@ -785,6 +818,7 @@
 	struct sysctl_oid *rack_measure;
 	struct sysctl_oid *rack_probertt;
 	struct sysctl_oid *rack_hw_pacing;
+	struct sysctl_oid *rack_tracepoint;
 
 	rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_sysctl_root),
@@ -915,6 +949,28 @@
 	    OID_AUTO, "hbp_threshold", CTLFLAG_RW,
 	    &rack_hbp_thresh, 3,
 	    "We are highly buffered if min_rtt_seen / max_rtt_seen > this-threshold");
+
+	rack_tracepoint = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_sysctl_root),
+	    OID_AUTO,
+	    "tp",
+	    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+	    "Rack tracepoint facility");
+	SYSCTL_ADD_U32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_tracepoint),
+	    OID_AUTO, "number", CTLFLAG_RW,
+	    &rack_trace_point_config, 0,
+	    "What is the trace point number to activate (0=none, 0xffffffff = all)?");
+	SYSCTL_ADD_U32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_tracepoint),
+	    OID_AUTO, "bbmode", CTLFLAG_RW,
+	    &rack_trace_point_bb_mode, 4,
+	    "What is BB logging mode that is activated?");
+	SYSCTL_ADD_S32(&rack_sysctl_ctx,
+	    SYSCTL_CHILDREN(rack_tracepoint),
+	    OID_AUTO, "count", CTLFLAG_RW,
+	    &rack_trace_point_count, 0,
+	    "How many connections will have BB logging turned on that hit the tracepoint?");
 	/* Pacing related sysctls */
 	rack_pacing = SYSCTL_ADD_NODE(&rack_sysctl_ctx,
 	    SYSCTL_CHILDREN(rack_sysctl_root),
@@ -10286,6 +10342,7 @@
 #endif
 	tcp_seq max_seq;
 
+	rack_trace_point(rack, RACK_TP_COLLAPSED_WND);
 	max_seq = rack->rc_tp->snd_una + rack->rc_tp->snd_wnd;
 	memset(&fe, 0, sizeof(fe));
 	fe.r_start = max_seq;
@@ -15983,6 +16040,10 @@
 	}
 	counter_u64_add(rack_fto_rsm_send, 1);
 	if (error && (error == ENOBUFS)) {
+		if (rack->r_ctl.crte != NULL) {
+			rack_trace_point(rack, RACK_TP_HWENOBUF);
+		} else
+			rack_trace_point(rack, RACK_TP_ENOBUF);
 		slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
 		if (rack->rc_enobuf < 0x7f)
 			rack->rc_enobuf++;
@@ -18839,6 +18900,10 @@
 			 * Pace us right away to retry in a some
 			 * time
 			 */
+			if (rack->r_ctl.crte != NULL) {
+				rack_trace_point(rack, RACK_TP_HWENOBUF);
+			} else
+				rack_trace_point(rack, RACK_TP_ENOBUF);
 			slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
 			if (rack->rc_enobuf < 0x7f)
 				rack->rc_enobuf++;
diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h
--- a/sys/netinet/tcp_stacks/tcp_rack.h
+++ b/sys/netinet/tcp_stacks/tcp_rack.h
@@ -261,6 +261,36 @@
 #define RACK_QUALITY_PROBERTT 4 /* A measurement where we went into or exited probe RTT */
 #define RACK_QUALITY_ALLACKED 5 /* All data is now acknowledged */
 
+/*********************/
+/* Rack Trace points */
+/*********************/
+/*
+ * Rack trace points are interesting points within
+ * the rack code that the author/debugger may want
+ * to have BB logging enabled if we hit that point.
+ * In order to enable a trace point you set the
+ * sysctl var net.inet.tcp.<stack_name>.tp.number to
+ * one of the numbers listed below. You also
+ * must make sure net.inet.tcp.<stack_name>.tp.bbmode is
+ * non-zero, the default is 4 for continuous tracing.
+ * You also set the number of connections you want
+ * to get BB logs in net.inet.tcp.<stack_name>.tp.count.
+ *
+ * Count will decrement every time BB logging is assigned
+ * to a connection that hit your tracepoint.
+ *
+ * You can enable all trace points by setting the number
+ * to 0xffffffff. You can disable all trace points by
+ * setting number to zero (or count to 0).
+ *
+ * Below is the enumerated list of tracepoints that
+ * have currently been defined in the code. Add more
+ * as you add a call to rack_trace_point(rack, <name>);
+ * where <name> is defined below.
+ */
+#define RACK_TP_HWENOBUF	0x00000001	/* When we are doing hardware pacing and hit enobufs */
+#define RACK_TP_ENOBUF		0x00000002	/* When we hit enobufs with software pacing */
+#define RACK_TP_COLLAPSED_WND	0x00000003	/* When a peer collapses its rwnd on us */
 
 #define MIN_GP_WIN 6 /* We need at least 6 MSS in a GP measurement */
 #ifdef _KERNEL