Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -227,6 +227,7 @@ SYSVSHM opt_sysvipc.h SW_WATCHDOG opt_watchdog.h TCPHPTS opt_inet.h +TCP_ACCOUNTING opt_inet.h TURNSTILE_PROFILING UMTX_PROFILING UMTX_CHAINS opt_global.h Index: sys/netinet/cc/cc_cubic.c =================================================================== --- sys/netinet/cc/cc_cubic.c +++ sys/netinet/cc/cc_cubic.c @@ -129,7 +129,7 @@ if (hystart_bblogs == 0) return; tp = ccv->ccvc.tcp; - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; Index: sys/netinet/cc/cc_newreno.c =================================================================== --- sys/netinet/cc/cc_newreno.c +++ sys/netinet/cc/cc_newreno.c @@ -139,7 +139,7 @@ if (hystart_bblogs == 0) return; tp = ccv->ccvc.tcp; - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -330,6 +330,18 @@ /* Maximum length of log ID. 
*/ #define TCP_LOG_ID_LEN 64 +/* TCP accounting counters */ +#define TCP_NUM_PROC_COUNTERS 11 +#define TCP_NUM_CNT_COUNTERS 13 + +/* Must match counter array sizes in tcpcb */ +struct tcp_perf_info { + uint64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS]; + uint64_t tcp_proc_time[TCP_NUM_CNT_COUNTERS]; + uint64_t timebase; /* timebase for tcp_proc_time */ + uint8_t tb_is_stable; /* timebase is stable/invariant */ +}; + /* * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits * the caller to query certain information about the state of a TCP Index: sys/netinet/tcp_accounting.h =================================================================== --- sys/netinet/tcp_accounting.h +++ sys/netinet/tcp_accounting.h @@ -25,15 +25,7 @@ #define CYC_HANDLE_MAP 11 #define CYC_HANDLE_ACK 12 -/* Should the tp->xxx array's be alloc'ed? */ /* #define TCP_NUM_PROC_COUNTERS 11 defined in tcp_var.h */ /* #define TCP_NUM_CNT_COUNTERS 13 defined in tcp_var.h */ -#ifdef _KERNEL -#ifdef TCP_ACCOUNTING -extern counter_u64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS]; -extern counter_u64_t tcp_proc_time[TCP_NUM_PROC_COUNTERS]; -#endif -#endif - #endif Index: sys/netinet/tcp_hpts.c =================================================================== --- sys/netinet/tcp_hpts.c +++ sys/netinet/tcp_hpts.c @@ -1344,7 +1344,7 @@ } CURVNET_SET(inp->inp_vnet); /* Lets do any logging that we might want to */ - if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (hpts_does_tp_logging && tcp_bblogging_on(tp)) { tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout); } Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -104,10 +104,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include Index: sys/netinet/tcp_log_buf.h =================================================================== --- 
sys/netinet/tcp_log_buf.h +++ sys/netinet/tcp_log_buf.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (c) 2016-2018 Netflix, Inc. + * Copyright (c) 2016-2020 Netflix, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -101,13 +101,42 @@ uint32_t pkt_epoch; }; -/* Per-stack stack-specific info. */ +/* shadows tcp_log_bbr struct element sizes */ +struct tcp_log_raw { + uint64_t u64_flex[4]; + uint32_t u32_flex[14]; + uint16_t u16_flex[3]; + uint8_t u8_flex[6]; + uint32_t u32_flex2[1]; +}; + +struct tcp_log_uint64 { + uint64_t u64_flex[13]; +}; + +struct tcp_log_sendfile { + uint64_t offset; + uint64_t length; + uint32_t flags; +}; + +/* + * tcp_log_stackspecific is currently being used as "event specific" log + * info by all stacks (i.e. struct tcp_log_bbr is used for generic event + * logging). Until this is cleaned up more generically and throughout, + * allow events to use the same space in the union. + */ union tcp_log_stackspecific { struct tcp_log_rack u_rack; struct tcp_log_bbr u_bbr; + struct tcp_log_sendfile u_sf; + struct tcp_log_raw u_raw; /* "raw" log access */ + struct tcp_log_uint64 u64_raw; /* just u64's - used by process info */ }; +typedef union tcp_log_stackspecific tcp_log_eventspecific_t; + struct tcp_log_buffer { /* Event basics */ @@ -156,7 +185,6 @@ uint8_t tlb_snd_scale:4, /* TCPCB snd_scale */ tlb_rcv_scale:4; /* TCPCB rcv_scale */ uint8_t _pad[3]; /* Padding */ - /* Per-stack info */ union tcp_log_stackspecific tlb_stackinfo; #define tlb_rack tlb_stackinfo.u_rack @@ -245,8 +273,14 @@ }; enum tcp_log_states { - TCP_LOG_STATE_CLEAR = -1, /* Deactivate and clear tracing */ + TCP_LOG_STATE_RATIO_OFF = -2, /* Log ratio evaluation yielded an OFF + result. Only used for tlb_logstate */ + TCP_LOG_STATE_CLEAR = -1, /* Deactivate and clear tracing. 
Passed + to tcp_log_state_change() but never + stored in any logstate variable */ TCP_LOG_STATE_OFF = 0, /* Pause */ + + /* Positively numbered states represent active logging modes */ TCP_LOG_STATE_TAIL=1, /* Keep the trailing events */ TCP_LOG_STATE_HEAD=2, /* Keep the leading events */ TCP_LOG_STATE_HEAD_AUTO=3, /* Keep the leading events, and @@ -256,6 +290,7 @@ TCP_LOG_STATE_TAIL_AUTO=5, /* Keep the trailing events, and automatically dump them when the session ends */ + TCP_LOG_VIA_BBPOINTS=6 /* Log only if the BB point has been configured */ }; /* Use this if we don't know whether the operation succeeded. */ @@ -298,8 +333,136 @@ #endif /* _KERNEL */ #endif /* __tcp_log_dev_h__ */ +/* + * Defined BBPOINTS that can be used + * with TCP_LOG_VIA_BBPOINTS. + */ +#define TCP_BBPOINT_NONE 0 +#define TCP_BBPOINT_REQ_LEVEL_LOGGING 1 + +/*********************/ +/* TCP Trace points */ +/*********************/ +/* + * TCP trace points are interesting points within + * the TCP code that the author/debugger may want + * to have BB logging enabled if we hit that point. + * In order to enable a trace point you set the + * sysctl var net.inet.tcp.bb.tp.number to + * one of the numbers listed below. You also + * must make sure net.inet.tcp.bb.tp.bbmode is + * non-zero, the default is 4 for continuous tracing. + * You also set the number of connections you want + * to have get BB logs in net.inet.tcp.bb.tp.count. + * + * Count will decrement every time BB logging is assigned + * to a connection that hit your tracepoint. + * + * You can enable all trace points by setting the number + * to 0xffffffff. You can disable all trace points by + * setting number to zero (or count to 0). + * + * Below is the enumerated list of tracepoints that + * have currently been defined in the code. Add more + * as you add a call to rack_trace_point(rack, num); + * where num is defined below.
+ */ +#define TCP_TP_HWENOBUF 0x00000001 /* When we are doing hardware pacing and hit enobufs */ +#define TCP_TP_ENOBUF 0x00000002 /* When we hit enobufs with software pacing */ +#define TCP_TP_COLLAPSED_WND 0x00000003 /* When a peer collapses its rwnd on us */ +#define TCP_TP_COLLAPSED_RXT 0x00000004 /* When we actually retransmit a collapsed window rsm */ +#define TCP_TP_HTTP_LOG_FAIL 0x00000005 /* We tried to allocate an HTTP log but had no space */ +#define TCP_TP_RESET_RCV 0x00000006 /* Triggers when we receive a RST */ +#define TCP_TP_EXCESS_RXT 0x00000007 /* When we get excess RXTs clamping the cwnd */ +#define TCP_TP_SAD_TRIGGERED 0x00000008 /* Sack Attack Detection triggers */ + +#define TCP_TP_SAD_SUSPECT 0x0000000a /* A sack has suspicious information in it */ + #ifdef _KERNEL +extern uint32_t tcp_trace_point_config; +extern uint32_t tcp_trace_point_bb_mode; +extern int32_t tcp_trace_point_count; + +/* + * Returns true if any sort of BB logging is enabled, + * except the BBPOINTS-only mode; commonly used throughout the codebase. + */ +static inline int +tcp_bblogging_on(struct tcpcb *tp) +{ + if (tp->_t_logstate <= TCP_LOG_STATE_OFF) + return (0); + if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) + return (0); + return (1); +} + +/* + * Returns true if we match a specific bbpoint when + * in TCP_LOG_VIA_BBPOINTS, but also returns true + * for all the other logging states.
+ */ +static inline int +tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint) +{ + if (tp->_t_logstate <= TCP_LOG_STATE_OFF) + return (0); + if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) && + (tp->_t_logpoint == bbpoint)) + return (1); + else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) + return (0); + return (1); +} + +static inline void +tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint) +{ + if ((ls == TCP_LOG_VIA_BBPOINTS) && + (tp->_t_logstate <= TCP_LOG_STATE_OFF)){ + /* + * We don't allow a BBPOINTS set to override + * other types of BB logging set by other means such + * as the bb_ratio/bb_state URL parameters. In other + * words BBlogging must be *off* in order to turn on + * a BBpoint. + */ + tp->_t_logpoint = bbpoint; + tp->_t_logstate = ls; + } else if (ls != TCP_LOG_VIA_BBPOINTS) { + tp->_t_logpoint = 0; + if ((ls >= TCP_LOG_STATE_OFF) && + (ls < TCP_LOG_VIA_BBPOINTS)) + tp->_t_logstate = ls; + } +} + +static inline uint32_t +tcp_get_bblog_state(struct tcpcb *tp) +{ + return (tp->_t_logstate); +} + +static inline void +tcp_trace_point(struct tcpcb *tp, int num) +{ + if (((tcp_trace_point_config == num) || + (tcp_trace_point_config == 0xffffffff)) && + (tcp_trace_point_bb_mode != 0) && + (tcp_trace_point_count > 0) && + (tcp_bblogging_on(tp) == 0)) { + int res; + res = atomic_fetchadd_int(&tcp_trace_point_count, -1); + if (res > 0) { + tcp_set_bblog_state(tp, tcp_trace_point_bb_mode, TCP_BBPOINT_NONE); + } else { + /* Loss a race assure its zero now */ + tcp_trace_point_count = 0; + } + } +} + #define TCP_LOG_BUF_DEFAULT_SESSION_LIMIT 5000 #define TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT 5000000 @@ -309,15 +472,51 @@ */ #define TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ - if (tp->t_logstate != TCP_LOG_STATE_OFF) \ - tcp_log_event_(tp, th, rxbuf, txbuf, eventid, \ + if (tcp_bblogging_on(tp)) \ + tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ errornum, len, stackinfo, 
th_hostorder, \ tp->t_output_caller, __func__, __LINE__, tv);\ } while (0) /* * TCP_LOG_EVENT: This is a macro so we can capture function/line - * information when needed. + * information when needed. You can use the macro when you are not + * doing a lot of prep in the stack specific information i.e. you + * don't add extras (stackinfo). If you are adding extras which + * means filling out a stack variable instead use the tcp_log_event() + * function but enclose the call to the log (and all the setup) in a + * if (tcp_bblogging_on(tp)) { + * ... setup and logging call ... + * } + * + * Always use the macro tcp_bblogging_on() since sometimes the defintions + * do change. + * + * BBlogging also supports the concept of a BBpoint. The idea behind this + * is that when you set a specific BBpoint on and turn the logging into + * the BBpoint mode (TCP_LOG_VIA_BBPOINTS) you will be defining very very + * few of these points to come out. The point is specific to a code you + * want tied to that one BB logging. This allows you to turn on a much broader + * scale set of limited logging on more connections without overwhelming the + * I/O system with too much BBlogs. This of course means you need to be quite + * careful on how many BBlogs go with each point, but you can have multiple points + * only one of which is active at a time. + * + * To define a point you add it above under the define for TCP_BBPOINT_NONE (which + * is the default i.e. no point is defined. You then, for your point use the + * tcp_bblogging_point_on(struct tcpcb *tp, uint8_t bbpoint) inline to enclose + * your call to tcp_log_event. Do not use one of the TCP_LOGGING macros else + * your point will never come out. You specify your defined point in the bbpoint + * side of the inline. An example of this you can find in rack where the + * TCP_BBPOINT_REQ_LEVEL_LOGGING is used. There a specific set of logs are generated + * for each http request that rack is tracking. 
+ * + * When turning on BB logging use the inline: + * tcp_set_bblog_state(struct tcpcb *tp, uint8_t ls, uint8_t bbpoint) + * the ls field is the logging state TCP_LOG_STATE_CONTINUAL etc. The + * bbpoint field is ignored unless the ls field is set to TCP_LOG_VIA_BBPOINTS. + * Currently there is only a socket option that turns on the non-BBPOINT + * logging. * * Prototype: * TCP_LOG_EVENT(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, @@ -343,16 +542,16 @@ TCP_LOG_EVENT_VERBOSE(tp, th, rxbuf, txbuf, \ eventid, errornum, len, stackinfo, \ th_hostorder, NULL); \ - else if (tp->t_logstate != TCP_LOG_STATE_OFF) \ - tcp_log_event_(tp, th, rxbuf, txbuf, eventid, \ + else if (tcp_bblogging_on(tp)) \ + tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ errornum, len, stackinfo, th_hostorder, \ NULL, NULL, 0, NULL); \ } while (0) #endif /* TCP_LOG_FORCEVERBOSE */ #define TCP_LOG_EVENTP(tp, th, rxbuf, txbuf, eventid, errornum, len, stackinfo, th_hostorder, tv) \ do { \ - if (tp->t_logstate != TCP_LOG_STATE_OFF) \ - tcp_log_event_(tp, th, rxbuf, txbuf, eventid, \ + if (tcp_bblogging_on(tp)) \ + tcp_log_event(tp, th, rxbuf, txbuf, eventid, \ errornum, len, stackinfo, th_hostorder, \ NULL, NULL, 0, tv); \ } while (0) @@ -362,7 +561,7 @@ void tcp_log_drain(struct tcpcb *tp); int tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force); void tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason); -struct tcp_log_buffer *tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, +struct tcp_log_buffer *tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *tv); @@ -377,11 +576,14 @@ void tcp_log_tcpcbinit(struct tcpcb *tp); void tcp_log_tcpcbfini(struct tcpcb *tp); void tcp_log_flowend(struct tcpcb *tp); +void 
tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, + int flags); +int tcp_log_apply_ratio(struct tcpcb *tp, int ratio); #else /* !TCP_BLACKBOX */ #define tcp_log_verbose (false) static inline struct tcp_log_buffer * -tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, +tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, Index: sys/netinet/tcp_log_buf.c =================================================================== --- sys/netinet/tcp_log_buf.c +++ sys/netinet/tcp_log_buf.c @@ -29,8 +29,10 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_inet.h" #include #include +#include #include #include #include @@ -45,7 +47,6 @@ #include #include /* Must come after qmath.h and tree.h */ #include - #include #include @@ -57,6 +58,7 @@ #include #include #include +#include /* Default expiry time */ #define TCP_LOG_EXPIRE_TIME ((sbintime_t)60 * SBT_1S) @@ -85,6 +87,9 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP Black Box controls"); +SYSCTL_NODE(_net_inet_tcp_bb, OID_AUTO, tp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "TCP Black Box Trace Point controls"); + SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose, 0, "Force verbose logging for TCP traces"); @@ -92,6 +97,23 @@ CTLFLAG_RW, &tcp_log_session_limit, 0, "Maximum number of events maintained for each TCP session"); +uint32_t tcp_trace_point_config = 0; +SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, number, CTLFLAG_RW, + &tcp_trace_point_config, TCP_LOG_STATE_HEAD_AUTO, + "What is the trace point number to activate (0=none, 0xffffffff = all)?"); + +uint32_t tcp_trace_point_bb_mode = TCP_LOG_STATE_CONTINUAL; +SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, bbmode, CTLFLAG_RW, + &tcp_trace_point_bb_mode, TCP_LOG_STATE_HEAD_AUTO, + "What is BB 
logging mode that is activated?"); + +int32_t tcp_trace_point_count = 0; +SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, count, CTLFLAG_RW, + &tcp_trace_point_count, TCP_LOG_STATE_HEAD_AUTO, + "How many connections will have BB logging turned on that hit the tracepoint?"); + + + SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW, &tcp_log_zone, "Maximum number of events maintained for all TCP sessions"); @@ -264,7 +286,7 @@ volatile u_int tlb_refcnt; volatile u_int tlb_reqcnt; uint32_t tlb_loglimit; - uint8_t tlb_logstate; + int8_t tlb_logstate; }; struct tcp_log_id_node @@ -514,6 +536,61 @@ atomic_fetchadd_int(&tlb->tlb_reqcnt, 1); } +int +tcp_log_apply_ratio(struct tcpcb *tp, int ratio) +{ + struct tcp_log_id_bucket *tlb; + struct inpcb *inp = tptoinpcb(tp); + uint32_t hash, ratio_hash_thresh; + int rv, tree_locked; + + rv = 0; + tree_locked = TREE_UNLOCKED; + tlb = tp->t_lib; + + INP_WLOCK_ASSERT(inp); + if (tlb == NULL) { + INP_WUNLOCK(inp); + return (EOPNOTSUPP); + } + ratio_hash_thresh = max(1, UINT32_MAX / ratio); + TCPID_BUCKET_REF(tlb); + INP_WUNLOCK(inp); + TCPID_BUCKET_LOCK(tlb); + + hash = hash32_buf(tlb->tlb_id, strlen(tlb->tlb_id), 0); + if (hash > ratio_hash_thresh && tp->_t_logstate == TCP_LOG_STATE_OFF && + tlb->tlb_logstate == TCP_LOG_STATE_OFF) { + /* + * Ratio decision not to log this log ID (and this connection by + * way of association). We only apply a log ratio log disable + * decision if it would not interfere with a log enable decision + * made elsewhere e.g. tcp_log_selectauto() or setsockopt(). 
+ */ + tlb->tlb_logstate = TCP_LOG_STATE_RATIO_OFF; + INP_WLOCK(inp); + RECHECK_INP(); + (void)tcp_log_state_change(tp, TCP_LOG_STATE_OFF); +done: + INP_WUNLOCK(inp); + } + + INP_UNLOCK_ASSERT(inp); + if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL)) + TCPID_BUCKET_UNLOCK(tlb); + + if (tree_locked == TREE_WLOCKED) { + TCPID_TREE_WLOCK_ASSERT(); + TCPID_TREE_WUNLOCK(); + } else if (tree_locked == TREE_RLOCKED) { + TCPID_TREE_RLOCK_ASSERT(); + TCPID_TREE_RUNLOCK(); + } else + TCPID_TREE_UNLOCK_ASSERT(); + + return (rv); +} + /* * Associate the specified tag with a particular TCP log ID. * Called with INPCB locked. Returns with it unlocked. @@ -565,7 +642,7 @@ struct tcp_log_id_node *tln; struct inpcb *inp = tptoinpcb(tp); int tree_locked, rv; - bool bucket_locked; + bool bucket_locked, same; tlb = NULL; tln = NULL; @@ -574,17 +651,41 @@ restart: INP_WLOCK_ASSERT(inp); - /* See if the ID is unchanged. */ - if ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) || - (tp->t_lib == NULL && *id == 0)) { + same = ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) || + (tp->t_lib == NULL && *id == 0)); + if (tp->_t_logstate && STAILQ_FIRST(&tp->t_logs) && !same) { + /* + * There are residual logs left we may + * be changing id's so dump what we can. 
+ */ + switch(tp->_t_logstate) { + case TCP_LOG_STATE_HEAD_AUTO: + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at id switch", + M_NOWAIT, false); + break; + case TCP_LOG_STATE_TAIL_AUTO: + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at id switch", + M_NOWAIT, false); + break; + case TCP_LOG_STATE_CONTINUAL: + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual at id switch", + M_NOWAIT, false); + break; + case TCP_LOG_VIA_BBPOINTS: + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints at id switch", + M_NOWAIT, false); + break; + } + } + if (same) { if (tp->t_lib != NULL) { tcp_log_increment_reqcnt(tp->t_lib); - if ((tp->t_lib->tlb_logstate) && + if ((tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) && (tp->t_log_state_set == 0)) { /* Clone in any logging */ - tp->t_logstate = tp->t_lib->tlb_logstate; + tp->_t_logstate = tp->t_lib->tlb_logstate; } if ((tp->t_lib->tlb_loglimit) && (tp->t_log_state_set == 0)) { @@ -873,10 +974,10 @@ SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list); tp->t_lib = tlb; tp->t_lin = tln; - if (tp->t_lib->tlb_logstate) { + if (tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) { /* Clone in any logging */ - tp->t_logstate = tp->t_lib->tlb_logstate; + tp->_t_logstate = tp->t_lib->tlb_logstate; } if (tp->t_lib->tlb_loglimit) { /* The loglimit too */ @@ -1189,7 +1290,7 @@ if ((tcp_log_auto_all == true) && tcp_log_auto_mode && tcp_log_selectauto()) { - tp->t_logstate = tcp_log_auto_mode; + tp->_t_logstate = tcp_log_auto_mode; tp->t_flags2 |= TF2_LOG_AUTO; } } @@ -1325,17 +1426,57 @@ struct tcp_log_mem *log_entry; sbintime_t callouttime; - INP_WLOCK_ASSERT(tptoinpcb(tp)); - - TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_CONNEND, 0, 0, NULL, false); + INP_WLOCK_ASSERT(tptoinpcb(tp)); +#ifdef TCP_ACCOUNTING + if (tp->_t_logstate) { + struct tcp_log_buffer *lgb; + union tcp_log_stackspecific log; + struct timeval tv; + int i; + + memset(&log, 0, sizeof(log)); + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + for (i = 0; 
itcp_cnt_counters[i]; + } + lgb = tcp_log_event(tp, NULL, + NULL, + NULL, + TCP_LOG_ACCOUNTING, 0, + 0, &log, false, NULL, NULL, 0, &tv); + lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; + lgb->tlb_flex2 = 1; + for (i = 0; itcp_proc_time[i]; + } + lgb = tcp_log_event(tp, NULL, + NULL, + NULL, + TCP_LOG_ACCOUNTING, 0, + 0, &log, false, NULL, NULL, 0, &tv); + if (tptoinpcb(tp)->inp_flags2 & INP_MBUF_ACKCMP) + lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS; + else + lgb->tlb_flex1 = TCP_NUM_PROC_COUNTERS; + lgb->tlb_flex2 = 2; + } + log.u_bbr.timeStamp = tcp_get_usecs(&tv); + log.u_bbr.cur_del_rate = tp->t_end_info; + TCP_LOG_EVENTP(tp, NULL, + NULL, + NULL, + TCP_LOG_CONNEND, 0, + 0, &log, false, &tv); + } +#endif /* * If we were gathering packets to be automatically dumped, try to do * it now. If this succeeds, the log information in the TCPCB will be * cleared. Otherwise, we'll handle the log information as we do * for other states. */ - switch(tp->t_logstate) { + switch(tp->_t_logstate) { case TCP_LOG_STATE_HEAD_AUTO: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", M_NOWAIT, false); @@ -1344,6 +1485,10 @@ (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail", M_NOWAIT, false); break; + case TCP_LOG_VIA_BBPOINTS: + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", + M_NOWAIT, false); + break; case TCP_LOG_STATE_CONTINUAL: (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", M_NOWAIT, false); @@ -1493,7 +1638,7 @@ * Change the log state to off (just in case anything tries to sneak * in a last-minute log). 
*/ - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; } static void @@ -1510,7 +1655,7 @@ KASSERT(tp->t_lognum == 0, ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", __func__, tp->t_lognum)); - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; } /* @@ -1520,7 +1665,7 @@ */ struct tcp_log_buffer * -tcp_log_event_(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, +tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf, struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len, union tcp_log_stackspecific *stackinfo, int th_hostorder, const char *output_caller, const char *func, int line, const struct timeval *itv) @@ -1547,13 +1692,14 @@ tcp_log_purge_tp_logbuf(tp); return (NULL); } - KASSERT(tp->t_logstate == TCP_LOG_STATE_HEAD || - tp->t_logstate == TCP_LOG_STATE_TAIL || - tp->t_logstate == TCP_LOG_STATE_CONTINUAL || - tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO || - tp->t_logstate == TCP_LOG_STATE_TAIL_AUTO, - ("%s called with unexpected tp->t_logstate (%d)", __func__, - tp->t_logstate)); + KASSERT(tp->_t_logstate == TCP_LOG_STATE_HEAD || + tp->_t_logstate == TCP_LOG_STATE_TAIL || + tp->_t_logstate == TCP_LOG_STATE_CONTINUAL || + tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO || + tp->_t_logstate == TCP_LOG_VIA_BBPOINTS || + tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO, + ("%s called with unexpected tp->_t_logstate (%d)", __func__, + tp->_t_logstate)); /* * Get the serial number. We do this early so it will @@ -1589,7 +1735,7 @@ "to set tp %p to TCP_LOG_STATE_CLEAR", __func__, __LINE__, tp); #endif - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; } return (NULL); } @@ -1598,12 +1744,12 @@ * the buffers. If successful, deactivate tracing. Otherwise, * leave it active so we will retry. 
*/ - if (tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO && + if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO && !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head", M_NOWAIT, false)) { - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; return(NULL); - } else if ((tp->t_logstate == TCP_LOG_STATE_CONTINUAL) && + } else if ((tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) && !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", M_NOWAIT, false)) { if (attempt_count == 0) { @@ -1614,15 +1760,26 @@ counter_u64_add(tcp_log_que_fail4, 1); #endif return(NULL); - } else if (tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO) + + } else if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) && + !tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", + M_NOWAIT, false)) { + if (attempt_count == 0) { + attempt_count++; + goto retry; + } +#ifdef TCPLOG_DEBUG_COUNTERS + counter_u64_add(tcp_log_que_fail4, 1); +#endif + return(NULL); + } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) return(NULL); /* If in HEAD state, just deactivate the tracing and return. */ - if (tp->t_logstate == TCP_LOG_STATE_HEAD) { - tp->t_logstate = TCP_LOG_STATE_OFF; + if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { + tp->_t_logstate = TCP_LOG_STATE_OFF; return(NULL); } - /* * Get a buffer to reuse. If that fails, just give up. * (We can't log anything without a buffer in which to @@ -1647,7 +1804,7 @@ /* Basic entries. 
*/ if (itv == NULL) - getmicrouptime(&log_buf->tlb_tv); + microuptime(&log_buf->tlb_tv); else memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval)); log_buf->tlb_ticks = ticks; @@ -1663,12 +1820,18 @@ log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc; log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc; log_buf->tlb_rxbuf.tls_sb_spare = 0; + } else { + log_buf->tlb_rxbuf.tls_sb_acc = 0; + log_buf->tlb_rxbuf.tls_sb_ccc = 0; } if (txbuf != NULL) { log_buf->tlb_eventflags |= TLB_FLAG_TXBUF; log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc; log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc; log_buf->tlb_txbuf.tls_sb_spare = 0; + } else { + log_buf->tlb_txbuf.tls_sb_acc = 0; + log_buf->tlb_txbuf.tls_sb_ccc = 0; } /* Copy values from tp to the log entry. */ #define COPY_STAT(f) log_buf->tlb_ ## f = tp->f @@ -1700,8 +1863,6 @@ COPY_STAT_T(fbyte_out); #undef COPY_STAT #undef COPY_STAT_T - log_buf->tlb_flex1 = 0; - log_buf->tlb_flex2 = 0; /* Copy stack-specific info. */ if (stackinfo != NULL) { memcpy(&log_buf->tlb_stackinfo, stackinfo, @@ -1721,6 +1882,8 @@ optlen = (th->th_off << 2) - sizeof (struct tcphdr); if (optlen > 0) memcpy(log_buf->tlb_opts, th + 1, optlen); + } else { + memset(&log_buf->tlb_th, 0, sizeof(*th)); } /* Verbose information */ @@ -1749,8 +1912,10 @@ tcp_log_state_change(struct tcpcb *tp, int state) { struct tcp_log_mem *log_entry; + int rv; INP_WLOCK_ASSERT(tptoinpcb(tp)); + rv = 0; switch(state) { case TCP_LOG_STATE_CLEAR: while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL) @@ -1758,15 +1923,27 @@ /* Fall through */ case TCP_LOG_STATE_OFF: - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; break; case TCP_LOG_STATE_TAIL: case TCP_LOG_STATE_HEAD: case TCP_LOG_STATE_CONTINUAL: + case TCP_LOG_VIA_BBPOINTS: case TCP_LOG_STATE_HEAD_AUTO: case TCP_LOG_STATE_TAIL_AUTO: - tp->t_logstate = state; + /* + * When the RATIO_OFF state is set for the bucket, the log ID + * this tp is associated with has been probabilistically opted + * out of logging 
per tcp_log_apply_ratio(). + */ + if (tp->t_lib == NULL || + tp->t_lib->tlb_logstate != TCP_LOG_STATE_RATIO_OFF) { + tp->_t_logstate = state; + } else { + rv = ECANCELED; + tp->_t_logstate = TCP_LOG_STATE_OFF; + } break; default: @@ -1774,11 +1951,12 @@ } if (tcp_disable_all_bb_logs) { /* We are prohibited from doing any logs */ - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; + rv = EBUSY; } tp->t_flags2 &= ~(TF2_LOG_AUTO); - return (0); + return (rv); } /* If tcp_drain() is called, flush half the log entries. */ @@ -1792,13 +1970,26 @@ if ((target = tp->t_lognum / 2) == 0) return; + /* + * XXXRRS: At this I don't think this is wise that + * we do this. All that a drain call means is that + * we are hitting one of the system mbuf limits. BB + * logging, or freeing of them, will not create any + * more mbufs and really has nothing to do with + * the system running out of mbufs. For now I + * am changing this to free any "AUTO" by dumping + * them out. But this should either be changed + * so that it gets called when we hit the BB limit + * or it should just not get called (one of the two) + * since I don't think the mbuf <-> BB log cleanup + * is the right thing to do here. + */ /* * If we are logging the "head" packets, we want to discard * from the tail of the queue. Otherwise, we want to discard * from the head. 
*/ - if (tp->t_logstate == TCP_LOG_STATE_HEAD || - tp->t_logstate == TCP_LOG_STATE_HEAD_AUTO) { + if (tp->_t_logstate == TCP_LOG_STATE_HEAD) { skip = tp->t_lognum - target; STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue) if (!--skip) @@ -1818,7 +2009,16 @@ KASSERT(target == 0, ("%s: After removing from tail, target was %d", __func__, target)); - } else if (tp->t_logstate == TCP_LOG_STATE_CONTINUAL) { + } else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) { + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at drain", + M_NOWAIT, false); + } else if (tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO) { + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at drain", + M_NOWAIT, false); + } else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) { + (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints", + M_NOWAIT, false); + } else if (tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) { (void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual", M_NOWAIT, false); } else { @@ -2181,7 +2381,7 @@ KASSERT(tp->t_lognum == 0, ("%s: After freeing entries, tp->t_lognum=%d (expected 0)", __func__, tp->t_lognum)); - tp->t_logstate = TCP_LOG_STATE_OFF; + tp->_t_logstate = TCP_LOG_STATE_OFF; return (0); } @@ -2632,9 +2832,47 @@ void tcp_log_flowend(struct tcpcb *tp) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tp->_t_logstate != TCP_LOG_STATE_OFF) { struct socket *so = tptosocket(tp); TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_FLOWEND, 0, 0, NULL, false); } } + +void +tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags) +{ + struct inpcb *inp; + struct tcpcb *tp; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_log_sendfile: inp == NULL")); + + /* quick check to see if logging is enabled for this connection */ + tp = intotcpcb(inp); + if ((inp->inp_flags & INP_DROPPED) || + (tp->_t_logstate == TCP_LOG_STATE_OFF)) { + return; + } + + INP_WLOCK(inp); + /* double check log state now that we have the lock */ + if 
(inp->inp_flags & INP_DROPPED) + goto done; + if (tp->_t_logstate != TCP_LOG_STATE_OFF) { + struct timeval tv; + tcp_log_eventspecific_t log; + + microuptime(&tv); + log.u_sf.offset = offset; + log.u_sf.length = nbytes; + log.u_sf.flags = flags; + + TCP_LOG_EVENTP(tp, NULL, + &tptosocket(tp)->so_rcv, + &tptosocket(tp)->so_snd, + TCP_LOG_SENDFILE, 0, 0, &log, false, &tv); + } +done: + INP_WUNLOCK(inp); +} Index: sys/netinet/tcp_lro.c =================================================================== --- sys/netinet/tcp_lro.c +++ sys/netinet/tcp_lro.c @@ -681,7 +681,7 @@ int frm, int32_t tcp_data_len, uint32_t th_seq, uint32_t th_ack, uint16_t th_win) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv, btv; uint32_t cts; Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -81,9 +81,9 @@ #include #define TCPOUTFLAGS #include -#include #include #include +#include #include #include #include @@ -1411,8 +1411,9 @@ /* We're getting ready to send; log now. */ /* XXXMT: We are not honoring verbose logging. 
*/ - if (tp->t_logstate != TCP_LOG_STATE_OFF) - lgb = tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd, + + if (tcp_bblogging_on(tp)) + lgb = tcp_log_event(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK, len, NULL, false, NULL, NULL, 0, NULL); else Index: sys/netinet/tcp_ratelimit.c =================================================================== --- sys/netinet/tcp_ratelimit.c +++ sys/netinet/tcp_ratelimit.c @@ -1572,7 +1572,7 @@ uint64_t hw_rate, uint32_t time_between, uint32_t calc_time_between, uint32_t segs, uint32_t res_div, uint16_t mult, uint8_t mod) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -1885,7 +1885,7 @@ static void bbr_log_type_bw_reduce(struct tcp_bbr *bbr, int reason) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -1908,7 +1908,7 @@ static void bbr_log_type_rwnd_collapse(struct tcp_bbr *bbr, int seq, int mode, uint32_t count) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -1927,7 +1927,7 @@ bbr_log_type_just_return(struct tcp_bbr *bbr, uint32_t cts, uint32_t tlen, uint8_t hpts_calling, uint8_t reason, uint32_t p_maxseg, int len) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -1952,7 +1952,7 @@ static void bbr_log_type_enter_rec(struct tcp_bbr *bbr, uint32_t seq) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union 
tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -1970,7 +1970,7 @@ static void bbr_log_msgsize_fail(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t len, uint32_t maxseg, uint32_t mtu, int32_t csum_flags, int32_t tso, uint32_t cts) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -1989,7 +1989,7 @@ static void bbr_log_flowend(struct tcp_bbr *bbr) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; struct sockbuf *r, *s; struct timeval tv; @@ -2012,7 +2012,7 @@ bbr_log_pkt_epoch(struct tcp_bbr *bbr, uint32_t cts, uint32_t line, uint32_t lost, uint32_t del) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2036,7 +2036,7 @@ static void bbr_log_time_epoch(struct tcp_bbr *bbr, uint32_t cts, uint32_t line, uint32_t epoch_time) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2055,7 +2055,7 @@ static void bbr_log_set_of_state_target(struct tcp_bbr *bbr, uint32_t new_tar, int line, int meth) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -2079,7 +2079,7 @@ static void bbr_log_type_statechange(struct tcp_bbr *bbr, uint32_t cts, int32_t line) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2107,7 +2107,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, 
uint32_t applied, uint32_t rtt, uint32_t line, uint8_t reas, uint16_t cond) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2130,7 +2130,7 @@ static void bbr_log_type_exit_rec(struct tcp_bbr *bbr) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -2149,7 +2149,7 @@ bbr_log_type_cwndupd(struct tcp_bbr *bbr, uint32_t bytes_this_ack, uint32_t chg, uint32_t prev_acked, int32_t meth, uint32_t target, uint32_t th_ack, int32_t line) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -2175,7 +2175,7 @@ * Log the rtt sample we are applying to the srtt algorithm in * useconds. 
*/ - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -2199,7 +2199,7 @@ static void bbr_log_type_pesist(struct tcp_bbr *bbr, uint32_t cts, uint32_t time_in, int32_t line, uint8_t enter_exit) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2216,7 +2216,7 @@ static void bbr_log_ack_clear(struct tcp_bbr *bbr, uint32_t cts) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2237,7 +2237,7 @@ bbr_log_ack_event(struct tcp_bbr *bbr, struct tcphdr *th, struct tcpopt *to, uint32_t tlen, uint16_t nsegs, uint32_t cts, int32_t nxt_pkt, struct mbuf *m) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2285,7 +2285,7 @@ static void bbr_log_doseg_done(struct tcp_bbr *bbr, uint32_t cts, int32_t nxt_pkt, int32_t did_out) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2310,7 +2310,7 @@ bbr_log_enobuf_jmp(struct tcp_bbr *bbr, uint32_t len, uint32_t cts, int32_t line, uint32_t o_len, uint32_t segcnt, uint32_t segsiz) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2329,7 +2329,7 @@ static void bbr_log_to_processing(struct tcp_bbr *bbr, uint32_t cts, int32_t ret, int32_t timers, uint8_t hpts_calling) { - if (bbr->rc_tp->t_logstate != 
TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2351,7 +2351,7 @@ static void bbr_log_to_event(struct tcp_bbr *bbr, uint32_t cts, int32_t to_num) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; uint64_t ar; @@ -2379,7 +2379,7 @@ static void bbr_log_startup_event(struct tcp_bbr *bbr, uint32_t cts, uint32_t flex1, uint32_t flex2, uint32_t flex3, uint8_t reason) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2402,7 +2402,7 @@ static void bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2439,7 +2439,7 @@ bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts, uint32_t time_since_sent, uint32_t srtt, uint32_t thresh, uint32_t to) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2462,7 +2462,7 @@ bbr_log_pacing_delay_calc(struct tcp_bbr *bbr, uint16_t gain, uint32_t len, uint32_t cts, uint32_t usecs, uint64_t bw, uint32_t override, int mod) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2488,7 +2488,7 @@ static void bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union 
tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2512,7 +2512,7 @@ static void bbr_log_thresh_choice(struct tcp_bbr *bbr, uint32_t cts, uint32_t thresh, uint32_t lro, uint32_t srtt, struct bbr_sendmap *rsm, uint8_t frm) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2535,7 +2535,7 @@ static void bbr_log_to_cancel(struct tcp_bbr *bbr, int32_t line, uint32_t cts, uint8_t hpts_removed) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2558,7 +2558,7 @@ static void bbr_log_tstmp_validation(struct tcp_bbr *bbr, uint64_t peer_delta, uint64_t delta) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -2580,7 +2580,7 @@ static void bbr_log_type_tsosize(struct tcp_bbr *bbr, uint32_t cts, uint32_t tsosz, uint32_t tls, uint32_t old_val, uint32_t maxseg, int hdwr) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2609,7 +2609,7 @@ bbr_log_type_rsmclear(struct tcp_bbr *bbr, uint32_t cts, struct bbr_sendmap *rsm, uint32_t flags, uint32_t line) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2638,7 +2638,7 @@ uint32_t flex4, uint32_t flex1) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2669,7 +2669,7 @@ uint32_t newbw, 
uint32_t obw, uint32_t diff, uint32_t tim) { - if (/*bbr_verbose_logging && */(bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2697,7 +2697,7 @@ static inline void bbr_log_progress_event(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t tick, int event, int line) { - if (bbr_verbose_logging && (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -2719,7 +2719,7 @@ uint64_t rate, uint64_t hw_rate, int line, uint32_t cts, int error) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2747,7 +2747,7 @@ static void bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2770,7 +2770,7 @@ static void bbr_log_type_bbrrttprop(struct tcp_bbr *bbr, uint32_t t, uint32_t end, uint32_t tsconv, uint32_t cts, int32_t match, uint32_t seq, uint8_t flags) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2793,7 +2793,7 @@ static void bbr_log_exit_gain(struct tcp_bbr *bbr, uint32_t cts, int32_t entry_method) { - if (bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -2816,7 +2816,7 @@ static void bbr_log_settings_change(struct tcp_bbr *bbr, int settings_desired) { - if (bbr_verbose_logging && 
(bbr->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (bbr_verbose_logging && tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, bbr->r_ctl.rc_rcvtime); @@ -13474,7 +13474,7 @@ #endif /* Log to the black box */ - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(bbr->rc_tp)) { union tcp_log_stackspecific log; bbr_fill_in_logging_data(bbr, &log.u_bbr, cts); @@ -13503,7 +13503,7 @@ } else { log.u_bbr.flex8 = 0; } - lgb = tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK, + lgb = tcp_log_event(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK, len, &log, false, NULL, NULL, 0, tv); } else { lgb = NULL; Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -89,10 +89,10 @@ #include #define TCPOUTFLAGS #include -#include #include #include #include +#include #include #include #include @@ -324,9 +324,6 @@ static int32_t rack_down_raise_thresh = 100; static int32_t rack_req_segs = 1; static uint64_t rack_bw_rate_cap = 0; -static uint32_t rack_trace_point_config = 0; -static uint32_t rack_trace_point_bb_mode = 4; -static int32_t rack_trace_point_count = 0; /* Weird delayed ack mode */ @@ -547,25 +544,6 @@ int32_t rack_clear_counter=0; -static inline void -rack_trace_point(struct tcp_rack *rack, int num) -{ - if (((rack_trace_point_config == num) || - (rack_trace_point_config = 0xffffffff)) && - (rack_trace_point_bb_mode != 0) && - (rack_trace_point_count > 0) && - (rack->rc_tp->t_logstate == 0)) { - int res; - res = atomic_fetchadd_int(&rack_trace_point_count, -1); - if (res > 0) { - rack->rc_tp->t_logstate = rack_trace_point_bb_mode; - } else { - /* Loss a race assure its zero now */ - rack_trace_point_count = 0; - } - } -} - static void rack_swap_beta_values(struct tcp_rack *rack, uint8_t flex8) { @@ -629,7 +607,7 @@ /* Save off the values for restoral 
*/ memcpy(&rack->r_ctl.rc_saved_beta, &old, sizeof(struct newreno)); out: - if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; struct newreno *ptr; @@ -650,7 +628,7 @@ log.u_bbr.flex7 |= rack->rc_pacing_cc_set; log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt; log.u_bbr.flex8 = flex8; - tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, error, + tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, error, 0, &log, false, NULL, NULL, 0, &tv); } } @@ -778,7 +756,6 @@ struct sysctl_oid *rack_measure; struct sysctl_oid *rack_probertt; struct sysctl_oid *rack_hw_pacing; - struct sysctl_oid *rack_tracepoint; rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -909,28 +886,6 @@ OID_AUTO, "hbp_threshold", CTLFLAG_RW, &rack_hbp_thresh, 3, "We are highly buffered if min_rtt_seen / max_rtt_seen > this-threshold"); - - rack_tracepoint = SYSCTL_ADD_NODE(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_sysctl_root), - OID_AUTO, - "tp", - CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "Rack tracepoint facility"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_tracepoint), - OID_AUTO, "number", CTLFLAG_RW, - &rack_trace_point_config, 0, - "What is the trace point number to activate (0=none, 0xffffffff = all)?"); - SYSCTL_ADD_U32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_tracepoint), - OID_AUTO, "bbmode", CTLFLAG_RW, - &rack_trace_point_bb_mode, 4, - "What is BB logging mode that is activated?"); - SYSCTL_ADD_S32(&rack_sysctl_ctx, - SYSCTL_CHILDREN(rack_tracepoint), - OID_AUTO, "count", CTLFLAG_RW, - &rack_trace_point_count, 0, - "How many connections will have BB logging turned on that hit the tracepoint?"); /* Pacing related sysctls */ rack_pacing = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -1958,7 +1913,7 @@ * 5 = Socket option set changing the control flags rc_rack_tmr_std_based, 
rc_rack_use_dsack * 6 = Final rack rtt, flex4 is srtt and flex6 is final limited thresh. */ - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -1989,7 +1944,7 @@ uint64_t rate, uint64_t hw_rate, int line, int error, uint16_t mod) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; const struct ifnet *ifp; @@ -2090,7 +2045,7 @@ static void rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t tsused, uint32_t thresh, int mod) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2131,7 +2086,7 @@ static void rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2168,7 +2123,7 @@ static void rack_log_to_event(struct tcp_rack *rack, int32_t to_num, struct rack_sendmap *rsm) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2205,7 +2160,7 @@ struct rack_sendmap *next, int flag, uint32_t th_ack, int line) { - if (rack_verbose_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2251,7 +2206,7 @@ rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, uint32_t t, uint32_t len, struct rack_sendmap *rsm, int conf) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); @@ -2325,7 +2280,7 @@ * applying to the srtt algorithm in * useconds. 
*/ - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2374,7 +2329,7 @@ static void rack_log_rtt_sample_calc(struct tcp_rack *rack, uint32_t rtt, uint32_t send_time, uint32_t ack_time, int where) { - if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2399,7 +2354,7 @@ static inline void rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line) { - if (rack_verbose_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2426,7 +2381,7 @@ static void rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts, struct timeval *tv) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); @@ -2454,7 +2409,7 @@ static void rack_log_doseg_done(struct tcp_rack *rack, uint32_t cts, int32_t nxt_pkt, int32_t did_out, int way_out, int nsegs) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2495,7 +2450,7 @@ static void rack_log_type_pacing_sizes(struct tcpcb *tp, struct tcp_rack *rack, uint32_t arg1, uint32_t arg2, uint32_t arg3, uint8_t frm) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2522,7 +2477,7 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot, uint8_t hpts_calling, int reason, uint32_t cwnd_to_use) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union 
tcp_log_stackspecific log; struct timeval tv; @@ -2556,7 +2511,7 @@ rack_log_to_cancel(struct tcp_rack *rack, int32_t hpts_removed, int line, uint32_t us_cts, struct timeval *tv, uint32_t flags_on_entry) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); @@ -2593,7 +2548,7 @@ uint32_t flex5, uint32_t flex6, uint16_t flex7, uint8_t mod) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2622,7 +2577,7 @@ static void rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, int32_t timers) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2652,7 +2607,7 @@ static void rack_log_to_prr(struct tcp_rack *rack, int frm, int orig_cwnd, int line) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2686,7 +2641,7 @@ static void rack_log_sad(struct tcp_rack *rack, int event) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -3042,7 +2997,7 @@ uint32_t logged, uint64_t cur_bw, uint64_t low_bnd, uint64_t up_bnd, int line, uint8_t method) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -3457,7 +3412,7 @@ rack_log_rtt_shrinks(struct tcp_rack *rack, uint32_t us_cts, uint32_t rtt, uint32_t line, uint8_t reas) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -4595,7 +4550,7 @@ labc_to_use = rack->rc_labc; else labc_to_use = rack_max_abc_post_recovery; - if 
(rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -4609,7 +4564,7 @@ log.u_bbr.flex6 = prior_cwnd; log.u_bbr.flex7 = V_tcp_do_newsack; log.u_bbr.flex8 = 1; - lgb = tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + lgb = tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, 0, &log, false, NULL, NULL, 0, &tv); } if (CC_ALGO(tp)->ack_received != NULL) { @@ -4701,7 +4656,7 @@ tp->snd_cwnd = tp->snd_ssthresh; } } - if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -4716,7 +4671,7 @@ log.u_bbr.flex7 = V_tcp_do_newsack; log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt; log.u_bbr.flex8 = 2; - tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, 0, &log, false, NULL, NULL, 0, &tv); } if ((rack->rack_no_prr == 0) && @@ -5509,7 +5464,7 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts, struct hpts_diag *diag, struct timeval *tv) { - if (rack_verbose_logging && rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); @@ -5547,7 +5502,7 @@ static void rack_log_wakeup(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb, uint32_t len, int type) { - if (rack_verbose_logging && rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -9208,13 +9163,7 @@ rack->r_rep_attack = 1; counter_u64_add(rack_sack_attacks_detected, 1); } - if (tcp_attack_on_turns_on_logging) { - /* - * Turn on logging, used for debugging - * false positives. 
- */ - rack->rc_tp->t_logstate = tcp_attack_on_turns_on_logging; - } + tcp_trace_point(rack->rc_tp, TCP_TP_SAD_TRIGGERED); /* Clamp the cwnd at flight size */ rack->r_ctl.rc_saved_cwnd = rack->rc_tp->snd_cwnd; rack->rc_tp->snd_cwnd = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); @@ -10281,7 +10230,7 @@ rack_log_collapse(struct tcp_rack *rack, uint32_t cnt, uint32_t split, uint32_t out, int line, int dir, uint32_t flags, struct rack_sendmap *rsm) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -10320,7 +10269,7 @@ * sense splitting our map until we know where the * peer finally lands in the collapse. */ - rack_trace_point(rack, RACK_TP_COLLAPSED_WND); + tcp_trace_point(rack->rc_tp, TCP_TP_COLLAPSED_WND); if ((rack->rc_has_collapsed == 0) || (rack->r_ctl.last_collapse_point != (rack->rc_tp->snd_una + rack->rc_tp->snd_wnd))) counter_u64_add(rack_collapsed_win_seen, 1); @@ -13058,7 +13007,7 @@ rack_log_input_packet(struct tcpcb *tp, struct tcp_rack *rack, struct tcp_ackent *ae, int ackval, uint32_t high_seq) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { struct inpcb *inp = tptoinpcb(tp); union tcp_log_stackspecific log; struct timeval ltv; @@ -13392,8 +13341,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ae->ack_val_set] , - (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ae->ack_val_set] += (rdstc - ts_val); } @@ -13426,7 +13373,6 @@ rack_cong_signal(tp, CC_ECN, ae->ack, __LINE__); #ifdef TCP_ACCOUNTING /* Count for the specific type of ack in */ - counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[ae->ack_val_set]++; } @@ -13500,11 +13446,9 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_ACKS_IN] += (((ae->ack - high_seq) + segsiz - 1) / 
segsiz); } - counter_u64_add(tcp_cnt_counters[CNT_OF_ACKS_IN], - (((ae->ack - high_seq) + segsiz - 1) / segsiz)); #endif high_seq = ae->ack; - if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -13515,7 +13459,7 @@ log.u_bbr.flex3 = rack->r_ctl.current_round; log.u_bbr.rttProp = (uint64_t)CC_ALGO(tp)->newround; log.u_bbr.flex8 = 8; - tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, 0, &log, false, NULL, NULL, 0, &tv); } /* @@ -13558,7 +13502,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ae->ack_val_set] , (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ae->ack_val_set] += (rdstc - ts_val); if (ae->ack_val_set == ACK_CUMACK) @@ -13749,7 +13692,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_CUMACK] , (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val); tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val); @@ -13815,7 +13757,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_CUMACK] , (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val); tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val); @@ -13834,8 +13775,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_CUMACK] , - (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val); tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val); @@ -13851,8 +13790,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_CUMACK] , - (rdstc - 
ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val); tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val); @@ -13868,8 +13805,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_CUMACK] , - (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val); tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val); @@ -13898,7 +13833,6 @@ #ifdef TCP_ACCOUNTING rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_CUMACK] , (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_CUMACK] += (rdstc - ts_val); tp->tcp_proc_time[CYC_HANDLE_ACK] += (rdstc - ts_val); @@ -13908,7 +13842,6 @@ } else if (win_up_req) { rdstc = get_cyclecount(); if (rdstc > ts_val) { - counter_u64_add(tcp_proc_time[ACK_RWND] , (rdstc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ACK_RWND] += (rdstc - ts_val); } @@ -14080,7 +14013,7 @@ } } high_seq = th->th_ack; - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval ltv; #ifdef NETFLIX_HTTP_LOGGING @@ -14387,7 +14320,6 @@ uint64_t crtsc; crtsc = get_cyclecount(); - counter_u64_add(tcp_proc_time[ack_val_set] , (crtsc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[ack_val_set] += (crtsc - ts_val); } @@ -14404,7 +14336,7 @@ rack_free_trim(rack); } /* Update any rounds needed */ - if (rack_verbose_logging && (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF)) { + if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -14415,7 +14347,7 @@ log.u_bbr.flex3 = rack->r_ctl.current_round; log.u_bbr.rttProp = (uint64_t)CC_ALGO(tp)->newround; log.u_bbr.flex8 = 9; - tcp_log_event_(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, + tcp_log_event(tp, NULL, NULL, NULL, BBR_LOG_CWND, 0, 0, 
&log, false, NULL, NULL, 0, &tv); } /* @@ -14479,22 +14411,6 @@ rack_log_doseg_done(rack, cts, nxt_pkt, did_out, way_out, max(1, nsegs)); if (did_out) rack->r_wanted_output = 0; -#ifdef TCP_ACCOUNTING - } else { - /* - * Track the time (see above). - */ - if (ack_val_set != 0xf) { - uint64_t crtsc; - - crtsc = get_cyclecount(); - counter_u64_add(tcp_proc_time[ack_val_set] , (crtsc - ts_val)); - /* - * Note we *DO NOT* increment the per-tcb counters since - * in the else the TP may be gone!! - */ - } -#endif } #ifdef TCP_ACCOUNTING sched_unpin(); @@ -14608,7 +14524,7 @@ uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line, struct rack_sendmap *rsm, uint8_t quality) { - if (rack->rc_tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -15359,7 +15275,7 @@ unsigned ipoptlen, int32_t orig_len, int32_t len, int error, int rsm_is_null, int optlen, int line, uint16_t mode) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -15382,7 +15298,7 @@ log.u_bbr.delivered = line; log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - tcp_log_event_(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_FSB, 0, + tcp_log_event(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_FSB, 0, len, &log, false, NULL, NULL, 0, &tv); } } @@ -15896,7 +15812,7 @@ } else { th->th_off = sizeof(struct tcphdr) >> 2; } - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; if (rsm->r_flags & RACK_RWND_COLLAPSED) { @@ -15929,7 +15845,7 @@ log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use; log.u_bbr.delivered = 0; - lgb = tcp_log_event_(tp, th, NULL, NULL, TCP_LOG_OUT, ERRNO_UNK, + lgb = tcp_log_event(tp, th, NULL, NULL, TCP_LOG_OUT, ERRNO_UNK, 
len, &log, false, NULL, NULL, 0, tv); } else lgb = NULL; @@ -15992,9 +15908,9 @@ counter_u64_add(rack_fto_rsm_send, 1); if (error && (error == ENOBUFS)) { if (rack->r_ctl.crte != NULL) { - rack_trace_point(rack, RACK_TP_HWENOBUF); + tcp_trace_point(rack->rc_tp, TCP_TP_HWENOBUF); } else - rack_trace_point(rack, RACK_TP_ENOBUF); + tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF); slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); if (rack->rc_enobuf < 0x7f) rack->rc_enobuf++; @@ -16018,15 +15934,12 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru; } - counter_u64_add(tcp_cnt_counters[SND_OUT_DATA], cnt_thru); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_DATA], (crtsc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((len + segsiz - 1) / segsiz); } - counter_u64_add(tcp_cnt_counters[CNT_OF_MSS_OUT], ((len + segsiz - 1) / segsiz)); sched_unpin(); #endif return (0); @@ -16379,7 +16292,7 @@ } else { th->th_off = sizeof(struct tcphdr) >> 2; } - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); @@ -16404,7 +16317,7 @@ log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.lt_epoch = rack->r_ctl.cwnd_to_use; log.u_bbr.delivered = 0; - lgb = tcp_log_event_(tp, th, NULL, NULL, TCP_LOG_OUT, ERRNO_UNK, + lgb = tcp_log_event(tp, th, NULL, NULL, TCP_LOG_OUT, ERRNO_UNK, len, &log, false, NULL, NULL, 0, tv); } else lgb = NULL; @@ -16497,15 +16410,12 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA] += cnt_thru; } - counter_u64_add(tcp_cnt_counters[SND_OUT_DATA], cnt_thru); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_DATA], (crtsc - ts_val)); if (tp->t_flags2 & 
TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len + segsiz - 1) / segsiz); } - counter_u64_add(tcp_cnt_counters[CNT_OF_MSS_OUT], ((tot_len + segsiz - 1) / segsiz)); sched_unpin(); #endif return (0); @@ -16745,11 +16655,9 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_BLOCKED] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_BLOCKED], (crtsc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_BLOCKED]++; } - counter_u64_add(tcp_cnt_counters[SND_BLOCKED], 1); sched_unpin(); #endif counter_u64_add(rack_out_size[TCP_MSS_ACCT_INPACE], 1); @@ -16912,7 +16820,7 @@ * for us to retransmit it. Move up the collapse point, * since this rsm has its chance to retransmit now. */ - rack_trace_point(rack, RACK_TP_COLLAPSED_RXT); + tcp_trace_point(rack->rc_tp, TCP_TP_COLLAPSED_RXT); rack->r_ctl.last_collapse_point = rsm->r_end; /* Are we done? */ if (SEQ_GEQ(rack->r_ctl.last_collapse_point, @@ -17894,25 +17802,20 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA]++; } - counter_u64_add(tcp_cnt_counters[SND_OUT_DATA], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_DATA], (crtsc - ts_val)); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) / segsiz); } - counter_u64_add(tcp_cnt_counters[CNT_OF_MSS_OUT], ((tot_len_this_send + segsiz - 1) / segsiz)); } else { crtsc = get_cyclecount(); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_LIMITED]++; } - counter_u64_add(tcp_cnt_counters[SND_LIMITED], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_LIMITED] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_LIMITED], (crtsc - ts_val)); } sched_unpin(); #endif @@ -18061,11 +17964,9 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; } - 
counter_u64_add(tcp_cnt_counters[SND_OUT_FAIL], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_FAIL], (crtsc - ts_val)); sched_unpin(); #endif return (EHOSTUNREACH); @@ -18628,7 +18529,7 @@ hhook_run_tcp_est_out(tp, th, &to, len, tso); #endif /* We're getting ready to send; log now. */ - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(rack->rc_tp)) { union tcp_log_stackspecific log; memset(&log.u_bbr, 0, sizeof(log.u_bbr)); @@ -18670,7 +18571,7 @@ log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); log.u_bbr.lt_epoch = cwnd_to_use; log.u_bbr.delivered = sendalot; - lgb = tcp_log_event_(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK, + lgb = tcp_log_event(tp, th, &so->so_rcv, &so->so_snd, TCP_LOG_OUT, ERRNO_UNK, len, &log, false, NULL, NULL, 0, &tv); } else lgb = NULL; @@ -18964,11 +18865,9 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; } - counter_u64_add(tcp_cnt_counters[SND_OUT_FAIL], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_FAIL], (crtsc - ts_val)); sched_unpin(); #endif return (error); @@ -18978,9 +18877,9 @@ * time */ if (rack->r_ctl.crte != NULL) { - rack_trace_point(rack, RACK_TP_HWENOBUF); + tcp_trace_point(rack->rc_tp, TCP_TP_HWENOBUF); } else - rack_trace_point(rack, RACK_TP_ENOBUF); + tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF); slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); if (rack->rc_enobuf < 0x7f) rack->rc_enobuf++; @@ -19014,11 +18913,9 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; } - counter_u64_add(tcp_cnt_counters[SND_OUT_FAIL], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_FAIL], (crtsc - ts_val)); sched_unpin(); #endif return (error); @@ 
-19039,11 +18936,9 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_FAIL]++; } - counter_u64_add(tcp_cnt_counters[SND_OUT_FAIL], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_FAIL] += (crtsc - ts_val); } - counter_u64_add(tcp_proc_time[SND_OUT_FAIL], (crtsc - ts_val)); sched_unpin(); #endif return (error); @@ -19255,24 +19150,19 @@ if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_DATA]++; } - counter_u64_add(tcp_cnt_counters[SND_OUT_DATA], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_DATA] += crtsc; } - counter_u64_add(tcp_proc_time[SND_OUT_DATA], crtsc); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[CNT_OF_MSS_OUT] += ((tot_len_this_send + segsiz - 1) /segsiz); } - counter_u64_add(tcp_cnt_counters[CNT_OF_MSS_OUT], ((tot_len_this_send + segsiz - 1) /segsiz)); } else { if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_cnt_counters[SND_OUT_ACK]++; } - counter_u64_add(tcp_cnt_counters[SND_OUT_ACK], 1); if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { tp->tcp_proc_time[SND_OUT_ACK] += crtsc; } - counter_u64_add(tcp_proc_time[SND_OUT_ACK], crtsc); } sched_unpin(); #endif Index: sys/netinet/tcp_stacks/rack_bbr_common.c =================================================================== --- sys/netinet/tcp_stacks/rack_bbr_common.c +++ sys/netinet/tcp_stacks/rack_bbr_common.c @@ -984,7 +984,7 @@ void ctf_log_sack_filter(struct tcpcb *tp, int num_sack_blks, struct sackblk *sack_blocks) { - if (tp->t_logstate != TCP_LOG_STATE_OFF) { + if (tcp_bblogging_on(tp)) { union tcp_log_stackspecific log; struct timeval tv; Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -112,6 +112,7 @@ #include #include #include +#include #ifdef TCPPCAP #include #endif @@ -176,11 +177,6 @@ CTLFLAG_RW, &tcp_map_minimum, 500, "Number of Map enteries before we start detection"); -int32_t 
tcp_attack_on_turns_on_logging = 0; -SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, attacks_logged, - CTLFLAG_RW, - &tcp_attack_on_turns_on_logging, 0, - "When we have a positive hit on attack, do we turn on logging?"); int32_t tcp_sad_pacing_interval = 2000; SYSCTL_INT(_net_inet_tcp_sack_attack, OID_AUTO, sad_pacing_int, CTLFLAG_RW, @@ -2084,7 +2080,7 @@ if (flags & TH_RST) TCP_PROBE5(accept__refused, NULL, NULL, m, tp, nth); lgb = NULL; - if ((tp != NULL) && (tp->t_logstate != TCP_LOG_STATE_OFF)) { + if ((tp != NULL) && tcp_bblogging_on(tp)) { if (INP_WLOCKED(inp)) { union tcp_log_stackspecific log; struct timeval tv; @@ -2095,7 +2091,7 @@ log.u_bbr.pkts_out = tp->t_maxseg; log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.delivered = 0; - lgb = tcp_log_event_(tp, nth, NULL, NULL, TCP_LOG_OUT, + lgb = tcp_log_event(tp, nth, NULL, NULL, TCP_LOG_OUT, ERRNO_UNK, 0, &log, false, NULL, NULL, 0, &tv); } else { /* @@ -3861,7 +3857,7 @@ bzero(xt, sizeof(*xt)); xt->t_state = tp->t_state; - xt->t_logstate = tp->t_logstate; + xt->t_logstate = tcp_get_bblog_state(tp); xt->t_flags = tp->t_flags; xt->t_sndzerowin = tp->t_sndzerowin; xt->t_sndrexmitpack = tp->t_sndrexmitpack; @@ -3967,3 +3963,59 @@ } } } + +#ifdef TCP_ACCOUNTING +int +tcp_do_ack_accounting(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t tiwin, int mss) +{ + if (SEQ_LT(th->th_ack, tp->snd_una)) { + /* Do we have a SACK? */ + if (to->to_flags & TOF_SACK) { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_SACK]++; + } + return (ACK_SACK); + } else { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_BEHIND]++; + } + return (ACK_BEHIND); + } + } else if (th->th_ack == tp->snd_una) { + /* Do we have a SACK? 
*/ + if (to->to_flags & TOF_SACK) { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_SACK]++; + } + return (ACK_SACK); + } else if (tiwin != tp->snd_wnd) { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_RWND]++; + } + return (ACK_RWND); + } else { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_DUPACK]++; + } + return (ACK_DUPACK); + } + } else { + if (!SEQ_GT(th->th_ack, tp->snd_max)) { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[CNT_OF_ACKS_IN] += (((th->th_ack - tp->snd_una) + mss - 1)/mss); + } + } + if (to->to_flags & TOF_SACK) { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_CUMACK_SACK]++; + } + return (ACK_CUMACK_SACK); + } else { + if (tp->t_flags2 & TF2_TCP_ACCOUNTING) { + tp->tcp_cnt_counters[ACK_CUMACK]++; + } + return (ACK_CUMACK); + } + } +} +#endif Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c +++ sys/netinet/tcp_timer.c @@ -67,9 +67,9 @@ #include #include #include -#include #include #include +#include #include #include #ifdef INET6 @@ -796,8 +796,8 @@ uint64_t ms; INP_WLOCK_ASSERT(tptoinpcb(tp)); - if (tp->t_logstate != TCP_LOG_STATE_OFF) - lgb = tcp_log_event_(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, + if (tcp_bblogging_on(tp)) + lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false, NULL, NULL, 0, NULL); else lgb = NULL; Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -140,8 +140,8 @@ return; } INP_WLOCK_ASSERT(tptoinpcb(tp)); - if (tp->t_logstate != TCP_LOG_STATE_OFF) { - lgb = tcp_log_event_(tp, NULL, NULL, NULL, TCP_LOG_PRU, error, + if (tcp_bblogging_on(tp)) { + lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error, 0, NULL, false, NULL, NULL, 0, NULL); } else { lgb = NULL; @@ -2576,7 +2576,7 @@ break; #ifdef 
TCP_BLACKBOX case TCP_LOG: - optval = tp->t_logstate; + optval = tcp_get_bblog_state(tp); INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -158,7 +158,7 @@ sbintime_t t_precisions[TT_N]; uint32_t t_maxseg:24, /* maximum segment size */ - t_logstate:8; /* State of "black box" logging */ + _t_logstate:8; /* State of "black box" logging */ uint32_t t_port:16, /* Tunneling (over udp) port */ t_state:4, /* state of this connection */ t_idle_reduce : 1, @@ -272,6 +272,11 @@ struct tcp_log_id_bucket *t_lib; const char *t_output_caller; /* Function that called tcp_output */ struct statsblob *t_stats; /* Per-connection stats */ + /* Should these be a pointer to the arrays or an array? */ +#ifdef TCP_ACCOUNTING + uint64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS]; + uint64_t tcp_proc_time[TCP_NUM_CNT_COUNTERS]; +#endif uint32_t t_logsn; /* Log "serial number" */ uint32_t gput_ts; /* Time goodput measurement started */ tcp_seq gput_seq; /* Outbound measurement seq */ @@ -305,6 +310,7 @@ #ifdef TCP_HHOOK struct osd t_osd; /* storage for Khelp module data */ #endif + uint8_t _t_logpoint; /* Used when a BB log point is enabled */ }; #endif /* _KERNEL || _WANT_TCPCB */ @@ -602,6 +608,7 @@ #define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */ #define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */ #define TF2_ECN_USE_ECT1 0x00000800 /* Use ECT(1) marking on session */ +#define TF2_TCP_ACCOUNTING 0x00010000 /* Do TCP accounting */ /* * Structure to hold TCP options that are only used during segment @@ -1169,7 +1176,6 @@ extern int32_t tcp_sad_pacing_interval; extern int32_t tcp_sad_low_pps; extern int32_t tcp_map_minimum; -extern int32_t tcp_attack_on_turns_on_logging; #endif extern uint32_t tcp_ack_war_time_window; extern uint32_t tcp_ack_war_cnt; @@ -1247,6 +1253,10 @@ int
tcp_stats_init(void); void tcp_log_end_status(struct tcpcb *tp, uint8_t status); +#ifdef TCP_ACCOUNTING +int tcp_do_ack_accounting(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, uint32_t tiwin, int mss); +#endif + static inline void tcp_fields_to_host(struct tcphdr *th)