diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -469,7 +469,7 @@ TCPPCAP opt_global.h SIFTR TCP_BLACKBOX opt_global.h -TCP_HHOOK opt_inet.h +TCP_HHOOK opt_global.h TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading TCP_RFC7413 opt_inet.h TCP_RFC7413_MAX_KEYS opt_inet.h diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c --- a/sys/dev/cxgbe/tom/t4_tom_l2t.c +++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include diff --git a/sys/netinet/cc/cc.h b/sys/netinet/cc/cc.h --- a/sys/netinet/cc/cc.h +++ b/sys/netinet/cc/cc.h @@ -70,7 +70,9 @@ #define V_cc_abe_frlossreduce VNET(cc_abe_frlossreduce) /* Define the new net.inet.tcp.cc sysctl tree. */ +#ifdef _SYS_SYSCTL_H_ SYSCTL_DECL(_net_inet_tcp_cc); +#endif /* For CC modules that use hystart++ */ extern uint32_t hystart_lowcwnd; @@ -207,10 +209,10 @@ #define CC_MODULE_BEING_REMOVED 0x01 /* The module is being removed */ /* Macro to obtain the CC algo's struct ptr. */ -#define CC_ALGO(tp) ((tp)->cc_algo) +#define CC_ALGO(tp) ((tp)->t_cc) /* Macro to obtain the CC algo's data ptr. */ -#define CC_DATA(tp) ((tp)->ccv->cc_data) +#define CC_DATA(tp) ((tp)->t_ccv.cc_data) /* Macro to obtain the system default CC algo's struct ptr. 
*/ #define CC_DEFAULT_ALGO() V_default_cc_ptr diff --git a/sys/netinet/cc/cc_cdg.c b/sys/netinet/cc/cc_cdg.c --- a/sys/netinet/cc/cc_cdg.c +++ b/sys/netinet/cc/cc_cdg.c @@ -582,7 +582,7 @@ int congestion, new_measurement, slowstart; cdg_data = ccv->cc_data; - e_t = (struct ertt *)khelp_get_osd(CCV(ccv, osd), ertt_id); + e_t = (struct ertt *)khelp_get_osd(&CCV(ccv, t_osd), ertt_id); new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT; congestion = 0; cdg_data->maxrtt_in_rtt = imax(e_t->rtt, cdg_data->maxrtt_in_rtt); diff --git a/sys/netinet/cc/cc_chd.c b/sys/netinet/cc/cc_chd.c --- a/sys/netinet/cc/cc_chd.c +++ b/sys/netinet/cc/cc_chd.c @@ -244,7 +244,7 @@ struct ertt *e_t; int backoff, new_measurement, qdly, rtt; - e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); + e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); chd_data = ccv->cc_data; new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT; backoff = qdly = 0; @@ -345,7 +345,7 @@ struct chd *chd_data; int qdly; - e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); + e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); chd_data = ccv->cc_data; qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; diff --git a/sys/netinet/cc/cc_hd.c b/sys/netinet/cc/cc_hd.c --- a/sys/netinet/cc/cc_hd.c +++ b/sys/netinet/cc/cc_hd.c @@ -70,7 +70,8 @@ #include -#include +#include +#include #include #include #include @@ -146,7 +147,7 @@ int qdly; if (ack_type == CC_ACK) { - e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); + e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); if (e_t->rtt && e_t->minrtt && V_hd_qthresh > 0) { qdly = e_t->rtt - e_t->minrtt; diff --git a/sys/netinet/cc/cc_vegas.c b/sys/netinet/cc/cc_vegas.c --- a/sys/netinet/cc/cc_vegas.c +++ b/sys/netinet/cc/cc_vegas.c @@ -133,7 +133,7 @@ struct vegas *vegas_data; long actual_tx_rate, expected_tx_rate, ndiff; - e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); + e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id); vegas_data = ccv->cc_data; if (e_t->flags & ERTT_NEW_MEASUREMENT) { /* Once per RTT. 
*/ diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -66,7 +66,6 @@ #include #include #include -#include #include #include diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c --- a/sys/netinet/tcp_debug.c +++ b/sys/netinet/tcp_debug.c @@ -56,6 +56,7 @@ #include #include +#include #include #include #ifdef INET6 diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c --- a/sys/netinet/tcp_hpts.c +++ b/sys/netinet/tcp_hpts.c @@ -1368,10 +1368,10 @@ if (error < 0) goto skip_pacing; inp->inp_hpts_calls = 0; - if (ninp && ninp->inp_ppcb) { + if (ninp) { /* * If we have a nxt inp, see if we can - * prefetch its ppcb. Note this may seem + * prefetch it. Note this may seem * "risky" since we have no locks (other * than the previous inp) and there no * assurance that ninp was not pulled while @@ -1399,8 +1399,11 @@ * TLB hit, and instead if occurs just * cause us to load cache with a useless * address (to us). + * + * XXXGL: with tcpcb == inpcb, I'm unsure this + * prefetch is still correct and useful. 
*/ - kern_prefetch(ninp->inp_ppcb, &prefetch_tp); + kern_prefetch(ninp, &prefetch_tp); prefetch_tp = 1; } INP_WUNLOCK(inp); diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -297,7 +297,7 @@ hhook_data.to = to; hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data, - tp->osd); + &tp->t_osd); } } #endif @@ -316,14 +316,14 @@ INP_WLOCK_ASSERT(inp); - tp->ccv->nsegs = nsegs; - tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th); + tp->t_ccv.nsegs = nsegs; + tp->t_ccv.bytes_this_ack = BYTES_THIS_ACK(tp, th); if ((!V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd)) || (V_tcp_do_newcwv && (tp->snd_cwnd <= tp->snd_wnd) && (tp->snd_cwnd < (tcp_compute_pipe(tp) * 2)))) - tp->ccv->flags |= CCF_CWND_LIMITED; + tp->t_ccv.flags |= CCF_CWND_LIMITED; else - tp->ccv->flags &= ~CCF_CWND_LIMITED; + tp->t_ccv.flags &= ~CCF_CWND_LIMITED; if (type == CC_ACK) { #ifdef STATS @@ -331,7 +331,7 @@ ((int32_t)tp->snd_cwnd) - tp->snd_wnd); if (!IN_RECOVERY(tp->t_flags)) stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_ACKLEN, - tp->ccv->bytes_this_ack / (tcp_maxseg(tp) * nsegs)); + tp->t_ccv.bytes_this_ack / (tcp_maxseg(tp) * nsegs)); if ((tp->t_flags & TF_GPUTINPROG) && SEQ_GEQ(th->th_ack, tp->gput_ack)) { /* @@ -356,21 +356,21 @@ } #endif /* STATS */ if (tp->snd_cwnd > tp->snd_ssthresh) { - tp->t_bytes_acked += tp->ccv->bytes_this_ack; + tp->t_bytes_acked += tp->t_ccv.bytes_this_ack; if (tp->t_bytes_acked >= tp->snd_cwnd) { tp->t_bytes_acked -= tp->snd_cwnd; - tp->ccv->flags |= CCF_ABC_SENTAWND; + tp->t_ccv.flags |= CCF_ABC_SENTAWND; } } else { - tp->ccv->flags &= ~CCF_ABC_SENTAWND; + tp->t_ccv.flags &= ~CCF_ABC_SENTAWND; tp->t_bytes_acked = 0; } } if (CC_ALGO(tp)->ack_received != NULL) { /* XXXLAS: Find a way to live without this */ - tp->ccv->curack = th->th_ack; - CC_ALGO(tp)->ack_received(tp->ccv, type); + tp->t_ccv.curack = th->th_ack; + CC_ALGO(tp)->ack_received(&tp->t_ccv, type); } #ifdef STATS 
stats_voi_update_abs_ulong(tp->t_stats, VOI_TCP_LCWIN, tp->snd_cwnd); @@ -430,7 +430,7 @@ tp->snd_cwnd = tcp_compute_initwnd(maxseg); if (CC_ALGO(tp)->conn_init != NULL) - CC_ALGO(tp)->conn_init(tp->ccv); + CC_ALGO(tp)->conn_init(&tp->t_ccv); } void inline @@ -489,8 +489,8 @@ if (CC_ALGO(tp)->cong_signal != NULL) { if (th != NULL) - tp->ccv->curack = th->th_ack; - CC_ALGO(tp)->cong_signal(tp->ccv, type); + tp->t_ccv.curack = th->th_ack; + CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); } } @@ -502,8 +502,8 @@ /* XXXLAS: KASSERT that we're in recovery? */ if (CC_ALGO(tp)->post_recovery != NULL) { - tp->ccv->curack = th->th_ack; - CC_ALGO(tp)->post_recovery(tp->ccv); + tp->t_ccv.curack = th->th_ack; + CC_ALGO(tp)->post_recovery(&tp->t_ccv); } /* XXXLAS: EXIT_RECOVERY ? */ tp->t_bytes_acked = 0; @@ -534,25 +534,25 @@ if (CC_ALGO(tp)->ecnpkt_handler != NULL) { switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: - tp->ccv->flags |= CCF_IPHDR_CE; + tp->t_ccv.flags |= CCF_IPHDR_CE; break; case IPTOS_ECN_ECT0: /* FALLTHROUGH */ case IPTOS_ECN_ECT1: /* FALLTHROUGH */ case IPTOS_ECN_NOTECT: - tp->ccv->flags &= ~CCF_IPHDR_CE; + tp->t_ccv.flags &= ~CCF_IPHDR_CE; break; } if (flags & TH_CWR) - tp->ccv->flags |= CCF_TCPHDR_CWR; + tp->t_ccv.flags |= CCF_TCPHDR_CWR; else - tp->ccv->flags &= ~CCF_TCPHDR_CWR; + tp->t_ccv.flags &= ~CCF_TCPHDR_CWR; - CC_ALGO(tp)->ecnpkt_handler(tp->ccv); + CC_ALGO(tp)->ecnpkt_handler(&tp->t_ccv); - if (tp->ccv->flags & CCF_ACKNOW) { + if (tp->t_ccv.flags & CCF_ACKNOW) { tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); tp->t_flags |= TF_ACKNOW; } diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -173,7 +173,7 @@ hhook_data.tso = tso; hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data, - tp->osd); + &tp->t_osd); } } #endif @@ -187,7 +187,7 @@ INP_WLOCK_ASSERT(tptoinpcb(tp)); if (CC_ALGO(tp)->after_idle != NULL) - CC_ALGO(tp)->after_idle(tp->ccv); + 
CC_ALGO(tp)->after_idle(&tp->t_ccv); } /* diff --git a/sys/netinet/tcp_pcap.c b/sys/netinet/tcp_pcap.c --- a/sys/netinet/tcp_pcap.c +++ b/sys/netinet/tcp_pcap.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -587,16 +587,16 @@ return; tp = rack->rc_tp; - if (tp->cc_algo == NULL) { + if (tp->t_cc == NULL) { /* Tcb is leaving */ return; } rack->rc_pacing_cc_set = 1; - if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) { + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) { /* Not new-reno we can't play games with beta! */ goto out; } - ptr = ((struct newreno *)tp->ccv->cc_data); + ptr = ((struct newreno *)tp->t_ccv.cc_data); if (CC_ALGO(tp)->ctl_output == NULL) { /* Huh, why does new_reno no longer have a set function? */ goto out; @@ -615,7 +615,7 @@ sopt.sopt_dir = SOPT_SET; opt.name = CC_NEWRENO_BETA; opt.val = rack->r_ctl.rc_saved_beta.beta; - error = CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt); + error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); if (error) { goto out; } @@ -623,10 +623,10 @@ * Hack alert we need to set in our newreno_flags * so that Abe behavior is also applied. 
*/ - ((struct newreno *)tp->ccv->cc_data)->newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED; + ((struct newreno *)tp->t_ccv.cc_data)->newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED; opt.name = CC_NEWRENO_BETA_ECN; opt.val = rack->r_ctl.rc_saved_beta.beta_ecn; - error = CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt); + error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); if (error) { goto out; } @@ -637,7 +637,7 @@ union tcp_log_stackspecific log; struct timeval tv; - ptr = ((struct newreno *)tp->ccv->cc_data); + ptr = ((struct newreno *)tp->t_ccv.cc_data); memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); if (ptr) { @@ -670,14 +670,14 @@ return; tp = rack->rc_tp; rack->rc_pacing_cc_set = 0; - if (tp->cc_algo == NULL) + if (tp->t_cc == NULL) /* Tcb is leaving */ return; - if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) { + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) { /* Not new-reno nothing to do! */ return; } - ptr = ((struct newreno *)tp->ccv->cc_data); + ptr = ((struct newreno *)tp->t_ccv.cc_data); if (ptr == NULL) { /* * This happens at rack_fini() if the @@ -697,7 +697,7 @@ union tcp_log_stackspecific log; struct timeval tv; - ptr = ((struct newreno *)tp->ccv->cc_data); + ptr = ((struct newreno *)tp->t_ccv.cc_data); memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = ptr->beta; @@ -3700,7 +3700,7 @@ /* Clear all flags so we start fresh */ rack->rc_tp->t_bytes_acked = 0; - rack->rc_tp->ccv->flags &= ~CCF_ABC_SENTAWND; + rack->rc_tp->t_ccv.flags &= ~CCF_ABC_SENTAWND; /* * If configured to, set the cwnd and ssthresh to * our targets. 
@@ -4577,14 +4577,14 @@ uint8_t labc_to_use, quality; INP_WLOCK_ASSERT(tptoinpcb(tp)); - tp->ccv->nsegs = nsegs; - acked = tp->ccv->bytes_this_ack = (th_ack - tp->snd_una); + tp->t_ccv.nsegs = nsegs; + acked = tp->t_ccv.bytes_this_ack = (th_ack - tp->snd_una); if ((recovery) && (rack->r_ctl.rc_early_recovery_segs)) { uint32_t max; max = rack->r_ctl.rc_early_recovery_segs * ctf_fixed_maxseg(tp); - if (tp->ccv->bytes_this_ack > max) { - tp->ccv->bytes_this_ack = max; + if (tp->t_ccv.bytes_this_ack > max) { + tp->t_ccv.bytes_this_ack = max; } } #ifdef STATS @@ -4611,19 +4611,19 @@ } /* Which way our we limited, if not cwnd limited no advance in CA */ if (tp->snd_cwnd <= tp->snd_wnd) - tp->ccv->flags |= CCF_CWND_LIMITED; + tp->t_ccv.flags |= CCF_CWND_LIMITED; else - tp->ccv->flags &= ~CCF_CWND_LIMITED; + tp->t_ccv.flags &= ~CCF_CWND_LIMITED; if (tp->snd_cwnd > tp->snd_ssthresh) { - tp->t_bytes_acked += min(tp->ccv->bytes_this_ack, + tp->t_bytes_acked += min(tp->t_ccv.bytes_this_ack, nsegs * V_tcp_abc_l_var * ctf_fixed_maxseg(tp)); /* For the setting of a window past use the actual scwnd we are using */ if (tp->t_bytes_acked >= rack->r_ctl.cwnd_to_use) { tp->t_bytes_acked -= rack->r_ctl.cwnd_to_use; - tp->ccv->flags |= CCF_ABC_SENTAWND; + tp->t_ccv.flags |= CCF_ABC_SENTAWND; } } else { - tp->ccv->flags &= ~CCF_ABC_SENTAWND; + tp->t_ccv.flags &= ~CCF_ABC_SENTAWND; tp->t_bytes_acked = 0; } prior_cwnd = tp->snd_cwnd; @@ -4639,9 +4639,9 @@ memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = th_ack; - log.u_bbr.flex2 = tp->ccv->flags; - log.u_bbr.flex3 = tp->ccv->bytes_this_ack; - log.u_bbr.flex4 = tp->ccv->nsegs; + log.u_bbr.flex2 = tp->t_ccv.flags; + log.u_bbr.flex3 = tp->t_ccv.bytes_this_ack; + log.u_bbr.flex4 = tp->t_ccv.nsegs; log.u_bbr.flex5 = labc_to_use; log.u_bbr.flex6 = prior_cwnd; log.u_bbr.flex7 = V_tcp_do_newsack; @@ -4651,10 +4651,10 @@ } if (CC_ALGO(tp)->ack_received != NULL) { /* XXXLAS: Find a way to live 
without this */ - tp->ccv->curack = th_ack; - tp->ccv->labc = labc_to_use; - tp->ccv->flags |= CCF_USE_LOCAL_ABC; - CC_ALGO(tp)->ack_received(tp->ccv, type); + tp->t_ccv.curack = th_ack; + tp->t_ccv.labc = labc_to_use; + tp->t_ccv.flags |= CCF_USE_LOCAL_ABC; + CC_ALGO(tp)->ack_received(&tp->t_ccv, type); } if (lgb) { lgb->tlb_stackinfo.u_bbr.flex6 = tp->snd_cwnd; @@ -4727,8 +4727,8 @@ rack = (struct tcp_rack *)tp->t_fb_ptr; /* only alert CC if we alerted when we entered */ if (CC_ALGO(tp)->post_recovery != NULL) { - tp->ccv->curack = th_ack; - CC_ALGO(tp)->post_recovery(tp->ccv); + tp->t_ccv.curack = th_ack; + CC_ALGO(tp)->post_recovery(&tp->t_ccv); if (tp->snd_cwnd < tp->snd_ssthresh) { /* * Rack has burst control and pacing @@ -4745,9 +4745,9 @@ memset(&log.u_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.timeStamp = tcp_get_usecs(&tv); log.u_bbr.flex1 = th_ack; - log.u_bbr.flex2 = tp->ccv->flags; - log.u_bbr.flex3 = tp->ccv->bytes_this_ack; - log.u_bbr.flex4 = tp->ccv->nsegs; + log.u_bbr.flex2 = tp->t_ccv.flags; + log.u_bbr.flex3 = tp->t_ccv.bytes_this_ack; + log.u_bbr.flex4 = tp->t_ccv.nsegs; log.u_bbr.flex5 = V_tcp_abc_l_var; log.u_bbr.flex6 = orig_cwnd; log.u_bbr.flex7 = V_tcp_do_newsack; @@ -4871,8 +4871,8 @@ } if ((CC_ALGO(tp)->cong_signal != NULL) && (type != CC_RTO)){ - tp->ccv->curack = ack; - CC_ALGO(tp)->cong_signal(tp->ccv, type); + tp->t_ccv.curack = ack; + CC_ALGO(tp)->cong_signal(&tp->t_ccv, type); } if ((in_rec_at_entry == 0) && IN_RECOVERY(tp->t_flags)) { rack_log_to_prr(rack, 15, cwnd_enter, line); @@ -4897,7 +4897,7 @@ KMOD_TCPSTAT_INC(tcps_idle_estrestarts); #endif if (CC_ALGO(tp)->after_idle != NULL) - CC_ALGO(tp)->after_idle(tp->ccv); + CC_ALGO(tp)->after_idle(&tp->t_ccv); if (tp->snd_cwnd == 1) i_cwnd = tp->t_maxseg; /* SYN(-ACK) lost */ @@ -5910,7 +5910,7 @@ */ struct rack_sendmap *rsm; - if (tp->t_timers->tt_flags & TT_STOPPED) { + if (tp->tt_flags & TT_STOPPED) { return (1); } counter_u64_add(rack_to_tot, 1); @@ -6123,7 +6123,7 @@ uint32_t out, 
avail; int collapsed_win = 0; - if (tp->t_timers->tt_flags & TT_STOPPED) { + if (tp->tt_flags & TT_STOPPED) { return (1); } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { @@ -6312,7 +6312,7 @@ static int rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { - if (tp->t_timers->tt_flags & TT_STOPPED) { + if (tp->tt_flags & TT_STOPPED) { return (1); } rack_log_to_event(rack, RACK_TO_FRM_DELACK, NULL); @@ -6337,7 +6337,7 @@ struct tcptemp *t_template; int32_t retval = 1; - if (tp->t_timers->tt_flags & TT_STOPPED) { + if (tp->tt_flags & TT_STOPPED) { return (1); } if (rack->rc_in_persist == 0) @@ -6425,7 +6425,7 @@ struct tcptemp *t_template; struct inpcb *inp = tptoinpcb(tp); - if (tp->t_timers->tt_flags & TT_STOPPED) { + if (tp->tt_flags & TT_STOPPED) { return (1); } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP; @@ -6654,7 +6654,7 @@ int32_t retval = 0; bool isipv6; - if (tp->t_timers->tt_flags & TT_STOPPED) { + if (tp->tt_flags & TT_STOPPED) { return (1); } if ((tp->t_flags & TF_GPUTINPROG) && @@ -7862,7 +7862,7 @@ us_rtt = 1; if (CC_ALGO(tp)->rttsample != NULL) { /* Kick the RTT to the CC */ - CC_ALGO(tp)->rttsample(tp->ccv, us_rtt, 1, rsm->r_fas); + CC_ALGO(tp)->rttsample(&tp->t_ccv, us_rtt, 1, rsm->r_fas); } rack_apply_updated_usrtt(rack, us_rtt, tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time)); if (ack_type == SACKED) { @@ -7959,7 +7959,7 @@ us_rtt = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[i]; else us_rtt = tcp_get_usecs(NULL) - (uint32_t)rsm->r_tim_lastsent[i]; - CC_ALGO(tp)->rttsample(tp->ccv, us_rtt, 1, rsm->r_fas); + CC_ALGO(tp)->rttsample(&tp->t_ccv, us_rtt, 1, rsm->r_fas); } if ((i + 1) < rsm->r_rtr_cnt) { /* @@ -12725,11 +12725,11 @@ rack_convert_rtts(tp); tp->t_rttlow = TICKS_2_USEC(tp->t_rttlow); if (rack_do_hystart) { - tp->ccv->flags |= CCF_HYSTART_ALLOWED; + tp->t_ccv.flags |= CCF_HYSTART_ALLOWED; if (rack_do_hystart > 1) - tp->ccv->flags |= CCF_HYSTART_CAN_SH_CWND; + tp->t_ccv.flags |= 
CCF_HYSTART_CAN_SH_CWND; if (rack_do_hystart > 2) - tp->ccv->flags |= CCF_HYSTART_CONS_SSTH; + tp->t_ccv.flags |= CCF_HYSTART_CONS_SSTH; } if (rack_def_profile) rack_set_profile(rack, rack_def_profile); @@ -13633,7 +13633,7 @@ rack->r_ctl.current_round++; rack->r_ctl.roundends = tp->snd_max; if (CC_ALGO(tp)->newround != NULL) { - CC_ALGO(tp)->newround(tp->ccv, rack->r_ctl.current_round); + CC_ALGO(tp)->newround(&tp->t_ccv, rack->r_ctl.current_round); } } /* Setup our act_rcv_time */ @@ -14528,7 +14528,7 @@ rack->r_ctl.current_round++; rack->r_ctl.roundends = tp->snd_max; if (CC_ALGO(tp)->newround != NULL) { - CC_ALGO(tp)->newround(tp->ccv, rack->r_ctl.current_round); + CC_ALGO(tp)->newround(&tp->t_ccv, rack->r_ctl.current_round); } } if ((nxt_pkt == 0) && @@ -19658,7 +19658,7 @@ break; case TCP_RACK_PACING_BETA: RACK_OPTS_INC(tcp_rack_beta); - if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) { + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) { /* This only works for newreno. */ error = EINVAL; break; @@ -19673,7 +19673,7 @@ opt.name = CC_NEWRENO_BETA; opt.val = optval; if (CC_ALGO(tp)->ctl_output != NULL) - error = CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt); + error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); else { error = ENOENT; break; @@ -19701,7 +19701,7 @@ break; case TCP_RACK_PACING_BETA_ECN: RACK_OPTS_INC(tcp_rack_beta_ecn); - if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) { + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) { /* This only works for newreno. 
*/ error = EINVAL; break; @@ -19716,7 +19716,7 @@ opt.name = CC_NEWRENO_BETA_ECN; opt.val = optval; if (CC_ALGO(tp)->ctl_output != NULL) - error = CC_ALGO(tp)->ctl_output(tp->ccv, &sopt, &opt); + error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, &sopt, &opt); else error = ENOENT; } else { @@ -20161,13 +20161,13 @@ case TCP_RACK_ENABLE_HYSTART: { if (optval) { - tp->ccv->flags |= CCF_HYSTART_ALLOWED; + tp->t_ccv.flags |= CCF_HYSTART_ALLOWED; if (rack_do_hystart > RACK_HYSTART_ON) - tp->ccv->flags |= CCF_HYSTART_CAN_SH_CWND; + tp->t_ccv.flags |= CCF_HYSTART_CAN_SH_CWND; if (rack_do_hystart > RACK_HYSTART_ON_W_SC) - tp->ccv->flags |= CCF_HYSTART_CONS_SSTH; + tp->t_ccv.flags |= CCF_HYSTART_CONS_SSTH; } else { - tp->ccv->flags &= ~(CCF_HYSTART_ALLOWED|CCF_HYSTART_CAN_SH_CWND|CCF_HYSTART_CONS_SSTH); + tp->t_ccv.flags &= ~(CCF_HYSTART_ALLOWED|CCF_HYSTART_CAN_SH_CWND|CCF_HYSTART_CONS_SSTH); } } break; @@ -20587,7 +20587,7 @@ * when you exit recovery. */ case TCP_RACK_PACING_BETA: - if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) error = EINVAL; else if (rack->rc_pacing_cc_set == 0) optval = rack->r_ctl.rc_saved_beta.beta; @@ -20597,8 +20597,8 @@ * I have previously set. Yeah it looks hackish but * we don't want to report the saved values. */ - if (tp->ccv->cc_data) - optval = ((struct newreno *)tp->ccv->cc_data)->beta; + if (tp->t_ccv.cc_data) + optval = ((struct newreno *)tp->t_ccv.cc_data)->beta; else error = EINVAL; } @@ -20612,7 +20612,7 @@ */ case TCP_RACK_PACING_BETA_ECN: - if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) + if (strcmp(tp->t_cc->name, CCALGONAME_NEWRENO) != 0) error = EINVAL; else if (rack->rc_pacing_cc_set == 0) optval = rack->r_ctl.rc_saved_beta.beta_ecn; @@ -20622,8 +20622,8 @@ * I have previously set. Yeah it looks hackish but * we don't want to report the saved values. 
*/ - if (tp->ccv->cc_data) - optval = ((struct newreno *)tp->ccv->cc_data)->beta_ecn; + if (tp->t_ccv.cc_data) + optval = ((struct newreno *)tp->t_ccv.cc_data)->beta_ecn; else error = EINVAL; } @@ -20639,11 +20639,11 @@ break; case TCP_RACK_ENABLE_HYSTART: { - if (tp->ccv->flags & CCF_HYSTART_ALLOWED) { + if (tp->t_ccv.flags & CCF_HYSTART_ALLOWED) { optval = RACK_HYSTART_ON; - if (tp->ccv->flags & CCF_HYSTART_CAN_SH_CWND) + if (tp->t_ccv.flags & CCF_HYSTART_CAN_SH_CWND) optval = RACK_HYSTART_ON_W_SC; - if (tp->ccv->flags & CCF_HYSTART_CONS_SSTH) + if (tp->t_ccv.flags & CCF_HYSTART_CONS_SSTH) optval = RACK_HYSTART_ON_W_SC_C; } else { optval = RACK_HYSTART_OFF; diff --git a/sys/netinet/tcp_stacks/sack_filter.c b/sys/netinet/tcp_stacks/sack_filter.c --- a/sys/netinet/tcp_stacks/sack_filter.c +++ b/sys/netinet/tcp_stacks/sack_filter.c @@ -35,6 +35,8 @@ #include #include #endif +#include +#include #include #include #include diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1121,24 +1121,6 @@ #define TCBHASHSIZE 0 #endif -/* - * XXX - * Callouts should be moved into struct tcp directly. They are currently - * separate because the tcpcb structure is exported to userland for sysctl - * parsing purposes, which do not know about callouts. 
- */ -struct tcpcb_mem { - struct tcpcb tcb; - struct tcp_timer tt; - struct cc_var ccv; -#ifdef TCP_HHOOK - struct osd osd; -#endif -}; - -VNET_DEFINE_STATIC(uma_zone_t, tcpcb_zone); -#define V_tcpcb_zone VNET(tcpcb_zone) - MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); @@ -1148,7 +1130,7 @@ #define ISN_LOCK() mtx_lock(&isn_mtx) #define ISN_UNLOCK() mtx_unlock(&isn_mtx) -INPCBSTORAGE_DEFINE(tcpcbstor, inpcb, "tcpinp", "tcp_inpcb", "tcp", "tcphash"); +INPCBSTORAGE_DEFINE(tcpcbstor, tcpcb, "tcpinp", "tcp_inpcb", "tcp", "tcphash"); /* * Take a value and get the next power of 2 that doesn't overflow. @@ -1471,14 +1453,6 @@ in_pcbinfo_init(&V_tcbinfo, &tcpcbstor, tcp_tcbhashsize, tcp_tcbhashsize); - /* - * These have to be type stable for the benefit of the timers. - */ - V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - uma_zone_set_max(V_tcpcb_zone, maxsockets); - uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); - syncache_init(); tcp_hc_init(); @@ -1643,7 +1617,6 @@ in_pcbinfo_destroy(&V_tcbinfo); /* tcp_discardcb() clears the sack_holes up. */ uma_zdestroy(V_sack_hole_zone); - uma_zdestroy(V_tcpcb_zone); /* * Cannot free the zone until all tcpcbs are released as we attach @@ -2198,29 +2171,28 @@ } /* - * Create a new TCP control block, making an - * empty reassembly queue and hooking it to the argument - * protocol control block. The `inp' parameter must have - * come from the zone allocator set up in tcp_init(). + * Create a new TCP control block, making an empty reassembly queue and hooking + * it to the argument protocol control block. The `inp' parameter must have + * come from the zone allocator set up by tcpcbstor declaration. 
*/ struct tcpcb * tcp_newtcpcb(struct inpcb *inp) { - struct tcpcb_mem *tm; - struct tcpcb *tp; + struct tcpcb *tp = intotcpcb(inp); #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ - tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO); - if (tm == NULL) - return (NULL); - tp = &tm->tcb; + /* + * Historically allocation was done with M_ZERO. There is a lot of + * code that relies on that. For now take the safe approach and zero + * the whole tcpcb. This definitely can be optimized. + */ + bzero(&tp->t_start_zero, t_zero_size); /* Initialise cc_var struct for this tcpcb. */ - tp->ccv = &tm->ccv; - tp->ccv->type = IPPROTO_TCP; - tp->ccv->ccvc.tcp = tp; + tp->t_ccv.type = IPPROTO_TCP; + tp->t_ccv.ccvc.tcp = tp; rw_rlock(&tcp_function_lock); tp->t_fb = tcp_func_set_ptr; refcount_acquire(&tp->t_fb->tfb_refcnt); @@ -2230,37 +2202,24 @@ */ cc_attach(tp, CC_DEFAULT_ALGO()); - /* - * The tcpcb will hold a reference on its inpcb until tcp_discardcb() - * is called. - */ - in_pcbref(inp); /* Reference for tcpcb */ - tp->t_inpcb = inp; - if (CC_ALGO(tp)->cb_init != NULL) - if (CC_ALGO(tp)->cb_init(tp->ccv, NULL) > 0) { + if (CC_ALGO(tp)->cb_init(&tp->t_ccv, NULL) > 0) { cc_detach(tp); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); - in_pcbrele_wlocked(inp); refcount_release(&tp->t_fb->tfb_refcnt); - uma_zfree(V_tcpcb_zone, tm); return (NULL); } #ifdef TCP_HHOOK - tp->osd = &tm->osd; - if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { + if (khelp_init_osd(HELPER_CLASS_TCP, &tp->t_osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); - in_pcbrele_wlocked(inp); refcount_release(&tp->t_fb->tfb_refcnt); - uma_zfree(V_tcpcb_zone, tm); return (NULL); } #endif - tp->t_timers = &tm->tt; TAILQ_INIT(&tp->t_segq); tp->t_maxseg = #ifdef INET6 @@ -2269,11 +2228,11 @@ V_tcp_mssdflt; /* Set up our timeouts. 
*/ - callout_init(&tp->t_timers->tt_rexmt, 1); - callout_init(&tp->t_timers->tt_persist, 1); - callout_init(&tp->t_timers->tt_keep, 1); - callout_init(&tp->t_timers->tt_2msl, 1); - callout_init(&tp->t_timers->tt_delack, 1); + callout_init(&tp->tt_rexmt, 1); + callout_init(&tp->tt_persist, 1); + callout_init(&tp->tt_keep, 1); + callout_init(&tp->tt_2msl, 1); + callout_init(&tp->tt_delack, 1); switch (V_tcp_do_rfc1323) { case 0: @@ -2311,7 +2270,6 @@ * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; - inp->inp_ppcb = tp; #ifdef TCPHPTS /* * If using hpts lets drop a random number in so @@ -2333,8 +2291,6 @@ if (tp->t_fb->tfb_tcp_fb_init) { if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) { refcount_release(&tp->t_fb->tfb_refcnt); - in_pcbrele_wlocked(inp); - uma_zfree(V_tcpcb_zone, tm); return (NULL); } } @@ -2344,7 +2300,15 @@ #endif if (V_tcp_do_lrd) tp->t_flags |= TF_LRD; - return (tp); /* XXX */ + + /* + * XXXGL: this self-reference might be pointless. It will go away + * when the TCP timers are properly locked and could never fire after + * tcp_discardcb(). + */ + in_pcbref(inp); + + return (tp); } /* @@ -2388,7 +2352,7 @@ * callout, and the last discard function called will take care of * deleting the tcpcb. */ - tp->t_timers->tt_draincnt = 0; + tp->tt_draincnt = 0; tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); @@ -2425,27 +2389,21 @@ /* Allow the CC algorithm to clean up after itself. 
*/ if (CC_ALGO(tp)->cb_destroy != NULL) - CC_ALGO(tp)->cb_destroy(tp->ccv); + CC_ALGO(tp)->cb_destroy(&tp->t_ccv); CC_DATA(tp) = NULL; /* Detach from the CC algorithm */ cc_detach(tp); #ifdef TCP_HHOOK - khelp_destroy_osd(tp->osd); + khelp_destroy_osd(&tp->t_osd); #endif #ifdef STATS stats_blob_destroy(tp->t_stats); #endif CC_ALGO(tp) = NULL; - inp->inp_ppcb = NULL; - if (tp->t_timers->tt_draincnt == 0) { - bool released __diagused; - - released = tcp_freecb(tp); - KASSERT(!released, ("%s: inp %p should not have been released " - "here", __func__, inp)); - } + if (tp->tt_draincnt == 0) + tcp_freecb(tp); } bool @@ -2458,7 +2416,7 @@ #endif INP_WLOCK_ASSERT(inp); - MPASS(tp->t_timers->tt_draincnt == 0); + MPASS(tp->tt_draincnt == 0); /* We own the last reference on tcpcb, let's free it. */ #ifdef TCP_BLACKBOX @@ -2531,7 +2489,6 @@ } refcount_release(&tp->t_fb->tfb_refcnt); - uma_zfree(V_tcpcb_zone, tp); return (in_pcbrele_wlocked(inp)); } @@ -3984,9 +3941,8 @@ now = getsbinuptime(); #define COPYTIMER(ttt) do { \ - if (callout_active(&tp->t_timers->ttt)) \ - xt->ttt = (tp->t_timers->ttt.c_time - now) / \ - SBT_1MS; \ + if (callout_active(&tp->ttt)) \ + xt->ttt = (tp->ttt.c_time - now) / SBT_1MS; \ else \ xt->ttt = 0; \ } while (0) diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -145,20 +145,8 @@ #ifdef _KERNEL -struct xtcp_timer; - -struct tcp_timer { - struct callout tt_rexmt; /* retransmit timer */ - struct callout tt_persist; /* retransmit persistence */ - struct callout tt_keep; /* keepalive */ - struct callout tt_2msl; /* 2*msl TIME_WAIT timer */ - struct callout tt_delack; /* delayed ACK timer */ - uint32_t tt_flags; /* Timers flags */ - uint32_t tt_draincnt; /* Count being drained */ -}; - /* - * Flags for the tt_flags field. + * Flags for the tcpcb's tt_flags field. 
*/ #define TT_DELACK 0x0001 #define TT_REXMT 0x0002 diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -255,13 +255,13 @@ INP_WLOCK(inp); CURVNET_SET(inp->inp_vnet); - if (callout_pending(&tp->t_timers->tt_delack) || - !callout_active(&tp->t_timers->tt_delack)) { + if (callout_pending(&tp->tt_delack) || + !callout_active(&tp->tt_delack)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - callout_deactivate(&tp->t_timers->tt_delack); + callout_deactivate(&tp->tt_delack); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); CURVNET_RESTORE(); @@ -327,19 +327,19 @@ tcp_log_end_status(tp, TCP_EI_STATUS_2MSL); tcp_free_sackholes(tp); - if (callout_pending(&tp->t_timers->tt_2msl) || - !callout_active(&tp->t_timers->tt_2msl)) { + if (callout_pending(&tp->tt_2msl) || + !callout_active(&tp->tt_2msl)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - callout_deactivate(&tp->t_timers->tt_2msl); + callout_deactivate(&tp->tt_2msl); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + KASSERT((tp->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); /* * 2 MSL timeout in shutdown went off. 
If we're closed but @@ -366,7 +366,7 @@ return; } else { if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { - callout_reset(&tp->t_timers->tt_2msl, + callout_reset(&tp->tt_2msl, TP_KEEPINTVL(tp), tcp_timer_2msl, tp); } else { tcp_timer_close(tp); @@ -402,19 +402,19 @@ INP_WLOCK(inp); CURVNET_SET(inp->inp_vnet); - if (callout_pending(&tp->t_timers->tt_keep) || - !callout_active(&tp->t_timers->tt_keep)) { + if (callout_pending(&tp->tt_keep) || + !callout_active(&tp->tt_keep)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - callout_deactivate(&tp->t_timers->tt_keep); + callout_deactivate(&tp->tt_keep); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + KASSERT((tp->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); /* @@ -428,7 +428,7 @@ idletime = ticks - tp->t_rcvtime; if (idletime < TP_KEEPIDLE(tp)) { - callout_reset(&tp->t_timers->tt_keep, + callout_reset(&tp->tt_keep, TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp); INP_WUNLOCK(inp); CURVNET_RESTORE(); @@ -470,10 +470,10 @@ NET_EPOCH_EXIT(et); free(t_template, M_TEMP); } - callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), + callout_reset(&tp->tt_keep, TP_KEEPINTVL(tp), tcp_timer_keep, tp); } else - callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), + callout_reset(&tp->tt_keep, TP_KEEPIDLE(tp), tcp_timer_keep, tp); #ifdef TCPDEBUG @@ -546,19 +546,19 @@ INP_WLOCK(inp); CURVNET_SET(inp->inp_vnet); - if (callout_pending(&tp->t_timers->tt_persist) || - !callout_active(&tp->t_timers->tt_persist)) { + if (callout_pending(&tp->tt_persist) || + !callout_active(&tp->tt_persist)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - callout_deactivate(&tp->t_timers->tt_persist); + callout_deactivate(&tp->tt_persist); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + KASSERT((tp->tt_flags & TT_STOPPED) == 0, 
("%s: tp %p tcpcb can't be stopped here", __func__, tp)); /* * Persistence timer into zero window. @@ -630,19 +630,19 @@ INP_WLOCK(inp); CURVNET_SET(inp->inp_vnet); - if (callout_pending(&tp->t_timers->tt_rexmt) || - !callout_active(&tp->t_timers->tt_rexmt)) { + if (callout_pending(&tp->tt_rexmt) || + !callout_active(&tp->tt_rexmt)) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - callout_deactivate(&tp->t_timers->tt_rexmt); + callout_deactivate(&tp->tt_rexmt); if (inp->inp_flags & INP_DROPPED) { INP_WUNLOCK(inp); CURVNET_RESTORE(); return; } - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, + KASSERT((tp->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); tcp_free_sackholes(tp); TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false); @@ -824,7 +824,7 @@ * as it may depend on the new MSS. */ if (CC_ALGO(tp)->conn_init != NULL) - CC_ALGO(tp)->conn_init(tp->ccv); + CC_ALGO(tp)->conn_init(&tp->t_ccv); } else { /* * If further retransmissions are still unsuccessful @@ -843,7 +843,7 @@ * may depend on the new MSS. 
*/ if (CC_ALGO(tp)->conn_init != NULL) - CC_ALGO(tp)->conn_init(tp->ccv); + CC_ALGO(tp)->conn_init(&tp->t_ccv); } } } @@ -908,28 +908,28 @@ return; #endif - if (tp->t_timers->tt_flags & TT_STOPPED) + if (tp->tt_flags & TT_STOPPED) return; switch (timer_type) { case TT_DELACK: - t_callout = &tp->t_timers->tt_delack; + t_callout = &tp->tt_delack; f_callout = tcp_timer_delack; break; case TT_REXMT: - t_callout = &tp->t_timers->tt_rexmt; + t_callout = &tp->tt_rexmt; f_callout = tcp_timer_rexmt; break; case TT_PERSIST: - t_callout = &tp->t_timers->tt_persist; + t_callout = &tp->tt_persist; f_callout = tcp_timer_persist; break; case TT_KEEP: - t_callout = &tp->t_timers->tt_keep; + t_callout = &tp->tt_keep; f_callout = tcp_timer_keep; break; case TT_2MSL: - t_callout = &tp->t_timers->tt_2msl; + t_callout = &tp->tt_2msl; f_callout = tcp_timer_2msl; break; default: @@ -953,19 +953,19 @@ switch (timer_type) { case TT_DELACK: - t_callout = &tp->t_timers->tt_delack; + t_callout = &tp->tt_delack; break; case TT_REXMT: - t_callout = &tp->t_timers->tt_rexmt; + t_callout = &tp->tt_rexmt; break; case TT_PERSIST: - t_callout = &tp->t_timers->tt_persist; + t_callout = &tp->tt_persist; break; case TT_KEEP: - t_callout = &tp->t_timers->tt_keep; + t_callout = &tp->tt_keep; break; case TT_2MSL: - t_callout = &tp->t_timers->tt_2msl; + t_callout = &tp->tt_2msl; break; default: if (tp->t_fb->tfb_tcp_timer_active) { @@ -993,38 +993,39 @@ switch (timer_type) { case TT_DELACK: t_flags = TT_DELACK_SUS; - t_callout = &tp->t_timers->tt_delack; + t_callout = &tp->tt_delack; break; case TT_REXMT: t_flags = TT_REXMT_SUS; - t_callout = &tp->t_timers->tt_rexmt; + t_callout = &tp->tt_rexmt; break; case TT_PERSIST: t_flags = TT_PERSIST_SUS; - t_callout = &tp->t_timers->tt_persist; + t_callout = &tp->tt_persist; break; case TT_KEEP: t_flags = TT_KEEP_SUS; - t_callout = &tp->t_timers->tt_keep; + t_callout = &tp->tt_keep; break; case TT_2MSL: t_flags = TT_2MSL_SUS; - t_callout = &tp->t_timers->tt_2msl; + 
t_callout = &tp->tt_2msl; break; default: panic("tp:%p bad timer_type 0x%x", tp, timer_type); } - tp->t_timers->tt_flags |= t_flags; + tp->tt_flags |= t_flags; return (callout_stop(t_callout)); } void tcp_timers_unsuspend(struct tcpcb *tp, uint32_t timer_type) { + switch (timer_type) { case TT_DELACK: - if (tp->t_timers->tt_flags & TT_DELACK_SUS) { - tp->t_timers->tt_flags &= ~TT_DELACK_SUS; + if (tp->tt_flags & TT_DELACK_SUS) { + tp->tt_flags &= ~TT_DELACK_SUS; if (tp->t_flags & TF_DELACK) { /* Delayed ack timer should be up activate a timer */ tp->t_flags &= ~TF_DELACK; @@ -1034,8 +1035,8 @@ } break; case TT_REXMT: - if (tp->t_timers->tt_flags & TT_REXMT_SUS) { - tp->t_timers->tt_flags &= ~TT_REXMT_SUS; + if (tp->tt_flags & TT_REXMT_SUS) { + tp->tt_flags &= ~TT_REXMT_SUS; if (SEQ_GT(tp->snd_max, tp->snd_una) && (tcp_timer_active((tp), TT_PERSIST) == 0) && tp->snd_wnd) { @@ -1046,8 +1047,8 @@ } break; case TT_PERSIST: - if (tp->t_timers->tt_flags & TT_PERSIST_SUS) { - tp->t_timers->tt_flags &= ~TT_PERSIST_SUS; + if (tp->tt_flags & TT_PERSIST_SUS) { + tp->tt_flags &= ~TT_PERSIST_SUS; if (tp->snd_wnd == 0) { /* Activate the persists timer */ tp->t_rxtshift = 0; @@ -1056,18 +1057,18 @@ } break; case TT_KEEP: - if (tp->t_timers->tt_flags & TT_KEEP_SUS) { - tp->t_timers->tt_flags &= ~TT_KEEP_SUS; + if (tp->tt_flags & TT_KEEP_SUS) { + tp->tt_flags &= ~TT_KEEP_SUS; tcp_timer_activate(tp, TT_KEEP, TCPS_HAVEESTABLISHED(tp->t_state) ? TP_KEEPIDLE(tp) : TP_KEEPINIT(tp)); } break; case TT_2MSL: - if (tp->t_timers->tt_flags &= TT_2MSL_SUS) { + if (tp->tt_flags & TT_2MSL_SUS) { struct socket *so = tptosocket(tp); - tp->t_timers->tt_flags &= ~TT_2MSL_SUS; + tp->tt_flags &= ~TT_2MSL_SUS; if ((tp->t_state == TCPS_FIN_WAIT_2) && (so == NULL || /* XXXGL: needed?
*/ (so->so_rcv.sb_state & SBS_CANTRCVMORE))) { @@ -1094,9 +1095,9 @@ CURVNET_SET(inp->inp_vnet); NET_EPOCH_ENTER(et); - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0, + KASSERT((tp->tt_flags & TT_STOPPED) != 0, ("%s: tcpcb has to be stopped here", __func__)); - if (--tp->t_timers->tt_draincnt > 0 || + if (--tp->tt_draincnt > 0 || tcp_freecb(tp) == false) INP_WUNLOCK(inp); NET_EPOCH_EXIT(et); @@ -1108,22 +1109,22 @@ { struct callout *t_callout; - tp->t_timers->tt_flags |= TT_STOPPED; + tp->tt_flags |= TT_STOPPED; switch (timer_type) { case TT_DELACK: - t_callout = &tp->t_timers->tt_delack; + t_callout = &tp->tt_delack; break; case TT_REXMT: - t_callout = &tp->t_timers->tt_rexmt; + t_callout = &tp->tt_rexmt; break; case TT_PERSIST: - t_callout = &tp->t_timers->tt_persist; + t_callout = &tp->tt_persist; break; case TT_KEEP: - t_callout = &tp->t_timers->tt_keep; + t_callout = &tp->tt_keep; break; case TT_2MSL: - t_callout = &tp->t_timers->tt_2msl; + t_callout = &tp->tt_2msl; break; default: if (tp->t_fb->tfb_tcp_timer_stop) { @@ -1143,6 +1144,6 @@ * to the last one. We do this using the async drain * function and incrementing the count in */ - tp->t_timers->tt_draincnt++; + tp->tt_draincnt++; } } diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -2040,17 +2040,17 @@ * the old ones cleanup (if any). */ if (CC_ALGO(tp)->cb_destroy != NULL) - CC_ALGO(tp)->cb_destroy(tp->ccv); + CC_ALGO(tp)->cb_destroy(&tp->t_ccv); /* Detach the old CC from the tcpcb */ cc_detach(tp); /* Copy in our temp memory that was inited */ - memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var)); + memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var)); /* Now attach the new, which takes a reference */ cc_attach(tp, algo); /* Ok now are we where we have gotten past any conn_init? 
*/ if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) { /* Yep run the connection init for the new CC */ - CC_ALGO(tp)->conn_init(tp->ccv); + CC_ALGO(tp)->conn_init(&tp->t_ccv); } } else if (ptr) free(ptr, M_CC_MEM); @@ -2121,7 +2121,7 @@ } INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP)); if (CC_ALGO(tp)->ctl_output != NULL) - error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf); + error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf); else error = ENOENT; INP_WUNLOCK(inp); @@ -3060,11 +3060,9 @@ db_print_indent(indent); db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n", - &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep); - - db_print_indent(indent); - db_printf("tt_2msl: %p tt_delack: %p t_inpcb: %p\n", &tp->t_timers->tt_2msl, - &tp->t_timers->tt_delack, tp->t_inpcb); + &tp->tt_rexmt, &tp->tt_persist, &tp->tt_keep); + db_printf("tt_2msl: %p tt_delack: %p\n", &tp->tt_2msl, + &tp->tt_delack); db_print_indent(indent); db_printf("t_state: %d (", tp->t_state); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -81,6 +81,8 @@ #define TCP_EI_BITS_2MS_TIMER 0x400 /* 2 MSL timer expired */ #if defined(_KERNEL) || defined(_WANT_TCPCB) +#include + /* TCP segment queue entry */ struct tseg_qent { TAILQ_ENTRY(tseg_qent) tqe_q; @@ -131,9 +133,21 @@ */ struct tcpcb { /* Cache line 1 */ - struct inpcb *t_inpcb; /* back pointer to internet pcb */ + struct inpcb t_inpcb; /* embedded protocol independent cb */ +#define t_start_zero t_fb +#define t_zero_size (sizeof(struct tcpcb) - \ + offsetof(struct tcpcb, t_start_zero)) struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ + + struct callout tt_rexmt; /* retransmit timer */ + struct callout tt_persist; /* retransmit persistence */ + struct callout tt_keep; /* keepalive */ + struct callout tt_2msl; /* 2*msl TIME_WAIT timer */ + struct callout
tt_delack; /* delayed ACK timer */ + uint32_t tt_flags; /* Timers flags */ + uint32_t tt_draincnt; /* Count being drained */ + uint32_t t_maxseg:24, /* maximum segment size */ t_logstate:8; /* State of "black box" logging */ uint32_t t_port:16, /* Tunneling (over udp) port */ @@ -180,7 +194,6 @@ struct tsegqe_head t_segq; /* segment reassembly queue */ struct mbuf *t_in_pkt; struct mbuf *t_tail_pkt; - struct tcp_timer *t_timers; /* All the TCP timers in one struct */ uint32_t snd_ssthresh; /* snd_cwnd size threshold for * for slow start exponential to * linear switch @@ -236,9 +249,8 @@ int t_sndrexmitpack; /* retransmit packets sent */ int t_rcvoopack; /* out-of-order packets received */ void *t_toe; /* TOE pcb pointer */ - struct cc_algo *cc_algo; /* congestion control algorithm */ - struct cc_var *ccv; /* congestion control specific vars */ - struct osd *osd; /* storage for Khelp module data */ + struct cc_algo *t_cc; /* congestion control algorithm */ + struct cc_var t_ccv; /* congestion control specific vars */ int t_bytes_acked; /* # bytes acked during current RTT */ u_int t_maxunacktime; u_int t_keepinit; /* time to establish connection */ @@ -285,6 +297,9 @@ struct mbufq t_inpkts; /* List of saved input packets. */ struct mbufq t_outpkts; /* List of saved output packets. 
*/ #endif +#ifdef TCP_HHOOK + struct osd t_osd; /* storage for Khelp module data */ +#endif }; #endif /* _KERNEL || _WANT_TCPCB */ @@ -391,10 +406,10 @@ struct tcpcb * tcp_drop(struct tcpcb *, int); #ifdef _NETINET_IN_PCB_H_ -#define intotcpcb(inp) ((struct tcpcb *)(inp)->inp_ppcb) +#define intotcpcb(inp) __containerof((inp), struct tcpcb, t_inpcb) #define sototcpcb(so) intotcpcb(sotoinpcb(so)) -#define tptoinpcb(tp) tp->t_inpcb -#define tptosocket(tp) tp->t_inpcb->inp_socket +#define tptoinpcb(tp) (&(tp)->t_inpcb) +#define tptosocket(tp) (tp)->t_inpcb.inp_socket /* * tcp_output() diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -103,7 +103,6 @@ #include #include #include -#include #include #include diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -65,6 +65,7 @@ #include #include +#include #include #include #include diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -53,6 +53,7 @@ #include #include +#include #include #include #include diff --git a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c --- a/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c +++ b/sys/netpfil/ipfilter/netinet/ip_fil_freebsd.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -53,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/sys/netpfil/pf/pf_syncookies.c b/sys/netpfil/pf/pf_syncookies.c --- a/sys/netpfil/pf/pf_syncookies.c +++ b/sys/netpfil/pf/pf_syncookies.c @@ -80,6 +80,7 @@ #include #include +#include #include #include #include