Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h
+++ sys/amd64/include/clock.h
@@ -41,6 +41,8 @@
 int	timer_spkr_release(void);
 void	timer_spkr_setfreq(int freq);
+
+extern sbintime_t (*cpu_tcp_ts_getsbintime)(void);
 #endif /* _KERNEL */
 
 #endif /* !_MACHINE_CLOCK_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -59,6 +59,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include 		/* for proc0 declaration */
@@ -238,7 +239,7 @@
 	    struct tcpcb *, int, int);
 static void	 tcp_pulloutofband(struct socket *, struct tcphdr *,
 	    struct mbuf *, int);
-static void	 tcp_xmit_timer(struct tcpcb *, int);
+static void	 tcp_xmit_timer(struct tcpcb *, sbintime_t);
 static void	 tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
 static void inline	cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
 			    uint16_t type);
@@ -1484,6 +1485,7 @@
 	int thflags, acked, ourfinisacked, needoutput = 0;
 	int rstreason, todrop, win;
 	u_long tiwin;
+	sbintime_t t;
 	char *s;
 	struct in_conninfo *inc;
 	struct mbuf *mfree;
@@ -1541,7 +1543,7 @@
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
-	tp->t_rcvtime = ticks;
+	tp->t_rcvtime = tcp_ts_getsbintime();
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
@@ -1594,7 +1596,7 @@
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
-		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
+		if (TSTMP_GT(to.to_tsecr, tcp_ts_getsbintime32()))
 			to.to_tsecr = 0;
 	}
 	/*
@@ -1637,7 +1639,7 @@
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
-			tp->ts_recent_age = tcp_ts_getticks();
+			tp->ts_recent_age = tcp_ts_getsbintime();
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
@@ -1681,7 +1683,7 @@
 		 */
 		if ((to.to_flags & TOF_TS) != 0 &&
 		    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
-			tp->ts_recent_age = tcp_ts_getticks();
+			tp->ts_recent_age = tcp_ts_getsbintime();
 			tp->ts_recent = to.to_tsval;
 		}
@@ -1705,7 +1707,7 @@
 			 */
 			if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
-			    (int)(ticks - tp->t_badrxtwin) < 0) {
+			    (int)(tcp_ts_getsbintime() - tp->t_badrxtwin) < 0) {
 				cc_cong_signal(tp, th, CC_RTO_ERR);
 			}
@@ -1719,20 +1721,31 @@
 			 */
 			if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
-				u_int t;
+				u_int t, curts;
+
-				t = tcp_ts_getticks() - to.to_tsecr;
+				curts = tcp_ts_getsbintime32();
+				/*
+				 * cope with the 32-bit timestamp clock
+				 * wrapping (roughly every 256 seconds)
+				 */
+				if (__predict_true(curts > to.to_tsecr))
+					t = curts - to.to_tsecr;
+				else
+					t = UINT_MAX - to.to_tsecr + curts;
 				if (!tp->t_rttlow || tp->t_rttlow > t)
 					tp->t_rttlow = t;
 				tcp_xmit_timer(tp,
-				    TCP_TS_TO_TICKS(t) + 1);
+				    TCP_TS_TO_SBT(t) + 1);
 			} else if (tp->t_rtttime &&
-			    SEQ_GT(th->th_ack, tp->t_rtseq)) {
+			    SEQ_GT(th->th_ack, tp->t_rtseq)) {
+				u_int t;
+
+				t = tcp_ts_getsbintime32();
 				if (!tp->t_rttlow ||
-				    tp->t_rttlow > ticks - tp->t_rtttime)
-					tp->t_rttlow = ticks - tp->t_rtttime;
+				    tp->t_rttlow > t - tp->t_rtttime)
+					tp->t_rttlow = t - tp->t_rtttime;
 				tcp_xmit_timer(tp,
-				    ticks - tp->t_rtttime);
+				    TCP_TS_TO_SBT(t - tp->t_rtttime));
 			}
 			acked = BYTES_THIS_ACK(tp, th);
@@ -2010,7 +2023,7 @@
 		 *        SYN_SENT  --> ESTABLISHED
 		 *        SYN_SENT* --> FIN_WAIT_1
 		 */
-		tp->t_starttime = ticks;
+		tp->t_starttime = tcp_ts_getsbintime();
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
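The explicit wrap branch above is worth a second look: with 32-bit unsigned operands, plain modular subtraction already yields the elapsed ticks across a wrap, and the UINT_MAX form is one tick short of it. A minimal standalone sketch, not part of the patch, with a made-up helper name:

    #include <stdint.h>

    /*
     * Elapsed ticks of the 32-bit timestamp clock between tsecr and curts.
     * Unsigned subtraction wraps modulo 2^32, so no special case is needed;
     * UINT_MAX - tsecr + curts computes the same value minus one tick.
     */
    static inline uint32_t
    ts_elapsed(uint32_t curts, uint32_t tsecr)
    {

        return (curts - tsecr);
    }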
@@ -2184,7 +2197,7 @@
 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
 		/* Check to see if ts_recent is over 24 days old.  */
-		if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
+		if (tcp_ts_getsbintime() - tp->ts_recent_age > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
@@ -2338,7 +2351,7 @@
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
-		tp->ts_recent_age = tcp_ts_getticks();
+		tp->ts_recent_age = tcp_ts_getsbintime();
 		tp->ts_recent = to.to_tsval;
 	}
@@ -2382,7 +2395,7 @@
 		 *	SYN-RECEIVED  -> ESTABLISHED
 		 *	SYN-RECEIVED* -> FIN-WAIT-1
 		 */
-		tp->t_starttime = ticks;
+		tp->t_starttime = tcp_ts_getsbintime();
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
@@ -2666,8 +2679,9 @@
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
+		t = tcp_ts_getsbintime();
 		if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
-		    (int)(ticks - tp->t_badrxtwin) < 0)
+		    (int)(t - tp->t_badrxtwin) < 0)
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*
@@ -2685,16 +2699,19 @@
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
-			u_int t;
+			u_int t_;
 
-			t = tcp_ts_getticks() - to.to_tsecr;
-			if (!tp->t_rttlow || tp->t_rttlow > t)
-				tp->t_rttlow = t;
-			tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
+			t_ = ((uint32_t)t) - to.to_tsecr;
+			if (!tp->t_rttlow || tp->t_rttlow > t_)
+				tp->t_rttlow = t_;
+			tcp_xmit_timer(tp, TCP_TS_TO_SBT(t_) + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
-			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
-				tp->t_rttlow = ticks - tp->t_rtttime;
-			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+			u_int t_;
+
+			t_ = ((uint32_t)t);
+			if (!tp->t_rttlow || tp->t_rttlow > t_ - tp->t_rtttime)
+				tp->t_rttlow = t_ - tp->t_rtttime;
+			tcp_xmit_timer(tp, TCP_TS_TO_SBT(t_ - tp->t_rtttime));
 		}
 		/*
@@ -3004,7 +3021,7 @@
 	 * enter the CLOSE_WAIT state.
 	 */
 	case TCPS_SYN_RECEIVED:
-		tp->t_starttime = ticks;
+		tp->t_starttime = tcp_ts_getsbintime();
 		/* FALLTHROUGH */
 	case TCPS_ESTABLISHED:
 		tcp_state_change(tp, TCPS_CLOSE_WAIT);
@@ -3334,15 +3351,16 @@
  * and update averages and current timeout.
  */
 static void
-tcp_xmit_timer(struct tcpcb *tp, int rtt)
+tcp_xmit_timer(struct tcpcb *tp, sbintime_t rtt)
 {
-	int delta;
-	int expected_samples, expected_shift, shift;
+	uint64_t delta;
+	uint64_t expected_samples, expected_shift, shift;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* RFC 7323 Appendix G RTO Calculation Modification */
 	/* ExpectedSamples = ceiling(FlightSize / (SMSS * 2)) */
+	/* roundup(x, y) == ceiling(x / y) * y */
 	expected_samples = ((tcp_compute_pipe(tp) + (tp->t_maxseg - 1)) /
 	    (tp->t_maxseg << 1));
 	/* alpha' = alpha / ExpectedSamples */
 	expected_shift = min(max(fls(expected_samples + 1) - 1, 0),
 	    TCP_DELTA_SHIFT);
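tcp_xmit_timer() now takes the RTT sample as an sbintime_t and scales the smoothing gain per RFC 7323 Appendix G: the more ACKs arrive per RTT, the smaller the per-sample weight. A rough standalone sketch of that calculation under the same approximation the patch uses (names are made up; EX_DELTA_SHIFT stands in for the kernel's TCP_DELTA_SHIFT):

    /*
     * Approximate ExpectedSamples = ceiling(FlightSize / (SMSS * 2)) and the
     * extra right shift applied to the RTT smoothing gain
     * (alpha' = alpha / ExpectedSamples), clamped to the base shift.
     */
    #define EX_DELTA_SHIFT	2	/* stand-in for TCP_DELTA_SHIFT */

    static int
    rtt_expected_shift(unsigned int pipe, unsigned int maxseg)
    {
        unsigned int expected_samples;
        int shift;

        expected_samples = (pipe + maxseg - 1) / (maxseg << 1);
        /* floor(log2(expected_samples + 1)), clamped: same as min(fls(x) - 1, max) */
        shift = 0;
        while (shift < EX_DELTA_SHIFT &&
            (1U << (shift + 1)) <= expected_samples + 1)
            shift++;
        return (shift);
    }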
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -749,13 +749,13 @@
 	/* Timestamps. */
 	if ((tp->t_flags & TF_RCVD_TSTMP) ||
 	    ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
-		to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+		to.to_tsval = tcp_ts_getsbintime32() + tp->ts_offset;
 		to.to_tsecr = tp->ts_recent;
 		to.to_flags |= TOF_TS;
 		/* Set receive buffer autosizing timestamp. */
 		if (tp->rfbuf_ts == 0 &&
 		    (so->so_rcv.sb_flags & SB_AUTOSIZE))
-			tp->rfbuf_ts = tcp_ts_getticks();
+			tp->rfbuf_ts = tcp_ts_getsbintime();
 	}
 	/* Selective ACK's. */
 	if (tp->t_flags & TF_SACK_PERMIT) {
@@ -1416,7 +1416,7 @@
 	 * not currently timing anything.
 	 */
 	if (tp->t_rtttime == 0) {
-		tp->t_rtttime = ticks;
+		tp->t_rtttime = tcp_ts_getsbintime();
 		tp->t_rtseq = startseq;
 		TCPSTAT_INC(tcps_segstimed);
 	}
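On the wire nothing changes in width: the TSval carried in the option is still the low 32 bits of the clock plus the per-connection offset, and fields that stay 32 bits wide (t_rtttime, rfbuf_ts, the syncache's sc_ts/sc_tsoff) simply store that truncated value. A small standalone sketch of the relationship, with a made-up helper name:

    #include <stdint.h>

    /* TSval to place in the TCP timestamp option. */
    static inline uint32_t
    ts_wire_value(uint64_t ts_clock, uint32_t ts_offset)
    {

        /* truncation to 32 bits is the wrap the receiver must cope with */
        return ((uint32_t)ts_clock + ts_offset);
    }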
Index: sys/netinet/tcp_seq.h
===================================================================
--- sys/netinet/tcp_seq.h
+++ sys/netinet/tcp_seq.h
@@ -63,33 +63,61 @@
 	(tp)->snd_recover = (tp)->iss
 #ifdef _KERNEL
+
+/*
+ * RFC 7323
+ * Section 5.4. Timestamp Clock
+ *
+ *  (b)  The timestamp clock must not be "too fast".
+ *
+ *       The recycling time of the timestamp clock MUST be greater than
+ *       MSL seconds.  Since the clock (timestamp) is 32 bits and the
+ *       worst-case MSL is 255 seconds, the maximum acceptable clock
+ *       frequency is one tick every 59 ns.
+ */
+
+/*
+ * The fastest permissible timestamp clock therefore ticks once every 59 ns.
+ * To keep the conversion cheap we use a tick of 1/2^24 s (~59.6 ns), i.e. an
+ * sbintime_t shifted right by 8 bits, so the 32-bit value recycles every
+ * 256 s:
+ *  - at 59 ns/tick, (1<<32) ticks recycle in ~253 s
+ *  - at 60 ns/tick, (1<<32) ticks recycle in ~257 s
+ *
+ * Note that MSL should really be a function of RTT.  Although ~60 ns is more
+ * than sufficient resolution for the time being, a 255 s MSL on a data center
+ * network with sub-millisecond RTTs doesn't make a whole lot of sense.  In
+ * the future the MSL should be determined dynamically, or at the very least
+ * be configurable per subnet.  Nonetheless, fixing the timestamp clock at a
+ * rate corresponding to a 256 s MSL gives us what we need for now while
+ * otherwise remaining as RFC compliant as possible.
+ */
+#define SBT_MINTS		256
+#define SBT_MINTS_SHIFT		8
+
 /*
  * Clock macros for RFC 1323 timestamps.
  */
-#define TCP_TS_TO_TICKS(_t)	((_t) * hz / 1000)
+#define TCP_TS_TO_SBT(_t)	((_t) << SBT_MINTS_SHIFT)
 
-/* Timestamp wrap-around time, 24 days. */
-#define TCP_PAWS_IDLE	(24 * 24 * 60 * 60 * 1000)
 /*
- * tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323.
- * We always use 1ms granularity independent of hz.
+ * RFC defined MSL is 255 s; the 256 s recycling time of the timestamp clock
+ * covers it with a little slop.
 */
-static __inline u_int
-tcp_ts_getticks(void)
-{
-	struct timeval tv;
-	u_long ms;
-
-	/*
-	 * getmicrouptime() should be good enough for any 1-1000ms granularity.
-	 * Do not use getmicrotime() here as it might break nfsroot/tcp.
-	 */
-	getmicrouptime(&tv);
-	ms = tv.tv_sec * 1000 + tv.tv_usec / 1000;
-
-	return (ms);
-}
+#define TCP_PAWS_IDLE	(SBT_MINTS*SBT_1S)
+
+#if defined(__amd64__) || defined(__i386__)
+#include
+#endif
+
+#if !defined(__amd64__) && !defined(__i386__)
+extern sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+#endif
+
+#define tcp_ts_getsbintime()	(cpu_tcp_ts_getsbintime)()
+
+/* trivial macro to make intent clearer */
+#define tcp_ts_getsbintime32()	((uint32_t)tcp_ts_getsbintime())
+
 #endif /* _KERNEL */
 
 #endif /* _NETINET_TCP_SEQ_H_ */
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -311,6 +311,21 @@
 	return (hashsize);
 }
 
+#if !defined(__amd64__) && !defined(__i386__)
+sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+
+static sbintime_t
+cpu_tcp_ts_getsbintime_(void)
+{
+	struct bintime bt;
+	sbintime_t sbt;
+
+	getbinuptime(&bt);
+	sbt = bttosbt(bt) >> SBT_MINTS_SHIFT;
+	return (sbt);
+}
+#endif
+
 void
 tcp_init(void)
 {
@@ -433,6 +448,10 @@
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
+#if !defined(__amd64__) && !defined(__i386__)
+	cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_;
+#endif
+
 }
 
 #ifdef VIMAGE
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -837,7 +837,7 @@
 	if (sc->sc_flags & SCF_TIMESTAMP) {
 		tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
 		tp->ts_recent = sc->sc_tsreflect;
-		tp->ts_recent_age = tcp_ts_getticks();
+		tp->ts_recent_age = tcp_ts_getsbintime();
 		tp->ts_offset = sc->sc_tsoff;
 	}
 #ifdef TCP_SIGNATURE
@@ -1266,7 +1266,7 @@
 	 */
 	if (to->to_flags & TOF_TS) {
 		sc->sc_tsreflect = to->to_tsval;
-		sc->sc_ts = tcp_ts_getticks();
+		sc->sc_ts = tcp_ts_getsbintime();
 		sc->sc_flags |= SCF_TIMESTAMP;
 	}
 	if (to->to_flags & TOF_SCALE) {
@@ -1787,7 +1787,7 @@
 	/* Randomize the timestamp. */
 	if (sc->sc_flags & SCF_TIMESTAMP) {
 		sc->sc_ts = arc4random();
-		sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
+		sc->sc_tsoff = sc->sc_ts - tcp_ts_getsbintime();
 	}
 	TCPSTAT_INC(tcps_sc_sendcookie);
@@ -1877,7 +1877,7 @@
 		sc->sc_flags |= SCF_TIMESTAMP;
 		sc->sc_tsreflect = to->to_tsval;
 		sc->sc_ts = to->to_tsecr;
-		sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
+		sc->sc_tsoff = to->to_tsecr - tcp_ts_getsbintime();
 	}
 
 	if (to->to_flags & TOF_SIGNATURE)
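The clock itself is the system uptime shifted right by SBT_MINTS_SHIFT: one tick is 2^-24 s (about 59.6 ns), so the 32-bit value that reaches the wire recycles every 256 s. One consequence worth double-checking is that a difference of two tcp_ts_getsbintime() values is expressed in these tick units rather than in sbintime_t units, so thresholds like TCP_PAWS_IDLE (defined above as SBT_MINTS*SBT_1S) and the tick_sbt products in tcp_timer.c appear to be a factor of 2^8 away from the intended number of seconds; the old "24 days" comment at the PAWS check is also stale now. A standalone sketch of the conversions under that assumption, with made-up helper names:

    #include <stdint.h>

    #define TS_SHIFT	8			/* SBT_MINTS_SHIFT */
    #define TS_PER_SEC	(UINT64_C(1) << 24)	/* ticks per second, ~59.6 ns each */

    static inline uint64_t
    ts_from_seconds(uint64_t sec)
    {

        return (sec << 24);	/* e.g. a 256 s PAWS window is 1ULL << 32 */
    }

    static inline uint64_t
    ts_to_seconds(uint64_t ts)
    {

        return (ts >> 24);
    }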
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -68,6 +68,7 @@
 #include
 #include
 #include
+#include
 #ifdef INET6
 #include
 #endif
@@ -356,7 +357,7 @@
 		TCPSTAT_INC(tcps_finwait2_drops);
 		tp = tcp_close(tp);
 	} else {
-		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
+		if (tcp_ts_getsbintime() - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
 			if (!callout_reset(&tp->t_timers->tt_2msl,
 			    TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
 				tp->t_timers->tt_flags &= ~TT_2MSL_RST;
@@ -421,7 +422,7 @@
 		goto dropit;
 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
-		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
+		if (tcp_ts_getsbintime() - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
 			goto dropit;
 		/*
 		 * Send a packet designed to force a response
@@ -484,6 +485,7 @@
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
+	sbintime_t t;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
@@ -524,9 +526,11 @@
 	 * (no responses to probes) reaches the maximum
 	 * backoff that we would use if retransmitting.
 	 */
+
+	t = tcp_ts_getsbintime() - tp->t_rcvtime;
 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
-	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
-	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
+	    (t >= tcp_maxpersistidle * tick_sbt ||
+	     t >= TCP_REXMTVAL(tp) * tcp_totbackoff * tick_sbt)) {
 		TCPSTAT_INC(tcps_persistdrop);
 		tp = tcp_drop(tp, ETIMEDOUT);
 		goto out;
@@ -536,7 +540,7 @@
 	 * connection after a much reduced timeout.
 	 */
 	if (tp->t_state > TCPS_CLOSE_WAIT &&
-	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
+	    t >= TCPTV_PERSMAX * tick_sbt) {
 		TCPSTAT_INC(tcps_persistdrop);
 		tp = tcp_drop(tp, ETIMEDOUT);
 		goto out;
@@ -638,7 +642,7 @@
 			tp->t_flags |= TF_WASCRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASCRECOVERY;
-		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+		tp->t_badrxtwin = tcp_ts_getsbintime() + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 		tp->t_flags |= TF_PREVVALID;
 	} else
 		tp->t_flags &= ~TF_PREVVALID;
@@ -992,5 +996,5 @@
 		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_2msl))
 		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
-	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
+	xtimer->t_rcvtime = ticks_to_msecs((tcp_ts_getsbintime() - tp->t_rcvtime) / tick_sbt);
 }
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -568,7 +568,7 @@
 	 */
 	if (tw->t_recent && flags == TH_ACK) {
 		to.to_flags |= TOF_TS;
-		to.to_tsval = tcp_ts_getticks() + tw->ts_offset;
+		to.to_tsval = tcp_ts_getsbintime32() + tw->ts_offset;
 		to.to_tsecr = tw->t_recent;
 	}
 	optlen = tcp_addoptions(&to, (u_char *)(th + 1));
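The persist hunks above convert tick-valued thresholds by multiplying with tick_sbt, while the FIN_WAIT_2 and keepalive hunks compare the raw tick counts TP_MAXIDLE()/TP_KEEPIDLE() directly against the new clock; whichever convention is intended, it probably wants to be uniform, and whether TCP_REXMTVAL() still needs the tick_sbt factor once t_rxtcur becomes an sbintime_t is worth re-checking too. A hedged sketch of one uniform helper (standalone; the helper name is made up and tick_sbt is passed in rather than taken from the kernel):

    #include <stdint.h>

    #define TS_SHIFT	8	/* SBT_MINTS_SHIFT above */

    /*
     * Convert a threshold expressed in hz ticks to the shifted timestamp
     * clock, given the duration of one tick as an sbintime_t (tick_sbt).
     */
    static inline int64_t
    hzticks_to_tsclock(int hzticks, int64_t tick_sbt)
    {

        return (((int64_t)hzticks * tick_sbt) >> TS_SHIFT);
    }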
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1307,7 +1307,7 @@
 	}
 
 	ti->tcpi_rto = tp->t_rxtcur * tick;
-	ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
+	ti->tcpi_last_data_recv = (long)((tcp_ts_getsbintime() - tp->t_rcvtime) / tick_sbt) * tick;
 	ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
 	ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
@@ -2075,20 +2075,20 @@
 	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 
 	db_print_indent(indent);
-	db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n",
+	db_printf("t_maxopd: %u t_rcvtime: %zu t_startime: %zu\n",
 	    tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
 
 	db_print_indent(indent);
-	db_printf("t_rttime: %zu t_rtsq: 0x%08x\n",
+	db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
 	    tp->t_rtttime, tp->t_rtseq);
 
 	db_print_indent(indent);
-	db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %zu\n",
+	db_printf("t_rxtcur: %zu t_maxseg: %u t_srtt: %zu\n",
 	    tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
 
 	db_print_indent(indent);
-	db_printf("t_rttvar: %zu t_rxtshift: %d t_rttmin: %u "
-	    "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
+	db_printf("t_rttvar: %zu t_rxtshift: %d t_rttmin: %zu "
+	    "t_rttbest: %zu\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
 	    tp->t_rttbest);
 
 	db_print_indent(indent);
@@ -2105,7 +2105,7 @@
 	    tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
 
 	db_print_indent(indent);
-	db_printf("ts_recent: %u ts_recent_age: %u\n",
+	db_printf("ts_recent: %u ts_recent_age: %zu\n",
 	    tp->ts_recent, tp->ts_recent_age);
 
 	db_print_indent(indent);
@@ -2114,7 +2114,7 @@
 	db_print_indent(indent);
 	db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x "
-	    "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
+	    "t_badrxtwin: %zu\n", tp->snd_ssthresh_prev,
 	    tp->snd_recover_prev, tp->t_badrxtwin);
 
 	db_print_indent(indent);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -135,22 +135,22 @@
 	u_int	t_maxopd;		/* mss plus options */
 
-	u_int	t_rcvtime;		/* inactivity time */
-	u_int	t_starttime;		/* time connection was established */
+	sbintime_t t_rcvtime;		/* inactivity time */
+	sbintime_t t_starttime;		/* time connection was established */
 	u_int	t_rtttime;		/* RTT measurement start time */
 	tcp_seq	t_rtseq;		/* sequence number being timed */
 
 	u_int	t_bw_spare1;		/* unused */
 	tcp_seq	t_bw_spare2;		/* unused */
 
-	int	t_rxtcur;		/* current retransmit value (ticks) */
+	sbintime_t t_rxtcur;		/* current retransmit value */
 	u_int	t_maxseg;		/* maximum segment size */
-	int	t_srtt;			/* smoothed round-trip time */
-	int	t_rttvar;		/* variance in round-trip time */
+	uint64_t t_srtt;		/* smoothed round-trip time */
+	uint64_t t_rttvar;		/* variance in round-trip time */
 
 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
-	u_int	t_rttmin;		/* minimum rtt allowed */
-	u_int	t_rttbest;		/* best rtt we've seen */
+	sbintime_t t_rttmin;		/* minimum rtt allowed */
+	sbintime_t t_rttbest;		/* best rtt we've seen */
 	u_long	t_rttupdated;		/* number of times rtt sampled */
 	u_long	max_sndwnd;		/* largest window peer has offered */
@@ -163,7 +163,7 @@
 	u_char	rcv_scale;		/* window scaling for recv window */
 	u_char	request_r_scale;	/* pending window scaling */
 	u_int32_t ts_recent;		/* timestamp echo data */
-	u_int	ts_recent_age;		/* when last updated */
+	sbintime_t ts_recent_age;	/* when last updated */
 	u_int32_t ts_offset;		/* our timestamp offset */
 	tcp_seq	last_ack_sent;
@@ -172,7 +172,7 @@
 	u_long	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	int	t_sndzerowin;		/* zero-window updates sent */
-	u_int	t_badrxtwin;		/* window for retransmit recovery */
+	sbintime_t t_badrxtwin;		/* window for retransmit recovery */
 	u_char	snd_limited;		/* segments limited transmitted */
 /* SACK related state */
 	int	snd_numholes;		/* number of holes seen by sender */
@@ -367,7 +367,7 @@
 	struct ucred	*tw_cred;	/* user credentials */
 	u_int32_t	t_recent;
 	u_int32_t	ts_offset;	/* our timestamp offset */
-	u_int		t_starttime;
+	sbintime_t	t_starttime;
 	int		tw_time;
 	TAILQ_ENTRY(tcptw) tw_2msl;
 	void		*tw_pspare;	/* TCP_SIGNATURE */
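tcp_var.h widens the RTT state to 64 bits while the wire timestamp and t_rtttime stay 32 bits, so only the in-kernel arithmetic gains range. One small follow-on: the ddb printfs above use %zu for the widened fields, which only happens to match where size_t is 64 bits; the portable form casts through intmax_t/uintmax_t. Illustrative userland stand-in, not the patch's code:

    #include <stdint.h>
    #include <stdio.h>

    static void
    print_rtt_state(uint64_t t_srtt, uint64_t t_rttvar, int64_t ts_recent_age)
    {

        printf("t_srtt: %ju t_rttvar: %ju ts_recent_age: %jd\n",
            (uintmax_t)t_srtt, (uintmax_t)t_rttvar, (intmax_t)ts_recent_age);
    }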
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c
+++ sys/x86/x86/tsc.c
@@ -51,6 +51,12 @@
 
 #include "cpufreq_if.h"
 
+/*
+ * The number of TSC increments per timestamp-clock tick (~60 ns).
+ */
+#define TSC_FREQ_MINTS	(tsc_freq / (1000000000/60))
+
+
 uint64_t	tsc_freq;
 uint64_t	tsc_sbt;
 int		tsc_is_invariant;
@@ -58,6 +64,10 @@
 
 static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
 
+sbintime_t cpu_tcp_ts_getsbintime_rdtsc(void);
+sbintime_t cpu_tcp_ts_getsbintime_rdtscp(void);
+sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+
 SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
     &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
@@ -582,6 +592,12 @@
 		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
 		tc_init(&tsc_timecounter);
 	}
+	/* XXX yes this needs to be revisited */
+#if defined(__amd64__)
+	cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_rdtscp;
+#elif defined(__i386__)
+	cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_rdtsc;
+#endif
 }
 SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
@@ -734,6 +750,21 @@
 	return (tc == &tsc_timecounter);
 }
 
+sbintime_t
+cpu_tcp_ts_getsbintime_rdtsc(void)
+{
+
+	return (rdtsc() / TSC_FREQ_MINTS);
+}
+
+sbintime_t
+cpu_tcp_ts_getsbintime_rdtscp(void)
+{
+
+	return (rdtscp() / TSC_FREQ_MINTS);
+}
+
+
 #ifdef COMPAT_FREEBSD32
 uint32_t
 cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
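The x86 implementations divide the raw TSC by TSC_FREQ_MINTS = tsc_freq / (1000000000/60), i.e. by the number of TSC increments in one ~60 ns tick, which costs a 64-bit division on every call. If that ever shows up in profiles, a precomputed multiply/shift gives the same result; a standalone sketch under that assumption (names made up; the 128-bit multiply is a gcc/clang extension):

    #include <stdint.h>

    static uint64_t ts_scale;	/* set once after tsc_freq is known */

    static void
    ts_scale_init(uint64_t tsc_freq)
    {

        /* timestamp ticks per TSC count, as 32.32 fixed point */
        ts_scale = (UINT64_C(16666666) << 32) / tsc_freq;
    }

    static inline uint64_t
    tsc_to_tsclock(uint64_t tsc)
    {

        /* 64x64 -> 128 multiply keeps the full range of the TSC */
        return ((uint64_t)(((unsigned __int128)tsc * ts_scale) >> 32));
    }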