Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153544636
D4366.id10757.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
31 KB
Referenced Files
None
Subscribers
None
D4366.id10757.diff
View Options
Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h
+++ sys/amd64/include/clock.h
@@ -41,6 +41,8 @@
int timer_spkr_release(void);
void timer_spkr_setfreq(int freq);
+
+extern sbintime_t (*cpu_tcp_ts_getsbintime)(void);
#endif /* _KERNEL */
#endif /* !_MACHINE_CLOCK_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -59,6 +59,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/hhook.h>
+#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
@@ -238,7 +239,7 @@
struct tcpcb *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
-static void tcp_xmit_timer(struct tcpcb *, int);
+static void tcp_xmit_timer(struct tcpcb *, sbintime_t);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
uint16_t type);
@@ -347,7 +348,7 @@
}
TCPT_RANGESET(tp->t_rxtcur,
((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
- tp->t_rttmin, TCPTV_REXMTMAX);
+ tp->t_rttmin, TCPTV_REXMTMAX*tick_sbt);
}
if (metrics.rmx_ssthresh) {
/*
@@ -1484,6 +1485,7 @@
int thflags, acked, ourfinisacked, needoutput = 0;
int rstreason, todrop, win;
u_long tiwin;
+ sbintime_t t;
char *s;
struct in_conninfo *inc;
struct mbuf *mfree;
@@ -1541,9 +1543,9 @@
* XXX: This should be done after segment
* validation to ignore broken/spoofed segs.
*/
- tp->t_rcvtime = ticks;
+ tp->t_rcvtime = tcp_ts_getsbintime();
if (TCPS_HAVEESTABLISHED(tp->t_state))
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)*tick_sbt);
/*
* Scale up the window into a 32-bit value.
@@ -1594,7 +1596,7 @@
*/
if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
to.to_tsecr -= tp->ts_offset;
- if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
+ if (TSTMP_GT(to.to_tsecr, tcp_ts_getsbintime32()))
to.to_tsecr = 0;
}
/*
@@ -1637,7 +1639,7 @@
if (to.to_flags & TOF_TS) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
}
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
@@ -1681,7 +1683,7 @@
*/
if ((to.to_flags & TOF_TS) != 0 &&
SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
tp->ts_recent = to.to_tsval;
}
@@ -1705,7 +1707,7 @@
*/
if (tp->t_rxtshift == 1 &&
tp->t_flags & TF_PREVVALID &&
- (int)(ticks - tp->t_badrxtwin) < 0) {
+ (int)(tcp_ts_getsbintime() - tp->t_badrxtwin) < 0) {
cc_cong_signal(tp, th, CC_RTO_ERR);
}
@@ -1719,20 +1721,30 @@
*/
if ((to.to_flags & TOF_TS) != 0 &&
to.to_tsecr) {
- u_int t;
+ u_int t, curts;
- t = tcp_ts_getticks() - to.to_tsecr;
+
+ curts = tcp_ts_getsbintime32();
+ /*
+ * cope with the 32-bit timestamp wrapping (roughly every 256 seconds)
+ */
+ if (__predict_true(curts > to.to_tsecr))
+ t = curts - to.to_tsecr;
+ else
+ t = UINT_MAX - to.to_tsecr + curts;
if (!tp->t_rttlow || tp->t_rttlow > t)
tp->t_rttlow = t;
tcp_xmit_timer(tp,
- TCP_TS_TO_TICKS(t) + 1);
+ TCP_TS_TO_SBT(t) + 1);
} else if (tp->t_rtttime &&
- SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ sbintime_t t;
+
+ t = tcp_ts_getsbintime();
if (!tp->t_rttlow ||
- tp->t_rttlow > ticks - tp->t_rtttime)
- tp->t_rttlow = ticks - tp->t_rtttime;
- tcp_xmit_timer(tp,
- ticks - tp->t_rtttime);
+ tp->t_rttlow > t - tp->t_rtttime)
+ tp->t_rttlow = t - tp->t_rtttime;
+ tcp_xmit_timer(tp, t - tp->t_rtttime);
}
acked = BYTES_THIS_ACK(tp, th);
@@ -2010,7 +2022,7 @@
* SYN_SENT --> ESTABLISHED
* SYN_SENT* --> FIN_WAIT_1
*/
- tp->t_starttime = ticks;
+ tp->t_starttime = tcp_ts_getsbintime();
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
@@ -2021,7 +2033,7 @@
mtod(m, const char *), tp, th);
cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP,
- TP_KEEPIDLE(tp));
+ TP_KEEPIDLE(tp)*tick_sbt);
}
} else {
/*
@@ -2184,7 +2196,7 @@
TSTMP_LT(to.to_tsval, tp->ts_recent)) {
/* Check to see if ts_recent is over 24 days old. */
- if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
+ if (tcp_ts_getsbintime() - tp->ts_recent_age > TCP_PAWS_IDLE) {
/*
* Invalidate ts_recent. If this segment updates
* ts_recent, the age will be reset later and ts_recent
@@ -2338,7 +2350,7 @@
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN|TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
tp->ts_recent = to.to_tsval;
}
@@ -2382,7 +2394,7 @@
* SYN-RECEIVED -> ESTABLISHED
* SYN-RECEIVED* -> FIN-WAIT-1
*/
- tp->t_starttime = ticks;
+ tp->t_starttime = tcp_ts_getsbintime();
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
@@ -2391,7 +2403,7 @@
TCP_PROBE5(accept__established, NULL, tp,
mtod(m, const char *), tp, th);
cc_conn_init(tp);
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)*tick_sbt);
}
/*
* If segment contains data or ACK, will call tcp_reass()
@@ -2666,8 +2678,9 @@
* original cwnd and ssthresh, and proceed to transmit where
* we left off.
*/
+ t = tcp_ts_getsbintime();
if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
- (int)(ticks - tp->t_badrxtwin) < 0)
+ (int)(t - tp->t_badrxtwin) < 0)
cc_cong_signal(tp, th, CC_RTO_ERR);
/*
@@ -2685,16 +2698,19 @@
* huge RTT and blow up the retransmit timer.
*/
if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
- u_int t;
+ u_int t_;
- t = tcp_ts_getticks() - to.to_tsecr;
- if (!tp->t_rttlow || tp->t_rttlow > t)
- tp->t_rttlow = t;
- tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
+ t_ = ((uint32_t)t) - to.to_tsecr;
+ if (!tp->t_rttlow || tp->t_rttlow > t_)
+ tp->t_rttlow = t_;
+ tcp_xmit_timer(tp, TCP_TS_TO_SBT(t) + 1);
} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
- if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
- tp->t_rttlow = ticks - tp->t_rtttime;
- tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+ u_int t_;
+
+ t_ = ((uint32_t)t);
+ if (!tp->t_rttlow || tp->t_rttlow > t_ - tp->t_rtttime)
+ tp->t_rttlow = t_ - tp->t_rtttime;
+ tcp_xmit_timer(tp, TCP_TS_TO_SBT(t_ - tp->t_rtttime));
}
/*
@@ -2780,7 +2796,7 @@
tcp_timer_activate(tp, TT_2MSL,
(tcp_fast_finwait2_recycle ?
tcp_finwait2_timeout :
- TP_MAXIDLE(tp)));
+ TP_MAXIDLE(tp))*tick_sbt);
}
tcp_state_change(tp, TCPS_FIN_WAIT_2);
}
@@ -3004,7 +3020,7 @@
* enter the CLOSE_WAIT state.
*/
case TCPS_SYN_RECEIVED:
- tp->t_starttime = ticks;
+ tp->t_starttime = tcp_ts_getsbintime();
/* FALLTHROUGH */
case TCPS_ESTABLISHED:
tcp_state_change(tp, TCPS_CLOSE_WAIT);
@@ -3334,15 +3350,16 @@
* and update averages and current timeout.
*/
static void
-tcp_xmit_timer(struct tcpcb *tp, int rtt)
+tcp_xmit_timer(struct tcpcb *tp, sbintime_t rtt)
{
- int delta;
- int expected_samples, expected_shift, shift;
+ uint64_t delta;
+ uint64_t expected_samples, expected_shift, shift;
INP_WLOCK_ASSERT(tp->t_inpcb);
/* RFC 7323 Appendix G RTO Calculation Modification */
/* ExpectedSamples = ceiling(FlightSize / (SMSS * 2)) */
+ /* roundup(x, y) == ceiling(x / y) * y */
expected_samples = ((tcp_compute_pipe(tp) + (tp->t_maxseg - 1)) / (tp->t_maxseg << 1));
/* alpha' = alpha / ExpectedSamples */
expected_shift = min(max(fls(expected_samples + 1) - 1, 0), TCP_DELTA_SHIFT);
@@ -3411,8 +3428,8 @@
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
*/
- TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
- max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp)*tick_sbt,
+ max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX*tick_sbt);
/*
* We received an ack for a packet that wasn't retransmitted;
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -749,13 +749,13 @@
/* Timestamps. */
if ((tp->t_flags & TF_RCVD_TSTMP) ||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
- to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+ to.to_tsval = tcp_ts_getsbintime32() + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
/* Set receive buffer autosizing timestamp. */
if (tp->rfbuf_ts == 0 &&
(so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = tcp_ts_getticks();
+ tp->rfbuf_ts = tcp_ts_getsbintime();
}
/* Selective ACK's. */
if (tp->t_flags & TF_SACK_PERMIT) {
@@ -1416,7 +1416,7 @@
* not currently timing anything.
*/
if (tp->t_rtttime == 0) {
- tp->t_rtttime = ticks;
+ tp->t_rtttime = tcp_ts_getsbintime();
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
@@ -1578,8 +1578,7 @@
void
tcp_setpersist(struct tcpcb *tp)
{
- int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
- int tt;
+ uint64_t tt, t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
tp->t_flags &= ~TF_PREVVALID;
if (tcp_timer_active(tp, TT_REXMT))
@@ -1588,7 +1587,7 @@
* Start/restart persistance timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
- TCPTV_PERSMIN, TCPTV_PERSMAX);
+ TCPTV_PERSMIN*tick_sbt, TCPTV_PERSMAX*tick_sbt);
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
Index: sys/netinet/tcp_seq.h
===================================================================
--- sys/netinet/tcp_seq.h
+++ sys/netinet/tcp_seq.h
@@ -63,33 +63,61 @@
(tp)->snd_recover = (tp)->iss
#ifdef _KERNEL
+
+/*
+ * RFC 7323
+ * Section 5.4. Timestamp Clock
+ *
+ * (b) The timestamp clock must not be "too fast".
+ *
+ * The recycling time of the timestamp clock MUST be greater than
+ * MSL seconds. Since the clock (timestamp) is 32 bits and the
+ * worst-case MSL is 255 seconds, the maximum acceptable clock
+ * frequency is one tick every 59 ns.
+ */
+
+/*
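+ * The minimum permissible timestamp tick is 59ns. However, to reduce calculation
+ * overhead we use a tick that makes the 32-bit timestamp recycle every 256
+ * seconds, so that the conversion is a plain 8 bit shift. For comparison, the
+ * recycle time in seconds with a 59ns and a 60ns tick respectively:
+ * - (1<<32)/(1000000000/59) == 253
+ * - (1<<32)/(1000000000/60) == 257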
+ *
+ *
+ * Note that MSL should be a function of RTT. Although 60ns is more than sufficient resolution for
+ * the time being, a 255s MSL on a data center network with a sub-millisecond RTT doesn't make a whole
+ * lot of sense. In the future the MSL should be determined dynamically or at the very least be
+ * configurable per subnet. Nonetheless, fixing the timestamp clock at a rate corresponding to a 256s
+ * MSL gives us what we need for now while otherwise remaining as RFC compliant as possible.
+ *
+ */
+#define SBT_MINTS 256
+#define SBT_MINTS_SHIFT 8
+
+
/*
* Clock macros for RFC 1323 timestamps.
*/
-#define TCP_TS_TO_TICKS(_t) ((_t) * hz / 1000)
+/*
+ * Convert a timestamp value or delta (in timestamp-clock ticks) to sbintime_t
+ * by shifting it left SBT_MINTS_SHIFT bits.  The argument is widened to
+ * sbintime_t first: callers pass 32-bit u_int deltas, and shifting those at
+ * 32-bit width would silently drop the top SBT_MINTS_SHIFT bits.
+ */
+#define	TCP_TS_TO_SBT(_t)	((sbintime_t)(_t) << SBT_MINTS_SHIFT)
-/* Timestamp wrap-around time, 24 days. */
-#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * 1000)
/*
- * tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323.
- * We always use 1ms granularity independent of hz.
+ * RFC defined MSL: 255s (+ 2s rounding slop)
*/
-static __inline u_int
-tcp_ts_getticks(void)
-{
- struct timeval tv;
- u_long ms;
-
- /*
- * getmicrouptime() should be good enough for any 1-1000ms granularity.
- * Do not use getmicrotime() here as it might break nfsroot/tcp.
- */
- getmicrouptime(&tv);
- ms = tv.tv_sec * 1000 + tv.tv_usec / 1000;
-
- return (ms);
-}
+#define TCP_PAWS_IDLE (SBT_MINTS*SBT_1S)
+
+#if defined(__amd64__) || defined(__i386__)
+#include <machine/clock.h>
+#endif
+
+#if !defined(__amd64__) && !defined(__i386__)
+extern sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+#endif
+
+#define tcp_ts_getsbintime() (cpu_tcp_ts_getsbintime)()
+
+/* trivial macro to make intent clearer */
+#define tcp_ts_getsbintime32() ((uint32_t)tcp_ts_getsbintime())
+
#endif /* _KERNEL */
#endif /* _NETINET_TCP_SEQ_H_ */
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -311,6 +311,21 @@
return (hashsize);
}
+
+#if !defined(__amd64__) && !defined(__i386__)
+sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+
+/*
+ * Generic TCP timestamp clock, compiled only when neither __amd64__ nor
+ * __i386__ is defined.
+ *
+ * Reads the uptime with getbinuptime(), converts the complete bintime
+ * (seconds and fraction) to sbintime_t with bttosbt(), and scales the result
+ * down by SBT_MINTS_SHIFT bits.
+ *
+ * Returns the scaled uptime; tcp_ts_getsbintime32() truncates it to 32 bits
+ * for use on the wire.
+ */
+static sbintime_t
+cpu_tcp_ts_getsbintime_(void)
+{
+	struct bintime bt;
+
+	getbinuptime(&bt);
+	/* Using bt.frac alone would discard the seconds part of the uptime. */
+	return (bttosbt(bt) >> SBT_MINTS_SHIFT);
+}
+#endif
+
void
tcp_init(void)
{
@@ -401,7 +416,6 @@
tcp_rexmit_min = TCPTV_MIN;
if (tcp_rexmit_min < 1)
tcp_rexmit_min = 1;
- tcp_rexmit_slop = 0;
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
@@ -433,6 +447,10 @@
#ifdef TCPPCAP
tcp_pcap_init();
#endif
+#if !defined(__amd64__) && !defined(__i386__)
+ cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_;
+#endif
+
}
#ifdef VIMAGE
@@ -826,12 +844,12 @@
*/
tp->t_srtt = TCPTV_SRTTBASE;
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
- tp->t_rttmin = tcp_rexmit_min;
- tp->t_rxtcur = TCPTV_RTOBASE;
- tp->t_delack = tcp_delacktime;
+ tp->t_rttmin = tcp_rexmit_min*tick_sbt;
+ tp->t_rxtcur = TCPTV_RTOBASE*tick_sbt;
+ tp->t_delack = tcp_delacktime*tick_sbt;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
- tp->t_rcvtime = ticks;
+ tp->t_rcvtime = TCP_TS_TO_SBT(tcp_ts_getsbintime());
/*
* IPv4 TTL initialization is necessary for an IPv6 socket as well,
* because the socket may be bound to an IPv6 wildcard address,
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -837,7 +837,7 @@
if (sc->sc_flags & SCF_TIMESTAMP) {
tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
tp->ts_recent = sc->sc_tsreflect;
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
tp->ts_offset = sc->sc_tsoff;
}
#ifdef TCP_SIGNATURE
@@ -884,7 +884,7 @@
tp->t_keepidle = sototcpcb(lso)->t_keepidle;
tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
soisconnected(so);
@@ -1266,7 +1266,7 @@
*/
if (to->to_flags & TOF_TS) {
sc->sc_tsreflect = to->to_tsval;
- sc->sc_ts = tcp_ts_getticks();
+ sc->sc_ts = tcp_ts_getsbintime();
sc->sc_flags |= SCF_TIMESTAMP;
}
if (to->to_flags & TOF_SCALE) {
@@ -1787,7 +1787,7 @@
/* Randomize the timestamp. */
if (sc->sc_flags & SCF_TIMESTAMP) {
sc->sc_ts = arc4random();
- sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
+ sc->sc_tsoff = sc->sc_ts - tcp_ts_getsbintime();
}
TCPSTAT_INC(tcps_sc_sendcookie);
@@ -1877,7 +1877,7 @@
sc->sc_flags |= SCF_TIMESTAMP;
sc->sc_tsreflect = to->to_tsval;
sc->sc_ts = to->to_tsecr;
- sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
+ sc->sc_tsoff = to->to_tsecr - tcp_ts_getsbintime();
}
if (to->to_flags & TOF_SIGNATURE)
Index: sys/netinet/tcp_timer.h
===================================================================
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -127,8 +127,8 @@
/*
* Force a time value to be in a certain range.
*/
-#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
- (tv) = (value) + tcp_rexmit_slop; \
+#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+ tv = value; \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
if ((u_long)(tv) > (u_long)(tvmax)) \
@@ -180,7 +180,6 @@
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
-extern int tcp_rexmit_slop;
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -68,6 +68,7 @@
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_seq.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -102,11 +103,6 @@
&tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
"Minimum Retransmission Timeout");
-int tcp_rexmit_slop;
-SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
- &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
- "Retransmission Timer Slop");
-
static int always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
@@ -356,7 +352,7 @@
TCPSTAT_INC(tcps_finwait2_drops);
tp = tcp_close(tp);
} else {
- if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
+ if (tcp_ts_getsbintime() - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
if (!callout_reset(&tp->t_timers->tt_2msl,
TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
tp->t_timers->tt_flags &= ~TT_2MSL_RST;
@@ -421,7 +417,7 @@
goto dropit;
if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
- if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
+ if (tcp_ts_getsbintime() - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
goto dropit;
/*
* Send a packet designed to force a response
@@ -484,6 +480,7 @@
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ sbintime_t t;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -524,9 +521,11 @@
* (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
+
+ t = tcp_ts_getsbintime() - tp->t_rcvtime;
if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
- (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
- ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
+ (t >= tcp_maxpersistidle*tick_sbt ||
+ t >= TCP_REXMTVAL(tp) * tcp_totbackoff * tick_sbt)) {
TCPSTAT_INC(tcps_persistdrop);
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
@@ -536,7 +535,7 @@
* connection after a much reduced timeout.
*/
if (tp->t_state > TCPS_CLOSE_WAIT &&
- (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
+ t >= TCPTV_PERSMAX*tick_sbt) {
TCPSTAT_INC(tcps_persistdrop);
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
@@ -638,7 +637,7 @@
tp->t_flags |= TF_WASCRECOVERY;
else
tp->t_flags &= ~TF_WASCRECOVERY;
- tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+ tp->t_badrxtwin = tcp_ts_getsbintime() + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
tp->t_flags |= TF_PREVVALID;
} else
tp->t_flags &= ~TF_PREVVALID;
@@ -647,10 +646,10 @@
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
- TCPT_RANGESET(tp->t_rxtcur, rexmt,
- tp->t_rttmin, TCPTV_REXMTMAX);
+ TCPT_RANGESET(tp->t_rxtcur, rexmt*tick_sbt,
+ tp->t_rttmin, TCPTV_REXMTMAX*tick_sbt);
/* 1 < delack < tcp_delacktime - and should scale down with RTO/2 */
- TCPT_RANGESET(tp->t_delack, (rexmt >> 1), 1, tcp_delacktime);
+ TCPT_RANGESET(tp->t_delack, (rexmt >> 1)*tick_sbt, 1, tcp_delacktime*tick_sbt);
/*
* We enter the path for PLMTUD if connection is established or, if
@@ -817,13 +816,14 @@
}
void
-tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, sbintime_t delta)
{
struct callout *t_callout;
timeout_t *f_callout;
struct inpcb *inp = tp->t_inpcb;
int cpu = inp_to_cpuid(inp);
uint32_t f_reset;
+ sbintime_t f_precision;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
@@ -838,26 +838,31 @@
t_callout = &tp->t_timers->tt_delack;
f_callout = tcp_timer_delack;
f_reset = TT_DELACK_RST;
+ f_precision = tick_sbt;
break;
case TT_REXMT:
t_callout = &tp->t_timers->tt_rexmt;
f_callout = tcp_timer_rexmt;
f_reset = TT_REXMT_RST;
+ f_precision = tick_sbt;
break;
case TT_PERSIST:
t_callout = &tp->t_timers->tt_persist;
f_callout = tcp_timer_persist;
f_reset = TT_PERSIST_RST;
+ f_precision = SBT_1S;
break;
case TT_KEEP:
t_callout = &tp->t_timers->tt_keep;
f_callout = tcp_timer_keep;
f_reset = TT_KEEP_RST;
+ f_precision = SBT_1S;
break;
case TT_2MSL:
t_callout = &tp->t_timers->tt_2msl;
f_callout = tcp_timer_2msl;
f_reset = TT_2MSL_RST;
+ f_precision = SBT_1S;
break;
default:
panic("tp %p bad timer_type %#x", tp, timer_type);
@@ -871,10 +876,10 @@
} else {
if ((tp->t_timers->tt_flags & timer_type) == 0) {
tp->t_timers->tt_flags |= (timer_type | f_reset);
- callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+ callout_reset_sbt_on(t_callout, delta, f_precision, f_callout, tp, cpu, 0);
} else {
/* Reset already running callout on the same CPU. */
- if (!callout_reset(t_callout, delta, f_callout, tp)) {
+ if (!callout_reset_sbt(t_callout, delta, f_precision, f_callout, tp, 0)) {
/*
* Callout not cancelled, consider it as not
* properly restarted. */
@@ -992,5 +997,5 @@
xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_2msl))
xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
- xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
+ xtimer->t_rcvtime = ticks_to_msecs((tcp_ts_getsbintime() - tp->t_rcvtime)/tick_sbt);
}
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -568,7 +568,7 @@
*/
if (tw->t_recent && flags == TH_ACK) {
to.to_flags |= TOF_TS;
- to.to_tsval = tcp_ts_getticks() + tw->ts_offset;
+ to.to_tsval = tcp_ts_getsbintime32() + tw->ts_offset;
to.to_tsecr = tw->t_recent;
}
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -508,7 +508,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -596,7 +596,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
error = tcp_output(tp);
out:
@@ -1307,7 +1307,7 @@
}
ti->tcpi_rto = tp->t_rxtcur * tick;
- ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
+ ti->tcpi_last_data_recv = (long)((tcp_ts_getsbintime() - (int)tp->t_rcvtime)/tick_sbt) * tick;
ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
@@ -1547,21 +1547,21 @@
if ((tp->t_state > TCPS_LISTEN) &&
(tp->t_state <= TCPS_CLOSING))
tcp_timer_activate(tp, TT_KEEP,
- TP_KEEPIDLE(tp));
+ TP_KEEPIDLE(tp)*tick_sbt);
break;
case TCP_KEEPINTVL:
tp->t_keepintvl = ui;
if ((tp->t_state == TCPS_FIN_WAIT_2) &&
(TP_MAXIDLE(tp) > 0))
tcp_timer_activate(tp, TT_2MSL,
- TP_MAXIDLE(tp));
+ TP_MAXIDLE(tp)*tick_sbt);
break;
case TCP_KEEPINIT:
tp->t_keepinit = ui;
if (tp->t_state == TCPS_SYN_RECEIVED ||
tp->t_state == TCPS_SYN_SENT)
tcp_timer_activate(tp, TT_KEEP,
- TP_KEEPINIT(tp));
+ TP_KEEPINIT(tp)*tick_sbt);
break;
}
goto unlock_and_done;
@@ -1577,7 +1577,7 @@
if ((tp->t_state == TCPS_FIN_WAIT_2) &&
(TP_MAXIDLE(tp) > 0))
tcp_timer_activate(tp, TT_2MSL,
- TP_MAXIDLE(tp));
+ TP_MAXIDLE(tp)*tick_sbt);
goto unlock_and_done;
#ifdef TCPPCAP
@@ -1829,7 +1829,7 @@
timeout = (tcp_fast_finwait2_recycle) ?
tcp_finwait2_timeout : TP_MAXIDLE(tp);
- tcp_timer_activate(tp, TT_2MSL, timeout);
+ tcp_timer_activate(tp, TT_2MSL, timeout*tick_sbt);
}
}
}
@@ -2075,20 +2075,20 @@
"0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
db_print_indent(indent);
- db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n",
+ db_printf("t_maxopd: %u t_rcvtime: %zu t_startime: %zu\n",
tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
db_print_indent(indent);
- db_printf("t_rttime: %zu t_rtsq: 0x%08x\n",
+ db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
tp->t_rtttime, tp->t_rtseq);
db_print_indent(indent);
- db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %zu\n",
+ db_printf("t_rxtcur: %zu t_maxseg: %u t_srtt: %zu\n",
tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
db_print_indent(indent);
- db_printf("t_rttvar: %zu t_rxtshift: %d t_rttmin: %u "
- "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
+ db_printf("t_rttvar: %zu t_rxtshift: %d t_rttmin: %zu "
+ "t_rttbest: %zu\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
tp->t_rttbest);
db_print_indent(indent);
@@ -2105,7 +2105,7 @@
tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
db_print_indent(indent);
- db_printf("ts_recent: %u ts_recent_age: %u\n",
+ db_printf("ts_recent: %u ts_recent_age: %zu\n",
tp->ts_recent, tp->ts_recent_age);
db_print_indent(indent);
@@ -2114,7 +2114,7 @@
db_print_indent(indent);
db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x "
- "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
+ "t_badrxtwin: %zu\n", tp->snd_ssthresh_prev,
tp->snd_recover_prev, tp->t_badrxtwin);
db_print_indent(indent);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -135,22 +135,22 @@
u_int t_maxopd; /* mss plus options */
- u_int t_rcvtime; /* inactivity time */
- u_int t_starttime; /* time connection was established */
+ sbintime_t t_rcvtime; /* inactivity time */
+ sbintime_t t_starttime; /* time connection was established */
u_int t_rtttime; /* RTT measurement start time */
tcp_seq t_rtseq; /* sequence number being timed */
u_int t_bw_spare1; /* unused */
tcp_seq t_bw_spare2; /* unused */
- int t_rxtcur; /* current retransmit value (ticks) */
+ sbintime_t t_rxtcur; /* current retransmit value */
u_int t_maxseg; /* maximum segment size */
- int t_srtt; /* smoothed round-trip time */
- int t_rttvar; /* variance in round-trip time */
+ uint64_t t_srtt; /* smoothed round-trip time */
+ uint64_t t_rttvar; /* variance in round-trip time */
int t_rxtshift; /* log(2) of rexmt exp. backoff */
- u_int t_rttmin; /* minimum rtt allowed */
- u_int t_rttbest; /* best rtt we've seen */
+ sbintime_t t_rttmin; /* minimum rtt allowed */
+ sbintime_t t_rttbest; /* best rtt we've seen */
u_long t_rttupdated; /* number of times rtt sampled */
u_long max_sndwnd; /* largest window peer has offered */
@@ -163,7 +163,7 @@
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
u_int32_t ts_recent; /* timestamp echo data */
- u_int ts_recent_age; /* when last updated */
+ sbintime_t ts_recent_age; /* when last updated */
u_int32_t ts_offset; /* our timestamp offset */
tcp_seq last_ack_sent;
@@ -172,7 +172,7 @@
u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */
tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
int t_sndzerowin; /* zero-window updates sent */
- u_int t_badrxtwin; /* window for retransmit recovery */
+ sbintime_t t_badrxtwin; /* window for retransmit recovery */
u_char snd_limited; /* segments limited transmitted */
/* SACK related state */
int snd_numholes; /* number of holes seen by sender */
@@ -367,7 +367,7 @@
struct ucred *tw_cred; /* user credentials */
u_int32_t t_recent;
u_int32_t ts_offset; /* our timestamp offset */
- u_int t_starttime;
+ sbintime_t t_starttime;
int tw_time;
TAILQ_ENTRY(tcptw) tw_2msl;
void *tw_pspare; /* TCP_SIGNATURE */
@@ -723,7 +723,7 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+void tcp_timer_activate(struct tcpcb *, uint32_t, sbintime_t);
int tcp_timer_active(struct tcpcb *, uint32_t);
void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -508,7 +508,7 @@
tod->tod_pcb_detach(tod, tp);
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
(void) tcp_output(tp);
} else {
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h
+++ sys/sys/callout.h
@@ -87,7 +87,7 @@
/* if the passed precision is less than 1 microsecond set it to the resolution
* of hardclock
*/
-#define prthresh(pr) ((pr) < SBT_1US ? htick_sbt : (pr))
+#define prthresh(pr) ((pr) < tick_sbt ? htick_sbt : (pr))
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c
+++ sys/x86/x86/tsc.c
@@ -51,6 +51,12 @@
#include "cpufreq_if.h"
+/*
+ * the number of tsc increments per minimum timestamp
+ */
+#define TSC_FREQ_MINTS (tsc_freq / (1000000000/60))
+
+
uint64_t tsc_freq;
uint64_t tsc_sbt;
int tsc_is_invariant;
@@ -58,6 +64,10 @@
static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
+sbintime_t cpu_tcp_ts_getsbintime_rdtsc(void);
+sbintime_t cpu_tcp_ts_getsbintime_rdtscp(void);
+sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+
SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
&tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
@@ -582,6 +592,12 @@
tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
tc_init(&tsc_timecounter);
}
+ /* XXX yes this needs to be revisited */
+#if defined(__amd64__)
+ cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_rdtscp;
+#elif defined(__i386__)
+ cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_rdtsc;
+#endif
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
@@ -734,6 +750,21 @@
return (tc == &tsc_timecounter);
}
+/*
+ * TCP timestamp clock backed by rdtsc(): returns the current TSC count
+ * divided by TSC_FREQ_MINTS (the number of TSC increments per minimum
+ * timestamp).
+ *
+ * NOTE(review): TSC_FREQ_MINTS is an integer quotient of tsc_freq and is 0
+ * when tsc_freq is below 1000000000/60 -- confirm tsc_freq is always
+ * calibrated before this can be called.
+ */
+sbintime_t
+cpu_tcp_ts_getsbintime_rdtsc(void)
+{
+	const uint64_t tsc_per_ts = TSC_FREQ_MINTS;
+
+	return (rdtsc() / tsc_per_ts);
+}
+
+/*
+ * TCP timestamp clock backed by rdtscp(): returns the current TSC count
+ * divided by TSC_FREQ_MINTS (the number of TSC increments per minimum
+ * timestamp).
+ *
+ * NOTE(review): TSC_FREQ_MINTS is an integer quotient of tsc_freq and is 0
+ * when tsc_freq is below 1000000000/60 -- confirm tsc_freq is always
+ * calibrated before this can be called.
+ */
+sbintime_t
+cpu_tcp_ts_getsbintime_rdtscp(void)
+{
+	const uint64_t tsc_per_ts = TSC_FREQ_MINTS;
+
+	return (rdtscp() / tsc_per_ts);
+}
+
+
#ifdef COMPAT_FREEBSD32
uint32_t
cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Apr 22, 7:22 PM (2 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31995674
Default Alt Text
D4366.id10757.diff (31 KB)
Attached To
Mode
D4366: convert tcp timestamps to scaled sbintime
Attached
Detach File
Event Timeline
Log In to Comment