Page MenuHomeFreeBSD

D4366.id10757.diff
No OneTemporary

D4366.id10757.diff

Index: sys/amd64/include/clock.h
===================================================================
--- sys/amd64/include/clock.h
+++ sys/amd64/include/clock.h
@@ -41,6 +41,8 @@
int timer_spkr_release(void);
void timer_spkr_setfreq(int freq);
+
+extern sbintime_t (*cpu_tcp_ts_getsbintime)(void);
#endif /* _KERNEL */
#endif /* !_MACHINE_CLOCK_H_ */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -59,6 +59,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/hhook.h>
+#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h> /* for proc0 declaration */
@@ -238,7 +239,7 @@
struct tcpcb *, int, int);
static void tcp_pulloutofband(struct socket *,
struct tcphdr *, struct mbuf *, int);
-static void tcp_xmit_timer(struct tcpcb *, int);
+static void tcp_xmit_timer(struct tcpcb *, sbintime_t);
static void tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
static void inline cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
uint16_t type);
@@ -347,7 +348,7 @@
}
TCPT_RANGESET(tp->t_rxtcur,
((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
- tp->t_rttmin, TCPTV_REXMTMAX);
+ tp->t_rttmin, TCPTV_REXMTMAX*tick_sbt);
}
if (metrics.rmx_ssthresh) {
/*
@@ -1484,6 +1485,7 @@
int thflags, acked, ourfinisacked, needoutput = 0;
int rstreason, todrop, win;
u_long tiwin;
+ sbintime_t t;
char *s;
struct in_conninfo *inc;
struct mbuf *mfree;
@@ -1541,9 +1543,9 @@
* XXX: This should be done after segment
* validation to ignore broken/spoofed segs.
*/
- tp->t_rcvtime = ticks;
+ tp->t_rcvtime = tcp_ts_getsbintime();
if (TCPS_HAVEESTABLISHED(tp->t_state))
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)*tick_sbt);
/*
* Scale up the window into a 32-bit value.
@@ -1594,7 +1596,7 @@
*/
if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
to.to_tsecr -= tp->ts_offset;
- if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
+ if (TSTMP_GT(to.to_tsecr, tcp_ts_getsbintime32()))
to.to_tsecr = 0;
}
/*
@@ -1637,7 +1639,7 @@
if (to.to_flags & TOF_TS) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
}
if (to.to_flags & TOF_MSS)
tcp_mss(tp, to.to_mss);
@@ -1681,7 +1683,7 @@
*/
if ((to.to_flags & TOF_TS) != 0 &&
SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
tp->ts_recent = to.to_tsval;
}
@@ -1705,7 +1707,7 @@
*/
if (tp->t_rxtshift == 1 &&
tp->t_flags & TF_PREVVALID &&
- (int)(ticks - tp->t_badrxtwin) < 0) {
+ (int)(tcp_ts_getsbintime() - tp->t_badrxtwin) < 0) {
cc_cong_signal(tp, th, CC_RTO_ERR);
}
@@ -1719,20 +1721,30 @@
*/
if ((to.to_flags & TOF_TS) != 0 &&
to.to_tsecr) {
- u_int t;
+ u_int t, curts;
- t = tcp_ts_getticks() - to.to_tsecr;
+
+ curts = tcp_ts_getsbintime32();
+ /*
+ * cope with the 32-bit timestamp wrapping (every ~256s)
+ */
+ if (__predict_true(curts > to.to_tsecr))
+ t = curts - to.to_tsecr;
+ else
+ t = UINT_MAX - to.to_tsecr + curts + 1; /* modulo 2^32; 0 when equal */
if (!tp->t_rttlow || tp->t_rttlow > t)
tp->t_rttlow = t;
tcp_xmit_timer(tp,
- TCP_TS_TO_TICKS(t) + 1);
+ TCP_TS_TO_SBT(t) + 1);
} else if (tp->t_rtttime &&
- SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ SEQ_GT(th->th_ack, tp->t_rtseq)) {
+ sbintime_t t;
+
+ t = tcp_ts_getsbintime();
if (!tp->t_rttlow ||
- tp->t_rttlow > ticks - tp->t_rtttime)
- tp->t_rttlow = ticks - tp->t_rtttime;
- tcp_xmit_timer(tp,
- ticks - tp->t_rtttime);
+ tp->t_rttlow > t - tp->t_rtttime)
+ tp->t_rttlow = t - tp->t_rtttime;
+ tcp_xmit_timer(tp, t - tp->t_rtttime);
}
acked = BYTES_THIS_ACK(tp, th);
@@ -2010,7 +2022,7 @@
* SYN_SENT --> ESTABLISHED
* SYN_SENT* --> FIN_WAIT_1
*/
- tp->t_starttime = ticks;
+ tp->t_starttime = tcp_ts_getsbintime();
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
@@ -2021,7 +2033,7 @@
mtod(m, const char *), tp, th);
cc_conn_init(tp);
tcp_timer_activate(tp, TT_KEEP,
- TP_KEEPIDLE(tp));
+ TP_KEEPIDLE(tp)*tick_sbt);
}
} else {
/*
@@ -2184,7 +2196,7 @@
TSTMP_LT(to.to_tsval, tp->ts_recent)) {
/* Check to see if ts_recent is over 24 days old. */
- if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
+ if (tcp_ts_getsbintime() - tp->ts_recent_age > TCP_PAWS_IDLE) {
/*
* Invalidate ts_recent. If this segment updates
* ts_recent, the age will be reset later and ts_recent
@@ -2338,7 +2350,7 @@
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN|TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
tp->ts_recent = to.to_tsval;
}
@@ -2382,7 +2394,7 @@
* SYN-RECEIVED -> ESTABLISHED
* SYN-RECEIVED* -> FIN-WAIT-1
*/
- tp->t_starttime = ticks;
+ tp->t_starttime = tcp_ts_getsbintime();
if (tp->t_flags & TF_NEEDFIN) {
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
@@ -2391,7 +2403,7 @@
TCP_PROBE5(accept__established, NULL, tp,
mtod(m, const char *), tp, th);
cc_conn_init(tp);
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)*tick_sbt);
}
/*
* If segment contains data or ACK, will call tcp_reass()
@@ -2666,8 +2678,9 @@
* original cwnd and ssthresh, and proceed to transmit where
* we left off.
*/
+ t = tcp_ts_getsbintime();
if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
- (int)(ticks - tp->t_badrxtwin) < 0)
+ (int)(t - tp->t_badrxtwin) < 0)
cc_cong_signal(tp, th, CC_RTO_ERR);
/*
@@ -2685,16 +2698,19 @@
* huge RTT and blow up the retransmit timer.
*/
if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
- u_int t;
+ u_int t_;
- t = tcp_ts_getticks() - to.to_tsecr;
- if (!tp->t_rttlow || tp->t_rttlow > t)
- tp->t_rttlow = t;
- tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
+ t_ = ((uint32_t)t) - to.to_tsecr; /* RTT in timestamp units */
+ if (!tp->t_rttlow || tp->t_rttlow > t_)
+ tp->t_rttlow = t_;
+ tcp_xmit_timer(tp, TCP_TS_TO_SBT((sbintime_t)t_) + 1); /* the RTT sample, not the current time */
} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
- if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
- tp->t_rttlow = ticks - tp->t_rtttime;
- tcp_xmit_timer(tp, ticks - tp->t_rtttime);
+ u_int t_;
+
+ t_ = ((uint32_t)t);
+ if (!tp->t_rttlow || tp->t_rttlow > t_ - tp->t_rtttime)
+ tp->t_rttlow = t_ - tp->t_rtttime;
+ tcp_xmit_timer(tp, TCP_TS_TO_SBT(t_ - tp->t_rtttime));
}
/*
@@ -2780,7 +2796,7 @@
tcp_timer_activate(tp, TT_2MSL,
(tcp_fast_finwait2_recycle ?
tcp_finwait2_timeout :
- TP_MAXIDLE(tp)));
+ TP_MAXIDLE(tp))*tick_sbt);
}
tcp_state_change(tp, TCPS_FIN_WAIT_2);
}
@@ -3004,7 +3020,7 @@
* enter the CLOSE_WAIT state.
*/
case TCPS_SYN_RECEIVED:
- tp->t_starttime = ticks;
+ tp->t_starttime = tcp_ts_getsbintime();
/* FALLTHROUGH */
case TCPS_ESTABLISHED:
tcp_state_change(tp, TCPS_CLOSE_WAIT);
@@ -3334,15 +3350,16 @@
* and update averages and current timeout.
*/
static void
-tcp_xmit_timer(struct tcpcb *tp, int rtt)
+tcp_xmit_timer(struct tcpcb *tp, sbintime_t rtt)
{
- int delta;
- int expected_samples, expected_shift, shift;
+ uint64_t delta;
+ uint64_t expected_samples, expected_shift, shift;
INP_WLOCK_ASSERT(tp->t_inpcb);
/* RFC 7323 Appendix G RTO Calculation Modification */
/* ExpectedSamples = ceiling(FlightSize / (SMSS * 2)) */
+ /* roundup(x, y) == ceiling(x / y) * y */
expected_samples = ((tcp_compute_pipe(tp) + (tp->t_maxseg - 1)) / (tp->t_maxseg << 1));
/* alpha' = alpha / ExpectedSamples */
expected_shift = min(max(fls(expected_samples + 1) - 1, 0), TCP_DELTA_SHIFT);
@@ -3411,8 +3428,8 @@
* statistical, we have to test that we don't drop below
* the minimum feasible timer (which is 2 ticks).
*/
- TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
- max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp)*tick_sbt,
+ max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX*tick_sbt);
/*
* We received an ack for a packet that wasn't retransmitted;
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -749,13 +749,13 @@
/* Timestamps. */
if ((tp->t_flags & TF_RCVD_TSTMP) ||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
- to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
+ to.to_tsval = tcp_ts_getsbintime32() + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
/* Set receive buffer autosizing timestamp. */
if (tp->rfbuf_ts == 0 &&
(so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = tcp_ts_getticks();
+ tp->rfbuf_ts = tcp_ts_getsbintime();
}
/* Selective ACK's. */
if (tp->t_flags & TF_SACK_PERMIT) {
@@ -1416,7 +1416,7 @@
* not currently timing anything.
*/
if (tp->t_rtttime == 0) {
- tp->t_rtttime = ticks;
+ tp->t_rtttime = tcp_ts_getsbintime();
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
@@ -1578,8 +1578,7 @@
void
tcp_setpersist(struct tcpcb *tp)
{
- int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
- int tt;
+ uint64_t tt, t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
tp->t_flags &= ~TF_PREVVALID;
if (tcp_timer_active(tp, TT_REXMT))
@@ -1588,7 +1587,7 @@
* Start/restart persistance timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
- TCPTV_PERSMIN, TCPTV_PERSMAX);
+ TCPTV_PERSMIN*tick_sbt, TCPTV_PERSMAX*tick_sbt);
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
Index: sys/netinet/tcp_seq.h
===================================================================
--- sys/netinet/tcp_seq.h
+++ sys/netinet/tcp_seq.h
@@ -63,33 +63,61 @@
(tp)->snd_recover = (tp)->iss
#ifdef _KERNEL
+
+/*
+ * RFC 7323
+ * Section 5.4. Timestamp Clock
+ *
+ * (b) The timestamp clock must not be "too fast".
+ *
+ * The recycling time of the timestamp clock MUST be greater than
+ * MSL seconds. Since the clock (timestamp) is 32 bits and the
+ * worst-case MSL is 255 seconds, the maximum acceptable clock
+ * frequency is one tick every 59 ns.
+ */
+
+/*
+ * The minimum permissible timestamp tick is 59ns. However, to reduce calculation
+ * overhead we use 256 sbintime units per tick, i.e. a plain 8 bit shift:
+ * - (1<<32)/(1000000000/59) == 253
+ * - (1<<32)/(1000000000/60) == 257
+ *
+ *
+ * Note that MSL should be a function of RTT. Although 60ns is more than sufficient resolution for
+ * the time being, a 255s MSL on a data center network with a sub-millisecond RTT doesn't make a
+ * whole lot of sense. In the future the MSL should be determined dynamically, or at the very least
+ * be configurable per subnet. Nonetheless, fixing the timestamp clock at a rate corresponding to a
+ * 256s MSL gives us what we need for now while otherwise remaining as RFC compliant as possible.
+ *
+ */
+#define SBT_MINTS 256
+#define SBT_MINTS_SHIFT 8
+
+
/*
* Clock macros for RFC 1323 timestamps.
*/
-#define TCP_TS_TO_TICKS(_t) ((_t) * hz / 1000)
+#define TCP_TS_TO_SBT(_t) (((sbintime_t)(_t)) << SBT_MINTS_SHIFT) /* widen first: a 32-bit _t would lose its top bits */
-/* Timestamp wrap-around time, 24 days. */
-#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * 1000)
/*
- * tcp_ts_getticks() in ms, should be 1ms < x < 1000ms according to RFC 1323.
- * We always use 1ms granularity independent of hz.
+ * RFC defined MSL: 255s (+ 2s rounding slop)
*/
-static __inline u_int
-tcp_ts_getticks(void)
-{
- struct timeval tv;
- u_long ms;
-
- /*
- * getmicrouptime() should be good enough for any 1-1000ms granularity.
- * Do not use getmicrotime() here as it might break nfsroot/tcp.
- */
- getmicrouptime(&tv);
- ms = tv.tv_sec * 1000 + tv.tv_usec / 1000;
-
- return (ms);
-}
+#define TCP_PAWS_IDLE (SBT_MINTS*SBT_1S)
+
+#if defined(__amd64__) || defined(__i386__)
+#include <machine/clock.h>
+#endif
+
+#if !defined(__amd64__) && !defined(__i386__)
+extern sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+#endif
+
+#define tcp_ts_getsbintime() (cpu_tcp_ts_getsbintime)()
+
+/* trivial macro to make intent clearer */
+#define tcp_ts_getsbintime32() ((uint32_t)tcp_ts_getsbintime())
+
#endif /* _KERNEL */
#endif /* _NETINET_TCP_SEQ_H_ */
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -311,6 +311,21 @@
return (hashsize);
}
+
+#if !defined(__amd64__) && !defined(__i386__)
+sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+
+static sbintime_t
+cpu_tcp_ts_getsbintime_(void)
+{
+ struct bintime bt;
+
+ getbinuptime(&bt);
+ /* Whole uptime (seconds and fraction) expressed in 256-sbintime timestamp ticks. */
+ return (bttosbt(bt) >> SBT_MINTS_SHIFT);
+}
+#endif
+
void
tcp_init(void)
{
@@ -401,7 +416,6 @@
tcp_rexmit_min = TCPTV_MIN;
if (tcp_rexmit_min < 1)
tcp_rexmit_min = 1;
- tcp_rexmit_slop = 0;
tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
tcp_tcbhashsize = hashsize;
@@ -433,6 +447,10 @@
#ifdef TCPPCAP
tcp_pcap_init();
#endif
+#if !defined(__amd64__) && !defined(__i386__)
+ cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_;
+#endif
+
}
#ifdef VIMAGE
@@ -826,12 +844,12 @@
*/
tp->t_srtt = TCPTV_SRTTBASE;
tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
- tp->t_rttmin = tcp_rexmit_min;
- tp->t_rxtcur = TCPTV_RTOBASE;
- tp->t_delack = tcp_delacktime;
+ tp->t_rttmin = tcp_rexmit_min*tick_sbt;
+ tp->t_rxtcur = TCPTV_RTOBASE*tick_sbt;
+ tp->t_delack = tcp_delacktime*tick_sbt;
tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
- tp->t_rcvtime = ticks;
+ tp->t_rcvtime = TCP_TS_TO_SBT(tcp_ts_getsbintime());
/*
* IPv4 TTL initialization is necessary for an IPv6 socket as well,
* because the socket may be bound to an IPv6 wildcard address,
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -837,7 +837,7 @@
if (sc->sc_flags & SCF_TIMESTAMP) {
tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
tp->ts_recent = sc->sc_tsreflect;
- tp->ts_recent_age = tcp_ts_getticks();
+ tp->ts_recent_age = tcp_ts_getsbintime();
tp->ts_offset = sc->sc_tsoff;
}
#ifdef TCP_SIGNATURE
@@ -884,7 +884,7 @@
tp->t_keepidle = sototcpcb(lso)->t_keepidle;
tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
soisconnected(so);
@@ -1266,7 +1266,7 @@
*/
if (to->to_flags & TOF_TS) {
sc->sc_tsreflect = to->to_tsval;
- sc->sc_ts = tcp_ts_getticks();
+ sc->sc_ts = tcp_ts_getsbintime();
sc->sc_flags |= SCF_TIMESTAMP;
}
if (to->to_flags & TOF_SCALE) {
@@ -1787,7 +1787,7 @@
/* Randomize the timestamp. */
if (sc->sc_flags & SCF_TIMESTAMP) {
sc->sc_ts = arc4random();
- sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
+ sc->sc_tsoff = sc->sc_ts - tcp_ts_getsbintime();
}
TCPSTAT_INC(tcps_sc_sendcookie);
@@ -1877,7 +1877,7 @@
sc->sc_flags |= SCF_TIMESTAMP;
sc->sc_tsreflect = to->to_tsval;
sc->sc_ts = to->to_tsecr;
- sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
+ sc->sc_tsoff = to->to_tsecr - tcp_ts_getsbintime();
}
if (to->to_flags & TOF_SIGNATURE)
Index: sys/netinet/tcp_timer.h
===================================================================
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -127,8 +127,8 @@
/*
* Force a time value to be in a certain range.
*/
-#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
- (tv) = (value) + tcp_rexmit_slop; \
+#define TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+ tv = value; \
if ((u_long)(tv) < (u_long)(tvmin)) \
(tv) = (tvmin); \
if ((u_long)(tv) > (u_long)(tvmax)) \
@@ -180,7 +180,6 @@
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
-extern int tcp_rexmit_slop;
extern int tcp_msl;
extern int tcp_ttl; /* time to live for TCP segs */
extern int tcp_backoff[];
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -68,6 +68,7 @@
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_seq.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -102,11 +103,6 @@
&tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
"Minimum Retransmission Timeout");
-int tcp_rexmit_slop;
-SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
- &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
- "Retransmission Timer Slop");
-
static int always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
&always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
@@ -356,7 +352,7 @@
TCPSTAT_INC(tcps_finwait2_drops);
tp = tcp_close(tp);
} else {
- if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
+ if (tcp_ts_getsbintime() - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
if (!callout_reset(&tp->t_timers->tt_2msl,
TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
tp->t_timers->tt_flags &= ~TT_2MSL_RST;
@@ -421,7 +417,7 @@
goto dropit;
if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
- if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
+ if (tcp_ts_getsbintime() - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
goto dropit;
/*
* Send a packet designed to force a response
@@ -484,6 +480,7 @@
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
+ sbintime_t t;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
int ostate;
@@ -524,9 +521,11 @@
* (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
*/
+
+ t = tcp_ts_getsbintime() - tp->t_rcvtime;
if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
- (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
- ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
+ (t >= tcp_maxpersistidle*tick_sbt ||
+ t >= TCP_REXMTVAL(tp) * tcp_totbackoff * tick_sbt)) {
TCPSTAT_INC(tcps_persistdrop);
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
@@ -536,7 +535,7 @@
* connection after a much reduced timeout.
*/
if (tp->t_state > TCPS_CLOSE_WAIT &&
- (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
+ t >= TCPTV_PERSMAX*tick_sbt) {
TCPSTAT_INC(tcps_persistdrop);
tp = tcp_drop(tp, ETIMEDOUT);
goto out;
@@ -638,7 +637,7 @@
tp->t_flags |= TF_WASCRECOVERY;
else
tp->t_flags &= ~TF_WASCRECOVERY;
- tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
+ tp->t_badrxtwin = tcp_ts_getsbintime() + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
tp->t_flags |= TF_PREVVALID;
} else
tp->t_flags &= ~TF_PREVVALID;
@@ -647,10 +646,10 @@
rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
else
rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
- TCPT_RANGESET(tp->t_rxtcur, rexmt,
- tp->t_rttmin, TCPTV_REXMTMAX);
+ TCPT_RANGESET(tp->t_rxtcur, rexmt*tick_sbt,
+ tp->t_rttmin, TCPTV_REXMTMAX*tick_sbt);
/* 1 < delack < tcp_delacktime - and should scale down with RTO/2 */
- TCPT_RANGESET(tp->t_delack, (rexmt >> 1), 1, tcp_delacktime);
+ TCPT_RANGESET(tp->t_delack, (rexmt >> 1)*tick_sbt, 1, tcp_delacktime*tick_sbt);
/*
* We enter the path for PLMTUD if connection is established or, if
@@ -817,13 +816,14 @@
}
void
-tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, sbintime_t delta)
{
struct callout *t_callout;
timeout_t *f_callout;
struct inpcb *inp = tp->t_inpcb;
int cpu = inp_to_cpuid(inp);
uint32_t f_reset;
+ sbintime_t f_precision;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
@@ -838,26 +838,31 @@
t_callout = &tp->t_timers->tt_delack;
f_callout = tcp_timer_delack;
f_reset = TT_DELACK_RST;
+ f_precision = tick_sbt;
break;
case TT_REXMT:
t_callout = &tp->t_timers->tt_rexmt;
f_callout = tcp_timer_rexmt;
f_reset = TT_REXMT_RST;
+ f_precision = tick_sbt;
break;
case TT_PERSIST:
t_callout = &tp->t_timers->tt_persist;
f_callout = tcp_timer_persist;
f_reset = TT_PERSIST_RST;
+ f_precision = SBT_1S;
break;
case TT_KEEP:
t_callout = &tp->t_timers->tt_keep;
f_callout = tcp_timer_keep;
f_reset = TT_KEEP_RST;
+ f_precision = SBT_1S;
break;
case TT_2MSL:
t_callout = &tp->t_timers->tt_2msl;
f_callout = tcp_timer_2msl;
f_reset = TT_2MSL_RST;
+ f_precision = SBT_1S;
break;
default:
panic("tp %p bad timer_type %#x", tp, timer_type);
@@ -871,10 +876,10 @@
} else {
if ((tp->t_timers->tt_flags & timer_type) == 0) {
tp->t_timers->tt_flags |= (timer_type | f_reset);
- callout_reset_on(t_callout, delta, f_callout, tp, cpu);
+ callout_reset_sbt_on(t_callout, delta, f_precision, f_callout, tp, cpu, 0);
} else {
/* Reset already running callout on the same CPU. */
- if (!callout_reset(t_callout, delta, f_callout, tp)) {
+ if (!callout_reset_sbt(t_callout, delta, f_precision, f_callout, tp, 0)) {
/*
* Callout not cancelled, consider it as not
* properly restarted. */
@@ -992,5 +997,5 @@
xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_2msl))
xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
- xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
+ xtimer->t_rcvtime = ticks_to_msecs((tcp_ts_getsbintime() - tp->t_rcvtime)/tick_sbt);
}
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -568,7 +568,7 @@
*/
if (tw->t_recent && flags == TH_ACK) {
to.to_flags |= TOF_TS;
- to.to_tsval = tcp_ts_getticks() + tw->ts_offset;
+ to.to_tsval = tcp_ts_getsbintime32() + tw->ts_offset;
to.to_tsecr = tw->t_recent;
}
optlen = tcp_addoptions(&to, (u_char *)(th + 1));
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -508,7 +508,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -596,7 +596,7 @@
(error = tcp_offload_connect(so, nam)) == 0)
goto out;
#endif
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
error = tcp_output(tp);
out:
@@ -1307,7 +1307,7 @@
}
ti->tcpi_rto = tp->t_rxtcur * tick;
- ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
+ ti->tcpi_last_data_recv = (long)((tcp_ts_getsbintime() - (int)tp->t_rcvtime)/tick_sbt) * tick;
ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
@@ -1547,21 +1547,21 @@
if ((tp->t_state > TCPS_LISTEN) &&
(tp->t_state <= TCPS_CLOSING))
tcp_timer_activate(tp, TT_KEEP,
- TP_KEEPIDLE(tp));
+ TP_KEEPIDLE(tp)*tick_sbt);
break;
case TCP_KEEPINTVL:
tp->t_keepintvl = ui;
if ((tp->t_state == TCPS_FIN_WAIT_2) &&
(TP_MAXIDLE(tp) > 0))
tcp_timer_activate(tp, TT_2MSL,
- TP_MAXIDLE(tp));
+ TP_MAXIDLE(tp)*tick_sbt);
break;
case TCP_KEEPINIT:
tp->t_keepinit = ui;
if (tp->t_state == TCPS_SYN_RECEIVED ||
tp->t_state == TCPS_SYN_SENT)
tcp_timer_activate(tp, TT_KEEP,
- TP_KEEPINIT(tp));
+ TP_KEEPINIT(tp)*tick_sbt);
break;
}
goto unlock_and_done;
@@ -1577,7 +1577,7 @@
if ((tp->t_state == TCPS_FIN_WAIT_2) &&
(TP_MAXIDLE(tp) > 0))
tcp_timer_activate(tp, TT_2MSL,
- TP_MAXIDLE(tp));
+ TP_MAXIDLE(tp)*tick_sbt);
goto unlock_and_done;
#ifdef TCPPCAP
@@ -1829,7 +1829,7 @@
timeout = (tcp_fast_finwait2_recycle) ?
tcp_finwait2_timeout : TP_MAXIDLE(tp);
- tcp_timer_activate(tp, TT_2MSL, timeout);
+ tcp_timer_activate(tp, TT_2MSL, timeout*tick_sbt);
}
}
}
@@ -2075,20 +2075,20 @@
"0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
db_print_indent(indent);
- db_printf("t_maxopd: %u t_rcvtime: %u t_startime: %u\n",
+ db_printf("t_maxopd: %u t_rcvtime: %zu t_startime: %zu\n",
tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
db_print_indent(indent);
- db_printf("t_rttime: %zu t_rtsq: 0x%08x\n",
+ db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
tp->t_rtttime, tp->t_rtseq);
db_print_indent(indent);
- db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %zu\n",
+ db_printf("t_rxtcur: %zu t_maxseg: %u t_srtt: %zu\n",
tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
db_print_indent(indent);
- db_printf("t_rttvar: %zu t_rxtshift: %d t_rttmin: %u "
- "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
+ db_printf("t_rttvar: %zu t_rxtshift: %d t_rttmin: %zu "
+ "t_rttbest: %zu\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
tp->t_rttbest);
db_print_indent(indent);
@@ -2105,7 +2105,7 @@
tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
db_print_indent(indent);
- db_printf("ts_recent: %u ts_recent_age: %u\n",
+ db_printf("ts_recent: %u ts_recent_age: %zu\n",
tp->ts_recent, tp->ts_recent_age);
db_print_indent(indent);
@@ -2114,7 +2114,7 @@
db_print_indent(indent);
db_printf("snd_ssthresh_prev: %lu snd_recover_prev: 0x%08x "
- "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
+ "t_badrxtwin: %zu\n", tp->snd_ssthresh_prev,
tp->snd_recover_prev, tp->t_badrxtwin);
db_print_indent(indent);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -135,22 +135,22 @@
u_int t_maxopd; /* mss plus options */
- u_int t_rcvtime; /* inactivity time */
- u_int t_starttime; /* time connection was established */
+ sbintime_t t_rcvtime; /* inactivity time */
+ sbintime_t t_starttime; /* time connection was established */
u_int t_rtttime; /* RTT measurement start time */
tcp_seq t_rtseq; /* sequence number being timed */
u_int t_bw_spare1; /* unused */
tcp_seq t_bw_spare2; /* unused */
- int t_rxtcur; /* current retransmit value (ticks) */
+ sbintime_t t_rxtcur; /* current retransmit value */
u_int t_maxseg; /* maximum segment size */
- int t_srtt; /* smoothed round-trip time */
- int t_rttvar; /* variance in round-trip time */
+ uint64_t t_srtt; /* smoothed round-trip time */
+ uint64_t t_rttvar; /* variance in round-trip time */
int t_rxtshift; /* log(2) of rexmt exp. backoff */
- u_int t_rttmin; /* minimum rtt allowed */
- u_int t_rttbest; /* best rtt we've seen */
+ sbintime_t t_rttmin; /* minimum rtt allowed */
+ sbintime_t t_rttbest; /* best rtt we've seen */
u_long t_rttupdated; /* number of times rtt sampled */
u_long max_sndwnd; /* largest window peer has offered */
@@ -163,7 +163,7 @@
u_char rcv_scale; /* window scaling for recv window */
u_char request_r_scale; /* pending window scaling */
u_int32_t ts_recent; /* timestamp echo data */
- u_int ts_recent_age; /* when last updated */
+ sbintime_t ts_recent_age; /* when last updated */
u_int32_t ts_offset; /* our timestamp offset */
tcp_seq last_ack_sent;
@@ -172,7 +172,7 @@
u_long snd_ssthresh_prev; /* ssthresh prior to retransmit */
tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
int t_sndzerowin; /* zero-window updates sent */
- u_int t_badrxtwin; /* window for retransmit recovery */
+ sbintime_t t_badrxtwin; /* window for retransmit recovery */
u_char snd_limited; /* segments limited transmitted */
/* SACK related state */
int snd_numholes; /* number of holes seen by sender */
@@ -367,7 +367,7 @@
struct ucred *tw_cred; /* user credentials */
u_int32_t t_recent;
u_int32_t ts_offset; /* our timestamp offset */
- u_int t_starttime;
+ sbintime_t t_starttime;
int tw_time;
TAILQ_ENTRY(tcptw) tw_2msl;
void *tw_pspare; /* TCP_SIGNATURE */
@@ -723,7 +723,7 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, void *, void *);
-void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
+void tcp_timer_activate(struct tcpcb *, uint32_t, sbintime_t);
int tcp_timer_active(struct tcpcb *, uint32_t);
void tcp_timer_stop(struct tcpcb *, uint32_t);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -508,7 +508,7 @@
tod->tod_pcb_detach(tod, tp);
KASSERT(!(tp->t_flags & TF_TOE),
("%s: tp %p still offloaded.", __func__, tp));
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)*tick_sbt);
(void) tcp_output(tp);
} else {
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h
+++ sys/sys/callout.h
@@ -87,7 +87,7 @@
/* if the passed precision is less than 1 microsecond set it to the resolution
* of hardclock
*/
-#define prthresh(pr) ((pr) < SBT_1US ? htick_sbt : (pr))
+#define prthresh(pr) ((pr) < tick_sbt ? htick_sbt : (pr))
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c
+++ sys/x86/x86/tsc.c
@@ -51,6 +51,12 @@
#include "cpufreq_if.h"
+/*
+ * the number of tsc increments per minimum timestamp
+ */
+#define TSC_FREQ_MINTS (tsc_freq / (1000000000/60))
+
+
uint64_t tsc_freq;
uint64_t tsc_sbt;
int tsc_is_invariant;
@@ -58,6 +64,10 @@
static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
+sbintime_t cpu_tcp_ts_getsbintime_rdtsc(void);
+sbintime_t cpu_tcp_ts_getsbintime_rdtscp(void);
+sbintime_t (*cpu_tcp_ts_getsbintime)(void);
+
SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
&tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
@@ -582,6 +592,12 @@
tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
tc_init(&tsc_timecounter);
}
+ /* XXX yes this needs to be revisited */
+#if defined(__amd64__)
+ cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_rdtscp;
+#elif defined(__i386__)
+ cpu_tcp_ts_getsbintime = cpu_tcp_ts_getsbintime_rdtsc;
+#endif
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
@@ -734,6 +750,21 @@
return (tc == &tsc_timecounter);
}
+sbintime_t
+cpu_tcp_ts_getsbintime_rdtsc(void)
+{
+ /* TSC count in TCP timestamp ticks. NOTE(review): TSC_FREQ_MINTS is 0 if tsc_freq < 1000000000/60 -- confirm it cannot be. */
+ return (rdtsc() / TSC_FREQ_MINTS);
+}
+
+sbintime_t
+cpu_tcp_ts_getsbintime_rdtscp(void)
+{
+ /* TSC count (via rdtscp) in TCP timestamp ticks. NOTE(review): divisor is 0 if tsc_freq < 1000000000/60; presumably needs an RDTSCP-capable CPU -- confirm both. */
+ return (rdtscp() / TSC_FREQ_MINTS);
+}
+
+
#ifdef COMPAT_FREEBSD32
uint32_t
cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32,

File Metadata

Mime Type
text/plain
Expires
Wed, Apr 22, 7:22 PM (2 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31995674
Default Alt Text
D4366.id10757.diff (31 KB)

Event Timeline