Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F137958627
D36315.id109757.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
8 KB
Referenced Files
None
Subscribers
None
D36315.id109757.diff
View Options
Index: sys/dev/cxgbe/adapter.h
===================================================================
--- sys/dev/cxgbe/adapter.h
+++ sys/dev/cxgbe/adapter.h
@@ -863,6 +863,15 @@
struct clip_entry;
+#define CNT_CAL_INFO 3 /* number of clock_sync slots kept in adapter.cal_info[] */
+struct clock_sync { /* one calibration sample pairing the hardware counter with system uptime */
+ uint64_t hw_cur; /* A_SGE_TIMESTAMP_LO value read for this sample */
+ uint64_t hw_prev; /* hw_cur copied from the sample taken before this one */
+ uint64_t rt_cur; /* nanouptime() at this sample, flattened to nanoseconds */
+ uint64_t rt_prev; /* rt_cur copied from the sample taken before this one */
+ uint32_t gen; /* 0 while the slot is unusable; otherwise the cal_gen value it was published with */
+};
+
struct adapter {
SLIST_ENTRY(adapter) link;
device_t dev;
@@ -982,7 +991,11 @@
struct mtx sfl_lock; /* same cache-line as sc_lock? but that's ok */
TAILQ_HEAD(, sge_fl) sfl;
struct callout sfl_callout;
-
+ struct callout cal_callout;
+ struct clock_sync cal_info[CNT_CAL_INFO];
+ int cal_current;
+ int cal_count;
+ uint32_t cal_gen;
/*
* Driver code that can run when the adapter is suspended must use this
* lock or a synchronized_op and check for HW_OFF_LIMITS before
Index: sys/dev/cxgbe/t4_main.c
===================================================================
--- sys/dev/cxgbe/t4_main.c
+++ sys/dev/cxgbe/t4_main.c
@@ -320,6 +320,18 @@
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldtxq, CTLFLAG_RDTUN, &t4_nofldtxq, 0,
"Number of offload TX queues per port");
+static int t4_clocksync_fast = 1; /* re-arm period, in seconds, used by t4_calibration() while cal_count < t4_fast_2_slow */
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, csfast, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_fast, 0,
+ "During initial clock sync how fast do we update in seconds");
+
+static int t4_clocksync_normal = 30; /* re-arm period, in seconds, once cal_count has reached t4_fast_2_slow */
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, csnormal, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_clocksync_normal, 0,
+ "During normal clock sync how fast do we update in seconds");
+
+static int t4_fast_2_slow = 30; /* cal_count threshold; cal_count stops incrementing once it reaches this value */
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, cscount, CTLFLAG_RW | CTLFLAG_MPSAFE, &t4_fast_2_slow, 0,
+ "How many clock syncs do we need to do to transition to slow");
+
#define NOFLDRXQ 2
static int t4_nofldrxq = -NOFLDRXQ;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, nofldrxq, CTLFLAG_RDTUN, &t4_nofldrxq, 0,
@@ -1109,6 +1121,81 @@
return (-1);
}
+/*
+ * Flatten a timespec into a single count of nanoseconds.
+ *
+ * ts is the time to convert; it is only read.  Returns
+ * tv_sec * 10^9 + tv_nsec as an unsigned 64-bit value.
+ *
+ * The seconds field is widened to 64 bits before it is scaled so that the
+ * multiplication is never carried out in time_t, which overflows after a
+ * couple of seconds on platforms where time_t is a 32-bit signed type.
+ */
+static inline uint64_t
+t4_get_ns_timestamp(struct timespec *ts)
+{
+	return ((uint64_t)ts->tv_sec * 1000000000ULL + (uint64_t)ts->tv_nsec);
+}
+
+/*
+ * Callout handler that samples the adapter's SGE timestamp register together
+ * with the system uptime clock and publishes the pair for c4_tstmp_to_ns().
+ *
+ * arg is the struct adapter the callout was armed for.
+ *
+ * sc->cal_info[] is used as a small ring.  The slot at sc->cal_current is
+ * the one readers look at; each pass fills the following slot and publishes
+ * it by storing a non-zero generation number, after first zeroing the
+ * generation of the slot being retired.  A generation of 0 marks a slot as
+ * unusable.
+ *
+ * The handler always re-arms itself.  The period is t4_clocksync_fast
+ * seconds while cal_count is below t4_fast_2_slow and t4_clocksync_normal
+ * seconds afterwards.  Those tunables are writable at runtime without any
+ * range check, so the period is clamped to at least one second here; a zero
+ * or negative setting would otherwise re-arm the callout with no delay.
+ */
+static void
+t4_calibration(void *arg)
+{
+	struct adapter *sc;
+	struct timespec ts;
+	struct clock_sync *cur, *nex;
+	int fast_2_slow, interval, next_up;
+
+	sc = (struct adapter *)arg;
+	/* Snapshot the tunable so both tests below use the same threshold. */
+	fast_2_slow = t4_fast_2_slow;
+
+	cur = &sc->cal_info[sc->cal_current];
+	next_up = sc->cal_current + 1;
+	if (next_up >= CNT_CAL_INFO)
+		next_up = 0;
+	nex = &sc->cal_info[next_up];
+	if (__predict_false(sc->cal_count == 0)) {
+		/* First time in: record a baseline only, nothing is published. */
+		cur->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO);
+		nanouptime(&ts);
+		cur->rt_cur = t4_get_ns_timestamp(&ts);
+		sc->cal_count++;
+		goto done;
+	}
+	/* Carry the previous sample forward and take a new one. */
+	nex->hw_prev = cur->hw_cur;
+	nex->rt_prev = cur->rt_cur;
+	nex->hw_cur = t4_read_reg64(sc, A_SGE_TIMESTAMP_LO);
+	nanouptime(&ts);
+	nex->rt_cur = t4_get_ns_timestamp(&ts);
+	if ((nex->hw_cur - nex->hw_prev) == 0) {
+		/* The clock is not advancing?  Invalidate and start over. */
+		sc->cal_count = 0;
+		atomic_store_rel_int(&cur->gen, 0);
+		goto done;
+	}
+	/* Retire the current slot before readers are pointed at the next one. */
+	cur->gen = 0;
+	atomic_thread_fence_rel();
+	sc->cal_current = next_up;
+	sc->cal_gen++;
+	atomic_store_rel_int(&nex->gen, sc->cal_gen);
+	if (sc->cal_count < fast_2_slow)
+		sc->cal_count++;
+done:
+	interval = (sc->cal_count < fast_2_slow) ?
+	    t4_clocksync_fast : t4_clocksync_normal;
+	/* Never re-arm with a zero or negative period. */
+	if (interval < 1)
+		interval = 1;
+	callout_reset_sbt_curcpu(&sc->cal_callout, interval * SBT_1S, 0,
+	    t4_calibration, sc, C_DIRECT_EXEC);
+}
+
+
+
+/*
+ * Reset the adapter's calibration state and take the first sample.
+ *
+ * The generation of every cal_info[] slot is zeroed, the ring index, the
+ * sample count and the generation counter are set back to zero, and
+ * t4_calibration() is then called directly on the adapter.
+ */
+static void
+t4_calibration_start(struct adapter *sc)
+{
+	struct clock_sync *slot;
+
+	/* Mark every slot unusable. */
+	for (slot = &sc->cal_info[0]; slot < &sc->cal_info[CNT_CAL_INFO];
+	    slot++)
+		slot->gen = 0;
+
+	sc->cal_gen = 0;
+	sc->cal_count = 0;
+	sc->cal_current = 0;
+
+	t4_calibration((void *)sc);
+}
+
static int
t4_attach(device_t dev)
{
@@ -1177,6 +1264,8 @@
callout_init(&sc->ktls_tick, 1);
+ callout_init(&sc->cal_callout, 1);
+
refcount_init(&sc->vxlan_refcount, 0);
TASK_INIT(&sc->reset_task, 0, reset_adapter_task, sc);
@@ -1567,6 +1656,7 @@
"failed to attach all child ports: %d\n", rc);
goto done;
}
+ t4_calibration_start(sc);
device_printf(dev,
"PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
@@ -1742,7 +1832,8 @@
free(pi, M_CXGBE);
}
}
-
+ callout_stop(&sc->cal_callout);
+ callout_drain(&sc->cal_callout);
device_delete_children(dev);
sysctl_ctx_free(&sc->ctx);
adapter_full_uninit(sc);
@@ -1920,7 +2011,6 @@
/* No more DMA or interrupts. */
stop_adapter(sc);
-
/* Quiesce all activity. */
for_each_port(sc, i) {
pi = sc->port[i];
@@ -1992,6 +2082,9 @@
sc->sge.fwq.flags &= ~IQ_HW_ALLOCATED;
quiesce_iq_fl(sc, &sc->sge.fwq, NULL);
}
+ /* Stop calibration */
+ callout_stop(&sc->cal_callout);
+ callout_drain(&sc->cal_callout);
/* Mark the adapter totally off limits. */
mtx_lock(&sc->reg_lock);
@@ -2359,6 +2452,9 @@
}
}
}
+ /* Reset all calibration */
+ t4_calibration_start(sc);
+
done:
if (rc == 0) {
sc->incarnation++;
@@ -2520,6 +2616,7 @@
ifp->if_ioctl = cxgbe_ioctl;
ifp->if_transmit = cxgbe_transmit;
ifp->if_qflush = cxgbe_qflush;
+
if (vi->pi->nvi > 1 || sc->flags & IS_VF)
ifp->if_get_counter = vi_get_counter;
else
Index: sys/dev/cxgbe/t4_sge.c
===================================================================
--- sys/dev/cxgbe/t4_sge.c
+++ sys/dev/cxgbe/t4_sge.c
@@ -1511,15 +1511,80 @@
}
#endif
+#define CGBE_SHIFT_SCALE 10
+
static inline uint64_t
-last_flit_to_ns(struct adapter *sc, uint64_t lf)
+c4_tstmp_to_ns(struct adapter *sc, uint64_t lf)
{
- uint64_t n = be64toh(lf) & 0xfffffffffffffff; /* 60b, not 64b. */
+ struct clock_sync *cur, dcur;
+ uint64_t tstmp_sec, tstmp_nsec;
+ uint64_t hw_prev_sec, hw_prev_nsec;
+ uint64_t rt_cur_to_prev, res_s, res_n, res_s_modulo, res;
+ uint64_t hw_clk_div, cclk, a1_s, a1_n;
+ uint64_t hw_tstmp = lf & 0xfffffffffffffffULL; /* 60b, not 64b.  lf is used as passed in; be64toh() is no longer applied here. */
+ uint32_t gen;
- if (n > UINT64_MAX / 1000000)
- return (n / sc->params.vpd.cclk * 1000000);
- else
- return (n * 1000000 / sc->params.vpd.cclk);
+ do {
+ cur = &sc->cal_info[sc->cal_current];
+ gen = atomic_load_acq_int(&cur->gen);
+ if (gen == 0)
+ return (0); /* slot not published: report 0 */
+ dcur = *cur; /* do all the math on a private copy of the slot */
+ atomic_thread_fence_acq();
+ } while (gen != dcur.gen); /* retry when the copied generation differs from the one loaded first */
+ /*
+ * NOTE(review): the loop above compares against the generation
+ * captured inside the copy rather than re-reading cur->gen after
+ * the fence; confirm that this is enough to detect a writer that
+ * touched the slot while it was being copied.
+ *
+ * Our goal here is to have a result that is:
+ *
+ * ( (cur_time - prev_time) )
+ * ((hw_tstmp - hw_prev) * ----------------------------- ) + prev_time
+ * ( (hw_cur - hw_prev) )
+ *
+ * With the constraints that we cannot use float and we
+ * don't want to overflow the uint64_t numbers we are using.
+ *
+ * The plan is to take the clocking value of the hw timestamps
+ * and split them into a whole-seconds portion and a sub-second
+ * portion.  Then we operate on the two portions separately, making
+ * sure to bring back the carry over from the seconds when we divide.
+ * Despite their names, tstmp_nsec, hw_prev_nsec and a1_n hold the
+ * sub-second remainder in clock ticks (value % cclk), not in
+ * nanoseconds.
+ *
+ * NOTE(review): nothing here checks that hw_tstmp is at or after
+ * dcur.hw_prev; if it can be earlier the unsigned subtractions
+ * below would wrap.  Confirm against the callers.
+ *
+ * First up lets get the two divided into separate entities
+ * i.e. the seconds. We use the clock frequency for this.
+ * Note that vpd.cclk is in khz, we need it in raw hz so
+ * convert to hz.
+ */
+ cclk = sc->params.vpd.cclk * 1000;
+ tstmp_sec = hw_tstmp / cclk;
+ tstmp_nsec = hw_tstmp % cclk;
+ hw_prev_sec = dcur.hw_prev / cclk;
+ hw_prev_nsec = dcur.hw_prev % cclk;
+ /* Now will the subtraction need a borrow? */
+ if (hw_prev_nsec > tstmp_nsec) {
+ tstmp_sec--;
+ tstmp_nsec += cclk;
+ }
+ /* Now work with them separately */
+ a1_s = (tstmp_sec - hw_prev_sec);
+ a1_n = (tstmp_nsec - hw_prev_nsec);
+ rt_cur_to_prev = (dcur.rt_cur - dcur.rt_prev);
+ res_s = a1_s * rt_cur_to_prev;
+ res_n = a1_n * rt_cur_to_prev;
+ /* Now lets get our divider: hardware ticks between the two samples */
+ hw_clk_div = dcur.hw_cur - dcur.hw_prev;
+ /* Make sure to save the remainder from the seconds divide */
+ res_s_modulo = res_s % hw_clk_div;
+ res_s /= hw_clk_div;
+ /* Scale the remainder from seconds to ticks so it matches res_n */
+ res_s_modulo *= cclk;
+ /* Now add in the remainder */
+ res_n += res_s_modulo;
+ /* Now do the divide */
+ res_n /= hw_clk_div;
+ res_s *= cclk;
+ /* Recombine the two */
+ res = res_s + res_n;
+ /* And now add in the base time to get to the real timestamp */
+ res += dcur.rt_prev;
+ return (res);
}
static inline void
@@ -2074,11 +2139,10 @@
* it. For now this is only for custom code
* that knows how to interpret cxgbe's stamp.
*/
- m0->m_pkthdr.rcv_tstmp =
- last_flit_to_ns(sc, d->rsp.u.last_flit);
-#ifdef notyet
- m0->m_flags |= M_TSTMP;
-#endif
+ m0->m_pkthdr.rcv_tstmp = c4_tstmp_to_ns(sc,
+ be64toh(d->rsp.u.last_flit));
+ if (m0->m_pkthdr.rcv_tstmp != 0)
+ m0->m_flags |= M_TSTMP;
}
#ifdef NUMA
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 28, 4:53 PM (3 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26289755
Default Alt Text
D36315.id109757.diff (8 KB)
Attached To
Mode
D36315: Enable M_TSTMP in Chelsio cxgbe driver by creating a mechanism that can sync the time.
Attached
Detach File
Event Timeline
Log In to Comment