Index: sys/x86/x86/tsc.c
===================================================================
--- sys/x86/x86/tsc.c
+++ sys/x86/x86/tsc.c
@@ -49,6 +49,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -601,39 +602,171 @@
 static void
 tsc_calib(void *arg __unused)
 {
-	sbintime_t t_start, t_end;
-	uint64_t freq_khz, tsc_start, tsc_end;
 	register_t flags;
-	int cpu;
+	uint64_t tsc0, tsc, tsc_delay, n, passes = 0;
+	sbintime_t t0, t;
+	double mu_tsc = 0;
+	double mu_t = 0;
+	double va_tsc = 0;
+	double va_t = 0;
+	double cva = 0;
+	double q_tsc;
+	double q_t;
+	double d1, d2;
+	double inv_n;
+	uint64_t freq;
 
-	flags = intr_disable();
-	cpu = curcpu;
-	tsc_start = rdtsc_ordered();
-	t_start = sbinuptime();
-	intr_restore(flags);
+	/*-
+	 * The idea here is to compute a best-fit linear regression between
+	 * the TSC and the reference clock; the slope of that line multiplied
+	 * by the frequency of the reference clock (2^32 Hz, since we're
+	 * using sbinuptime as a reference) gives us the frequency of the TSC.
+	 * (Note that we could use a hardware clock directly as a reference
+	 * rather than using sbinuptime and taking a detour via timecounter
+	 * code; but using sbinuptime allows us to automatically use the best
+	 * available reference clock.)
+	 *
+	 * To do this, we calculate the
+	 * (a) mean of the TSC measurements,
+	 * (b) variance of the TSC measurements,
+	 * (c) mean of the reference clock measurements,
+	 * (d) variance of the reference clock measurements, and
+	 * (e) covariance of the TSC and reference clock measurements
+	 * on an ongoing basis, updating all five values after each new data
+	 * point arrives, stopping when we're confident that we've accurately
+	 * measured the clock speed.
+	 *
+	 * Given those five values, the important formulas to remember from
+	 * introductory statistics are:
+	 * 1. slope of regression line = covariance(x, y) / variance(x)
+	 * 2. (relative uncertainty in slope)^2 =
+	 *    (variance(x) * variance(y) - covariance(x, y)^2)
+	 *    ------------------------------------------------
+	 *           covariance(x, y)^2 * (N - 2)
+	 *
+	 * We adjust the second formula slightly, adding a term to each of
+	 * the variance values to reflect the measurement quantization.
+	 *
+	 * Finally, we need to determine when to stop gathering data. We
+	 * can't simply stop as soon as the computed uncertainty estimate
+	 * is below our threshold; this would make us overconfident since it
+	 * would introduce a multiple-comparisons problem (cf. sequential
+	 * analysis in clinical trials). Instead, we stop with N data points
+	 * if the estimated uncertainty of the first k data points meets our
+	 * target for all N/2 < k <= N; this is not theoretically ideal, but
+	 * in practice works well enough.
+	 */
 
-	DELAY(1000000);
+	/*
+	 * Using floating-point arithmetic allows us to keep rounding errors
+	 * under control with far less effort than using fixed-point math.
+	 */
+	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
 
+	/*
+	 * Bind to the current CPU for the duration of our calibration, just
+	 * in case moving between CPUs would introduce additional noise.
+	 */
 	thread_lock(curthread);
-	sched_bind(curthread, cpu);
+	sched_bind(curthread, curcpu);
+	thread_unlock(curthread);
 
+	/* Timer quantization granularity. */
+	q_tsc = 2.0;
+	q_t = (double)(SBT_1S) / tc_getfrequency() + 1.0;
+
+	/*
+	 * Initial values for TSC and uptime; we'll subtract these off from
+	 * values we measure later in order to reduce the size of values
+	 * we're handling later (and thereby reduce rounding errors).
+	 */
 	flags = intr_disable();
-	tsc_end = rdtsc_ordered();
-	t_end = sbinuptime();
+	tsc0 = rdtsc_ordered();
+	t0 = sbinuptime();
 	intr_restore(flags);
 
+	for (n = 1; ; n++) {
+		/* Get a new data point. */
+		flags = intr_disable();
+		tsc = rdtsc_ordered() - tsc0;
+		t = sbinuptime() - t0;
+		intr_restore(flags);
+
+		/* If we spent too long, bail. */
+		if (t > (double)(SBT_1S)) {
+			printf("Statistical TSC calibration failed! "
+			    "Clocks might be ticking at variable rates.\n");
+			printf("Falling back to slow TSC calibration.\n");
+			freq = (double)(SBT_1S) * tsc / t;
+			break;
+		}
+
+		/* Precompute to save on divisions later. */
+		inv_n = 1.0 / n;
+
+		/* Update mean and variance of recorded TSC values. */
+		d1 = tsc - mu_tsc;
+		mu_tsc += d1 * inv_n;
+		d2 = d1 * (tsc - mu_tsc);
+		va_tsc += (d2 - va_tsc) * inv_n;
+
+		/* Update mean and variance of recorded time values. */
+		d1 = t - mu_t;
+		mu_t += d1 * inv_n;
+		d2 = d1 * (t - mu_t);
+		va_t += (d2 - va_t) * inv_n;
+
+		/* Update covariance. */
+		d2 = d1 * (tsc - mu_tsc);
+		cva += (d2 - cva) * inv_n;
+
+		/*
+		 * Count low-uncertainty iterations. The test below is
+		 * "relative uncertainty < threshold" rearranged to avoid
+		 * division. The estimate needs at least three points,
+		 * and the n > 2 guard also keeps the unsigned (n - 2)
+		 * from wrapping around on the first iteration.
+		 */
+#define	TSC_PPM_UNCERTAINTY	1
+#define	TSC_UNCERTAINTY		(TSC_PPM_UNCERTAINTY * 0.000001)
+#define	TSC_UNCERTAINTY_SQR	(TSC_UNCERTAINTY * TSC_UNCERTAINTY)
+		if (n > 2 && TSC_UNCERTAINTY_SQR * (n - 2) * cva * cva >
+		    (va_t + q_t * q_t) * (va_tsc + q_tsc * q_tsc) - cva * cva)
+			passes++;
+		else
+			passes = 0;
+#undef TSC_UNCERTAINTY_SQR
+#undef TSC_UNCERTAINTY
+#undef TSC_PPM_UNCERTAINTY
+
+		/* Break if we're consistently certain. */
+		if (passes * 2 > n) {
+			freq = (double)(SBT_1S) * cva / va_t;
+			if (bootverbose)
+				printf("TSC calibration took %ld us\n",
+				    (long)(t * 1000000.0 / SBT_1S));
+			break;
+		}
+
+		/*
+		 * Add variable delay to avoid theoretical risk of aliasing
+		 * resulting from this loop synchronizing with the frequency
+		 * of the reference clock. On the nth iteration, we spend
+		 * O(1 / n) time here -- long enough to avoid aliasing, but
+		 * short enough to be insignificant as n grows.
+		 */
+		tsc_delay = rdtsc_ordered() + tsc / (n * n);
+		while (rdtsc_ordered() < tsc_delay)
+			/* Do nothing. */ ;
+	}
+
+	/* Unbind CPU and exit FPU mode. */
+	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
+	fpu_kern_leave(curthread, NULL);
 
-	/*
-	 * Direct use of the clock frequency of 10^9Hz would result in overflow
-	 * if more than ~18.5*10^9 TSC ticks elapse between measurements. While
-	 * this is unlikely for now, reduce precision slightly to better avoid
-	 * the problem.
-	 */
-	freq_khz = 1000000ul * (tsc_end - tsc_start) / sbttons(t_end - t_start);
-
-	tsc_update_freq(freq_khz * 1000);
+	tsc_update_freq(freq);
 	tc_init(&tsc_timecounter);
 	set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
 }