Index: sys/amd64/include/pvclock.h =================================================================== --- /dev/null +++ sys/amd64/include/pvclock.h @@ -0,0 +1,6 @@ +/*- + * This file is in the public domain. + */ +/* $FreeBSD$ */ + +#include Index: sys/conf/files.amd64 =================================================================== --- sys/conf/files.amd64 +++ sys/conf/files.amd64 @@ -569,6 +569,7 @@ x86/x86/mptable_pci.c optional mptable pci x86/x86/msi.c optional pci x86/x86/nexus.c standard +x86/x86/pvclock.c standard x86/x86/tsc.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm Index: sys/conf/files.i386 =================================================================== --- sys/conf/files.i386 +++ sys/conf/files.i386 @@ -588,6 +588,7 @@ x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/x86/pvclock.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm x86/xen/xen_intr.c optional xen | xenhvm Index: sys/dev/xen/timer/timer.c =================================================================== --- sys/dev/xen/timer/timer.c +++ sys/dev/xen/timer/timer.c @@ -59,6 +59,7 @@ #include #include #include +#include #include @@ -95,9 +96,6 @@ struct eventtimer et; }; -/* Last time; this guarantees a monotonically increasing clock. */ -volatile uint64_t xen_timer_last_time = 0; - static void xentimer_identify(driver_t *driver, device_t parent) { @@ -148,128 +146,20 @@ return (BUS_PROBE_NOWILDCARD); } -/* - * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, - * yielding a 64-bit result. - */ -static inline uint64_t -scale_delta(uint64_t delta, uint32_t mul_frac, int shift) -{ - uint64_t product; - - if (shift < 0) - delta >>= -shift; - else - delta <<= shift; - -#if defined(__i386__) - { - uint32_t tmp1, tmp2; - - /** - * For i386, the formula looks like: - * - * lower = (mul_frac * (delta & UINT_MAX)) >> 32 - * upper = mul_frac * (delta >> 32) - * product = lower + upper - */ - __asm__ ( - "mul %5 ; " - "mov %4,%%eax ; " - "mov %%edx,%4 ; " - "mul %5 ; " - "xor %5,%5 ; " - "add %4,%%eax ; " - "adc %5,%%edx ; " - : "=A" (product), "=r" (tmp1), "=r" (tmp2) - : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), - "2" (mul_frac) ); - } -#elif defined(__amd64__) - { - unsigned long tmp; - - __asm__ ( - "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" - : [lo]"=a" (product), [hi]"=d" (tmp) - : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac)); - } -#else -#error "xentimer: unsupported architecture" -#endif - - return (product); -} - -static uint64_t -get_nsec_offset(struct vcpu_time_info *tinfo) -{ - - return (scale_delta(rdtsc() - tinfo->tsc_timestamp, - tinfo->tsc_to_system_mul, tinfo->tsc_shift)); -} - -/* - * Read the current hypervisor system uptime value from Xen. - * See for a description of how this works. - */ -static uint32_t -xen_fetch_vcpu_tinfo(struct vcpu_time_info *dst, struct vcpu_time_info *src) -{ - - do { - dst->version = src->version; - rmb(); - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_time = src->system_time; - dst->tsc_to_system_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - rmb(); - } while ((src->version & 1) | (dst->version ^ src->version)); - - return (dst->version); -} - /** * \brief Get the current time, in nanoseconds, since the hypervisor booted. * * \param vcpu vcpu_info structure to fetch the time from. * - * \note This function returns the current CPU's idea of this value, unless - * it happens to be less than another CPU's previously determined value. */ static uint64_t xen_fetch_vcpu_time(struct vcpu_info *vcpu) { - struct vcpu_time_info dst; - struct vcpu_time_info *src; - uint32_t pre_version; - uint64_t now; - volatile uint64_t last; - - src = &vcpu->time; - - do { - pre_version = xen_fetch_vcpu_tinfo(&dst, src); - barrier(); - now = dst.system_time + get_nsec_offset(&dst); - barrier(); - } while (pre_version != src->version); + struct pvclock_vcpu_time_info *time; - /* - * Enforce a monotonically increasing clock time across all - * VCPUs. If our time is too old, use the last time and return. - * Otherwise, try to update the last time. - */ - do { - last = xen_timer_last_time; - if (last > now) { - now = last; - break; - } - } while (!atomic_cmpset_64(&xen_timer_last_time, last, now)); + time = (struct pvclock_vcpu_time_info *) &vcpu->time; - return (now); + return (pvclock_get_timecount(time)); } static uint32_t @@ -302,15 +192,11 @@ xen_fetch_wallclock(struct timespec *ts) { shared_info_t *src = HYPERVISOR_shared_info; - uint32_t version = 0; + struct pvclock_wall_clock *wc; - do { - version = src->wc_version; - rmb(); - ts->tv_sec = src->wc_sec; - ts->tv_nsec = src->wc_nsec; - rmb(); - } while ((src->wc_version & 1) | (version ^ src->wc_version)); + wc = (struct pvclock_wall_clock *) &src->wc_version; + + pvclock_get_wallclock(wc, ts); } static void @@ -574,7 +460,7 @@ } /* Reset the last uptime value */ - xen_timer_last_time = 0; + pvclock_resume(); /* Reset the RTC clock */ inittodr(time_second); Index: sys/i386/include/pvclock.h =================================================================== --- /dev/null +++ sys/i386/include/pvclock.h @@ -0,0 +1,6 @@ +/*- + * This file is in the public domain. + */ +/* $FreeBSD$ */ + +#include Index: sys/x86/include/pvclock.h =================================================================== --- /dev/null +++ sys/x86/include/pvclock.h @@ -0,0 +1,57 @@ +/*- + * Copyright (c) 2014, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef X86_PVCLOCK +#define X86_PVCLOCK + +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __packed; + +#define PVCLOCK_FLAG_TSC_STABLE 0x01 +#define PVCLOCK_FLAG_GUEST_PASUED 0x02 + +struct pvclock_wall_clock { + uint32_t version; + uint32_t sec; + uint32_t nsec; +} __packed; + +void pvclock_resume(void); +uint64_t pvclock_get_timecount(struct pvclock_vcpu_time_info *ti); +void pvclock_get_wallclock(struct pvclock_wall_clock *wc, + struct timespec *ts); + +#endif Index: sys/x86/x86/pvclock.c =================================================================== --- /dev/null +++ sys/x86/x86/pvclock.c @@ -0,0 +1,182 @@ +/*- + * Copyright (c) 2009 Adrian Chadd + * Copyright (c) 2012 Spectra Logic Corporation + * Copyright (c) 2014 Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include +#include +#include +#include + +/* + * Last time; this guarantees a monotonically increasing clock for when + * a stable TSC is not provided. + */ +static volatile uint64_t pvclock_last_cycles; + +void +pvclock_resume(void) +{ + + atomic_store_rel_64(&pvclock_last_cycles, 0); +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline uint64_t +pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#if defined(__i386__) + { + uint32_t tmp1, tmp2; + + /** + * For i386, the formula looks like: + * + * lower = (mul_frac * (delta & UINT_MAX)) >> 32 + * upper = mul_frac * (delta >> 32) + * product = lower + upper + */ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), + "2" (mul_frac) ); + } +#elif defined(__amd64__) + { + unsigned long tmp; + + __asm__ ( + "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]" + : [lo]"=a" (product), [hi]"=d" (tmp) + : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac)); + } +#else +#error "pvclock: unsupported x86 architecture?" +#endif + + return (product); +} + +static uint64_t +pvclock_get_nsec_offset(struct pvclock_vcpu_time_info *ti) +{ + uint64_t delta; + + delta = rdtsc() - ti->tsc_timestamp; + + return (pvclock_scale_delta(delta, ti->tsc_to_system_mul, + ti->tsc_shift)); +} + +static void +pvclock_read_time_info(struct pvclock_vcpu_time_info *ti, + uint64_t *cycles, uint8_t *flags) +{ + uint32_t version; + + do { + version = ti->version; + rmb(); + *cycles = ti->system_time + pvclock_get_nsec_offset(ti); + *flags = ti->flags; + rmb(); + } while ((ti->version & 1) != 0 || ti->version != version); +} + +static void +pvclock_read_wall_clock(struct pvclock_wall_clock *wc, uint32_t *sec, + uint32_t *nsec) +{ + uint32_t version; + + do { + version = wc->version; + rmb(); + *sec = wc->sec; + *nsec = wc->nsec; + rmb(); + } while ((wc->version & 1) != 0 || wc->version != version); +} + +uint64_t +pvclock_get_timecount(struct pvclock_vcpu_time_info *ti) +{ + uint64_t now; + uint8_t flags; + volatile uint64_t last; + + pvclock_read_time_info(ti, &now, &flags); + + if (flags & PVCLOCK_FLAG_TSC_STABLE) + return (now); + + /* + * Enforce a monotonically increasing clock time across all VCPUs. + * If our time is too old, use the last time and return. Otherwise, + * try to update the last time. + */ + do { + last = atomic_load_acq_64(&pvclock_last_cycles); + if (last > now) + return (last); + } while (!atomic_cmpset_64(&pvclock_last_cycles, last, now)); + + return (now); +} + +void +pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts) +{ + uint32_t sec, nsec; + + pvclock_read_wall_clock(wc, &sec, &nsec); + ts->tv_sec = sec; + ts->tv_nsec = nsec; +}