diff --git a/lib/libc/x86/sys/__vdso_gettc.c b/lib/libc/x86/sys/__vdso_gettc.c index ab10943c811b..5cc847e31c0e 100644 --- a/lib/libc/x86/sys/__vdso_gettc.c +++ b/lib/libc/x86/sys/__vdso_gettc.c @@ -1,361 +1,423 @@ /*- * Copyright (c) 2012 Konstantin Belousov * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include "namespace.h" #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include #include +#include #include #include #ifdef WANT_HYPERV #include #endif #include #include "libc_private.h" static inline u_int rdtsc_low(const struct vdso_timehands *th) { u_int rv; __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" : "=a" (rv) : "c" (th->th_x86_shift) : "edx"); return (rv); } static inline u_int rdtscp_low(const struct vdso_timehands *th) { u_int rv; __asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0" : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx"); return (rv); } static u_int rdtsc_low_mb_lfence(const struct vdso_timehands *th) { lfence(); return (rdtsc_low(th)); } static u_int rdtsc_low_mb_mfence(const struct vdso_timehands *th) { mfence(); return (rdtsc_low(th)); } static u_int rdtsc_low_mb_none(const struct vdso_timehands *th) { return (rdtsc_low(th)); } static u_int rdtsc32_mb_lfence(void) { lfence(); return (rdtsc32()); } static u_int rdtsc32_mb_mfence(void) { mfence(); return (rdtsc32()); } static u_int rdtsc32_mb_none(void) { return (rdtsc32()); } static u_int rdtscp32_(void) { return (rdtscp32()); } struct tsc_selector_tag { u_int (*ts_rdtsc32)(void); u_int (*ts_rdtsc_low)(const struct vdso_timehands *); }; static const struct tsc_selector_tag tsc_selector[] = { [0] = { /* Intel, LFENCE */ .ts_rdtsc32 = rdtsc32_mb_lfence, .ts_rdtsc_low = rdtsc_low_mb_lfence, }, [1] = { /* AMD, MFENCE */ .ts_rdtsc32 = rdtsc32_mb_mfence, .ts_rdtsc_low = rdtsc_low_mb_mfence, }, [2] = { /* No SSE2 */ .ts_rdtsc32 = rdtsc32_mb_none, .ts_rdtsc_low = rdtsc_low_mb_none, }, [3] = { /* RDTSCP */ .ts_rdtsc32 = rdtscp32_, .ts_rdtsc_low = rdtscp_low, }, }; static int tsc_selector_idx(u_int cpu_feature) { u_int amd_feature, cpu_exthigh, cpu_id, p[4], v[3]; static const char amd_id[] = "AuthenticAMD"; static const char hygon_id[] = "HygonGenuine"; bool amd_cpu; if (cpu_feature == 0) return (2); /* should not happen due to RDTSC */ do_cpuid(0, p); v[0] = p[1]; v[1] = p[3]; v[2] = p[2]; amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 || memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0; do_cpuid(1, p); cpu_id = p[0]; if (cpu_feature != 0) { do_cpuid(0x80000000, p); cpu_exthigh = p[0]; } else { cpu_exthigh = 0; } if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, p); amd_feature = p[3]; } else { amd_feature = 0; } if ((amd_feature & AMDID_RDTSCP) != 0) return (3); if ((cpu_feature & CPUID_SSE2) == 0) return (2); return (amd_cpu ? 1 : 0); } DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low, (const struct vdso_timehands *th)) { return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low); } DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void)) { return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32); } #define HPET_DEV_MAP_MAX 10 static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX]; static void __vdso_init_hpet(uint32_t u) { static const char devprefix[] = "/dev/hpet"; char devname[64], *c, *c1, t; volatile char *new_map, *old_map; unsigned int mode; uint32_t u1; int fd; c1 = c = stpcpy(devname, devprefix); u1 = u; do { *c++ = u1 % 10 + '0'; u1 /= 10; } while (u1 != 0); *c = '\0'; for (c--; c1 != c; c1++, c--) { t = *c1; *c1 = *c; *c = t; } old_map = hpet_dev_map[u]; if (old_map != NULL) return; /* * Explicitely check for the capability mode to avoid * triggering trap_enocap on the device open by absolute path. */ if ((cap_getmode(&mode) == 0 && mode != 0) || (fd = _open(devname, O_RDONLY | O_CLOEXEC)) == -1) { /* Prevent the caller from re-entering. */ atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u], (uintptr_t)old_map, (uintptr_t)MAP_FAILED); return; } new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); _close(fd); if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u], (uintptr_t)old_map, (uintptr_t)new_map) == 0 && new_map != MAP_FAILED) munmap((void *)new_map, PAGE_SIZE); } #ifdef WANT_HYPERV #define HYPERV_REFTSC_DEVPATH "/dev/" HYPERV_REFTSC_DEVNAME /* * NOTE: * We use 'NULL' for this variable to indicate that initialization * is required. And if this variable is 'MAP_FAILED', then Hyper-V * reference TSC can not be used, e.g. in misconfigured jail. */ static struct hyperv_reftsc *hyperv_ref_tsc; static void __vdso_init_hyperv_tsc(void) { int fd; unsigned int mode; if (cap_getmode(&mode) == 0 && mode != 0) goto fail; fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY | O_CLOEXEC); if (fd < 0) goto fail; hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ, MAP_SHARED, fd, 0); _close(fd); return; fail: /* Prevent the caller from re-entering. */ hyperv_ref_tsc = MAP_FAILED; } static int __vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc) { uint64_t disc, ret, tsc, scale; uint32_t seq; int64_t ofs; while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) { scale = tsc_ref->tsc_scale; ofs = tsc_ref->tsc_ofs; mfence(); /* XXXKIB */ tsc = rdtsc(); /* ret = ((tsc * scale) >> 64) + ofs */ __asm__ __volatile__ ("mulq %3" : "=d" (ret), "=a" (disc) : "a" (tsc), "r" (scale)); ret += ofs; atomic_thread_fence_acq(); if (tsc_ref->tsc_seq == seq) { *tc = ret; return (0); } /* Sequence changed; re-sync. */ } return (ENOSYS); } #endif /* WANT_HYPERV */ +static struct pvclock_vcpu_time_info *pvclock_timeinfos; + +static int +__vdso_pvclock_gettc(const struct vdso_timehands *th, u_int *tc) +{ + uint64_t delta, ns, tsc; + struct pvclock_vcpu_time_info *ti; + uint32_t cpuid_ti, cpuid_tsc, version; + bool stable; + + do { + ti = &pvclock_timeinfos[0]; + version = atomic_load_acq_32(&ti->version); + stable = (ti->flags & th->th_x86_pvc_stable_mask) != 0; + if (stable) { + tsc = rdtscp(); + } else { + (void)rdtscp_aux(&cpuid_ti); + ti = &pvclock_timeinfos[cpuid_ti]; + version = atomic_load_acq_32(&ti->version); + tsc = rdtscp_aux(&cpuid_tsc); + } + delta = tsc - ti->tsc_timestamp; + ns = ti->system_time + pvclock_scale_delta(delta, + ti->tsc_to_system_mul, ti->tsc_shift); + atomic_thread_fence_acq(); + } while ((ti->version & 1) != 0 || ti->version != version || + (!stable && cpuid_ti != cpuid_tsc)); + *tc = MAX(ns, th->th_x86_pvc_last_systime); + return (0); +} + +static void +__vdso_init_pvclock_timeinfos(void) +{ + struct pvclock_vcpu_time_info *timeinfos; + size_t len; + int fd, ncpus; + unsigned int mode; + + timeinfos = MAP_FAILED; + if (_elf_aux_info(AT_NCPUS, &ncpus, sizeof(ncpus)) != 0 || + (cap_getmode(&mode) == 0 && mode != 0) || + (fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC)) < 0) + goto leave; + len = ncpus * sizeof(*pvclock_timeinfos); + timeinfos = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0); + _close(fd); +leave: + if (atomic_cmpset_rel_ptr( + (volatile uintptr_t *)&pvclock_timeinfos, (uintptr_t)NULL, + (uintptr_t)timeinfos) == 0 && timeinfos != MAP_FAILED) + (void)munmap((void *)timeinfos, len); +} + #pragma weak __vdso_gettc int __vdso_gettc(const struct vdso_timehands *th, u_int *tc) { volatile char *map; uint32_t idx; switch (th->th_algo) { case VDSO_TH_ALGO_X86_TSC: *tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) : __vdso_gettc_rdtsc32(); return (0); case VDSO_TH_ALGO_X86_HPET: idx = th->th_x86_hpet_idx; if (idx >= HPET_DEV_MAP_MAX) return (ENOSYS); map = (volatile char *)atomic_load_acq_ptr( (volatile uintptr_t *)&hpet_dev_map[idx]); if (map == NULL) { __vdso_init_hpet(idx); map = (volatile char *)atomic_load_acq_ptr( (volatile uintptr_t *)&hpet_dev_map[idx]); } if (map == MAP_FAILED) return (ENOSYS); *tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER); return (0); #ifdef WANT_HYPERV case VDSO_TH_ALGO_X86_HVTSC: if (hyperv_ref_tsc == NULL) __vdso_init_hyperv_tsc(); if (hyperv_ref_tsc == MAP_FAILED) return (ENOSYS); return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc)); #endif + case VDSO_TH_ALGO_X86_PVCLK: + if (pvclock_timeinfos == NULL) + __vdso_init_pvclock_timeinfos(); + if (pvclock_timeinfos == MAP_FAILED) + return (ENOSYS); + return (__vdso_pvclock_gettc(th, tc)); default: return (ENOSYS); } } #pragma weak __vdso_gettimekeep int __vdso_gettimekeep(struct vdso_timekeep **tk) { return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk))); }