diff --git a/lib/libc/x86/sys/__vdso_gettc.c b/lib/libc/x86/sys/__vdso_gettc.c
--- a/lib/libc/x86/sys/__vdso_gettc.c
+++ b/lib/libc/x86/sys/__vdso_gettc.c
@@ -53,57 +53,133 @@
 #include <x86/ifunc.h>
 #include "libc_private.h"
 
-static void
-rdtsc_mb_lfence(void)
+static inline u_int
+rdtsc_low(u_int shift)
 {
+	u_int rv;
 
-	lfence();
+	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
+	    : "=a" (rv) : "c" (shift) : "edx");
+	return (rv);
 }
 
-static void
-rdtsc_mb_mfence(void)
+static inline u_int
+rdtscp_low(const struct vdso_timehands *th)
+{
+	u_int rv;
+
+	__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
+	    : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
+	return (rv);
+}
+
+static u_int
+rdtsc_low_mb_lfence(const struct vdso_timehands *th)
 {
+	lfence();
+	return (rdtsc_low(th->th_x86_shift));
+}
 
+static u_int
+rdtsc_low_mb_mfence(const struct vdso_timehands *th)
+{
 	mfence();
+	return (rdtsc_low(th->th_x86_shift));
 }
 
-static void
-rdtsc_mb_none(void)
+static u_int
+rdtsc_low_mb_none(const struct vdso_timehands *th)
 {
+	return (rdtsc_low(th->th_x86_shift));
 }
 
-DEFINE_UIFUNC(static, void, rdtsc_mb, (void))
+DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
+    (const struct vdso_timehands *th))
 {
-	u_int p[4];
+	u_int amd_feature, cpu_exthigh, p[4];
 	/* Not a typo, string matches our do_cpuid() registers use. */
 	static const char intel_id[] = "GenuntelineI";
 
+	if (cpu_feature != 0) {
+		do_cpuid(0x80000000, p);
+		cpu_exthigh = p[0];
+	} else {
+		cpu_exthigh = 0;
+	}
+	if (cpu_exthigh >= 0x80000001) {
+		do_cpuid(0x80000001, p);
+		amd_feature = p[3];
+	} else {
+		amd_feature = 0;
+	}
+
+	if ((amd_feature & AMDID_RDTSCP) != 0)
+		return (rdtscp_low);
 	if ((cpu_feature & CPUID_SSE2) == 0)
-		return (rdtsc_mb_none);
+		return (rdtsc_low_mb_none);
 	do_cpuid(0, p);
 	return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ?
-	    rdtsc_mb_lfence : rdtsc_mb_mfence);
+	    rdtsc_low_mb_lfence : rdtsc_low_mb_mfence);
 }
 
 static u_int
-__vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
+rdtsc32_mb_lfence(void)
 {
-	u_int rv;
-
-	rdtsc_mb();
-	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
-	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
-	return (rv);
+	lfence();
+	return (rdtsc32());
 }
 
 static u_int
-__vdso_rdtsc32(void)
+rdtsc32_mb_mfence(void)
 {
+	mfence();
+	return (rdtsc32());
+}
 
-	rdtsc_mb();
+static u_int
+rdtsc32_mb_none(void)
+{
 	return (rdtsc32());
 }
 
+static u_int
+rdtscp32(void)
+{
+	uint32_t rv;
+
+	__asm __volatile("rdtscp"
+	    : "=a" (rv) :: "ecx", "edx");
+	return (rv);
+}
+
+DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void))
+{
+	u_int amd_feature, cpu_exthigh, p[4];
+	/* Not a typo, string matches our do_cpuid() registers use. */
+	static const char intel_id[] = "GenuntelineI";
+
+	if (cpu_feature != 0) {
+		do_cpuid(0x80000000, p);
+		cpu_exthigh = p[0];
+	} else {
+		cpu_exthigh = 0;
+	}
+	if (cpu_exthigh >= 0x80000001) {
+		do_cpuid(0x80000001, p);
+		amd_feature = p[3];
+	} else {
+		amd_feature = 0;
+	}
+
+	if ((amd_feature & AMDID_RDTSCP) != 0)
+		return (rdtscp32);
+	if ((cpu_feature & CPUID_SSE2) == 0)
+		return (rdtsc32_mb_none);
+	do_cpuid(0, p);
+	return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ?
+	    rdtsc32_mb_lfence : rdtsc32_mb_mfence);
+}
+
 #define	HPET_DEV_MAP_MAX	10
 static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];
 
@@ -199,7 +275,7 @@
 	scale = tsc_ref->tsc_scale;
 	ofs = tsc_ref->tsc_ofs;
 
-	rdtsc_mb();
+	mfence();	/* XXXKIB */
 	tsc = rdtsc();
 
 	/* ret = ((tsc * scale) >> 64) + ofs */
@@ -231,7 +307,7 @@
 	switch (th->th_algo) {
 	case VDSO_TH_ALGO_X86_TSC:
 		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
-		    __vdso_rdtsc32();
+		    __vdso_gettc_rdtsc32();
 		return (0);
 	case VDSO_TH_ALGO_X86_HPET:
 		idx = th->th_x86_hpet_idx;
diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -91,12 +91,14 @@
     int status);
 static void tsc_freq_changing(void *arg, const struct cf_level *level,
     int *status);
-static unsigned tsc_get_timecount(struct timecounter *tc);
-static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
-static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
-static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
-static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
-static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
+static u_int tsc_get_timecount(struct timecounter *tc);
+static inline u_int tsc_get_timecount_low(struct timecounter *tc);
+static u_int tsc_get_timecount_lfence(struct timecounter *tc);
+static u_int tsc_get_timecount_low_lfence(struct timecounter *tc);
+static u_int tsc_get_timecount_mfence(struct timecounter *tc);
+static u_int tsc_get_timecount_low_mfence(struct timecounter *tc);
+static u_int tscp_get_timecount(struct timecounter *tc);
+static u_int tscp_get_timecount_low(struct timecounter *tc);
 static void tsc_levels_changed(void *arg, int unit);
 static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
     struct timecounter *tc);
@@ -628,7 +630,10 @@
 init:
 	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
 		;
-	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
+	if ((amd_feature & AMDID_RDTSCP) != 0) {
+		tsc_timecounter.tc_get_timecount = shift > 0 ?
+		    tscp_get_timecount_low : tscp_get_timecount;
+	} else if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
 		if (cpu_vendor_id == CPU_VENDOR_AMD ||
 		    cpu_vendor_id == CPU_VENDOR_HYGON) {
 			tsc_timecounter.tc_get_timecount = shift > 0 ?
@@ -783,6 +788,16 @@
 	return (rdtsc32());
 }
 
+static u_int
+tscp_get_timecount(struct timecounter *tc __unused)
+{
+	uint32_t rv;
+
+	__asm __volatile("rdtscp"
+	    : "=a" (rv) :: "ecx", "edx");
+	return (rv);
+}
+
 static inline u_int
 tsc_get_timecount_low(struct timecounter *tc)
 {
@@ -793,6 +808,16 @@
 	return (rv);
 }
 
+static u_int
+tscp_get_timecount_low(struct timecounter *tc)
+{
+	uint32_t rv;
+
+	__asm __volatile("rdtscp; movl %1, %%ecx; shrd %%cl, %%edx, %0"
+	    : "=a" (rv) : "m" (tc->tc_priv) : "ecx", "edx");
+	return (rv);
+}
+
 static u_int
 tsc_get_timecount_lfence(struct timecounter *tc __unused)
 {
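
Not part of the patch: a minimal userland sketch of the technique the new rdtscp_low()/tscp_get_timecount_low() helpers rely on, written with the GCC/Clang __rdtscp() intrinsic instead of the patch's inline assembly. The function name and the shift value below are illustrative stand-ins for th->th_x86_shift / tc->tc_priv. The reason the fences disappear on the RDTSCP paths is that RDTSCP waits until all previous instructions have executed and previous loads are globally visible, so the separate LFENCE/MFENCE required before a plain RDTSC is unnecessary.

/*
 * Illustrative sketch only -- not FreeBSD code.  Assumes an x86-64
 * GCC or Clang toolchain providing <x86intrin.h>.
 */
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

static uint32_t
tscp_low_sketch(unsigned int shift)
{
	unsigned int aux;	/* receives IA32_TSC_AUX, unused here */
	uint64_t tsc;

	/*
	 * RDTSCP is ordered after all preceding instructions, so no
	 * lfence()/mfence() is issued in front of it, unlike the
	 * rdtsc_low_mb_* / tsc_get_timecount_*fence paths.
	 */
	tsc = __rdtscp(&aux);

	/* Equivalent of "shrd %cl, %edx, %eax": drop the low 'shift' bits. */
	return ((uint32_t)(tsc >> shift));
}

int
main(void)
{
	/* A shift of 1 is arbitrary here, standing in for th->th_x86_shift. */
	printf("shifted TSC: %u\n", tscp_low_sketch(1));
	return (0);
}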