Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F137217908
D29733.id87335.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
23 KB
Referenced Files
None
Subscribers
None
D29733.id87335.diff
View Options
Index: lib/libc/x86/sys/__vdso_gettc.c
===================================================================
--- lib/libc/x86/sys/__vdso_gettc.c
+++ lib/libc/x86/sys/__vdso_gettc.c
@@ -45,6 +45,7 @@
#include "un-namespace.h"
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/pvclock.h>
#include <machine/specialreg.h>
#include <dev/acpica/acpi_hpet.h>
#ifdef WANT_HYPERV
@@ -93,6 +94,20 @@
return (rdtsc_low(th));
}
+/*
+ * Execute LFENCE and then read the full 64-bit TSC value.
+ */
+static inline uint64_t
+rdtsc_mb_lfence(void)
+{
+ uint64_t tsc;
+
+ lfence();
+ tsc = rdtsc();
+ return (tsc);
+}
+
+/*
+ * Execute MFENCE and then read the full 64-bit TSC value.
+ */
+static inline uint64_t
+rdtsc_mb_mfence(void)
+{
+ uint64_t tsc;
+
+ mfence();
+ tsc = rdtsc();
+ return (tsc);
+}
+
static u_int
rdtsc32_mb_lfence(void)
{
@@ -120,24 +135,29 @@
}
struct tsc_selector_tag {
+ uint64_t (*ts_rdtsc)(void); /* reader returning the full 64-bit TSC */
u_int (*ts_rdtsc32)(void);
u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};
static const struct tsc_selector_tag tsc_selector[] = {
[0] = { /* Intel, LFENCE */
+ .ts_rdtsc = rdtsc_mb_lfence,
.ts_rdtsc32 = rdtsc32_mb_lfence,
.ts_rdtsc_low = rdtsc_low_mb_lfence,
},
[1] = { /* AMD, MFENCE */
+ .ts_rdtsc = rdtsc_mb_mfence,
.ts_rdtsc32 = rdtsc32_mb_mfence,
.ts_rdtsc_low = rdtsc_low_mb_mfence,
},
[2] = { /* No SSE2 */
+ .ts_rdtsc = rdtsc,
.ts_rdtsc32 = rdtsc32_mb_none,
.ts_rdtsc_low = rdtsc_low_mb_none,
},
[3] = { /* RDTSCP */
+ .ts_rdtsc = rdtscp,
.ts_rdtsc32 = rdtscp32_,
.ts_rdtsc_low = rdtscp_low,
},
@@ -184,6 +204,11 @@
return (amd_cpu ? 1 : 0);
}
+/*
+ * Resolver for the 64-bit TSC reader: pick the 'tsc_selector' entry chosen
+ * by tsc_selector_idx() for this CPU and hand back its 'ts_rdtsc' function.
+ */
+DEFINE_UIFUNC(static, uint64_t, __vdso_gettc_rdtsc, (void))
+{
+ const struct tsc_selector_tag *sel;
+
+ sel = &tsc_selector[tsc_selector_idx(cpu_feature)];
+ return (sel->ts_rdtsc);
+}
+
DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
(const struct vdso_timehands *th))
{
@@ -312,6 +337,123 @@
#endif /* WANT_HYPERV */
+static struct pvclock_vcpu_time_info *pvclock_vcpu0_info;
+
+/*
+ * Scale a 64-bit delta: first shift it by 'shift' bits (left when 'shift'
+ * is non-negative, right by '-shift' bits otherwise), then multiply the
+ * shifted value by the 32-bit fraction 'mul_frac' and drop the low 32 bits
+ * of the product, i.e. (delta * mul_frac) >> 32, yielding a 64-bit result.
+ */
+static inline uint64_t
+__vdso_pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
+{
+ uint64_t product;
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#if defined(__i386__)
+ {
+ uint32_t tmp1, tmp2;
+
+ /*
+ * For i386, the 64x32-bit multiply is split into two 32x32-bit
+ * multiplies; the formula looks like:
+ *
+ * lower = (mul_frac * (delta & UINT_MAX)) >> 32
+ * upper = mul_frac * (delta >> 32)
+ * product = lower + upper
+ */
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
+ "2" (mul_frac) );
+ }
+#elif defined(__amd64__)
+ {
+ unsigned long tmp;
+
+ __asm__ (
+ "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
+ : [lo]"=a" (product), [hi]"=d" (tmp)
+ : "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
+ }
+#else
+#error "pvclock: unsupported x86 architecture?"
+#endif
+
+ return (product);
+}
+
+/*
+ * Return the scaled distance between the current TSC value and the TSC
+ * timestamp recorded in 'ti', using the multiplier and shift that 'ti'
+ * carries.
+ */
+static inline uint64_t
+__vdso_pvclock_get_nsec_offset(struct pvclock_vcpu_time_info *ti)
+{
+ uint64_t now, elapsed;
+
+ now = __vdso_gettc_rdtsc();
+ elapsed = now - ti->tsc_timestamp;
+
+ return (__vdso_pvclock_scale_delta(elapsed, ti->tsc_to_system_mul,
+ ti->tsc_shift));
+}
+
+/*
+ * Compute the pvclock time described by 'ti' and store it, truncated to
+ * 'u_int', in '*tc'.
+ *
+ * The sample is retried until 'ti->version' is even and has the same value
+ * before and after the fields were read. Returns 0 on success, or ENOSYS
+ * when the sampled flags do not include 'PVCLOCK_FLAG_TSC_STABLE'.
+ */
+static int
+__vdso_pvclock_tsc(struct pvclock_vcpu_time_info *ti, u_int *tc)
+{
+ uint64_t nsec;
+ uint32_t seq;
+ uint8_t flags;
+
+ for (;;) {
+ seq = ti->version;
+ rmb();
+ nsec = ti->system_time + __vdso_pvclock_get_nsec_offset(ti);
+ flags = ti->flags;
+ rmb();
+ if ((ti->version & 1) == 0 && ti->version == seq)
+ break;
+ }
+
+ if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) {
+ *tc = nsec;
+ return (0);
+ }
+
+ /*
+ * The 'PVCLOCK_FLAG_TSC_STABLE' bit must have transitioned from set to
+ * unset between the time that the then-current 'struct vdso_timekeep'
+ * 'tk_enabled' flag was observed to be set and now. Return 'ENOSYS' to
+ * fall back to the syscall codepath.
+ */
+ return (ENOSYS);
+}
+
+/*
+ * One-time setup of the userspace mapping of the first 'struct
+ * pvclock_vcpu_time_info' exported by "/dev/" PVCLOCK_CDEVNAME.
+ *
+ * The caller that swaps 'pvclock_vcpu0_info' from NULL to MAP_FAILED owns
+ * the initialization; every other caller returns immediately. The pointer
+ * stays at MAP_FAILED when the process is in capability mode, when the
+ * device cannot be opened, or when mmap() itself fails; __vdso_gettc()
+ * answers ENOSYS for that value.
+ */
+static void
+__vdso_init_pvclock_tsc(void)
+{
+ int fd;
+ unsigned int mode;
+
+ if (atomic_cmpset_ptr((volatile uintptr_t *)&pvclock_vcpu0_info,
+ (uintptr_t)NULL, (uintptr_t)MAP_FAILED) == 0)
+ return;
+
+ if (cap_getmode(&mode) == 0 && mode != 0)
+ return;
+
+ /*
+ * O_CLOEXEC: this descriptor is private to libc and only lives until
+ * the _close() below; do not let it leak into a child if another
+ * thread forks and execs in the meantime.
+ */
+ fd = _open("/dev/" PVCLOCK_CDEVNAME, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return;
+
+ pvclock_vcpu0_info = mmap(NULL, sizeof(*pvclock_vcpu0_info),
+ PROT_READ, MAP_SHARED, fd, 0);
+
+ /* The mapping, if any, outlives the descriptor. */
+ _close(fd);
+}
+
#pragma weak __vdso_gettc
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
@@ -347,6 +489,12 @@
return (ENOSYS);
return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
+ case VDSO_TH_ALGO_X86_PVCLK:
+ if (pvclock_vcpu0_info == NULL)
+ __vdso_init_pvclock_tsc();
+ if (pvclock_vcpu0_info == MAP_FAILED)
+ return (ENOSYS);
+ return (__vdso_pvclock_tsc(pvclock_vcpu0_info, tc));
default:
return (ENOSYS);
}
Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC
+++ sys/amd64/conf/GENERIC
@@ -376,6 +376,9 @@
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
+# Linux KVM paravirtualization support
+device kvm_clock # KVM paravirtual clock driver
+
# HyperV drivers and enhancement support
device hyperv # HyperV drivers
Index: sys/amd64/conf/MINIMAL
===================================================================
--- sys/amd64/conf/MINIMAL
+++ sys/amd64/conf/MINIMAL
@@ -144,6 +144,9 @@
# Note that 'bpf' is required for DHCP.
device bpf # Berkeley packet filter
+# Linux KVM paravirtualization support
+device kvm_clock # KVM paravirtual clock driver
+
# Xen HVM Guest Optimizations
# NOTE: XENHVM depends on xenpci. They must be added or removed together.
options XENHVM # Xen HVM kernel infrastructure
Index: sys/amd64/conf/NOTES
===================================================================
--- sys/amd64/conf/NOTES
+++ sys/amd64/conf/NOTES
@@ -499,6 +499,9 @@
device virtio_random # VirtIO Entropy device
device virtio_console # VirtIO Console device
+# Linux KVM paravirtualization support
+device kvm_clock # KVM paravirtual clock driver
+
# Microsoft Hyper-V enhancement support
device hyperv # HyperV drivers
Index: sys/conf/files.x86
===================================================================
--- sys/conf/files.x86
+++ sys/conf/files.x86
@@ -276,6 +276,7 @@
dev/isci/scil/scif_sas_task_request_states.c optional isci
dev/isci/scil/scif_sas_timer.c optional isci
dev/itwd/itwd.c optional itwd
+dev/kvm_clock/kvm_clock.c optional kvm_clock
dev/qat/qat.c optional qat
dev/qat/qat_ae.c optional qat
dev/qat/qat_c2xxx.c optional qat
Index: sys/dev/kvm_clock/kvm_clock.c
===================================================================
--- /dev/null
+++ sys/dev/kvm_clock/kvm_clock.c
@@ -0,0 +1,406 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
+ * Copyright (c) 2021 Mathieu Chouquet-Stringer
+ * Copyright (c) 2021 Juniper Networks, Inc.
+ * Copyright (c) 2021 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Linux KVM paravirtual clock support
+ *
+ * References:
+ * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
+ * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/clock.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mman.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/timetc.h>
+#include <sys/vdso.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include <machine/pvclock.h>
+#include <x86/kvm.h>
+
+#include "clock_if.h"
+
+#define KVM_CLOCK_DEVNAME "kvmclock"
+#define KVM_CLOCK_RESOLUTION 1
+#define KVM_CLOCK_TC_FREQUENCY 1000000000ULL
+/*
+ * Note: Chosen to be (1) above HPET's value (always 950), (2) above the TSC's
+ * default value of 800, and (3) below the TSC's value when it supports the
+ * "Invariant TSC" feature and is believed to be synchronized across all CPUs.
+ */
+#define KVM_CLOCK_TC_QUALITY 975
+
+struct kvm_clock_softc {
+ struct timecounter tc; /* timecounter registered via tc_init() */
+ struct pvclock_wall_clock wc; /* buffer whose address is written to msr_wc */
+ struct cdev *cdev; /* mmap device; NULL if make_dev_s() failed */
+ struct pvclock_vcpu_time_info *timeinfos; /* kernel mapping, indexed by CPU */
+ vm_object_t timeinfos_obj; /* VM object holding the 'timeinfos' pages */
+ vm_size_t timeinfos_size; /* page-rounded size of 'timeinfos' in bytes */
+ u_int msr_tc; /* system time MSR number */
+ u_int msr_wc; /* wall clock MSR number */
+ bool stable_flag_supported; /* KVM_FEATURE_CLOCKSOURCE_STABLE_BIT seen */
+};
+
+static u_int kvm_clock_get_timecount(struct timecounter *);
+static void kvm_clock_system_time_enable(struct kvm_clock_softc *sc);
+static void kvm_clock_system_time_enable_pcpu(void *arg);
+static uint32_t kvm_clock_vdso_timehands(struct vdso_timehands *vdso_th,
+ struct timecounter *tc);
+#ifdef COMPAT_FREEBSD32
+static uint32_t kvm_clock_vdso_timehands32(
+ struct vdso_timehands32 *vdso_th, struct timecounter *tc);
+#endif
+
+static d_open_t kvm_clock_open;
+static d_mmap_single_t kvm_clock_mmap_single;
+
+static struct cdevsw kvm_clock_cdevsw = {
+ .d_version = D_VERSION,
+ .d_name = KVM_CLOCK_DEVNAME,
+ .d_open = kvm_clock_open, /* refuses opens for writing */
+ .d_mmap_single = kvm_clock_mmap_single, /* read-only time info mapping */
+};
+
+static devclass_t kvm_clock_devclass;
+
+/*
+ * Timecounter read method: sample the current CPU's pvclock time info entry
+ * and return the low 'u_int'-sized part of the 64-bit result.
+ */
+static u_int
+kvm_clock_get_timecount(struct timecounter *tc)
+{
+ struct kvm_clock_softc *sc;
+ uint64_t now;
+
+ sc = tc->tc_priv;
+
+ /* 'curcpu' is sampled and its entry read inside one critical section. */
+ critical_enter();
+ now = pvclock_get_timecount(&sc->timeinfos[curcpu]);
+ critical_exit();
+
+ return (now & UINT_MAX);
+}
+
+/*
+ * Run kvm_clock_system_time_enable_pcpu() through smp_rendezvous(), with
+ * 'sc' as its argument and no setup or teardown callbacks.
+ */
+static void
+kvm_clock_system_time_enable(struct kvm_clock_softc *sc)
+{
+ smp_rendezvous(NULL, kvm_clock_system_time_enable_pcpu, NULL, sc);
+}
+
+/*
+ * Rendezvous callback: point the system time MSR at the current CPU's time
+ * info entry. 'arg' is the driver softc.
+ */
+static void
+kvm_clock_system_time_enable_pcpu(void *arg)
+{
+ struct kvm_clock_softc *sc;
+ uint64_t msrval;
+
+ sc = arg;
+
+ /*
+ * See [2]; the MSR takes the physical address of the entry, and its
+ * lsb is the system time enable bit.
+ */
+ msrval = vtophys(&sc->timeinfos[curcpu]) | 1;
+ wrmsr(sc->msr_tc, msrval);
+}
+
+/*
+ * Fill the machine-dependent part of 'vdso_th' for this timecounter and
+ * report whether userspace may use it: non-zero only when the mmap device
+ * exists, the hypervisor advertised the stable bit feature, and the first
+ * time info entry currently has 'PVCLOCK_FLAG_TSC_STABLE' set.
+ */
+static uint32_t
+kvm_clock_vdso_timehands(struct vdso_timehands *vdso_th,
+ struct timecounter *tc)
+{
+ struct kvm_clock_softc *sc;
+
+ sc = tc->tc_priv;
+
+ vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
+ vdso_th->th_x86_shift = 0;
+ vdso_th->th_x86_hpet_idx = 0;
+ bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
+
+ if (sc->cdev == NULL || !sc->stable_flag_supported)
+ return (0);
+
+ return ((sc->timeinfos[0].flags & PVCLOCK_FLAG_TSC_STABLE) != 0);
+}
+
+#ifdef COMPAT_FREEBSD32
+/*
+ * 32-bit compat variant of kvm_clock_vdso_timehands(): fill the
+ * machine-dependent part of 'vdso_th' and return non-zero only when the
+ * mmap device exists, the stable bit feature was advertised, and the first
+ * time info entry currently has 'PVCLOCK_FLAG_TSC_STABLE' set.
+ */
+static uint32_t
+kvm_clock_vdso_timehands32(struct vdso_timehands32 *vdso_th,
+ struct timecounter *tc)
+{
+ struct kvm_clock_softc *sc;
+
+ sc = tc->tc_priv;
+
+ vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
+ vdso_th->th_x86_shift = 0;
+ vdso_th->th_x86_hpet_idx = 0;
+ bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
+
+ if (sc->cdev == NULL || !sc->stable_flag_supported)
+ return (0);
+
+ return ((sc->timeinfos[0].flags & PVCLOCK_FLAG_TSC_STABLE) != 0);
+}
+#endif
+
+/*
+ * d_open handler: the device is read-only, so an open that requests write
+ * access fails with EPERM; any other open succeeds.
+ */
+static int
+kvm_clock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ return ((oflags & FWRITE) != 0 ? EPERM : 0);
+}
+
+/*
+ * d_mmap_single handler: expose the pvclock time info page(s) to userspace.
+ *
+ * Only read-only mappings that start at offset 0, cover at least one
+ * 'struct pvclock_vcpu_time_info', and do not exceed the size of the
+ * backing object are accepted; anything else fails with EINVAL. On success
+ * a new reference on the backing VM object is returned through 'object'.
+ */
+static int
+kvm_clock_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+ struct vm_object **object, int nprot)
+{
+ struct kvm_clock_softc *sc = cdev->si_drv1;
+
+ if (PROT_EXTRACT(nprot) != PROT_READ)
+ return (EINVAL);
+
+ /*
+ * One bounds check is enough: with the offset pinned to 0 the end of
+ * the mapping is simply 'size', so no 'offset + size' sum (mixing a
+ * signed offset with an unsigned size) needs to be formed.
+ */
+ if (*offset != 0 || size < sizeof(sc->timeinfos[0]) ||
+ size > sc->timeinfos_size)
+ return (EINVAL);
+
+ vm_object_reference(sc->timeinfos_obj);
+ *object = sc->timeinfos_obj;
+
+ return (0);
+}
+
+/*
+ * Bus identify method: add a single KVM_CLOCK_DEVNAME child to 'parent'
+ * when the KVM features leaf advertises either clocksource feature and no
+ * such child exists yet.
+ */
+static void
+kvm_clock_identify(driver_t *driver, device_t parent)
+{
+ u_int regs[4];
+
+ kvm_cpuid_get_features(regs);
+ if ((regs[0] &
+ (KVM_FEATURE_CLOCKSOURCE | KVM_FEATURE_CLOCKSOURCE2)) == 0)
+ return;
+
+ if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1) != NULL)
+ return;
+
+ BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
+}
+
+/*
+ * Probe method: set the device description and claim the device with
+ * BUS_PROBE_DEFAULT priority.
+ */
+static int
+kvm_clock_probe(device_t dev)
+{
+ device_set_desc(dev, "KVM paravirtual clock");
+
+ return (BUS_PROBE_DEFAULT);
+}
+
+static int
+kvm_clock_attach(device_t dev)
+{
+ struct make_dev_args mda;
+ u_int regs[4];
+ vm_page_t m;
+ struct kvm_clock_softc *sc;
+ vm_size_t n;
+ int err;
+
+ sc = device_get_softc(dev);
+
+ /*
+ * Process KVM "features" CPUID leaf content: use the "new" MSR pair
+ * when KVM_FEATURE_CLOCKSOURCE2 is advertised, fall back to the
+ * deprecated pair for KVM_FEATURE_CLOCKSOURCE, and refuse to attach
+ * (ENXIO) when neither feature is present.
+ */
+ kvm_cpuid_get_features(regs);
+ if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
+ sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
+ sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
+ } else if ((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0) {
+ sc->msr_tc = KVM_MSR_SYSTEM_TIME;
+ sc->msr_wc = KVM_MSR_WALL_CLOCK;
+ } else
+ return (ENXIO);
+
+ sc->stable_flag_supported =
+ ((regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0);
+
+ /*
+ * Set up 'struct pvclock_vcpu_time_info' page(s): one entry per CPU,
+ * rounded up to whole pages. The pages live in an OBJT_PHYS VM object
+ * (the same object kvm_clock_mmap_single() hands out) and are mapped
+ * into the reserved kernel virtual address range with pmap_qenter().
+ */
+ sc->timeinfos_size = round_page(mp_ncpus *
+ sizeof(struct pvclock_vcpu_time_info));
+
+ sc->timeinfos =
+ (struct pvclock_vcpu_time_info *)kva_alloc(sc->timeinfos_size);
+ if (sc->timeinfos == NULL)
+ return (ENOMEM);
+
+ sc->timeinfos_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ sc->timeinfos_size, PROT_READ | PROT_WRITE, 0, NULL);
+ if (sc->timeinfos_obj == NULL) {
+ kva_free((vm_offset_t)sc->timeinfos, sc->timeinfos_size);
+ return (ENOMEM);
+ }
+
+ VM_OBJECT_WLOCK(sc->timeinfos_obj);
+ for (n = 0; n < sc->timeinfos_size / PAGE_SIZE; n++) {
+ m = vm_page_grab(sc->timeinfos_obj, n, VM_ALLOC_ZERO |
+ VM_ALLOC_WIRED);
+ vm_page_valid(m);
+ vm_page_xunbusy(m);
+ pmap_qenter((vm_offset_t)sc->timeinfos + n * PAGE_SIZE, &m, 1);
+ }
+ VM_OBJECT_WUNLOCK(sc->timeinfos_obj);
+
+ kvm_clock_system_time_enable(sc);
+
+ /*
+ * Set up cdev for userspace mmapping of the 'struct
+ * pvclock_vcpu_time_info' page(s). Failure is not fatal: attach
+ * continues and the timecounter is still registered, but 'sc->cdev'
+ * stays NULL, which makes the vDSO timehands callbacks return 0.
+ */
+ make_dev_args_init(&mda);
+ mda.mda_devsw = &kvm_clock_cdevsw;
+ mda.mda_uid = UID_ROOT;
+ mda.mda_gid = GID_WHEEL;
+ mda.mda_mode = 0444;
+ mda.mda_si_drv1 = sc;
+ err = make_dev_s(&mda, &sc->cdev, PVCLOCK_CDEVNAME);
+ if (err != 0) {
+ device_printf(dev, "Could not create /dev/%s, error %d. Fast "
+ "time of day will be unavailable for this timecounter.\n",
+ PVCLOCK_CDEVNAME, err);
+ KASSERT(sc->cdev == NULL, ("make_dev_s unexpectedly inited "
+ "cdev"));
+ }
+
+ /*
+ * Set up and register timecounter:
+ * Regarding 'tc_flags': Since the KVM MSR documentation does not
+ * specifically discuss suspend/resume scenarios, conservatively
+ * leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume the system time
+ * must be re-inited in such cases (kvm_clock_resume() does so).
+ */
+ sc->tc.tc_get_timecount = kvm_clock_get_timecount;
+ sc->tc.tc_poll_pps = NULL;
+ sc->tc.tc_counter_mask = ~0U;
+ sc->tc.tc_frequency = KVM_CLOCK_TC_FREQUENCY;
+ sc->tc.tc_name = KVM_CLOCK_DEVNAME;
+ sc->tc.tc_quality = KVM_CLOCK_TC_QUALITY;
+ sc->tc.tc_flags = 0;
+ sc->tc.tc_priv = sc;
+ sc->tc.tc_fill_vdso_timehands = kvm_clock_vdso_timehands;
+#ifdef COMPAT_FREEBSD32
+ sc->tc.tc_fill_vdso_timehands32 = kvm_clock_vdso_timehands32;
+#endif
+
+ tc_init(&sc->tc);
+
+ /* Register wall clock (served by kvm_clock_gettime()): */
+ clock_register(dev, KVM_CLOCK_RESOLUTION);
+
+ return (0);
+}
+
+/*
+ * Detach method: always refuses with EBUSY.
+ */
+static int
+kvm_clock_detach(device_t dev)
+{
+ /*
+ * A timecounter registered with 'tc_init()' cannot be unregistered,
+ * as there is no teardown counterpart, so detaching is not currently
+ * possible.
+ */
+ return (EBUSY);
+}
+
+/*
+ * Suspend method: performs no work and always reports success.
+ */
+static int
+kvm_clock_suspend(device_t dev)
+{
+ return (0);
+}
+
+/*
+ * Resume method: rewrite the system time MSR on the CPUs, call
+ * pvclock_resume(), and re-initialize the time of day via inittodr().
+ */
+static int
+kvm_clock_resume(device_t dev)
+{
+ struct kvm_clock_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ kvm_clock_system_time_enable(sc);
+ pvclock_resume();
+ inittodr(time_second);
+
+ return (0);
+}
+
+/*
+ * clock_gettime method: store in '*ts' the wall clock value obtained
+ * through the wall clock MSR plus the current pvclock system time.
+ * Always returns 0.
+ */
+static int
+kvm_clock_gettime(device_t dev, struct timespec *ts)
+{
+ struct kvm_clock_softc *sc = device_get_softc(dev);
+ struct timespec elapsed;
+ uint64_t nsec;
+
+ /*
+ * Write the wall clock MSR, read the wall clock, and sample the
+ * current CPU's system time within one critical section.
+ */
+ critical_enter();
+ wrmsr(sc->msr_wc, vtophys(&sc->wc));
+ pvclock_get_wallclock(&sc->wc, ts);
+ nsec = pvclock_get_timecount(&sc->timeinfos[curcpu]);
+ critical_exit();
+
+ /* Split the nanosecond count into seconds and nanoseconds. */
+ elapsed.tv_sec = nsec / 1000000000ULL;
+ elapsed.tv_nsec = nsec % 1000000000ULL;
+
+ timespecadd(ts, &elapsed, ts);
+
+ return (0);
+}
+
+/*
+ * clock_settime method: setting the clock is not implemented; always
+ * returns ENODEV.
+ */
+static int
+kvm_clock_settime(device_t dev, struct timespec *ts)
+{
+ return (ENODEV);
+}
+
+static device_method_t kvm_clock_methods[] = {
+ DEVMETHOD(device_identify, kvm_clock_identify),
+ DEVMETHOD(device_probe, kvm_clock_probe),
+ DEVMETHOD(device_attach, kvm_clock_attach),
+ DEVMETHOD(device_detach, kvm_clock_detach),
+ DEVMETHOD(device_suspend, kvm_clock_suspend),
+ DEVMETHOD(device_resume, kvm_clock_resume),
+ /* Clock interface ("clock_if.h"): wall clock get and set methods. */
+ DEVMETHOD(clock_gettime, kvm_clock_gettime),
+ DEVMETHOD(clock_settime, kvm_clock_settime),
+
+ DEVMETHOD_END
+};
+
+static driver_t kvm_clock_driver = {
+ KVM_CLOCK_DEVNAME, /* driver name */
+ kvm_clock_methods, /* method table */
+ sizeof(struct kvm_clock_softc), /* softc size */
+};
+
+DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, kvm_clock_devclass, 0, 0);
Index: sys/i386/conf/GENERIC
===================================================================
--- sys/i386/conf/GENERIC
+++ sys/i386/conf/GENERIC
@@ -338,6 +338,9 @@
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
+# Linux KVM paravirtualization support
+device kvm_clock # KVM paravirtual clock driver
+
# HyperV drivers and enhancement support
device hyperv # HyperV drivers
Index: sys/i386/conf/MINIMAL
===================================================================
--- sys/i386/conf/MINIMAL
+++ sys/i386/conf/MINIMAL
@@ -145,6 +145,9 @@
# Note that 'bpf' is required for DHCP.
device bpf # Berkeley packet filter
+# Linux KVM paravirtualization support
+device kvm_clock # KVM paravirtual clock driver
+
# Xen HVM Guest Optimizations
# NOTE: XENHVM depends on xenpci. They must be added or removed together.
options XENHVM # Xen HVM kernel infrastructure
Index: sys/i386/conf/NOTES
===================================================================
--- sys/i386/conf/NOTES
+++ sys/i386/conf/NOTES
@@ -719,6 +719,9 @@
device virtio_random # VirtIO Entropy device
device virtio_console # VirtIO Console device
+# Linux KVM paravirtualization support
+device kvm_clock # KVM paravirtual clock driver
+
device hyperv # HyperV drivers
#####################################################################
Index: sys/x86/include/kvm.h
===================================================================
--- /dev/null
+++ sys/x86/include/kvm.h
@@ -0,0 +1,80 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
+ * Copyright (c) 2021 Mathieu Chouquet-Stringer
+ * Copyright (c) 2021 Juniper Networks, Inc.
+ * Copyright (c) 2021 Klara, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Linux KVM paravirtualization: common definitions
+ *
+ * References:
+ * - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
+ * - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
+ */
+
+#ifndef _X86_KVM_H_
+#define _X86_KVM_H_
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/md_var.h>
+
+#define KVM_CPUID_SIGNATURE 0x40000000
+#define KVM_CPUID_FEATURES_LEAF 0x40000001
+
+#define KVM_FEATURE_CLOCKSOURCE 0x00000001
+#define KVM_FEATURE_CLOCKSOURCE2 0x00000008
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0x01000000
+
+/* Deprecated: for the CLOCKSOURCE feature. */
+#define KVM_MSR_WALL_CLOCK 0x11
+#define KVM_MSR_SYSTEM_TIME 0x12
+
+#define KVM_MSR_WALL_CLOCK_NEW 0x4b564d00
+#define KVM_MSR_SYSTEM_TIME_NEW 0x4b564d01
+
+/*
+ * Report whether the KVM features CPUID leaf may be queried: the guest
+ * must be identified as KVM, and the leaf number must lie above 'hv_base'
+ * and not beyond 'hv_high'.
+ */
+static inline bool
+kvm_cpuid_features_leaf_supported(void)
+{
+ if (vm_guest != VM_GUEST_KVM)
+ return (false);
+
+ return (hv_base < KVM_CPUID_FEATURES_LEAF &&
+ KVM_CPUID_FEATURES_LEAF <= hv_high);
+}
+
+/*
+ * Fill the four-element array 'regs' with the output of the KVM features
+ * CPUID leaf, or with zeroes when that leaf is not supported.
+ */
+static inline void
+kvm_cpuid_get_features(u_int *regs)
+{
+ if (kvm_cpuid_features_leaf_supported()) {
+ do_cpuid(KVM_CPUID_FEATURES_LEAF, regs);
+ return;
+ }
+
+ regs[0] = 0;
+ regs[1] = 0;
+ regs[2] = 0;
+ regs[3] = 0;
+}
+
+#endif /* !_X86_KVM_H_ */
Index: sys/x86/include/pvclock.h
===================================================================
--- sys/x86/include/pvclock.h
+++ sys/x86/include/pvclock.h
@@ -29,6 +29,8 @@
#ifndef X86_PVCLOCK
#define X86_PVCLOCK
+#define PVCLOCK_CDEVNAME "pvclock"
+
struct pvclock_vcpu_time_info {
uint32_t version;
uint32_t pad0;
@@ -43,6 +45,8 @@
#define PVCLOCK_FLAG_TSC_STABLE 0x01
#define PVCLOCK_FLAG_GUEST_PASUED 0x02
+#ifdef _KERNEL
+
struct pvclock_wall_clock {
uint32_t version;
uint32_t sec;
@@ -56,4 +60,6 @@
void pvclock_get_wallclock(struct pvclock_wall_clock *wc,
struct timespec *ts);
+#endif /* _KERNEL */
+
#endif
Index: sys/x86/include/vdso.h
===================================================================
--- sys/x86/include/vdso.h
+++ sys/x86/include/vdso.h
@@ -42,6 +42,7 @@
#define VDSO_TH_ALGO_X86_TSC VDSO_TH_ALGO_1
#define VDSO_TH_ALGO_X86_HPET VDSO_TH_ALGO_2
#define VDSO_TH_ALGO_X86_HVTSC VDSO_TH_ALGO_3 /* Hyper-V ref. TSC */
+#define VDSO_TH_ALGO_X86_PVCLK VDSO_TH_ALGO_4 /* KVM/XEN paravirtual clock */
#ifdef _KERNEL
#ifdef COMPAT_FREEBSD32
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 22, 1:58 PM (11 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25972370
Default Alt Text
D29733.id87335.diff (23 KB)
Attached To
Mode
D29733: kvmclock driver with vDSO support
Attached
Detach File
Event Timeline
Log In to Comment