Index: head/sys/dev/hyperv/include/hyperv.h =================================================================== --- head/sys/dev/hyperv/include/hyperv.h (revision 311742) +++ head/sys/dev/hyperv/include/hyperv.h (revision 311743) @@ -1,88 +1,96 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _HYPERV_H_ #define _HYPERV_H_ #ifdef _KERNEL #include #include #define MSR_HV_TIME_REF_COUNT 0x40000020 #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ #define CPUID_HV_MSR_SYNIC 0x0004 /* MSRs for SynIC */ #define CPUID_HV_MSR_SYNTIMER 0x0008 /* MSRs for SynTimer */ #define CPUID_HV_MSR_APIC 0x0010 /* MSR_HV_{EOI,ICR,TPR} */ #define CPUID_HV_MSR_HYPERCALL 0x0020 /* MSR_HV_GUEST_OS_ID * MSR_HV_HYPERCALL */ #define CPUID_HV_MSR_VP_INDEX 0x0040 /* MSR_HV_VP_INDEX */ #define CPUID_HV_MSR_REFERENCE_TSC 0x0200 /* MSR_HV_REFERENCE_TSC */ #define CPUID_HV_MSR_GUEST_IDLE 0x0400 /* MSR_HV_GUEST_IDLE */ #ifndef NANOSEC #define NANOSEC 1000000000ULL #endif #define HYPERV_TIMER_NS_FACTOR 100ULL #define HYPERV_TIMER_FREQ (NANOSEC / HYPERV_TIMER_NS_FACTOR) #endif /* _KERNEL */ #define HYPERV_REFTSC_DEVNAME "hv_tsc" /* * Hyper-V Reference TSC */ struct hyperv_reftsc { volatile uint32_t tsc_seq; volatile uint32_t tsc_rsvd1; volatile uint64_t tsc_scale; volatile int64_t tsc_ofs; } __packed __aligned(PAGE_SIZE); #ifdef CTASSERT CTASSERT(sizeof(struct hyperv_reftsc) == PAGE_SIZE); #endif #ifdef _KERNEL struct hyperv_guid { uint8_t hv_guid[16]; } __packed; #define HYPERV_GUID_STRLEN 40 -int hyperv_guid2str(const struct hyperv_guid *, char *, size_t); +typedef uint64_t (*hyperv_tc64_t)(void); -extern u_int hyperv_features; /* CPUID_HV_MSR_ */ +int hyperv_guid2str(const struct hyperv_guid *, char *, + size_t); + +/* + * hyperv_tc64 could be NULL, if there were no suitable Hyper-V + * specific timecounter. 
+ */ +extern hyperv_tc64_t hyperv_tc64; +extern u_int hyperv_features; /* CPUID_HV_MSR_ */ #endif /* _KERNEL */ #endif /* _HYPERV_H_ */ Index: head/sys/dev/hyperv/utilities/vmbus_timesync.c =================================================================== --- head/sys/dev/hyperv/utilities/vmbus_timesync.c (revision 311742) +++ head/sys/dev/hyperv/utilities/vmbus_timesync.c (revision 311743) @@ -1,261 +1,260 @@ /*- * Copyright (c) 2014,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define VMBUS_TIMESYNC_FWVER_MAJOR 3 #define VMBUS_TIMESYNC_FWVER \ VMBUS_IC_VERSION(VMBUS_TIMESYNC_FWVER_MAJOR, 0) #define VMBUS_TIMESYNC_MSGVER_MAJOR 4 #define VMBUS_TIMESYNC_MSGVER \ VMBUS_IC_VERSION(VMBUS_TIMESYNC_MSGVER_MAJOR, 0) #define VMBUS_TIMESYNC_MSGVER4(sc) \ VMBUS_ICVER_LE(VMBUS_IC_VERSION(4, 0), (sc)->ic_msgver) #define VMBUS_TIMESYNC_DORTT(sc) \ - (VMBUS_TIMESYNC_MSGVER4((sc)) &&\ - (hyperv_features & CPUID_HV_MSR_TIME_REFCNT)) + (VMBUS_TIMESYNC_MSGVER4((sc)) && hyperv_tc64 != NULL) static int vmbus_timesync_probe(device_t); static int vmbus_timesync_attach(device_t); static const struct vmbus_ic_desc vmbus_timesync_descs[] = { { .ic_guid = { .hv_guid = { 0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf } }, .ic_desc = "Hyper-V Timesync" }, VMBUS_IC_DESC_END }; static device_method_t vmbus_timesync_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_timesync_probe), DEVMETHOD(device_attach, vmbus_timesync_attach), DEVMETHOD(device_detach, vmbus_ic_detach), DEVMETHOD_END }; static driver_t vmbus_timesync_driver = { "hvtimesync", vmbus_timesync_methods, sizeof(struct vmbus_ic_softc) }; static devclass_t vmbus_timesync_devclass; DRIVER_MODULE(hv_timesync, vmbus, vmbus_timesync_driver, vmbus_timesync_devclass, NULL, NULL); MODULE_VERSION(hv_timesync, 1); MODULE_DEPEND(hv_timesync, vmbus, 1, 1, 1); SYSCTL_NODE(_hw, OID_AUTO, hvtimesync, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Hyper-V timesync interface"); static int vmbus_ts_ignore_sync = 0; SYSCTL_INT(_hw_hvtimesync, OID_AUTO, ignore_sync, CTLFLAG_RWTUN, &vmbus_ts_ignore_sync, 0, "Ignore the sync request."); /* * Trigger sample sync when drift exceeds threshold (ms). * Ignore the sample request when set to 0. 
*/ static int vmbus_ts_sample_thresh = 100; SYSCTL_INT(_hw_hvtimesync, OID_AUTO, sample_thresh, CTLFLAG_RWTUN, &vmbus_ts_sample_thresh, 0, "Threshold that makes sample request trigger the sync (unit: ms)."); static int vmbus_ts_sample_verbose = 0; SYSCTL_INT(_hw_hvtimesync, OID_AUTO, sample_verbose, CTLFLAG_RWTUN, &vmbus_ts_sample_verbose, 0, "Increase sample request verbosity."); static void vmbus_timesync(struct vmbus_ic_softc *sc, uint64_t hvtime, uint64_t sent_tc, uint8_t tsflags) { struct timespec vm_ts; uint64_t hv_ns, vm_ns, rtt = 0; if (VMBUS_TIMESYNC_DORTT(sc)) - rtt = rdmsr(MSR_HV_TIME_REF_COUNT) - sent_tc; + rtt = hyperv_tc64() - sent_tc; hv_ns = (hvtime - VMBUS_ICMSG_TS_BASE + rtt) * HYPERV_TIMER_NS_FACTOR; nanotime(&vm_ts); vm_ns = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec; if ((tsflags & VMBUS_ICMSG_TS_FLAG_SYNC) && !vmbus_ts_ignore_sync) { struct timespec hv_ts; if (bootverbose) { device_printf(sc->ic_dev, "apply sync request, " "hv: %ju, vm: %ju\n", (uintmax_t)hv_ns, (uintmax_t)vm_ns); } hv_ts.tv_sec = hv_ns / NANOSEC; hv_ts.tv_nsec = hv_ns % NANOSEC; kern_clock_settime(curthread, CLOCK_REALTIME, &hv_ts); /* Done! 
*/ return; } /* NOTE(review): the comment above vmbus_ts_sample_thresh says that a value of 0 ignores sample requests, but the >= 0 test below only skips them for negative values; with 0, any drift of at least 1 ms is applied. Confirm which behaviour is intended. */ if ((tsflags & VMBUS_ICMSG_TS_FLAG_SAMPLE) && vmbus_ts_sample_thresh >= 0) { int64_t diff; if (vmbus_ts_sample_verbose) { device_printf(sc->ic_dev, "sample request, " "hv: %ju, vm: %ju\n", (uintmax_t)hv_ns, (uintmax_t)vm_ns); } /* Absolute difference between host and guest time, in nanoseconds. */ if (hv_ns > vm_ns) diff = hv_ns - vm_ns; else diff = vm_ns - hv_ns; /* nanosec -> millisec */ diff /= 1000000; if (diff > vmbus_ts_sample_thresh) { struct timespec hv_ts; if (bootverbose) { device_printf(sc->ic_dev, "apply sample request, hv: %ju, vm: %ju\n", (uintmax_t)hv_ns, (uintmax_t)vm_ns); } hv_ts.tv_sec = hv_ns / NANOSEC; hv_ts.tv_nsec = hv_ns % NANOSEC; /* The result of kern_clock_settime() is not checked. */ kern_clock_settime(curthread, CLOCK_REALTIME, &hv_ts); } /* Done */ return; } } /* Channel callback: receive one IC message into sc->ic_buf and validate its length before it is dispatched on the message type. */ static void vmbus_timesync_cb(struct vmbus_channel *chan, void *xsc) { struct vmbus_ic_softc *sc = xsc; struct vmbus_icmsg_hdr *hdr; int dlen, error; uint64_t xactid; void *data; /* * Receive request. */ data = sc->ic_buf; dlen = sc->ic_buflen; error = vmbus_chan_recv(chan, data, &dlen, &xactid); KASSERT(error != ENOBUFS, ("icbuf is not large enough")); if (error) return; /* NOTE(review): dlen is an int compared against a size_t, so a negative dlen would convert to a large unsigned value and slip past this check; presumably vmbus_chan_recv() never reports a negative length -- verify. */ if (dlen < sizeof(*hdr)) { device_printf(sc->ic_dev, "invalid data len %d\n", dlen); return; } hdr = data; /* * Update request, which will be echoed back as response. 
*/ switch (hdr->ic_type) { case VMBUS_ICMSG_TYPE_NEGOTIATE: error = vmbus_ic_negomsg(sc, data, &dlen, VMBUS_TIMESYNC_FWVER, VMBUS_TIMESYNC_MSGVER); if (error) return; if (VMBUS_TIMESYNC_DORTT(sc)) device_printf(sc->ic_dev, "RTT\n"); break; case VMBUS_ICMSG_TYPE_TIMESYNC: if (VMBUS_TIMESYNC_MSGVER4(sc)) { const struct vmbus_icmsg_timesync4 *msg4; if (dlen < sizeof(*msg4)) { device_printf(sc->ic_dev, "invalid timesync4 " "len %d\n", dlen); return; } msg4 = data; vmbus_timesync(sc, msg4->ic_hvtime, msg4->ic_sent_tc, msg4->ic_tsflags); } else { const struct vmbus_icmsg_timesync *msg; if (dlen < sizeof(*msg)) { device_printf(sc->ic_dev, "invalid timesync " "len %d\n", dlen); return; } msg = data; vmbus_timesync(sc, msg->ic_hvtime, 0, msg->ic_tsflags); } break; default: device_printf(sc->ic_dev, "got 0x%08x icmsg\n", hdr->ic_type); break; } /* * Send response by echoing the request back. */ vmbus_ic_sendresp(sc, chan, data, dlen, xactid); } static int vmbus_timesync_probe(device_t dev) { return (vmbus_ic_probe(dev, vmbus_timesync_descs)); } static int vmbus_timesync_attach(device_t dev) { return (vmbus_ic_attach(dev, vmbus_timesync_cb)); } Index: head/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c =================================================================== --- head/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c (revision 311742) +++ head/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c (revision 311743) @@ -1,219 +1,233 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct hyperv_reftsc_ctx { struct hyperv_reftsc *tsc_ref; struct hyperv_dma tsc_ref_dma; }; static uint32_t hyperv_tsc_vdso_timehands( struct vdso_timehands *, struct timecounter *); static d_open_t hyperv_tsc_open; static d_mmap_t hyperv_tsc_mmap; static struct timecounter hyperv_tsc_timecounter = { .tc_get_timecount = NULL, /* based on CPU vendor. 
*/ .tc_counter_mask = 0xffffffff, .tc_frequency = HYPERV_TIMER_FREQ, .tc_name = "Hyper-V-TSC", .tc_quality = 3000, .tc_fill_vdso_timehands = hyperv_tsc_vdso_timehands, }; static struct cdevsw hyperv_tsc_cdevsw = { .d_version = D_VERSION, .d_open = hyperv_tsc_open, .d_mmap = hyperv_tsc_mmap, .d_name = HYPERV_REFTSC_DEVNAME }; static struct hyperv_reftsc_ctx hyperv_ref_tsc; /* Issue a hypercall by calling through hc_addr: in_val is passed in rcx, in_paddr in rdx, out_paddr in r8, and the status is read back from rax. NOTE(review): r8 is loaded by a separate asm statement, and the call statement neither names r8 as an input nor lists any clobbers, so nothing visible here ties the two statements together; consider merging them into a single asm statement -- confirm against the compiler's extended-asm rules and the Hyper-V hypercall calling convention. */ uint64_t hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr, uint64_t out_paddr) { uint64_t status; __asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8"); __asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (in_val), "d" (in_paddr), "m" (hc_addr)); return (status); } /* Open handler for the reference TSC device: an open that requests write access fails with EPERM, any other open succeeds. */ static int hyperv_tsc_open(struct cdev *dev __unused, int oflags, int devtype __unused, struct thread *td __unused) { if (oflags & FWRITE) return (EPERM); return (0); } /* mmap handler for the reference TSC device; asserts that the reference TSC page has been set up before it is used. */ static int hyperv_tsc_mmap(struct cdev *dev __unused, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot __unused, vm_memattr_t *memattr __unused) { KASSERT(hyperv_ref_tsc.tsc_ref != NULL, ("reftsc has not been setup")); /* * NOTE: * 'nprot' does not contain information interested to us; * WR-open is blocked by d_open. 
*/ if (offset != 0) return (EOPNOTSUPP); *paddr = hyperv_ref_tsc.tsc_ref_dma.hv_paddr; return (0); } static uint32_t hyperv_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc __unused) { vdso_th->th_algo = VDSO_TH_ALGO_X86_HVTSC; vdso_th->th_x86_shift = 0; vdso_th->th_x86_hpet_idx = 0; bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); return (1); } #define HYPERV_TSC_TIMECOUNT(fence) \ -static u_int \ -hyperv_tsc_timecount_##fence(struct timecounter *tc) \ +static uint64_t \ +hyperv_tc64_tsc_##fence(void) \ { \ struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref; \ uint32_t seq; \ \ while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) { \ uint64_t disc, ret, tsc; \ uint64_t scale = tsc_ref->tsc_scale; \ int64_t ofs = tsc_ref->tsc_ofs; \ \ fence(); \ tsc = rdtsc(); \ \ /* ret = ((tsc * scale) >> 64) + ofs */ \ __asm__ __volatile__ ("mulq %3" : \ "=d" (ret), "=a" (disc) : \ "a" (tsc), "r" (scale)); \ ret += ofs; \ \ atomic_thread_fence_acq(); \ if (tsc_ref->tsc_seq == seq) \ return (ret); \ \ /* Sequence changed; re-sync. */ \ } \ /* Fallback to the generic timecounter, i.e. rdmsr. 
*/ \ return (rdmsr(MSR_HV_TIME_REF_COUNT)); \ } \ + \ +static u_int \ +hyperv_tsc_timecount_##fence(struct timecounter *tc __unused) \ +{ \ + \ + return (hyperv_tc64_tsc_##fence()); \ +} \ struct __hack /* Instantiate the readers for both fence flavours; the macro ends in a dangling struct __hack so that each use can take a trailing semicolon. */ HYPERV_TSC_TIMECOUNT(lfence); HYPERV_TSC_TIMECOUNT(mfence); /* Set up the reference-TSC based timecounter. Requires both the time reference counter and the reference TSC page features plus SSE2; picks the fence variant by CPU vendor, allocates the reference TSC page and enables it through MSR_HV_REFERENCE_TSC, registers the timecounter and then publishes its 64-bit reader through hyperv_tc64. Every early return leaves hyperv_tc64 unchanged. */ static void hyperv_tsc_tcinit(void *dummy __unused) { + hyperv_tc64_t tc64 = NULL; uint64_t val, orig; if ((hyperv_features & (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) != (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) || (cpu_feature & CPUID_SSE2) == 0) /* SSE2 for mfence/lfence */ return; switch (cpu_vendor_id) { case CPU_VENDOR_AMD: hyperv_tsc_timecounter.tc_get_timecount = hyperv_tsc_timecount_mfence; + tc64 = hyperv_tc64_tsc_mfence; break; case CPU_VENDOR_INTEL: hyperv_tsc_timecounter.tc_get_timecount = hyperv_tsc_timecount_lfence; + tc64 = hyperv_tc64_tsc_lfence; break; default: /* Unsupported CPU vendor. */ return; } hyperv_ref_tsc.tsc_ref = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, sizeof(struct hyperv_reftsc), &hyperv_ref_tsc.tsc_ref_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (hyperv_ref_tsc.tsc_ref == NULL) { printf("hyperv: reftsc page allocation failed\n"); return; } /* Keep the bits covered by MSR_HV_REFTSC_RSVD_MASK from the current MSR value; set the enable bit and the page frame number of the new page. */ orig = rdmsr(MSR_HV_REFERENCE_TSC); val = MSR_HV_REFTSC_ENABLE | (orig & MSR_HV_REFTSC_RSVD_MASK) | ((hyperv_ref_tsc.tsc_ref_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_REFTSC_PGSHIFT); wrmsr(MSR_HV_REFERENCE_TSC, val); /* Register "enlightened" timecounter. */ tc_init(&hyperv_tsc_timecounter); + + /* Install the 64-bit timecounter method for other modules to use. */ + KASSERT(tc64 != NULL, ("tc64 is not set")); + hyperv_tc64 = tc64; /* Add device for mmap(2). 
*/ make_dev(&hyperv_tsc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0444, HYPERV_REFTSC_DEVNAME); } SYSINIT(hyperv_tsc_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, hyperv_tsc_tcinit, NULL); Index: head/sys/dev/hyperv/vmbus/hyperv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hyperv.c (revision 311742) +++ head/sys/dev/hyperv/vmbus/hyperv.c (revision 311743) @@ -1,312 +1,327 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ /** * Implements low-level interactions with Hyper-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #define HYPERV_FREEBSD_BUILD 0ULL #define HYPERV_FREEBSD_VERSION ((uint64_t)__FreeBSD_version) #define HYPERV_FREEBSD_OSID 0ULL #define MSR_HV_GUESTID_BUILD_FREEBSD \ (HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK) #define MSR_HV_GUESTID_VERSION_FREEBSD \ ((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \ MSR_HV_GUESTID_VERSION_MASK) #define MSR_HV_GUESTID_OSID_FREEBSD \ ((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \ MSR_HV_GUESTID_OSID_MASK) #define MSR_HV_GUESTID_FREEBSD \ (MSR_HV_GUESTID_BUILD_FREEBSD | \ MSR_HV_GUESTID_VERSION_FREEBSD | \ MSR_HV_GUESTID_OSID_FREEBSD | \ MSR_HV_GUESTID_OSTYPE_FREEBSD) struct hypercall_ctx { void *hc_addr; struct hyperv_dma hc_dma; }; static u_int hyperv_get_timecount(struct timecounter *); static bool hyperv_identify(void); static void hypercall_memfree(void); u_int hyperv_features; u_int hyperv_recommends; static u_int hyperv_pm_features; static u_int hyperv_features3; /* 64-bit time reader shared with other modules; stays NULL until a Hyper-V timecounter installs its method. */ +hyperv_tc64_t hyperv_tc64; + static struct timecounter hyperv_timecounter = { .tc_get_timecount = hyperv_get_timecount, .tc_poll_pps = NULL, .tc_counter_mask = 0xffffffff, .tc_frequency = HYPERV_TIMER_FREQ, .tc_name = "Hyper-V", .tc_quality = 2000, .tc_flags = 0, .tc_priv = NULL }; static struct hypercall_ctx hypercall_context; /* Timecounter read method: reads MSR_HV_TIME_REF_COUNT; the value is converted to u_int by the return type. */ static u_int hyperv_get_timecount(struct timecounter *tc __unused) { return rdmsr(MSR_HV_TIME_REF_COUNT); } /* Reader of MSR_HV_TIME_REF_COUNT with a uint64_t return type; hyperv_init() installs it as hyperv_tc64 when the time reference counter feature is present. */ +static uint64_t +hyperv_tc64_rdmsr(void) +{ + + return (rdmsr(MSR_HV_TIME_REF_COUNT)); +} + /* Post-message hypercall: passes msg_paddr as the input address and 0 as the output address. */ uint64_t hypercall_post_message(bus_addr_t msg_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_POST_MESSAGE, msg_paddr, 0); } /* Signal-event hypercall: passes monprm_paddr as the input address and 0 as the output address. */ uint64_t hypercall_signal_event(bus_addr_t monprm_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0); } int hyperv_guid2str(const struct hyperv_guid *guid, 
char *buf, size_t sz) { const uint8_t *d = guid->hv_guid; return snprintf(buf, sz, "%02x%02x%02x%02x-" "%02x%02x-%02x%02x-%02x%02x-" "%02x%02x%02x%02x%02x%02x", d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); } static bool hyperv_identify(void) { u_int regs[4]; unsigned int maxleaf; if (vm_guest != VM_GUEST_HV) return (false); do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); maxleaf = regs[0]; if (maxleaf < CPUID_LEAF_HV_LIMITS) return (false); do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); if (regs[0] != CPUID_HV_IFACE_HYPERV) return (false); do_cpuid(CPUID_LEAF_HV_FEATURES, regs); if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) { /* * Hyper-V w/o Hypercall is impossible; someone * is faking Hyper-V. */ return (false); } hyperv_features = regs[0]; hyperv_pm_features = regs[2]; hyperv_features3 = regs[3]; do_cpuid(CPUID_LEAF_HV_IDENTITY, regs); printf("Hyper-V Version: %d.%d.%d [SP%d]\n", regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); printf(" Features=0x%b\n", hyperv_features, "\020" "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */ "\002TMREFCNT" /* MSR_HV_TIME_REF_COUNT */ "\003SYNIC" /* MSRs for SynIC */ "\004SYNTM" /* MSRs for SynTimer */ "\005APIC" /* MSR_HV_{EOI,ICR,TPR} */ "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */ "\007VPINDEX" /* MSR_HV_VP_INDEX */ "\010RESET" /* MSR_HV_RESET */ "\011STATS" /* MSR_HV_STATS_ */ "\012REFTSC" /* MSR_HV_REFERENCE_TSC */ "\013IDLE" /* MSR_HV_GUEST_IDLE */ "\014TMFREQ" /* MSR_HV_{TSC,APIC}_FREQUENCY */ "\015DEBUG"); /* MSR_HV_SYNTH_DEBUG_ */ printf(" PM Features=0x%b [C%u]\n", (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK), "\020" "\005C3HPET", /* HPET is required for C3 state */ CPUPM_HV_CSTATE(hyperv_pm_features)); printf(" Features3=0x%b\n", hyperv_features3, "\020" "\001MWAIT" /* MWAIT */ "\002DEBUG" /* guest debug support */ "\003PERFMON" /* performance monitor */ "\004PCPUDPE" /* physical CPU dynamic partition event */ "\005XMMHC" /* hypercall input through XMM regs */ "\006IDLE" /* 
guest idle support */ "\007SLEEP" /* hypervisor sleep support */ "\010NUMA" /* NUMA distance query support */ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ "\012SYNCMC" /* inject synthetic machine checks */ "\013CRASH" /* MSRs for guest crash */ "\014DEBUGMSR" /* MSRs for guest debug */ "\015NPIEP" /* NPIEP */ "\016HVDIS"); /* disabling hypervisor */ do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs); hyperv_recommends = regs[0]; if (bootverbose) printf(" Recommends: %08x %08x\n", regs[0], regs[1]); do_cpuid(CPUID_LEAF_HV_LIMITS, regs); if (bootverbose) { printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", regs[0], regs[1], regs[2]); } if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) { do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs); if (bootverbose) { printf(" HW Features: %08x, AMD: %08x\n", regs[0], regs[3]); } } return (true); } static void hyperv_init(void *dummy __unused) { if (!hyperv_identify()) { /* Not Hyper-V; reset guest id to the generic one. */ if (vm_guest == VM_GUEST_HV) vm_guest = VM_GUEST_VM; return; } /* Set guest id */ wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD); if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) { /* Register Hyper-V timecounter */ tc_init(&hyperv_timecounter); + + /* + * Install 64 bits timecounter method for other modules + * to use. 
+ */ + hyperv_tc64 = hyperv_tc64_rdmsr; } } SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL); static void hypercall_memfree(void) { hyperv_dmamem_free(&hypercall_context.hc_dma, hypercall_context.hc_addr); hypercall_context.hc_addr = NULL; } static void hypercall_create(void *arg __unused) { uint64_t hc, hc_orig; if (vm_guest != VM_GUEST_HV) return; hypercall_context.hc_addr = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, PAGE_SIZE, &hypercall_context.hc_dma, BUS_DMA_WAITOK); if (hypercall_context.hc_addr == NULL) { printf("hyperv: Hypercall page allocation failed\n"); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } /* Get the 'reserved' bits, which requires preservation. */ hc_orig = rdmsr(MSR_HV_HYPERCALL); /* * Setup the Hypercall page. * * NOTE: 'reserved' bits MUST be preserved. */ hc = ((hypercall_context.hc_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_HYPERCALL_PGSHIFT) | (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) | MSR_HV_HYPERCALL_ENABLE; wrmsr(MSR_HV_HYPERCALL, hc); /* * Confirm that Hypercall page did get setup. 
*/ hc = rdmsr(MSR_HV_HYPERCALL); if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) { printf("hyperv: Hypercall setup failed\n"); hypercall_memfree(); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } if (bootverbose) printf("hyperv: Hypercall created\n"); } SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL); static void hypercall_destroy(void *arg __unused) { uint64_t hc; if (hypercall_context.hc_addr == NULL) return; /* Disable Hypercall */ hc = rdmsr(MSR_HV_HYPERCALL); wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK)); hypercall_memfree(); if (bootverbose) printf("hyperv: Hypercall destroyed\n"); } SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy, NULL); Index: head/sys/dev/hyperv/vmbus/vmbus_et.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_et.c (revision 311742) +++ head/sys/dev/hyperv/vmbus/vmbus_et.c (revision 311743) @@ -1,204 +1,201 @@ /*- * Copyright (c) 2015,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define VMBUS_ET_NAME "hvet" #define MSR_HV_STIMER0_CFG_SINT \ ((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \ MSR_HV_STIMER_CFG_SINT_MASK) /* - * Two additionally required features: + * Additionally required feature: * - SynIC is needed for interrupt generation. - * - Time reference counter is needed to set ABS reference count to - * STIMER0_COUNT. 
*/ -#define CPUID_HV_ET_MASK (CPUID_HV_MSR_TIME_REFCNT | \ - CPUID_HV_MSR_SYNIC | \ +#define CPUID_HV_ET_MASK (CPUID_HV_MSR_SYNIC | \ CPUID_HV_MSR_SYNTIMER) static void vmbus_et_identify(driver_t *, device_t); static int vmbus_et_probe(device_t); static int vmbus_et_attach(device_t); static int vmbus_et_detach(device_t); static int vmbus_et_start(struct eventtimer *, sbintime_t, sbintime_t); static struct eventtimer vmbus_et; static device_method_t vmbus_et_methods[] = { DEVMETHOD(device_identify, vmbus_et_identify), DEVMETHOD(device_probe, vmbus_et_probe), DEVMETHOD(device_attach, vmbus_et_attach), DEVMETHOD(device_detach, vmbus_et_detach), DEVMETHOD_END }; static driver_t vmbus_et_driver = { VMBUS_ET_NAME, vmbus_et_methods, 0 }; static devclass_t vmbus_et_devclass; DRIVER_MODULE(hv_et, vmbus, vmbus_et_driver, vmbus_et_devclass, NULL, NULL); MODULE_VERSION(hv_et, 1); /* Convert an sbintime_t interval into Hyper-V timer ticks: whole seconds scaled by HYPERV_TIMER_FREQ plus nanoseconds divided by HYPERV_TIMER_NS_FACTOR. */ static __inline uint64_t hyperv_sbintime2count(sbintime_t time) { struct timespec val; val = sbttots(time); return (val.tv_sec * HYPERV_TIMER_FREQ) + (val.tv_nsec / HYPERV_TIMER_NS_FACTOR); } /* Event timer start method: writes STIMER0_COUNT with the current reference time plus the requested delay. hyperv_tc64 is called without a NULL check here; vmbus_et_identify() declines to add the device when it is NULL. */ static int vmbus_et_start(struct eventtimer *et __unused, sbintime_t first, sbintime_t period __unused) { uint64_t current; - current = rdmsr(MSR_HV_TIME_REF_COUNT); + current = hyperv_tc64(); current += hyperv_sbintime2count(first); wrmsr(MSR_HV_STIMER0_COUNT, current); return (0); } /* Timer interrupt entry: when the event timer is active, run its callback with this trapframe installed as the thread's interrupt frame, then restore the previous frame and nesting level. */ void vmbus_et_intr(struct trapframe *frame) { struct trapframe *oldframe; struct thread *td; if (vmbus_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; vmbus_et.et_event_cb(&vmbus_et, vmbus_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } } /* Identify method: bails out unless the parent is unit 0, no child with this name exists yet, every feature in CPUID_HV_ET_MASK is present and (after this change) hyperv_tc64 is set. */ static void vmbus_et_identify(driver_t *driver, device_t parent) { if (device_get_unit(parent) != 0 || device_find_child(parent, VMBUS_ET_NAME, -1) != NULL || - (hyperv_features & CPUID_HV_ET_MASK) != CPUID_HV_ET_MASK) + (hyperv_features & CPUID_HV_ET_MASK) != CPUID_HV_ET_MASK || + hyperv_tc64 == NULL) 
return; device_add_child(parent, VMBUS_ET_NAME, -1); } static int vmbus_et_probe(device_t dev) { if (resource_disabled(VMBUS_ET_NAME, 0)) return (ENXIO); device_set_desc(dev, "Hyper-V event timer"); return (BUS_PROBE_NOWILDCARD); } static void vmbus_et_config(void *arg __unused) { /* * Make sure that STIMER0 is really disabled before writing * to STIMER0_CONFIG. * * "Writing to the configuration register of a timer that * is already enabled may result in undefined behaviour." */ for (;;) { uint64_t val; /* Stop counting, and this also implies disabling STIMER0 */ wrmsr(MSR_HV_STIMER0_COUNT, 0); val = rdmsr(MSR_HV_STIMER0_CONFIG); if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0) break; cpu_spinwait(); } wrmsr(MSR_HV_STIMER0_CONFIG, MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT); } static int vmbus_et_attach(device_t dev) { /* TODO: use independent IDT vector */ vmbus_et.et_name = "Hyper-V"; vmbus_et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; vmbus_et.et_quality = 1000; vmbus_et.et_frequency = HYPERV_TIMER_FREQ; vmbus_et.et_min_period = (0x00000001ULL << 32) / HYPERV_TIMER_FREQ; vmbus_et.et_max_period = (0xfffffffeULL << 32) / HYPERV_TIMER_FREQ; vmbus_et.et_start = vmbus_et_start; /* - * Delay a bit to make sure that MSR_HV_TIME_REF_COUNT will - * not return 0, since writing 0 to STIMER0_COUNT will disable - * STIMER0. + * Delay a bit to make sure that hyperv_tc64 will not return 0, + * since writing 0 to STIMER0_COUNT will disable STIMER0. */ DELAY(100); smp_rendezvous(NULL, vmbus_et_config, NULL, NULL); return (et_register(&vmbus_et)); } static int vmbus_et_detach(device_t dev) { return (et_deregister(&vmbus_et)); }