Index: head/sys/dev/hyperv/vmbus/hv_et.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_et.c (revision 298448) +++ head/sys/dev/hyperv/vmbus/hv_et.c (revision 298449) @@ -1,131 +1,161 @@ /*- * Copyright (c) 2015,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include +#include +#include +#include #include #include #include #include #include #include "hv_vmbus_priv.h" #define HV_TIMER_FREQUENCY (10 * 1000 * 1000LL) /* 100ns period */ #define HV_MAX_DELTA_TICKS 0xffffffffLL #define HV_MIN_DELTA_TICKS 1LL -static struct eventtimer et; -static uint64_t periodticks[MAXCPU]; +static struct eventtimer *et; static inline uint64_t sbintime2tick(sbintime_t time) { struct timespec val; val = sbttots(time); return val.tv_sec * HV_TIMER_FREQUENCY + val.tv_nsec / 100; } static int hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime) { union hv_timer_config timer_cfg; uint64_t current; timer_cfg.as_uint64 = 0; timer_cfg.auto_enable = 1; timer_cfg.sintx = HV_VMBUS_TIMER_SINT; - periodticks[curcpu] = sbintime2tick(periodtime); - if (firsttime == 0) - firsttime = periodtime; - current = rdmsr(HV_X64_MSR_TIME_REF_COUNT); current += sbintime2tick(firsttime); wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); wrmsr(HV_X64_MSR_STIMER0_COUNT, current); return (0); } static int hv_et_stop(struct eventtimer *et) { wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); wrmsr(HV_X64_MSR_STIMER0_COUNT, 0); return (0); } void hv_et_intr(struct trapframe *frame) { - union hv_timer_config timer_cfg; struct trapframe *oldframe; struct thread *td; - if (periodticks[curcpu] != 0) { - uint64_t tick = sbintime2tick(periodticks[curcpu]); - timer_cfg.as_uint64 = rdmsr(HV_X64_MSR_STIMER0_CONFIG); - timer_cfg.enable = 0; - timer_cfg.auto_enable = 1; - timer_cfg.periodic = 1; - periodticks[curcpu] = 0; - - wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); - wrmsr(HV_X64_MSR_STIMER0_COUNT, tick); - } - - if (et.et_active) { + if (et->et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; - et.et_event_cb(&et, et.et_arg); + et->et_event_cb(et, et->et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } } -void -hv_et_init(void) +static void +hv_et_identify (driver_t *driver, device_t parent) { - et.et_name = "HyperV"; - et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU | ET_FLAGS_PERIODIC; - et.et_quality = 1000; - et.et_frequency = HV_TIMER_FREQUENCY; - et.et_min_period = (1LL << 32) / HV_TIMER_FREQUENCY; - et.et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); - et.et_start = hv_et_start; - et.et_stop = hv_et_stop; - et.et_priv = &et; - et_register(&et); + if (device_find_child(parent, "hv_et", -1) != NULL) + return; + + device_add_child(parent, "hv_et", -1); } +static int +hv_et_probe(device_t dev) +{ + device_set_desc(dev, "Hyper-V event timer"); + + return (BUS_PROBE_NOWILDCARD); +} + +static int +hv_et_attach(device_t dev) +{ + /* XXX: need allocate SINT and remove global et */ + et = device_get_softc(dev); + + et->et_name = "Hyper-V"; + et->et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; + et->et_quality = 1000; + et->et_frequency = HV_TIMER_FREQUENCY; + et->et_min_period = HV_MIN_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); + et->et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); + et->et_start = hv_et_start; + et->et_stop = hv_et_stop; + et->et_priv = dev; + + return (et_register(et)); +} + +static int +hv_et_detach(device_t dev) +{ + return (et_deregister(et)); +} + +static device_method_t hv_et_methods[] = { + DEVMETHOD(device_identify, hv_et_identify), + DEVMETHOD(device_probe, hv_et_probe), + DEVMETHOD(device_attach, hv_et_attach), + DEVMETHOD(device_detach, hv_et_detach), + + DEVMETHOD_END +}; + +static driver_t hv_et_driver = { + "hv_et", + hv_et_methods, + sizeof(struct eventtimer) +}; + +static devclass_t hv_et_devclass; +DRIVER_MODULE(hv_et, vmbus, hv_et_driver, hv_et_devclass, NULL, 0); +MODULE_VERSION(hv_et, 1); Index: head/sys/dev/hyperv/vmbus/hv_hv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_hv.c (revision 298448) +++ head/sys/dev/hyperv/vmbus/hv_hv.c (revision 298449) @@ -1,515 +1,513 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * Implements low-level interactions with Hypver-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #define HV_NANOSECONDS_PER_SEC 1000000000L #define HYPERV_INTERFACE 0x31237648 /* HV#1 */ static u_int hv_get_timecount(struct timecounter *tc); u_int hyperv_features; u_int hyperv_recommends; static u_int hyperv_pm_features; static u_int hyperv_features3; /** * Globals */ hv_vmbus_context hv_vmbus_g_context = { .syn_ic_initialized = FALSE, .hypercall_page = NULL, }; static struct timecounter hv_timecounter = { hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100 }; static u_int hv_get_timecount(struct timecounter *tc) { u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT); return (now); } /** * @brief Invoke the specified hypercall */ static uint64_t hv_vmbus_do_hypercall(uint64_t control, void* input, void* output) { #ifdef __x86_64__ uint64_t hv_status = 0; uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; __asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8"); __asm__ __volatile__ ("call *%3" : "=a"(hv_status): "c" (control), "d" (input_address), "m" (hypercall_page)); return (hv_status); #else uint32_t control_high = control >> 32; uint32_t control_low = control & 0xFFFFFFFF; uint32_t hv_status_high = 1; uint32_t hv_status_low = 1; uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0; uint32_t input_address_high = input_address >> 32; uint32_t input_address_low = input_address & 0xFFFFFFFF; uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0; uint32_t output_address_high = output_address >> 32; uint32_t output_address_low = output_address & 0xFFFFFFFF; volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page; __asm__ __volatile__ ("call *%8" : "=d"(hv_status_high), "=a"(hv_status_low) : "d" (control_high), "a" (control_low), "b" (input_address_high), "c" (input_address_low), "D"(output_address_high), "S"(output_address_low), "m" (hypercall_page)); return (hv_status_low | ((uint64_t)hv_status_high << 32)); #endif /* __x86_64__ */ } /** * @brief Main initialization routine. * * This routine must be called * before any other routines in here are called */ int hv_vmbus_init(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; void* virt_addr = NULL; memset( hv_vmbus_g_context.syn_ic_event_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); memset( hv_vmbus_g_context.syn_ic_msg_page, 0, sizeof(hv_vmbus_handle) * MAXCPU); if (vm_guest != VM_GUEST_HV) goto cleanup; /* * Write our OS info */ uint64_t os_guest_info = HV_FREEBSD_GUEST_ID; wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info); hv_vmbus_g_context.guest_id = os_guest_info; /* * See if the hypercall page is already set */ hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); hypercall_msr.u.enable = 1; hypercall_msr.u.guest_physical_address = (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); /* * Confirm that hypercall page did get set up */ hypercall_msr.as_uint64_t = 0; hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); if (!hypercall_msr.u.enable) goto cleanup; hv_vmbus_g_context.hypercall_page = virt_addr; - hv_et_init(); - return (0); cleanup: if (virt_addr != NULL) { if (hypercall_msr.u.enable) { hypercall_msr.as_uint64_t = 0; wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); } free(virt_addr, M_DEVBUF); } return (ENOTSUP); } /** * @brief Cleanup routine, called normally during driver unloading or exiting */ void hv_vmbus_cleanup(void) { hv_vmbus_x64_msr_hypercall_contents hypercall_msr; if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) { if (hv_vmbus_g_context.hypercall_page != NULL) { hypercall_msr.as_uint64_t = 0; wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t); free(hv_vmbus_g_context.hypercall_page, M_DEVBUF); hv_vmbus_g_context.hypercall_page = NULL; } } } /** * @brief Post a message using the hypervisor message IPC. * (This involves a hypercall.) */ hv_vmbus_status hv_vmbus_post_msg_via_msg_ipc( hv_vmbus_connection_id connection_id, hv_vmbus_msg_type message_type, void* payload, size_t payload_size) { struct alignedinput { uint64_t alignment8; hv_vmbus_input_post_message msg; }; hv_vmbus_input_post_message* aligned_msg; hv_vmbus_status status; size_t addr; if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) return (EMSGSIZE); addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF, M_ZERO | M_NOWAIT); KASSERT(addr != 0, ("Error VMBUS: malloc failed to allocate message buffer!")); if (addr == 0) return (ENOMEM); aligned_msg = (hv_vmbus_input_post_message*) (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN)); aligned_msg->connection_id = connection_id; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void*) aligned_msg->payload, payload, payload_size); status = hv_vmbus_do_hypercall( HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF; free((void *) addr, M_DEVBUF); return (status); } /** * @brief Signal an event on the specified connection using the hypervisor * event IPC. (This involves a hypercall.) */ hv_vmbus_status hv_vmbus_signal_event(void *con_id) { hv_vmbus_status status; status = hv_vmbus_do_hypercall( HV_CALL_SIGNAL_EVENT, con_id, 0) & 0xFFFF; return (status); } /** * @brief hv_vmbus_synic_init */ void hv_vmbus_synic_init(void *arg) { int cpu; uint64_t hv_vcpu_index; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; hv_vmbus_synic_scontrol sctrl; hv_vmbus_synic_sint shared_sint; uint64_t version; hv_setup_args* setup_args = (hv_setup_args *)arg; cpu = PCPU_GET(cpuid); if (hv_vmbus_g_context.hypercall_page == NULL) return; /* * TODO: Check the version */ version = rdmsr(HV_X64_MSR_SVERSION); hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[2 * cpu]; hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[2 * cpu + 1]; /* * Setup the Synic's message page */ simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 1; simp.u.base_simp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); /* * Setup the Synic's event page */ siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 1; siefp.u.base_siefp_gpa = ((hv_get_phys_addr( hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT); wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); /*HV_SHARED_SINT_IDT_VECTOR + 0x20; */ shared_sint.as_uint64_t = 0; shared_sint.u.vector = setup_args->vector; shared_sint.u.masked = FALSE; shared_sint.u.auto_eoi = TRUE; wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT, shared_sint.as_uint64_t); /* Enable the global synic bit */ sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL); sctrl.u.enable = 1; wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t); hv_vmbus_g_context.syn_ic_initialized = TRUE; /* * Set up the cpuid mapping from Hyper-V to FreeBSD. * The array is indexed using FreeBSD cpuid. */ hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX); hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index; return; } /** * @brief Cleanup routine for hv_vmbus_synic_init() */ void hv_vmbus_synic_cleanup(void *arg) { hv_vmbus_synic_sint shared_sint; hv_vmbus_synic_simp simp; hv_vmbus_synic_siefp siefp; if (!hv_vmbus_g_context.syn_ic_initialized) return; shared_sint.as_uint64_t = rdmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT); shared_sint.u.masked = 1; /* * Disable the interrupt 0 */ wrmsr( HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT, shared_sint.as_uint64_t); shared_sint.as_uint64_t = rdmsr( HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT); shared_sint.u.masked = 1; /* * Disable the interrupt 1 */ wrmsr( HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT, shared_sint.as_uint64_t); simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP); simp.u.simp_enabled = 0; simp.u.base_simp_gpa = 0; wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t); siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP); siefp.u.siefp_enabled = 0; siefp.u.base_siefp_gpa = 0; wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t); } static bool hyperv_identify(void) { u_int regs[4]; unsigned int maxLeaf; unsigned int op; if (vm_guest != VM_GUEST_HV) return (false); op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; do_cpuid(op, regs); maxLeaf = regs[0]; if (maxLeaf < HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS) return (false); op = HV_CPU_ID_FUNCTION_HV_INTERFACE; do_cpuid(op, regs); if (regs[0] != HYPERV_INTERFACE) return (false); op = HV_CPU_ID_FUNCTION_MS_HV_FEATURES; do_cpuid(op, regs); if ((regs[0] & HV_FEATURE_MSR_HYPERCALL) == 0) { /* * Hyper-V w/o Hypercall is impossible; someone * is faking Hyper-V. */ return (false); } hyperv_features = regs[0]; hyperv_pm_features = regs[2]; hyperv_features3 = regs[3]; op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; do_cpuid(op, regs); printf("Hyper-V Version: %d.%d.%d [SP%d]\n", regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); printf(" Features=0x%b\n", hyperv_features, "\020" "\001VPRUNTIME" /* MSR_VP_RUNTIME */ "\002TMREFCNT" /* MSR_TIME_REF_COUNT */ "\003SYNIC" /* MSRs for SynIC */ "\004SYNTM" /* MSRs for SynTimer */ "\005APIC" /* MSR_{EOI,ICR,TPR} */ "\006HYPERCALL" /* MSR_{GUEST_OS_ID,HYPERCALL} */ "\007VPINDEX" /* MSR_VP_INDEX */ "\010RESET" /* MSR_RESET */ "\011STATS" /* MSR_STATS_ */ "\012REFTSC" /* MSR_REFERENCE_TSC */ "\013IDLE" /* MSR_GUEST_IDLE */ "\014TMFREQ" /* MSR_{TSC,APIC}_FREQUENCY */ "\015DEBUG"); /* MSR_SYNTH_DEBUG_ */ printf(" PM Features=max C%u, 0x%b\n", HV_PM_FEATURE_CSTATE(hyperv_pm_features), (hyperv_pm_features & ~HV_PM_FEATURE_CSTATE_MASK), "\020" "\005C3HPET"); /* HPET is required for C3 state */ printf(" Features3=0x%b\n", hyperv_features3, "\020" "\001MWAIT" /* MWAIT */ "\002DEBUG" /* guest debug support */ "\003PERFMON" /* performance monitor */ "\004PCPUDPE" /* physical CPU dynamic partition event */ "\005XMMHC" /* hypercall input through XMM regs */ "\006IDLE" /* guest idle support */ "\007SLEEP" /* hypervisor sleep support */ "\010NUMA" /* NUMA distance query support */ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ "\012SYNCMC" /* inject synthetic machine checks */ "\013CRASH" /* MSRs for guest crash */ "\014DEBUGMSR" /* MSRs for guest debug */ "\015NPIEP" /* NPIEP */ "\016HVDIS"); /* disabling hypervisor */ op = HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION; do_cpuid(op, regs); hyperv_recommends = regs[0]; if (bootverbose) printf(" Recommends: %08x %08x\n", regs[0], regs[1]); op = HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS; do_cpuid(op, regs); if (bootverbose) { printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", regs[0], regs[1], regs[2]); } if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE) { op = HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE; do_cpuid(op, regs); if (bootverbose) { printf(" HW Features: %08x AMD: %08x\n", regs[0], regs[3]); } } return (true); } static void hyperv_init(void *dummy __unused) { if (!hyperv_identify()) return; if (hyperv_features & HV_FEATURE_MSR_TIME_REFCNT) { /* Register virtual timecount */ tc_init(&hv_timecounter); } } SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL); Index: head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 298448) +++ head/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c (revision 298449) @@ -1,650 +1,654 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vmbus_priv.h" #include #include "acpi_if.h" static device_t vmbus_devp; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ static char *vmbus_ids[] = { "VMBUS", NULL }; /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. */ static void vmbus_msg_swintr(void *arg, int pending __unused) { int cpu; void* page_addr; hv_vmbus_channel_msg_header *hdr; hv_vmbus_channel_msg_table_entry *entry; hv_vmbus_channel_msg_type msg_type; hv_vmbus_message* msg; cpu = (int)(long)arg; KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " "cpu out of range!")); page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; for (;;) { if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) break; /* no message */ hdr = (hv_vmbus_channel_msg_header *)msg->u.payload; msg_type = hdr->message_type; if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { printf("VMBUS: unknown message type = %d\n", msg_type); goto handled; } entry = &g_channel_message_table[msg_type]; if (entry->messageHandler) entry->messageHandler(hdr); handled: msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* * Make sure the write to message_type (ie set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages * since there is no empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } } /** * @brief Interrupt filter routine for VMBUS. * * The purpose of this routine is to determine the type of VMBUS protocol * message to process - an event or a channel message. */ static inline int hv_vmbus_isr(struct trapframe *frame) { int cpu; hv_vmbus_message* msg; void* page_addr; cpu = PCPU_GET(cpuid); /* * The Windows team has advised that we check for events * before checking for messages. This is the way they do it * in Windows when running as a guest in Hyper-V */ hv_vmbus_on_events(cpu); /* Check if there are actual msgs to be process */ page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_TIMER_SINT; /* we call eventtimer process the message */ if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) { msg->header.message_type = HV_MESSAGE_TYPE_NONE; /* call intrrupt handler of event timer */ hv_et_intr(frame); /* * Make sure the write to message_type (ie set to * HV_MESSAGE_TYPE_NONE) happens before we read the * message_pending and EOMing. Otherwise, the EOMing will * not deliver any more messages * since there is no empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->header.message_flags.u.message_pending) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(HV_X64_MSR_EOM, 0); } } msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu], &hv_vmbus_g_context.hv_msg_task[cpu]); } return (FILTER_HANDLED); } u_long *hv_vmbus_intr_cpu[MAXCPU]; void hv_vector_handler(struct trapframe *trap_frame) { int cpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ cpu = PCPU_GET(cpuid); (*hv_vmbus_intr_cpu[cpu])++; hv_vmbus_isr(trap_frame); /* * Enable preemption. */ critical_exit(); } static int vmbus_read_ivar( device_t dev, device_t child, int index, uintptr_t* result) { struct hv_device *child_dev_ctx = device_get_ivars(child); switch (index) { case HV_VMBUS_IVAR_TYPE: *result = (uintptr_t) &child_dev_ctx->class_id; return (0); case HV_VMBUS_IVAR_INSTANCE: *result = (uintptr_t) &child_dev_ctx->device_id; return (0); case HV_VMBUS_IVAR_DEVCTX: *result = (uintptr_t) child_dev_ctx; return (0); case HV_VMBUS_IVAR_NODE: *result = (uintptr_t) child_dev_ctx->device; return (0); } return (ENOENT); } static int vmbus_write_ivar( device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case HV_VMBUS_IVAR_TYPE: case HV_VMBUS_IVAR_INSTANCE: case HV_VMBUS_IVAR_DEVCTX: case HV_VMBUS_IVAR_NODE: /* read-only */ return (EINVAL); } return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { char guidbuf[40]; struct hv_device *dev_ctx = device_get_ivars(child); + if (dev_ctx == NULL) + return (0); + strlcat(buf, "classid=", buflen); snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id); strlcat(buf, guidbuf, buflen); return (0); } struct hv_device* hv_vmbus_child_device_create( hv_guid type, hv_guid instance, hv_vmbus_channel* channel) { hv_device* child_dev; /* * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, M_WAITOK | M_ZERO); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); memcpy(&child_dev->device_id, &instance, sizeof(hv_guid)); return (child_dev); } int snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid) { int cnt; const unsigned char *d = guid->data; cnt = snprintf(buf, sz, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); return (cnt); } int hv_vmbus_child_device_register(struct hv_device *child_dev) { device_t child; if (bootverbose) { char name[40]; snprintf_hv_guid(name, sizeof(name), &child_dev->class_id); printf("VMBUS: Class ID: %s\n", name); } child = device_add_child(vmbus_devp, NULL, -1); child_dev->device = child; device_set_ivars(child, child_dev); return (0); } int hv_vmbus_child_device_unregister(struct hv_device *child_dev) { int ret = 0; /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); ret = device_delete_child(vmbus_devp, child_dev->device); mtx_unlock(&Giant); return(ret); } static int vmbus_probe(device_t dev) { if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL || device_get_unit(dev) != 0) return (ENXIO); device_set_desc(dev, "Vmbus Devices"); return (BUS_PROBE_DEFAULT); } extern inthand_t IDTVEC(hv_vmbus_callback); /** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_bus_init(void) { int i, j, n, ret; char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; if (vmbus_inited) return (0); vmbus_inited = 1; ret = hv_vmbus_init(); if (ret) { if(bootverbose) printf("Error VMBUS: Hypervisor Initialization Failed!\n"); return (ret); } /* * Find a free IDT slot for vmbus callback. */ hv_vmbus_g_context.hv_cb_vector = lapic_ipi_alloc(IDTVEC(hv_vmbus_callback)); if (hv_vmbus_g_context.hv_cb_vector < 0) { if(bootverbose) printf("Error VMBUS: Cannot find free IDT slot for " "vmbus callback!\n"); goto cleanup; } if(bootverbose) printf("VMBUS: vmbus callback vector %d\n", hv_vmbus_g_context.hv_cb_vector); /* * Notify the hypervisor of our vector. */ setup_args.vector = hv_vmbus_g_context.hv_cb_vector; CPU_FOREACH(j) { snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); intrcnt_add(buf, &hv_vmbus_intr_cpu[j]); for (i = 0; i < 2; i++) setup_args.page_buffers[2 * j + i] = NULL; } /* * Per cpu setup. */ CPU_FOREACH(j) { /* * Setup taskqueue to handle events */ hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK, taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]); CPU_SETOF(j, &cpu_mask); taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, &cpu_mask, "hvevent%d", j); /* * Setup per-cpu tasks and taskqueues to handle msg. */ hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_msg_tq[j]); CPU_SETOF(j, &cpu_mask); taskqueue_start_threads_cpuset(&hv_vmbus_g_context.hv_msg_tq[j], 1, PI_NET, &cpu_mask, "hvmsg%d", j); TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0, vmbus_msg_swintr, (void *)(long)j); /* * Prepare the per cpu msg and event pages to be called on each cpu. */ for(i = 0; i < 2; i++) { setup_args.page_buffers[2 * j + i] = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); } } if (bootverbose) printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n", smp_started); smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args); /* * Connect to VMBus in the root partition */ ret = hv_vmbus_connect(); if (ret != 0) goto cleanup1; hv_vmbus_request_channel_offers(); vmbus_scan(); bus_generic_attach(vmbus_devp); device_printf(vmbus_devp, "device scan, probe and attach done\n"); return (ret); cleanup1: /* * Free pages alloc'ed */ for (n = 0; n < 2 * MAXCPU; n++) if (setup_args.page_buffers[n] != NULL) free(setup_args.page_buffers[n], M_DEVBUF); /* * remove swi and vmbus callback vector; */ CPU_FOREACH(j) { if (hv_vmbus_g_context.hv_event_queue[j] != NULL) { taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]); hv_vmbus_g_context.hv_event_queue[j] = NULL; } } lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector); cleanup: hv_vmbus_cleanup(); return (ret); } static int vmbus_attach(device_t dev) { if(bootverbose) device_printf(dev, "VMBUS: attach dev: %p\n", dev); vmbus_devp = dev; /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); + bus_generic_probe(dev); return (0); } static void vmbus_init(void) { if (vm_guest != VM_GUEST_HV) return; /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) vmbus_bus_init(); } static void vmbus_bus_exit(void) { int i; hv_vmbus_release_unattached_channels(); hv_vmbus_disconnect(); smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL); for(i = 0; i < 2 * MAXCPU; i++) { if (setup_args.page_buffers[i] != NULL) free(setup_args.page_buffers[i], M_DEVBUF); } hv_vmbus_cleanup(); /* remove swi */ CPU_FOREACH(i) { if (hv_vmbus_g_context.hv_event_queue[i] != NULL) { taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]); hv_vmbus_g_context.hv_event_queue[i] = NULL; } } lapic_ipi_free(hv_vmbus_g_context.hv_cb_vector); return; } static void vmbus_exit(void) { vmbus_bus_exit(); } static int vmbus_detach(device_t dev) { vmbus_exit(); return (0); } static void vmbus_mod_load(void) { if(bootverbose) printf("VMBUS: load\n"); } static void vmbus_mod_unload(void) { if(bootverbose) printf("VMBUS: unload\n"); } static int vmbus_modevent(module_t mod, int what, void *arg) { switch (what) { case MOD_LOAD: vmbus_mod_load(); break; case MOD_UNLOAD: vmbus_mod_unload(); break; } return (0); } static device_method_t vmbus_methods[] = { /** Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /** Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_write_ivar, vmbus_write_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), { 0, 0 } }; static char driver_name[] = "vmbus"; static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_VERSION(vmbus, 1); /* We want to be started after SMP is initialized */ SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);