Index: head/sys/dev/hyperv/vmbus/hyperv.c =================================================================== --- head/sys/dev/hyperv/vmbus/hyperv.c (revision 303470) +++ head/sys/dev/hyperv/vmbus/hyperv.c (revision 303471) @@ -1,310 +1,312 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * Implements low-level interactions with Hypver-V/Azure */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #define HYPERV_FREEBSD_BUILD 0ULL #define HYPERV_FREEBSD_VERSION ((uint64_t)__FreeBSD_version) #define HYPERV_FREEBSD_OSID 0ULL #define MSR_HV_GUESTID_BUILD_FREEBSD \ (HYPERV_FREEBSD_BUILD & MSR_HV_GUESTID_BUILD_MASK) #define MSR_HV_GUESTID_VERSION_FREEBSD \ ((HYPERV_FREEBSD_VERSION << MSR_HV_GUESTID_VERSION_SHIFT) & \ MSR_HV_GUESTID_VERSION_MASK) #define MSR_HV_GUESTID_OSID_FREEBSD \ ((HYPERV_FREEBSD_OSID << MSR_HV_GUESTID_OSID_SHIFT) & \ MSR_HV_GUESTID_OSID_MASK) #define MSR_HV_GUESTID_FREEBSD \ (MSR_HV_GUESTID_BUILD_FREEBSD | \ MSR_HV_GUESTID_VERSION_FREEBSD | \ MSR_HV_GUESTID_OSID_FREEBSD | \ MSR_HV_GUESTID_OSTYPE_FREEBSD) struct hypercall_ctx { void *hc_addr; struct hyperv_dma hc_dma; }; -static u_int hyperv_get_timecount(struct timecounter *tc); +static u_int hyperv_get_timecount(struct timecounter *); +static bool hyperv_identify(void); +static void hypercall_memfree(void); -u_int hyperv_features; -u_int hyperv_recommends; +u_int hyperv_features; +u_int hyperv_recommends; -static u_int hyperv_pm_features; -static u_int hyperv_features3; +static u_int hyperv_pm_features; +static u_int hyperv_features3; static struct timecounter hyperv_timecounter = { .tc_get_timecount = hyperv_get_timecount, .tc_poll_pps = NULL, .tc_counter_mask = 0xffffffff, .tc_frequency = HYPERV_TIMER_FREQ, .tc_name = "Hyper-V", .tc_quality = 2000, .tc_flags = 0, .tc_priv = NULL }; static struct hypercall_ctx hypercall_context; static u_int hyperv_get_timecount(struct timecounter *tc __unused) { return rdmsr(MSR_HV_TIME_REF_COUNT); } uint64_t hypercall_post_message(bus_addr_t msg_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_POST_MESSAGE, msg_paddr, 0); } uint64_t hypercall_signal_event(bus_addr_t monprm_paddr) { return hypercall_md(hypercall_context.hc_addr, HYPERCALL_SIGNAL_EVENT, monprm_paddr, 0); } int hyperv_guid2str(const struct hyperv_guid *guid, char *buf, size_t sz) { const uint8_t *d = guid->hv_guid; return snprintf(buf, sz, "%02x%02x%02x%02x-" "%02x%02x-%02x%02x-%02x%02x-" "%02x%02x%02x%02x%02x%02x", d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]); } static bool hyperv_identify(void) { u_int regs[4]; unsigned int maxleaf; if (vm_guest != VM_GUEST_HV) return (false); do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); maxleaf = regs[0]; if (maxleaf < CPUID_LEAF_HV_LIMITS) return (false); do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); if (regs[0] != CPUID_HV_IFACE_HYPERV) return (false); do_cpuid(CPUID_LEAF_HV_FEATURES, regs); if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) { /* * Hyper-V w/o Hypercall is impossible; someone * is faking Hyper-V. */ return (false); } hyperv_features = regs[0]; hyperv_pm_features = regs[2]; hyperv_features3 = regs[3]; do_cpuid(CPUID_LEAF_HV_IDENTITY, regs); printf("Hyper-V Version: %d.%d.%d [SP%d]\n", regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]); printf(" Features=0x%b\n", hyperv_features, "\020" "\001VPRUNTIME" /* MSR_HV_VP_RUNTIME */ "\002TMREFCNT" /* MSR_HV_TIME_REF_COUNT */ "\003SYNIC" /* MSRs for SynIC */ "\004SYNTM" /* MSRs for SynTimer */ "\005APIC" /* MSR_HV_{EOI,ICR,TPR} */ "\006HYPERCALL" /* MSR_HV_{GUEST_OS_ID,HYPERCALL} */ "\007VPINDEX" /* MSR_HV_VP_INDEX */ "\010RESET" /* MSR_HV_RESET */ "\011STATS" /* MSR_HV_STATS_ */ "\012REFTSC" /* MSR_HV_REFERENCE_TSC */ "\013IDLE" /* MSR_HV_GUEST_IDLE */ "\014TMFREQ" /* MSR_HV_{TSC,APIC}_FREQUENCY */ "\015DEBUG"); /* MSR_HV_SYNTH_DEBUG_ */ printf(" PM Features=0x%b [C%u]\n", (hyperv_pm_features & ~CPUPM_HV_CSTATE_MASK), "\020" "\005C3HPET", /* HPET is required for C3 state */ CPUPM_HV_CSTATE(hyperv_pm_features)); printf(" Features3=0x%b\n", hyperv_features3, "\020" "\001MWAIT" /* MWAIT */ "\002DEBUG" /* guest debug support */ "\003PERFMON" /* performance monitor */ "\004PCPUDPE" /* physical CPU dynamic partition event */ "\005XMMHC" /* hypercall input through XMM regs */ "\006IDLE" /* guest idle support */ "\007SLEEP" /* hypervisor sleep support */ "\010NUMA" /* NUMA distance query support */ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */ "\012SYNCMC" /* inject synthetic machine checks */ "\013CRASH" /* MSRs for guest crash */ "\014DEBUGMSR" /* MSRs for guest debug */ "\015NPIEP" /* NPIEP */ "\016HVDIS"); /* disabling hypervisor */ do_cpuid(CPUID_LEAF_HV_RECOMMENDS, regs); hyperv_recommends = regs[0]; if (bootverbose) printf(" Recommends: %08x %08x\n", regs[0], regs[1]); do_cpuid(CPUID_LEAF_HV_LIMITS, regs); if (bootverbose) { printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n", regs[0], regs[1], regs[2]); } if (maxleaf >= CPUID_LEAF_HV_HWFEATURES) { do_cpuid(CPUID_LEAF_HV_HWFEATURES, regs); if (bootverbose) { printf(" HW Features: %08x, AMD: %08x\n", regs[0], regs[3]); } } return (true); } static void hyperv_init(void *dummy __unused) { if (!hyperv_identify()) { /* Not Hyper-V; reset guest id to the generic one. */ if (vm_guest == VM_GUEST_HV) vm_guest = VM_GUEST_VM; return; } /* Set guest id */ wrmsr(MSR_HV_GUEST_OS_ID, MSR_HV_GUESTID_FREEBSD); if (hyperv_features & CPUID_HV_MSR_TIME_REFCNT) { /* Register Hyper-V timecounter */ tc_init(&hyperv_timecounter); } } SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL); static void hypercall_memfree(void) { hyperv_dmamem_free(&hypercall_context.hc_dma, hypercall_context.hc_addr); hypercall_context.hc_addr = NULL; } static void hypercall_create(void *arg __unused) { uint64_t hc, hc_orig; if (vm_guest != VM_GUEST_HV) return; hypercall_context.hc_addr = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, PAGE_SIZE, &hypercall_context.hc_dma, BUS_DMA_WAITOK); if (hypercall_context.hc_addr == NULL) { printf("hyperv: Hypercall page allocation failed\n"); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } /* Get the 'reserved' bits, which requires preservation. */ hc_orig = rdmsr(MSR_HV_HYPERCALL); /* * Setup the Hypercall page. * * NOTE: 'reserved' bits MUST be preserved. */ hc = ((hypercall_context.hc_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_HYPERCALL_PGSHIFT) | (hc_orig & MSR_HV_HYPERCALL_RSVD_MASK) | MSR_HV_HYPERCALL_ENABLE; wrmsr(MSR_HV_HYPERCALL, hc); /* * Confirm that Hypercall page did get setup. */ hc = rdmsr(MSR_HV_HYPERCALL); if ((hc & MSR_HV_HYPERCALL_ENABLE) == 0) { printf("hyperv: Hypercall setup failed\n"); hypercall_memfree(); /* Can't perform any Hyper-V specific actions */ vm_guest = VM_GUEST_VM; return; } if (bootverbose) printf("hyperv: Hypercall created\n"); } SYSINIT(hypercall_ctor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_create, NULL); static void hypercall_destroy(void *arg __unused) { uint64_t hc; if (hypercall_context.hc_addr == NULL) return; /* Disable Hypercall */ hc = rdmsr(MSR_HV_HYPERCALL); wrmsr(MSR_HV_HYPERCALL, (hc & MSR_HV_HYPERCALL_RSVD_MASK)); hypercall_memfree(); if (bootverbose) printf("hyperv: Hypercall destroyed\n"); } SYSUNINIT(hypercall_dtor, SI_SUB_DRIVERS, SI_ORDER_FIRST, hypercall_destroy, NULL); Index: head/sys/dev/hyperv/vmbus/vmbus.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus.c (revision 303470) +++ head/sys/dev/hyperv/vmbus/vmbus.c (revision 303471) @@ -1,1321 +1,1341 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * VM Bus Driver Implementation */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "acpi_if.h" #include "vmbus_if.h" #define VMBUS_GPADL_START 0xe1e10 struct vmbus_msghc { struct hypercall_postmsg_in *mh_inprm; struct hypercall_postmsg_in mh_inprm_save; struct hyperv_dma mh_inprm_dma; struct vmbus_message *mh_resp; struct vmbus_message mh_resp0; }; struct vmbus_msghc_ctx { struct vmbus_msghc *mhc_free; struct mtx mhc_free_lock; uint32_t mhc_flags; struct vmbus_msghc *mhc_active; struct mtx mhc_active_lock; }; #define VMBUS_MSGHC_CTXF_DESTROY 0x0001 +static int vmbus_probe(device_t); +static int vmbus_attach(device_t); +static int vmbus_detach(device_t); +static int vmbus_read_ivar(device_t, device_t, int, + uintptr_t *); +static int vmbus_child_pnpinfo_str(device_t, device_t, + char *, size_t); +static uint32_t vmbus_get_version_method(device_t, device_t); +static int vmbus_probe_guid_method(device_t, device_t, + const struct hyperv_guid *); + static int vmbus_init(struct vmbus_softc *); static int vmbus_connect(struct vmbus_softc *, uint32_t); static int vmbus_req_channels(struct vmbus_softc *sc); static void vmbus_disconnect(struct vmbus_softc *); static int vmbus_scan(struct vmbus_softc *); static void vmbus_scan_wait(struct vmbus_softc *); static void vmbus_scan_newchan(struct vmbus_softc *); static void vmbus_scan_newdev(struct vmbus_softc *); static void vmbus_scan_done(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chanmsg_handle(struct vmbus_softc *, const struct vmbus_message *); - +static void vmbus_msg_task(void *, int); +static void vmbus_synic_setup(void *); +static void vmbus_synic_teardown(void *); static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS); +static int vmbus_dma_alloc(struct vmbus_softc *); +static void vmbus_dma_free(struct vmbus_softc *); +static int vmbus_intr_setup(struct vmbus_softc *); +static void vmbus_intr_teardown(struct vmbus_softc *); +static int vmbus_doattach(struct vmbus_softc *); +static void vmbus_event_proc_dummy(struct vmbus_softc *, + int); static struct vmbus_msghc_ctx *vmbus_msghc_ctx_create(bus_dma_tag_t); static void vmbus_msghc_ctx_destroy( struct vmbus_msghc_ctx *); static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *); static struct vmbus_msghc *vmbus_msghc_alloc(bus_dma_tag_t); static void vmbus_msghc_free(struct vmbus_msghc *); static struct vmbus_msghc *vmbus_msghc_get1(struct vmbus_msghc_ctx *, uint32_t); struct vmbus_softc *vmbus_sc; extern inthand_t IDTVEC(vmbus_isr); static const uint32_t vmbus_version[] = { VMBUS_VERSION_WIN8_1, VMBUS_VERSION_WIN8, VMBUS_VERSION_WIN7, VMBUS_VERSION_WS2008 }; static const vmbus_chanmsg_proc_t vmbus_chanmsg_handlers[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER_DONE, vmbus_scan_done), VMBUS_CHANMSG_PROC_WAKEUP(CONNECT_RESP) }; static struct vmbus_msghc * vmbus_msghc_alloc(bus_dma_tag_t parent_dtag) { struct vmbus_msghc *mh; mh = malloc(sizeof(*mh), M_DEVBUF, M_WAITOK | M_ZERO); mh->mh_inprm = hyperv_dmamem_alloc(parent_dtag, HYPERCALL_PARAM_ALIGN, 0, HYPERCALL_POSTMSGIN_SIZE, &mh->mh_inprm_dma, BUS_DMA_WAITOK); if (mh->mh_inprm == NULL) { free(mh, M_DEVBUF); return NULL; } return mh; } static void vmbus_msghc_free(struct vmbus_msghc *mh) { hyperv_dmamem_free(&mh->mh_inprm_dma, mh->mh_inprm); free(mh, M_DEVBUF); } static void vmbus_msghc_ctx_free(struct vmbus_msghc_ctx *mhc) { KASSERT(mhc->mhc_active == NULL, ("still have active msg hypercall")); KASSERT(mhc->mhc_free == NULL, ("still have hypercall msg")); mtx_destroy(&mhc->mhc_free_lock); mtx_destroy(&mhc->mhc_active_lock); free(mhc, M_DEVBUF); } static struct vmbus_msghc_ctx * vmbus_msghc_ctx_create(bus_dma_tag_t parent_dtag) { struct vmbus_msghc_ctx *mhc; mhc = malloc(sizeof(*mhc), M_DEVBUF, M_WAITOK | M_ZERO); mtx_init(&mhc->mhc_free_lock, "vmbus msghc free", NULL, MTX_DEF); mtx_init(&mhc->mhc_active_lock, "vmbus msghc act", NULL, MTX_DEF); mhc->mhc_free = vmbus_msghc_alloc(parent_dtag); if (mhc->mhc_free == NULL) { vmbus_msghc_ctx_free(mhc); return NULL; } return mhc; } static struct vmbus_msghc * vmbus_msghc_get1(struct vmbus_msghc_ctx *mhc, uint32_t dtor_flag) { struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_free_lock); while ((mhc->mhc_flags & dtor_flag) == 0 && mhc->mhc_free == NULL) { mtx_sleep(&mhc->mhc_free, &mhc->mhc_free_lock, 0, "gmsghc", 0); } if (mhc->mhc_flags & dtor_flag) { /* Being destroyed */ mh = NULL; } else { mh = mhc->mhc_free; KASSERT(mh != NULL, ("no free hypercall msg")); KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); mhc->mhc_free = NULL; } mtx_unlock(&mhc->mhc_free_lock); return mh; } void vmbus_msghc_reset(struct vmbus_msghc *mh, size_t dsize) { struct hypercall_postmsg_in *inprm; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); inprm = mh->mh_inprm; memset(inprm, 0, HYPERCALL_POSTMSGIN_SIZE); inprm->hc_connid = VMBUS_CONNID_MESSAGE; inprm->hc_msgtype = HYPERV_MSGTYPE_CHANNEL; inprm->hc_dsize = dsize; } struct vmbus_msghc * vmbus_msghc_get(struct vmbus_softc *sc, size_t dsize) { struct vmbus_msghc *mh; if (dsize > HYPERCALL_POSTMSGIN_DSIZE_MAX) panic("invalid data size %zu", dsize); mh = vmbus_msghc_get1(sc->vmbus_msg_hc, VMBUS_MSGHC_CTXF_DESTROY); if (mh == NULL) return NULL; vmbus_msghc_reset(mh, dsize); return mh; } void vmbus_msghc_put(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; KASSERT(mhc->mhc_active == NULL, ("msg hypercall is active")); mh->mh_resp = NULL; mtx_lock(&mhc->mhc_free_lock); KASSERT(mhc->mhc_free == NULL, ("has free hypercall msg")); mhc->mhc_free = mh; mtx_unlock(&mhc->mhc_free_lock); wakeup(&mhc->mhc_free); } void * vmbus_msghc_dataptr(struct vmbus_msghc *mh) { return mh->mh_inprm->hc_data; } static void vmbus_msghc_ctx_destroy(struct vmbus_msghc_ctx *mhc) { struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_free_lock); mhc->mhc_flags |= VMBUS_MSGHC_CTXF_DESTROY; mtx_unlock(&mhc->mhc_free_lock); wakeup(&mhc->mhc_free); mh = vmbus_msghc_get1(mhc, 0); if (mh == NULL) panic("can't get msghc"); vmbus_msghc_free(mh); vmbus_msghc_ctx_free(mhc); } int vmbus_msghc_exec_noresult(struct vmbus_msghc *mh) { sbintime_t time = SBT_1MS; int i; /* * Save the input parameter so that we could restore the input * parameter if the Hypercall failed. * * XXX * Is this really necessary?! i.e. Will the Hypercall ever * overwrite the input parameter? */ memcpy(&mh->mh_inprm_save, mh->mh_inprm, HYPERCALL_POSTMSGIN_SIZE); /* * In order to cope with transient failures, e.g. insufficient * resources on host side, we retry the post message Hypercall * several times. 20 retries seem sufficient. */ #define HC_RETRY_MAX 20 for (i = 0; i < HC_RETRY_MAX; ++i) { uint64_t status; status = hypercall_post_message(mh->mh_inprm_dma.hv_paddr); if (status == HYPERCALL_STATUS_SUCCESS) return 0; pause_sbt("hcpmsg", time, 0, C_HARDCLOCK); if (time < SBT_1S * 2) time *= 2; /* Restore input parameter and try again */ memcpy(mh->mh_inprm, &mh->mh_inprm_save, HYPERCALL_POSTMSGIN_SIZE); } #undef HC_RETRY_MAX return EIO; } int vmbus_msghc_exec(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; int error; KASSERT(mh->mh_resp == NULL, ("hypercall msg has pending response")); mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == NULL, ("pending active msg hypercall")); mhc->mhc_active = mh; mtx_unlock(&mhc->mhc_active_lock); error = vmbus_msghc_exec_noresult(mh); if (error) { mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); mhc->mhc_active = NULL; mtx_unlock(&mhc->mhc_active_lock); } return error; } const struct vmbus_message * vmbus_msghc_wait_result(struct vmbus_softc *sc, struct vmbus_msghc *mh) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; mtx_lock(&mhc->mhc_active_lock); KASSERT(mhc->mhc_active == mh, ("msghc mismatch")); while (mh->mh_resp == NULL) { mtx_sleep(&mhc->mhc_active, &mhc->mhc_active_lock, 0, "wmsghc", 0); } mhc->mhc_active = NULL; mtx_unlock(&mhc->mhc_active_lock); return mh->mh_resp; } void vmbus_msghc_wakeup(struct vmbus_softc *sc, const struct vmbus_message *msg) { struct vmbus_msghc_ctx *mhc = sc->vmbus_msg_hc; struct vmbus_msghc *mh; mtx_lock(&mhc->mhc_active_lock); mh = mhc->mhc_active; KASSERT(mh != NULL, ("no pending msg hypercall")); memcpy(&mh->mh_resp0, msg, sizeof(mh->mh_resp0)); mh->mh_resp = &mh->mh_resp0; mtx_unlock(&mhc->mhc_active_lock); wakeup(&mhc->mhc_active); } uint32_t vmbus_gpadl_alloc(struct vmbus_softc *sc) { return atomic_fetchadd_int(&sc->vmbus_gpadl, 1); } static int vmbus_connect(struct vmbus_softc *sc, uint32_t version) { struct vmbus_chanmsg_connect *req; const struct vmbus_message *msg; struct vmbus_msghc *mh; int error, done = 0; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CONNECT; req->chm_ver = version; req->chm_evtflags = sc->vmbus_evtflags_dma.hv_paddr; req->chm_mnf1 = sc->vmbus_mnf1_dma.hv_paddr; req->chm_mnf2 = sc->vmbus_mnf2_dma.hv_paddr; error = vmbus_msghc_exec(sc, mh); if (error) { vmbus_msghc_put(sc, mh); return error; } msg = vmbus_msghc_wait_result(sc, mh); done = ((const struct vmbus_chanmsg_connect_resp *) msg->msg_data)->chm_done; vmbus_msghc_put(sc, mh); return (done ? 0 : EOPNOTSUPP); } static int vmbus_init(struct vmbus_softc *sc) { int i; for (i = 0; i < nitems(vmbus_version); ++i) { int error; error = vmbus_connect(sc, vmbus_version[i]); if (!error) { sc->vmbus_version = vmbus_version[i]; device_printf(sc->vmbus_dev, "version %u.%u\n", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return 0; } } return ENXIO; } static void vmbus_disconnect(struct vmbus_softc *sc) { struct vmbus_chanmsg_disconnect *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for disconnect\n"); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_DISCONNECT; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "disconnect msg hypercall failed\n"); } } static int vmbus_req_channels(struct vmbus_softc *sc) { struct vmbus_chanmsg_chrequest *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) return ENXIO; req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHREQUEST; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); return error; } static void vmbus_scan_newchan(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); if ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) sc->vmbus_scan_chcnt++; mtx_unlock(&sc->vmbus_scan_lock); } static void vmbus_scan_done(struct vmbus_softc *sc, const struct vmbus_message *msg __unused) { mtx_lock(&sc->vmbus_scan_lock); sc->vmbus_scan_chcnt |= VMBUS_SCAN_CHCNT_DONE; mtx_unlock(&sc->vmbus_scan_lock); wakeup(&sc->vmbus_scan_chcnt); } static void vmbus_scan_newdev(struct vmbus_softc *sc) { mtx_lock(&sc->vmbus_scan_lock); sc->vmbus_scan_devcnt++; mtx_unlock(&sc->vmbus_scan_lock); wakeup(&sc->vmbus_scan_devcnt); } static void vmbus_scan_wait(struct vmbus_softc *sc) { uint32_t chancnt; mtx_lock(&sc->vmbus_scan_lock); while ((sc->vmbus_scan_chcnt & VMBUS_SCAN_CHCNT_DONE) == 0) { mtx_sleep(&sc->vmbus_scan_chcnt, &sc->vmbus_scan_lock, 0, "waitch", 0); } chancnt = sc->vmbus_scan_chcnt & ~VMBUS_SCAN_CHCNT_DONE; while (sc->vmbus_scan_devcnt != chancnt) { mtx_sleep(&sc->vmbus_scan_devcnt, &sc->vmbus_scan_lock, 0, "waitdev", 0); } mtx_unlock(&sc->vmbus_scan_lock); } static int vmbus_scan(struct vmbus_softc *sc) { int error; /* * Start vmbus scanning. */ error = vmbus_req_channels(sc); if (error) { device_printf(sc->vmbus_dev, "channel request failed: %d\n", error); return error; } /* * Wait for all devices are added to vmbus. */ vmbus_scan_wait(sc); /* * Identify, probe and attach. */ bus_generic_probe(sc->vmbus_dev); bus_generic_attach(sc->vmbus_dev); if (bootverbose) { device_printf(sc->vmbus_dev, "device scan, probe and attach " "done\n"); } return 0; } static void vmbus_chanmsg_handle(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; if (msg_type >= VMBUS_CHANMSG_TYPE_MAX) { device_printf(sc->vmbus_dev, "unknown message type 0x%x\n", msg_type); return; } msg_proc = vmbus_chanmsg_handlers[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); /* Channel specific processing */ vmbus_chan_msgproc(sc, msg); } static void vmbus_msg_task(void *xsc, int pending __unused) { struct vmbus_softc *sc = xsc; volatile struct vmbus_message *msg; msg = VMBUS_PCPU_GET(sc, message, curcpu) + VMBUS_SINT_MESSAGE; for (;;) { if (msg->msg_type == HYPERV_MSGTYPE_NONE) { /* No message */ break; } else if (msg->msg_type == HYPERV_MSGTYPE_CHANNEL) { /* Channel message */ vmbus_chanmsg_handle(sc, __DEVOLATILE(const struct vmbus_message *, msg)); } msg->msg_type = HYPERV_MSGTYPE_NONE; /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } } static __inline int vmbus_handle_intr1(struct vmbus_softc *sc, struct trapframe *frame, int cpu) { volatile struct vmbus_message *msg; struct vmbus_message *msg_base; msg_base = VMBUS_PCPU_GET(sc, message, cpu); /* * Check event timer. * * TODO: move this to independent IDT vector. */ msg = msg_base + VMBUS_SINT_TIMER; if (msg->msg_type == HYPERV_MSGTYPE_TIMER_EXPIRED) { msg->msg_type = HYPERV_MSGTYPE_NONE; vmbus_et_intr(frame); /* * Make sure the write to msg_type (i.e. set to * HYPERV_MSGTYPE_NONE) happens before we read the * msg_flags and EOMing. Otherwise, the EOMing will * not deliver any more messages since there is no * empty slot * * NOTE: * mb() is used here, since atomic_thread_fence_seq_cst() * will become compiler fence on UP kernel. */ mb(); if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) { /* * This will cause message queue rescan to possibly * deliver another msg from the hypervisor */ wrmsr(MSR_HV_EOM, 0); } } /* * Check events. Hot path for network and storage I/O data; high rate. * * NOTE: * As recommended by the Windows guest fellows, we check events before * checking messages. */ sc->vmbus_event_proc(sc, cpu); /* * Check messages. Mainly management stuffs; ultra low rate. */ msg = msg_base + VMBUS_SINT_MESSAGE; if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) { taskqueue_enqueue(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); } return (FILTER_HANDLED); } void vmbus_handle_intr(struct trapframe *trap_frame) { struct vmbus_softc *sc = vmbus_get_softc(); int cpu = curcpu; /* * Disable preemption. */ critical_enter(); /* * Do a little interrupt counting. */ (*VMBUS_PCPU_GET(sc, intr_cnt, cpu))++; vmbus_handle_intr1(sc, trap_frame, cpu); /* * Enable preemption. */ critical_exit(); } static void vmbus_synic_setup(void *xsc) { struct vmbus_softc *sc = xsc; int cpu = curcpu; uint64_t val, orig; uint32_t sint; if (hyperv_features & CPUID_HV_MSR_VP_INDEX) { /* Save virtual processor id. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = rdmsr(MSR_HV_VP_INDEX); } else { /* Set virtual processor id to 0 for compatibility. */ VMBUS_PCPU_GET(sc, vcpuid, cpu) = 0; } /* * Setup the SynIC message. */ orig = rdmsr(MSR_HV_SIMP); val = MSR_HV_SIMP_ENABLE | (orig & MSR_HV_SIMP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, message_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIMP_PGSHIFT); wrmsr(MSR_HV_SIMP, val); /* * Setup the SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); val = MSR_HV_SIEFP_ENABLE | (orig & MSR_HV_SIEFP_RSVD_MASK) | ((VMBUS_PCPU_GET(sc, event_flags_dma.hv_paddr, cpu) >> PAGE_SHIFT) << MSR_HV_SIEFP_PGSHIFT); wrmsr(MSR_HV_SIEFP, val); /* * Configure and unmask SINT for message and event flags. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * Configure and unmask SINT for timer. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); val = sc->vmbus_idtvec | MSR_HV_SINT_AUTOEOI | (orig & MSR_HV_SINT_RSVD_MASK); wrmsr(sint, val); /* * All done; enable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); val = MSR_HV_SCTRL_ENABLE | (orig & MSR_HV_SCTRL_RSVD_MASK); wrmsr(MSR_HV_SCONTROL, val); } static void vmbus_synic_teardown(void *arg) { uint64_t orig; uint32_t sint; /* * Disable SynIC. */ orig = rdmsr(MSR_HV_SCONTROL); wrmsr(MSR_HV_SCONTROL, (orig & MSR_HV_SCTRL_RSVD_MASK)); /* * Mask message and event flags SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_MESSAGE; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Mask timer SINT. */ sint = MSR_HV_SINT0 + VMBUS_SINT_TIMER; orig = rdmsr(sint); wrmsr(sint, orig | MSR_HV_SINT_MASKED); /* * Teardown SynIC message. */ orig = rdmsr(MSR_HV_SIMP); wrmsr(MSR_HV_SIMP, (orig & MSR_HV_SIMP_RSVD_MASK)); /* * Teardown SynIC event flags. */ orig = rdmsr(MSR_HV_SIEFP); wrmsr(MSR_HV_SIEFP, (orig & MSR_HV_SIEFP_RSVD_MASK)); } static int vmbus_dma_alloc(struct vmbus_softc *sc) { bus_dma_tag_t parent_dtag; uint8_t *evtflags; int cpu; parent_dtag = bus_get_dma_tag(sc->vmbus_dev); CPU_FOREACH(cpu) { void *ptr; /* * Per-cpu messages and event flags. */ ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, message_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, message, cpu) = ptr; ptr = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), BUS_DMA_WAITOK | BUS_DMA_ZERO); if (ptr == NULL) return ENOMEM; VMBUS_PCPU_GET(sc, event_flags, cpu) = ptr; } evtflags = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_evtflags_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (evtflags == NULL) return ENOMEM; sc->vmbus_rx_evtflags = (u_long *)evtflags; sc->vmbus_tx_evtflags = (u_long *)(evtflags + (PAGE_SIZE / 2)); sc->vmbus_evtflags = evtflags; sc->vmbus_mnf1 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, PAGE_SIZE, &sc->vmbus_mnf1_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf1 == NULL) return ENOMEM; sc->vmbus_mnf2 = hyperv_dmamem_alloc(parent_dtag, PAGE_SIZE, 0, sizeof(struct vmbus_mnf), &sc->vmbus_mnf2_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->vmbus_mnf2 == NULL) return ENOMEM; return 0; } static void vmbus_dma_free(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_evtflags != NULL) { hyperv_dmamem_free(&sc->vmbus_evtflags_dma, sc->vmbus_evtflags); sc->vmbus_evtflags = NULL; sc->vmbus_rx_evtflags = NULL; sc->vmbus_tx_evtflags = NULL; } if (sc->vmbus_mnf1 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf1_dma, sc->vmbus_mnf1); sc->vmbus_mnf1 = NULL; } if (sc->vmbus_mnf2 != NULL) { hyperv_dmamem_free(&sc->vmbus_mnf2_dma, sc->vmbus_mnf2); sc->vmbus_mnf2 = NULL; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, message, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, message_dma, cpu), VMBUS_PCPU_GET(sc, message, cpu)); VMBUS_PCPU_GET(sc, message, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, event_flags, cpu) != NULL) { hyperv_dmamem_free( VMBUS_PCPU_PTR(sc, event_flags_dma, cpu), VMBUS_PCPU_GET(sc, event_flags, cpu)); VMBUS_PCPU_GET(sc, event_flags, cpu) = NULL; } } } static int vmbus_intr_setup(struct vmbus_softc *sc) { int cpu; CPU_FOREACH(cpu) { char buf[MAXCOMLEN + 1]; cpuset_t cpu_mask; /* Allocate an interrupt counter for Hyper-V interrupt */ snprintf(buf, sizeof(buf), "cpu%d:hyperv", cpu); intrcnt_add(buf, VMBUS_PCPU_PTR(sc, intr_cnt, cpu)); /* * Setup taskqueue to handle events. Task will be per- * channel. */ VMBUS_PCPU_GET(sc, event_tq, cpu) = taskqueue_create_fast( "hyperv event", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, event_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, event_tq, cpu), 1, PI_NET, &cpu_mask, "hvevent%d", cpu); /* * Setup tasks and taskqueues to handle messages. */ VMBUS_PCPU_GET(sc, message_tq, cpu) = taskqueue_create_fast( "hyperv msg", M_WAITOK, taskqueue_thread_enqueue, VMBUS_PCPU_PTR(sc, message_tq, cpu)); CPU_SETOF(cpu, &cpu_mask); taskqueue_start_threads_cpuset( VMBUS_PCPU_PTR(sc, message_tq, cpu), 1, PI_NET, &cpu_mask, "hvmsg%d", cpu); TASK_INIT(VMBUS_PCPU_PTR(sc, message_task, cpu), 0, vmbus_msg_task, sc); } /* * All Hyper-V ISR required resources are setup, now let's find a * free IDT vector for Hyper-V ISR and set it up. */ sc->vmbus_idtvec = lapic_ipi_alloc(IDTVEC(vmbus_isr)); if (sc->vmbus_idtvec < 0) { device_printf(sc->vmbus_dev, "cannot find free IDT vector\n"); return ENXIO; } if(bootverbose) { device_printf(sc->vmbus_dev, "vmbus IDT vector %d\n", sc->vmbus_idtvec); } return 0; } static void vmbus_intr_teardown(struct vmbus_softc *sc) { int cpu; if (sc->vmbus_idtvec >= 0) { lapic_ipi_free(sc->vmbus_idtvec); sc->vmbus_idtvec = -1; } CPU_FOREACH(cpu) { if (VMBUS_PCPU_GET(sc, event_tq, cpu) != NULL) { taskqueue_free(VMBUS_PCPU_GET(sc, event_tq, cpu)); VMBUS_PCPU_GET(sc, event_tq, cpu) = NULL; } if (VMBUS_PCPU_GET(sc, message_tq, cpu) != NULL) { taskqueue_drain(VMBUS_PCPU_GET(sc, message_tq, cpu), VMBUS_PCPU_PTR(sc, message_task, cpu)); taskqueue_free(VMBUS_PCPU_GET(sc, message_tq, cpu)); VMBUS_PCPU_GET(sc, message_tq, cpu) = NULL; } } } static int vmbus_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { return (ENOENT); } static int vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen) { const struct vmbus_channel *chan; char guidbuf[HYPERV_GUID_STRLEN]; chan = vmbus_get_channel(child); if (chan == NULL) { /* Event timer device, which does not belong to a channel */ return (0); } strlcat(buf, "classid=", buflen); hyperv_guid2str(&chan->ch_guid_type, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); strlcat(buf, " deviceid=", buflen); hyperv_guid2str(&chan->ch_guid_inst, guidbuf, sizeof(guidbuf)); strlcat(buf, guidbuf, buflen); return (0); } int vmbus_add_child(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; device_t parent = sc->vmbus_dev; int error = 0; /* New channel has been offered */ vmbus_scan_newchan(sc); chan->ch_dev = device_add_child(parent, NULL, -1); if (chan->ch_dev == NULL) { device_printf(parent, "device_add_child for chan%u failed\n", chan->ch_id); error = ENXIO; goto done; } device_set_ivars(chan->ch_dev, chan); done: /* New device has been/should be added to vmbus. */ vmbus_scan_newdev(sc); return error; } int vmbus_delete_child(struct vmbus_channel *chan) { int error; if (chan->ch_dev == NULL) { /* Failed to add a device. */ return 0; } /* * XXXKYS: Ensure that this is the opposite of * device_add_child() */ mtx_lock(&Giant); error = device_delete_child(chan->ch_vmbus->vmbus_dev, chan->ch_dev); mtx_unlock(&Giant); return error; } static int vmbus_sysctl_version(SYSCTL_HANDLER_ARGS) { struct vmbus_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", VMBUS_VERSION_MAJOR(sc->vmbus_version), VMBUS_VERSION_MINOR(sc->vmbus_version)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static uint32_t vmbus_get_version_method(device_t bus, device_t dev) { struct vmbus_softc *sc = device_get_softc(bus); return sc->vmbus_version; } static int vmbus_probe_guid_method(device_t bus, device_t dev, const struct hyperv_guid *guid) { const struct vmbus_channel *chan = vmbus_get_channel(dev); if (memcmp(&chan->ch_guid_type, guid, sizeof(struct hyperv_guid)) == 0) return 0; return ENXIO; } static int vmbus_probe(device_t dev) { char *id[] = { "VMBUS", NULL }; if (ACPI_ID_PROBE(device_get_parent(dev), dev, id) == NULL || device_get_unit(dev) != 0 || vm_guest != VM_GUEST_HV || (hyperv_features & CPUID_HV_MSR_SYNIC) == 0) return (ENXIO); device_set_desc(dev, "Hyper-V Vmbus"); return (BUS_PROBE_DEFAULT); } /** * @brief Main vmbus driver initialization routine. * * Here, we * - initialize the vmbus driver context * - setup various driver entry points * - invoke the vmbus hv main init routine * - get the irq resource * - invoke the vmbus to add the vmbus root device * - setup the vmbus root device * - retrieve the channel offers */ static int vmbus_doattach(struct vmbus_softc *sc) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int ret; if (sc->vmbus_flags & VMBUS_FLAG_ATTACHED) return (0); sc->vmbus_flags |= VMBUS_FLAG_ATTACHED; mtx_init(&sc->vmbus_scan_lock, "vmbus scan", NULL, MTX_DEF); sc->vmbus_gpadl = VMBUS_GPADL_START; mtx_init(&sc->vmbus_prichan_lock, "vmbus prichan", NULL, MTX_DEF); TAILQ_INIT(&sc->vmbus_prichans); sc->vmbus_chmap = malloc( sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX, M_DEVBUF, M_WAITOK | M_ZERO); /* * Create context for "post message" Hypercalls */ sc->vmbus_msg_hc = vmbus_msghc_ctx_create( bus_get_dma_tag(sc->vmbus_dev)); if (sc->vmbus_msg_hc == NULL) { ret = ENXIO; goto cleanup; } /* * Allocate DMA stuffs. */ ret = vmbus_dma_alloc(sc); if (ret != 0) goto cleanup; /* * Setup interrupt. */ ret = vmbus_intr_setup(sc); if (ret != 0) goto cleanup; /* * Setup SynIC. */ if (bootverbose) device_printf(sc->vmbus_dev, "smp_started = %d\n", smp_started); smp_rendezvous(NULL, vmbus_synic_setup, NULL, sc); sc->vmbus_flags |= VMBUS_FLAG_SYNIC; /* * Initialize vmbus, e.g. connect to Hypervisor. */ ret = vmbus_init(sc); if (ret != 0) goto cleanup; if (sc->vmbus_version == VMBUS_VERSION_WS2008 || sc->vmbus_version == VMBUS_VERSION_WIN7) sc->vmbus_event_proc = vmbus_event_proc_compat; else sc->vmbus_event_proc = vmbus_event_proc; ret = vmbus_scan(sc); if (ret != 0) goto cleanup; ctx = device_get_sysctl_ctx(sc->vmbus_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->vmbus_dev)); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, vmbus_sysctl_version, "A", "vmbus version"); return (ret); cleanup: vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_msg_hc != NULL) { vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); sc->vmbus_msg_hc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); mtx_destroy(&sc->vmbus_prichan_lock); return (ret); } static void vmbus_event_proc_dummy(struct vmbus_softc *sc __unused, int cpu __unused) { } static int vmbus_attach(device_t dev) { vmbus_sc = device_get_softc(dev); vmbus_sc->vmbus_dev = dev; vmbus_sc->vmbus_idtvec = -1; /* * Event processing logic will be configured: * - After the vmbus protocol version negotiation. * - Before we request channel offers. */ vmbus_sc->vmbus_event_proc = vmbus_event_proc_dummy; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible indicated by the global * cold set to zero, we just call the driver * initialization directly. */ if (!cold) #endif vmbus_doattach(vmbus_sc); return (0); } static void vmbus_sysinit(void *arg __unused) { struct vmbus_softc *sc = vmbus_get_softc(); if (vm_guest != VM_GUEST_HV || sc == NULL) return; #ifndef EARLY_AP_STARTUP /* * If the system has already booted and thread * scheduling is possible, as indicated by the * global cold set to zero, we just call the driver * initialization directly. */ if (!cold) #endif vmbus_doattach(sc); } static int vmbus_detach(device_t dev) { struct vmbus_softc *sc = device_get_softc(dev); vmbus_chan_destroy_all(sc); vmbus_disconnect(sc); if (sc->vmbus_flags & VMBUS_FLAG_SYNIC) { sc->vmbus_flags &= ~VMBUS_FLAG_SYNIC; smp_rendezvous(NULL, vmbus_synic_teardown, NULL, NULL); } vmbus_intr_teardown(sc); vmbus_dma_free(sc); if (sc->vmbus_msg_hc != NULL) { vmbus_msghc_ctx_destroy(sc->vmbus_msg_hc); sc->vmbus_msg_hc = NULL; } free(sc->vmbus_chmap, M_DEVBUF); mtx_destroy(&sc->vmbus_scan_lock); mtx_destroy(&sc->vmbus_prichan_lock); return (0); } static device_method_t vmbus_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, vmbus_read_ivar), DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str), /* Vmbus interface */ DEVMETHOD(vmbus_get_version, vmbus_get_version_method), DEVMETHOD(vmbus_probe_guid, vmbus_probe_guid_method), DEVMETHOD_END }; static driver_t vmbus_driver = { "vmbus", vmbus_methods, sizeof(struct vmbus_softc) }; static devclass_t vmbus_devclass; DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, NULL, NULL); MODULE_DEPEND(vmbus, acpi, 1, 1, 1); MODULE_VERSION(vmbus, 1); #ifndef EARLY_AP_STARTUP /* * NOTE: * We have to start as the last step of SI_SUB_SMP, i.e. after SMP is * initialized. */ SYSINIT(vmbus_initialize, SI_SUB_SMP, SI_ORDER_ANY, vmbus_sysinit, NULL); #endif Index: head/sys/dev/hyperv/vmbus/vmbus_br.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_br.c (revision 303470) +++ head/sys/dev/hyperv/vmbus/vmbus_br.c (revision 303471) @@ -1,400 +1,404 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include /* Amount of space available for write */ #define VMBUS_BR_WAVAIL(r, w, z) \ (((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w))) /* Increase bufing index */ #define VMBUS_BR_IDXINC(idx, inc, sz) (((idx) + (inc)) % (sz)) +static int vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS); +static int vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS); +static void vmbus_br_setup(struct vmbus_br *, void *, int); + static int vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS) { const struct vmbus_br *br = arg1; uint32_t rindex, windex, imask, ravail, wavail; char state[256]; rindex = br->vbr_rindex; windex = br->vbr_windex; imask = br->vbr_imask; wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize); ravail = br->vbr_dsize - wavail; snprintf(state, sizeof(state), "rindex:%u windex:%u imask:%u ravail:%u wavail:%u", rindex, windex, imask, ravail, wavail); return sysctl_handle_string(oidp, state, sizeof(state), req); } /* * Binary bufring states. */ static int vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS) { #define BR_STATE_RIDX 0 #define BR_STATE_WIDX 1 #define BR_STATE_IMSK 2 #define BR_STATE_RSPC 3 #define BR_STATE_WSPC 4 #define BR_STATE_MAX 5 const struct vmbus_br *br = arg1; uint32_t rindex, windex, wavail, state[BR_STATE_MAX]; rindex = br->vbr_rindex; windex = br->vbr_windex; wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize); state[BR_STATE_RIDX] = rindex; state[BR_STATE_WIDX] = windex; state[BR_STATE_IMSK] = br->vbr_imask; state[BR_STATE_WSPC] = wavail; state[BR_STATE_RSPC] = br->vbr_dsize - wavail; return sysctl_handle_opaque(oidp, state, sizeof(state), req); } void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree, struct vmbus_br *br, const char *name) { struct sysctl_oid *tree; char desc[64]; tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (tree == NULL) return; snprintf(desc, sizeof(desc), "%s state", name); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, br, 0, vmbus_br_sysctl_state, "A", desc); snprintf(desc, sizeof(desc), "%s binary state", name); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin", CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, br, 0, vmbus_br_sysctl_state_bin, "IU", desc); } void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr) { rbr->rxbr_imask = 1; mb(); } static __inline uint32_t vmbus_rxbr_avail(const struct vmbus_rxbr *rbr) { uint32_t rindex, windex; /* Get snapshot */ rindex = rbr->rxbr_rindex; windex = rbr->rxbr_windex; return (rbr->rxbr_dsize - VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize)); } uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr) { rbr->rxbr_imask = 0; mb(); /* * Now check to see if the ring buffer is still empty. * If it is not, we raced and we need to process new * incoming channel packets. */ return vmbus_rxbr_avail(rbr); } static void vmbus_br_setup(struct vmbus_br *br, void *buf, int blen) { br->vbr = buf; br->vbr_dsize = blen - sizeof(struct vmbus_bufring); } void vmbus_rxbr_init(struct vmbus_rxbr *rbr) { mtx_init(&rbr->rxbr_lock, "vmbus_rxbr", NULL, MTX_SPIN); } void vmbus_rxbr_deinit(struct vmbus_rxbr *rbr) { mtx_destroy(&rbr->rxbr_lock); } void vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen) { vmbus_br_setup(&rbr->rxbr, buf, blen); } void vmbus_txbr_init(struct vmbus_txbr *tbr) { mtx_init(&tbr->txbr_lock, "vmbus_txbr", NULL, MTX_SPIN); } void vmbus_txbr_deinit(struct vmbus_txbr *tbr) { mtx_destroy(&tbr->txbr_lock); } void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen) { vmbus_br_setup(&tbr->txbr, buf, blen); } /* * When we write to the ring buffer, check if the host needs to be * signaled. * * The contract: * - The host guarantees that while it is draining the TX bufring, * it will set the br_imask to indicate it does not need to be * interrupted when new data are added. * - The host guarantees that it will completely drain the TX bufring * before exiting the read loop. Further, once the TX bufring is * empty, it will clear the br_imask and re-check to see if new * data have arrived. */ static __inline boolean_t vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex) { mb(); if (tbr->txbr_imask) return (FALSE); __compiler_membar(); /* * This is the only case we need to signal when the * ring transitions from being empty to non-empty. */ if (old_windex == tbr->txbr_rindex) return (TRUE); return (FALSE); } static __inline uint32_t vmbus_txbr_avail(const struct vmbus_txbr *tbr) { uint32_t rindex, windex; /* Get snapshot */ rindex = tbr->txbr_rindex; windex = tbr->txbr_windex; return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize); } static __inline uint32_t vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex, const void *src0, uint32_t cplen) { const uint8_t *src = src0; uint8_t *br_data = tbr->txbr_data; uint32_t br_dsize = tbr->txbr_dsize; if (cplen > br_dsize - windex) { uint32_t fraglen = br_dsize - windex; /* Wrap-around detected */ memcpy(br_data + windex, src, fraglen); memcpy(br_data, src + fraglen, cplen - fraglen); } else { memcpy(br_data + windex, src, cplen); } return VMBUS_BR_IDXINC(windex, cplen, br_dsize); } /* * Write scattered channel packet to TX bufring. * * The offset of this channel packet is written as a 64bits value * immediately after this channel packet. */ int vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen, boolean_t *need_sig) { uint32_t old_windex, windex, total; uint64_t save_windex; int i; total = 0; for (i = 0; i < iovlen; i++) total += iov[i].iov_len; total += sizeof(save_windex); mtx_lock_spin(&tbr->txbr_lock); /* * NOTE: * If this write is going to make br_windex same as br_rindex, * i.e. the available space for write is same as the write size, * we can't do it then, since br_windex == br_rindex means that * the bufring is empty. */ if (vmbus_txbr_avail(tbr) <= total) { mtx_unlock_spin(&tbr->txbr_lock); return (EAGAIN); } /* Save br_windex for later use */ old_windex = tbr->txbr_windex; /* * Copy the scattered channel packet to the TX bufring. */ windex = old_windex; for (i = 0; i < iovlen; i++) { windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len); } /* * Set the offset of the current channel packet. */ save_windex = ((uint64_t)old_windex) << 32; windex = vmbus_txbr_copyto(tbr, windex, &save_windex, sizeof(save_windex)); /* * Update the write index _after_ the channel packet * is copied. */ __compiler_membar(); tbr->txbr_windex = windex; mtx_unlock_spin(&tbr->txbr_lock); *need_sig = vmbus_txbr_need_signal(tbr, old_windex); return (0); } static __inline uint32_t vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex, void *dst0, int cplen) { uint8_t *dst = dst0; const uint8_t *br_data = rbr->rxbr_data; uint32_t br_dsize = rbr->rxbr_dsize; if (cplen > br_dsize - rindex) { uint32_t fraglen = br_dsize - rindex; /* Wrap-around detected. */ memcpy(dst, br_data + rindex, fraglen); memcpy(dst + fraglen, br_data, cplen - fraglen); } else { memcpy(dst, br_data + rindex, cplen); } return VMBUS_BR_IDXINC(rindex, cplen, br_dsize); } int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen) { mtx_lock_spin(&rbr->rxbr_lock); /* * The requested data and the 64bits channel packet * offset should be there at least. */ if (vmbus_rxbr_avail(rbr) < dlen + sizeof(uint64_t)) { mtx_unlock_spin(&rbr->rxbr_lock); return (EAGAIN); } vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen); mtx_unlock_spin(&rbr->rxbr_lock); return (0); } /* * NOTE: * We assume (dlen + skip) == sizeof(channel packet). */ int vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip) { uint32_t rindex, br_dsize = rbr->rxbr_dsize; KASSERT(dlen + skip > 0, ("invalid dlen %d, offset %u", dlen, skip)); mtx_lock_spin(&rbr->rxbr_lock); if (vmbus_rxbr_avail(rbr) < dlen + skip + sizeof(uint64_t)) { mtx_unlock_spin(&rbr->rxbr_lock); return (EAGAIN); } /* * Copy channel packet from RX bufring. */ rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize); rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen); /* * Discard this channel packet's 64bits offset, which is useless to us. */ rindex = VMBUS_BR_IDXINC(rindex, sizeof(uint64_t), br_dsize); /* * Update the read index _after_ the channel packet is fetched. */ __compiler_membar(); rbr->rxbr_rindex = rindex; mtx_unlock_spin(&rbr->rxbr_lock); return (0); } Index: head/sys/dev/hyperv/vmbus/vmbus_chan.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303470) +++ head/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303471) @@ -1,1402 +1,1413 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *, - const struct vmbus_channel *); +static void vmbus_chan_update_evtflagcnt( + struct vmbus_softc *, + const struct vmbus_channel *); +static void vmbus_chan_close_internal( + struct vmbus_channel *); +static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS); +static void vmbus_chan_sysctl_create( + struct vmbus_channel *); +static struct vmbus_channel *vmbus_chan_alloc(struct vmbus_softc *); +static void vmbus_chan_free(struct vmbus_channel *); +static int vmbus_chan_add(struct vmbus_channel *); +static void vmbus_chan_cpu_default(struct vmbus_channel *); -static void vmbus_chan_task(void *, int); -static void vmbus_chan_task_nobatch(void *, int); -static void vmbus_chan_detach_task(void *, int); +static void vmbus_chan_task(void *, int); +static void vmbus_chan_task_nobatch(void *, int); +static void vmbus_chan_detach_task(void *, int); -static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, - const struct vmbus_message *); -static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *, - const struct vmbus_message *); +static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, + const struct vmbus_message *); +static void vmbus_chan_msgproc_chrescind( + struct vmbus_softc *, + const struct vmbus_message *); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /* * Notify host that there are data pending on our TX bufring. */ static __inline void vmbus_chan_signal_tx(const struct vmbus_channel *chan) { atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask); if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask); else hypercall_signal_event(chan->ch_monprm_dma.hv_paddr); } static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS) { struct vmbus_channel *chan = arg1; int mnf = 0; if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) mnf = 1; return sysctl_handle_int(oidp, &mnf, 0, req); } static void vmbus_chan_sysctl_create(struct vmbus_channel *chan) { struct sysctl_oid *ch_tree, *chid_tree, *br_tree; struct sysctl_ctx_list *ctx; uint32_t ch_id; char name[16]; /* * Add sysctl nodes related to this channel to this * channel's sysctl ctx, so that they can be destroyed * independently upon close of this channel, which can * happen even if the device is not detached. */ ctx = &chan->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* * Create dev.NAME.UNIT.channel tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID tree. */ if (VMBUS_CHAN_ISPRIMARY(chan)) ch_id = chan->ch_id; else ch_id = chan->ch_prichan->ch_id; snprintf(name, sizeof(name), "%d", ch_id); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Create dev.NAME.UNIT.channel.CHANID.sub tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree. * * NOTE: * chid_tree is changed to this new sysctl tree. */ snprintf(name, sizeof(name), "%d", chan->ch_subidx); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, chan, 0, vmbus_chan_sysctl_mnf, "I", "has monitor notification facilities"); br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (br_tree != NULL) { /* * Create sysctl tree for RX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr.rxbr, "rx"); /* * Create sysctl tree for TX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr.txbr, "tx"); } } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = chan->ch_vmbus; const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int error; uint8_t *br; if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { device_printf(sc->vmbus_dev, "invalid udata len %d for chan%u\n", udlen, chan->ch_id); return EINVAL; } KASSERT((txbr_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((rxbr_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", chan->ch_id); chan->ch_cb = cb; chan->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, chan); chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) TASK_INIT(&chan->ch_task, 0, vmbus_chan_task, chan); else TASK_INIT(&chan->ch_task, 0, vmbus_chan_task_nobatch, chan); /* * Allocate the TX+RX bufrings. * XXX should use ch_dev dtag */ br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (br == NULL) { device_printf(sc->vmbus_dev, "bufring allocation failed\n"); error = ENOMEM; goto failed; } chan->ch_bufring = br; /* TX bufring comes first */ vmbus_txbr_setup(&chan->ch_txbr, br, txbr_size); /* RX bufring immediately follows TX bufring */ vmbus_rxbr_setup(&chan->ch_rxbr, br + txbr_size, rxbr_size); /* Create sysctl tree for this channel */ vmbus_chan_sysctl_create(chan); /* * Connect the bufrings, both RX and TX, to this channel. */ error = vmbus_chan_gpadl_connect(chan, chan->ch_bufring_dma.hv_paddr, txbr_size + rxbr_size, &chan->ch_bufring_gpadl); if (error) { device_printf(sc->vmbus_dev, "failed to connect bufring GPADL to chan%u\n", chan->ch_id); goto failed; } /* * Open channel w/ the bufring GPADL on the target CPU. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chopen(chan%u)\n", chan->ch_id); error = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = chan->ch_id; req->chm_openid = chan->ch_id; req->chm_gpadl = chan->ch_bufring_gpadl; req->chm_vcpuid = chan->ch_vcpuid; req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT; if (udlen > 0) memcpy(req->chm_udata, udata, udlen); error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chopen(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); goto failed; } msg = vmbus_msghc_wait_result(sc, mh); resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; status = resp->chm_status; vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u opened\n", chan->ch_id); } return 0; } device_printf(sc->vmbus_dev, "failed to open chan%u\n", chan->ch_id); error = ENXIO; failed: if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); return error; } int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { device_printf(sc->vmbus_dev, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); *gpadl0 = gpadl; /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. */ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpadl->chan%u\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpadl->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { device_printf(sc->vmbus_dev, "gpadl->chan%u failed: " "status %u\n", chan->ch_id, status); return EIO; } else { if (bootverbose) { device_printf(sc->vmbus_dev, "gpadl->chan%u " "succeeded\n", chan->ch_id); } } return 0; } /* * Disconnect the GPA from the target channel */ int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpa x->chan%u\n", chan->ch_id); return EBUSY; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpa x->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return 0; } static void vmbus_chan_close_internal(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; struct taskqueue *tq = chan->ch_tq; int error; /* TODO: stringent check */ atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); /* * Free this channel's sysctl tree attached to its device's * sysctl tree. */ sysctl_ctx_free(&chan->ch_sysctl_ctx); /* * Set ch_tq to NULL to avoid more requests be scheduled. * XXX pretty broken; need rework. */ chan->ch_tq = NULL; taskqueue_drain(tq, &chan->ch_task); chan->ch_cb = NULL; /* * Close this channel. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chclose(chan%u)\n", chan->ch_id); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chclose(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); return; } else if (bootverbose) { device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id); } /* * Disconnect the TX+RX bufrings from this channel. */ if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } /* * Destroy the TX+RX bufrings. */ if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void vmbus_chan_close(struct vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct vmbus_channel **subchan; int i; subchan = vmbus_subchan_get(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) vmbus_chan_close_internal(subchan[i]); vmbus_subchan_rel(subchan, subchan_cnt); } /* Then close the primary channel. */ vmbus_chan_close_internal(chan); } int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX, ("invalid sglist len %d", sglen)); hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX, ("invalid prplist entry count %d", prp_cnt)); hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return ENOBUFS; } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, data, dlen, hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return 0; } int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt0, int *pktlen0) { struct vmbus_chanpkt_hdr pkt; int error, pktlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return ENOBUFS; } *pktlen0 = pktlen; /* Include packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, pkt0, pktlen, 0); KASSERT(!error, ("vmbus_rxbr_read failed")); return 0; } static void vmbus_chan_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. */ for (;;) { uint32_t left; cb(chan, cbarg); left = vmbus_rxbr_intr_unmask(&chan->ch_rxbr); if (left == 0) { /* No more data in RX bufring; done */ break; } vmbus_rxbr_intr_mask(&chan->ch_rxbr); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; chan->ch_cb(chan, chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); chan = sc->vmbus_chmap[chid_base + chid_ofs]; /* if channel is closed or closing */ if (chan == NULL || chan->ch_tq == NULL) continue; if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) vmbus_rxbr_intr_mask(&chan->ch_rxbr); taskqueue_enqueue(chan->ch_tq, &chan->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On Host with Win8 or above, the event page can be checked directly * to get the id of the channel that has the pending interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { device_printf(sc->vmbus_dev, "channel%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->ch_vmbus = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); TAILQ_INIT(&chan->ch_subchans); TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan); vmbus_rxbr_init(&chan->ch_rxbr); vmbus_txbr_init(&chan->ch_txbr); return chan; } static void vmbus_chan_free(struct vmbus_channel *chan) { /* TODO: assert sub-channel list is empty */ /* TODO: asset no longer on the primary channel's sub-channel list */ /* TODO: asset no longer on the vmbus channel list */ hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); vmbus_rxbr_deinit(&chan->ch_rxbr); vmbus_txbr_deinit(&chan->ch_txbr); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct vmbus_channel *newchan) { struct vmbus_softc *sc = newchan->ch_vmbus; struct vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0 will neither be processed nor should be offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } sc->vmbus_chmap[newchan->ch_id] = newchan; if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. */ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); return 0; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary " "chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for " "chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel and * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink); /* * Bump up sub-channel count and notify anyone that is * interested in this sub-channel, after this sub-channel * is setup. */ prichan->ch_subchan_cnt++; mtx_unlock(&prichan->ch_subchan_lock); wakeup(prichan); return 0; } void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 || chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_chan_cpu_rr(struct vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_chan_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_chan_cpu_{set,rr}(). */ vmbus_chan_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct vmbus_channel *chan; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { int trig_idx; /* * Setup MNF stuffs. */ chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF; trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (trig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig = &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending; chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* * Setup event flag. */ chan->ch_evtflag = &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* Select default cpu for this channel. */ vmbus_chan_cpu_default(chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); vmbus_chan_free(chan); return; } if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * Add device for this primary channel. * * NOTE: * Error is ignored here; don't have much to do if error * really happens. */ vmbus_add_child(chan); } } /* * XXX pretty broken; need rework. */ static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n", note->chm_chanid); return; } if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u rescinded\n", note->chm_chanid); } chan = sc->vmbus_chmap[note->chm_chanid]; if (chan == NULL) return; sc->vmbus_chmap[note->chm_chanid] = NULL; taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task); } static void vmbus_chan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; if (VMBUS_CHAN_ISPRIMARY(chan)) { /* Only primary channel owns the device */ vmbus_delete_child(chan); /* NOTE: DO NOT free primary channel for now */ } else { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_channel *pri_chan = chan->ch_prichan; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); goto remove; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chfree(chan%u) failed: %d", chan->ch_id, error); /* NOTE: Move on! */ } else { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u freed\n", chan->ch_id); } } remove: mtx_lock(&pri_chan->ch_subchan_lock); TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink); KASSERT(pri_chan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt)); pri_chan->ch_subchan_cnt--; mtx_unlock(&pri_chan->ch_subchan_lock); wakeup(pri_chan); vmbus_chan_free(chan); } } /* * Detach all devices and destroy the corresponding primary channels. */ void vmbus_chan_destroy_all(struct vmbus_softc *sc) { struct vmbus_channel *chan; mtx_lock(&sc->vmbus_prichan_lock); while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) { KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel")); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); vmbus_delete_child(chan); vmbus_chan_free(chan); mtx_lock(&sc->vmbus_prichan_lock); } bzero(sc->vmbus_chmap, sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX); mtx_unlock(&sc->vmbus_prichan_lock); } /* * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel. */ struct vmbus_channel * vmbus_chan_cpu2chan(struct vmbus_channel *prichan, int cpu) { struct vmbus_channel *sel, *chan; uint32_t vcpu, sel_dist; KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpuid %d", cpu)); if (TAILQ_EMPTY(&prichan->ch_subchans)) return prichan; vcpu = VMBUS_PCPU_GET(prichan->ch_vmbus, vcpuid, cpu); #define CHAN_VCPU_DIST(ch, vcpu) \ (((ch)->ch_vcpuid > (vcpu)) ? \ ((ch)->ch_vcpuid - (vcpu)) : ((vcpu) - (ch)->ch_vcpuid)) #define CHAN_SELECT(ch) \ do { \ sel = ch; \ sel_dist = CHAN_VCPU_DIST(ch, vcpu); \ } while (0) CHAN_SELECT(prichan); mtx_lock(&prichan->ch_subchan_lock); TAILQ_FOREACH(chan, &prichan->ch_subchans, ch_sublink) { uint32_t dist; KASSERT(chan->ch_stflags & VMBUS_CHAN_ST_OPENED, ("chan%u is not opened", chan->ch_id)); if (chan->ch_vcpuid == vcpu) { /* Exact match; done */ CHAN_SELECT(chan); break; } dist = CHAN_VCPU_DIST(chan, vcpu); if (sel_dist <= dist) { /* Far or same distance; skip */ continue; } /* Select the closer channel. */ CHAN_SELECT(chan); } mtx_unlock(&prichan->ch_subchan_lock); #undef CHAN_SELECT #undef CHAN_VCPU_DIST return sel; } struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt) { struct vmbus_channel **ret, *chan; int i; ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_subchan_drain(struct vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on) { if (!on) chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } uint32_t vmbus_chan_id(const struct vmbus_channel *chan) { return chan->ch_id; } uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan) { return chan->ch_subidx; } bool vmbus_chan_is_primary(const struct vmbus_channel *chan) { if (VMBUS_CHAN_ISPRIMARY(chan)) return true; else return false; } const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan) { return &chan->ch_guid_inst; } Index: head/sys/dev/hyperv/vmbus/vmbus_et.c =================================================================== --- head/sys/dev/hyperv/vmbus/vmbus_et.c (revision 303470) +++ head/sys/dev/hyperv/vmbus/vmbus_et.c (revision 303471) @@ -1,195 +1,202 @@ /*- * Copyright (c) 2015,2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define VMBUS_ET_NAME "hvet" #define MSR_HV_STIMER0_CFG_SINT \ ((((uint64_t)VMBUS_SINT_TIMER) << MSR_HV_STIMER_CFG_SINT_SHIFT) & \ MSR_HV_STIMER_CFG_SINT_MASK) /* * Two additionally required features: * - SynIC is needed for interrupt generation. * - Time reference counter is needed to set ABS reference count to * STIMER0_COUNT. */ #define CPUID_HV_ET_MASK (CPUID_HV_MSR_TIME_REFCNT | \ CPUID_HV_MSR_SYNIC | \ CPUID_HV_MSR_SYNTIMER) +static void vmbus_et_identify(driver_t *, device_t); +static int vmbus_et_probe(device_t); +static int vmbus_et_attach(device_t); +static int vmbus_et_detach(device_t); +static int vmbus_et_start(struct eventtimer *, sbintime_t, + sbintime_t); + static struct eventtimer vmbus_et; static __inline uint64_t hyperv_sbintime2count(sbintime_t time) { struct timespec val; val = sbttots(time); return (val.tv_sec * HYPERV_TIMER_FREQ) + (val.tv_nsec / HYPERV_TIMER_NS_FACTOR); } static int vmbus_et_start(struct eventtimer *et __unused, sbintime_t first, sbintime_t period __unused) { uint64_t current; current = rdmsr(MSR_HV_TIME_REF_COUNT); current += hyperv_sbintime2count(first); wrmsr(MSR_HV_STIMER0_COUNT, current); return (0); } void vmbus_et_intr(struct trapframe *frame) { struct trapframe *oldframe; struct thread *td; if (vmbus_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; vmbus_et.et_event_cb(&vmbus_et, vmbus_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } } static void vmbus_et_identify(driver_t *driver, device_t parent) { if (device_get_unit(parent) != 0 || device_find_child(parent, VMBUS_ET_NAME, -1) != NULL || (hyperv_features & CPUID_HV_ET_MASK) != CPUID_HV_ET_MASK) return; device_add_child(parent, VMBUS_ET_NAME, -1); } static int vmbus_et_probe(device_t dev) { if (resource_disabled(VMBUS_ET_NAME, 0)) return (ENXIO); device_set_desc(dev, "Hyper-V event timer"); return (BUS_PROBE_NOWILDCARD); } static void vmbus_et_config(void *arg __unused) { /* * Make sure that STIMER0 is really disabled before writing * to STIMER0_CONFIG. * * "Writing to the configuration register of a timer that * is already enabled may result in undefined behaviour." */ for (;;) { uint64_t val; /* Stop counting, and this also implies disabling STIMER0 */ wrmsr(MSR_HV_STIMER0_COUNT, 0); val = rdmsr(MSR_HV_STIMER0_CONFIG); if ((val & MSR_HV_STIMER_CFG_ENABLE) == 0) break; cpu_spinwait(); } wrmsr(MSR_HV_STIMER0_CONFIG, MSR_HV_STIMER_CFG_AUTOEN | MSR_HV_STIMER0_CFG_SINT); } static int vmbus_et_attach(device_t dev) { /* TODO: use independent IDT vector */ vmbus_et.et_name = "Hyper-V"; vmbus_et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; vmbus_et.et_quality = 1000; vmbus_et.et_frequency = HYPERV_TIMER_FREQ; vmbus_et.et_min_period = (0x00000001ULL << 32) / HYPERV_TIMER_FREQ; vmbus_et.et_max_period = (0xfffffffeULL << 32) / HYPERV_TIMER_FREQ; vmbus_et.et_start = vmbus_et_start; /* * Delay a bit to make sure that MSR_HV_TIME_REF_COUNT will * not return 0, since writing 0 to STIMER0_COUNT will disable * STIMER0. */ DELAY(100); smp_rendezvous(NULL, vmbus_et_config, NULL, NULL); return (et_register(&vmbus_et)); } static int vmbus_et_detach(device_t dev) { return (et_deregister(&vmbus_et)); } static device_method_t vmbus_et_methods[] = { DEVMETHOD(device_identify, vmbus_et_identify), DEVMETHOD(device_probe, vmbus_et_probe), DEVMETHOD(device_attach, vmbus_et_attach), DEVMETHOD(device_detach, vmbus_et_detach), DEVMETHOD_END }; static driver_t vmbus_et_driver = { VMBUS_ET_NAME, vmbus_et_methods, 0 }; static devclass_t vmbus_et_devclass; DRIVER_MODULE(hv_et, vmbus, vmbus_et_driver, vmbus_et_devclass, NULL, NULL); MODULE_VERSION(hv_et, 1);