diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c index 6db910e60dd3..c6ff8fc782c6 100644 --- a/sys/dev/xen/balloon/balloon.c +++ b/sys/dev/xen/balloon/balloon.c @@ -1,407 +1,407 @@ /****************************************************************************** * balloon.c * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); /* Convert from KB (as fetched from xenstore) to number of PAGES */ #define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) struct mtx balloon_mutex; /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; /* We may hit the hard limit in Xen. If we do then we remember it. */ unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. */ unsigned long driver_pages; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; }; static struct balloon_stats balloon_stats; #define bs balloon_stats SYSCTL_DECL(_dev_xen); static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, &bs.current_pages, 0, "Current allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, &bs.target_pages, 0, "Target allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, &bs.driver_pages, 0, "Driver pages"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, &bs.hard_limit, 0, "Xen hard limit"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, &bs.balloon_low, 0, "Low-mem balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, &bs.balloon_high, 0, "High-mem balloon"); /* List of ballooned pages, threaded through the mem_map array. 
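 *
 * Pages handed back to Xen are parked on this queue (and accounted in
 * bs.balloon_low/bs.balloon_high) until the hypervisor repopulates their
 * frames. As an illustrative sketch, mirroring decrease_reservation()
 * below, giving up one already-allocated page amounts to:
 *
 *	TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
 *	bs.balloon_low++;
 *
 * while increase_reservation() performs the inverse dequeue and free.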
*/ static TAILQ_HEAD(,vm_page) ballooned_pages; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_mem: " fmt, ##args) static unsigned long current_target(void) { unsigned long target = min(bs.target_pages, bs.hard_limit); if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) target = bs.current_pages + bs.balloon_low + bs.balloon_high; return (target); } static unsigned long minimum_target(void) { unsigned long min_pages, curr_pages = current_target(); #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* * Simple continuous piecewise linear function: * max MiB -> min MiB gradient * 0 0 * 16 16 * 32 24 * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) * 8192 552 (1/32) * 32768 1320 * 131072 4392 */ if (realmem < MB2PAGES(128)) min_pages = MB2PAGES(8) + (realmem >> 1); else if (realmem < MB2PAGES(512)) min_pages = MB2PAGES(40) + (realmem >> 2); else if (realmem < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (realmem >> 3); else min_pages = MB2PAGES(296) + (realmem >> 5); #undef MB2PAGES /* Don't enforce growth */ return (min(min_pages, curr_pages)); } static int increase_reservation(unsigned long nr_pages) { unsigned long i; vm_page_t page; long rc; struct xen_memory_reservation reservation = { .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (page = TAILQ_FIRST(&ballooned_pages), i = 0; i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { KASSERT(page != NULL, ("ballooned_pages list corrupt")); frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret __diagused; /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); } if (rc >= 0) bs.hard_limit = (bs.current_pages + rc - bs.driver_pages); goto out; } for (i = 0; i < nr_pages; i++) { page = TAILQ_FIRST(&ballooned_pages); KASSERT(page != NULL, ("Unable to get ballooned page")); TAILQ_REMOVE(&ballooned_pages, page, plinks.q); bs.balloon_low--; KASSERT(xen_feature(XENFEAT_auto_translated_physmap), ("auto translated physmap but mapping is valid")); vm_page_free(page); } bs.current_pages += nr_pages; out: return (0); } static int decrease_reservation(unsigned long nr_pages) { unsigned long i; vm_page_t page; int need_sleep = 0; int ret __diagused; struct xen_memory_reservation reservation = { .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (i = 0; i < nr_pages; i++) { /* * Zero the page, or else we might be leaking important data to * other domains on the same host. Xen doesn't scrub ballooned * out memory pages; the guest is in charge of making sure that * no information is leaked.
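 *
 * (vm_page_alloc_noobj(VM_ALLOC_ZERO) below returns a page that is
 * already zero-filled, so no additional scrubbing is required before
 * the frame is released to the hypervisor.)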
*/ if ((page = vm_page_alloc_noobj(VM_ALLOC_ZERO)) == NULL) { nr_pages = i; need_sleep = 1; break; } frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); bs.balloon_low++; } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); bs.current_pages -= nr_pages; return (need_sleep); } /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(void *unused) { int need_sleep = 0; long credit; mtx_lock(&balloon_mutex); for (;;) { int sleep_time; do { credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) need_sleep = (decrease_reservation(-credit) != 0); } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ if (current_target() != bs.current_pages) sleep_time = hz; else sleep_time = 0; msleep(balloon_process, &balloon_mutex, 0, "balloon", sleep_time); } mtx_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ bs.hard_limit = ~0UL; bs.target_pages = max(target, minimum_target()); wakeup(balloon_process); } static struct xs_watch target_watch = { .node = "memory/target", .max_pending = 1, }; /* React to a change in the target key */ static void watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ return; } /* * The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ set_new_target(new_target >> KB_TO_PAGE_SHIFT); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void -xenballoon_identify(driver_t *driver __unused, device_t parent) +xenballoon_identify(driver_t *driver, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xen Balloon device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xenballoon_probe(device_t dev) { device_set_desc(dev, "Xen Balloon Device"); return (0); } /** * \brief Attach the Xen Balloon device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
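 *
 * Once attached, the balloon is driven entirely through the XenStore
 * memory/target node, whose value is expressed in KiB. As a sketch
 * (hypothetical values; the toolstack normally writes this node, e.g.
 * via "xl mem-set"), shrinking a guest to 512 MiB from the control
 * domain could look like:
 *
 *	xenstore-write /local/domain/<domid>/memory/target 524288
 *
 * watch_target() above observes the change and converts it to pages.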
*/ static int xenballoon_attach(device_t dev) { int err; mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); bs.current_pages = realmem; bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; bs.driver_pages = 0UL; bs.hard_limit = ~0UL; kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); target_watch.callback = watch_target; err = xs_register_watch(&target_watch); if (err) device_printf(dev, "xenballoon: failed to set balloon watcher\n"); return (err); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenballoon_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenballoon_identify), DEVMETHOD(device_probe, xenballoon_probe), DEVMETHOD(device_attach, xenballoon_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, NULL, NULL); diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index 37b4bff709c7..1dc1df935b84 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -1,495 +1,495 @@ /*- * SPDX-License-Identifier: BSD-2-Clause AND BSD-4-Clause * * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. */ /*- * PV suspend/resume support: * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4.
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * HVM suspend/resume support: * * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /** * \file control.c * * \brief Device driver to respond to control domain events that impact * this VM. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__amd64__) || defined(__i386__) #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include bool xen_suspend_cancelled; /*--------------------------- Forward Declarations --------------------------*/ /** Function signature for shutdown event handlers. */ typedef void (xctrl_shutdown_handler_t)(void); static xctrl_shutdown_handler_t xctrl_poweroff; static xctrl_shutdown_handler_t xctrl_reboot; static xctrl_shutdown_handler_t xctrl_suspend; static xctrl_shutdown_handler_t xctrl_crash; /*-------------------------- Private Data Structures -------------------------*/ /** Element type for lookup table of event name to handler. */ struct xctrl_shutdown_reason { const char *name; xctrl_shutdown_handler_t *handler; }; /** Lookup table for shutdown event name to handler.
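 *
 * Protocol (see xctrl_on_watch_event() below): the toolstack writes one
 * of these reason strings to the control/shutdown XenStore node; the
 * guest acknowledges by writing back an empty string, then invokes the
 * matching handler.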
*/ static const struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = { { "poweroff", xctrl_poweroff }, { "reboot", xctrl_reboot }, { "suspend", xctrl_suspend }, { "crash", xctrl_crash }, { "halt", xctrl_poweroff }, }; struct xctrl_softc { struct xs_watch xctrl_watch; }; /*------------------------------ Event Handlers ------------------------------*/ static void xctrl_poweroff(void) { shutdown_nice(RB_POWEROFF|RB_HALT); } static void xctrl_reboot(void) { shutdown_nice(0); } #if !defined(__amd64__) && !defined(__i386__) static void xctrl_suspend(void) { printf("WARNING: xen/control: Suspend not supported!\n"); } #else /* __amd64__ || __i386__ */ static void xctrl_suspend(void) { #ifdef SMP cpuset_t cpu_suspend_map; #endif EVENTHANDLER_INVOKE(power_suspend_early); xs_lock(); stop_all_proc(); xs_unlock(); suspend_all_fs(); EVENTHANDLER_INVOKE(power_suspend); #ifdef EARLY_AP_STARTUP MPASS(mp_ncpus == 1 || smp_started); thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); #else if (smp_started) { thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); } #endif KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0")); /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME. */ bus_topo_lock(); if (DEVICE_SUSPEND(root_bus) != 0) { bus_topo_unlock(); printf("%s: device_suspend failed\n", __func__); return; } #ifdef SMP #ifdef EARLY_AP_STARTUP /* * Suspend other CPUs. This prevents IPIs while we * are resuming, and will allow us to reset per-cpu * vcpu_info on resume. */ cpu_suspend_map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map); if (!CPU_EMPTY(&cpu_suspend_map)) suspend_cpus(cpu_suspend_map); #else CPU_ZERO(&cpu_suspend_map); /* silence gcc */ if (smp_started) { /* * Suspend other CPUs. This prevents IPIs while we * are resuming, and will allow us to reset per-cpu * vcpu_info on resume. */ cpu_suspend_map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map); if (!CPU_EMPTY(&cpu_suspend_map)) suspend_cpus(cpu_suspend_map); } #endif #endif /* * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); intr_suspend(); xen_hvm_suspend(); xen_suspend_cancelled = !!HYPERVISOR_suspend(0); if (!xen_suspend_cancelled) { xen_hvm_resume(false); } intr_resume(xen_suspend_cancelled != 0); enable_intr(); /* * Reset grant table info. */ if (!xen_suspend_cancelled) { gnttab_resume(NULL); } #ifdef SMP if (!CPU_EMPTY(&cpu_suspend_map)) { /* * Now that event channels have been initialized, * resume CPUs. */ resume_cpus(cpu_suspend_map); #if defined(__amd64__) || defined(__i386__) /* Send an IPI_BITMAP in case there are pending bitmap IPIs. */ lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL); #endif } #endif /* * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or * similar. */ DEVICE_RESUME(root_bus); bus_topo_unlock(); /* * Warm up timecounter again and reset system clock. */ timecounter->tc_get_timecount(timecounter); inittodr(time_second); #ifdef EARLY_AP_STARTUP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #else if (smp_started) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif resume_all_fs(); resume_all_proc(); EVENTHANDLER_INVOKE(power_resume); if (bootverbose) printf("System resumed after suspension\n"); } #endif /* __amd64__ || __i386__ */ static void xctrl_crash(void) { panic("Xen directed crash"); } static void xctrl_shutdown_final(void *arg, int howto) { /* * Inform the hypervisor that shutdown is complete, and specify the * nature of the shutdown. 
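 * The howto bits chosen by the handlers above map onto Xen's SHUTDOWN_*
 * reason codes: a panic becomes SHUTDOWN_crash, RB_POWEROFF becomes
 * SHUTDOWN_poweroff, and any other reboot request becomes
 * SHUTDOWN_reboot.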
RB_HALT is not handled by this function. */ if (KERNEL_PANICKED()) HYPERVISOR_shutdown(SHUTDOWN_crash); else if ((howto & RB_POWEROFF) != 0) HYPERVISOR_shutdown(SHUTDOWN_poweroff); else if ((howto & RB_HALT) == 0) /* RB_POWERCYCLE or regular reset. */ HYPERVISOR_shutdown(SHUTDOWN_reboot); } /*------------------------------ Event Reception -----------------------------*/ static void xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len) { const struct xctrl_shutdown_reason *reason; const struct xctrl_shutdown_reason *last_reason; char *result; int error; int result_len; error = xs_read(XST_NIL, "control", "shutdown", &result_len, (void **)&result); if (error != 0 || result_len == 0) return; /* Acknowledge the request by writing back an empty string. */ error = xs_write(XST_NIL, "control", "shutdown", ""); if (error != 0) printf("unable to ack shutdown request, proceeding anyway\n"); reason = xctrl_shutdown_reasons; last_reason = reason + nitems(xctrl_shutdown_reasons); while (reason < last_reason) { if (!strcmp(result, reason->name)) { reason->handler(); break; } reason++; } free(result, M_XENSTORE); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void -xctrl_identify(driver_t *driver __unused, device_t parent) +xctrl_identify(driver_t *driver, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xen Control device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xctrl_probe(device_t dev) { device_set_desc(dev, "Xen Control Device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xen control device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xctrl_attach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Activate watch */ xctrl->xctrl_watch.node = "control/shutdown"; xctrl->xctrl_watch.callback = xctrl_on_watch_event; xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl; /* * We don't care about the path that was updated, just about the value * changes on that single node, hence there's no need to queue more * than one event. */ xctrl->xctrl_watch.max_pending = 1; xs_register_watch(&xctrl->xctrl_watch); EVENTHANDLER_REGISTER(shutdown_final, xctrl_shutdown_final, NULL, SHUTDOWN_PRI_LAST); return (0); } /** * \brief Detach the Xen control device. * * \param dev NewBus device_t for this Xen control device instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure.
*/ static int xctrl_detach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Release watch */ xs_unregister_watch(&xctrl->xctrl_watch); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xctrl_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xctrl_identify), DEVMETHOD(device_probe, xctrl_probe), DEVMETHOD(device_attach, xctrl_attach), DEVMETHOD(device_detach, xctrl_detach), DEVMETHOD_END }; DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc)); DRIVER_MODULE(xctrl, xenstore, xctrl_driver, NULL, NULL); diff --git a/sys/dev/xen/grant_table/grant_table.c b/sys/dev/xen/grant_table/grant_table.c index c9d7d1e66af0..394d40c5d49c 100644 --- a/sys/dev/xen/grant_table/grant_table.c +++ b/sys/dev/xen/grant_table/grant_table.c @@ -1,667 +1,667 @@ /****************************************************************************** * gnttab.c * * Two sets of functionality: * 1. Granting foreign access to our memory reservation. * 2. Accessing others' memory reservations via grant references. * (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2005, Christopher Clark * Copyright (c) 2004, K A Fraser */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static struct mtx gnttab_list_lock; /* * Resource representing allocated physical address space * for the grant table metainfo */ static struct resource *gnttab_pseudo_phys_res; /* Resource id for allocated physical address space. 
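 * (Passed to xenmem_alloc() in gnttab_resume() below.)
 *
 * A note on the free list defined next: free references are chained
 * through a two-level table indexed as gnttab_list[ref / RPP][ref % RPP].
 * Worked example: with 4 KiB pages and a 4-byte grant_ref_t, RPP is
 * 1024, so gnttab_entry(2500) resolves to gnttab_list[2][452].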
*/ static int gnttab_pseudo_phys_res_id; static grant_entry_v1_t *shared; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) static int get_free_entries(int count, int *entries) { int ref, error; grant_ref_t head; mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { mtx_unlock(&gnttab_list_lock); return (error); } ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); } static void do_free_callbacks(void) { struct gnttab_free_callback *callback, *next; callback = gnttab_free_callback_list; gnttab_free_callback_list = NULL; while (callback != NULL) { next = callback->next; if (gnttab_free_count >= callback->count) { callback->next = NULL; callback->fn(callback->arg); } else { callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; } callback = next; } } static inline void check_free_callbacks(void) { if (__predict_false(gnttab_free_callback_list != NULL)) do_free_callbacks(); } static void put_free_entry(grant_ref_t ref) { mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } /* * Public grant-issuing interface functions */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); if (result) *result = ref; return (0); } void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); } int gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; nflags = shared[ref].flags; return (nflags & (GTF_reading|GTF_writing)); } int gnttab_end_foreign_access_ref(grant_ref_t ref) { uint16_t flags; while (!((flags = atomic_load_16(&shared[ref].flags)) & (GTF_reading|GTF_writing))) if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) return (1); printf("%s: WARNING: g.e. still in use!\n", __func__); return (0); } void gnttab_end_foreign_access(grant_ref_t ref, void *page) { if (gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != NULL) { free(page, M_DEVBUF); } } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. and page still in use!\n", __func__); } } void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) { grant_ref_t *last_ref; grant_ref_t head; grant_ref_t tail; head = GNTTAB_LIST_END; tail = *refs; last_ref = refs + count; while (refs != last_ref) { if (gnttab_end_foreign_access_ref(*refs)) { gnttab_entry(*refs) = head; head = *refs; } else { /* * XXX This needs to be fixed so that the ref * is placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. 
still in use!\n", __func__); count--; } refs++; } if (count != 0) { mtx_lock(&gnttab_list_lock); gnttab_free_count += count; gnttab_entry(tail) = gnttab_free_head; gnttab_free_head = head; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } } int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); *result = ref; return (0); } void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; } unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; uint16_t flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). * * NOTE: This is a loop since the atomic cmpset can fail multiple * times. In normal operation it will be rare to execute more than * twice. Attempting an attack would consume a great deal of * attacker resources and be unlikely to prolong the loop very much. */ while (!((flags = atomic_load_16(&shared[ref].flags)) & GTF_transfer_committed)) if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) return (0); /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { cpu_spinwait(); flags = atomic_load_16(&shared[ref].flags); } /* Read the frame number /after/ reading completion status. */ rmb(); frame = shared[ref].frame; KASSERT(frame != 0, ("grant table inconsistent")); return (frame); } unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = gnttab_end_foreign_transfer_ref(ref); put_free_entry(ref); return (frame); } void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); } void gnttab_free_grant_references(grant_ref_t head) { grant_ref_t ref; int count = 1; if (head == GNTTAB_LIST_END) return; ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) { int ref, error; error = get_free_entries(count, &ref); if (__predict_false(error)) return (error); *head = ref; return (0); } int gnttab_empty_grant_references(const grant_ref_t *private_head) { return (*private_head == GNTTAB_LIST_END); } int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; if (__predict_false(g == GNTTAB_LIST_END)) return (g); *private_head = gnttab_entry(g); return (g); } void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { gnttab_entry(release) = *private_head; *private_head = release; } void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count) { mtx_lock(&gnttab_list_lock); if (callback->next) goto out; callback->fn = fn; callback->arg = arg; callback->count = count; callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; check_free_callbacks(); out: mtx_unlock(&gnttab_list_lock); } void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) { struct gnttab_free_callback **pcb; mtx_lock(&gnttab_list_lock); for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { if 
(*pcb == callback) { *pcb = callback->next; break; } } mtx_unlock(&gnttab_list_lock); } static int grow_gnttab_list(unsigned int more_frames) { unsigned int new_nr_grant_frames, extra_entries, i; new_nr_grant_frames = nr_grant_frames + more_frames; extra_entries = more_frames * GREFS_PER_GRANT_FRAME; for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!gnttab_list[i]) goto grow_nomem; } for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; check_free_callbacks(); return (0); grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); return (ENOMEM); } static unsigned int __max_nr_grant_frames(void) { struct gnttab_query_size query; int rc; query.dom = DOMID_SELF; rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); if ((rc < 0) || (query.status != GNTST_okay)) return (4); /* Legacy max supported number of frames */ return (query.max_nr_frames); } static inline unsigned int max_nr_grant_frames(void) { return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); } #ifdef notyet /* * XXX needed for backend support * */ static int map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { unsigned long **frames = (unsigned long **)data; set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); (*frames)++; return 0; } static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } #endif static vm_paddr_t resume_frames; static void gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; /* * Loop backwards, so that the first hypercall has the largest index, * ensuring that the table will grow only once. */ do { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed to map gnttab"); } while (i-- > start_idx); } int gnttab_resume(device_t dev) { unsigned int max_nr_gframes, nr_gframes; nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return (ENOSYS); if (!resume_frames) { KASSERT(dev != NULL, ("No resume frames and no device provided")); gnttab_pseudo_phys_res = xenmem_alloc(dev, &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); if (gnttab_pseudo_phys_res == NULL) panic("Unable to reserve physical memory for gnttab"); resume_frames = rman_get_start(gnttab_pseudo_phys_res); shared = rman_get_virtual(gnttab_pseudo_phys_res); } gnttab_map(0, nr_gframes - 1); return (0); } static int gnttab_expand(unsigned int req_entries) { unsigned int cur, extra; cur = nr_grant_frames; extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); if (cur + extra > max_nr_grant_frames()) return (ENOSPC); gnttab_map(cur, cur + extra - 1); return (grow_gnttab_list(extra)); } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. 
* \param parent The NewBus parent device for any devices this method adds. */ static void -granttable_identify(driver_t *driver __unused, device_t parent) +granttable_identify(driver_t *driver, device_t parent) { KASSERT(xen_domain(), ("Trying to attach grant-table device on non Xen domain")); /* * A single device instance for our driver is always present * in a system operating under Xen. */ if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) panic("unable to attach Xen Grant-table device"); } /** * \brief Probe for the existence of the Xen Grant-table device * * \param dev NewBus device_t for this instance. * * \return Always returns 0 indicating success. */ static int granttable_probe(device_t dev) { device_set_desc(dev, "Xen Grant-table Device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xen Grant-table device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int granttable_attach(device_t dev) { int i; unsigned int nr_init_grefs; nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); gnttab_list = malloc(boot_max_nr_grant_frames * sizeof(grant_ref_t *), M_DEVBUF, M_NOWAIT); if (gnttab_list == NULL) return (ENOMEM); for (i = 0; i < nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (gnttab_list[i] == NULL) goto ini_nomem; } if (gnttab_resume(dev)) return (ENODEV); nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; if (bootverbose) printf("Grant table initialized\n"); return (0); ini_nomem: for (i--; i >= 0; i--) free(gnttab_list[i], M_DEVBUF); free(gnttab_list, M_DEVBUF); return (ENOMEM); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t granttable_methods[] = { /* Device interface */ DEVMETHOD(device_identify, granttable_identify), DEVMETHOD(device_probe, granttable_probe), DEVMETHOD(device_attach, granttable_attach), DEVMETHOD_END }; DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, SI_ORDER_FIRST); diff --git a/sys/dev/xen/xenstore/xenstore_dev.c b/sys/dev/xen/xenstore/xenstore_dev.c index 65ca9d67ce4f..0dcf6938d5fb 100644 --- a/sys/dev/xen/xenstore/xenstore_dev.c +++ b/sys/dev/xen/xenstore/xenstore_dev.c @@ -1,545 +1,545 @@ /* * xenstore_dev.c * * Driver giving user-space access to the kernel's connection to the * XenStore service. * * Copyright (c) 2005, Christian Limpach * Copyright (c) 2005, Rusty Russell, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static unsigned int max_pending_watches = 1000; struct xs_dev_transaction { LIST_ENTRY(xs_dev_transaction) list; struct xs_transaction handle; }; struct xs_dev_watch { LIST_ENTRY(xs_dev_watch) list; struct xs_watch watch; char *token; struct xs_dev_data *user; }; struct xs_dev_data { /* In-progress transaction. */ LIST_HEAD(, xs_dev_transaction) transactions; /* Active watches. */ LIST_HEAD(, xs_dev_watch) watches; /* Partial request. */ unsigned int len; union { struct xsd_sockmsg msg; char buffer[PAGE_SIZE]; } u; /* Response queue. */ #define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1)) char read_buffer[PAGE_SIZE]; unsigned int read_cons, read_prod; /* Serializes writes to the read buffer. */ struct mtx lock; /* Polling structure (for reads only ATM). */ struct selinfo ev_rsel; }; static void xs_queue_reply(struct xs_dev_data *u, const char *data, unsigned int len) { unsigned int i; for (i = 0; i < len; i++, u->read_prod++) u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i]; KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer), ("xenstore reply too big")); wakeup(u); selwakeup(&u->ev_rsel); } static const char * xs_dev_error_to_string(int error) { unsigned int i; for (i = 0; i < nitems(xsd_errors); i++) if (xsd_errors[i].errnum == error) return (xsd_errors[i].errstring); return (NULL); } static void xs_dev_return_error(struct xs_dev_data *u, int error, int req_id, int tx_id) { struct xsd_sockmsg msg; const char *payload; msg.type = XS_ERROR; msg.req_id = req_id; msg.tx_id = tx_id; payload = NULL; payload = xs_dev_error_to_string(error); if (payload == NULL) payload = xs_dev_error_to_string(EINVAL); KASSERT(payload != NULL, ("Unable to find string for EINVAL errno")); msg.len = strlen(payload) + 1; mtx_lock(&u->lock); xs_queue_reply(u, (char *)&msg, sizeof(msg)); xs_queue_reply(u, payload, msg.len); mtx_unlock(&u->lock); } static int xs_dev_watch_message_parse_string(const char **p, const char *end, const char **string_r) { const char *nul; nul = memchr(*p, 0, end - *p); if (!nul) return (EINVAL); *string_r = *p; *p = nul+1; return (0); } static int xs_dev_watch_message_parse(const struct xsd_sockmsg *msg, const char **path_r, const char **token_r) { const char *p, *end; int error; p = (const char *)msg + sizeof(*msg); end = p + msg->len; KASSERT(p <= end, ("payload overflow")); error = xs_dev_watch_message_parse_string(&p, end, path_r); if (error) return (error); error = xs_dev_watch_message_parse_string(&p, end, token_r); if (error) return (error); return (0); } static struct xs_dev_watch * xs_dev_find_watch(struct xs_dev_data *u, const char *token) { struct xs_dev_watch *watch; LIST_FOREACH(watch, &u->watches, list) if (strcmp(watch->token, token) == 0) return (watch); return (NULL); } static void xs_dev_watch_cb(struct xs_watch *watch, const char **vec, unsigned int len) { struct xs_dev_watch *dwatch; struct xsd_sockmsg msg; char *payload; 
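	/*
	 * Deliver the event to the reader in XenStore wire format: an
	 * XS_WATCH_EVENT header followed by two NUL-terminated strings,
	 *
	 *	| struct xsd_sockmsg | path '\0' | token '\0' |
	 *
	 * which is why msg.len below adds 2 for the terminators.
	 */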
dwatch = (struct xs_dev_watch *)watch->callback_data; msg.type = XS_WATCH_EVENT; msg.req_id = msg.tx_id = 0; msg.len = strlen(vec[XS_WATCH_PATH]) + strlen(dwatch->token) + 2; payload = malloc(msg.len, M_XENSTORE, M_WAITOK); strcpy(payload, vec[XS_WATCH_PATH]); strcpy(&payload[strlen(vec[XS_WATCH_PATH]) + 1], dwatch->token); mtx_lock(&dwatch->user->lock); xs_queue_reply(dwatch->user, (char *)&msg, sizeof(msg)); xs_queue_reply(dwatch->user, payload, msg.len); mtx_unlock(&dwatch->user->lock); free(payload, M_XENSTORE); } static struct xs_dev_transaction * xs_dev_find_transaction(struct xs_dev_data *u, uint32_t tx_id) { struct xs_dev_transaction *trans; LIST_FOREACH(trans, &u->transactions, list) if (trans->handle.id == tx_id) return (trans); return (NULL); } static int xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { int error; struct xs_dev_data *u; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (error); while (u->read_prod == u->read_cons) { error = tsleep(u, PCATCH, "xsdread", hz/10); if (error && error != EWOULDBLOCK) return (error); } while (uio->uio_resid > 0) { if (u->read_cons == u->read_prod) break; error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)], 1, uio); if (error) return (error); u->read_cons++; } return (0); } static int xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { int error; const char *wpath, *wtoken; struct xs_dev_data *u; struct xs_dev_transaction *trans; struct xs_dev_watch *watch; void *reply; static const char *ok = "OK"; int len = uio->uio_resid; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (error); if ((len + u->len) > sizeof(u->u.buffer)) return (EINVAL); error = uiomove(u->u.buffer + u->len, len, uio); if (error) return (error); u->len += len; if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) return (0); switch (u->u.msg.type) { case XS_TRANSACTION_START: case XS_TRANSACTION_END: case XS_DIRECTORY: case XS_READ: case XS_GET_PERMS: case XS_RELEASE: case XS_GET_DOMAIN_PATH: case XS_WRITE: case XS_MKDIR: case XS_RM: case XS_SET_PERMS: /* Check that this transaction id is not hijacked. 
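 * A client may only name transactions it started through this file
 * descriptor; an unknown id is rejected with EINVAL rather than being
 * forwarded to xenstored.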
*/ if (u->u.msg.tx_id != 0 && xs_dev_find_transaction(u, u->u.msg.tx_id) == NULL) { error = EINVAL; break; } error = xs_dev_request_and_reply(&u->u.msg, &reply); if (!error) { if (u->u.msg.type == XS_TRANSACTION_START) { trans = malloc(sizeof(*trans), M_XENSTORE, M_WAITOK); trans->handle.id = strtoul(reply, NULL, 0); LIST_INSERT_HEAD(&u->transactions, trans, list); } else if (u->u.msg.type == XS_TRANSACTION_END) { trans = xs_dev_find_transaction(u, u->u.msg.tx_id); KASSERT(trans != NULL, ("Unable to find transaction")); LIST_REMOVE(trans, list); free(trans, M_XENSTORE); } mtx_lock(&u->lock); xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); xs_queue_reply(u, (char *)reply, u->u.msg.len); mtx_unlock(&u->lock); free(reply, M_XENSTORE); } break; case XS_WATCH: u->u.msg.tx_id = 0; error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken); if (error) break; if (xs_dev_find_watch(u, wtoken) != NULL) { error = EINVAL; break; } watch = malloc(sizeof(*watch), M_XENSTORE, M_WAITOK); watch->watch.node = strdup(wpath, M_XENSTORE); watch->watch.callback = xs_dev_watch_cb; watch->watch.callback_data = (uintptr_t)watch; watch->watch.max_pending = max_pending_watches; watch->token = strdup(wtoken, M_XENSTORE); watch->user = u; error = xs_register_watch(&watch->watch); if (error != 0) { free(watch->token, M_XENSTORE); free(watch->watch.node, M_XENSTORE); free(watch, M_XENSTORE); break; } LIST_INSERT_HEAD(&u->watches, watch, list); u->u.msg.len = sizeof(ok); mtx_lock(&u->lock); xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); xs_queue_reply(u, ok, sizeof(ok)); mtx_unlock(&u->lock); break; case XS_UNWATCH: u->u.msg.tx_id = 0; error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken); if (error) break; watch = xs_dev_find_watch(u, wtoken); if (watch == NULL) { error = EINVAL; break; } LIST_REMOVE(watch, list); xs_unregister_watch(&watch->watch); free(watch->watch.node, M_XENSTORE); free(watch->token, M_XENSTORE); free(watch, M_XENSTORE); u->u.msg.len = sizeof(ok); mtx_lock(&u->lock); xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); xs_queue_reply(u, ok, sizeof(ok)); mtx_unlock(&u->lock); break; default: error = EINVAL; break; } if (error != 0) xs_dev_return_error(u, error, u->u.msg.req_id, u->u.msg.tx_id); /* Reset the write buffer. 
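 * Requests are consumed whole, so any state left behind here would
 * corrupt the framing of the next message.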
*/ u->len = 0; return (0); } static int xs_dev_poll(struct cdev *dev, int events, struct thread *td) { struct xs_dev_data *u; int error, mask; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (POLLERR); /* we can always write */ mask = events & (POLLOUT | POLLWRNORM); if (events & (POLLIN | POLLRDNORM)) { if (u->read_cons != u->read_prod) { mask |= events & (POLLIN | POLLRDNORM); } else { /* Record that someone is waiting */ selrecord(td, &u->ev_rsel); } } return (mask); } static void xs_dev_dtor(void *arg) { struct xs_dev_data *u = arg; struct xs_dev_transaction *trans, *tmpt; struct xs_dev_watch *watch, *tmpw; seldrain(&u->ev_rsel); LIST_FOREACH_SAFE(trans, &u->transactions, list, tmpt) { xs_transaction_end(trans->handle, 1); LIST_REMOVE(trans, list); free(trans, M_XENSTORE); } LIST_FOREACH_SAFE(watch, &u->watches, list, tmpw) { LIST_REMOVE(watch, list); xs_unregister_watch(&watch->watch); free(watch->watch.node, M_XENSTORE); free(watch->token, M_XENSTORE); free(watch, M_XENSTORE); } mtx_destroy(&u->lock); free(u, M_XENSTORE); } static int xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct xs_dev_data *u; int error; u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO); mtx_init(&u->lock, "xsdev_lock", NULL, MTX_DEF); LIST_INIT(&u->transactions); LIST_INIT(&u->watches); error = devfs_set_cdevpriv(u, xs_dev_dtor); if (error != 0) free(u, M_XENSTORE); return (error); } static struct cdevsw xs_dev_cdevsw = { .d_version = D_VERSION, .d_read = xs_dev_read, .d_write = xs_dev_write, .d_open = xs_dev_open, .d_poll = xs_dev_poll, .d_name = "xs_dev", }; /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void -xs_dev_identify(driver_t *driver __unused, device_t parent) +xs_dev_identify(driver_t *driver, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xenstore device * * \param dev NewBus device_t for this instance. * * \return Always returns 0 indicating success. */ static int xs_dev_probe(device_t dev) { device_set_desc(dev, "Xenstore user-space device"); return (0); } /** * \brief Attach the Xenstore device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
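 *
 * Attaching creates the /dev/xen/xenstore character device, across
 * which user space speaks the XenStore wire protocol (xsd_sockmsg
 * framing) as implemented by the read/write handlers above.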
*/ static int xs_dev_attach(device_t dev) { struct cdev *xs_cdev; struct sysctl_ctx_list *sysctl_ctx; struct sysctl_oid *sysctl_tree; sysctl_ctx = device_get_sysctl_ctx(dev); sysctl_tree = device_get_sysctl_tree(dev); if (sysctl_ctx == NULL || sysctl_tree == NULL) return (EINVAL); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_pending_watch_events", CTLFLAG_RW, &max_pending_watches, 0, "maximum number of pending watch events to be delivered"); xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0400, "xen/xenstore"); if (xs_cdev == NULL) return (EINVAL); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xs_dev_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xs_dev_identify), DEVMETHOD(device_probe, xs_dev_probe), DEVMETHOD(device_attach, xs_dev_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xs_dev, xs_dev_driver, xs_dev_methods, 0); DRIVER_MODULE(xs_dev, xenstore, xs_dev_driver, NULL, NULL); diff --git a/sys/dev/xen/xenstore/xenstored_dev.c b/sys/dev/xen/xenstore/xenstored_dev.c index 13cc7c3b36d8..26ff3083e68c 100644 --- a/sys/dev/xen/xenstore/xenstored_dev.c +++ b/sys/dev/xen/xenstore/xenstored_dev.c @@ -1,159 +1,159 @@ /* * Copyright (c) 2014 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define XSD_READ_SIZE 20 static int xsd_dev_read(struct cdev *dev, struct uio *uio, int ioflag); static int xsd_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr); static struct cdevsw xsd_dev_cdevsw = { .d_version = D_VERSION, .d_read = xsd_dev_read, .d_mmap = xsd_dev_mmap, .d_name = "xsd_dev", }; static int xsd_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { char evtchn[XSD_READ_SIZE]; int error, len; len = snprintf(evtchn, sizeof(evtchn), "%u", xs_evtchn()); if (len < 0 || len > uio->uio_resid) return (EINVAL); error = uiomove(evtchn, len, uio); if (error) return (error); return (0); } static int xsd_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { if (offset != 0) return (EINVAL); *paddr = xs_address(); return (0); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void -xsd_dev_identify(driver_t *driver __unused, device_t parent) +xsd_dev_identify(driver_t *driver, device_t parent) { if (!xen_domain() || xs_initialized()) return; /* * Only attach if xenstore is not available, because we are the * domain that's supposed to run it. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xenstored device * * \param dev NewBus device_t for this instance. * * \return Always returns 0 indicating success. */ static int xsd_dev_probe(device_t dev) { device_set_desc(dev, "Xenstored user-space device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xenstored device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
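 *
 * Attaching creates /dev/xen/xenstored, from which a user-space
 * XenStore daemon retrieves its event channel (via read) and the
 * shared ring page (via mmap), as implemented above.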
*/ static int xsd_dev_attach(device_t dev) { struct cdev *xsd_cdev; xsd_cdev = make_dev(&xsd_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, "xen/xenstored"); if (xsd_cdev == NULL) return (EINVAL); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xsd_dev_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xsd_dev_identify), DEVMETHOD(device_probe, xsd_dev_probe), DEVMETHOD(device_attach, xsd_dev_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xsd_dev, xsd_dev_driver, xsd_dev_methods, 0); DRIVER_MODULE(xsd_dev, xenpv, xsd_dev_driver, NULL, NULL); diff --git a/sys/xen/xenbus/xenbusb.c b/sys/xen/xenbus/xenbusb.c index 0e303fc83f15..12a921eab8b1 100644 --- a/sys/xen/xenbus/xenbusb.c +++ b/sys/xen/xenbus/xenbusb.c @@ -1,1005 +1,1005 @@ /****************************************************************************** * Copyright (C) 2010 Spectra Logic Corporation * Copyright (C) 2008 Doug Rabson * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard * Copyright (C) 2005 XenSource Ltd * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /** * \file xenbusb.c * * \brief Shared support functions for managing the NewBus buses that contain * Xen front and back end device instances. * * The NewBus implementation of XenBus attaches a xenbusb_front and xenbusb_back * child bus to the xenstore device. This strategy allows the small differences * in the handling of XenBus operations for front and back devices to be handled * as overrides in xenbusb_front/back.c. Front and back specific device * classes are also provided so device drivers can register for the devices they * can handle without the need to filter within their probe routines. The * net result is a device hierarchy that might look like this: * * xenstore0/ * xenbusb_front0/ * xn0 * xbd0 * xbd1 * xenbusb_back0/ * xbbd0 * xnb0 * xnb1 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*------------------------- Private Functions --------------------------------*/ /** * \brief Deallocate XenBus device instance variables. * * \param ivars The instance variable block to free. 
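 *
 * Watches are unregistered before their node strings are freed so that
 * a late callback cannot dereference memory that has already been
 * released.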
*/ static void xenbusb_free_child_ivars(struct xenbus_device_ivars *ivars) { if (ivars->xd_otherend_watch.node != NULL) { xs_unregister_watch(&ivars->xd_otherend_watch); free(ivars->xd_otherend_watch.node, M_XENBUS); ivars->xd_otherend_watch.node = NULL; } if (ivars->xd_local_watch.node != NULL) { xs_unregister_watch(&ivars->xd_local_watch); ivars->xd_local_watch.node = NULL; } if (ivars->xd_node != NULL) { free(ivars->xd_node, M_XENBUS); ivars->xd_node = NULL; } ivars->xd_node_len = 0; if (ivars->xd_type != NULL) { free(ivars->xd_type, M_XENBUS); ivars->xd_type = NULL; } if (ivars->xd_otherend_path != NULL) { free(ivars->xd_otherend_path, M_XENBUS); ivars->xd_otherend_path = NULL; } ivars->xd_otherend_path_len = 0; free(ivars, M_XENBUS); } /** * XenBus watch callback registered against the "state" XenStore * node of the other-end of a split device connection. * * This callback is invoked whenever the state of a device instance's * peer changes. * * \param watch The xs_watch object used to register this callback * function. * \param vec An array of pointers to NUL terminated strings containing * watch event data. The vector should be indexed via the * xs_watch_type enum in xs_wire.h. * \param vec_size The number of elements in vec. */ static void xenbusb_otherend_watch_cb(struct xs_watch *watch, const char **vec, unsigned int vec_size __unused) { struct xenbus_device_ivars *ivars; device_t child; device_t bus; const char *path; enum xenbus_state newstate; ivars = (struct xenbus_device_ivars *)watch->callback_data; child = ivars->xd_dev; bus = device_get_parent(child); path = vec[XS_WATCH_PATH]; if (ivars->xd_otherend_path == NULL || strncmp(ivars->xd_otherend_path, path, ivars->xd_otherend_path_len)) return; newstate = xenbus_read_driver_state(ivars->xd_otherend_path); XENBUSB_OTHEREND_CHANGED(bus, child, newstate); } /** * XenBus watch callback registered against the XenStore sub-tree * representing the local half of a split device connection. * * This callback is invoked whenever any XenStore data in the subtree * is modified, either by us or another privileged domain. * * \param watch The xs_watch object used to register this callback * function. * \param vec An array of pointers to NUL terminated strings containing * watch event data. The vector should be indexed via the * xs_watch_type enum in xs_wire.h. * \param vec_size The number of elements in vec. * */ static void xenbusb_local_watch_cb(struct xs_watch *watch, const char **vec, unsigned int vec_size __unused) { struct xenbus_device_ivars *ivars; device_t child; device_t bus; const char *path; ivars = (struct xenbus_device_ivars *)watch->callback_data; child = ivars->xd_dev; bus = device_get_parent(child); path = vec[XS_WATCH_PATH]; if (ivars->xd_node == NULL || strncmp(ivars->xd_node, path, ivars->xd_node_len)) return; XENBUSB_LOCALEND_CHANGED(bus, child, &path[ivars->xd_node_len]); } /** * Search our internal record of configured devices (not the XenStore) * to determine if the XenBus device indicated by \a node is known to * the system. * * \param dev The XenBus bus instance to search for device children. * \param node The XenStore node path for the device to find. * * \return The device_t of the found device if any, or NULL. * * \note device_t is a pointer type, so it can be compared against * NULL for validity.
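 *
 * Example usage (editor's sketch mirroring the duplicate check in
 * xenbusb_add_device() later in this file; the path value is
 * illustrative only):
 *
 * \code{.c}
 *	// devpath is a full XenStore node path for the candidate
 *	// device, e.g. "device/vif/0".
 *	if (xenbusb_device_exists(dev, devpath) != NULL) {
 *		// Already tracked by NewBus; nothing to add.
 *		return (0);
 *	}
 * \endcode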
*/ static device_t xenbusb_device_exists(device_t dev, const char *node) { device_t *kids; device_t result; struct xenbus_device_ivars *ivars; int i, count; if (device_get_children(dev, &kids, &count)) return (FALSE); result = NULL; for (i = 0; i < count; i++) { ivars = device_get_ivars(kids[i]); if (!strcmp(ivars->xd_node, node)) { result = kids[i]; break; } } free(kids, M_TEMP); return (result); } static void xenbusb_delete_child(device_t dev, device_t child) { struct xenbus_device_ivars *ivars; ivars = device_get_ivars(child); /* * We no longer care about the otherend of the * connection. Cancel the watches now so that we * don't try to handle an event for a partially * detached child. */ if (ivars->xd_otherend_watch.node != NULL) xs_unregister_watch(&ivars->xd_otherend_watch); if (ivars->xd_local_watch.node != NULL) xs_unregister_watch(&ivars->xd_local_watch); device_delete_child(dev, child); xenbusb_free_child_ivars(ivars); } /** * \param dev The NewBus device representing this XenBus bus. * \param child The NewBus device representing a child of dev%'s XenBus bus. */ static void xenbusb_verify_device(device_t dev, device_t child) { if (xs_exists(XST_NIL, xenbus_get_node(child), "state") == 0) { /* * Device tree has been removed from Xenbus. * Tear down the device. */ xenbusb_delete_child(dev, child); } } /** * \brief Enumerate the devices on a XenBus bus and register them with * the NewBus device tree. * * xenbusb_enumerate_bus() will create entries (in state DS_NOTPRESENT) * for nodes that appear in the XenStore, but will not invoke probe/attach * operations on drivers. Probe/Attach processing must be separately * performed via an invocation of xenbusb_probe_children(). This is usually * done via the xbs_probe_children task. * * \param xbs XenBus Bus device softc of the owner of the bus to enumerate. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenbusb_enumerate_bus(struct xenbusb_softc *xbs) { const char **types; u_int type_idx; u_int type_count; int error; error = xs_directory(XST_NIL, xbs->xbs_node, "", &type_count, &types); if (error) return (error); for (type_idx = 0; type_idx < type_count; type_idx++) XENBUSB_ENUMERATE_TYPE(xbs->xbs_dev, types[type_idx]); free(types, M_XENSTORE); return (0); } /** * Handler for all generic XenBus device sysctl nodes. */ static int xenbusb_device_sysctl_handler(SYSCTL_HANDLER_ARGS) { device_t dev; const char *value; dev = (device_t)arg1; switch (arg2) { case XENBUS_IVAR_NODE: value = xenbus_get_node(dev); break; case XENBUS_IVAR_TYPE: value = xenbus_get_type(dev); break; case XENBUS_IVAR_STATE: value = xenbus_strstate(xenbus_get_state(dev)); break; case XENBUS_IVAR_OTHEREND_ID: return (sysctl_handle_int(oidp, NULL, xenbus_get_otherend_id(dev), req)); /* NOTREACHED */ case XENBUS_IVAR_OTHEREND_PATH: value = xenbus_get_otherend_path(dev); break; default: return (EINVAL); } return (SYSCTL_OUT_STR(req, value)); } /** * Create read-only sysctl nodes for xenbusb device ivar data. * * \param dev The XenBus device instance to register with sysctl.
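 *
 * Example (editor's sketch): the OIDs added below surface under the
 * device's dynamic sysctl tree, so userspace can query them with
 * sysctlbyname(3).  The device name "xbd" and unit 0 are hypothetical:
 *
 * \code{.c}
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		char path[128];
 *		size_t len = sizeof(path);
 *
 *		if (sysctlbyname("dev.xbd.0.xenstore_path", path, &len,
 *		    NULL, 0) == 0)
 *			printf("%.*s\n", (int)len, path);
 *		return (0);
 *	}
 * \endcode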
*/ static void xenbusb_device_sysctl_init(device_t dev) { struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "xenstore_path", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, XENBUS_IVAR_NODE, xenbusb_device_sysctl_handler, "A", "XenStore path to device"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "xenbus_dev_type", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, XENBUS_IVAR_TYPE, xenbusb_device_sysctl_handler, "A", "XenBus device type"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "xenbus_connection_state", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, XENBUS_IVAR_STATE, xenbusb_device_sysctl_handler, "A", "XenBus state of peer connection"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "xenbus_peer_domid", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, XENBUS_IVAR_OTHEREND_ID, xenbusb_device_sysctl_handler, "I", "Xen domain ID of peer"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "xenstore_peer_path", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, XENBUS_IVAR_OTHEREND_PATH, xenbusb_device_sysctl_handler, "A", "XenStore path to peer device"); } /** * \brief Decrement the number of XenBus child devices in the * connecting state by one and release the xbs_attach_ch * interrupt configuration hook if the connecting count * drops to zero. * * \param xbs XenBus Bus device softc of the owner of the bus. */ static void xenbusb_release_confighook(struct xenbusb_softc *xbs) { mtx_lock(&xbs->xbs_lock); KASSERT(xbs->xbs_connecting_children > 0, ("Connecting device count error\n")); xbs->xbs_connecting_children--; if (xbs->xbs_connecting_children == 0 && (xbs->xbs_flags & XBS_ATTACH_CH_ACTIVE) != 0) { xbs->xbs_flags &= ~XBS_ATTACH_CH_ACTIVE; mtx_unlock(&xbs->xbs_lock); config_intrhook_disestablish(&xbs->xbs_attach_ch); } else { mtx_unlock(&xbs->xbs_lock); } } /** * \brief Verify the existence of attached device instances and perform * probe/attach processing for newly arrived devices. * * \param dev The NewBus device representing this XenBus bus. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenbusb_probe_children(device_t dev) { device_t *kids; struct xenbus_device_ivars *ivars; int i, count, error; if (device_get_children(dev, &kids, &count) == 0) { for (i = 0; i < count; i++) { if (device_get_state(kids[i]) != DS_NOTPRESENT) { /* * We already know about this one. * Make sure it's still here. */ xenbusb_verify_device(dev, kids[i]); continue; } error = device_probe_and_attach(kids[i]); if (error == ENXIO) { struct xenbusb_softc *xbs; /* * We don't have a PV driver for this device. * However, an emulated device we do support * may share this backend. Hide the node from * XenBus until the next rescan, but leave its * state unchanged so we don't inadvertently * prevent attachment of any emulated device. */ xenbusb_delete_child(dev, kids[i]); /* * Since the XenStore state of this device * still indicates a pending attach, manually * release its hold on the boot process. */ xbs = device_get_softc(dev); xenbusb_release_confighook(xbs); continue; } else if (error) { /* * Transition device to the closed state * so the world knows that attachment will * not occur. */ xenbus_set_state(kids[i], XenbusStateClosed); /* * Remove our record of this device. * So long as it remains in the closed * state in the XenStore, we will not find * it again.
The state will only change * if the control domain actively reconfigures * this device. */ xenbusb_delete_child(dev, kids[i]); continue; } /* * Augment default newbus provided dynamic sysctl * variables with the standard ivar contents of * XenBus devices. */ xenbusb_device_sysctl_init(kids[i]); /* * Now that we have a driver managing this device * that can receive otherend state change events, * hook up a watch for them. */ ivars = device_get_ivars(kids[i]); xs_register_watch(&ivars->xd_otherend_watch); xs_register_watch(&ivars->xd_local_watch); } free(kids, M_TEMP); } return (0); } /** * \brief Task callback function to perform XenBus probe operations * from a known safe context. * * \param arg The NewBus device_t representing the bus instance * on which to perform probe processing. * \param pending The number of times this task was queued before it could * be run. */ static void xenbusb_probe_children_cb(void *arg, int pending __unused) { device_t dev = (device_t)arg; bus_topo_lock(); xenbusb_probe_children(dev); bus_topo_unlock(); } /** * \brief XenStore watch callback for the root node of the XenStore * subtree representing a XenBus. * * This callback performs, or delegates to the xbs_probe_children task, * all processing necessary to handle dynamic device arrival and departure * events from a XenBus. * * \param watch The XenStore watch object associated with this callback. * \param vec The XenStore watch event data. * \param len The number of fields in the event data stream. */ static void xenbusb_devices_changed(struct xs_watch *watch, const char **vec, unsigned int len) { struct xenbusb_softc *xbs; device_t dev; char *node; char *type; char *id; char *p; u_int component; xbs = (struct xenbusb_softc *)watch->callback_data; dev = xbs->xbs_dev; if (len <= XS_WATCH_PATH) { device_printf(dev, "xenbusb_devices_changed: " "Short Event Data.\n"); return; } node = strdup(vec[XS_WATCH_PATH], M_XENBUS); p = strchr(node, '/'); if (p == NULL) goto out; *p = 0; type = p + 1; p = strchr(type, '/'); if (p == NULL) goto out; *p++ = 0; /* * Extract the device ID. A device ID has one or more path * components separated by the '/' character. * * e.g. "<frontend vm>/<frontend device>" for backend devices. */ id = p; for (component = 0; component < xbs->xbs_id_components; component++) { p = strchr(p, '/'); if (p == NULL) break; p++; } if (p != NULL) *p = 0; if (*id != 0 && component >= xbs->xbs_id_components - 1) { xenbusb_add_device(xbs->xbs_dev, type, id); taskqueue_enqueue(taskqueue_thread, &xbs->xbs_probe_children); } out: free(node, M_XENBUS); } /** * \brief Interrupt configuration hook callback associated with xbs_attach_ch. * * Since interrupts are always functional at the time of XenBus configuration, * there is nothing to be done when the callback occurs. This hook is only * registered to hold up boot processing while XenBus devices come online. * * \param arg Unused configuration hook callback argument. */ static void xenbusb_nop_confighook_cb(void *arg __unused) { } /*--------------------------- Public Functions -------------------------------*/ /*--------- API comments for these methods can be found in xenbusb.h ---------*/ void -xenbusb_identify(driver_t *driver __unused, device_t parent) +xenbusb_identify(driver_t *driver, device_t parent) { /* * A single instance of each bus type for which we have a driver * is always present in a system operating under Xen.
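 *
 * (Editor's note: the watch-path parsing in xenbusb_devices_changed()
 * above is easiest to follow in isolation.  A standalone model, with a
 * hypothetical backend path and the two ID components used by backend
 * buses; the driver's NULL checks after the first two strchr() calls
 * are elided because the sample string always parses:)
 *
 * \code{.c}
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int
 *	main(void)
 *	{
 *		char node[] = "backend/vbd/5/51712";
 *		unsigned int id_components = 2, component;
 *		char *type, *id, *p;
 *
 *		p = strchr(node, '/');		// split off the bus root
 *		*p = '\0';
 *		type = p + 1;			// "vbd/5/51712"
 *		p = strchr(type, '/');		// split off the device type
 *		*p++ = '\0';
 *		id = p;				// "5/51712"
 *		for (component = 0; component < id_components; component++) {
 *			p = strchr(p, '/');
 *			if (p == NULL)
 *				break;
 *			p++;
 *		}
 *		if (p != NULL)			// truncate past the ID
 *			*p = '\0';
 *		printf("bus=%s type=%s id=%s\n", node, type, id);
 *		// Prints: bus=backend type=vbd id=5/51712
 *		return (0);
 *	}
 * \endcode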
*/ BUS_ADD_CHILD(parent, 0, driver->name, 0); } int xenbusb_add_device(device_t dev, const char *type, const char *id) { struct xenbusb_softc *xbs; struct sbuf *devpath_sbuf; char *devpath; struct xenbus_device_ivars *ivars; int error; xbs = device_get_softc(dev); devpath_sbuf = sbuf_new_auto(); sbuf_printf(devpath_sbuf, "%s/%s/%s", xbs->xbs_node, type, id); sbuf_finish(devpath_sbuf); devpath = sbuf_data(devpath_sbuf); ivars = malloc(sizeof(*ivars), M_XENBUS, M_ZERO|M_WAITOK); error = ENXIO; if (xs_exists(XST_NIL, devpath, "") != 0) { device_t child; enum xenbus_state state; char *statepath; child = xenbusb_device_exists(dev, devpath); if (child != NULL) { /* * We are already tracking this node */ error = 0; goto out; } state = xenbus_read_driver_state(devpath); if (state != XenbusStateInitialising) { /* * Device is not new, so ignore it. This can * happen if a device is going away after * switching to Closed. */ printf("xenbusb_add_device: Device %s ignored. " "State %d\n", devpath, state); error = 0; goto out; } sx_init(&ivars->xd_lock, "xdlock"); ivars->xd_flags = XDF_CONNECTING; ivars->xd_node = strdup(devpath, M_XENBUS); ivars->xd_node_len = strlen(devpath); ivars->xd_type = strdup(type, M_XENBUS); ivars->xd_state = XenbusStateInitialising; error = XENBUSB_GET_OTHEREND_NODE(dev, ivars); if (error) { printf("xenbusb_add_device: %s no otherend id\n", devpath); goto out; } statepath = malloc(ivars->xd_otherend_path_len + strlen("/state") + 1, M_XENBUS, M_WAITOK); sprintf(statepath, "%s/state", ivars->xd_otherend_path); ivars->xd_otherend_watch.node = statepath; ivars->xd_otherend_watch.callback = xenbusb_otherend_watch_cb; ivars->xd_otherend_watch.callback_data = (uintptr_t)ivars; /* * Other end state node watch, limit to one pending event * to prevent frontends from queuing too many events that * could cause resource starvation. */ ivars->xd_otherend_watch.max_pending = 1; ivars->xd_local_watch.node = ivars->xd_node; ivars->xd_local_watch.callback = xenbusb_local_watch_cb; ivars->xd_local_watch.callback_data = (uintptr_t)ivars; /* * Watch our local path, only writable by us or a privileged * domain, no need to limit. */ ivars->xd_local_watch.max_pending = 0; mtx_lock(&xbs->xbs_lock); xbs->xbs_connecting_children++; mtx_unlock(&xbs->xbs_lock); child = device_add_child(dev, NULL, -1); ivars->xd_dev = child; device_set_ivars(child, ivars); } out: sbuf_delete(devpath_sbuf); if (error != 0) xenbusb_free_child_ivars(ivars); return (error); } int xenbusb_attach(device_t dev, char *bus_node, u_int id_components) { struct xenbusb_softc *xbs; xbs = device_get_softc(dev); mtx_init(&xbs->xbs_lock, "xenbusb softc lock", NULL, MTX_DEF); xbs->xbs_node = bus_node; xbs->xbs_id_components = id_components; xbs->xbs_dev = dev; /* * Since XenBus buses are attached to the XenStore, and * the XenStore does not probe children until after interrupt * services are available, this config hook is used solely * to ensure that the remainder of the boot process (e.g. * mount root) is deferred until child devices are adequately * probed. We unblock the boot process as soon as the * connecting child count in our softc goes to 0. */ xbs->xbs_attach_ch.ich_func = xenbusb_nop_confighook_cb; xbs->xbs_attach_ch.ich_arg = dev; config_intrhook_establish(&xbs->xbs_attach_ch); xbs->xbs_flags |= XBS_ATTACH_CH_ACTIVE; xbs->xbs_connecting_children = 1; /* * The subtree for this bus type may not yet exist, * causing initial enumeration to fail.
We still * want to return success from our attach though * so that we are ready to handle devices for this * bus when they are dynamically attached to us * by a Xen management action. */ (void)xenbusb_enumerate_bus(xbs); xenbusb_probe_children(dev); xbs->xbs_device_watch.node = bus_node; xbs->xbs_device_watch.callback = xenbusb_devices_changed; xbs->xbs_device_watch.callback_data = (uintptr_t)xbs; /* * Allow for unlimited pending watches, as those are local paths * either controlled by the guest or only writable by privileged * domains. */ xbs->xbs_device_watch.max_pending = 0; TASK_INIT(&xbs->xbs_probe_children, 0, xenbusb_probe_children_cb, dev); xs_register_watch(&xbs->xbs_device_watch); xenbusb_release_confighook(xbs); return (0); } int xenbusb_resume(device_t dev) { device_t *kids; struct xenbus_device_ivars *ivars; int i, count, error; char *statepath; /* * We must re-examine each device and find the new path for * its backend. */ if (device_get_children(dev, &kids, &count) == 0) { for (i = 0; i < count; i++) { if (device_get_state(kids[i]) == DS_NOTPRESENT) continue; if (xen_suspend_cancelled) { DEVICE_RESUME(kids[i]); continue; } ivars = device_get_ivars(kids[i]); xs_unregister_watch(&ivars->xd_otherend_watch); xenbus_set_state(kids[i], XenbusStateInitialising); /* * Find the new backend details and * re-register our watch. */ error = XENBUSB_GET_OTHEREND_NODE(dev, ivars); if (error) return (error); statepath = malloc(ivars->xd_otherend_path_len + strlen("/state") + 1, M_XENBUS, M_WAITOK); sprintf(statepath, "%s/state", ivars->xd_otherend_path); free(ivars->xd_otherend_watch.node, M_XENBUS); ivars->xd_otherend_watch.node = statepath; DEVICE_RESUME(kids[i]); xs_register_watch(&ivars->xd_otherend_watch); #if 0 /* * Can't do this yet since we are running in * the xenwatch thread and if we sleep here, * we will stop delivering watch notifications * and the device will never come back online. 
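 *
 * (Editor's note: the deadlock being avoided is self-inflicted: this
 * code runs on the single xenwatch thread, and the xd_state
 * transitions it would sleep for are themselves driven by watch
 * notifications that only this thread can deliver, so the sleep
 * below could never be satisfied.)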
*/ sx_xlock(&ivars->xd_lock); while (ivars->xd_state != XenbusStateClosed && ivars->xd_state != XenbusStateConnected) sx_sleep(&ivars->xd_state, &ivars->xd_lock, 0, "xdresume", 0); sx_xunlock(&ivars->xd_lock); #endif } free(kids, M_TEMP); } return (0); } int xenbusb_print_child(device_t dev, device_t child) { struct xenbus_device_ivars *ivars = device_get_ivars(child); int retval = 0; retval += bus_print_child_header(dev, child); retval += printf(" at %s", ivars->xd_node); retval += bus_print_child_footer(dev, child); return (retval); } int xenbusb_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { struct xenbus_device_ivars *ivars = device_get_ivars(child); switch (index) { case XENBUS_IVAR_NODE: *result = (uintptr_t) ivars->xd_node; return (0); case XENBUS_IVAR_TYPE: *result = (uintptr_t) ivars->xd_type; return (0); case XENBUS_IVAR_STATE: *result = (uintptr_t) ivars->xd_state; return (0); case XENBUS_IVAR_OTHEREND_ID: *result = (uintptr_t) ivars->xd_otherend_id; return (0); case XENBUS_IVAR_OTHEREND_PATH: *result = (uintptr_t) ivars->xd_otherend_path; return (0); } return (ENOENT); } int xenbusb_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { struct xenbus_device_ivars *ivars = device_get_ivars(child); enum xenbus_state newstate; int currstate; switch (index) { case XENBUS_IVAR_STATE: { int error; struct xs_transaction xst; newstate = (enum xenbus_state)value; sx_xlock(&ivars->xd_lock); if (ivars->xd_state == newstate) { error = 0; goto out; } do { error = xs_transaction_start(&xst); if (error != 0) goto out; do { error = xs_scanf(xst, ivars->xd_node, "state", NULL, "%d", &currstate); } while (error == EAGAIN); if (error) goto out; do { error = xs_printf(xst, ivars->xd_node, "state", "%d", newstate); } while (error == EAGAIN); if (error) { /* * Avoid looping through xenbus_dev_fatal() * which calls xenbus_write_ivar to set the * state to closing. */ if (newstate != XenbusStateClosing) xenbus_dev_fatal(dev, error, "writing new state"); goto out; } } while (xs_transaction_end(xst, 0)); ivars->xd_state = newstate; if ((ivars->xd_flags & XDF_CONNECTING) != 0 && (newstate == XenbusStateClosed || newstate == XenbusStateConnected)) { struct xenbusb_softc *xbs; ivars->xd_flags &= ~XDF_CONNECTING; xbs = device_get_softc(dev); xenbusb_release_confighook(xbs); } wakeup(&ivars->xd_state); out: if (error != 0) xs_transaction_end(xst, 1); sx_xunlock(&ivars->xd_lock); /* * Swallow ENOENT errors, as returning an error here will * trigger a panic. ENOENT is fine to ignore, because it means * the toolstack has removed the state node as part of * destroying the device, and so we have to shut down the * device without recreating it or else the node would be * leaked. */ return (error == ENOENT ? 0 : error); } case XENBUS_IVAR_NODE: case XENBUS_IVAR_TYPE: case XENBUS_IVAR_OTHEREND_ID: case XENBUS_IVAR_OTHEREND_PATH: /* * These variables are read-only. */ return (EINVAL); } return (ENOENT); } void xenbusb_otherend_changed(device_t bus, device_t child, enum xenbus_state state) { XENBUS_OTHEREND_CHANGED(child, state); } void xenbusb_localend_changed(device_t bus, device_t child, const char *path) { if (strcmp(path, "/state") != 0) { struct xenbus_device_ivars *ivars; ivars = device_get_ivars(child); sx_xlock(&ivars->xd_lock); ivars->xd_state = xenbus_read_driver_state(ivars->xd_node); sx_xunlock(&ivars->xd_lock); } XENBUS_LOCALEND_CHANGED(child, path); }
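/*
 * Editor's note (sketch, not part of this change): xenbusb_write_ivar()
 * above follows the canonical XenStore transaction retry pattern.
 * Reduced to its skeleton, using only the xs_* calls already used in
 * this file (the helper name and node argument are illustrative):
 *
 *	static int
 *	write_state_node(const char *node, enum xenbus_state newstate)
 *	{
 *		struct xs_transaction xst;
 *		int error;
 *
 *		do {
 *			error = xs_transaction_start(&xst);
 *			if (error != 0)
 *				return (error);
 *			error = xs_printf(xst, node, "state", "%d", newstate);
 *			if (error != 0) {
 *				xs_transaction_end(xst, 1);	// abort
 *				return (error);
 *			}
 *			// A non-zero result here means the transaction
 *			// collided with another XenStore update and the
 *			// whole body must be retried from the top.
 *		} while (xs_transaction_end(xst, 0));
 *		return (0);
 *	}
 */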