diff --git a/sys/dev/xen/bus/xenpv.c b/sys/dev/xen/bus/xenpv.c index d9db10b7005d..080c82c73f08 100644 --- a/sys/dev/xen/bus/xenpv.c +++ b/sys/dev/xen/bus/xenpv.c @@ -1,200 +1,200 @@ /* * Copyright (c) 2014 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenmem_if.h" /* * Allocate unused physical memory above 4GB in order to map memory * from foreign domains. We use memory starting at 4GB in order to * prevent clashes with MMIO/ACPI regions. * * Since this is not possible on i386 just use any available memory * chunk above 1MB and hope we don't clash with anything else. */ #ifdef __amd64__ #define LOW_MEM_LIMIT 0x100000000ul #elif defined(__i386__) #define LOW_MEM_LIMIT 0x100000ul #else #error "Unsupported architecture" #endif static devclass_t xenpv_devclass; static void xenpv_identify(driver_t *driver, device_t parent) { if (!xen_domain()) return; /* Make sure there's only one xenpv device. */ if (devclass_get_device(xenpv_devclass, 0)) return; /* * The xenpv bus should be the last to attach in order * to properly detect if an ISA bus has already been added. */ if (BUS_ADD_CHILD(parent, UINT_MAX, "xenpv", 0) == NULL) panic("Unable to attach xenpv bus."); } static int xenpv_probe(device_t dev) { device_set_desc(dev, "Xen PV bus"); return (BUS_PROBE_NOWILDCARD); } static int xenpv_attach(device_t dev) { int error; /* * Let our child drivers identify any child devices that they * can find. Once that is done attach any devices that we * found. */ error = bus_generic_probe(dev); if (error) return (error); error = bus_generic_attach(dev); return (error); } static struct resource * xenpv_alloc_physmem(device_t dev, device_t child, int *res_id, size_t size) { struct resource *res; vm_paddr_t phys_addr; int error; res = bus_alloc_resource(child, SYS_RES_MEMORY, res_id, LOW_MEM_LIMIT, ~0, size, RF_ACTIVE); if (res == NULL) return (NULL); phys_addr = rman_get_start(res); error = vm_phys_fictitious_reg_range(phys_addr, phys_addr + size, - VM_MEMATTR_DEFAULT); + VM_MEMATTR_XEN); if (error) { bus_release_resource(child, SYS_RES_MEMORY, *res_id, res); return (NULL); } return (res); } static int xenpv_free_physmem(device_t dev, device_t child, int res_id, struct resource *res) { vm_paddr_t phys_addr; size_t size; phys_addr = rman_get_start(res); size = rman_get_size(res); vm_phys_fictitious_unreg_range(phys_addr, phys_addr + size); return (bus_release_resource(child, SYS_RES_MEMORY, res_id, res)); } static device_method_t xenpv_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenpv_identify), DEVMETHOD(device_probe, xenpv_probe), DEVMETHOD(device_attach, xenpv_attach), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), /* Interface to allocate memory for foreign mappings */ DEVMETHOD(xenmem_alloc, xenpv_alloc_physmem), DEVMETHOD(xenmem_free, xenpv_free_physmem), DEVMETHOD_END }; static driver_t xenpv_driver = { "xenpv", xenpv_methods, 0, }; DRIVER_MODULE(xenpv, nexus, xenpv_driver, xenpv_devclass, 0, 0); struct resource * xenmem_alloc(device_t dev, int *res_id, size_t size) { device_t parent; parent = device_get_parent(dev); if (parent == NULL) return (NULL); return (XENMEM_ALLOC(parent, dev, res_id, size)); } int xenmem_free(device_t dev, int res_id, struct resource *res) { device_t parent; parent = device_get_parent(dev); if (parent == NULL) return (ENXIO); return (XENMEM_FREE(parent, dev, res_id, res)); } diff --git a/sys/dev/xen/console/xen_console.c b/sys/dev/xen/console/xen_console.c index 7eed30885e6b..8ad755dbf214 100644 --- a/sys/dev/xen/console/xen_console.c +++ b/sys/dev/xen/console/xen_console.c @@ -1,791 +1,791 @@ /* * Copyright (c) 2015 Julien Grall * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_ddb.h" #include "opt_printf.h" #ifdef DDB #include #endif static char driver_name[] = "xc"; struct xencons_priv; typedef void xencons_early_init_t(struct xencons_priv *cons); typedef int xencons_init_t(device_t dev, struct tty *tp, driver_intr_t intr_handler); typedef int xencons_read_t(struct xencons_priv *cons, char *buffer, unsigned int size); typedef int xencons_write_t(struct xencons_priv *cons, const char *buffer, unsigned int size); struct xencons_ops { /* * Called by the low-level driver during early boot. * Only the minimal set up to get a console should be done here. */ xencons_early_init_t *early_init; /* Prepare the console to be fully use */ xencons_init_t *init; /* Read/write helpers */ xencons_read_t *read; xencons_write_t *write; }; struct xencons_priv { /* Mutex to protect the shared ring and the internal buffers */ struct mtx mtx; /* Interrupt handler used for notify the backend */ xen_intr_handle_t intr_handle; /* KDB internal state */ #ifdef KDB int altbrk; #endif /* Status of the tty */ bool opened; /* Callout used when the write buffer is full */ struct callout callout; /* Internal buffers must be used with mtx locked */ #define WBUF_SIZE 4096 #define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) char wbuf[WBUF_SIZE]; unsigned int wc, wp; /* Consumer/producer wbuf */ #define RBUF_SIZE 1024 #define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) char rbuf[RBUF_SIZE]; unsigned int rc, rp; /* Consumer/producer rbuf */ /* Pointer to the console operations */ const struct xencons_ops *ops; /* * Ring specific fields * XXX: make an union? */ /* Event channel number for early notification (PV only) */ uint32_t evtchn; /* Console shared page */ struct xencons_interface *intf; }; /* * Data for the main console * Necessary to support low-level console driver */ static struct xencons_priv main_cons; #define XC_POLLTIME (hz/10) /*----------------------------- Debug function ------------------------------*/ struct putchar_arg { char *buf; size_t size; size_t n_next; }; static void putchar(int c, void *arg) { struct putchar_arg *pca; pca = (struct putchar_arg *)arg; if (pca->buf == NULL) { /* * We have no buffer, output directly to the * console char by char. */ HYPERVISOR_console_write((char *)&c, 1); } else { pca->buf[pca->n_next++] = c; if ((pca->size == pca->n_next) || (c = '\0')) { /* Flush the buffer */ HYPERVISOR_console_write(pca->buf, pca->n_next); pca->n_next = 0; } } } void xc_printf(const char *fmt, ...) { va_list ap; struct putchar_arg pca; #ifdef PRINTF_BUFR_SIZE char buf[PRINTF_BUFR_SIZE]; pca.buf = buf; pca.size = sizeof(buf); pca.n_next = 0; #else pca.buf = NULL; pca.size = 0; #endif KASSERT((xen_domain()), ("call to xc_printf from non Xen guest")); va_start(ap, fmt); kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); #ifdef PRINTF_BUFR_SIZE if (pca.n_next != 0) HYPERVISOR_console_write(buf, pca.n_next); #endif } /*---------------------- Helpers for the console lock -----------------------*/ /* * The lock is not used when the kernel is panicing as it will never recover * and we want to output no matter what it costs. */ static inline void xencons_lock(struct xencons_priv *cons) { if (!KERNEL_PANICKED()) mtx_lock_spin(&cons->mtx); } static inline void xencons_unlock(struct xencons_priv *cons) { if (!KERNEL_PANICKED()) mtx_unlock_spin(&cons->mtx); } #define xencons_lock_assert(cons) mtx_assert(&(cons)->mtx, MA_OWNED) /*------------------ Helpers for the hypervisor console ---------------------*/ static void xencons_early_init_hypervisor(struct xencons_priv *cons) { /* * Nothing to setup for the low-level console when using * the hypervisor console. */ } static int xencons_init_hypervisor(device_t dev, struct tty *tp, driver_intr_t intr_handler) { struct xencons_priv *cons; int err; cons = tty_softc(tp); err = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL, intr_handler, tp, INTR_TYPE_TTY | INTR_MPSAFE, &cons->intr_handle); if (err != 0) device_printf(dev, "Can't register console interrupt\n"); return (err); } static int xencons_write_hypervisor(struct xencons_priv *cons, const char *buffer, unsigned int size) { HYPERVISOR_console_io(CONSOLEIO_write, size, buffer); return (size); } static int xencons_read_hypervisor(struct xencons_priv *cons, char *buffer, unsigned int size) { xencons_lock_assert(cons); return (HYPERVISOR_console_io(CONSOLEIO_read, size, buffer)); } static const struct xencons_ops xencons_hypervisor_ops = { .early_init = xencons_early_init_hypervisor, .init = xencons_init_hypervisor, .read = xencons_read_hypervisor, .write = xencons_write_hypervisor, }; /*------------------ Helpers for the ring console ---------------------------*/ static void xencons_early_init_ring(struct xencons_priv *cons) { cons->intf = pmap_mapdev_attr(ptoa(xen_get_console_mfn()), PAGE_SIZE, - VM_MEMATTR_WRITE_BACK); + VM_MEMATTR_XEN); cons->evtchn = xen_get_console_evtchn(); } static int xencons_init_ring(device_t dev, struct tty *tp, driver_intr_t intr_handler) { struct xencons_priv *cons; int err; cons = tty_softc(tp); if (cons->evtchn == 0) return (ENODEV); err = xen_intr_bind_local_port(dev, cons->evtchn, NULL, intr_handler, tp, INTR_TYPE_TTY | INTR_MPSAFE, &cons->intr_handle); if (err != 0) return (err); return (0); } static void xencons_notify_ring(struct xencons_priv *cons) { /* * The console may be used before the ring interrupt is properly * initialized. * If so, fallback to directly use the event channel hypercall. */ if (__predict_true(cons->intr_handle != NULL)) xen_intr_signal(cons->intr_handle); else { struct evtchn_send send = { .port = cons->evtchn }; HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); } } static int xencons_write_ring(struct xencons_priv *cons, const char *buffer, unsigned int size) { struct xencons_interface *intf; XENCONS_RING_IDX wcons, wprod; int sent; intf = cons->intf; xencons_lock_assert(cons); wcons = intf->out_cons; wprod = intf->out_prod; mb(); KASSERT((wprod - wcons) <= sizeof(intf->out), ("console send ring inconsistent")); for (sent = 0; sent < size; sent++, wprod++) { if ((wprod - wcons) >= sizeof(intf->out)) break; intf->out[MASK_XENCONS_IDX(wprod, intf->out)] = buffer[sent]; } wmb(); intf->out_prod = wprod; xencons_notify_ring(cons); return (sent); } static int xencons_read_ring(struct xencons_priv *cons, char *buffer, unsigned int size) { struct xencons_interface *intf; XENCONS_RING_IDX rcons, rprod; unsigned int rsz; intf = cons->intf; xencons_lock_assert(cons); rcons = intf->in_cons; rprod = intf->in_prod; rmb(); for (rsz = 0; rsz < size; rsz++, rcons++) { if (rprod == rcons) break; buffer[rsz] = intf->in[MASK_XENCONS_IDX(rcons, intf->in)]; } wmb(); intf->in_cons = rcons; /* No need to notify the backend if nothing has been read */ if (rsz != 0) xencons_notify_ring(cons); return (rsz); } static const struct xencons_ops xencons_ring_ops = { .early_init = xencons_early_init_ring, .init = xencons_init_ring, .read = xencons_read_ring, .write = xencons_write_ring, }; /*------------------ Common implementation of the console -------------------*/ /* * Called by the low-level driver during early boot to initialize the * main console driver. * Only the minimal set up to get a console should be done here. */ static void xencons_early_init(void) { mtx_init(&main_cons.mtx, "XCONS LOCK", NULL, MTX_SPIN); if (xen_get_console_evtchn() == 0) main_cons.ops = &xencons_hypervisor_ops; else main_cons.ops = &xencons_ring_ops; main_cons.ops->early_init(&main_cons); } /* * Receive character from the console and put them in the internal buffer * XXX: Handle overflow of the internal buffer */ static void xencons_rx(struct xencons_priv *cons) { char buf[16]; int sz; xencons_lock(cons); while ((sz = cons->ops->read(cons, buf, sizeof(buf))) > 0) { int i; for (i = 0; i < sz; i++) cons->rbuf[RBUF_MASK(cons->rp++)] = buf[i]; } xencons_unlock(cons); } /* Return true if the write buffer is full */ static bool xencons_tx_full(struct xencons_priv *cons) { unsigned int used; xencons_lock(cons); used = cons->wp - cons->wc; xencons_unlock(cons); return (used >= WBUF_SIZE); } static void xencons_tx_flush(struct xencons_priv *cons, int force) { int sz; xencons_lock(cons); while (cons->wc != cons->wp) { int sent; sz = cons->wp - cons->wc; if (sz > (WBUF_SIZE - WBUF_MASK(cons->wc))) sz = WBUF_SIZE - WBUF_MASK(cons->wc); sent = cons->ops->write(cons, &cons->wbuf[WBUF_MASK(cons->wc)], sz); /* * The other end may not have been initialized. Ignore * the force. */ if (__predict_false(sent < 0)) break; /* * If force is set, spin until the console data is * flushed through the domain controller. */ if (sent == 0 && __predict_true(!force)) break; cons->wc += sent; } xencons_unlock(cons); } static bool xencons_putc(struct xencons_priv *cons, int c, bool force_flush) { xencons_lock(cons); if ((cons->wp - cons->wc) < WBUF_SIZE) cons->wbuf[WBUF_MASK(cons->wp++)] = c; xencons_unlock(cons); xencons_tx_flush(cons, force_flush); return (xencons_tx_full(cons)); } static int xencons_getc(struct xencons_priv *cons) { int ret; xencons_lock(cons); if (cons->rp != cons->rc) { /* We need to return only one char */ ret = (int)cons->rbuf[RBUF_MASK(cons->rc)]; cons->rc++; } else { ret = -1; } xencons_unlock(cons); return (ret); } static bool xencons_tx(struct tty *tp) { bool cons_full; char c; struct xencons_priv *cons; cons = tty_softc(tp); tty_assert_locked(tp); /* * Don't transmit any character if the buffer is full. Otherwise, * characters may be lost */ if (xencons_tx_full(cons)) return (false); cons_full = false; while (!cons_full && ttydisc_getc(tp, &c, 1) == 1) cons_full = xencons_putc(cons, c, false); return (!cons_full); } static void xencons_intr(void *arg) { struct tty *tp; struct xencons_priv *cons; int ret; tp = arg; cons = tty_softc(tp); /* * The input will be used by the low-level console when KDB is active */ if (kdb_active) return; /* * It's not necessary to retrieve input when the tty is not opened */ if (!cons->opened) return; xencons_rx(cons); tty_lock(tp); while ((ret = xencons_getc(cons)) != -1) { #ifdef KDB kdb_alt_break(ret, &cons->altbrk); #endif ttydisc_rint(tp, ret, 0); } ttydisc_rint_done(tp); tty_unlock(tp); /* Try to flush remaining characters if necessary */ xencons_tx_flush(cons, 0); } /* * Helpers to call while shutting down: * - Force flush all output */ static void xencons_shutdown(void *arg, int howto) { struct tty *tp; tp = arg; xencons_tx_flush(tty_softc(tp), 1); } /*---------------------- Low-level console driver ---------------------------*/ static void xencons_cnprobe(struct consdev *cp) { if (!xen_domain()) return; cp->cn_pri = (boothowto & RB_SERIAL) ? CN_REMOTE : CN_NORMAL; sprintf(cp->cn_name, "%s0", driver_name); } static void xencons_cninit(struct consdev *cp) { xencons_early_init(); } static void xencons_cnterm(struct consdev *cp) { } static void xencons_cngrab(struct consdev *cp) { } static void xencons_cnungrab(struct consdev *cp) { } static int xencons_cngetc(struct consdev *dev) { xencons_rx(&main_cons); return (xencons_getc(&main_cons)); } static void xencons_cnputc(struct consdev *dev, int c) { /* * The low-level console is used by KDB and panic. We have to ensure * that any character sent will be seen by the backend. */ xencons_putc(&main_cons, c, true); } CONSOLE_DRIVER(xencons); /*----------------------------- TTY driver ---------------------------------*/ static int xencons_tty_open(struct tty *tp) { struct xencons_priv *cons; cons = tty_softc(tp); cons->opened = true; return (0); } static void xencons_tty_close(struct tty *tp) { struct xencons_priv *cons; cons = tty_softc(tp); cons->opened = false; } static void xencons_timeout(void *v) { struct tty *tp; struct xencons_priv *cons; tp = v; cons = tty_softc(tp); if (!xencons_tx(tp)) callout_reset(&cons->callout, XC_POLLTIME, xencons_timeout, tp); } static void xencons_tty_outwakeup(struct tty *tp) { struct xencons_priv *cons; cons = tty_softc(tp); callout_stop(&cons->callout); if (!xencons_tx(tp)) callout_reset(&cons->callout, XC_POLLTIME, xencons_timeout, tp); } static struct ttydevsw xencons_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_open = xencons_tty_open, .tsw_close = xencons_tty_close, .tsw_outwakeup = xencons_tty_outwakeup, }; /*------------------------ Main console driver ------------------------------*/ static void xencons_identify(driver_t *driver, device_t parent) { device_t child; if (main_cons.ops == NULL) return; child = BUS_ADD_CHILD(parent, 0, driver_name, 0); } static int xencons_probe(device_t dev) { device_set_desc(dev, "Xen Console"); return (BUS_PROBE_NOWILDCARD); } static int xencons_attach(device_t dev) { struct tty *tp; /* * The main console is already allocated statically in order to * support low-level console */ struct xencons_priv *cons; int err; cons = &main_cons; tp = tty_alloc(&xencons_ttydevsw, cons); tty_makedev(tp, NULL, "%s%r", driver_name, 0); device_set_softc(dev, tp); callout_init_mtx(&cons->callout, tty_getlock(tp), 0); err = cons->ops->init(dev, tp, xencons_intr); if (err != 0) { device_printf(dev, "Unable to initialize the console (%d)\n", err); return (err); } /* register handler to flush console on shutdown */ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xencons_shutdown, tp, SHUTDOWN_PRI_DEFAULT)) == NULL) device_printf(dev, "shutdown event registration failed!\n"); return (0); } static int xencons_resume(device_t dev) { struct xencons_priv *cons; struct tty *tp; int err; tp = device_get_softc(dev); cons = tty_softc(tp); xen_intr_unbind(&cons->intr_handle); err = cons->ops->init(dev, tp, xencons_intr); if (err != 0) { device_printf(dev, "Unable to resume the console (%d)\n", err); return (err); } return (0); } static devclass_t xencons_devclass; static device_method_t xencons_methods[] = { DEVMETHOD(device_identify, xencons_identify), DEVMETHOD(device_probe, xencons_probe), DEVMETHOD(device_attach, xencons_attach), DEVMETHOD(device_resume, xencons_resume), DEVMETHOD_END }; static driver_t xencons_driver = { driver_name, xencons_methods, 0, }; DRIVER_MODULE(xc, xenpv, xencons_driver, xencons_devclass, 0, 0); diff --git a/sys/dev/xen/xenstore/xenstore.c b/sys/dev/xen/xenstore/xenstore.c index 890cfe009411..dc00ed0d50b8 100644 --- a/sys/dev/xen/xenstore/xenstore.c +++ b/sys/dev/xen/xenstore/xenstore.c @@ -1,1664 +1,1664 @@ /****************************************************************************** * xenstore.c * * Low-level kernel interface to the XenStore. * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /** * \file xenstore.c * \brief XenStore interface * * The XenStore interface is a simple storage system that is a means of * communicating state and configuration data between the Xen Domain 0 * and the various guest domains. All configuration data other than * a small amount of essential information required during the early * boot process of launching a Xen aware guest, is managed using the * XenStore. * * The XenStore is ASCII string based, and has a structure and semantics * similar to a filesystem. There are files and directories, the directories * able to contain files or other directories. The depth of the hierarchy * is only limited by the XenStore's maximum path length. * * The communication channel between the XenStore service and other * domains is via two, guest specific, ring buffers in a shared memory * area. One ring buffer is used for communicating in each direction. * The grant table references for this shared memory are given to the * guest either via the xen_start_info structure for a fully para- * virtualized guest, or via HVM hypercalls for a hardware virtualized * guest. * * The XenStore communication relies on an event channel and thus * interrupts. For this reason, the attachment of the XenStore * relies on an interrupt driven configuration hook to hold off * boot processing until communication with the XenStore service * can be established. * * Several Xen services depend on the XenStore, most notably the * XenBus used to discover and manage Xen devices. These services * are implemented as NewBus child attachments to a bus exported * by this XenStore driver. */ static struct xs_watch *find_watch(const char *token); MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results"); /** * Pointer to shared memory communication structures allowing us * to communicate with the XenStore service. * * When operating in full PV mode, this pointer is set early in kernel * startup from within xen_machdep.c. In HVM mode, we use hypercalls * to get the guest frame number for the shared page and then map it * into kva. See xs_init() for details. */ static struct xenstore_domain_interface *xen_store; /*-------------------------- Private Data Structures ------------------------*/ /** * Structure capturing messages received from the XenStore service. */ struct xs_stored_msg { TAILQ_ENTRY(xs_stored_msg) list; struct xsd_sockmsg hdr; union { /* Queued replies. */ struct { char *body; } reply; /* Queued watch events. */ struct { struct xs_watch *handle; const char **vec; u_int vec_size; } watch; } u; }; TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg); /** * Container for all XenStore related state. */ struct xs_softc { /** Newbus device for the XenStore. */ device_t xs_dev; /** * Lock serializing access to ring producer/consumer * indexes. Use of this lock guarantees that wakeups * of blocking readers/writers are not missed due to * races with the XenStore service. */ struct mtx ring_lock; /* * Mutex used to insure exclusive access to the outgoing * communication ring. We use a lock type that can be * held while sleeping so that xs_write() can block waiting * for space in the ring to free up, without allowing another * writer to come in and corrupt a partial message write. */ struct sx request_mutex; /** * A list of replies to our requests. * * The reply list is filled by xs_rcv_thread(). It * is consumed by the context that issued the request * to which a reply is made. The requester blocks in * xs_read_reply(). * * /note Only one requesting context can be active at a time. * This is guaranteed by the request_mutex and insures * that the requester sees replies matching the order * of its requests. */ struct xs_stored_msg_list reply_list; /** Lock protecting the reply list. */ struct mtx reply_lock; /** * List of registered watches. */ struct xs_watch_list registered_watches; /** Lock protecting the registered watches list. */ struct mtx registered_watches_lock; /** * List of pending watch callback events. */ struct xs_stored_msg_list watch_events; /** Lock protecting the watch calback list. */ struct mtx watch_events_lock; /** * The processid of the xenwatch thread. */ pid_t xenwatch_pid; /** * Sleepable mutex used to gate the execution of XenStore * watch event callbacks. * * xenwatch_thread holds an exclusive lock on this mutex * while delivering event callbacks, and xenstore_unregister_watch() * uses an exclusive lock of this mutex to guarantee that no * callbacks of the just unregistered watch are pending * before returning to its caller. */ struct sx xenwatch_mutex; /** * The HVM guest pseudo-physical frame number. This is Xen's mapping * of the true machine frame number into our "physical address space". */ unsigned long gpfn; /** * The event channel for communicating with the * XenStore service. */ int evtchn; /** Handle for XenStore interrupts. */ xen_intr_handle_t xen_intr_handle; /** * Interrupt driven config hook allowing us to defer * attaching children until interrupts (and thus communication * with the XenStore service) are available. */ struct intr_config_hook xs_attachcb; /** * Xenstore is a user-space process that usually runs in Dom0, * so if this domain is booting as Dom0, xenstore wont we accessible, * and we have to defer the initialization of xenstore related * devices to later (when xenstore is started). */ bool initialized; /** * Task to run when xenstore is initialized (Dom0 only), will * take care of attaching xenstore related devices. */ struct task xs_late_init; }; /*-------------------------------- Global Data ------------------------------*/ static struct xs_softc xs; /*------------------------- Private Utility Functions -----------------------*/ /** * Count and optionally record pointers to a number of NUL terminated * strings in a buffer. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param dest An array to store pointers to each string found in strings. * \param len The length of the buffer pointed to by strings. * * \return A count of the number of strings found. */ static u_int extract_strings(const char *strings, const char **dest, u_int len) { u_int num; const char *p; for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) { if (dest != NULL) *dest++ = p; num++; } return (num); } /** * Convert a contiguous buffer containing a series of NUL terminated * strings into an array of pointers to strings. * * The returned pointer references the array of string pointers which * is followed by the storage for the string data. It is the client's * responsibility to free this storage. * * The storage addressed by strings is free'd prior to split returning. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param len The length of the buffer pointed to by strings. * \param num The number of strings found and returned in the strings * array. * * \return An array of pointers to the strings found in the input buffer. */ static const char ** split(char *strings, u_int len, u_int *num) { const char **ret; /* Protect against unterminated buffers. */ if (len > 0) strings[len - 1] = '\0'; /* Count the strings. */ *num = extract_strings(strings, /*dest*/NULL, len); /* Transfer to one big alloc for easy freeing by the caller. */ ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK); memcpy(&ret[*num], strings, len); free(strings, M_XENSTORE); /* Extract pointers to newly allocated array. */ strings = (char *)&ret[*num]; (void)extract_strings(strings, /*dest*/ret, len); return (ret); } /*------------------------- Public Utility Functions -------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ struct sbuf * xs_join(const char *dir, const char *name) { struct sbuf *sb; sb = sbuf_new_auto(); sbuf_cat(sb, dir); if (name[0] != '\0') { sbuf_putc(sb, '/'); sbuf_cat(sb, name); } sbuf_finish(sb); return (sb); } /*-------------------- Low Level Communication Management --------------------*/ /** * Interrupt handler for the XenStore event channel. * * XenStore reads and writes block on "xen_store" for buffer * space. Wakeup any blocking operations when the XenStore * service has modified the queues. */ static void xs_intr(void * arg __unused /*__attribute__((unused))*/) { /* If xenstore has not been initialized, initialize it now */ if (!xs.initialized) { xs.initialized = true; /* * Since this task is probing and attaching devices we * have to hold the Giant lock. */ taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init); } /* * Hold ring lock across wakeup so that clients * cannot miss a wakeup. */ mtx_lock(&xs.ring_lock); wakeup(xen_store); mtx_unlock(&xs.ring_lock); } /** * Verify that the indexes for a ring are valid. * * The difference between the producer and consumer cannot * exceed the size of the ring. * * \param cons The consumer index for the ring to test. * \param prod The producer index for the ring to test. * * \retval 1 If indexes are in range. * \retval 0 If the indexes are out of range. */ static int xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) { return ((prod - cons) <= XENSTORE_RING_SIZE); } /** * Return a pointer to, and the length of, the contiguous * free region available for output in a ring buffer. * * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous storage available. * * \return A pointer to the start location of the free region. */ static void * xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) *len = XENSTORE_RING_SIZE - (prod - cons); return (buf + MASK_XENSTORE_IDX(prod)); } /** * Return a pointer to, and the length of, the contiguous * data available to read from a ring buffer. * * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous data available to read. * * \return A pointer to the start location of the available data. */ static const void * xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, const char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); if ((prod - cons) < *len) *len = prod - cons; return (buf + MASK_XENSTORE_IDX(cons)); } /** * Transmit data to the XenStore service. * * \param tdata A pointer to the contiguous data to send. * \param len The amount of data to send. * * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * \invariant xs.request_mutex exclusively locked. */ static int xs_write_store(const void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; const char *data = (const char *)tdata; int error; sx_assert(&xs.request_mutex, SX_XLOCKED); while (len != 0) { void *dst; u_int avail; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->req_cons; prod = xen_store->req_prod; if ((prod - cons) == XENSTORE_RING_SIZE) { /* * Output ring is full. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbwrite", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); /* Try again. */ continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->req_cons = xen_store->req_prod = 0; return (EIO); } dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail); if (avail > len) avail = len; memcpy(dst, data, avail); data += avail; len -= avail; /* * The store to the producer index, which indicates * to the other side that new data has arrived, must * be visible only after our copy of the data into the * ring has completed. */ wmb(); xen_store->req_prod += avail; /* * xen_intr_signal() implies mb(). The other side will see * the change to req_prod at the time of the interrupt. */ xen_intr_signal(xs.xen_intr_handle); } return (0); } /** * Receive data from the XenStore service. * * \param tdata A pointer to the contiguous buffer to receive the data. * \param len The amount of data to receive. * * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * * \note xs_read does not perform any internal locking to guarantee * serial access to the incoming ring buffer. However, there * is only one context processing reads: xs_rcv_thread(). */ static int xs_read_store(void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; char *data = (char *)tdata; int error; while (len != 0) { u_int avail; const char *src; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->rsp_cons; prod = xen_store->rsp_prod; if (cons == prod) { /* * Nothing to read. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbread", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->rsp_cons = xen_store->rsp_prod = 0; return (EIO); } src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail); if (avail > len) avail = len; /* * Insure the data we read is related to the indexes * we read above. */ rmb(); memcpy(data, src, avail); data += avail; len -= avail; /* * Insure that the producer of this ring does not see * the ring space as free until after we have copied it * out. */ mb(); xen_store->rsp_cons += avail; /* * xen_intr_signal() implies mb(). The producer will see * the updated consumer index when the event is delivered. */ xen_intr_signal(xs.xen_intr_handle); } return (0); } /*----------------------- Received Message Processing ------------------------*/ /** * Block reading the next message from the XenStore service and * process the result. * * \param type The returned type of the XenStore message received. * * \return 0 on success. Otherwise an errno value indicating the * type of failure encountered. */ static int xs_process_msg(enum xsd_sockmsg_type *type) { struct xs_stored_msg *msg; char *body; int error; msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK); error = xs_read_store(&msg->hdr, sizeof(msg->hdr)); if (error) { free(msg, M_XENSTORE); return (error); } body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK); error = xs_read_store(body, msg->hdr.len); if (error) { free(body, M_XENSTORE); free(msg, M_XENSTORE); return (error); } body[msg->hdr.len] = '\0'; *type = msg->hdr.type; if (msg->hdr.type == XS_WATCH_EVENT) { msg->u.watch.vec = split(body, msg->hdr.len, &msg->u.watch.vec_size); mtx_lock(&xs.registered_watches_lock); msg->u.watch.handle = find_watch( msg->u.watch.vec[XS_WATCH_TOKEN]); mtx_lock(&xs.watch_events_lock); if (msg->u.watch.handle != NULL && (!msg->u.watch.handle->max_pending || msg->u.watch.handle->pending < msg->u.watch.handle->max_pending)) { msg->u.watch.handle->pending++; TAILQ_INSERT_TAIL(&xs.watch_events, msg, list); wakeup(&xs.watch_events); mtx_unlock(&xs.watch_events_lock); } else { mtx_unlock(&xs.watch_events_lock); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.registered_watches_lock); } else { msg->u.reply.body = body; mtx_lock(&xs.reply_lock); TAILQ_INSERT_TAIL(&xs.reply_list, msg, list); wakeup(&xs.reply_list); mtx_unlock(&xs.reply_lock); } return (0); } /** * Thread body of the XenStore receive thread. * * This thread blocks waiting for data from the XenStore service * and processes and received messages. */ static void xs_rcv_thread(void *arg __unused) { int error; enum xsd_sockmsg_type type; for (;;) { error = xs_process_msg(&type); if (error) printf("XENSTORE error %d while reading message\n", error); } } /*---------------- XenStore Message Request/Reply Processing -----------------*/ #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) /** * Convert a XenStore error string into an errno number. * * \param errorstring The error string to convert. * * \return The errno best matching the input string. * * \note Unknown error strings are converted to EINVAL. */ static int xs_get_error(const char *errorstring) { u_int i; for (i = 0; i < xsd_error_count; i++) { if (!strcmp(errorstring, xsd_errors[i].errstring)) return (xsd_errors[i].errnum); } log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s", errorstring); return (EINVAL); } /** * Block waiting for a reply to a message request. * * \param type The returned type of the reply. * \param len The returned body length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result) { struct xs_stored_msg *msg; char *body; int error; mtx_lock(&xs.reply_lock); while (TAILQ_EMPTY(&xs.reply_list)) { error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait", hz/10); if (error && error != EWOULDBLOCK) { mtx_unlock(&xs.reply_lock); return (error); } } msg = TAILQ_FIRST(&xs.reply_list); TAILQ_REMOVE(&xs.reply_list, msg, list); mtx_unlock(&xs.reply_lock); *type = msg->hdr.type; if (len) *len = msg->hdr.len; body = msg->u.reply.body; free(msg, M_XENSTORE); *result = body; return (0); } /** * Pass-thru interface for XenStore access by userland processes * via the XenStore device. * * Reply type and length data are returned by overwriting these * fields in the passed in request message. * * \param msg A properly formatted message to transmit to * the XenStore service. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the cause * of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(result, M_XENSTORE); */ int xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) { uint32_t request_type; int error; request_type = msg->type; sx_xlock(&xs.request_mutex); if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0) error = xs_read_reply(&msg->type, &msg->len, result); sx_xunlock(&xs.request_mutex); return (error); } /** * Send a message with an optionally muti-part body to the XenStore service. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param iovec Pointers to the body sections of the request. * \param num_vecs The number of body sections in the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type, const struct iovec *iovec, u_int num_vecs, u_int *len, void **result) { struct xsd_sockmsg msg; void *ret = NULL; u_int i; int error; msg.tx_id = t.id; msg.req_id = 0; msg.type = request_type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; sx_xlock(&xs.request_mutex); error = xs_write_store(&msg, sizeof(msg)); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } for (i = 0; i < num_vecs; i++) { error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } } error = xs_read_reply(&msg.type, len, &ret); error_lock_held: sx_xunlock(&xs.request_mutex); if (error) return (error); if (msg.type == XS_ERROR) { error = xs_get_error(ret); free(ret, M_XENSTORE); return (error); } /* Reply is either error or an echo of our request message type. */ KASSERT(msg.type == request_type, ("bad xenstore message type")); if (result) *result = ret; else free(ret, M_XENSTORE); return (0); } /** * Wrapper for xs_talkv allowing easy transmission of a message with * a single, contiguous, message body. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param body The body of the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type, const char *body, u_int *len, void **result) { struct iovec iovec; iovec.iov_base = (void *)(uintptr_t)body; iovec.iov_len = strlen(body) + 1; return (xs_talkv(t, request_type, &iovec, 1, len, result)); } /*------------------------- XenStore Watch Support ---------------------------*/ /** * Transmit a watch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param tocken A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_watch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL)); } /** * Transmit an uwatch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param tocken A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_unwatch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); } /** * Convert from watch token (unique identifier) to the associated * internal tracking structure for this watch. * * \param tocken The unique identifier for the watch to find. * * \return A pointer to the found watch structure or NULL. */ static struct xs_watch * find_watch(const char *token) { struct xs_watch *i, *cmp; cmp = (void *)strtoul(token, NULL, 16); LIST_FOREACH(i, &xs.registered_watches, list) if (i == cmp) return (i); return (NULL); } /** * Thread body of the XenStore watch event dispatch thread. */ static void xenwatch_thread(void *unused) { struct xs_stored_msg *msg; for (;;) { mtx_lock(&xs.watch_events_lock); while (TAILQ_EMPTY(&xs.watch_events)) mtx_sleep(&xs.watch_events, &xs.watch_events_lock, PWAIT | PCATCH, "waitev", hz/10); mtx_unlock(&xs.watch_events_lock); sx_xlock(&xs.xenwatch_mutex); mtx_lock(&xs.watch_events_lock); msg = TAILQ_FIRST(&xs.watch_events); if (msg) { TAILQ_REMOVE(&xs.watch_events, msg, list); msg->u.watch.handle->pending--; } mtx_unlock(&xs.watch_events_lock); if (msg != NULL) { /* * XXX There are messages coming in with a NULL * XXX callback. This deserves further investigation; * XXX the workaround here simply prevents the kernel * XXX from panic'ing on startup. */ if (msg->u.watch.handle->callback != NULL) msg->u.watch.handle->callback( msg->u.watch.handle, (const char **)msg->u.watch.vec, msg->u.watch.vec_size); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } sx_xunlock(&xs.xenwatch_mutex); } } /*----------- XenStore Configuration, Initialization, and Control ------------*/ /** * Setup communication channels with the XenStore service. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xs_init_comms(void) { int error; if (xen_store->rsp_prod != xen_store->rsp_cons) { log(LOG_WARNING, "XENSTORE response ring is not quiescent " "(%08x:%08x): fixing up\n", xen_store->rsp_cons, xen_store->rsp_prod); xen_store->rsp_cons = xen_store->rsp_prod; } xen_intr_unbind(&xs.xen_intr_handle); error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn, /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE, &xs.xen_intr_handle); if (error) { log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); return (error); } return (0); } /*------------------ Private Device Attachment Functions --------------------*/ static void xs_identify(driver_t *driver, device_t parent) { BUS_ADD_CHILD(parent, 0, "xenstore", 0); } /** * Probe for the existence of the XenStore. * * \param dev */ static int xs_probe(device_t dev) { /* * We are either operating within a PV kernel or being probed * as the child of the successfully attached xenpci device. * Thus we are in a Xen environment and there will be a XenStore. * Unconditionally return success. */ device_set_desc(dev, "XenStore"); return (BUS_PROBE_NOWILDCARD); } static void xs_attach_deferred(void *arg) { bus_generic_probe(xs.xs_dev); bus_generic_attach(xs.xs_dev); config_intrhook_disestablish(&xs.xs_attachcb); } static void xs_attach_late(void *arg, int pending) { KASSERT((pending == 1), ("xs late attach queued several times")); bus_generic_probe(xs.xs_dev); bus_generic_attach(xs.xs_dev); } /** * Attach to the XenStore. * * This routine also prepares for the probe/attach of drivers that rely * on the XenStore. */ static int xs_attach(device_t dev) { int error; /* Allow us to get device_t from softc and vice-versa. */ xs.xs_dev = dev; device_set_softc(dev, &xs); /* Initialize the interface to xenstore. */ struct proc *p; xs.initialized = false; xs.evtchn = xen_get_xenstore_evtchn(); if (xs.evtchn == 0) { struct evtchn_alloc_unbound alloc_unbound; /* Allocate a local event channel for xenstore */ alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = DOMID_SELF; error = HYPERVISOR_event_channel_op( EVTCHNOP_alloc_unbound, &alloc_unbound); if (error != 0) panic( "unable to alloc event channel for Dom0: %d", error); xs.evtchn = alloc_unbound.port; /* Allocate memory for the xs shared ring */ xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO); xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store)); } else { xs.gpfn = xen_get_xenstore_mfn(); xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE, - PAT_WRITE_BACK); + VM_MEMATTR_XEN); xs.initialized = true; } TAILQ_INIT(&xs.reply_list); TAILQ_INIT(&xs.watch_events); mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF); mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF); sx_init(&xs.xenwatch_mutex, "xenwatch"); sx_init(&xs.request_mutex, "xenstore request"); mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); /* Initialize the shared memory rings to talk to xenstored */ error = xs_init_comms(); if (error) return (error); error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID, 0, "xenwatch"); if (error) return (error); xs.xenwatch_pid = p->p_pid; error = kproc_create(xs_rcv_thread, NULL, NULL, RFHIGHPID, 0, "xenstore_rcv"); xs.xs_attachcb.ich_func = xs_attach_deferred; xs.xs_attachcb.ich_arg = NULL; if (xs.initialized) { config_intrhook_establish(&xs.xs_attachcb); } else { TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL); } return (error); } /** * Prepare for suspension of this VM by halting XenStore access after * all transactions and individual requests have completed. */ static int xs_suspend(device_t dev) { int error; /* Suspend child Xen devices. */ error = bus_generic_suspend(dev); if (error != 0) return (error); sx_xlock(&xs.request_mutex); return (0); } /** * Resume XenStore operations after this VM is resumed. */ static int xs_resume(device_t dev __unused) { struct xs_watch *watch; char token[sizeof(watch) * 2 + 1]; xs_init_comms(); sx_xunlock(&xs.request_mutex); /* * NB: since xenstore childs have not been resumed yet, there's * no need to hold any watch mutex. Having clients try to add or * remove watches at this point (before xenstore is resumed) is * clearly a violantion of the resume order. */ LIST_FOREACH(watch, &xs.registered_watches, list) { sprintf(token, "%lX", (long)watch); xs_watch(watch->node, token); } /* Resume child Xen devices. */ bus_generic_resume(dev); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenstore_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xs_identify), DEVMETHOD(device_probe, xs_probe), DEVMETHOD(device_attach, xs_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xs_suspend), DEVMETHOD(device_resume, xs_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD_END }; DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0); static devclass_t xenstore_devclass; DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0); /*------------------------------- Sysctl Data --------------------------------*/ /* XXX Shouldn't the node be somewhere else? */ SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Xen"); SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, ""); SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); /*-------------------------------- Public API --------------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ bool xs_initialized(void) { return (xs.initialized); } evtchn_port_t xs_evtchn(void) { return (xs.evtchn); } vm_paddr_t xs_address(void) { return (ptoa(xs.gpfn)); } int xs_directory(struct xs_transaction t, const char *dir, const char *node, u_int *num, const char ***result) { struct sbuf *path; char *strings; u_int len = 0; int error; path = xs_join(dir, node); error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len, (void **)&strings); sbuf_delete(path); if (error) return (error); *result = split(strings, len, num); return (0); } int xs_exists(struct xs_transaction t, const char *dir, const char *node) { const char **d; int error, dir_n; error = xs_directory(t, dir, node, &dir_n, &d); if (error) return (0); free(d, M_XENSTORE); return (1); } int xs_read(struct xs_transaction t, const char *dir, const char *node, u_int *len, void **result) { struct sbuf *path; void *ret; int error; path = xs_join(dir, node); error = xs_single(t, XS_READ, sbuf_data(path), len, &ret); sbuf_delete(path); if (error) return (error); *result = ret; return (0); } int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string) { struct sbuf *path; struct iovec iovec[2]; int error; path = xs_join(dir, node); iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path); iovec[0].iov_len = sbuf_len(path) + 1; iovec[1].iov_base = (void *)(uintptr_t) string; iovec[1].iov_len = strlen(string); error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); sbuf_delete(path); return (error); } int xs_mkdir(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node) { struct xs_transaction local_xbt; struct sbuf *root_path_sbuf; struct sbuf *cur_path_sbuf; char *root_path; char *cur_path; const char **dir; int error; retry: root_path_sbuf = xs_join(base, node); cur_path_sbuf = xs_join(base, node); root_path = sbuf_data(root_path_sbuf); cur_path = sbuf_data(cur_path_sbuf); dir = NULL; local_xbt.id = 0; if (xbt.id == 0) { error = xs_transaction_start(&local_xbt); if (error != 0) goto out; xbt = local_xbt; } while (1) { u_int count; u_int i; error = xs_directory(xbt, cur_path, "", &count, &dir); if (error) goto out; for (i = 0; i < count; i++) { error = xs_rm(xbt, cur_path, dir[i]); if (error == ENOTEMPTY) { struct sbuf *push_dir; /* * Descend to clear out this sub directory. * We'll return to cur_dir once push_dir * is empty. */ push_dir = xs_join(cur_path, dir[i]); sbuf_delete(cur_path_sbuf); cur_path_sbuf = push_dir; cur_path = sbuf_data(cur_path_sbuf); break; } else if (error != 0) { goto out; } } free(dir, M_XENSTORE); dir = NULL; if (i == count) { char *last_slash; /* Directory is empty. It is now safe to remove. */ error = xs_rm(xbt, cur_path, ""); if (error != 0) goto out; if (!strcmp(cur_path, root_path)) break; /* Return to processing the parent directory. */ last_slash = strrchr(cur_path, '/'); KASSERT(last_slash != NULL, ("xs_rm_tree: mangled path %s", cur_path)); *last_slash = '\0'; } } out: sbuf_delete(cur_path_sbuf); sbuf_delete(root_path_sbuf); if (dir != NULL) free(dir, M_XENSTORE); if (local_xbt.id != 0) { int terror; terror = xs_transaction_end(local_xbt, /*abort*/error != 0); xbt.id = 0; if (terror == EAGAIN && error == 0) goto retry; } return (error); } int xs_transaction_start(struct xs_transaction *t) { char *id_str; int error; error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL, (void **)&id_str); if (error == 0) { t->id = strtoul(id_str, NULL, 0); free(id_str, M_XENSTORE); } return (error); } int xs_transaction_end(struct xs_transaction t, int abort) { char abortstr[2]; if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL)); } int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) { va_list ap; int error, ns; char *val; error = xs_read(t, dir, node, NULL, (void **) &val); if (error) return (error); va_start(ap, fmt); ns = vsscanf(val, fmt, ap); va_end(ap); free(val, M_XENSTORE); /* Distinctive errno. */ if (ns == 0) return (ERANGE); if (scancountp) *scancountp = ns; return (0); } int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap) { struct sbuf *sb; int error; sb = sbuf_new_auto(); sbuf_vprintf(sb, fmt, ap); sbuf_finish(sb); error = xs_write(t, dir, node, sbuf_data(sb)); sbuf_delete(sb); return (error); } int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) { va_list ap; int error; va_start(ap, fmt); error = xs_vprintf(t, dir, node, fmt, ap); va_end(ap); return (error); } int xs_gather(struct xs_transaction t, const char *dir, ...) { va_list ap; const char *name; int error; va_start(ap, dir); error = 0; while (error == 0 && (name = va_arg(ap, char *)) != NULL) { const char *fmt = va_arg(ap, char *); void *result = va_arg(ap, void *); char *p; error = xs_read(t, dir, name, NULL, (void **) &p); if (error) break; if (fmt) { if (sscanf(p, fmt, result) == 0) error = EINVAL; free(p, M_XENSTORE); } else *(char **)result = p; } va_end(ap); return (error); } int xs_register_watch(struct xs_watch *watch) { /* Pointer in ascii is the token. */ char token[sizeof(watch) * 2 + 1]; int error; watch->pending = 0; sprintf(token, "%lX", (long)watch); mtx_lock(&xs.registered_watches_lock); KASSERT(find_watch(token) == NULL, ("watch already registered")); LIST_INSERT_HEAD(&xs.registered_watches, watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_watch(watch->node, token); /* Ignore errors due to multiple registration. */ if (error == EEXIST) error = 0; if (error != 0) { mtx_lock(&xs.registered_watches_lock); LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); } return (error); } void xs_unregister_watch(struct xs_watch *watch) { struct xs_stored_msg *msg, *tmp; char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); mtx_lock(&xs.registered_watches_lock); if (find_watch(token) == NULL) { mtx_unlock(&xs.registered_watches_lock); return; } LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_unwatch(watch->node, token); if (error) log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n", watch->node, error); /* Cancel pending watch events. */ mtx_lock(&xs.watch_events_lock); TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) { if (msg->u.watch.handle != watch) continue; TAILQ_REMOVE(&xs.watch_events, msg, list); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.watch_events_lock); /* Flush any currently-executing callback, unless we are it. :-) */ if (curproc->p_pid != xs.xenwatch_pid) { sx_xlock(&xs.xenwatch_mutex); sx_xunlock(&xs.xenwatch_mutex); } } void xs_lock(void) { sx_xlock(&xs.request_mutex); return; } void xs_unlock(void) { sx_xunlock(&xs.request_mutex); return; } diff --git a/sys/x86/include/xen/xen-os.h b/sys/x86/include/xen/xen-os.h index 86bacadc8521..78d44ad7c9d7 100644 --- a/sys/x86/include/xen/xen-os.h +++ b/sys/x86/include/xen/xen-os.h @@ -1,41 +1,44 @@ /***************************************************************************** * x86/xen/xen-os.h * * Random collection of macros and definition * * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _MACHINE_X86_XEN_XEN_OS_H_ #define _MACHINE_X86_XEN_XEN_OS_H_ +/* Shared memory needs write-back as its cache attribute for coherency. */ +#define VM_MEMATTR_XEN VM_MEMATTR_WRITE_BACK + /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ /* If non-zero, the hypervisor has been configured to use a direct vector */ extern int xen_vector_callback_enabled; #endif /* !__ASSEMBLY__ */ #endif /* _MACHINE_X86_XEN_XEN_OS_H_ */