diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c index aefa5cdb9250..b832bbaf313a 100644 --- a/sys/dev/xen/balloon/balloon.c +++ b/sys/dev/xen/balloon/balloon.c @@ -1,419 +1,418 @@ /****************************************************************************** * balloon.c * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); /* Convert from KB (as fetched from xenstore) to number of PAGES */ #define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) struct mtx balloon_mutex; /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; /* We may hit the hard limit in Xen. If we do then we remember it. */ unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. */ unsigned long driver_pages; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; }; static struct balloon_stats balloon_stats; #define bs balloon_stats SYSCTL_DECL(_dev_xen); static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, &bs.current_pages, 0, "Current allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, &bs.target_pages, 0, "Target allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, &bs.driver_pages, 0, "Driver pages"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, &bs.hard_limit, 0, "Xen hard limit"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, &bs.balloon_low, 0, "Low-mem balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, &bs.balloon_high, 0, "High-mem balloon"); /* List of ballooned pages, threaded through the mem_map array. 
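 */

/*
 * Illustrative sketch, not part of the driver: the memory/target value
 * fetched from the XenStore is expressed in KiB, and KB_TO_PAGE_SHIFT
 * converts it to a page count.  Assuming 4 KiB pages, PAGE_SHIFT is 12,
 * so KB_TO_PAGE_SHIFT is 2 and a target of 524288 KiB (512 MiB) becomes
 * 524288 >> 2 == 131072 pages.
 */
static inline unsigned long
xb_kb_to_pages(unsigned long kb)
{

	/* Same conversion watch_target() performs on the fetched value. */
	return (kb >> KB_TO_PAGE_SHIFT);
}

/* The ballooned pages list, as described above: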
*/ static TAILQ_HEAD(,vm_page) ballooned_pages; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_mem: " fmt, ##args) static unsigned long current_target(void) { unsigned long target = min(bs.target_pages, bs.hard_limit); if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) target = bs.current_pages + bs.balloon_low + bs.balloon_high; return (target); } static unsigned long minimum_target(void) { unsigned long min_pages, curr_pages = current_target(); #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* * Simple continuous piecewiese linear function: * max MiB -> min MiB gradient * 0 0 * 16 16 * 32 24 * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) * 8192 552 (1/32) * 32768 1320 * 131072 4392 */ if (realmem < MB2PAGES(128)) min_pages = MB2PAGES(8) + (realmem >> 1); else if (realmem < MB2PAGES(512)) min_pages = MB2PAGES(40) + (realmem >> 2); else if (realmem < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (realmem >> 3); else min_pages = MB2PAGES(296) + (realmem >> 5); #undef MB2PAGES /* Don't enforce growth */ return (min(min_pages, curr_pages)); } static int increase_reservation(unsigned long nr_pages) { unsigned long i; vm_page_t page; long rc; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (page = TAILQ_FIRST(&ballooned_pages), i = 0; i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { KASSERT(page != NULL, ("ballooned_pages list corrupt")); frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret; /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); } if (rc >= 0) bs.hard_limit = (bs.current_pages + rc - bs.driver_pages); goto out; } for (i = 0; i < nr_pages; i++) { page = TAILQ_FIRST(&ballooned_pages); KASSERT(page != NULL, ("Unable to get ballooned page")); TAILQ_REMOVE(&ballooned_pages, page, plinks.q); bs.balloon_low--; KASSERT(xen_feature(XENFEAT_auto_translated_physmap), ("auto translated physmap but mapping is valid")); vm_page_free(page); } bs.current_pages += nr_pages; out: return (0); } static int decrease_reservation(unsigned long nr_pages) { unsigned long i; vm_page_t page; int need_sleep = 0; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (i = 0; i < nr_pages; i++) { if ((page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_ZERO)) == NULL) { nr_pages = i; need_sleep = 1; break; } if ((page->flags & PG_ZERO) == 0) { /* * Zero the page, or else we might be leaking * important data to other domains on the same * host. Xen doesn't scrub ballooned out memory * pages, the guest is in charge of making * sure that no information is leaked. 
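	 *
	 * (Pages obtained with VM_ALLOC_ZERO carry the PG_ZERO flag
	 * only when the VM system handed us an already-zeroed page;
	 * the flag test above skips that redundant scrub, and
	 * pmap_zero_page() below covers the rest.)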
*/ pmap_zero_page(page); } frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); bs.balloon_low++; } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); bs.current_pages -= nr_pages; return (need_sleep); } /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(void *unused) { int need_sleep = 0; long credit; mtx_lock(&balloon_mutex); for (;;) { int sleep_time; do { credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) need_sleep = (decrease_reservation(-credit) != 0); } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ if (current_target() != bs.current_pages) sleep_time = hz; else sleep_time = 0; msleep(balloon_process, &balloon_mutex, 0, "balloon", sleep_time); } mtx_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ bs.hard_limit = ~0UL; bs.target_pages = max(target, minimum_target()); wakeup(balloon_process); } static struct xs_watch target_watch = { .node = "memory/target" }; /* React to a change in the target key */ static void watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ return; } /* * The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ set_new_target(new_target >> KB_TO_PAGE_SHIFT); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xenballoon_identify(driver_t *driver __unused, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xen Balloon device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xenballoon_probe(device_t dev) { device_set_desc(dev, "Xen Balloon Device"); return (0); } /** * \brief Attach the Xen Balloon device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenballoon_attach(device_t dev) { int err; mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); - bs.current_pages = xen_pv_domain() ? 
- HYPERVISOR_start_info->nr_pages : realmem; + bs.current_pages = realmem; bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; bs.driver_pages = 0UL; bs.hard_limit = ~0UL; kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); target_watch.callback = watch_target; err = xs_register_watch(&target_watch); if (err) device_printf(dev, "xenballoon: failed to set balloon watcher\n"); return (err); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenballoon_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenballoon_identify), DEVMETHOD(device_probe, xenballoon_probe), DEVMETHOD(device_attach, xenballoon_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); devclass_t xenballoon_devclass; DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, xenballoon_devclass, NULL, NULL); diff --git a/sys/dev/xen/console/xen_console.c b/sys/dev/xen/console/xen_console.c index 94f08b7f02ad..f0bcb17a59ef 100644 --- a/sys/dev/xen/console/xen_console.c +++ b/sys/dev/xen/console/xen_console.c @@ -1,797 +1,794 @@ /* * Copyright (c) 2015 Julien Grall * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include + #include #include #include #include #include "opt_ddb.h" #include "opt_printf.h" #ifdef DDB #include #endif static char driver_name[] = "xc"; struct xencons_priv; typedef void xencons_early_init_t(struct xencons_priv *cons); typedef int xencons_init_t(device_t dev, struct tty *tp, driver_intr_t intr_handler); typedef int xencons_read_t(struct xencons_priv *cons, char *buffer, unsigned int size); typedef int xencons_write_t(struct xencons_priv *cons, const char *buffer, unsigned int size); struct xencons_ops { /* * Called by the low-level driver during early boot. * Only the minimal setup to get a console should be done here.
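 *
 * Two backends implement this interface below: xencons_hypervisor_ops,
 * which writes through HYPERVISOR_console_io (used by the initial
 * domain), and xencons_ring_ops, which uses the shared ring and event
 * channel (used by unprivileged guests).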
*/ xencons_early_init_t *early_init; /* Prepare the console to be fully used */ xencons_init_t *init; /* Read/write helpers */ xencons_read_t *read; xencons_write_t *write; }; struct xencons_priv { /* Mutex to protect the shared ring and the internal buffers */ struct mtx mtx; /* Interrupt handler used to notify the backend */ xen_intr_handle_t intr_handle; /* KDB internal state */ #ifdef KDB int altbrk; #endif /* Status of the tty */ bool opened; /* Callout used when the write buffer is full */ struct callout callout; /* Internal buffers must be used with mtx locked */ #define WBUF_SIZE 4096 #define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) char wbuf[WBUF_SIZE]; unsigned int wc, wp; /* Consumer/producer wbuf */ #define RBUF_SIZE 1024 #define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) char rbuf[RBUF_SIZE]; unsigned int rc, rp; /* Consumer/producer rbuf */ /* Pointer to the console operations */ const struct xencons_ops *ops; /* * Ring specific fields * XXX: make a union? */ /* Event channel number for early notification (PV only) */ uint32_t evtchn; /* Console shared page */ struct xencons_interface *intf; }; /* * Data for the main console * Necessary to support low-level console driver */ static struct xencons_priv main_cons; #define XC_POLLTIME (hz/10) -/* - * Virtual address of the shared console page (only for PV guest) - * TODO: Introduce a function to set it - */ -char *console_page; - /*----------------------------- Debug function ------------------------------*/ struct putchar_arg { char *buf; size_t size; size_t n_next; }; static void putchar(int c, void *arg) { struct putchar_arg *pca; pca = (struct putchar_arg *)arg; if (pca->buf == NULL) { /* * We have no buffer, output directly to the * console char by char. */ HYPERVISOR_console_write((char *)&c, 1); } else { pca->buf[pca->n_next++] = c; if ((pca->size == pca->n_next) || (c == '\0')) { /* Flush the buffer */ HYPERVISOR_console_write(pca->buf, pca->n_next); pca->n_next = 0; } } } void xc_printf(const char *fmt, ...) { va_list ap; struct putchar_arg pca; #ifdef PRINTF_BUFR_SIZE char buf[PRINTF_BUFR_SIZE]; pca.buf = buf; pca.size = sizeof(buf); pca.n_next = 0; #else pca.buf = NULL; pca.size = 0; #endif KASSERT((xen_domain()), ("call to xc_printf from non-Xen guest")); va_start(ap, fmt); kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); #ifdef PRINTF_BUFR_SIZE if (pca.n_next != 0) HYPERVISOR_console_write(buf, pca.n_next); #endif } /*---------------------- Helpers for the console lock -----------------------*/ /* * The lock is not used when the kernel is panicking as it will never recover * and we want to output no matter what it costs. */ static inline void xencons_lock(struct xencons_priv *cons) { if (panicstr == NULL) mtx_lock_spin(&cons->mtx); } static inline void xencons_unlock(struct xencons_priv *cons) { if (panicstr == NULL) mtx_unlock_spin(&cons->mtx); } #define xencons_lock_assert(cons) mtx_assert(&(cons)->mtx, MA_OWNED) /*------------------ Helpers for the hypervisor console ---------------------*/ static void xencons_early_init_hypervisor(struct xencons_priv *cons) { /* * Nothing to set up for the low-level console when using * the hypervisor console.
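	 * Output is emitted directly via HYPERVISOR_console_io
	 * hypercalls, so there is no shared page to map and no event
	 * channel to record.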
*/ } static int xencons_init_hypervisor(device_t dev, struct tty *tp, driver_intr_t intr_handler) { struct xencons_priv *cons; int err; cons = tty_softc(tp); err = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL, intr_handler, tp, INTR_TYPE_TTY | INTR_MPSAFE, &cons->intr_handle); if (err != 0) device_printf(dev, "Can't register console interrupt\n"); return (err); } static int xencons_write_hypervisor(struct xencons_priv *cons, const char *buffer, unsigned int size) { HYPERVISOR_console_io(CONSOLEIO_write, size, buffer); return (size); } static int xencons_read_hypervisor(struct xencons_priv *cons, char *buffer, unsigned int size) { xencons_lock_assert(cons); return (HYPERVISOR_console_io(CONSOLEIO_read, size, buffer)); } static const struct xencons_ops xencons_hypervisor_ops = { .early_init = xencons_early_init_hypervisor, .init = xencons_init_hypervisor, .read = xencons_read_hypervisor, .write = xencons_write_hypervisor, }; /*------------------ Helpers for the ring console ---------------------------*/ static void xencons_early_init_ring(struct xencons_priv *cons) { - /* The shared page for PV is already mapped by the boot code */ - cons->intf = (struct xencons_interface *)console_page; - cons->evtchn = HYPERVISOR_start_info->console.domU.evtchn; + cons->intf = pmap_mapdev_attr(ptoa(xen_get_console_mfn()), PAGE_SIZE, + PAT_WRITE_BACK); + cons->evtchn = xen_get_console_evtchn(); } static int xencons_init_ring(device_t dev, struct tty *tp, driver_intr_t intr_handler) { struct xencons_priv *cons; int err; cons = tty_softc(tp); if (cons->evtchn == 0) return (ENODEV); err = xen_intr_bind_local_port(dev, cons->evtchn, NULL, intr_handler, tp, INTR_TYPE_TTY | INTR_MPSAFE, &cons->intr_handle); if (err != 0) return (err); return (0); } static void xencons_notify_ring(struct xencons_priv *cons) { /* * The console may be used before the ring interrupt is properly * initialized. * If so, fall back to using the event channel hypercall directly.
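	 * The handle only becomes non-NULL once xencons_init_ring() has
	 * bound the event channel to an interrupt handler.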
*/ if (__predict_true(cons->intr_handle != NULL)) xen_intr_signal(cons->intr_handle); else { struct evtchn_send send = { .port = cons->evtchn }; HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); } } static int xencons_write_ring(struct xencons_priv *cons, const char *buffer, unsigned int size) { struct xencons_interface *intf; XENCONS_RING_IDX wcons, wprod; int sent; intf = cons->intf; xencons_lock_assert(cons); wcons = intf->out_cons; wprod = intf->out_prod; mb(); KASSERT((wprod - wcons) <= sizeof(intf->out), ("console send ring inconsistent")); for (sent = 0; sent < size; sent++, wprod++) { if ((wprod - wcons) >= sizeof(intf->out)) break; intf->out[MASK_XENCONS_IDX(wprod, intf->out)] = buffer[sent]; } wmb(); intf->out_prod = wprod; xencons_notify_ring(cons); return (sent); } static int xencons_read_ring(struct xencons_priv *cons, char *buffer, unsigned int size) { struct xencons_interface *intf; XENCONS_RING_IDX rcons, rprod; unsigned int rsz; intf = cons->intf; xencons_lock_assert(cons); rcons = intf->in_cons; rprod = intf->in_prod; rmb(); for (rsz = 0; rsz < size; rsz++, rcons++) { if (rprod == rcons) break; buffer[rsz] = intf->in[MASK_XENCONS_IDX(rcons, intf->in)]; } wmb(); intf->in_cons = rcons; /* No need to notify the backend if nothing has been read */ if (rsz != 0) xencons_notify_ring(cons); return (rsz); } static const struct xencons_ops xencons_ring_ops = { .early_init = xencons_early_init_ring, .init = xencons_init_ring, .read = xencons_read_ring, .write = xencons_write_ring, }; /*------------------ Common implementation of the console -------------------*/ /* * Called by the low-level driver during early boot to initialize the * main console driver. * Only the minimal set up to get a console should be done here. */ static void xencons_early_init(void) { mtx_init(&main_cons.mtx, "XCONS LOCK", NULL, MTX_SPIN); if (xen_initial_domain()) main_cons.ops = &xencons_hypervisor_ops; else main_cons.ops = &xencons_ring_ops; main_cons.ops->early_init(&main_cons); } /* * Receive character from the console and put them in the internal buffer * XXX: Handle overflow of the internal buffer */ static void xencons_rx(struct xencons_priv *cons) { char buf[16]; int sz; xencons_lock(cons); while ((sz = cons->ops->read(cons, buf, sizeof(buf))) > 0) { int i; for (i = 0; i < sz; i++) cons->rbuf[RBUF_MASK(cons->rp++)] = buf[i]; } xencons_unlock(cons); } /* Return true if the write buffer is full */ static bool xencons_tx_full(struct xencons_priv *cons) { unsigned int used; xencons_lock(cons); used = cons->wp - cons->wc; xencons_unlock(cons); return (used >= WBUF_SIZE); } static void xencons_tx_flush(struct xencons_priv *cons, int force) { int sz; xencons_lock(cons); while (cons->wc != cons->wp) { int sent; sz = cons->wp - cons->wc; if (sz > (WBUF_SIZE - WBUF_MASK(cons->wc))) sz = WBUF_SIZE - WBUF_MASK(cons->wc); sent = cons->ops->write(cons, &cons->wbuf[WBUF_MASK(cons->wc)], sz); /* * The other end may not have been initialized. Ignore * the force. */ if (__predict_false(sent < 0)) break; /* * If force is set, spin until the console data is * flushed through the domain controller. 
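	 * When nothing was consumed and force is clear, stop here and
	 * let the caller retry later (the TTY layer re-arms a callout
	 * through xencons_timeout()).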
*/ if (sent == 0 && __predict_true(!force)) break; cons->wc += sent; } xencons_unlock(cons); } static bool xencons_putc(struct xencons_priv *cons, int c, bool force_flush) { xencons_lock(cons); if ((cons->wp - cons->wc) < WBUF_SIZE) cons->wbuf[WBUF_MASK(cons->wp++)] = c; xencons_unlock(cons); xencons_tx_flush(cons, force_flush); return (xencons_tx_full(cons)); } static int xencons_getc(struct xencons_priv *cons) { int ret; xencons_lock(cons); if (cons->rp != cons->rc) { /* We need to return only one char */ ret = (int)cons->rbuf[RBUF_MASK(cons->rc)]; cons->rc++; } else { ret = -1; } xencons_unlock(cons); return (ret); } static bool xencons_tx(struct tty *tp) { bool cons_full; char c; struct xencons_priv *cons; cons = tty_softc(tp); tty_lock_assert(tp, MA_OWNED); /* * Don't transmit any character if the buffer is full. Otherwise, * characters may be lost */ if (xencons_tx_full(cons)) return (false); cons_full = false; while (!cons_full && ttydisc_getc(tp, &c, 1) == 1) cons_full = xencons_putc(cons, c, false); return (!cons_full); } static void xencons_intr(void *arg) { struct tty *tp; struct xencons_priv *cons; int ret; tp = arg; cons = tty_softc(tp); /* * The input will be used by the low-level console when KDB is active */ if (kdb_active) return; /* * It's not necessary to retrieve input when the tty is not opened */ if (!cons->opened) return; xencons_rx(cons); tty_lock(tp); while ((ret = xencons_getc(cons)) != -1) { #ifdef KDB kdb_alt_break(ret, &cons->altbrk); #endif ttydisc_rint(tp, ret, 0); } ttydisc_rint_done(tp); tty_unlock(tp); /* Try to flush remaining characters if necessary */ xencons_tx_flush(cons, 0); } /* * Helpers to call while shutting down: * - Force flush all output */ static void xencons_shutdown(void *arg, int howto) { struct tty *tp; tp = arg; xencons_tx_flush(tty_softc(tp), 1); } /*---------------------- Low-level console driver ---------------------------*/ static void xencons_cnprobe(struct consdev *cp) { if (!xen_pv_domain()) return; cp->cn_pri = CN_REMOTE; sprintf(cp->cn_name, "%s0", driver_name); } static void xencons_cninit(struct consdev *cp) { xencons_early_init(); } static void xencons_cnterm(struct consdev *cp) { } static void xencons_cngrab(struct consdev *cp) { } static void xencons_cnungrab(struct consdev *cp) { } static int xencons_cngetc(struct consdev *dev) { xencons_rx(&main_cons); return (xencons_getc(&main_cons)); } static void xencons_cnputc(struct consdev *dev, int c) { /* * The low-level console is used by KDB and panic. We have to ensure * that any character sent will be seen by the backend. 
*/ xencons_putc(&main_cons, c, true); } CONSOLE_DRIVER(xencons); /*----------------------------- TTY driver ---------------------------------*/ static int xencons_tty_open(struct tty *tp) { struct xencons_priv *cons; cons = tty_softc(tp); cons->opened = true; return (0); } static void xencons_tty_close(struct tty *tp) { struct xencons_priv *cons; cons = tty_softc(tp); cons->opened = false; } static void xencons_timeout(void *v) { struct tty *tp; struct xencons_priv *cons; tp = v; cons = tty_softc(tp); if (!xencons_tx(tp)) callout_reset(&cons->callout, XC_POLLTIME, xencons_timeout, tp); } static void xencons_tty_outwakeup(struct tty *tp) { struct xencons_priv *cons; cons = tty_softc(tp); callout_stop(&cons->callout); if (!xencons_tx(tp)) callout_reset(&cons->callout, XC_POLLTIME, xencons_timeout, tp); } static struct ttydevsw xencons_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_open = xencons_tty_open, .tsw_close = xencons_tty_close, .tsw_outwakeup = xencons_tty_outwakeup, }; /*------------------------ Main console driver ------------------------------*/ static void xencons_identify(driver_t *driver, device_t parent) { device_t child; #if defined(__arm__) || defined(__aarch64__) if (!xen_domain()) return; #else if (!xen_pv_domain()) return; #endif child = BUS_ADD_CHILD(parent, 0, driver_name, 0); } static int xencons_probe(device_t dev) { device_set_desc(dev, "Xen Console"); return (BUS_PROBE_NOWILDCARD); } static int xencons_attach(device_t dev) { struct tty *tp; /* * The main console is already allocated statically in order to * support low-level console */ struct xencons_priv *cons; int err; cons = &main_cons; tp = tty_alloc(&xencons_ttydevsw, cons); tty_makedev(tp, NULL, "%s%r", driver_name, 0); device_set_softc(dev, tp); callout_init_mtx(&cons->callout, tty_getlock(tp), 0); err = cons->ops->init(dev, tp, xencons_intr); if (err != 0) { device_printf(dev, "Unable to initialize the console (%d)\n", err); return (err); } /* register handler to flush console on shutdown */ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xencons_shutdown, tp, SHUTDOWN_PRI_DEFAULT)) == NULL) device_printf(dev, "shutdown event registration failed!\n"); return (0); } static int xencons_resume(device_t dev) { struct xencons_priv *cons; struct tty *tp; int err; tp = device_get_softc(dev); cons = tty_softc(tp); xen_intr_unbind(&cons->intr_handle); err = cons->ops->init(dev, tp, xencons_intr); if (err != 0) { device_printf(dev, "Unable to resume the console (%d)\n", err); return (err); } return (0); } static devclass_t xencons_devclass; static device_method_t xencons_methods[] = { DEVMETHOD(device_identify, xencons_identify), DEVMETHOD(device_probe, xencons_probe), DEVMETHOD(device_attach, xencons_attach), DEVMETHOD(device_resume, xencons_resume), DEVMETHOD_END }; static driver_t xencons_driver = { driver_name, xencons_methods, 0, }; DRIVER_MODULE(xc, xenpv, xencons_driver, xencons_devclass, 0, 0); diff --git a/sys/dev/xen/xenstore/xenstore.c b/sys/dev/xen/xenstore/xenstore.c index 197e16029ef4..f8da1c59f915 100644 --- a/sys/dev/xen/xenstore/xenstore.c +++ b/sys/dev/xen/xenstore/xenstore.c @@ -1,1645 +1,1658 @@ /****************************************************************************** * xenstore.c * * Low-level kernel interface to the XenStore. 
* * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /** * \file xenstore.c * \brief XenStore interface * * The XenStore interface is a simple storage system that is a means of * communicating state and configuration data between the Xen Domain 0 * and the various guest domains. All configuration data other than * a small amount of essential information required during the early * boot process of launching a Xen aware guest, is managed using the * XenStore. * * The XenStore is ASCII string based, and has a structure and semantics * similar to a filesystem. There are files and directories, the directories * able to contain files or other directories. The depth of the hierarchy * is only limited by the XenStore's maximum path length. * * The communication channel between the XenStore service and other * domains is via two, guest specific, ring buffers in a shared memory * area. One ring buffer is used for communicating in each direction. * The grant table references for this shared memory are given to the * guest either via the xen_start_info structure for a fully para- * virtualized guest, or via HVM hypercalls for a hardware virtualized * guest. * * The XenStore communication relies on an event channel and thus * interrupts. For this reason, the attachment of the XenStore * relies on an interrupt driven configuration hook to hold off * boot processing until communication with the XenStore service * can be established. * * Several Xen services depend on the XenStore, most notably the * XenBus used to discover and manage Xen devices. These services * are implemented as NewBus child attachments to a bus exported * by this XenStore driver. */ static struct xs_watch *find_watch(const char *token); MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results"); /** * Pointer to shared memory communication structures allowing us * to communicate with the XenStore service. * * When operating in full PV mode, this pointer is set early in kernel * startup from within xen_machdep.c. 
In HVM mode, we use hypercalls * to get the guest frame number for the shared page and then map it * into kva. See xs_init() for details. */ -struct xenstore_domain_interface *xen_store; +static struct xenstore_domain_interface *xen_store; /*-------------------------- Private Data Structures ------------------------*/ /** * Structure capturing messages received from the XenStore service. */ struct xs_stored_msg { TAILQ_ENTRY(xs_stored_msg) list; struct xsd_sockmsg hdr; union { /* Queued replies. */ struct { char *body; } reply; /* Queued watch events. */ struct { struct xs_watch *handle; const char **vec; u_int vec_size; } watch; } u; }; TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg); /** * Container for all XenStore-related state. */ struct xs_softc { /** Newbus device for the XenStore. */ device_t xs_dev; /** * Lock serializing access to ring producer/consumer * indexes. Use of this lock guarantees that wakeups * of blocking readers/writers are not missed due to * races with the XenStore service. */ struct mtx ring_lock; /* * Mutex used to ensure exclusive access to the outgoing * communication ring. We use a lock type that can be * held while sleeping so that xs_write() can block waiting * for space in the ring to free up, without allowing another * writer to come in and corrupt a partial message write. */ struct sx request_mutex; /** * A list of replies to our requests. * * The reply list is filled by xs_rcv_thread(). It * is consumed by the context that issued the request * to which a reply is made. The requester blocks in * xs_read_reply(). * * \note Only one requesting context can be active at a time. * This is guaranteed by the request_mutex and ensures * that the requester sees replies matching the order * of its requests. */ struct xs_stored_msg_list reply_list; /** Lock protecting the reply list. */ struct mtx reply_lock; /** * List of registered watches. */ struct xs_watch_list registered_watches; /** Lock protecting the registered watches list. */ struct mtx registered_watches_lock; /** * List of pending watch callback events. */ struct xs_stored_msg_list watch_events; /** Lock protecting the watch callback list. */ struct mtx watch_events_lock; /** * The process ID of the xenwatch thread. */ pid_t xenwatch_pid; /** * Sleepable mutex used to gate the execution of XenStore * watch event callbacks. * * xenwatch_thread holds an exclusive lock on this mutex * while delivering event callbacks, and xenstore_unregister_watch() * uses an exclusive lock of this mutex to guarantee that no * callbacks of the just unregistered watch are pending * before returning to its caller. */ struct sx xenwatch_mutex; /** * The HVM guest pseudo-physical frame number. This is Xen's mapping * of the true machine frame number into our "physical address space". */ unsigned long gpfn; /** * The event channel for communicating with the * XenStore service. */ int evtchn; /** Handle for XenStore interrupts. */ xen_intr_handle_t xen_intr_handle; /** * Interrupt driven config hook allowing us to defer * attaching children until interrupts (and thus communication * with the XenStore service) are available. */ struct intr_config_hook xs_attachcb; /** * Xenstore is a user-space process that usually runs in Dom0, * so if this domain is booting as Dom0, xenstore won't be accessible, * and we have to defer the initialization of xenstore-related * devices to later (when xenstore is started).
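	 *
	 * The flag is flipped by xs_intr() when the first event arrives
	 * from the XenStore service, which also enqueues the
	 * xs_late_init task to probe and attach the deferred children.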
*/ bool initialized; /** * Task to run when xenstore is initialized (Dom0 only), will * take care of attaching xenstore related devices. */ struct task xs_late_init; }; /*-------------------------------- Global Data ------------------------------*/ static struct xs_softc xs; /*------------------------- Private Utility Functions -----------------------*/ /** * Count and optionally record pointers to a number of NUL terminated * strings in a buffer. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param dest An array to store pointers to each string found in strings. * \param len The length of the buffer pointed to by strings. * * \return A count of the number of strings found. */ static u_int extract_strings(const char *strings, const char **dest, u_int len) { u_int num; const char *p; for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) { if (dest != NULL) *dest++ = p; num++; } return (num); } /** * Convert a contiguous buffer containing a series of NUL terminated * strings into an array of pointers to strings. * * The returned pointer references the array of string pointers which * is followed by the storage for the string data. It is the client's * responsibility to free this storage. * * The storage addressed by strings is free'd prior to split returning. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param len The length of the buffer pointed to by strings. * \param num The number of strings found and returned in the strings * array. * * \return An array of pointers to the strings found in the input buffer. */ static const char ** split(char *strings, u_int len, u_int *num) { const char **ret; /* Protect against unterminated buffers. */ if (len > 0) strings[len - 1] = '\0'; /* Count the strings. */ *num = extract_strings(strings, /*dest*/NULL, len); /* Transfer to one big alloc for easy freeing by the caller. */ ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK); memcpy(&ret[*num], strings, len); free(strings, M_XENSTORE); /* Extract pointers to newly allocated array. */ strings = (char *)&ret[*num]; (void)extract_strings(strings, /*dest*/ret, len); return (ret); } /*------------------------- Public Utility Functions -------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ struct sbuf * xs_join(const char *dir, const char *name) { struct sbuf *sb; sb = sbuf_new_auto(); sbuf_cat(sb, dir); if (name[0] != '\0') { sbuf_putc(sb, '/'); sbuf_cat(sb, name); } sbuf_finish(sb); return (sb); } /*-------------------- Low Level Communication Management --------------------*/ /** * Interrupt handler for the XenStore event channel. * * XenStore reads and writes block on "xen_store" for buffer * space. Wakeup any blocking operations when the XenStore * service has modified the queues. */ static void xs_intr(void * arg __unused /*__attribute__((unused))*/) { /* If xenstore has not been initialized, initialize it now */ if (!xs.initialized) { xs.initialized = true; /* * Since this task is probing and attaching devices we * have to hold the Giant lock. */ taskqueue_enqueue(taskqueue_swi_giant, &xs.xs_late_init); } /* * Hold ring lock across wakeup so that clients * cannot miss a wakeup. */ mtx_lock(&xs.ring_lock); wakeup(xen_store); mtx_unlock(&xs.ring_lock); } /** * Verify that the indexes for a ring are valid. * * The difference between the producer and consumer cannot * exceed the size of the ring. * * \param cons The consumer index for the ring to test. 
* \param prod The producer index for the ring to test. * * \retval 1 If indexes are in range. * \retval 0 If the indexes are out of range. */ static int xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) { return ((prod - cons) <= XENSTORE_RING_SIZE); } /** * Return a pointer to, and the length of, the contiguous * free region available for output in a ring buffer. * * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous storage available. * * \return A pointer to the start location of the free region. */ static void * xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) *len = XENSTORE_RING_SIZE - (prod - cons); return (buf + MASK_XENSTORE_IDX(prod)); } /** * Return a pointer to, and the length of, the contiguous * data available to read from a ring buffer. * * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous data available to read. * * \return A pointer to the start location of the available data. */ static const void * xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, const char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); if ((prod - cons) < *len) *len = prod - cons; return (buf + MASK_XENSTORE_IDX(cons)); } /** * Transmit data to the XenStore service. * * \param tdata A pointer to the contiguous data to send. * \param len The amount of data to send. * * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * \invariant xs.request_mutex exclusively locked. */ static int xs_write_store(const void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; const char *data = (const char *)tdata; int error; sx_assert(&xs.request_mutex, SX_XLOCKED); while (len != 0) { void *dst; u_int avail; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->req_cons; prod = xen_store->req_prod; if ((prod - cons) == XENSTORE_RING_SIZE) { /* * Output ring is full. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbwrite", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); /* Try again. */ continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->req_cons = xen_store->req_prod = 0; return (EIO); } dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail); if (avail > len) avail = len; memcpy(dst, data, avail); data += avail; len -= avail; /* * The store to the producer index, which indicates * to the other side that new data has arrived, must * be visible only after our copy of the data into the * ring has completed. */ wmb(); xen_store->req_prod += avail; /* * xen_intr_signal() implies mb(). The other side will see * the change to req_prod at the time of the interrupt. 
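	 * (The ring indexes are free-running uint32_t values masked
	 * only on access, so req_prod - req_cons yields the bytes in
	 * flight even across wraparound; xs_check_indexes() relies on
	 * the same property.)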
*/ xen_intr_signal(xs.xen_intr_handle); } return (0); } /** * Receive data from the XenStore service. * * \param tdata A pointer to the contiguous buffer to receive the data. * \param len The amount of data to receive. * * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * * \note xs_read does not perform any internal locking to guarantee * serial access to the incoming ring buffer. However, there * is only one context processing reads: xs_rcv_thread(). */ static int xs_read_store(void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; char *data = (char *)tdata; int error; while (len != 0) { u_int avail; const char *src; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->rsp_cons; prod = xen_store->rsp_prod; if (cons == prod) { /* * Nothing to read. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbread", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->rsp_cons = xen_store->rsp_prod = 0; return (EIO); } src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail); if (avail > len) avail = len; /* * Insure the data we read is related to the indexes * we read above. */ rmb(); memcpy(data, src, avail); data += avail; len -= avail; /* * Insure that the producer of this ring does not see * the ring space as free until after we have copied it * out. */ mb(); xen_store->rsp_cons += avail; /* * xen_intr_signal() implies mb(). The producer will see * the updated consumer index when the event is delivered. */ xen_intr_signal(xs.xen_intr_handle); } return (0); } /*----------------------- Received Message Processing ------------------------*/ /** * Block reading the next message from the XenStore service and * process the result. * * \param type The returned type of the XenStore message received. * * \return 0 on success. Otherwise an errno value indicating the * type of failure encountered. 
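 */

/*
 * Illustrative sketch, not part of the driver: the wire format parsed
 * below is a struct xsd_sockmsg header followed by hdr.len bytes of
 * payload; xs_talkv() builds requests the same way.  Framing a
 * hypothetical single-string request would look like this:
 */
static void
example_frame_request(struct xsd_sockmsg *hdr, const char *body)
{

	hdr->type = XS_READ;		/* echoed back in the reply */
	hdr->req_id = 0;		/* unused by this driver */
	hdr->tx_id = 0;			/* 0 means no transaction */
	hdr->len = strlen(body) + 1;	/* payload bytes that follow */
}

/* xs_process_msg, documented above: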
*/ static int xs_process_msg(enum xsd_sockmsg_type *type) { struct xs_stored_msg *msg; char *body; int error; msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK); error = xs_read_store(&msg->hdr, sizeof(msg->hdr)); if (error) { free(msg, M_XENSTORE); return (error); } body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK); error = xs_read_store(body, msg->hdr.len); if (error) { free(body, M_XENSTORE); free(msg, M_XENSTORE); return (error); } body[msg->hdr.len] = '\0'; *type = msg->hdr.type; if (msg->hdr.type == XS_WATCH_EVENT) { msg->u.watch.vec = split(body, msg->hdr.len, &msg->u.watch.vec_size); mtx_lock(&xs.registered_watches_lock); msg->u.watch.handle = find_watch( msg->u.watch.vec[XS_WATCH_TOKEN]); if (msg->u.watch.handle != NULL) { mtx_lock(&xs.watch_events_lock); TAILQ_INSERT_TAIL(&xs.watch_events, msg, list); wakeup(&xs.watch_events); mtx_unlock(&xs.watch_events_lock); } else { free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.registered_watches_lock); } else { msg->u.reply.body = body; mtx_lock(&xs.reply_lock); TAILQ_INSERT_TAIL(&xs.reply_list, msg, list); wakeup(&xs.reply_list); mtx_unlock(&xs.reply_lock); } return (0); } /** * Thread body of the XenStore receive thread. * * This thread blocks waiting for data from the XenStore service * and processes and received messages. */ static void xs_rcv_thread(void *arg __unused) { int error; enum xsd_sockmsg_type type; for (;;) { error = xs_process_msg(&type); if (error) printf("XENSTORE error %d while reading message\n", error); } } /*---------------- XenStore Message Request/Reply Processing -----------------*/ #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) /** * Convert a XenStore error string into an errno number. * * \param errorstring The error string to convert. * * \return The errno best matching the input string. * * \note Unknown error strings are converted to EINVAL. */ static int xs_get_error(const char *errorstring) { u_int i; for (i = 0; i < xsd_error_count; i++) { if (!strcmp(errorstring, xsd_errors[i].errstring)) return (xsd_errors[i].errnum); } log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s", errorstring); return (EINVAL); } /** * Block waiting for a reply to a message request. * * \param type The returned type of the reply. * \param len The returned body length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result) { struct xs_stored_msg *msg; char *body; int error; mtx_lock(&xs.reply_lock); while (TAILQ_EMPTY(&xs.reply_list)) { error = mtx_sleep(&xs.reply_list, &xs.reply_lock, 0, "xswait", hz/10); if (error && error != EWOULDBLOCK) { mtx_unlock(&xs.reply_lock); return (error); } } msg = TAILQ_FIRST(&xs.reply_list); TAILQ_REMOVE(&xs.reply_list, msg, list); mtx_unlock(&xs.reply_lock); *type = msg->hdr.type; if (len) *len = msg->hdr.len; body = msg->u.reply.body; free(msg, M_XENSTORE); *result = body; return (0); } /** * Pass-thru interface for XenStore access by userland processes * via the XenStore device. * * Reply type and length data are returned by overwriting these * fields in the passed in request message. * * \param msg A properly formatted message to transmit to * the XenStore service. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the cause * of failure. 
* * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(result, M_XENSTORE)'. */ int xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) { uint32_t request_type; int error; request_type = msg->type; sx_xlock(&xs.request_mutex); if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0) error = xs_read_reply(&msg->type, &msg->len, result); sx_xunlock(&xs.request_mutex); return (error); } /** * Send a message with an optionally multi-part body to the XenStore service. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param iovec Pointers to the body sections of the request. * \param num_vecs The number of body sections in the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE)'. */ static int xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type, const struct iovec *iovec, u_int num_vecs, u_int *len, void **result) { struct xsd_sockmsg msg; void *ret = NULL; u_int i; int error; msg.tx_id = t.id; msg.req_id = 0; msg.type = request_type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; sx_xlock(&xs.request_mutex); error = xs_write_store(&msg, sizeof(msg)); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } for (i = 0; i < num_vecs; i++) { error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } } error = xs_read_reply(&msg.type, len, &ret); error_lock_held: sx_xunlock(&xs.request_mutex); if (error) return (error); if (msg.type == XS_ERROR) { error = xs_get_error(ret); free(ret, M_XENSTORE); return (error); } /* Reply is either error or an echo of our request message type. */ KASSERT(msg.type == request_type, ("bad xenstore message type")); if (result) *result = ret; else free(ret, M_XENSTORE); return (0); } /** * Wrapper for xs_talkv allowing easy transmission of a message with * a single, contiguous, message body. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param body The body of the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE)'. */ static int xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type, const char *body, u_int *len, void **result) { struct iovec iovec; iovec.iov_base = (void *)(uintptr_t)body; iovec.iov_len = strlen(body) + 1; return (xs_talkv(t, request_type, &iovec, 1, len, result)); } /*------------------------- XenStore Watch Support ---------------------------*/ /** * Transmit a watch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param token A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure.
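 */

/*
 * Illustrative usage sketch, not part of the driver, assuming the
 * public wrappers defined at the end of this file (xs_read() and
 * friends); the "device/control"/"shutdown" path is only an example.
 */
static int
example_read_node(void)
{
	void *result;
	u_int len;
	int error;

	/* XST_NIL means "no explicit transaction". */
	error = xs_read(XST_NIL, "device/control", "shutdown", &len,
	    &result);
	if (error != 0)
		return (error);
	printf("shutdown request: %s\n", (const char *)result);
	/* Results arrive in malloced M_XENSTORE storage; caller frees. */
	free(result, M_XENSTORE);
	return (0);
}

/* xs_watch, documented above: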
*/ static int xs_watch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL)); } /** * Transmit an unwatch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param token A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_unwatch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); } /** * Convert from watch token (unique identifier) to the associated * internal tracking structure for this watch. * * \param token The unique identifier for the watch to find. * * \return A pointer to the found watch structure or NULL. */ static struct xs_watch * find_watch(const char *token) { struct xs_watch *i, *cmp; cmp = (void *)strtoul(token, NULL, 16); LIST_FOREACH(i, &xs.registered_watches, list) if (i == cmp) return (i); return (NULL); } /** * Thread body of the XenStore watch event dispatch thread. */ static void xenwatch_thread(void *unused) { struct xs_stored_msg *msg; for (;;) { mtx_lock(&xs.watch_events_lock); while (TAILQ_EMPTY(&xs.watch_events)) mtx_sleep(&xs.watch_events, &xs.watch_events_lock, PWAIT | PCATCH, "waitev", hz/10); mtx_unlock(&xs.watch_events_lock); sx_xlock(&xs.xenwatch_mutex); mtx_lock(&xs.watch_events_lock); msg = TAILQ_FIRST(&xs.watch_events); if (msg) TAILQ_REMOVE(&xs.watch_events, msg, list); mtx_unlock(&xs.watch_events_lock); if (msg != NULL) { /* * XXX There are messages coming in with a NULL * XXX callback. This deserves further investigation; * XXX the workaround here simply prevents the kernel * XXX from panicking on startup. */ if (msg->u.watch.handle->callback != NULL) msg->u.watch.handle->callback( msg->u.watch.handle, (const char **)msg->u.watch.vec, msg->u.watch.vec_size); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } sx_xunlock(&xs.xenwatch_mutex); } } /*----------- XenStore Configuration, Initialization, and Control ------------*/ /** * Set up communication channels with the XenStore service. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xs_init_comms(void) { int error; if (xen_store->rsp_prod != xen_store->rsp_cons) { log(LOG_WARNING, "XENSTORE response ring is not quiescent " "(%08x:%08x): fixing up\n", xen_store->rsp_cons, xen_store->rsp_prod); xen_store->rsp_cons = xen_store->rsp_prod; } xen_intr_unbind(&xs.xen_intr_handle); error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn, /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE, &xs.xen_intr_handle); if (error) { log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); return (error); } return (0); } /*------------------ Private Device Attachment Functions --------------------*/ static void xs_identify(driver_t *driver, device_t parent) { BUS_ADD_CHILD(parent, 0, "xenstore", 0); } /** * Probe for the existence of the XenStore. * * \param dev */ static int xs_probe(device_t dev) { /* * We are either operating within a PV kernel or being probed * as the child of the successfully attached xenpci device. * Thus we are in a Xen environment and there will be a XenStore.
* Unconditionally return success. */ device_set_desc(dev, "XenStore"); return (BUS_PROBE_NOWILDCARD); } static void xs_attach_deferred(void *arg) { bus_generic_probe(xs.xs_dev); bus_generic_attach(xs.xs_dev); config_intrhook_disestablish(&xs.xs_attachcb); } static void xs_attach_late(void *arg, int pending) { KASSERT((pending == 1), ("xs late attach queued several times")); bus_generic_probe(xs.xs_dev); bus_generic_attach(xs.xs_dev); } /** * Attach to the XenStore. * * This routine also prepares for the probe/attach of drivers that rely * on the XenStore. */ static int xs_attach(device_t dev) { int error; /* Allow us to get device_t from softc and vice-versa. */ xs.xs_dev = dev; device_set_softc(dev, &xs); /* Initialize the interface to xenstore. */ struct proc *p; xs.initialized = false; - if (xen_hvm_domain()) { - xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); - xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); - xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE); - xs.initialized = true; - } else if (xen_pv_domain()) { - if (HYPERVISOR_start_info->store_evtchn == 0) { - struct evtchn_alloc_unbound alloc_unbound; - - /* Allocate a local event channel for xenstore */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = DOMID_SELF; - error = HYPERVISOR_event_channel_op( - EVTCHNOP_alloc_unbound, &alloc_unbound); - if (error != 0) - panic( - "unable to alloc event channel for Dom0: %d", - error); + xs.evtchn = xen_get_xenstore_evtchn(); + if (xs.evtchn == 0) { + struct evtchn_alloc_unbound alloc_unbound; + + /* Allocate a local event channel for xenstore */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; + error = HYPERVISOR_event_channel_op( + EVTCHNOP_alloc_unbound, &alloc_unbound); + if (error != 0) + panic( + "unable to alloc event channel for Dom0: %d", + error); - HYPERVISOR_start_info->store_evtchn = - alloc_unbound.port; - xs.evtchn = alloc_unbound.port; + xs.evtchn = alloc_unbound.port; - /* Allocate memory for the xs shared ring */ - xen_store = malloc(PAGE_SIZE, M_XENSTORE, - M_WAITOK | M_ZERO); - } else { - xs.evtchn = HYPERVISOR_start_info->store_evtchn; - xs.initialized = true; - } + /* Allocate memory for the xs shared ring */ + xen_store = malloc(PAGE_SIZE, M_XENSTORE, M_WAITOK | M_ZERO); + xs.gpfn = atop(pmap_kextract((vm_offset_t)xen_store)); } else { - panic("Unknown domain type, cannot initialize xenstore."); + xs.gpfn = xen_get_xenstore_mfn(); + xen_store = pmap_mapdev_attr(ptoa(xs.gpfn), PAGE_SIZE, + PAT_WRITE_BACK); + xs.initialized = true; } TAILQ_INIT(&xs.reply_list); TAILQ_INIT(&xs.watch_events); mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF); mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF); sx_init(&xs.xenwatch_mutex, "xenwatch"); sx_init(&xs.request_mutex, "xenstore request"); mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); /* Initialize the shared memory rings to talk to xenstored */ error = xs_init_comms(); if (error) return (error); error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID, 0, "xenwatch"); if (error) return (error); xs.xenwatch_pid = p->p_pid; error = kproc_create(xs_rcv_thread, NULL, NULL, RFHIGHPID, 0, "xenstore_rcv"); xs.xs_attachcb.ich_func = xs_attach_deferred; xs.xs_attachcb.ich_arg = NULL; if (xs.initialized) { config_intrhook_establish(&xs.xs_attachcb); } else { TASK_INIT(&xs.xs_late_init, 0, xs_attach_late, NULL); } return (error); } /** * Prepare for suspension of this VM by halting XenStore 
access after * all transactions and individual requests have completed. */ static int xs_suspend(device_t dev) { int error; /* Suspend child Xen devices. */ error = bus_generic_suspend(dev); if (error != 0) return (error); sx_xlock(&xs.request_mutex); return (0); } /** * Resume XenStore operations after this VM is resumed. */ static int xs_resume(device_t dev __unused) { struct xs_watch *watch; char token[sizeof(watch) * 2 + 1]; xs_init_comms(); sx_xunlock(&xs.request_mutex); /* * NB: since xenstore children have not been resumed yet, there's * no need to hold any watch mutex. Having clients try to add or * remove watches at this point (before xenstore is resumed) is * clearly a violation of the resume order. */ LIST_FOREACH(watch, &xs.registered_watches, list) { sprintf(token, "%lX", (long)watch); xs_watch(watch->node, token); } /* Resume child Xen devices. */ bus_generic_resume(dev); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenstore_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xs_identify), DEVMETHOD(device_probe, xs_probe), DEVMETHOD(device_attach, xs_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xs_suspend), DEVMETHOD(device_resume, xs_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD_END }; DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0); static devclass_t xenstore_devclass; DRIVER_MODULE(xenstore, xenpv, xenstore_driver, xenstore_devclass, 0, 0);
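/*
 * For context, this is roughly how a client driver consumes the watch
 * machinery that xs_resume() re-arms above.  A minimal sketch only, kept
 * under #if 0: the node path, callback body and softc are hypothetical;
 * struct xs_watch, xs_register_watch() and xs_unregister_watch() are the
 * real interfaces declared in xenstorevar.h.
 */
#if 0
static void
otherend_changed(struct xs_watch *watch, const char **vec, unsigned int len)
{
	/* Runs in xenwatch_thread() context with no locks held. */
	struct my_softc *sc = (struct my_softc *)watch->callback_data;

	my_handle_state_change(sc, vec[XS_WATCH_PATH]);
}

static int
my_watch_attach(struct my_softc *sc)
{

	sc->watch.node = strdup("backend/vbd/0/state", M_DEVBUF);
	sc->watch.callback = otherend_changed;
	sc->watch.callback_data = (uintptr_t)sc;
	return (xs_register_watch(&sc->watch));
}
#endif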
/*------------------------------- Sysctl Data --------------------------------*/ /* XXX Shouldn't the node be somewhere else? */ SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, ""); SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); /*-------------------------------- Public API --------------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ +bool +xs_initialized(void) +{ + + return (xs.initialized); +} + +evtchn_port_t +xs_evtchn(void) +{ + + return (xs.evtchn); +} + +vm_paddr_t +xs_address(void) +{ + + return (ptoa(xs.gpfn)); +} + int xs_directory(struct xs_transaction t, const char *dir, const char *node, u_int *num, const char ***result) { struct sbuf *path; char *strings; u_int len = 0; int error; path = xs_join(dir, node); error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len, (void **)&strings); sbuf_delete(path); if (error) return (error); *result = split(strings, len, num); return (0); } int xs_exists(struct xs_transaction t, const char *dir, const char *node) { const char **d; int error, dir_n; error = xs_directory(t, dir, node, &dir_n, &d); if (error) return (0); free(d, M_XENSTORE); return (1); } int xs_read(struct xs_transaction t, const char *dir, const char *node, u_int *len, void **result) { struct sbuf *path; void *ret; int error; path = xs_join(dir, node); error = xs_single(t, XS_READ, sbuf_data(path), len, &ret); sbuf_delete(path); if (error) return (error); *result = ret; return (0); } int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string) { struct sbuf *path; struct iovec iovec[2]; int error; path = xs_join(dir, node); iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path); iovec[0].iov_len = sbuf_len(path) + 1; iovec[1].iov_base = (void *)(uintptr_t) string; iovec[1].iov_len = strlen(string); error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); sbuf_delete(path); return (error); } int xs_mkdir(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); }
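/*
 * Illustrative only: a typical single-node round trip through the wrappers
 * above.  The path and value are hypothetical and error handling is kept
 * minimal; note that xs_read() hands ownership of an M_XENSTORE buffer to
 * the caller.
 */
#if 0
static int
state_roundtrip(void)
{
	char *val;
	int error;

	error = xs_write(XST_NIL, "device/vbd/0", "state", "3");
	if (error != 0)
		return (error);
	error = xs_read(XST_NIL, "device/vbd/0", "state", NULL, (void **)&val);
	if (error == 0)
		free(val, M_XENSTORE);
	return (error);
}
#endif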
int xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node) { struct xs_transaction local_xbt; struct sbuf *root_path_sbuf; struct sbuf *cur_path_sbuf; char *root_path; char *cur_path; const char **dir; int error; retry: root_path_sbuf = xs_join(base, node); cur_path_sbuf = xs_join(base, node); root_path = sbuf_data(root_path_sbuf); cur_path = sbuf_data(cur_path_sbuf); dir = NULL; local_xbt.id = 0; if (xbt.id == 0) { error = xs_transaction_start(&local_xbt); if (error != 0) goto out; xbt = local_xbt; } while (1) { u_int count; u_int i; error = xs_directory(xbt, cur_path, "", &count, &dir); if (error) goto out; for (i = 0; i < count; i++) { error = xs_rm(xbt, cur_path, dir[i]); if (error == ENOTEMPTY) { struct sbuf *push_dir; /* * Descend to clear out this subdirectory. * We'll return to cur_path once push_dir * is empty. */ push_dir = xs_join(cur_path, dir[i]); sbuf_delete(cur_path_sbuf); cur_path_sbuf = push_dir; cur_path = sbuf_data(cur_path_sbuf); break; } else if (error != 0) { goto out; } } free(dir, M_XENSTORE); dir = NULL; if (i == count) { char *last_slash; /* Directory is empty. It is now safe to remove. */ error = xs_rm(xbt, cur_path, ""); if (error != 0) goto out; if (!strcmp(cur_path, root_path)) break; /* Return to processing the parent directory. */ last_slash = strrchr(cur_path, '/'); KASSERT(last_slash != NULL, ("xs_rm_tree: mangled path %s", cur_path)); *last_slash = '\0'; } } out: sbuf_delete(cur_path_sbuf); sbuf_delete(root_path_sbuf); if (dir != NULL) free(dir, M_XENSTORE); if (local_xbt.id != 0) { int terror; terror = xs_transaction_end(local_xbt, /*abort*/error != 0); xbt.id = 0; if (terror == EAGAIN && error == 0) goto retry; } return (error); } int xs_transaction_start(struct xs_transaction *t) { char *id_str; int error; error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL, (void **)&id_str); if (error == 0) { t->id = strtoul(id_str, NULL, 0); free(id_str, M_XENSTORE); } return (error); } int xs_transaction_end(struct xs_transaction t, int abort) { char abortstr[2]; if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL)); } int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) { va_list ap; int error, ns; char *val; error = xs_read(t, dir, node, NULL, (void **) &val); if (error) return (error); va_start(ap, fmt); ns = vsscanf(val, fmt, ap); va_end(ap); free(val, M_XENSTORE); /* Distinctive errno. */ if (ns == 0) return (ERANGE); if (scancountp) *scancountp = ns; return (0); } int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap) { struct sbuf *sb; int error; sb = sbuf_new_auto(); sbuf_vprintf(sb, fmt, ap); sbuf_finish(sb); error = xs_write(t, dir, node, sbuf_data(sb)); sbuf_delete(sb); return (error); } int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) { va_list ap; int error; va_start(ap, fmt); error = xs_vprintf(t, dir, node, fmt, ap); va_end(ap); return (error); } int xs_gather(struct xs_transaction t, const char *dir, ...) { va_list ap; const char *name; int error; va_start(ap, dir); error = 0; while (error == 0 && (name = va_arg(ap, char *)) != NULL) { const char *fmt = va_arg(ap, char *); void *result = va_arg(ap, void *); char *p; error = xs_read(t, dir, name, NULL, (void **) &p); if (error) break; if (fmt) { if (sscanf(p, fmt, result) == 0) error = EINVAL; free(p, M_XENSTORE); } else *(char **)result = p; } va_end(ap); return (error); } int xs_register_watch(struct xs_watch *watch) { /* Pointer in ASCII is the token. */ char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); mtx_lock(&xs.registered_watches_lock); KASSERT(find_watch(token) == NULL, ("watch already registered")); LIST_INSERT_HEAD(&xs.registered_watches, watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_watch(watch->node, token); /* Ignore errors due to multiple registration. */ if (error == EEXIST) error = 0; if (error != 0) { mtx_lock(&xs.registered_watches_lock); LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); } return (error); }
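/*
 * Note on the token scheme used by xs_register_watch() above and
 * xs_unregister_watch() below: the token is simply the struct xs_watch
 * pointer rendered in hex by sprintf(token, "%lX", (long)watch), and
 * find_watch() recovers the pointer with strtoul(token, NULL, 16), so no
 * separate token-to-watch lookup table is needed.
 */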
void xs_unregister_watch(struct xs_watch *watch) { struct xs_stored_msg *msg, *tmp; char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); mtx_lock(&xs.registered_watches_lock); if (find_watch(token) == NULL) { mtx_unlock(&xs.registered_watches_lock); return; } LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_unwatch(watch->node, token); if (error) log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n", watch->node, error); /* Cancel pending watch events. */ mtx_lock(&xs.watch_events_lock); TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) { if (msg->u.watch.handle != watch) continue; TAILQ_REMOVE(&xs.watch_events, msg, list); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.watch_events_lock); /* Flush any currently-executing callback, unless we are it. :-) */ if (curproc->p_pid != xs.xenwatch_pid) { sx_xlock(&xs.xenwatch_mutex); sx_xunlock(&xs.xenwatch_mutex); } } void xs_lock(void) { sx_xlock(&xs.request_mutex); return; } void xs_unlock(void) { sx_xunlock(&xs.request_mutex); return; } diff --git a/sys/dev/xen/xenstore/xenstored_dev.c b/sys/dev/xen/xenstore/xenstored_dev.c index ae24085dc809..e239382945aa 100644 --- a/sys/dev/xen/xenstore/xenstored_dev.c +++ b/sys/dev/xen/xenstore/xenstored_dev.c @@ -1,169 +1,165 @@ /* * Copyright (c) 2014 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #define XSD_READ_SIZE 20 static int xsd_dev_read(struct cdev *dev, struct uio *uio, int ioflag); static int xsd_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr); static struct cdevsw xsd_dev_cdevsw = { .d_version = D_VERSION, .d_read = xsd_dev_read, .d_mmap = xsd_dev_mmap, .d_name = "xsd_dev", }; static int xsd_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { char evtchn[XSD_READ_SIZE]; int error, len; - len = snprintf(evtchn, sizeof(evtchn), "%u", - HYPERVISOR_start_info->store_evtchn); + len = snprintf(evtchn, sizeof(evtchn), "%u", xs_evtchn()); if (len < 0 || len > uio->uio_resid) return (EINVAL); error = uiomove(evtchn, len, uio); if (error) return (error); return (0); } static int xsd_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { if (offset != 0) return (EINVAL); - *paddr = pmap_kextract((vm_offset_t)xen_store); + *paddr = xs_address(); return (0); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xsd_dev_identify(driver_t *driver __unused, device_t parent) { - if (!xen_pv_domain()) - return; - if (HYPERVISOR_start_info->store_mfn != 0) + if (!xen_domain() || xs_initialized()) return; /* * Only attach if xenstore is not available, because we are the * domain that's supposed to run it. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xenstored device * * \param dev NewBus device_t for this instance. * * \return Always returns BUS_PROBE_NOWILDCARD, indicating success. */ static int xsd_dev_probe(device_t dev) { device_set_desc(dev, "Xenstored user-space device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xenstored device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xsd_dev_attach(device_t dev) { struct cdev *xsd_cdev; xsd_cdev = make_dev(&xsd_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, "xen/xenstored"); if (xsd_cdev == NULL) return (EINVAL); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xsd_dev_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xsd_dev_identify), DEVMETHOD(device_probe, xsd_dev_probe), DEVMETHOD(device_attach, xsd_dev_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xsd_dev, xsd_dev_driver, xsd_dev_methods, 0); devclass_t xsd_dev_devclass; DRIVER_MODULE(xsd_dev, xenpv, xsd_dev_driver, xsd_dev_devclass, NULL, NULL); diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c index 286a80f80366..1b98ab61e429 100644 --- a/sys/x86/xen/hvm.c +++ b/sys/x86/xen/hvm.c @@ -1,427 +1,469 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008, 2013 Citrix Systems, Inc. * Copyright (c) 2012 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Forward Declarations ---------------------------*/ static void xen_hvm_cpu_init(void); /*-------------------------------- Local Types -------------------------------*/ enum xen_hvm_init_type { XEN_HVM_INIT_COLD, XEN_HVM_INIT_CANCELLED_SUSPEND, XEN_HVM_INIT_RESUME }; /*-------------------------------- Global Data -------------------------------*/ enum xen_domain_type xen_domain_type = XEN_NATIVE; #ifdef SMP struct cpu_ops xen_hvm_cpu_ops = { .cpu_init = xen_hvm_cpu_init, .cpu_resume = xen_hvm_cpu_init }; #endif static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); /** * If non-zero, the hypervisor has been configured to use a direct * IDT event callback for interrupt injection. */ int xen_vector_callback_enabled; /*------------------------------- Per-CPU Data -------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ shared_info_t *HYPERVISOR_shared_info; -start_info_t *HYPERVISOR_start_info; - /*------------------------------ Sysctl tunables -----------------------------*/ int xen_disable_pv_disks = 0; int xen_disable_pv_nics = 0; TUNABLE_INT("hw.xen.disable_pv_disks", &xen_disable_pv_disks); TUNABLE_INT("hw.xen.disable_pv_nics", &xen_disable_pv_nics); /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ static uint32_t cpuid_base; static uint32_t xen_hvm_cpuid_base(void) { uint32_t base, regs[4]; for (base = 0x40000000; base < 0x40010000; base += 0x100) { do_cpuid(base, regs); if (!memcmp("XenVMMXenVMM", &regs[1], 12) && (regs[0] - base) >= 2) return (base); } return (0); } /* * Allocate and fill in the hypercall page.
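 * The handshake implemented below: CPUID leaf base+2 reports the number of
 * hypercall pages in %eax and the MSR index in %ebx; writing the physical
 * address of hypercall_page to that MSR asks the hypervisor to fill the
 * page with its entry stubs.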
*/ static int xen_hvm_init_hypercall_stubs(enum xen_hvm_init_type init_type) { uint32_t regs[4]; if (xen_pv_domain()) { /* hypercall page is already set in the PV case */ return (0); } cpuid_base = xen_hvm_cpuid_base(); if (cpuid_base == 0) return (ENXIO); if (init_type == XEN_HVM_INIT_COLD) { int major, minor; do_cpuid(cpuid_base + 1, regs); major = regs[0] >> 16; minor = regs[0] & 0xffff; printf("XEN: Hypervisor version %d.%d detected.\n", major, minor); #ifdef SMP if (((major < 4) || (major == 4 && minor <= 5)) && msix_disable_migration == -1) { /* * Xen hypervisors prior to 4.6.0 do not properly * handle updates to enabled MSI-X table entries, * so disable MSI-X interrupt migration in that * case. */ if (bootverbose) printf( "Disabling MSI-X interrupt migration due to Xen hypervisor bug.\n" "Set machdep.msix_disable_migration=0 to forcefully enable it.\n"); msix_disable_migration = 1; } #endif } /* * Find the hypercall pages. */ do_cpuid(cpuid_base + 2, regs); if (regs[0] != 1) return (EINVAL); wrmsr(regs[1], vtophys(&hypercall_page)); return (0); } static void xen_hvm_init_shared_info_page(void) { struct xen_add_to_physmap xatp; if (xen_pv_domain()) { /* * Already setup in the PV case, shared_info is passed inside * of the start_info struct at start of day. */ return; } if (HYPERVISOR_shared_info == NULL) { HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); if (HYPERVISOR_shared_info == NULL) panic("Unable to allocate Xen shared info page"); } xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed"); } /* * Tell the hypervisor how to contact us for event channel callbacks. */ void xen_hvm_set_callback(device_t dev) { struct xen_hvm_param xhp; int irq; if (xen_vector_callback_enabled) return; xhp.domid = DOMID_SELF; xhp.index = HVM_PARAM_CALLBACK_IRQ; if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { int error; xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); if (error == 0) { xen_vector_callback_enabled = 1; return; } printf("Xen HVM callback vector registration failed (%d). " "Falling back to emulated device interrupt\n", error); } xen_vector_callback_enabled = 0; if (dev == NULL) { /* * Called from early boot or resume. * xenpci will invoke us again later. */ return; } irq = pci_get_irq(dev); if (irq < 16) { xhp.value = HVM_CALLBACK_GSI(irq); } else { u_int slot; u_int pin; slot = pci_get_slot(dev); pin = pci_get_intpin(dev) - 1; xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); } if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp) != 0) panic("Can't set evtchn callback"); } #define XEN_MAGIC_IOPORT 0x10 enum { XMI_MAGIC = 0x49d2, XMI_UNPLUG_IDE_DISKS = 0x01, XMI_UNPLUG_NICS = 0x02, XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 }; static void xen_hvm_disable_emulated_devices(void) { u_short disable_devs = 0; if (xen_pv_domain()) { /* * No emulated devices in the PV case, so no need to unplug * anything. 
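 * (HVM guests take the path below instead: reading XEN_MAGIC_IOPORT yields
 * XMI_MAGIC when QEMU supports the unplug protocol, and writing a mask of
 * XMI_UNPLUG_* bits to the same port disconnects the matching emulated
 * devices so their PV counterparts can attach in their place.)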
*/ if (xen_disable_pv_disks != 0 || xen_disable_pv_nics != 0) printf("PV devices cannot be disabled in PV guests\n"); return; } if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) return; if (xen_disable_pv_disks == 0) { if (bootverbose) printf("XEN: disabling emulated disks\n"); disable_devs |= XMI_UNPLUG_IDE_DISKS; } if (xen_disable_pv_nics == 0) { if (bootverbose) printf("XEN: disabling emulated nics\n"); disable_devs |= XMI_UNPLUG_NICS; } if (disable_devs != 0) outw(XEN_MAGIC_IOPORT, disable_devs); } static void xen_hvm_init(enum xen_hvm_init_type init_type) { int error; int i; if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) return; error = xen_hvm_init_hypercall_stubs(init_type); switch (init_type) { case XEN_HVM_INIT_COLD: if (error != 0) return; /* * If xen_domain_type is not set at this point * it means we are inside a (PV)HVM guest, because * for PVH the guest type is set much earlier * (see hammer_time_xen). */ if (!xen_domain()) { xen_domain_type = XEN_HVM_DOMAIN; vm_guest = VM_GUEST_XEN; } setup_xen_features(); #ifdef SMP cpu_ops = xen_hvm_cpu_ops; #endif break; case XEN_HVM_INIT_RESUME: if (error != 0) panic("Unable to init Xen hypercall stubs on resume"); /* Clear stale vcpu_info. */ CPU_FOREACH(i) DPCPU_ID_SET(i, vcpu_info, NULL); break; default: panic("Unsupported HVM initialization type"); } xen_vector_callback_enabled = 0; xen_hvm_set_callback(NULL); /* * On (PV)HVM domains we need to request the hypervisor to * fill the shared info page, for PVH guests the shared_info page * is passed inside the start_info struct and is already set, so these * functions are no-ops. */ xen_hvm_init_shared_info_page(); xen_hvm_disable_emulated_devices(); } void xen_hvm_suspend(void) { } void xen_hvm_resume(bool suspend_cancelled) { xen_hvm_init(suspend_cancelled ? XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); /* Register vcpu_info area for CPU#0. */ xen_hvm_cpu_init(); } static void xen_hvm_sysinit(void *arg __unused) { xen_hvm_init(XEN_HVM_INIT_COLD); } SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); static void xen_hvm_cpu_init(void) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpu_info; uint32_t regs[4]; int cpu, rc; if (!xen_domain()) return; if (DPCPU_GET(vcpu_info) != NULL) { /* * vcpu_info is already set. We're resuming * from a failed migration and our pre-suspend * configuration is still valid. */ return; } /* * Set vCPU ID. If available fetch the ID from CPUID, if not just use * the ACPI ID. */ KASSERT(cpuid_base != 0, ("Invalid base Xen CPUID leaf")); cpuid_count(cpuid_base + 4, 0, regs); PCPU_SET(vcpu_id, (regs[0] & XEN_HVM_CPUID_VCPU_ID_PRESENT) ? regs[1] : PCPU_GET(acpi_id)); /* * Set the vCPU info. * * NB: the vCPU info for vCPUs < 32 can be fetched from the shared info * page, but in order to make sure the mapping code is correct always * attempt to map the vCPU info at a custom place.
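 * If VCPUOP_register_vcpu_info fails we fall back to the legacy
 * shared_info slot below, which only exists for the first 32 vCPUs.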
*/ vcpu_info = DPCPU_PTR(vcpu_local_info); cpu = PCPU_GET(vcpu_id); info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); if (rc != 0) DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); else DPCPU_SET(vcpu_info, vcpu_info); } SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL); + +/* HVM/PVH start_info accessors */ +static vm_paddr_t +hvm_get_xenstore_mfn(void) +{ + + return (hvm_get_parameter(HVM_PARAM_STORE_PFN)); +} + +static evtchn_port_t +hvm_get_xenstore_evtchn(void) +{ + + return (hvm_get_parameter(HVM_PARAM_STORE_EVTCHN)); +} + +static vm_paddr_t +hvm_get_console_mfn(void) +{ + + return (hvm_get_parameter(HVM_PARAM_CONSOLE_PFN)); +} + +static evtchn_port_t +hvm_get_console_evtchn(void) +{ + + return (hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN)); +} + +static uint32_t +hvm_get_start_flags(void) +{ + + return (0); +} + +struct hypervisor_info hypervisor_info = { + .get_xenstore_mfn = hvm_get_xenstore_mfn, + .get_xenstore_evtchn = hvm_get_xenstore_evtchn, + .get_console_mfn = hvm_get_console_mfn, + .get_console_evtchn = hvm_get_console_evtchn, + .get_start_flags = hvm_get_start_flags, +}; diff --git a/sys/x86/xen/pv.c b/sys/x86/xen/pv.c index 4c812c3b5dad..2963e1fbc011 100644 --- a/sys/x86/xen/pv.c +++ b/sys/x86/xen/pv.c @@ -1,432 +1,467 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * Copyright (c) 2008 The NetBSD Foundation, Inc. * Copyright (c) 2013 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* Native initial function */ extern u_int64_t hammer_time(u_int64_t, u_int64_t); /* Xen initial function */ uint64_t hammer_time_xen(start_info_t *, uint64_t); #define MAX_E820_ENTRIES 128 /*--------------------------- Forward Declarations ---------------------------*/ static caddr_t xen_pv_parse_preload_data(u_int64_t); static void xen_pv_parse_memmap(caddr_t, vm_paddr_t *, int *); #ifdef SMP static int xen_pv_start_all_aps(void); #endif /*---------------------------- Extern Declarations ---------------------------*/ #ifdef SMP /* Variables used by amd64 mp_machdep to start APs */ extern char *doublefault_stack; extern char *mce_stack; extern char *nmi_stack; extern char *dbg_stack; #endif /* * Placed by the linker at the end of the bss section, which is the last * section loaded by Xen before loading the symtab and strtab. */ extern uint32_t end; /*-------------------------------- Global Data -------------------------------*/ /* Xen init_ops implementation. */ struct init_ops xen_init_ops = { .parse_preload_data = xen_pv_parse_preload_data, .early_clock_source_init = xen_clock_init, .early_delay = xen_delay, .parse_memmap = xen_pv_parse_memmap, #ifdef SMP .start_all_aps = xen_pv_start_all_aps, #endif .msi_init = xen_msi_init, }; static struct bios_smap xen_smap[MAX_E820_ENTRIES]; +static start_info_t *legacy_start_info; + +/*----------------------- Legacy PVH start_info accessors --------------------*/ +static vm_paddr_t +legacy_get_xenstore_mfn(void) +{ + + return (legacy_start_info->store_mfn); +} + +static evtchn_port_t +legacy_get_xenstore_evtchn(void) +{ + + return (legacy_start_info->store_evtchn); +} + +static vm_paddr_t +legacy_get_console_mfn(void) +{ + + return (legacy_start_info->console.domU.mfn); +} + +static evtchn_port_t +legacy_get_console_evtchn(void) +{ + + return (legacy_start_info->console.domU.evtchn); +} + +static uint32_t +legacy_get_start_flags(void) +{ + + return (legacy_start_info->flags); +} + +struct hypervisor_info legacy_info = { + .get_xenstore_mfn = legacy_get_xenstore_mfn, + .get_xenstore_evtchn = legacy_get_xenstore_evtchn, + .get_console_mfn = legacy_get_console_mfn, + .get_console_evtchn = legacy_get_console_evtchn, + .get_start_flags = legacy_get_start_flags, +}; + /*-------------------------------- Xen PV init -------------------------------*/ /* - * First function called by the Xen PVH boot sequence. + * First function called by the Xen legacy PVH boot sequence. * * Set some Xen global variables and prepare the environment so it is * as similar as possible to what native FreeBSD init function expects. 
*/ uint64_t hammer_time_xen(start_info_t *si, uint64_t xenstack) { uint64_t physfree; uint64_t *PT4 = (u_int64_t *)xenstack; uint64_t *PT3 = (u_int64_t *)(xenstack + PAGE_SIZE); uint64_t *PT2 = (u_int64_t *)(xenstack + 2 * PAGE_SIZE); int i; xen_domain_type = XEN_PV_DOMAIN; vm_guest = VM_GUEST_XEN; if ((si == NULL) || (xenstack == 0)) { xc_printf("ERROR: invalid start_info or xen stack, halting\n"); HYPERVISOR_shutdown(SHUTDOWN_crash); } xc_printf("FreeBSD PVH running on %s\n", si->magic); /* We use 3 pages of xen stack for the boot pagetables */ physfree = xenstack + 3 * PAGE_SIZE - KERNBASE; /* Setup Xen global variables */ - HYPERVISOR_start_info = si; + legacy_start_info = si; HYPERVISOR_shared_info = (shared_info_t *)(si->shared_info + KERNBASE); - /* - * Setup some misc global variables for Xen devices - * - * XXX: Devices that need these specific variables should - * be rewritten to fetch this info by themselves from the - * start_info page. - */ - xen_store = (struct xenstore_domain_interface *) - (ptoa(si->store_mfn) + KERNBASE); - console_page = (char *)(ptoa(si->console.domU.mfn) + KERNBASE); - /* * Use the stack Xen gives us to build the page tables * as native FreeBSD expects to find them (created * by the boot trampoline). */ for (i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); i++) { /* * Each slot of the level 4 pages points * to the same level 3 page */ PT4[i] = ((uint64_t)&PT3[0]) - KERNBASE; PT4[i] |= PG_V | PG_RW | PG_U; /* * Each slot of the level 3 pages points * to the same level 2 page */ PT3[i] = ((uint64_t)&PT2[0]) - KERNBASE; PT3[i] |= PG_V | PG_RW | PG_U; /* * The level 2 page slots are mapped with * 2MB pages for 1GB. */ PT2[i] = i * (2 * 1024 * 1024); PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; } load_cr3(((uint64_t)&PT4[0]) - KERNBASE); /* Set the hooks for early functions that diverge from bare metal */ init_ops = xen_init_ops; apic_ops = xen_apic_ops; + hypervisor_info = legacy_info; /* Now we can jump into the native init function */ return (hammer_time(0, physfree)); } /*-------------------------------- PV specific -------------------------------*/ #ifdef SMP static bool start_xen_ap(int cpu) { struct vcpu_guest_context *ctxt; int ms, cpus = mp_naps; const size_t stacksize = kstack_pages * PAGE_SIZE; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, stacksize, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); mce_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dbg_stack = (void *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + kstack_pages * PAGE_SIZE - 8; bootAP = cpu; ctxt = malloc(sizeof(*ctxt), M_TEMP, M_WAITOK | M_ZERO); ctxt->flags = VGCF_IN_KERNEL; ctxt->user_regs.rip = (unsigned long) init_secondary; ctxt->user_regs.rsp = (unsigned long) bootSTK; /* Set the AP to use the same page tables */ ctxt->ctrlreg[3] = KPML4phys; if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) panic("unable to initialize AP#%d", cpu); free(ctxt, M_TEMP); /* Launch the vCPU */ if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) panic("unable to start AP#%d", cpu); /* Wait up to 5 seconds for it to start. 
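 * (The AP increments mp_naps from its init_secondary() startup path, so
 * 5000 polls with DELAY(1000) bound the wait at roughly five seconds.)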
*/ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return (true); DELAY(1000); } return (false); } static int xen_pv_start_all_aps(void) { int cpu; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); for (cpu = 1; cpu < mp_ncpus; cpu++) { /* attempt to start the Application Processor */ if (!start_xen_ap(cpu)) panic("AP #%d failed to start!", cpu); CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } return (mp_naps); } #endif /* SMP */ /* * Functions to convert the "extra" parameters passed by Xen * into FreeBSD boot options. */ static void xen_pv_set_env(void) { char *cmd_line_next, *cmd_line; size_t env_size; - cmd_line = HYPERVISOR_start_info->cmd_line; - env_size = sizeof(HYPERVISOR_start_info->cmd_line); + cmd_line = legacy_start_info->cmd_line; + env_size = sizeof(legacy_start_info->cmd_line); /* Skip leading spaces */ for (; isspace(*cmd_line) && (env_size != 0); cmd_line++) env_size--; /* Replace ',' with '\0' */ for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;) ; init_static_kenv(cmd_line, 0); } #ifdef DDB /* * The way Xen loads the symtab is different from the native boot loader, * because it's tailored for NetBSD. So we have to adapt and use the same * method as NetBSD. Portions of the code below have been picked from NetBSD: * sys/kern/kern_ksyms.c CVS Revision 1.71. */ static void xen_pv_parse_symtab(void) { Elf_Ehdr *ehdr; Elf_Shdr *shdr; vm_offset_t sym_end; uint32_t size; int i, j; size = end; - sym_end = HYPERVISOR_start_info->mod_start != 0 ? - HYPERVISOR_start_info->mod_start : - HYPERVISOR_start_info->mfn_list; + sym_end = legacy_start_info->mod_start != 0 ? + legacy_start_info->mod_start : legacy_start_info->mfn_list; /* * Make sure the size is sane; sym_end is just an upper bound, but * it at least allows us to fail earlier. */ if ((vm_offset_t)&end + size > sym_end) { xc_printf("Unable to load ELF symtab: size mismatch\n"); return; } ehdr = (Elf_Ehdr *)(&end + 1); if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) || ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || ehdr->e_version > 1) { xc_printf("Unable to load ELF symtab: invalid symbol table\n"); return; } shdr = (Elf_Shdr *)((uint8_t *)ehdr + ehdr->e_shoff); /* Find the symbol table and the corresponding string table. */ for (i = 1; i < ehdr->e_shnum; i++) { if (shdr[i].sh_type != SHT_SYMTAB) continue; if (shdr[i].sh_offset == 0) continue; ksymtab = (uintptr_t)((uint8_t *)ehdr + shdr[i].sh_offset); ksymtab_size = shdr[i].sh_size; j = shdr[i].sh_link; if (shdr[j].sh_offset == 0) continue; /* Can this happen? */ kstrtab = (uintptr_t)((uint8_t *)ehdr + shdr[j].sh_offset); break; } if (ksymtab == 0 || kstrtab == 0) { xc_printf( "Unable to load ELF symtab: could not find symtab or strtab\n"); return; } } #endif static caddr_t xen_pv_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; vm_ooffset_t off; vm_paddr_t metadata; char *envp; - if (HYPERVISOR_start_info->mod_start != 0) { - preload_metadata = (caddr_t)(HYPERVISOR_start_info->mod_start); + if (legacy_start_info->mod_start != 0) { + preload_metadata = (caddr_t)legacy_start_info->mod_start; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); KASSERT(kmdp != NULL, ("unable to find kernel")); /* * Xen has relocated the metadata and the modules, * so we need to recalculate its position. This is * done by saving the original modulep address and * then calculating the offset with mod_start, * which contains the relocated modulep address.
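 * Hypothetical numbers to make that concrete: if the loader recorded
 * modulep = 0x200000 but Xen placed the module chain at mod_start =
 * 0x800000, then off = 0x600000 and preload_bootstrap_relocate() rebases
 * every pointer in the preloaded metadata by that amount.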
*/ metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t); - off = HYPERVISOR_start_info->mod_start - metadata; + off = legacy_start_info->mod_start - metadata; preload_bootstrap_relocate(off); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += off; init_static_kenv(envp, 0); } else { /* Parse the extra boot information given by Xen */ xen_pv_set_env(); boothowto |= boot_env_to_howto(); kmdp = NULL; } #ifdef DDB xen_pv_parse_symtab(); #endif return (kmdp); } static void xen_pv_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct xen_memory_map memmap; u_int32_t size; int rc; /* Fetch the E820 map from Xen */ memmap.nr_entries = MAX_E820_ENTRIES; set_xen_guest_handle(memmap.buffer, xen_smap); rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); if (rc) panic("unable to fetch Xen E820 memory map"); size = memmap.nr_entries * sizeof(xen_smap[0]); bios_add_smap_entries(xen_smap, size, physmap, physmap_idx); } diff --git a/sys/xen/xen-os.h b/sys/xen/xen-os.h index 044433ae3d36..874c3b71b861 100644 --- a/sys/xen/xen-os.h +++ b/sys/xen/xen-os.h @@ -1,147 +1,191 @@ /****************************************************************************** * xen/xen-os.h * * Random collection of macros and definitions * * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE.
* * $FreeBSD$ */ #ifndef _XEN_XEN_OS_H_ #define _XEN_XEN_OS_H_ #if !defined(__XEN_INTERFACE_VERSION__) #define __XEN_INTERFACE_VERSION__ 0x00030208 #endif #define GRANT_REF_INVALID 0xffffffff #ifdef LOCORE #define __ASSEMBLY__ #endif -#include - #include +#ifndef __ASSEMBLY__ +#include + +struct hypervisor_info { + vm_paddr_t (*get_xenstore_mfn)(void); + evtchn_port_t (*get_xenstore_evtchn)(void); + vm_paddr_t (*get_console_mfn)(void); + evtchn_port_t (*get_console_evtchn)(void); + uint32_t (*get_start_flags)(void); +}; +extern struct hypervisor_info hypervisor_info; + +static inline vm_paddr_t +xen_get_xenstore_mfn(void) +{ + + return (hypervisor_info.get_xenstore_mfn()); +} + +static inline evtchn_port_t +xen_get_xenstore_evtchn(void) +{ + + return (hypervisor_info.get_xenstore_evtchn()); +} + +static inline vm_paddr_t +xen_get_console_mfn(void) +{ + + return (hypervisor_info.get_console_mfn()); +} + +static inline evtchn_port_t +xen_get_console_evtchn(void) +{ + + return (hypervisor_info.get_console_evtchn()); +} + +static inline uint32_t +xen_get_start_flags(void) +{ + + return (hypervisor_info.get_start_flags()); +} +#endif + +#include + /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ extern shared_info_t *HYPERVISOR_shared_info; -extern start_info_t *HYPERVISOR_start_info; - -/* XXX: we need to get rid of this and use HYPERVISOR_start_info directly */ -extern char *console_page; extern int xen_disable_pv_disks; extern int xen_disable_pv_nics; extern bool xen_suspend_cancelled; enum xen_domain_type { XEN_NATIVE, /* running on bare hardware */ XEN_PV_DOMAIN, /* running in a PV domain */ XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ }; extern enum xen_domain_type xen_domain_type; static inline int xen_domain(void) { return (xen_domain_type != XEN_NATIVE); } static inline int xen_pv_domain(void) { return (xen_domain_type == XEN_PV_DOMAIN); } static inline int xen_hvm_domain(void) { return (xen_domain_type == XEN_HVM_DOMAIN); } static inline bool xen_initial_domain(void) { - return (xen_domain() && HYPERVISOR_start_info != NULL && - (HYPERVISOR_start_info->flags & SIF_INITDOMAIN) != 0); + + return (xen_domain() && (xen_get_start_flags() & SIF_INITDOMAIN) != 0); } /* * Based on ofed/include/linux/bitops.h * * These helpers are prefixed with xen_ because xen-os.h is widely included * and we don't want other drivers using them. * */ #define NBPL (NBBY * sizeof(long)) static inline bool xen_test_bit(int bit, volatile long *addr) { unsigned long mask = 1UL << (bit % NBPL); return !!(atomic_load_acq_long(&addr[bit / NBPL]) & mask); } static inline void xen_set_bit(int bit, volatile long *addr) { atomic_set_long(&addr[bit / NBPL], 1UL << (bit % NBPL)); } static inline void xen_clear_bit(int bit, volatile long *addr) { atomic_clear_long(&addr[bit / NBPL], 1UL << (bit % NBPL)); } #undef NBPL /* * Functions to allocate/free unused memory in order * to map memory from other domains. */ struct resource *xenmem_alloc(device_t dev, int *res_id, size_t size); int xenmem_free(device_t dev, int res_id, struct resource *res); /* Debug/emergency function, prints directly to hypervisor console */ void xc_printf(const char *, ...) __printflike(1, 2);
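/*
 * Illustrative only: how a consumer is expected to use the accessors above
 * instead of dereferencing HYPERVISOR_start_info directly.  The function
 * name is hypothetical; the ptoa()/pmap_mapdev() pairing mirrors what
 * xs_attach() does.
 */
#if 0
static void *
map_xenstore_ring(void)
{
	vm_paddr_t ring_pa = ptoa(xen_get_xenstore_mfn());

	/* Resolves via hypervisor_info, for both HVM and legacy PVH. */
	return (pmap_mapdev(ring_pa, PAGE_SIZE));
}
#endif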
#ifndef xen_mb #define xen_mb() mb() #endif #ifndef xen_rmb #define xen_rmb() rmb() #endif #ifndef xen_wmb #define xen_wmb() wmb() #endif #endif /* !__ASSEMBLY__ */ #endif /* _XEN_XEN_OS_H_ */ diff --git a/sys/xen/xenstore/xenstorevar.h b/sys/xen/xenstore/xenstorevar.h index 4c612b4b6881..8c89e174acf2 100644 --- a/sys/xen/xenstore/xenstorevar.h +++ b/sys/xen/xenstore/xenstorevar.h @@ -1,352 +1,371 @@ /****************************************************************************** * xenstorevar.h * * Method declarations and structures for accessing the XenStore. * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _XEN_XENSTORE_XENSTOREVAR_H #define _XEN_XENSTORE_XENSTOREVAR_H #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /* XenStore allocations including XenStore data returned to clients. */ MALLOC_DECLARE(M_XENSTORE); -struct xenstore_domain_interface; struct xs_watch; -extern struct xenstore_domain_interface *xen_store; typedef void (xs_watch_cb_t)(struct xs_watch *, const char **vec, unsigned int len); /* Register callback to watch subtree (node) in the XenStore. */ struct xs_watch { LIST_ENTRY(xs_watch) list; /* Path being watched. */ char *node; /* Callback (executed in a process context with no locks held). */ xs_watch_cb_t *callback; /* Callback client data untouched by the XenStore watch mechanism. */ uintptr_t callback_data; }; LIST_HEAD(xs_watch_list, xs_watch); typedef int (*xs_event_handler_t)(void *); struct xs_transaction { uint32_t id; }; #define XST_NIL ((struct xs_transaction) { 0 }) +/** + * Check if Xenstore is initialized. + * + * \return True if initialized, false otherwise. + */ +bool xs_initialized(void); + +/** + * Return xenstore event channel port. + * + * \return event channel port. + */ +evtchn_port_t xs_evtchn(void); + +/** + * Return xenstore page physical memory address. + * + * \return xenstore page physical address. + */ +vm_paddr_t xs_address(void); + /** * Fetch the contents of a directory in the XenStore. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read.
* \param num The returned number of directory entries. * \param result An array of directory entry strings. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note The results buffer is malloced and should be free'd by the * caller with 'free(*result, M_XENSTORE)'. */ int xs_directory(struct xs_transaction t, const char *dir, const char *node, unsigned int *num, const char ***result); /** * Determine if a path exists in the XenStore. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * * \retval 1 The path exists. * \retval 0 The path does not exist or an error occurred attempting * to make that determination. */ int xs_exists(struct xs_transaction t, const char *dir, const char *node); /** * Get the contents of a single "file". Returns the contents in * *result which should be freed with free(*result, M_XENSTORE) after * use. The length of the value in bytes is returned in *len. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the file to read. * \param node The basename of the file to read. * \param len The amount of data read. * \param result The returned contents from this file. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note The results buffer is malloced and should be free'd by the * caller with 'free(*result, M_XENSTORE)'. */ int xs_read(struct xs_transaction t, const char *dir, const char *node, unsigned int *len, void **result); /** * Write to a single file. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the file to write. * \param node The basename of the file to write. * \param string The NUL terminated string of data to write. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string); /** * Create a new directory. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to create. * \param node The basename of the directory to create. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_mkdir(struct xs_transaction t, const char *dir, const char *node); /** * Remove a file or directory (directories must be empty). * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to remove. * \param node The basename of the directory to remove. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_rm(struct xs_transaction t, const char *dir, const char *node); /** * Destroy a tree of files rooted at dir/node. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to remove. * \param node The basename of the directory to remove. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_rm_tree(struct xs_transaction t, const char *dir, const char *node); /** * Start a transaction. * * Changes by others will not be seen during the lifetime of this * transaction, and changes will not be visible to others until it * is committed (xs_transaction_end). * * \param t The returned transaction. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
*/ int xs_transaction_start(struct xs_transaction *t); /** * End a transaction. * * \param t The transaction to end/commit. * \param abort If non-zero, the transaction is discarded * instead of committed. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_transaction_end(struct xs_transaction t, int abort); /** * Single file read and scanf parsing of the result. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param scancountp The number of input values assigned (i.e. the result * of scanf). * \param fmt Scanf format string followed by a variable number of * scanf input arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) __attribute__((format(scanf, 5, 6))); /** * Printf formatted write to a XenStore file. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param fmt Printf format string followed by a variable number of * printf arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. */ int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) __attribute__((format(printf, 4, 5))); /** * va_list version of xs_printf(). * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param fmt Printf format string. * \param ap Va_list of printf arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. */ int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap); /** * Multi-file read within a single directory and scanf parsing of * the results. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the paths to read. * \param ... A variable number of argument triples specifying * the file name, scanf-style format string, and * output variable (pointer to storage of the results). * The last triple in the call must be terminated * with a final NULL argument. A NULL format string * will cause the entire contents of the given file * to be assigned as a NUL terminated, M_XENSTORE heap * backed, string to the output parameter of that tuple. * * \return On success, 0. Otherwise an errno value indicating the * type of read failure. * * Example: * char protocol_abi[64]; * uint32_t ring_ref; * char *dev_type; * int error; * * error = xs_gather(XST_NIL, xenbus_get_node(dev), * "ring-ref", "%" PRIu32, &ring_ref, * "protocol", "%63s", protocol_abi, * "device-type", NULL, &dev_type, * NULL); * * ... * * free(dev_type, M_XENSTORE); */ int xs_gather(struct xs_transaction t, const char *dir, ...);
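/*
 * Illustrative only: the usual pairing of xs_printf() and xs_scanf() for a
 * single numeric node.  The path and node name are hypothetical.
 */
#if 0
static int
ring_ref_roundtrip(uint32_t ring_ref)
{
	uint32_t readback;
	int error;

	error = xs_printf(XST_NIL, "device/vif/0", "ring-ref", "%u", ring_ref);
	if (error != 0)
		return (error);
	return (xs_scanf(XST_NIL, "device/vif/0", "ring-ref", NULL,
	    "%" PRIu32, &readback));
}
#endif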
/** * Register a XenStore watch. * * XenStore watches allow a client to be notified via a callback (embedded * within the watch object) of changes to an object in the XenStore. * * \param watch An xs_watch struct with its node and callback fields * properly initialized. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. EEXIST errors from the XenStore * are suppressed, allowing multiple, physically different, * xs_watch objects to watch the same path in the XenStore. */ int xs_register_watch(struct xs_watch *watch); /** * Unregister a XenStore watch. * * \param watch An xs_watch object previously used in a successful call * to xs_register_watch(). * * The xs_watch object's node field is not altered by this call. * It is the caller's responsibility to properly dispose of both the * watch object and the data pointed to by watch->node. */ void xs_unregister_watch(struct xs_watch *watch); /** * Allocate and return an sbuf containing the XenStore path string * <dir>/<name>. If name is the NUL string, the returned sbuf contains * the path string <dir>. * * \param dir The NUL terminated directory prefix for new path. * \param name The NUL terminated basename for the new path. * * \return A buffer containing the joined path. */ struct sbuf *xs_join(const char *, const char *); /** * Lock the xenstore request mutex. */ void xs_lock(void); /** * Unlock the xenstore request mutex. */ void xs_unlock(void); #endif /* _XEN_XENSTORE_XENSTOREVAR_H */