diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c index 9fb3164cab7a..02dac3dfe757 100644 --- a/sys/dev/xen/balloon/balloon.c +++ b/sys/dev/xen/balloon/balloon.c @@ -1,412 +1,410 @@ /****************************************************************************** * balloon.c * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); /* Convert from KB (as fetched from xenstore) to number of PAGES */ #define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) struct mtx balloon_mutex; /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; /* We may hit the hard limit in Xen. If we do then we remember it. */ unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. */ unsigned long driver_pages; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; }; static struct balloon_stats balloon_stats; #define bs balloon_stats SYSCTL_DECL(_dev_xen); static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, &bs.current_pages, 0, "Current allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, &bs.target_pages, 0, "Target allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, &bs.driver_pages, 0, "Driver pages"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, &bs.hard_limit, 0, "Xen hard limit"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, &bs.balloon_low, 0, "Low-mem balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, &bs.balloon_high, 0, "High-mem balloon"); /* List of ballooned pages, threaded through the mem_map array. */ static TAILQ_HEAD(,vm_page) ballooned_pages; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_mem: " fmt, ##args) static unsigned long current_target(void) { unsigned long target = min(bs.target_pages, bs.hard_limit); if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) target = bs.current_pages + bs.balloon_low + bs.balloon_high; return (target); } static unsigned long minimum_target(void) { unsigned long min_pages, curr_pages = current_target(); #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* * Simple continuous piecewiese linear function: * max MiB -> min MiB gradient * 0 0 * 16 16 * 32 24 * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) * 8192 552 (1/32) * 32768 1320 * 131072 4392 */ if (realmem < MB2PAGES(128)) min_pages = MB2PAGES(8) + (realmem >> 1); else if (realmem < MB2PAGES(512)) min_pages = MB2PAGES(40) + (realmem >> 2); else if (realmem < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (realmem >> 3); else min_pages = MB2PAGES(296) + (realmem >> 5); #undef MB2PAGES /* Don't enforce growth */ return (min(min_pages, curr_pages)); } static int increase_reservation(unsigned long nr_pages) { unsigned long i; vm_page_t page; long rc; struct xen_memory_reservation reservation = { - .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (page = TAILQ_FIRST(&ballooned_pages), i = 0; i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { KASSERT(page != NULL, ("ballooned_pages list corrupt")); frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret __diagused; /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); } if (rc >= 0) bs.hard_limit = (bs.current_pages + rc - bs.driver_pages); goto out; } for (i = 0; i < nr_pages; i++) { page = TAILQ_FIRST(&ballooned_pages); KASSERT(page != NULL, ("Unable to get ballooned page")); TAILQ_REMOVE(&ballooned_pages, page, plinks.q); bs.balloon_low--; KASSERT(xen_feature(XENFEAT_auto_translated_physmap), ("auto translated physmap but mapping is valid")); vm_page_free(page); } bs.current_pages += nr_pages; out: return (0); } static int decrease_reservation(unsigned long nr_pages) { unsigned long i; vm_page_t page; int need_sleep = 0; int ret __diagused; struct xen_memory_reservation reservation = { - .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (i = 0; i < nr_pages; i++) { /* * Zero the page, or else we might be leaking important data to * other domains on the same host. Xen doesn't scrub ballooned * out memory pages, the guest is in charge of making sure that * no information is leaked. */ if ((page = vm_page_alloc_noobj(VM_ALLOC_ZERO)) == NULL) { nr_pages = i; need_sleep = 1; break; } frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); bs.balloon_low++; } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); bs.current_pages -= nr_pages; return (need_sleep); } /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(void *unused) { int need_sleep = 0; long credit; mtx_lock(&balloon_mutex); for (;;) { int sleep_time; do { credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) need_sleep = (decrease_reservation(-credit) != 0); } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ if (current_target() != bs.current_pages) sleep_time = hz; else sleep_time = 0; msleep(balloon_process, &balloon_mutex, 0, "balloon", sleep_time); } mtx_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ bs.hard_limit = ~0UL; bs.target_pages = max(target, minimum_target()); wakeup(balloon_process); } static struct xs_watch target_watch = { .node = "memory/target", .max_pending = 1, }; /* React to a change in the target key */ static void watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ return; } /* * The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ set_new_target(new_target >> KB_TO_PAGE_SHIFT); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xenballoon_identify(driver_t *driver __unused, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existence of the Xen Balloon device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xenballoon_probe(device_t dev) { device_set_desc(dev, "Xen Balloon Device"); return (0); } /** * \brief Attach the Xen Balloon device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenballoon_attach(device_t dev) { int err; mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); bs.current_pages = realmem; bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; bs.driver_pages = 0UL; bs.hard_limit = ~0UL; kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); target_watch.callback = watch_target; err = xs_register_watch(&target_watch); if (err) device_printf(dev, "xenballon: failed to set balloon watcher\n"); return (err); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenballoon_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenballoon_identify), DEVMETHOD(device_probe, xenballoon_probe), DEVMETHOD(device_attach, xenballoon_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, NULL, NULL); diff --git a/sys/dev/xen/grant_table/grant_table.c b/sys/dev/xen/grant_table/grant_table.c index 11e5071c7b0b..fdbec8ac14e8 100644 --- a/sys/dev/xen/grant_table/grant_table.c +++ b/sys/dev/xen/grant_table/grant_table.c @@ -1,680 +1,680 @@ /****************************************************************************** * gnttab.c * * Two sets of functionality: * 1. Granting foreign access to our memory reservation. * 2. Accessing others' memory reservations via grant references. * (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2005, Christopher Clark * Copyright (c) 2004, K A Fraser */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 -#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static struct mtx gnttab_list_lock; /* * Resource representing allocated physical address space * for the grant table metainfo */ static struct resource *gnttab_pseudo_phys_res; /* Resource id for allocated physical address space. */ static int gnttab_pseudo_phys_res_id; -static grant_entry_t *shared; +static grant_entry_v1_t *shared; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) static int get_free_entries(int count, int *entries) { int ref, error; grant_ref_t head; mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { mtx_unlock(&gnttab_list_lock); return (error); } ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); } static void do_free_callbacks(void) { struct gnttab_free_callback *callback, *next; callback = gnttab_free_callback_list; gnttab_free_callback_list = NULL; while (callback != NULL) { next = callback->next; if (gnttab_free_count >= callback->count) { callback->next = NULL; callback->fn(callback->arg); } else { callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; } callback = next; } } static inline void check_free_callbacks(void) { if (__predict_false(gnttab_free_callback_list != NULL)) do_free_callbacks(); } static void put_free_entry(grant_ref_t ref) { mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } /* * Public grant-issuing interface functions */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); if (result) *result = ref; return (0); } void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); } int gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; nflags = shared[ref].flags; return (nflags & (GTF_reading|GTF_writing)); } int gnttab_end_foreign_access_ref(grant_ref_t ref) { uint16_t flags, nflags; nflags = shared[ref].flags; do { if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { printf("%s: WARNING: g.e. still in use!\n", __func__); return (0); } } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags); return (1); } void gnttab_end_foreign_access(grant_ref_t ref, void *page) { if (gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != NULL) { free(page, M_DEVBUF); } } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. and page still in use!\n", __func__); } } void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) { grant_ref_t *last_ref; grant_ref_t head; grant_ref_t tail; head = GNTTAB_LIST_END; tail = *refs; last_ref = refs + count; while (refs != last_ref) { if (gnttab_end_foreign_access_ref(*refs)) { gnttab_entry(*refs) = head; head = *refs; } else { /* * XXX This needs to be fixed so that the ref * is placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. still in use!\n", __func__); count--; } refs++; } if (count != 0) { mtx_lock(&gnttab_list_lock); gnttab_free_count += count; gnttab_entry(tail) = gnttab_free_head; gnttab_free_head = head; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } } int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); *result = ref; return (0); } void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; } unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; uint16_t flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) return (0); cpu_spinwait(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { flags = shared[ref].flags; cpu_spinwait(); } /* Read the frame number /after/ reading completion status. */ rmb(); frame = shared[ref].frame; KASSERT(frame != 0, ("grant table inconsistent")); return (frame); } unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = gnttab_end_foreign_transfer_ref(ref); put_free_entry(ref); return (frame); } void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); } void gnttab_free_grant_references(grant_ref_t head) { grant_ref_t ref; int count = 1; if (head == GNTTAB_LIST_END) return; ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) { int ref, error; error = get_free_entries(count, &ref); if (__predict_false(error)) return (error); *head = ref; return (0); } int gnttab_empty_grant_references(const grant_ref_t *private_head) { return (*private_head == GNTTAB_LIST_END); } int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; if (__predict_false(g == GNTTAB_LIST_END)) return (g); *private_head = gnttab_entry(g); return (g); } void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { gnttab_entry(release) = *private_head; *private_head = release; } void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count) { mtx_lock(&gnttab_list_lock); if (callback->next) goto out; callback->fn = fn; callback->arg = arg; callback->count = count; callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; check_free_callbacks(); out: mtx_unlock(&gnttab_list_lock); } void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) { struct gnttab_free_callback **pcb; mtx_lock(&gnttab_list_lock); for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { if (*pcb == callback) { *pcb = callback->next; break; } } mtx_unlock(&gnttab_list_lock); } static int grow_gnttab_list(unsigned int more_frames) { unsigned int new_nr_grant_frames, extra_entries, i; new_nr_grant_frames = nr_grant_frames + more_frames; extra_entries = more_frames * GREFS_PER_GRANT_FRAME; for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!gnttab_list[i]) goto grow_nomem; } for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; check_free_callbacks(); return (0); grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); return (ENOMEM); } static unsigned int __max_nr_grant_frames(void) { struct gnttab_query_size query; int rc; query.dom = DOMID_SELF; rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); if ((rc < 0) || (query.status != GNTST_okay)) return (4); /* Legacy max supported number of frames */ return (query.max_nr_frames); } static inline unsigned int max_nr_grant_frames(void) { return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); } #ifdef notyet /* * XXX needed for backend support * */ static int map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { unsigned long **frames = (unsigned long **)data; set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); (*frames)++; return 0; } static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } #endif static vm_paddr_t resume_frames; static void gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; /* * Loop backwards, so that the first hypercall has the largest index, * ensuring that the table will grow only once. */ do { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed to map gnttab"); } while (i-- > start_idx); } int gnttab_resume(device_t dev) { unsigned int max_nr_gframes, nr_gframes; nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return (ENOSYS); if (!resume_frames) { KASSERT(dev != NULL, ("No resume frames and no device provided")); gnttab_pseudo_phys_res = xenmem_alloc(dev, &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); if (gnttab_pseudo_phys_res == NULL) panic("Unable to reserve physical memory for gnttab"); resume_frames = rman_get_start(gnttab_pseudo_phys_res); shared = rman_get_virtual(gnttab_pseudo_phys_res); } gnttab_map(0, nr_gframes - 1); return (0); } static int gnttab_expand(unsigned int req_entries) { unsigned int cur, extra; cur = nr_grant_frames; extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); if (cur + extra > max_nr_grant_frames()) return (ENOSPC); gnttab_map(cur, cur + extra - 1); return (grow_gnttab_list(extra)); } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void granttable_identify(driver_t *driver __unused, device_t parent) { KASSERT(xen_domain(), ("Trying to attach grant-table device on non Xen domain")); /* * A single device instance for our driver is always present * in a system operating under Xen. */ if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) panic("unable to attach Xen Grant-table device"); } /** * \brief Probe for the existence of the Xen Grant-table device * * \param dev NewBus device_t for this instance. * * \return Always returns 0 indicating success. */ static int granttable_probe(device_t dev) { device_set_desc(dev, "Xen Grant-table Device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xen Grant-table device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int granttable_attach(device_t dev) { int i; unsigned int max_nr_glist_frames; unsigned int nr_init_grefs; nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ max_nr_glist_frames = (boot_max_nr_grant_frames * GREFS_PER_GRANT_FRAME / (PAGE_SIZE / sizeof(grant_ref_t))); gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), M_DEVBUF, M_NOWAIT); if (gnttab_list == NULL) return (ENOMEM); for (i = 0; i < nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (gnttab_list[i] == NULL) goto ini_nomem; } if (gnttab_resume(dev)) return (ENODEV); nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; if (bootverbose) printf("Grant table initialized\n"); return (0); ini_nomem: for (i--; i >= 0; i--) free(gnttab_list[i], M_DEVBUF); free(gnttab_list, M_DEVBUF); return (ENOMEM); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t granttable_methods[] = { /* Device interface */ DEVMETHOD(device_identify, granttable_identify), DEVMETHOD(device_probe, granttable_probe), DEVMETHOD(device_attach, granttable_attach), DEVMETHOD_END }; DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, SI_ORDER_FIRST); diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c index 459ac8bbe951..614ee8554a71 100644 --- a/sys/dev/xen/privcmd/privcmd.c +++ b/sys/dev/xen/privcmd/privcmd.c @@ -1,611 +1,611 @@ /* * Copyright (c) 2014 Roger Pau Monné * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device"); #define MAX_DMOP_BUFFERS 16 struct privcmd_map { vm_object_t mem; vm_size_t size; struct resource *pseudo_phys_res; int pseudo_phys_res_id; vm_paddr_t phys_base_addr; boolean_t mapped; BITSET_DEFINE_VAR() *err; }; static d_ioctl_t privcmd_ioctl; static d_open_t privcmd_open; static d_mmap_single_t privcmd_mmap_single; static struct cdevsw privcmd_devsw = { .d_version = D_VERSION, .d_ioctl = privcmd_ioctl, .d_mmap_single = privcmd_mmap_single, .d_open = privcmd_open, .d_name = "privcmd", }; static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); static void privcmd_pg_dtor(void *handle); static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres); static struct cdev_pager_ops privcmd_pg_ops = { .cdev_pg_fault = privcmd_pg_fault, .cdev_pg_ctor = privcmd_pg_ctor, .cdev_pg_dtor = privcmd_pg_dtor, }; struct per_user_data { domid_t dom; }; static device_t privcmd_dev = NULL; /*------------------------- Privcmd Pager functions --------------------------*/ static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { return (0); } static void privcmd_pg_dtor(void *handle) { struct xen_remove_from_physmap rm = { .domid = DOMID_SELF }; struct privcmd_map *map = handle; int error __diagused; vm_size_t i; vm_page_t m; /* * Remove the mappings from the used pages. This will remove the * underlying p2m bindings in Xen second stage translation. */ if (map->mapped == true) { VM_OBJECT_WLOCK(map->mem); retry: for (i = 0; i < map->size; i++) { m = vm_page_lookup(map->mem, i); if (m == NULL) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; cdev_pager_free_page(map->mem, m); } VM_OBJECT_WUNLOCK(map->mem); for (i = 0; i < map->size; i++) { rm.gpfn = atop(map->phys_base_addr) + i; HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm); } free(map->err, M_PRIVCMD); } error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id, map->pseudo_phys_res); KASSERT(error == 0, ("Unable to release memory resource: %d", error)); free(map, M_PRIVCMD); } static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct privcmd_map *map = object->handle; vm_pindex_t pidx; vm_page_t page; if (map->mapped != true) return (VM_PAGER_FAIL); pidx = OFF_TO_IDX(offset); if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err)) return (VM_PAGER_FAIL); page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset); if (page == NULL) return (VM_PAGER_FAIL); KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); KASSERT(vm_page_wired(page), ("page %p not wired", page)); KASSERT(!vm_page_busied(page), ("page %p is busy", page)); vm_page_busy_acquire(page, 0); vm_page_valid(page); if (*mres != NULL) vm_page_replace(page, object, pidx, *mres); else vm_page_insert(page, object, pidx); *mres = page; return (VM_PAGER_OK); } /*----------------------- Privcmd char device methods ------------------------*/ static int privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, vm_object_t *object, int nprot) { struct privcmd_map *map; map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO); map->size = OFF_TO_IDX(size); map->pseudo_phys_res_id = 0; map->pseudo_phys_res = xenmem_alloc(privcmd_dev, &map->pseudo_phys_res_id, size); if (map->pseudo_phys_res == NULL) { free(map, M_PRIVCMD); return (ENOMEM); } map->phys_base_addr = rman_get_start(map->pseudo_phys_res); map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops, size, nprot, *offset, NULL); if (map->mem == NULL) { xenmem_free(privcmd_dev, map->pseudo_phys_res_id, map->pseudo_phys_res); free(map, M_PRIVCMD); return (ENOMEM); } *object = map->mem; return (0); } static struct privcmd_map * setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num) { vm_map_t map; vm_map_entry_t entry; vm_object_t mem; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; struct privcmd_map *umap; int error; if ((num == 0) || ((addr & PAGE_MASK) != 0)) return NULL; map = &td->td_proc->p_vmspace->vm_map; error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex, &prot, &wired); if (error != KERN_SUCCESS || (entry->start != addr) || (entry->end != addr + (num * PAGE_SIZE))) return NULL; vm_map_lookup_done(map, entry); if ((mem->type != OBJT_MGTDEVICE) || (mem->un_pager.devp.ops != &privcmd_pg_ops)) return NULL; umap = mem->handle; /* Allocate a bitset to store broken page mappings. */ umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO); return umap; } static int privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int mode, struct thread *td) { int error; unsigned int i; void *data; const struct per_user_data *u; error = devfs_get_cdevpriv(&data); if (error != 0) return (EINVAL); /* * Constify user-data to prevent unintended changes to the restriction * limits. */ u = data; switch (cmd) { case IOCTL_PRIVCMD_HYPERCALL: { struct ioctl_privcmd_hypercall *hcall; hcall = (struct ioctl_privcmd_hypercall *)arg; /* Forbid hypercalls if restricted. */ if (u->dom != DOMID_INVALID) { error = EPERM; break; } #ifdef __amd64__ /* * The hypervisor page table walker will refuse to access * user-space pages if SMAP is enabled, so temporary disable it * while performing the hypercall. */ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) stac(); #endif error = privcmd_hypercall(hcall->op, hcall->arg[0], hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]); #ifdef __amd64__ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) clac(); #endif if (error >= 0) { hcall->retval = error; error = 0; } else { error = xen_translate_error(error); hcall->retval = 0; } break; } case IOCTL_PRIVCMD_MMAPBATCH: { struct ioctl_privcmd_mmapbatch *mmap; - struct xen_add_to_physmap_range add; + struct xen_add_to_physmap_batch add; xen_ulong_t *idxs; xen_pfn_t *gpfns; int *errs; unsigned int index; struct privcmd_map *umap; uint16_t num; mmap = (struct ioctl_privcmd_mmapbatch *)arg; if (u->dom != DOMID_INVALID && u->dom != mmap->dom) { error = EPERM; break; } umap = setup_virtual_area(td, mmap->addr, mmap->num); if (umap == NULL) { error = EINVAL; break; } add.domid = DOMID_SELF; add.space = XENMAPSPACE_gmfn_foreign; - add.foreign_domid = mmap->dom; + add.u.foreign_domid = mmap->dom; /* * The 'size' field in the xen_add_to_physmap_range only * allows for UINT16_MAX mappings in a single hypercall. */ num = MIN(mmap->num, UINT16_MAX); idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK); gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK); errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK); set_xen_guest_handle(add.idxs, idxs); set_xen_guest_handle(add.gpfns, gpfns); set_xen_guest_handle(add.errs, errs); for (index = 0; index < mmap->num; index += num) { num = MIN(mmap->num - index, UINT16_MAX); add.size = num; error = copyin(&mmap->arr[index], idxs, sizeof(idxs[0]) * num); if (error != 0) goto mmap_out; for (i = 0; i < num; i++) gpfns[i] = atop(umap->phys_base_addr + (i + index) * PAGE_SIZE); bzero(errs, sizeof(*errs) * num); error = HYPERVISOR_memory_op( - XENMEM_add_to_physmap_range, &add); + XENMEM_add_to_physmap_batch, &add); if (error != 0) { error = xen_translate_error(error); goto mmap_out; } for (i = 0; i < num; i++) { if (errs[i] != 0) { errs[i] = xen_translate_error(errs[i]); /* Mark the page as invalid. */ BIT_SET(mmap->num, index + i, umap->err); } } error = copyout(errs, &mmap->err[index], sizeof(errs[0]) * num); if (error != 0) goto mmap_out; } umap->mapped = true; mmap_out: free(idxs, M_PRIVCMD); free(gpfns, M_PRIVCMD); free(errs, M_PRIVCMD); if (!umap->mapped) free(umap->err, M_PRIVCMD); break; } case IOCTL_PRIVCMD_MMAP_RESOURCE: { struct ioctl_privcmd_mmapresource *mmap; struct xen_mem_acquire_resource adq; xen_pfn_t *gpfns; struct privcmd_map *umap; mmap = (struct ioctl_privcmd_mmapresource *)arg; if (u->dom != DOMID_INVALID && u->dom != mmap->dom) { error = EPERM; break; } bzero(&adq, sizeof(adq)); adq.domid = mmap->dom; adq.type = mmap->type; adq.id = mmap->id; /* Shortcut for getting the resource size. */ if (mmap->addr == 0 && mmap->num == 0) { error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq); if (error != 0) error = xen_translate_error(error); else mmap->num = adq.nr_frames; break; } umap = setup_virtual_area(td, mmap->addr, mmap->num); if (umap == NULL) { error = EINVAL; break; } adq.nr_frames = mmap->num; adq.frame = mmap->idx; gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK); for (i = 0; i < mmap->num; i++) gpfns[i] = atop(umap->phys_base_addr) + i; set_xen_guest_handle(adq.frame_list, gpfns); error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq); if (error != 0) error = xen_translate_error(error); else umap->mapped = true; free(gpfns, M_PRIVCMD); if (!umap->mapped) free(umap->err, M_PRIVCMD); break; } case IOCTL_PRIVCMD_DM_OP: { const struct ioctl_privcmd_dmop *dmop; struct privcmd_dmop_buf *bufs; struct xen_dm_op_buf *hbufs; dmop = (struct ioctl_privcmd_dmop *)arg; if (u->dom != DOMID_INVALID && u->dom != dmop->dom) { error = EPERM; break; } if (dmop->num == 0) break; if (dmop->num > MAX_DMOP_BUFFERS) { error = E2BIG; break; } bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK); error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num); if (error != 0) { free(bufs, M_PRIVCMD); break; } hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK); for (i = 0; i < dmop->num; i++) { set_xen_guest_handle(hbufs[i].h, bufs[i].uptr); hbufs[i].size = bufs[i].size; } #ifdef __amd64__ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) stac(); #endif error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs); #ifdef __amd64__ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) clac(); #endif if (error != 0) error = xen_translate_error(error); free(bufs, M_PRIVCMD); free(hbufs, M_PRIVCMD); break; } case IOCTL_PRIVCMD_RESTRICT: { struct per_user_data *u; domid_t dom; dom = *(domid_t *)arg; error = devfs_get_cdevpriv((void **)&u); if (error != 0) break; if (u->dom != DOMID_INVALID && u->dom != dom) { error = -EINVAL; break; } u->dom = dom; break; } default: error = ENOSYS; break; } return (error); } static void user_release(void *arg) { free(arg, M_PRIVCMD); } static int privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td) { struct per_user_data *u; int error; u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK); u->dom = DOMID_INVALID; /* Assign the allocated per_user_data to this open instance. */ error = devfs_set_cdevpriv(u, user_release); if (error != 0) { free(u, M_PRIVCMD); } return (error); } /*------------------ Private Device Attachment Functions --------------------*/ static void privcmd_identify(driver_t *driver, device_t parent) { KASSERT(xen_domain(), ("Trying to attach privcmd device on non Xen domain")); if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL) panic("unable to attach privcmd user-space device"); } static int privcmd_probe(device_t dev) { privcmd_dev = dev; device_set_desc(dev, "Xen privileged interface user-space device"); return (BUS_PROBE_NOWILDCARD); } static int privcmd_attach(device_t dev) { make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "xen/privcmd"); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t privcmd_methods[] = { DEVMETHOD(device_identify, privcmd_identify), DEVMETHOD(device_probe, privcmd_probe), DEVMETHOD(device_attach, privcmd_attach), DEVMETHOD_END }; static driver_t privcmd_driver = { "privcmd", privcmd_methods, 0, }; DRIVER_MODULE(privcmd, xenpv, privcmd_driver, 0, 0); MODULE_DEPEND(privcmd, xenpv, 1, 1, 1); diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index b9a909a14a67..262df1cefeea 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -1,1369 +1,1372 @@ /****************************************************************************** * xen_intr.c * * Xen event and interrupt services for x86 HVM guests. * * Copyright (c) 2002-2005, K A Fraser * Copyright (c) 2005, Intel Corporation * Copyright (c) 2012, Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif +/* The code below is the implementation of 2L event channels. */ +#define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS + static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); static u_int first_evtchn_irq; /** * Per-cpu event channel processing state. */ struct xen_intr_pcpu_data { /** * The last event channel bitmap section (level one bit) processed. * This is used to ensure we scan all ports before * servicing an already servied port again. */ u_int last_processed_l1i; /** * The last event channel processed within the event channel * bitmap being scanned. */ u_int last_processed_l2i; /** Pointer to this CPU's interrupt statistic counter. */ u_long *evtchn_intrcnt; /** * A bitmap of ports that can be serviced from this CPU. * A set bit means interrupt handling is enabled. */ u_long evtchn_enabled[sizeof(u_long) * 8]; }; /* * Start the scan at port 0 by initializing the last scanned * location as the highest numbered event channel port. */ DPCPU_DEFINE_STATIC(struct xen_intr_pcpu_data, xen_intr_pcpu) = { .last_processed_l1i = LONG_BIT - 1, .last_processed_l2i = LONG_BIT - 1 }; DPCPU_DECLARE(struct vcpu_info *, vcpu_info); #define XEN_INVALID_EVTCHN 0 /* Invalid event channel */ #define is_valid_evtchn(x) ((x) != XEN_INVALID_EVTCHN) struct xenisrc { struct intsrc xi_intsrc; enum evtchn_type xi_type; int xi_cpu; /* VCPU for delivery. */ int xi_vector; /* Global isrc vector number. */ evtchn_port_t xi_port; int xi_virq; void *xi_cookie; u_int xi_close:1; /* close on unbind? */ u_int xi_masked:1; volatile u_int xi_refcount; }; static void xen_intr_suspend(struct pic *); static void xen_intr_resume(struct pic *, bool suspend_cancelled); static void xen_intr_enable_source(struct intsrc *isrc); static void xen_intr_disable_source(struct intsrc *isrc, int eoi); static void xen_intr_eoi_source(struct intsrc *isrc); static void xen_intr_enable_intr(struct intsrc *isrc); static void xen_intr_disable_intr(struct intsrc *isrc); static int xen_intr_vector(struct intsrc *isrc); static int xen_intr_source_pending(struct intsrc *isrc); static int xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id); /** * PIC interface for all event channel port types except physical IRQs. */ struct pic xen_intr_pic = { .pic_enable_source = xen_intr_enable_source, .pic_disable_source = xen_intr_disable_source, .pic_eoi_source = xen_intr_eoi_source, .pic_enable_intr = xen_intr_enable_intr, .pic_disable_intr = xen_intr_disable_intr, .pic_vector = xen_intr_vector, .pic_source_pending = xen_intr_source_pending, .pic_suspend = xen_intr_suspend, .pic_resume = xen_intr_resume, .pic_config_intr = xen_intr_config_intr, .pic_assign_cpu = xen_intr_assign_cpu }; static struct mtx xen_intr_isrc_lock; static u_int xen_intr_auto_vector_count; static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; /*------------------------- Private Functions --------------------------------*/ /** * Retrieve a handle for a Xen interrupt source. * * \param isrc A valid Xen interrupt source structure. * * \returns A handle suitable for use with xen_intr_isrc_from_handle() * to retrieve the original Xen interrupt source structure. */ static inline xen_intr_handle_t xen_intr_handle_from_isrc(struct xenisrc *isrc) { return (isrc); } /** * Lookup a Xen interrupt source object given an interrupt binding handle. * * \param handle A handle initialized by a previous call to * xen_intr_bind_isrc(). * * \returns A pointer to the Xen interrupt source object associated * with the given interrupt handle. NULL if no association * currently exists. */ static inline struct xenisrc * xen_intr_isrc_from_handle(xen_intr_handle_t handle) { return ((struct xenisrc *)handle); } /** * Disable signal delivery for an event channel port on the * specified CPU. * * \param port The event channel port to mask. * * This API is used to manage the port<=>CPU binding of event * channel handlers. * * \note This operation does not preclude reception of an event * for this event channel on another CPU. To mask the * event channel globally, use evtchn_mask(). */ static inline void evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port) { struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); xen_clear_bit(port, pcpu->evtchn_enabled); } /** * Enable signal delivery for an event channel port on the * specified CPU. * * \param port The event channel port to unmask. * * This API is used to manage the port<=>CPU binding of event * channel handlers. * * \note This operation does not guarantee that event delivery * is enabled for this event channel port. The port must * also be globally enabled. See evtchn_unmask(). */ static inline void evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port) { struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); xen_set_bit(port, pcpu->evtchn_enabled); } /** * Allocate and register a per-cpu Xen upcall interrupt counter. * * \param cpu The cpu for which to register this interrupt count. */ static void xen_intr_intrcnt_add(u_int cpu) { char buf[MAXCOMLEN + 1]; struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); if (pcpu->evtchn_intrcnt != NULL) return; snprintf(buf, sizeof(buf), "cpu%d:xen", cpu); intrcnt_add(buf, &pcpu->evtchn_intrcnt); } /** * Search for an already allocated but currently unused Xen interrupt * source object. * * \param type Restrict the search to interrupt sources of the given * type. * * \return A pointer to a free Xen interrupt source object or NULL. */ static struct xenisrc * xen_intr_find_unused_isrc(enum evtchn_type type) { int isrc_idx; KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held")); for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx ++) { struct xenisrc *isrc; u_int vector; vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { KASSERT(isrc->xi_intsrc.is_handlers == 0, ("Free evtchn still has handlers")); isrc->xi_type = type; return (isrc); } } return (NULL); } /** * Allocate a Xen interrupt source object. * * \param type The type of interrupt source to create. * * \return A pointer to a newly allocated Xen interrupt source * object or NULL. */ static struct xenisrc * xen_intr_alloc_isrc(enum evtchn_type type) { static int warned; struct xenisrc *isrc; unsigned int vector; KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held")); if (xen_intr_auto_vector_count > NR_EVENT_CHANNELS) { if (!warned) { warned = 1; printf("%s: Event channels exhausted.\n", __func__); } return (NULL); } vector = first_evtchn_irq + xen_intr_auto_vector_count; xen_intr_auto_vector_count++; KASSERT((intr_lookup_source(vector) == NULL), ("Trying to use an already allocated vector")); mtx_unlock(&xen_intr_isrc_lock); isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO); isrc->xi_intsrc.is_pic = &xen_intr_pic; isrc->xi_vector = vector; isrc->xi_type = type; intr_register_source(&isrc->xi_intsrc); mtx_lock(&xen_intr_isrc_lock); return (isrc); } /** * Attempt to free an active Xen interrupt source object. * * \param isrc The interrupt source object to release. * * \returns EBUSY if the source is still in use, otherwise 0. */ static int xen_intr_release_isrc(struct xenisrc *isrc) { mtx_lock(&xen_intr_isrc_lock); KASSERT(isrc->xi_intsrc.is_handlers == 0, ("Release called, but xenisrc still in use")); evtchn_mask_port(isrc->xi_port); evtchn_clear_port(isrc->xi_port); /* Rebind port to CPU 0. */ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); evtchn_cpu_unmask_port(0, isrc->xi_port); if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) { struct evtchn_close close = { .port = isrc->xi_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); } xen_intr_port_to_isrc[isrc->xi_port] = NULL; isrc->xi_cpu = 0; isrc->xi_type = EVTCHN_TYPE_UNBOUND; isrc->xi_port = 0; isrc->xi_cookie = NULL; mtx_unlock(&xen_intr_isrc_lock); return (0); } /** * Associate an interrupt handler with an already allocated local Xen * event channel port. * * \param isrcp The returned Xen interrupt object associated with * the specified local port. * \param local_port The event channel to bind. * \param type The event channel type of local_port. * \param intr_owner The device making this bind request. * \param filter An interrupt filter handler. Specify NULL * to always dispatch to the ithread handler. * \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ static int xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, enum evtchn_type type, const char *intr_owner, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; int error; *isrcp = NULL; if (port_handlep == NULL) { printf("%s: %s: Bad event handle\n", intr_owner, __func__); return (EINVAL); } mtx_lock(&xen_intr_isrc_lock); isrc = xen_intr_find_unused_isrc(type); if (isrc == NULL) { isrc = xen_intr_alloc_isrc(type); if (isrc == NULL) { mtx_unlock(&xen_intr_isrc_lock); return (ENOSPC); } } isrc->xi_port = local_port; xen_intr_port_to_isrc[local_port] = isrc; refcount_init(&isrc->xi_refcount, 1); mtx_unlock(&xen_intr_isrc_lock); /* Assign the opaque handler */ *port_handlep = xen_intr_handle_from_isrc(isrc); #ifdef SMP if (type == EVTCHN_TYPE_PORT) { /* * By default all interrupts are assigned to vCPU#0 * unless specified otherwise, so shuffle them to balance * the interrupt load. */ xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0)); } #endif if (filter == NULL && handler == NULL) { /* * No filter/handler provided, leave the event channel * masked and without a valid handler, the caller is * in charge of setting that up. */ *isrcp = isrc; return (0); } error = xen_intr_add_handler(intr_owner, filter, handler, arg, flags, *port_handlep); if (error != 0) { xen_intr_release_isrc(isrc); return (error); } *isrcp = isrc; return (0); } /** * Determine the event channel ports at the given section of the * event port bitmap which have pending events for the given cpu. * * \param pcpu The Xen interrupt pcpu data for the cpu being queried. * \param sh The Xen shared info area. * \param idx The index of the section of the event channel bitmap to * inspect. * * \returns A u_long with bits set for every event channel with pending * events. */ static inline u_long xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh, u_int idx) { CTASSERT(sizeof(sh->evtchn_mask[0]) == sizeof(sh->evtchn_pending[0])); CTASSERT(sizeof(sh->evtchn_mask[0]) == sizeof(pcpu->evtchn_enabled[0])); CTASSERT(sizeof(sh->evtchn_mask) == sizeof(sh->evtchn_pending)); CTASSERT(sizeof(sh->evtchn_mask) == sizeof(pcpu->evtchn_enabled)); return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx] & pcpu->evtchn_enabled[idx]); } /** * Interrupt handler for processing all Xen event channel events. * * \param trap_frame The trap frame context for the current interrupt. */ void xen_intr_handle_upcall(struct trapframe *trap_frame) { u_int l1i, l2i, port, cpu __diagused; u_long masked_l1, masked_l2; struct xenisrc *isrc; shared_info_t *s; vcpu_info_t *v; struct xen_intr_pcpu_data *pc; u_long l1, l2; /* * Disable preemption in order to always check and fire events * on the right vCPU */ critical_enter(); cpu = PCPU_GET(cpuid); pc = DPCPU_PTR(xen_intr_pcpu); s = HYPERVISOR_shared_info; v = DPCPU_GET(vcpu_info); if (!xen_has_percpu_evtchn()) { KASSERT((cpu == 0), ("Fired PCI event callback on wrong CPU")); } v->evtchn_upcall_pending = 0; #if 0 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. */ wmb(); #endif #endif l1 = atomic_readandclear_long(&v->evtchn_pending_sel); l1i = pc->last_processed_l1i; l2i = pc->last_processed_l2i; (*pc->evtchn_intrcnt)++; while (l1 != 0) { l1i = (l1i + 1) % LONG_BIT; masked_l1 = l1 & ((~0UL) << l1i); if (masked_l1 == 0) { /* * if we masked out all events, wrap around * to the beginning. */ l1i = LONG_BIT - 1; l2i = LONG_BIT - 1; continue; } l1i = ffsl(masked_l1) - 1; do { l2 = xen_intr_active_ports(pc, s, l1i); l2i = (l2i + 1) % LONG_BIT; masked_l2 = l2 & ((~0UL) << l2i); if (masked_l2 == 0) { /* if we masked out all events, move on */ l2i = LONG_BIT - 1; break; } l2i = ffsl(masked_l2) - 1; /* process port */ port = (l1i * LONG_BIT) + l2i; synch_clear_bit(port, &s->evtchn_pending[0]); isrc = xen_intr_port_to_isrc[port]; if (__predict_false(isrc == NULL)) continue; /* Make sure we are firing on the right vCPU */ KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)), ("Received unexpected event on vCPU#%d, event bound to vCPU#%d", PCPU_GET(cpuid), isrc->xi_cpu)); intr_execute_handlers(&isrc->xi_intsrc, trap_frame); /* * If this is the final port processed, * we'll pick up here+1 next time. */ pc->last_processed_l1i = l1i; pc->last_processed_l2i = l2i; } while (l2i != LONG_BIT - 1); l2 = xen_intr_active_ports(pc, s, l1i); if (l2 == 0) { /* * We handled all ports, so we can clear the * selector bit. */ l1 &= ~(1UL << l1i); } } if (xen_evtchn_needs_ack) lapic_eoi(); critical_exit(); } static int xen_intr_init(void *dummy __unused) { shared_info_t *s = HYPERVISOR_shared_info; struct xen_intr_pcpu_data *pcpu; int i; if (!xen_domain()) return (0); mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); /* * Set the per-cpu mask of CPU#0 to enable all, since by default all * event channels are bound to CPU#0. */ CPU_FOREACH(i) { pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); } for (i = 0; i < nitems(s->evtchn_mask); i++) atomic_store_rel_long(&s->evtchn_mask[i], ~0); intr_register_pic(&xen_intr_pic); if (bootverbose) printf("Xen interrupt system initialized\n"); return (0); } SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL); static void xen_intrcnt_init(void *dummy __unused) { unsigned int i; if (!xen_domain()) return; /* * Register interrupt count manually as we aren't guaranteed to see a * call to xen_intr_assign_cpu() before our first interrupt. */ CPU_FOREACH(i) xen_intr_intrcnt_add(i); } SYSINIT(xen_intrcnt_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intrcnt_init, NULL); void xen_intr_alloc_irqs(void) { if (num_io_irqs > UINT_MAX - NR_EVENT_CHANNELS) panic("IRQ allocation overflow (num_msi_irqs too high?)"); first_evtchn_irq = num_io_irqs; num_io_irqs += NR_EVENT_CHANNELS; } /*--------------------------- Common PIC Functions ---------------------------*/ /** * Prepare this PIC for system suspension. */ static void xen_intr_suspend(struct pic *unused) { } static void xen_rebind_ipi(struct xenisrc *isrc) { #ifdef SMP int cpu = isrc->xi_cpu; int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; int error; struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id }; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) panic("unable to rebind xen IPI: %d", error); isrc->xi_port = bind_ipi.port; isrc->xi_cpu = 0; xen_intr_port_to_isrc[bind_ipi.port] = isrc; error = xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); if (error) panic("unable to bind xen IPI to CPU#%d: %d", cpu, error); evtchn_unmask_port(bind_ipi.port); #else panic("Resume IPI event channel on UP"); #endif } static void xen_rebind_virq(struct xenisrc *isrc) { int cpu = isrc->xi_cpu; int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; int error; struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq, .vcpu = vcpu_id }; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error); isrc->xi_port = bind_virq.port; isrc->xi_cpu = 0; xen_intr_port_to_isrc[bind_virq.port] = isrc; #ifdef SMP error = xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); if (error) panic("unable to bind xen VIRQ#%d to CPU#%d: %d", isrc->xi_virq, cpu, error); #endif evtchn_unmask_port(bind_virq.port); } /** * Return this PIC to service after being suspended. */ static void xen_intr_resume(struct pic *unused, bool suspend_cancelled) { shared_info_t *s = HYPERVISOR_shared_info; struct xenisrc *isrc; u_int isrc_idx; int i; if (suspend_cancelled) return; /* Reset the per-CPU masks */ CPU_FOREACH(i) { struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); } /* Mask all event channels. */ for (i = 0; i < nitems(s->evtchn_mask); i++) atomic_store_rel_long(&s->evtchn_mask[i], ~0); /* Remove port -> isrc mappings */ memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc)); /* Free unused isrcs and rebind VIRQs and IPIs */ for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) { u_int vector; vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL) { isrc->xi_port = 0; switch (isrc->xi_type) { case EVTCHN_TYPE_IPI: xen_rebind_ipi(isrc); break; case EVTCHN_TYPE_VIRQ: xen_rebind_virq(isrc); break; default: break; } } } } /** * Disable a Xen interrupt source. * * \param isrc The interrupt source to disable. */ static void xen_intr_disable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; evtchn_mask_port(isrc->xi_port); } /** * Determine the global interrupt vector number for * a Xen interrupt source. * * \param isrc The interrupt source to query. * * \return The vector number corresponding to the given interrupt source. */ static int xen_intr_vector(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; return (isrc->xi_vector); } /** * Determine whether or not interrupt events are pending on the * the given interrupt source. * * \param isrc The interrupt source to query. * * \returns 0 if no events are pending, otherwise non-zero. */ static int xen_intr_source_pending(struct intsrc *isrc) { /* * EventChannels are edge triggered and never masked. * There can be no pending events. */ return (0); } /** * Perform configuration of an interrupt source. * * \param isrc The interrupt source to configure. * \param trig Edge or level. * \param pol Active high or low. * * \returns 0 if no events are pending, otherwise non-zero. */ static int xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { /* Configuration is only possible via the evtchn apis. */ return (ENODEV); } /** * Configure CPU affinity for interrupt source event delivery. * * \param isrc The interrupt source to configure. * \param apic_id The apic id of the CPU for handling future events. * * \returns 0 if successful, otherwise an errno. */ static int xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) { #ifdef SMP struct evtchn_bind_vcpu bind_vcpu; struct xenisrc *isrc; u_int to_cpu, vcpu_id; int error, masked; if (!xen_has_percpu_evtchn()) return (EOPNOTSUPP); to_cpu = apic_cpuid(apic_id); vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id; mtx_lock(&xen_intr_isrc_lock); isrc = (struct xenisrc *)base_isrc; if (!is_valid_evtchn(isrc->xi_port)) { mtx_unlock(&xen_intr_isrc_lock); return (EINVAL); } /* * Mask the event channel while binding it to prevent interrupt * delivery with an inconsistent state in isrc->xi_cpu. */ masked = evtchn_test_and_set_mask(isrc->xi_port); if ((isrc->xi_type == EVTCHN_TYPE_VIRQ) || (isrc->xi_type == EVTCHN_TYPE_IPI)) { /* * Virtual IRQs are associated with a cpu by * the Hypervisor at evtchn_bind_virq time, so * all we need to do is update the per-CPU masks. */ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); isrc->xi_cpu = to_cpu; evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); goto out; } bind_vcpu.port = isrc->xi_port; bind_vcpu.vcpu = vcpu_id; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu); if (isrc->xi_cpu != to_cpu) { if (error == 0) { /* Commit to new binding by removing the old one. */ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); isrc->xi_cpu = to_cpu; evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); } } out: if (masked == 0) evtchn_unmask_port(isrc->xi_port); mtx_unlock(&xen_intr_isrc_lock); return (0); #else return (EOPNOTSUPP); #endif } /*------------------- Virtual Interrupt Source PIC Functions -----------------*/ /* * Mask a level triggered interrupt source. * * \param isrc The interrupt source to mask (if necessary). * \param eoi If non-zero, perform any necessary end-of-interrupt * acknowledgements. */ static void xen_intr_disable_source(struct intsrc *base_isrc, int eoi) { struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; /* * NB: checking if the event channel is already masked is * needed because the event channel user-space device * masks event channels on its filter as part of its * normal operation, and those shouldn't be automatically * unmasked by the generic interrupt code. The event channel * device will unmask them when needed. */ isrc->xi_masked = !!evtchn_test_and_set_mask(isrc->xi_port); } /* * Unmask a level triggered interrupt source. * * \param isrc The interrupt source to unmask (if necessary). */ static void xen_intr_enable_source(struct intsrc *base_isrc) { struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; if (isrc->xi_masked == 0) evtchn_unmask_port(isrc->xi_port); } /* * Perform any necessary end-of-interrupt acknowledgements. * * \param isrc The interrupt source to EOI. */ static void xen_intr_eoi_source(struct intsrc *base_isrc) { } /* * Enable and unmask the interrupt source. * * \param isrc The interrupt source to enable. */ static void xen_intr_enable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; evtchn_unmask_port(isrc->xi_port); } /*--------------------------- Public Functions -------------------------------*/ /*------- API comments for these methods can be found in xen/xenintr.h -------*/ int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; int error; error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); if (error != 0) return (error); /* * The Event Channel API didn't open this port, so it is not * responsible for closing it automatically on unbind. */ isrc->xi_close = 0; return (0); } int xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; struct evtchn_alloc_unbound alloc_unbound; int error; alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = remote_domain; error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc_unbound); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); if (error != 0) { evtchn_close_t close = { .port = alloc_unbound.port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } isrc->xi_close = 1; return (0); } int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, u_int remote_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; struct evtchn_bind_interdomain bind_interdomain; int error; bind_interdomain.remote_dom = remote_domain; bind_interdomain.remote_port = remote_port; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port, EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); if (error) { evtchn_close_t close = { .port = bind_interdomain.local_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; return (0); } int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; struct xenisrc *isrc; struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id }; int error; isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); #ifdef SMP if (error == 0) error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); #endif if (error != 0) { evtchn_close_t close = { .port = bind_virq.port }; xen_intr_unbind(*port_handlep); if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } #ifdef SMP if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt * code to perform the binding. Update our event channel * masks manually so events can't fire on the wrong cpu * during AP startup. */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } #endif /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; isrc->xi_virq = virq; return (0); } int xen_intr_alloc_and_bind_ipi(u_int cpu, driver_filter_t filter, enum intr_type flags, xen_intr_handle_t *port_handlep) { #ifdef SMP int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; struct xenisrc *isrc; struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id }; /* Same size as the one used by intr_handler->ih_name. */ char name[MAXCOMLEN + 1]; int error; isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } snprintf(name, sizeof(name), "cpu%u", cpu); error = xen_intr_bind_isrc(&isrc, bind_ipi.port, EVTCHN_TYPE_IPI, name, filter, NULL, NULL, flags, port_handlep); if (error != 0) { evtchn_close_t close = { .port = bind_ipi.port }; xen_intr_unbind(*port_handlep); if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt * code to perform the binding. Update our event channel * masks manually so events can't fire on the wrong cpu * during AP startup. */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; return (0); #else return (EOPNOTSUPP); #endif } int xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) { char descr[MAXCOMLEN + 1]; struct xenisrc *isrc; va_list ap; isrc = xen_intr_isrc_from_handle(port_handle); if (isrc == NULL) return (EINVAL); va_start(ap, fmt); vsnprintf(descr, sizeof(descr), fmt, ap); va_end(ap); return (intr_describe(isrc->xi_vector, isrc->xi_cookie, descr)); } void xen_intr_unbind(xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; KASSERT(port_handlep != NULL, ("NULL xen_intr_handle_t passed to %s", __func__)); isrc = xen_intr_isrc_from_handle(*port_handlep); *port_handlep = NULL; if (isrc == NULL) return; mtx_lock(&xen_intr_isrc_lock); if (refcount_release(&isrc->xi_refcount) == 0) { mtx_unlock(&xen_intr_isrc_lock); return; } mtx_unlock(&xen_intr_isrc_lock); if (isrc->xi_cookie != NULL) intr_remove_handler(isrc->xi_cookie); xen_intr_release_isrc(isrc); } void xen_intr_signal(xen_intr_handle_t handle) { struct xenisrc *isrc; isrc = xen_intr_isrc_from_handle(handle); if (isrc != NULL) { KASSERT(isrc->xi_type == EVTCHN_TYPE_PORT || isrc->xi_type == EVTCHN_TYPE_IPI, ("evtchn_signal on something other than a local port")); struct evtchn_send send = { .port = isrc->xi_port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); } } evtchn_port_t xen_intr_port(xen_intr_handle_t handle) { struct xenisrc *isrc; isrc = xen_intr_isrc_from_handle(handle); if (isrc == NULL) return (0); return (isrc->xi_port); } int xen_intr_add_handler(const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t handle) { struct xenisrc *isrc; int error; isrc = xen_intr_isrc_from_handle(handle); if (isrc == NULL || isrc->xi_cookie != NULL) return (EINVAL); error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg, flags|INTR_EXCL, &isrc->xi_cookie, 0); if (error != 0) printf("%s: %s: add handler failed: %d\n", name, __func__, error); return (error); } int xen_intr_get_evtchn_from_port(evtchn_port_t port, xen_intr_handle_t *handlep) { if (!is_valid_evtchn(port) || port >= NR_EVENT_CHANNELS) return (EINVAL); if (handlep == NULL) { return (EINVAL); } mtx_lock(&xen_intr_isrc_lock); if (xen_intr_port_to_isrc[port] == NULL) { mtx_unlock(&xen_intr_isrc_lock); return (EINVAL); } refcount_acquire(&xen_intr_port_to_isrc[port]->xi_refcount); mtx_unlock(&xen_intr_isrc_lock); /* Assign the opaque handler */ *handlep = xen_intr_handle_from_isrc(xen_intr_port_to_isrc[port]); return (0); } #ifdef DDB static const char * xen_intr_print_type(enum evtchn_type type) { static const char *evtchn_type_to_string[EVTCHN_TYPE_COUNT] = { [EVTCHN_TYPE_UNBOUND] = "UNBOUND", [EVTCHN_TYPE_VIRQ] = "VIRQ", [EVTCHN_TYPE_IPI] = "IPI", [EVTCHN_TYPE_PORT] = "PORT", }; if (type >= EVTCHN_TYPE_COUNT) return ("UNKNOWN"); return (evtchn_type_to_string[type]); } static void xen_intr_dump_port(struct xenisrc *isrc) { struct xen_intr_pcpu_data *pcpu; shared_info_t *s = HYPERVISOR_shared_info; int i; db_printf("Port %d Type: %s\n", isrc->xi_port, xen_intr_print_type(isrc->xi_type)); if (isrc->xi_type == EVTCHN_TYPE_VIRQ) db_printf("\tVirq: %d\n", isrc->xi_virq); db_printf("\tMasked: %d Pending: %d\n", !!xen_test_bit(isrc->xi_port, &s->evtchn_mask[0]), !!xen_test_bit(isrc->xi_port, &s->evtchn_pending[0])); db_printf("\tPer-CPU Masks: "); CPU_FOREACH(i) { pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); db_printf("cpu#%d: %d ", i, !!xen_test_bit(isrc->xi_port, pcpu->evtchn_enabled)); } db_printf("\n"); } DB_SHOW_COMMAND(xen_evtchn, db_show_xen_evtchn) { int i; if (!xen_domain()) { db_printf("Only available on Xen guests\n"); return; } for (i = 0; i < NR_EVENT_CHANNELS; i++) { struct xenisrc *isrc; isrc = xen_intr_port_to_isrc[i]; if (isrc == NULL) continue; xen_intr_dump_port(isrc); } } #endif /* DDB */ diff --git a/sys/xen/xen-os.h b/sys/xen/xen-os.h index 183724be9749..e1d03b7322bc 100644 --- a/sys/xen/xen-os.h +++ b/sys/xen/xen-os.h @@ -1,171 +1,171 @@ /****************************************************************************** * xen/xen-os.h * * Random collection of macros and definition * * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _XEN_XEN_OS_H_ #define _XEN_XEN_OS_H_ -#define __XEN_INTERFACE_VERSION__ 0x00030208 +#define __XEN_INTERFACE_VERSION__ 0x00040d00 #define GRANT_REF_INVALID 0xffffffff #ifdef LOCORE #define __ASSEMBLY__ #endif #include #ifndef __ASSEMBLY__ #include #include static inline vm_paddr_t xen_get_xenstore_mfn(void) { return (hvm_get_parameter(HVM_PARAM_STORE_PFN)); } static inline evtchn_port_t xen_get_xenstore_evtchn(void) { return (hvm_get_parameter(HVM_PARAM_STORE_EVTCHN)); } static inline vm_paddr_t xen_get_console_mfn(void) { return (hvm_get_parameter(HVM_PARAM_CONSOLE_PFN)); } static inline evtchn_port_t xen_get_console_evtchn(void) { return (hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN)); } extern shared_info_t *HYPERVISOR_shared_info; extern bool xen_suspend_cancelled; enum xen_domain_type { XEN_NATIVE, /* running on bare hardware */ XEN_PV_DOMAIN, /* running in a PV domain */ XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ }; extern enum xen_domain_type xen_domain_type; static inline int xen_domain(void) { return (xen_domain_type != XEN_NATIVE); } static inline int xen_pv_domain(void) { return (xen_domain_type == XEN_PV_DOMAIN); } static inline int xen_hvm_domain(void) { return (xen_domain_type == XEN_HVM_DOMAIN); } static inline bool xen_initial_domain(void) { return (xen_domain() && (hvm_start_flags & SIF_INITDOMAIN) != 0); } #endif #include /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ /* * Based on ofed/include/linux/bitops.h * * Those helpers are prefixed by xen_ because xen-os.h is widely included * and we don't want the other drivers using them. * */ #define NBPL (NBBY * sizeof(long)) static inline bool xen_test_bit(int bit, volatile long *addr) { unsigned long mask = 1UL << (bit % NBPL); return !!(atomic_load_acq_long(&addr[bit / NBPL]) & mask); } static inline void xen_set_bit(int bit, volatile long *addr) { atomic_set_long(&addr[bit / NBPL], 1UL << (bit % NBPL)); } static inline void xen_clear_bit(int bit, volatile long *addr) { atomic_clear_long(&addr[bit / NBPL], 1UL << (bit % NBPL)); } #undef NBPL /* * Functions to allocate/free unused memory in order * to map memory from other domains. */ struct resource *xenmem_alloc(device_t dev, int *res_id, size_t size); int xenmem_free(device_t dev, int res_id, struct resource *res); /* Debug/emergency function, prints directly to hypervisor console */ void xc_printf(const char *, ...) __printflike(1, 2); #ifndef xen_mb #define xen_mb() mb() #endif #ifndef xen_rmb #define xen_rmb() rmb() #endif #ifndef xen_wmb #define xen_wmb() wmb() #endif #endif /* !__ASSEMBLY__ */ #endif /* _XEN_XEN_OS_H_ */