diff --git a/sys/amd64/include/xen/synch_bitops.h b/sys/amd64/include/xen/synch_bitops.h deleted file mode 100644 index 746687aa91bd..000000000000 --- a/sys/amd64/include/xen/synch_bitops.h +++ /dev/null @@ -1,129 +0,0 @@ -#ifndef __XEN_SYNCH_BITOPS_H__ -#define __XEN_SYNCH_BITOPS_H__ - -/* - * Copyright 1992, Linus Torvalds. - * Heavily modified to provide guaranteed strong synchronisation - * when communicating with Xen or other guest OSes running on other CPUs. - */ - - -#define ADDR (*(volatile long *) addr) - -static __inline__ void synch_set_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btsl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ void synch_clear_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btrl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ void synch_change_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btcl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "lock btsl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "lock btrl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__ ( - "lock btcl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -struct __synch_xchg_dummy { unsigned long a[100]; }; -#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x)) - -#define synch_cmpxchg(ptr, old, new) \ -((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ - (unsigned long)(old), \ - (unsigned long)(new), \ - sizeof(*(ptr)))) - 
-static inline unsigned long __synch_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; - case 4: - __asm__ __volatile__("lock; cmpxchgl %k1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; - case 8: - __asm__ __volatile__("lock; cmpxchgq %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; - } - return old; -} - -static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & - (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int synch_var_test_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "btl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); - return oldbit; -} - -#define synch_test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? \ - synch_const_test_bit((nr),(addr)) : \ - synch_var_test_bit((nr),(addr))) - -#endif /* __XEN_SYNCH_BITOPS_H__ */ diff --git a/sys/dev/xen/evtchn/evtchn_dev.c b/sys/dev/xen/evtchn/evtchn_dev.c index 34f481358008..a217120222e6 100644 --- a/sys/dev/xen/evtchn/evtchn_dev.c +++ b/sys/dev/xen/evtchn/evtchn_dev.c @@ -1,603 +1,601 @@ /****************************************************************************** * evtchn.c * * Driver for receiving and demuxing event-channel signals. 
* * Copyright (c) 2004-2005, K A Fraser * Multi-process extensions Copyright (c) 2004, Steven Smith * FreeBSD port Copyright (c) 2014, Roger Pau Monné * Fetched from git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git * File: drivers/xen/evtchn.c * Git commit: 0dc0064add422bc0ef5165ebe9ece3052bbd457d * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include - #include #include #include #include MALLOC_DEFINE(M_EVTCHN, "evtchn_dev", "Xen event channel user-space device"); struct user_evtchn; static int evtchn_cmp(struct user_evtchn *u1, struct user_evtchn *u2); RB_HEAD(evtchn_tree, user_evtchn); struct per_user_data { struct mtx bind_mutex; /* serialize bind/unbind operations */ struct evtchn_tree evtchns; /* Notification ring, accessed via /dev/xen/evtchn. */ #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) evtchn_port_t *ring; unsigned int ring_cons, ring_prod, ring_overflow; struct sx ring_cons_mutex; /* protect against concurrent readers */ struct mtx ring_prod_mutex; /* product against concurrent interrupts */ struct selinfo ev_rsel; }; struct user_evtchn { RB_ENTRY(user_evtchn) node; struct per_user_data *user; evtchn_port_t port; xen_intr_handle_t handle; bool enabled; }; RB_GENERATE_STATIC(evtchn_tree, user_evtchn, node, evtchn_cmp); static device_t evtchn_dev; static d_read_t evtchn_read; static d_write_t evtchn_write; static d_ioctl_t evtchn_ioctl; static d_poll_t evtchn_poll; static d_open_t evtchn_open; static void evtchn_release(void *arg); static struct cdevsw evtchn_devsw = { .d_version = D_VERSION, .d_open = evtchn_open, .d_read = evtchn_read, .d_write = evtchn_write, .d_ioctl = evtchn_ioctl, .d_poll = evtchn_poll, .d_name = "evtchn", }; /*------------------------- Red-black tree helpers ---------------------------*/ static int evtchn_cmp(struct user_evtchn *u1, struct user_evtchn *u2) { return (u1->port - u2->port); } static struct user_evtchn * find_evtchn(struct per_user_data *u, evtchn_port_t port) { struct user_evtchn tmp = { .port = port, }; return (RB_FIND(evtchn_tree, &u->evtchns, &tmp)); } 
/*--------------------------- Interrupt handlers -----------------------------*/ static int evtchn_filter(void *arg) { struct user_evtchn *evtchn; evtchn = arg; if (!evtchn->enabled && bootverbose) { device_printf(evtchn_dev, "Received upcall for disabled event channel %d\n", evtchn->port); } evtchn_mask_port(evtchn->port); evtchn->enabled = false; return (FILTER_SCHEDULE_THREAD); } static void evtchn_interrupt(void *arg) { struct user_evtchn *evtchn; struct per_user_data *u; evtchn = arg; u = evtchn->user; /* * Protect against concurrent events using this handler * on different CPUs. */ mtx_lock(&u->ring_prod_mutex); if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port; wmb(); /* Ensure ring contents visible */ if (u->ring_cons == u->ring_prod++) { wakeup(u); selwakeup(&u->ev_rsel); } } else u->ring_overflow = 1; mtx_unlock(&u->ring_prod_mutex); } /*------------------------- Character device methods -------------------------*/ static int evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) { struct per_user_data *u; int error; u = malloc(sizeof(*u), M_EVTCHN, M_WAITOK | M_ZERO); u->ring = malloc(PAGE_SIZE, M_EVTCHN, M_WAITOK | M_ZERO); /* Initialize locks */ mtx_init(&u->bind_mutex, "evtchn_bind_mutex", NULL, MTX_DEF); sx_init(&u->ring_cons_mutex, "evtchn_ringc_sx"); mtx_init(&u->ring_prod_mutex, "evtchn_ringp_mutex", NULL, MTX_DEF); /* Initialize red-black tree. */ RB_INIT(&u->evtchns); /* Assign the allocated per_user_data to this open instance. 
*/ error = devfs_set_cdevpriv(u, evtchn_release); if (error != 0) { mtx_destroy(&u->bind_mutex); mtx_destroy(&u->ring_prod_mutex); sx_destroy(&u->ring_cons_mutex); free(u->ring, M_EVTCHN); free(u, M_EVTCHN); } return (error); } static void evtchn_release(void *arg) { struct per_user_data *u; struct user_evtchn *evtchn, *tmp; u = arg; seldrain(&u->ev_rsel); RB_FOREACH_SAFE(evtchn, evtchn_tree, &u->evtchns, tmp) { xen_intr_unbind(&evtchn->handle); RB_REMOVE(evtchn_tree, &u->evtchns, evtchn); free(evtchn, M_EVTCHN); } mtx_destroy(&u->bind_mutex); mtx_destroy(&u->ring_prod_mutex); sx_destroy(&u->ring_cons_mutex); free(u->ring, M_EVTCHN); free(u, M_EVTCHN); } static int evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) { int error, count; unsigned int c, p, bytes1 = 0, bytes2 = 0; struct per_user_data *u; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (EINVAL); /* Whole number of ports. */ count = uio->uio_resid; count &= ~(sizeof(evtchn_port_t)-1); if (count == 0) return (0); if (count > PAGE_SIZE) count = PAGE_SIZE; sx_xlock(&u->ring_cons_mutex); for (;;) { if (u->ring_overflow) { error = EFBIG; goto unlock_out; } c = u->ring_cons; p = u->ring_prod; if (c != p) break; if (ioflag & IO_NDELAY) { error = EWOULDBLOCK; goto unlock_out; } error = sx_sleep(u, &u->ring_cons_mutex, PCATCH, "evtchw", 0); if ((error != 0) && (error != EWOULDBLOCK)) goto unlock_out; } /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(evtchn_port_t); bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); } else { bytes1 = (p - c) * sizeof(evtchn_port_t); bytes2 = 0; } /* Truncate chunks according to caller's maximum byte count. */ if (bytes1 > count) { bytes1 = count; bytes2 = 0; } else if ((bytes1 + bytes2) > count) { bytes2 = count - bytes1; } error = EFAULT; rmb(); /* Ensure that we see the port before we copy it. 
*/ if (uiomove(&u->ring[EVTCHN_RING_MASK(c)], bytes1, uio) || ((bytes2 != 0) && uiomove(&u->ring[0], bytes2, uio))) goto unlock_out; u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); error = 0; unlock_out: sx_xunlock(&u->ring_cons_mutex); return (error); } static int evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) { int error, i, count; evtchn_port_t *kbuf; struct per_user_data *u; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (EINVAL); kbuf = malloc(PAGE_SIZE, M_EVTCHN, M_WAITOK); count = uio->uio_resid; /* Whole number of ports. */ count &= ~(sizeof(evtchn_port_t)-1); error = 0; if (count == 0) goto out; if (count > PAGE_SIZE) count = PAGE_SIZE; error = uiomove(kbuf, count, uio); if (error != 0) goto out; mtx_lock(&u->bind_mutex); for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { evtchn_port_t port = kbuf[i]; struct user_evtchn *evtchn; evtchn = find_evtchn(u, port); if (evtchn && !evtchn->enabled) { evtchn->enabled = true; evtchn_unmask_port(evtchn->port); } } mtx_unlock(&u->bind_mutex); error = 0; out: free(kbuf, M_EVTCHN); return (error); } static inline int evtchn_bind_user_port(struct per_user_data *u, struct user_evtchn *evtchn) { int error; evtchn->port = xen_intr_port(evtchn->handle); evtchn->user = u; evtchn->enabled = true; mtx_lock(&u->bind_mutex); RB_INSERT(evtchn_tree, &u->evtchns, evtchn); mtx_unlock(&u->bind_mutex); error = xen_intr_add_handler(device_get_nameunit(evtchn_dev), evtchn_filter, evtchn_interrupt, evtchn, INTR_TYPE_MISC | INTR_MPSAFE, evtchn->handle); if (error != 0) { xen_intr_unbind(&evtchn->handle); mtx_lock(&u->bind_mutex); RB_REMOVE(evtchn_tree, &u->evtchns, evtchn); mtx_unlock(&u->bind_mutex); free(evtchn, M_EVTCHN); } return (error); } static int evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int mode, struct thread *td __unused) { struct per_user_data *u; int error; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (EINVAL); switch (cmd) { case 
IOCTL_EVTCHN_BIND_VIRQ: { struct ioctl_evtchn_bind_virq *bind; struct user_evtchn *evtchn; evtchn = malloc(sizeof(*evtchn), M_EVTCHN, M_WAITOK | M_ZERO); bind = (struct ioctl_evtchn_bind_virq *)arg; error = xen_intr_bind_virq(evtchn_dev, bind->virq, 0, NULL, NULL, NULL, 0, &evtchn->handle); if (error != 0) { free(evtchn, M_EVTCHN); break; } error = evtchn_bind_user_port(u, evtchn); if (error != 0) break; bind->port = evtchn->port; break; } case IOCTL_EVTCHN_BIND_INTERDOMAIN: { struct ioctl_evtchn_bind_interdomain *bind; struct user_evtchn *evtchn; evtchn = malloc(sizeof(*evtchn), M_EVTCHN, M_WAITOK | M_ZERO); bind = (struct ioctl_evtchn_bind_interdomain *)arg; error = xen_intr_bind_remote_port(evtchn_dev, bind->remote_domain, bind->remote_port, NULL, NULL, NULL, 0, &evtchn->handle); if (error != 0) { free(evtchn, M_EVTCHN); break; } error = evtchn_bind_user_port(u, evtchn); if (error != 0) break; bind->port = evtchn->port; break; } case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { struct ioctl_evtchn_bind_unbound_port *bind; struct user_evtchn *evtchn; evtchn = malloc(sizeof(*evtchn), M_EVTCHN, M_WAITOK | M_ZERO); bind = (struct ioctl_evtchn_bind_unbound_port *)arg; error = xen_intr_alloc_and_bind_local_port(evtchn_dev, bind->remote_domain, NULL, NULL, NULL, 0, &evtchn->handle); if (error != 0) { free(evtchn, M_EVTCHN); break; } error = evtchn_bind_user_port(u, evtchn); if (error != 0) break; bind->port = evtchn->port; break; } case IOCTL_EVTCHN_UNBIND: { struct ioctl_evtchn_unbind *unbind; struct user_evtchn *evtchn; unbind = (struct ioctl_evtchn_unbind *)arg; mtx_lock(&u->bind_mutex); evtchn = find_evtchn(u, unbind->port); if (evtchn == NULL) { error = ENOTCONN; break; } RB_REMOVE(evtchn_tree, &u->evtchns, evtchn); mtx_unlock(&u->bind_mutex); xen_intr_unbind(&evtchn->handle); free(evtchn, M_EVTCHN); error = 0; break; } case IOCTL_EVTCHN_NOTIFY: { struct ioctl_evtchn_notify *notify; struct user_evtchn *evtchn; notify = (struct ioctl_evtchn_notify *)arg; 
mtx_lock(&u->bind_mutex); evtchn = find_evtchn(u, notify->port); if (evtchn == NULL) { error = ENOTCONN; break; } xen_intr_signal(evtchn->handle); mtx_unlock(&u->bind_mutex); error = 0; break; } case IOCTL_EVTCHN_RESET: { /* Initialise the ring to empty. Clear errors. */ sx_xlock(&u->ring_cons_mutex); mtx_lock(&u->ring_prod_mutex); u->ring_cons = u->ring_prod = u->ring_overflow = 0; mtx_unlock(&u->ring_prod_mutex); sx_xunlock(&u->ring_cons_mutex); error = 0; break; } case FIONBIO: case FIOASYNC: /* Handled in an upper layer */ error = 0; break; default: error = ENOTTY; break; } return (error); } static int evtchn_poll(struct cdev *dev, int events, struct thread *td) { struct per_user_data *u; int error, mask; error = devfs_get_cdevpriv((void **)&u); if (error != 0) return (POLLERR); /* we can always write */ mask = events & (POLLOUT | POLLWRNORM); mtx_lock(&u->ring_prod_mutex); if (events & (POLLIN | POLLRDNORM)) { if (u->ring_cons != u->ring_prod) { mask |= events & (POLLIN | POLLRDNORM); } else { /* Record that someone is waiting */ selrecord(td, &u->ev_rsel); } } mtx_unlock(&u->ring_prod_mutex); return (mask); } /*------------------ Private Device Attachment Functions --------------------*/ static void evtchn_identify(driver_t *driver, device_t parent) { KASSERT((xen_domain()), ("Trying to attach evtchn device on non Xen domain")); evtchn_dev = BUS_ADD_CHILD(parent, 0, "evtchn", 0); if (evtchn_dev == NULL) panic("unable to attach evtchn user-space device"); } static int evtchn_probe(device_t dev) { device_set_desc(dev, "Xen event channel user-space device"); return (BUS_PROBE_NOWILDCARD); } static int evtchn_attach(device_t dev) { make_dev_credf(MAKEDEV_ETERNAL, &evtchn_devsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t evtchn_methods[] = { DEVMETHOD(device_identify, evtchn_identify), DEVMETHOD(device_probe, evtchn_probe), 
DEVMETHOD(device_attach, evtchn_attach), DEVMETHOD_END }; static driver_t evtchn_driver = { "evtchn", evtchn_methods, 0, }; DRIVER_MODULE(evtchn, xenpv, evtchn_driver, 0, 0); MODULE_DEPEND(evtchn, xenpv, 1, 1, 1); diff --git a/sys/dev/xen/grant_table/grant_table.c b/sys/dev/xen/grant_table/grant_table.c index fdbec8ac14e8..b5bbe04977a2 100644 --- a/sys/dev/xen/grant_table/grant_table.c +++ b/sys/dev/xen/grant_table/grant_table.c @@ -1,680 +1,678 @@ /****************************************************************************** * gnttab.c * * Two sets of functionality: * 1. Granting foreign access to our memory reservation. * 2. Accessing others' memory reservations via grant references. * (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2005, Christopher Clark * Copyright (c) 2004, K A Fraser */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include - #include #include #include #include #include #include /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_v1_t)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static struct mtx gnttab_list_lock; /* * Resource representing allocated physical address space * for the grant table metainfo */ static struct resource *gnttab_pseudo_phys_res; /* Resource id for allocated physical address space. 
*/ static int gnttab_pseudo_phys_res_id; static grant_entry_v1_t *shared; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) static int get_free_entries(int count, int *entries) { int ref, error; grant_ref_t head; mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { mtx_unlock(&gnttab_list_lock); return (error); } ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); } static void do_free_callbacks(void) { struct gnttab_free_callback *callback, *next; callback = gnttab_free_callback_list; gnttab_free_callback_list = NULL; while (callback != NULL) { next = callback->next; if (gnttab_free_count >= callback->count) { callback->next = NULL; callback->fn(callback->arg); } else { callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; } callback = next; } } static inline void check_free_callbacks(void) { if (__predict_false(gnttab_free_callback_list != NULL)) do_free_callbacks(); } static void put_free_entry(grant_ref_t ref) { mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } /* * Public grant-issuing interface functions */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? 
GTF_readonly : 0); if (result) *result = ref; return (0); } void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); } int gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; nflags = shared[ref].flags; return (nflags & (GTF_reading|GTF_writing)); } int gnttab_end_foreign_access_ref(grant_ref_t ref) { - uint16_t flags, nflags; + uint16_t flags; - nflags = shared[ref].flags; - do { - if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { - printf("%s: WARNING: g.e. still in use!\n", __func__); - return (0); - } - } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != - flags); + while (!((flags = atomic_load_16(&shared[ref].flags)) & + (GTF_reading|GTF_writing))) + if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) + return (1); - return (1); + printf("%s: WARNING: g.e. still in use!\n", __func__); + return (0); } void gnttab_end_foreign_access(grant_ref_t ref, void *page) { if (gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != NULL) { free(page, M_DEVBUF); } } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. and page still in use!\n", __func__); } } void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) { grant_ref_t *last_ref; grant_ref_t head; grant_ref_t tail; head = GNTTAB_LIST_END; tail = *refs; last_ref = refs + count; while (refs != last_ref) { if (gnttab_end_foreign_access_ref(*refs)) { gnttab_entry(*refs) = head; head = *refs; } else { /* * XXX This needs to be fixed so that the ref * is placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. 
still in use!\n", __func__); count--; } refs++; } if (count != 0) { mtx_lock(&gnttab_list_lock); gnttab_free_count += count; gnttab_entry(tail) = gnttab_free_head; gnttab_free_head = head; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } } int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); *result = ref; return (0); } void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; } unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; uint16_t flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). + * + * NOTE: This is a loop since the atomic cmpset can fail multiple + * times. In normal operation it will be rare to execute more than + * twice. Attempting an attack would consume a great deal of + * attacker resources and be unlikely to prolong the loop very much. */ - while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) + while (!((flags = atomic_load_16(&shared[ref].flags)) & + GTF_transfer_committed)) + if (atomic_cmpset_16(&shared[ref].flags, flags, 0)) return (0); - cpu_spinwait(); - } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { - flags = shared[ref].flags; cpu_spinwait(); + flags = atomic_load_16(&shared[ref].flags); } /* Read the frame number /after/ reading completion status. 
*/ rmb(); frame = shared[ref].frame; KASSERT(frame != 0, ("grant table inconsistent")); return (frame); } unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = gnttab_end_foreign_transfer_ref(ref); put_free_entry(ref); return (frame); } void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); } void gnttab_free_grant_references(grant_ref_t head) { grant_ref_t ref; int count = 1; if (head == GNTTAB_LIST_END) return; ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) { int ref, error; error = get_free_entries(count, &ref); if (__predict_false(error)) return (error); *head = ref; return (0); } int gnttab_empty_grant_references(const grant_ref_t *private_head) { return (*private_head == GNTTAB_LIST_END); } int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; if (__predict_false(g == GNTTAB_LIST_END)) return (g); *private_head = gnttab_entry(g); return (g); } void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { gnttab_entry(release) = *private_head; *private_head = release; } void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count) { mtx_lock(&gnttab_list_lock); if (callback->next) goto out; callback->fn = fn; callback->arg = arg; callback->count = count; callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; check_free_callbacks(); out: mtx_unlock(&gnttab_list_lock); } void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) { struct gnttab_free_callback **pcb; mtx_lock(&gnttab_list_lock); for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { if (*pcb 
== callback) { *pcb = callback->next; break; } } mtx_unlock(&gnttab_list_lock); } static int grow_gnttab_list(unsigned int more_frames) { unsigned int new_nr_grant_frames, extra_entries, i; new_nr_grant_frames = nr_grant_frames + more_frames; extra_entries = more_frames * GREFS_PER_GRANT_FRAME; for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!gnttab_list[i]) goto grow_nomem; } for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; check_free_callbacks(); return (0); grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); return (ENOMEM); } static unsigned int __max_nr_grant_frames(void) { struct gnttab_query_size query; int rc; query.dom = DOMID_SELF; rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); if ((rc < 0) || (query.status != GNTST_okay)) return (4); /* Legacy max supported number of frames */ return (query.max_nr_frames); } static inline unsigned int max_nr_grant_frames(void) { return (min(__max_nr_grant_frames(), boot_max_nr_grant_frames)); } #ifdef notyet /* * XXX needed for backend support * */ static int map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { unsigned long **frames = (unsigned long **)data; set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); (*frames)++; return 0; } static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } #endif static vm_paddr_t resume_frames; static void gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; /* * Loop backwards, so that the first hypercall has 
the largest index, * ensuring that the table will grow only once. */ do { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed to map gnttab"); } while (i-- > start_idx); } int gnttab_resume(device_t dev) { unsigned int max_nr_gframes, nr_gframes; nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return (ENOSYS); if (!resume_frames) { KASSERT(dev != NULL, ("No resume frames and no device provided")); gnttab_pseudo_phys_res = xenmem_alloc(dev, &gnttab_pseudo_phys_res_id, PAGE_SIZE * max_nr_gframes); if (gnttab_pseudo_phys_res == NULL) panic("Unable to reserve physical memory for gnttab"); resume_frames = rman_get_start(gnttab_pseudo_phys_res); shared = rman_get_virtual(gnttab_pseudo_phys_res); } gnttab_map(0, nr_gframes - 1); return (0); } static int gnttab_expand(unsigned int req_entries) { unsigned int cur, extra; cur = nr_grant_frames; extra = howmany(req_entries, GREFS_PER_GRANT_FRAME); if (cur + extra > max_nr_grant_frames()) return (ENOSPC); gnttab_map(cur, cur + extra - 1); return (grow_gnttab_list(extra)); } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF | MTX_RECURSE); /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void granttable_identify(driver_t *driver __unused, device_t parent) { KASSERT(xen_domain(), ("Trying to attach grant-table device on non Xen domain")); /* * A single device instance for our driver is always present * in a system operating under Xen. 
*/ if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) panic("unable to attach Xen Grant-table device"); } /** * \brief Probe for the existence of the Xen Grant-table device * * \param dev NewBus device_t for this instance. * * \return Always returns 0 indicating success. */ static int granttable_probe(device_t dev) { device_set_desc(dev, "Xen Grant-table Device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xen Grant-table device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int granttable_attach(device_t dev) { int i; unsigned int max_nr_glist_frames; unsigned int nr_init_grefs; nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ max_nr_glist_frames = (boot_max_nr_grant_frames * GREFS_PER_GRANT_FRAME / (PAGE_SIZE / sizeof(grant_ref_t))); gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), M_DEVBUF, M_NOWAIT); if (gnttab_list == NULL) return (ENOMEM); for (i = 0; i < nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (gnttab_list[i] == NULL) goto ini_nomem; } if (gnttab_resume(dev)) return (ENODEV); nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; if (bootverbose) printf("Grant table initialized\n"); return (0); ini_nomem: for (i--; i >= 0; i--) free(gnttab_list[i], M_DEVBUF); free(gnttab_list, M_DEVBUF); return (ENOMEM); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t granttable_methods[] = { /* Device interface */ DEVMETHOD(device_identify, 
granttable_identify), DEVMETHOD(device_probe, granttable_probe), DEVMETHOD(device_attach, granttable_attach), DEVMETHOD_END }; DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, NULL, NULL, SI_ORDER_FIRST); diff --git a/sys/i386/include/xen/synch_bitops.h b/sys/i386/include/xen/synch_bitops.h deleted file mode 100644 index 696bc6fa43f4..000000000000 --- a/sys/i386/include/xen/synch_bitops.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef __XEN_SYNCH_BITOPS_H__ -#define __XEN_SYNCH_BITOPS_H__ - -/* - * Copyright 1992, Linus Torvalds. - * Heavily modified to provide guaranteed strong synchronisation - * when communicating with Xen or other guest OSes running on other CPUs. - */ - -#define ADDR (*(volatile long *) addr) - -static __inline__ void synch_set_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btsl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ void synch_clear_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btrl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ void synch_change_bit(int nr, volatile void * addr) -{ - __asm__ __volatile__ ( - "lock btcl %1,%0" - : "=m" (ADDR) : "Ir" (nr) : "memory" ); -} - -static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "lock btsl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "lock btrl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) -{ - int oldbit; - - __asm__ __volatile__ ( - "lock btcl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); - return oldbit; -} - -struct 
__synch_xchg_dummy { unsigned long a[100]; }; -#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x)) - -#define synch_cmpxchg(ptr, old, new) \ -((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ - (unsigned long)(old), \ - (unsigned long)(new), \ - sizeof(*(ptr)))) - -static inline unsigned long __synch_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - __asm__ __volatile__("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; - case 2: - __asm__ __volatile__("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; -#ifdef CONFIG_X86_64 - case 4: - __asm__ __volatile__("lock; cmpxchgl %k1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; - case 8: - __asm__ __volatile__("lock; cmpxchgq %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; -#else - case 4: - __asm__ __volatile__("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "q"(new), "m"(*__synch_xg(ptr)), - "0"(old) - : "memory"); - return prev; -#endif - } - return old; -} - -static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) -{ - return ((1UL << (nr & 31)) & - (((const volatile unsigned int *) addr)[nr >> 5])) != 0; -} - -static __inline__ int synch_var_test_bit(int nr, volatile void * addr) -{ - int oldbit; - __asm__ __volatile__ ( - "btl %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); - return oldbit; -} - -#define synch_test_bit(nr,addr) \ -(__builtin_constant_p(nr) ? 
\ - synch_const_test_bit((nr),(addr)) : \ - synch_var_test_bit((nr),(addr))) - -#endif /* __XEN_SYNCH_BITOPS_H__ */ diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index cf4560b6b5fb..8d5562e21018 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -1,1401 +1,1399 @@ /****************************************************************************** * xen_intr.c * * Xen event and interrupt services for x86 HVM guests. * * Copyright (c) 2002-2005, K A Fraser * Copyright (c) 2005, Intel Corporation * Copyright (c) 2012, Spectra Logic Corporation * Copyright © 2021-2023, Elliott Mitchell * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include - #include #include #include #include #include #include #include #ifdef DDB #include #endif /* The code below is the implementation of 2L event channels. */ #define NR_EVENT_CHANNELS EVTCHN_2L_NR_CHANNELS static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); static u_int first_evtchn_irq; /** * Per-cpu event channel processing state. */ struct xen_intr_pcpu_data { /** * The last event channel bitmap section (level one bit) processed. * This is used to ensure we scan all ports before * servicing an already servied port again. */ u_int last_processed_l1i; /** * The last event channel processed within the event channel * bitmap being scanned. */ u_int last_processed_l2i; /** Pointer to this CPU's interrupt statistic counter. */ u_long *evtchn_intrcnt; /** * A bitmap of ports that can be serviced from this CPU. * A set bit means interrupt handling is enabled. */ u_long evtchn_enabled[sizeof(u_long) * 8]; }; /* * Start the scan at port 0 by initializing the last scanned * location as the highest numbered event channel port. */ DPCPU_DEFINE_STATIC(struct xen_intr_pcpu_data, xen_intr_pcpu) = { .last_processed_l1i = LONG_BIT - 1, .last_processed_l2i = LONG_BIT - 1 }; DPCPU_DECLARE(struct vcpu_info *, vcpu_info); #define INVALID_EVTCHN (~(evtchn_port_t)0) /* Invalid event channel */ #define is_valid_evtchn(x) ((uintmax_t)(x) < NR_EVENT_CHANNELS) struct xenisrc { struct intsrc xi_intsrc; enum evtchn_type xi_type; int xi_cpu; /* VCPU for delivery. */ int xi_vector; /* Global isrc vector number. */ evtchn_port_t xi_port; int xi_virq; void *xi_cookie; u_int xi_close:1; /* close on unbind? 
*/ u_int xi_masked:1; volatile u_int xi_refcount; }; static void xen_intr_suspend(struct pic *); static void xen_intr_resume(struct pic *, bool suspend_cancelled); static void xen_intr_enable_source(struct intsrc *isrc); static void xen_intr_disable_source(struct intsrc *isrc, int eoi); static void xen_intr_eoi_source(struct intsrc *isrc); static void xen_intr_enable_intr(struct intsrc *isrc); static void xen_intr_disable_intr(struct intsrc *isrc); static int xen_intr_vector(struct intsrc *isrc); static int xen_intr_source_pending(struct intsrc *isrc); static int xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id); /** * PIC interface for all event channel port types except physical IRQs. */ struct pic xen_intr_pic = { .pic_enable_source = xen_intr_enable_source, .pic_disable_source = xen_intr_disable_source, .pic_eoi_source = xen_intr_eoi_source, .pic_enable_intr = xen_intr_enable_intr, .pic_disable_intr = xen_intr_disable_intr, .pic_vector = xen_intr_vector, .pic_source_pending = xen_intr_source_pending, .pic_suspend = xen_intr_suspend, .pic_resume = xen_intr_resume, .pic_config_intr = xen_intr_config_intr, .pic_assign_cpu = xen_intr_assign_cpu }; static struct mtx xen_intr_isrc_lock; static u_int xen_intr_auto_vector_count; static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; /*------------------------- Private Functions --------------------------------*/ /** * Retrieve a handle for a Xen interrupt source. * * \param isrc A valid Xen interrupt source structure. * * \returns A handle suitable for use with xen_intr_isrc_from_handle() * to retrieve the original Xen interrupt source structure. */ static inline xen_intr_handle_t xen_intr_handle_from_isrc(struct xenisrc *isrc) { return (isrc); } /** * Lookup a Xen interrupt source object given an interrupt binding handle. 
*
 * \param handle  A handle initialized by a previous call to
 *                xen_intr_bind_isrc().
 *
 * \returns  A pointer to the Xen interrupt source object associated
 *           with the given interrupt handle.  NULL if no association
 *           currently exists.
 */
static inline struct xenisrc *
xen_intr_isrc_from_handle(xen_intr_handle_t handle)
{
	/* The handle is the xenisrc pointer itself, so a NULL handle yields NULL. */
	return ((struct xenisrc *)handle);
}

/**
 * Disable signal delivery for an event channel port on the
 * specified CPU.
 *
 * \param cpu   The CPU whose per-cpu evtchn_enabled bitmap is updated.
 * \param port  The event channel port to mask.
 *
 * This API is used to manage the port<=>CPU binding of event
 * channel handlers.
 *
 * \note  This operation does not preclude reception of an event
 *        for this event channel on another CPU.  To mask the
 *        event channel globally, use evtchn_mask_port().
 */
static inline void
evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port)
{
	struct xen_intr_pcpu_data *pcpu;

	pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu);
	/* Clear the port's bit in this CPU's enable bitmap. */
	xen_clear_bit(port, pcpu->evtchn_enabled);
}

/**
 * Enable signal delivery for an event channel port on the
 * specified CPU.
 *
 * \param cpu   The CPU whose per-cpu evtchn_enabled bitmap is updated.
 * \param port  The event channel port to unmask.
 *
 * This API is used to manage the port<=>CPU binding of event
 * channel handlers.
 *
 * \note  This operation does not guarantee that event delivery
 *        is enabled for this event channel port.  The port must
 *        also be globally enabled.  See evtchn_unmask_port().
 */
static inline void
evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port)
{
	struct xen_intr_pcpu_data *pcpu;

	pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu);
	/* Set the port's bit in this CPU's enable bitmap. */
	xen_set_bit(port, pcpu->evtchn_enabled);
}

/**
 * Allocate and register a per-cpu Xen upcall interrupt counter.
 *
 * \param cpu  The cpu for which to register this interrupt count.
*/ static void xen_intr_intrcnt_add(u_int cpu) { char buf[MAXCOMLEN + 1]; struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); if (pcpu->evtchn_intrcnt != NULL) return; snprintf(buf, sizeof(buf), "cpu%d:xen", cpu); intrcnt_add(buf, &pcpu->evtchn_intrcnt); } /** * Search for an already allocated but currently unused Xen interrupt * source object. * * \param type Restrict the search to interrupt sources of the given * type. * * \return A pointer to a free Xen interrupt source object or NULL. */ static struct xenisrc * xen_intr_find_unused_isrc(enum evtchn_type type) { int isrc_idx; KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held")); for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx ++) { struct xenisrc *isrc; u_int vector; vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); /* * Since intr_register_source() must be called while unlocked, * isrc == NULL *will* occur, though very infrequently. * * This also allows a very small gap where a foreign intrusion * into Xen's interrupt range could be examined by this test. */ if (__predict_true(isrc != NULL) && __predict_true(isrc->xi_intsrc.is_pic == &xen_intr_pic) && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { KASSERT(isrc->xi_intsrc.is_handlers == 0, ("Free evtchn still has handlers")); isrc->xi_type = type; return (isrc); } } return (NULL); } /** * Allocate a Xen interrupt source object. * * \param type The type of interrupt source to create. * * \return A pointer to a newly allocated Xen interrupt source * object or NULL. 
*/
static struct xenisrc *
xen_intr_alloc_isrc(enum evtchn_type type)
{
	static int warned;
	struct xenisrc *isrc;
	unsigned int vector;
	int error;

	KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held"));

	if (xen_intr_auto_vector_count >= NR_EVENT_CHANNELS) {
		/* Report exhaustion only once. */
		if (!warned) {
			warned = 1;
			printf("%s: Xen interrupts exhausted.\n", __func__);
		}
		return (NULL);
	}

	/* Reserve the next vector while the lock is still held. */
	vector = first_evtchn_irq + xen_intr_auto_vector_count;
	xen_intr_auto_vector_count++;

	KASSERT((intr_lookup_source(vector) == NULL),
	    ("Trying to use an already allocated vector"));

	/*
	 * The lock is dropped around the M_WAITOK allocation and the
	 * source registration, and re-taken before returning so the
	 * caller still owns it.
	 */
	mtx_unlock(&xen_intr_isrc_lock);
	isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO);
	isrc->xi_intsrc.is_pic = &xen_intr_pic;
	isrc->xi_vector = vector;
	isrc->xi_type = type;
	error = intr_register_source(&isrc->xi_intsrc);
	if (error != 0)
		panic("%s(): failed registering interrupt %u, error=%d\n",
		    __func__, vector, error);
	mtx_lock(&xen_intr_isrc_lock);

	return (isrc);
}

/**
 * Return a Xen interrupt source object to the unbound state.
 *
 * The source must no longer have any handlers attached (asserted).
 * If it still refers to a valid port, that port is masked, cleared,
 * rebound to CPU 0 in the per-cpu bitmaps, closed when xi_close is
 * set, and removed from the port lookup table.
 *
 * \param isrc  The interrupt source object to release.
 *
 * \returns  Always 0.
 */
static int
xen_intr_release_isrc(struct xenisrc *isrc)
{

	KASSERT(isrc->xi_intsrc.is_handlers == 0,
	    ("Release called, but xenisrc still in use"));
	mtx_lock(&xen_intr_isrc_lock);
	if (is_valid_evtchn(isrc->xi_port)) {
		evtchn_mask_port(isrc->xi_port);
		evtchn_clear_port(isrc->xi_port);

		/* Rebind port to CPU 0. */
		evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
		evtchn_cpu_unmask_port(0, isrc->xi_port);

		/* Only close ports that this API opened itself. */
		if (isrc->xi_close != 0) {
			struct evtchn_close close = { .port = isrc->xi_port };

			if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
				panic("EVTCHNOP_close failed");
		}

		xen_intr_port_to_isrc[isrc->xi_port] = NULL;
	}
	/* Marking the type unbound makes the object reusable. */
	isrc->xi_cpu = 0;
	isrc->xi_type = EVTCHN_TYPE_UNBOUND;
	isrc->xi_port = INVALID_EVTCHN;
	isrc->xi_cookie = NULL;
	mtx_unlock(&xen_intr_isrc_lock);
	return (0);
}

/**
 * Associate an interrupt handler with an already allocated local Xen
 * event channel port.
 *
 * \param isrcp         The returned Xen interrupt object associated with
 *                      the specified local port.  Set to NULL on entry and
 *                      only filled in on success.
 * \param local_port    The event channel to bind.
 * \param type          The event channel type of local_port.
 * \param intr_owner    The device making this bind request.
 * \param filter        An interrupt filter handler.  Specify NULL
 *                      to always dispatch to the ithread handler.
 * \param handler       An interrupt ithread handler.  Optional (can
 *                      specify NULL) if all necessary event actions
 *                      are performed by filter.
 * \param arg           Argument to present to both filter and handler.
 * \param flags         Interrupt handler flags.  See sys/bus.h.
 * \param port_handlep  Pointer to an opaque handle used to manage this
 *                      registration.  Must not be NULL.
 *
 * \returns  0 on success, otherwise an errno: EINVAL for a NULL
 *           port_handlep, ENOSPC when no interrupt source is available,
 *           or the error returned by xen_intr_add_handler().
 */
static int
xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port,
    enum evtchn_type type, const char *intr_owner, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags,
    xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;
	int error;

	*isrcp = NULL;
	if (port_handlep == NULL) {
		printf("%s: %s: Bad event handle\n", intr_owner, __func__);
		return (EINVAL);
	}

	/* Prefer recycling an unbound source over allocating a new one. */
	mtx_lock(&xen_intr_isrc_lock);
	isrc = xen_intr_find_unused_isrc(type);
	if (isrc == NULL) {
		isrc = xen_intr_alloc_isrc(type);
		if (isrc == NULL) {
			mtx_unlock(&xen_intr_isrc_lock);
			return (ENOSPC);
		}
	}
	isrc->xi_port = local_port;
	isrc->xi_close = 0;
	xen_intr_port_to_isrc[local_port] = isrc;
	refcount_init(&isrc->xi_refcount, 1);
	mtx_unlock(&xen_intr_isrc_lock);

	/* Assign the opaque handler */
	*port_handlep = xen_intr_handle_from_isrc(isrc);

#ifdef SMP
	if (type == EVTCHN_TYPE_PORT) {
		/*
		 * By default all interrupts are assigned to vCPU#0
		 * unless specified otherwise, so shuffle them to balance
		 * the interrupt load.
		 */
		xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu(0));
	}
#endif

	if (filter == NULL && handler == NULL) {
		/*
		 * No filter/handler provided, leave the event channel
		 * masked and without a valid handler, the caller is
		 * in charge of setting that up.
		 */
		*isrcp = isrc;
		return (0);
	}

	error = xen_intr_add_handler(intr_owner, filter, handler, arg, flags,
	    *port_handlep);
	if (error != 0) {
		/* The source is released here; *isrcp stays NULL. */
		xen_intr_release_isrc(isrc);
		return (error);
	}
	*isrcp = isrc;
	return (0);
}

/**
 * Determine the event channel ports at the given section of the
 * event port bitmap which have pending events for the given cpu.
 *
 * A port counts as active when it is pending, not globally masked,
 * and enabled in this CPU's bitmap.
 *
 * \param pcpu  The Xen interrupt pcpu data for the cpu being queried.
 * \param sh    The Xen shared info area.
 * \param idx   The index of the section of the event channel bitmap to
 *              inspect.
 *
 * \returns  A u_long with bits set for every event channel with pending
 *           events.
 */
static inline u_long
xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh,
    u_int idx)
{

	/* The three bitmaps must agree in element size and total size. */
	CTASSERT(sizeof(sh->evtchn_mask[0]) == sizeof(sh->evtchn_pending[0]));
	CTASSERT(sizeof(sh->evtchn_mask[0]) == sizeof(pcpu->evtchn_enabled[0]));
	CTASSERT(sizeof(sh->evtchn_mask) == sizeof(sh->evtchn_pending));
	CTASSERT(sizeof(sh->evtchn_mask) == sizeof(pcpu->evtchn_enabled));
	return (sh->evtchn_pending[idx]
	      & ~sh->evtchn_mask[idx]
	      & pcpu->evtchn_enabled[idx]);
}

/**
 * Interrupt handler for processing all Xen event channel events.
 *
 * \param trap_frame  The trap frame context for the current interrupt.
*/
void
xen_intr_handle_upcall(struct trapframe *trap_frame)
{
	u_int l1i, l2i, port, cpu __diagused;
	u_long masked_l1, masked_l2;
	struct xenisrc *isrc;
	shared_info_t *s;
	vcpu_info_t *v;
	struct xen_intr_pcpu_data *pc;
	u_long l1, l2;

	/*
	 * Disable preemption in order to always check and fire events
	 * on the right vCPU
	 */
	critical_enter();

	cpu = PCPU_GET(cpuid);
	pc  = DPCPU_PTR(xen_intr_pcpu);
	s   = HYPERVISOR_shared_info;
	v   = DPCPU_GET(vcpu_info);

	if (!xen_has_percpu_evtchn()) {
		KASSERT((cpu == 0), ("Fired PCI event callback on wrong CPU"));
	}

	v->evtchn_upcall_pending = 0;

#if 0
#ifndef CONFIG_X86
	/* No need for a barrier -- XCHG is a barrier on x86. */
	/* Clear master flag /before/ clearing selector flag. */
	wmb();
#endif
#endif

	/* Atomically fetch and clear the level-one selector word. */
	l1 = atomic_readandclear_long(&v->evtchn_pending_sel);

	/* Resume scanning just after the last port serviced on this CPU. */
	l1i = pc->last_processed_l1i;
	l2i = pc->last_processed_l2i;
	(*pc->evtchn_intrcnt)++;

	while (l1 != 0) {
		/* Only consider level-one bits at or above the next index. */
		l1i = (l1i + 1) % LONG_BIT;
		masked_l1 = l1 & ((~0UL) << l1i);

		if (masked_l1 == 0) {
			/*
			 * if we masked out all events, wrap around
			 * to the beginning.
			 */
			l1i = LONG_BIT - 1;
			l2i = LONG_BIT - 1;
			continue;
		}
		l1i = ffsl(masked_l1) - 1;

		do {
			/* Re-read the active set on every pass. */
			l2 = xen_intr_active_ports(pc, s, l1i);

			l2i = (l2i + 1) % LONG_BIT;
			masked_l2 = l2 & ((~0UL) << l2i);

			if (masked_l2 == 0) {
				/* if we masked out all events, move on */
				l2i = LONG_BIT - 1;
				break;
			}
			l2i = ffsl(masked_l2) - 1;

			/* process port */
			port = (l1i * LONG_BIT) + l2i;
-			synch_clear_bit(port, &s->evtchn_pending[0]);
+			evtchn_clear_port(port);

			/* Ports without a registered source are skipped. */
			isrc = xen_intr_port_to_isrc[port];
			if (__predict_false(isrc == NULL))
				continue;

			/* Make sure we are firing on the right vCPU */
			KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)),
				("Received unexpected event on vCPU#%d, event bound to vCPU#%d",
				PCPU_GET(cpuid), isrc->xi_cpu));

			intr_execute_handlers(&isrc->xi_intsrc, trap_frame);

			/*
			 * If this is the final port processed,
			 * we'll pick up here+1 next time.
			 */
			pc->last_processed_l1i = l1i;
			pc->last_processed_l2i = l2i;

		} while (l2i != LONG_BIT - 1);

		l2 = xen_intr_active_ports(pc, s, l1i);
		if (l2 == 0) {
			/*
			 * We handled all ports, so we can clear the
			 * selector bit.
			 */
			l1 &= ~(1UL << l1i);
		}
	}

	if (xen_evtchn_needs_ack)
		lapic_eoi();

	critical_exit();
}

/*
 * SYSINIT hook: set up the lock, the per-cpu enable bitmaps and the
 * global event channel mask, then register the Xen PIC.  Does nothing
 * when not running as a Xen domain.
 */
static int
xen_intr_init(void *dummy __unused)
{
	shared_info_t *s = HYPERVISOR_shared_info;
	struct xen_intr_pcpu_data *pcpu;
	int i;

	if (!xen_domain())
		return (0);

	/* Compile-time checks of the is_valid_evtchn() boundaries. */
	_Static_assert(is_valid_evtchn(0),
	    "is_valid_evtchn(0) fails (unused by Xen, but valid by interface");
	_Static_assert(is_valid_evtchn(NR_EVENT_CHANNELS - 1),
	    "is_valid_evtchn(max) fails (is a valid channel)");
	_Static_assert(!is_valid_evtchn(NR_EVENT_CHANNELS),
	    "is_valid_evtchn(>max) fails (NOT a valid channel)");
	_Static_assert(!is_valid_evtchn(~(evtchn_port_t)0),
	    "is_valid_evtchn(maxint) fails (overflow?)");
	_Static_assert(!is_valid_evtchn(INVALID_EVTCHN),
	    "is_valid_evtchn(INVALID_EVTCHN) fails (must be invalid!)");
	_Static_assert(!is_valid_evtchn(-1),
	    "is_valid_evtchn(-1) fails (negative are invalid)");

	mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF);

	/*
	 * Set the per-cpu mask of CPU#0 to enable all, since by default all
	 * event channels are bound to CPU#0.
	 */
	CPU_FOREACH(i) {
		pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
		memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0,
		    sizeof(pcpu->evtchn_enabled));
	}

	/* Start with every event channel masked. */
	for (i = 0; i < nitems(s->evtchn_mask); i++)
		atomic_store_rel_long(&s->evtchn_mask[i], ~0);

	intr_register_pic(&xen_intr_pic);

	if (bootverbose)
		printf("Xen interrupt system initialized\n");

	return (0);
}
SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL);

/*
 * SYSINIT hook: register the per-cpu upcall counter for every CPU.
 * Does nothing when not running as a Xen domain.
 */
static void
xen_intrcnt_init(void *dummy __unused)
{
	unsigned int i;

	if (!xen_domain())
		return;

	/*
	 * Register interrupt count manually as we aren't guaranteed to see a
	 * call to xen_intr_assign_cpu() before our first interrupt.
	 */
	CPU_FOREACH(i)
		xen_intr_intrcnt_add(i);
}
SYSINIT(xen_intrcnt_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intrcnt_init, NULL);

/*
 * Reserve NR_EVENT_CHANNELS IRQ numbers for event channels, starting at
 * the current value of num_io_irqs.  Panics if the range would overflow.
 */
void
xen_intr_alloc_irqs(void)
{

	if (num_io_irqs > UINT_MAX - NR_EVENT_CHANNELS)
		panic("IRQ allocation overflow (num_msi_irqs too high?)");
	first_evtchn_irq = num_io_irqs;
	num_io_irqs += NR_EVENT_CHANNELS;
}

/*--------------------------- Common PIC Functions ---------------------------*/
/**
 * Prepare this PIC for system suspension.
 *
 * Nothing needs to be done here.
 */
static void
xen_intr_suspend(struct pic *unused)
{
}

/*
 * Re-create the event channel of an IPI source and bind it back to the
 * CPU it was delivered to before.  Any failure is fatal.
 */
static void
xen_rebind_ipi(struct xenisrc *isrc)
{
#ifdef SMP
	/* Remember the previous CPU; xi_cpu is reset to 0 below. */
	int cpu = isrc->xi_cpu;
	int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
	int error;
	struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id };

	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
	                                    &bind_ipi);
	if (error != 0)
		panic("unable to rebind xen IPI: %d", error);

	isrc->xi_port = bind_ipi.port;
	isrc->xi_cpu = 0;
	xen_intr_port_to_isrc[bind_ipi.port] = isrc;

	error = xen_intr_assign_cpu(&isrc->xi_intsrc,
	                            cpu_apic_ids[cpu]);
	if (error)
		panic("unable to bind xen IPI to CPU#%d: %d",
		      cpu, error);

	evtchn_unmask_port(bind_ipi.port);
#else
	panic("Resume IPI event channel on UP");
#endif
}

/*
 * Re-create the event channel of a VIRQ source and bind it back to the
 * CPU it was delivered to before.  Any failure is fatal.
 */
static void
xen_rebind_virq(struct xenisrc *isrc)
{
	/* Remember the previous CPU; xi_cpu is reset to 0 below. */
	int cpu = isrc->xi_cpu;
	int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
	int error;
	struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
	                                      .vcpu = vcpu_id };

	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
	                                    &bind_virq);
	if (error != 0)
		panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error);

	isrc->xi_port = bind_virq.port;
	isrc->xi_cpu = 0;
	xen_intr_port_to_isrc[bind_virq.port] = isrc;

#ifdef SMP
	error = xen_intr_assign_cpu(&isrc->xi_intsrc,
	                            cpu_apic_ids[cpu]);
	if (error)
		panic("unable to bind xen VIRQ#%d to CPU#%d: %d",
		      isrc->xi_virq, cpu, error);
#endif

	evtchn_unmask_port(bind_virq.port);
}

/**
 * Return this PIC to service after being suspended.
*/
static void
xen_intr_resume(struct pic *unused, bool suspend_cancelled)
{
	shared_info_t *s = HYPERVISOR_shared_info;
	struct xenisrc *isrc;
	u_int isrc_idx;
	int i;

	/* A cancelled suspend leaves the existing state untouched. */
	if (suspend_cancelled)
		return;

	/* Reset the per-CPU masks */
	CPU_FOREACH(i) {
		struct xen_intr_pcpu_data *pcpu;

		pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
		memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0,
		    sizeof(pcpu->evtchn_enabled));
	}

	/* Mask all event channels. */
	for (i = 0; i < nitems(s->evtchn_mask); i++)
		atomic_store_rel_long(&s->evtchn_mask[i], ~0);

	/* Remove port -> isrc mappings */
	memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc));

	/* Free unused isrcs and rebind VIRQs and IPIs */
	for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) {
		u_int vector;

		vector = first_evtchn_irq + isrc_idx;
		isrc = (struct xenisrc *)intr_lookup_source(vector);
		if (isrc != NULL) {
			/* Invalidate the old port; only IPIs and VIRQs are rebound here. */
			isrc->xi_port = INVALID_EVTCHN;
			switch (isrc->xi_type) {
			case EVTCHN_TYPE_IPI:
				xen_rebind_ipi(isrc);
				break;
			case EVTCHN_TYPE_VIRQ:
				xen_rebind_virq(isrc);
				break;
			default:
				break;
			}
		}
	}
}

/**
 * Disable a Xen interrupt source.
 *
 * \param base_isrc  The interrupt source to disable.
 */
static void
xen_intr_disable_intr(struct intsrc *base_isrc)
{
	struct xenisrc *isrc = (struct xenisrc *)base_isrc;

	evtchn_mask_port(isrc->xi_port);
}

/**
 * Determine the global interrupt vector number for
 * a Xen interrupt source.
 *
 * \param base_isrc  The interrupt source to query.
 *
 * \return  The vector number corresponding to the given interrupt source.
 */
static int
xen_intr_vector(struct intsrc *base_isrc)
{
	struct xenisrc *isrc = (struct xenisrc *)base_isrc;

	return (isrc->xi_vector);
}

/**
 * Determine whether or not interrupt events are pending on
 * the given interrupt source.
 *
 * \param isrc  The interrupt source to query.
 *
 * \returns  0 if no events are pending, otherwise non-zero.  This
 *           implementation always returns 0.
 */
static int
xen_intr_source_pending(struct intsrc *isrc)
{
	/*
	 * EventChannels are edge triggered and never masked.
	 * There can be no pending events.
	 */
	return (0);
}

/**
 * Perform configuration of an interrupt source.
 *
 * \param isrc  The interrupt source to configure.
 * \param trig  Edge or level.
 * \param pol   Active high or low.
 *
 * \returns  Always ENODEV; trigger and polarity cannot be configured
 *           through this interface.
 */
static int
xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig,
    enum intr_polarity pol)
{
	/* Configuration is only possible via the evtchn apis. */
	return (ENODEV);
}

/**
 * Configure CPU affinity for interrupt source event delivery.
 *
 * \param base_isrc  The interrupt source to configure.
 * \param apic_id    The apic id of the CPU for handling future events.
 *
 * \returns  0 once the port has been processed, EOPNOTSUPP without SMP
 *           or per-cpu event channel support, EINVAL if the source has
 *           no valid port.  A failing EVTCHNOP_bind_vcpu hypercall is
 *           not reported: the old binding is kept and 0 is returned.
 */
static int
xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
{
#ifdef SMP
	struct evtchn_bind_vcpu bind_vcpu;
	struct xenisrc *isrc;
	u_int to_cpu, vcpu_id;
	int error, masked;

	if (!xen_has_percpu_evtchn())
		return (EOPNOTSUPP);

	to_cpu = apic_cpuid(apic_id);
	vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id;

	mtx_lock(&xen_intr_isrc_lock);
	isrc = (struct xenisrc *)base_isrc;
	if (!is_valid_evtchn(isrc->xi_port)) {
		mtx_unlock(&xen_intr_isrc_lock);
		return (EINVAL);
	}

	/*
	 * Mask the event channel while binding it to prevent interrupt
	 * delivery with an inconsistent state in isrc->xi_cpu.
	 */
	masked = evtchn_test_and_set_mask(isrc->xi_port);
	if ((isrc->xi_type == EVTCHN_TYPE_VIRQ) ||
		(isrc->xi_type == EVTCHN_TYPE_IPI)) {
		/*
		 * Virtual IRQs are associated with a cpu by
		 * the Hypervisor at evtchn_bind_virq time, so
		 * all we need to do is update the per-CPU masks.
		 */
		evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
		isrc->xi_cpu = to_cpu;
		evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port);
		goto out;
	}

	bind_vcpu.port = isrc->xi_port;
	bind_vcpu.vcpu = vcpu_id;

	error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu);
	if (isrc->xi_cpu != to_cpu) {
		if (error == 0) {
			/* Commit to new binding by removing the old one. */
			evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
			isrc->xi_cpu = to_cpu;
			evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port);
		}
	}

out:
	/* Only unmask if the port was not already masked on entry. */
	if (masked == 0)
		evtchn_unmask_port(isrc->xi_port);
	mtx_unlock(&xen_intr_isrc_lock);
	return (0);
#else
	return (EOPNOTSUPP);
#endif
}

/*------------------- Virtual Interrupt Source PIC Functions -----------------*/
/*
 * Mask a level triggered interrupt source.
 *
 * \param base_isrc  The interrupt source to mask (if necessary).
 * \param eoi        If non-zero, perform any necessary end-of-interrupt
 *                   acknowledgements.  Not used by this implementation.
 */
static void
xen_intr_disable_source(struct intsrc *base_isrc, int eoi)
{
	struct xenisrc *isrc;

	isrc = (struct xenisrc *)base_isrc;

	/*
	 * NB: checking if the event channel is already masked is
	 * needed because the event channel user-space device
	 * masks event channels on its filter as part of its
	 * normal operation, and those shouldn't be automatically
	 * unmasked by the generic interrupt code. The event channel
	 * device will unmask them when needed.
	 */
	isrc->xi_masked = !!evtchn_test_and_set_mask(isrc->xi_port);
}

/*
 * Unmask a level triggered interrupt source.
 *
 * The port is left masked if it was already masked when
 * xen_intr_disable_source() recorded its state in xi_masked.
 *
 * \param base_isrc  The interrupt source to unmask (if necessary).
 */
static void
xen_intr_enable_source(struct intsrc *base_isrc)
{
	struct xenisrc *isrc;

	isrc = (struct xenisrc *)base_isrc;

	if (isrc->xi_masked == 0)
		evtchn_unmask_port(isrc->xi_port);
}

/*
 * Perform any necessary end-of-interrupt acknowledgements.
 *
 * Nothing is required here.
 *
 * \param base_isrc  The interrupt source to EOI.
 */
static void
xen_intr_eoi_source(struct intsrc *base_isrc)
{
}

/*
 * Enable and unmask the interrupt source.
 *
 * \param isrc  The interrupt source to enable.
*/ static void xen_intr_enable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; evtchn_unmask_port(isrc->xi_port); } /*--------------------------- Public Functions -------------------------------*/ /*------- API comments for these methods can be found in xen/xenintr.h -------*/ int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; int error; error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); if (error != 0) return (error); /* * The Event Channel API didn't open this port, so it is not * responsible for closing it automatically on unbind. */ isrc->xi_close = 0; return (0); } int xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; struct evtchn_alloc_unbound alloc_unbound; int error; alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = remote_domain; error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc_unbound); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. 
*/ return (-error); } error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); if (error != 0) { evtchn_close_t close = { .port = alloc_unbound.port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } isrc->xi_close = 1; return (0); } int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, u_int remote_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; struct evtchn_bind_interdomain bind_interdomain; int error; bind_interdomain.remote_dom = remote_domain; bind_interdomain.remote_port = remote_port; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port, EVTCHN_TYPE_PORT, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); if (error) { evtchn_close_t close = { .port = bind_interdomain.local_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; return (0); } int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; struct xenisrc *isrc; struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id }; int error; isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. 
*/ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, device_get_nameunit(dev), filter, handler, arg, flags, port_handlep); #ifdef SMP if (error == 0) error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); #endif if (error != 0) { evtchn_close_t close = { .port = bind_virq.port }; xen_intr_unbind(*port_handlep); if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } #ifdef SMP if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt * code to perform the binding. Update our event channel * masks manually so events can't fire on the wrong cpu * during AP startup. */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } #endif /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; isrc->xi_virq = virq; return (0); } int xen_intr_alloc_and_bind_ipi(u_int cpu, driver_filter_t filter, enum intr_type flags, xen_intr_handle_t *port_handlep) { #ifdef SMP int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; struct xenisrc *isrc; struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id }; /* Same size as the one used by intr_handler->ih_name. */ char name[MAXCOMLEN + 1]; int error; isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } snprintf(name, sizeof(name), "cpu%u", cpu); error = xen_intr_bind_isrc(&isrc, bind_ipi.port, EVTCHN_TYPE_IPI, name, filter, NULL, NULL, flags, port_handlep); if (error != 0) { evtchn_close_t close = { .port = bind_ipi.port }; xen_intr_unbind(*port_handlep); if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt * code to perform the binding. 
Update our event channel * masks manually so events can't fire on the wrong cpu * during AP startup. */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; return (0); #else return (EOPNOTSUPP); #endif } int xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) { char descr[MAXCOMLEN + 1]; struct xenisrc *isrc; va_list ap; isrc = xen_intr_isrc_from_handle(port_handle); if (isrc == NULL) return (EINVAL); va_start(ap, fmt); vsnprintf(descr, sizeof(descr), fmt, ap); va_end(ap); return (intr_describe(isrc->xi_vector, isrc->xi_cookie, descr)); } void xen_intr_unbind(xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; KASSERT(port_handlep != NULL, ("NULL xen_intr_handle_t passed to %s", __func__)); isrc = xen_intr_isrc_from_handle(*port_handlep); *port_handlep = NULL; if (isrc == NULL) return; mtx_lock(&xen_intr_isrc_lock); if (refcount_release(&isrc->xi_refcount) == 0) { mtx_unlock(&xen_intr_isrc_lock); return; } mtx_unlock(&xen_intr_isrc_lock); if (isrc->xi_cookie != NULL) intr_remove_handler(isrc->xi_cookie); xen_intr_release_isrc(isrc); } void xen_intr_signal(xen_intr_handle_t handle) { struct xenisrc *isrc; isrc = xen_intr_isrc_from_handle(handle); if (isrc != NULL) { KASSERT(isrc->xi_type == EVTCHN_TYPE_PORT || isrc->xi_type == EVTCHN_TYPE_IPI, ("evtchn_signal on something other than a local port")); struct evtchn_send send = { .port = isrc->xi_port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); } } evtchn_port_t xen_intr_port(xen_intr_handle_t handle) { struct xenisrc *isrc; isrc = xen_intr_isrc_from_handle(handle); if (isrc == NULL) return (0); return (isrc->xi_port); } int xen_intr_add_handler(const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t handle) { struct xenisrc *isrc; int error; isrc = xen_intr_isrc_from_handle(handle); 
if (isrc == NULL || isrc->xi_cookie != NULL) return (EINVAL); error = intr_add_handler(name, isrc->xi_vector,filter, handler, arg, flags|INTR_EXCL, &isrc->xi_cookie, 0); if (error != 0) printf("%s: %s: add handler failed: %d\n", name, __func__, error); return (error); } int xen_intr_get_evtchn_from_port(evtchn_port_t port, xen_intr_handle_t *handlep) { if (!is_valid_evtchn(port)) return (EINVAL); if (handlep == NULL) { return (EINVAL); } mtx_lock(&xen_intr_isrc_lock); if (xen_intr_port_to_isrc[port] == NULL) { mtx_unlock(&xen_intr_isrc_lock); return (EINVAL); } refcount_acquire(&xen_intr_port_to_isrc[port]->xi_refcount); mtx_unlock(&xen_intr_isrc_lock); /* Assign the opaque handler */ *handlep = xen_intr_handle_from_isrc(xen_intr_port_to_isrc[port]); return (0); } #ifdef DDB static const char * xen_intr_print_type(enum evtchn_type type) { static const char *evtchn_type_to_string[EVTCHN_TYPE_COUNT] = { [EVTCHN_TYPE_UNBOUND] = "UNBOUND", [EVTCHN_TYPE_VIRQ] = "VIRQ", [EVTCHN_TYPE_IPI] = "IPI", [EVTCHN_TYPE_PORT] = "PORT", }; if (type >= EVTCHN_TYPE_COUNT) return ("UNKNOWN"); return (evtchn_type_to_string[type]); } static void xen_intr_dump_port(struct xenisrc *isrc) { struct xen_intr_pcpu_data *pcpu; shared_info_t *s = HYPERVISOR_shared_info; int i; db_printf("Port %d Type: %s\n", isrc->xi_port, xen_intr_print_type(isrc->xi_type)); if (isrc->xi_type == EVTCHN_TYPE_VIRQ) db_printf("\tVirq: %d\n", isrc->xi_virq); db_printf("\tMasked: %d Pending: %d\n", !!xen_test_bit(isrc->xi_port, &s->evtchn_mask[0]), !!xen_test_bit(isrc->xi_port, &s->evtchn_pending[0])); db_printf("\tPer-CPU Masks: "); CPU_FOREACH(i) { pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); db_printf("cpu#%d: %d ", i, !!xen_test_bit(isrc->xi_port, pcpu->evtchn_enabled)); } db_printf("\n"); } DB_SHOW_COMMAND(xen_evtchn, db_show_xen_evtchn) { int i; if (!xen_domain()) { db_printf("Only available on Xen guests\n"); return; } for (i = 0; i < NR_EVENT_CHANNELS; i++) { struct xenisrc *isrc; isrc = xen_intr_port_to_isrc[i]; 
if (isrc == NULL) continue; xen_intr_dump_port(isrc); } } #endif /* DDB */ diff --git a/sys/xen/evtchn/evtchnvar.h b/sys/xen/evtchn/evtchnvar.h index 1f78755115ac..d1438846594f 100644 --- a/sys/xen/evtchn/evtchnvar.h +++ b/sys/xen/evtchn/evtchnvar.h @@ -1,104 +1,111 @@ /****************************************************************************** * evtchn.h * * Data structures and definitions private to the FreeBSD implementation * of the Xen event channel API. * * Copyright (c) 2004, K A Fraser * Copyright (c) 2012, Spectra Logic Corporation + * Copyright © 2022, Elliott Mitchell * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
 *
 * $FreeBSD$
 */

#ifndef __XEN_EVTCHN_EVTCHNVAR_H__
#define __XEN_EVTCHN_EVTCHNVAR_H__

/* NOTE(review): both include targets are missing in this copy -- restore them. */
#include
#include

/*
 * Kinds of event channel binding.  EVTCHN_TYPE_COUNT is not a binding
 * kind; it is the number of kinds that precede it.
 */
enum evtchn_type {
	EVTCHN_TYPE_UNBOUND,
	EVTCHN_TYPE_VIRQ,
	EVTCHN_TYPE_IPI,
	EVTCHN_TYPE_PORT,
	EVTCHN_TYPE_COUNT
};

/** Submit a port notification for delivery to a userland evtchn consumer */
void evtchn_device_upcall(evtchn_port_t port);

+/*
+ * Macros for accessing event channel values.
+ *
+ * EVTCHN_PTR(type, port) yields a pointer into
+ * HYPERVISOR_shared_info->evtchn_<type>, advanced by port / __LONG_BIT
+ * elements.  EVTCHN_BIT(port) is the port's bit index within that
+ * element (port modulo __LONG_BIT) and EVTCHN_MASK(port) is the matching
+ * single-bit unsigned long mask.
+ */
+#define	EVTCHN_PTR(type, port) \
+	(HYPERVISOR_shared_info->evtchn_##type + ((port) / __LONG_BIT))
+#define	EVTCHN_BIT(port)	((port) & (__LONG_BIT - 1))
+#define	EVTCHN_MASK(port)	(1UL << EVTCHN_BIT(port))
+
/**
 * Disable signal delivery for an event channel port, returning its
 * previous mask state.
 *
 * \param port  The event channel port to query and mask.
 *
 * \returns  1 if event delivery was previously disabled.  Otherwise 0.
 */
static inline int
evtchn_test_and_set_mask(evtchn_port_t port)
{
-	shared_info_t *s = HYPERVISOR_shared_info;
-	return synch_test_and_set_bit(port, s->evtchn_mask);
+
+	return (atomic_testandset_long(EVTCHN_PTR(mask, port),
+	    EVTCHN_BIT(port)));
}

/**
 * Clear any pending event for the given event channel port.
 *
 * \param port  The event channel port to clear.
 */
static inline void
evtchn_clear_port(evtchn_port_t port)
{
-	shared_info_t *s = HYPERVISOR_shared_info;
-	synch_clear_bit(port, &s->evtchn_pending[0]);
+
+	atomic_clear_long(EVTCHN_PTR(pending, port), EVTCHN_MASK(port));
}

/**
 * Disable signal delivery for an event channel port.
 *
 * \param port  The event channel port to mask.
 */
static inline void
evtchn_mask_port(evtchn_port_t port)
{
-	shared_info_t *s = HYPERVISOR_shared_info;
-	synch_set_bit(port, &s->evtchn_mask[0]);
+	atomic_set_long(EVTCHN_PTR(mask, port), EVTCHN_MASK(port));
}

/**
 * Enable signal delivery for an event channel port.
 *
 * Unlike masking, which sets a bit in the shared-info mask array
 * directly, unmasking is requested through the EVTCHNOP_unmask hypercall.
 *
 * \param port  The event channel port to enable.
 */
static inline void
evtchn_unmask_port(evtchn_port_t port)
{
	evtchn_unmask_t op = { .port = port };

	/* The hypercall's return value is not checked. */
	HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op);
}

#endif /* __XEN_EVTCHN_EVTCHNVAR_H__ */