Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -2973,6 +2973,7 @@
 dev/xen/xenstore/xenstored_dev.c	optional xenhvm
 dev/xen/evtchn/evtchn_dev.c	optional xenhvm
 dev/xen/privcmd/privcmd.c	optional xenhvm
+dev/xen/gntdev/gntdev.c		optional xenhvm
 dev/xen/debug/debug.c		optional xenhvm
 dev/xl/if_xl.c			optional xl pci
 dev/xl/xlphy.c			optional xl pci
Index: sys/dev/xen/gntdev/gntdev.c
===================================================================
--- /dev/null
+++ sys/dev/xen/gntdev/gntdev.c
@@ -0,0 +1,1187 @@
+/*-
+ * Copyright (c) 2016 Akshay Jaggi
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * gntdev.c
+ *
+ * Interface to /dev/xen/gntdev.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+#include <sys/rman.h>
+#include <sys/tree.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/bitset.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/syslog.h>
+#include <sys/taskqueue.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
+
+#include <machine/md_var.h>
+
+#include <xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/error.h>
+#include <xen/xen_intr.h>
+#include <xen/gnttab.h>
+#include <xen/gntdev.h>
+
+MALLOC_DEFINE(M_GNTDEV, "gntdev", "Xen grant-table user-space device");
+
+static d_open_t gntdev_open;
+static d_ioctl_t gntdev_ioctl;
+static d_mmap_single_t gntdev_mmap_single;
+
+static struct cdevsw gntdev_devsw = {
+	.d_version = D_VERSION,
+	.d_open = gntdev_open,
+	.d_ioctl = gntdev_ioctl,
+	.d_mmap_single = gntdev_mmap_single,
+	.d_name = "gntdev",
+};
+
+static device_t gntdev_dev = NULL;
+
+struct gntdev_gref;
+struct gntdev_gmap;
+STAILQ_HEAD(gref_list_head, gntdev_gref);
+STAILQ_HEAD(gmap_list_head, gntdev_gmap);
+RB_HEAD(gref_tree_head, gntdev_gref);
+RB_HEAD(gmap_tree_head, gntdev_gmap);
+
+struct per_user_data {
+	struct mtx user_data_lock;
+	struct gref_tree_head gref_tree;
+	struct gmap_tree_head gmap_tree;
+	uint64_t file_offset;
+};
+
+/*
+ * Get the offset into the device-file that the userspace program will
+ * later pass to mmap() in order to map the corresponding pages.
+ */
+static uint64_t
+get_file_offset(struct per_user_data *priv_user, uint32_t count)
+{
+	uint64_t file_offset;
+
+	mtx_lock(&priv_user->user_data_lock);
+	file_offset = priv_user->file_offset;
+	/* Widen before multiplying, so large counts cannot overflow. */
+	priv_user->file_offset += (uint64_t)count * PAGE_SIZE;
+	mtx_unlock(&priv_user->user_data_lock);
+
+	return (file_offset);
+}
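+
+/*
+ * For example, with 4 KiB pages: the first request against a freshly
+ * opened descriptor, asking for 2 pages, returns offset 0 and advances
+ * file_offset to 0x2000; a following request for 1 page then returns
+ * 0x2000.  Userspace passes the returned offset straight to mmap(2),
+ * and offsets are never reused for the lifetime of the open descriptor.
+ */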
+
+static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size,
+    vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color);
+static void gntdev_gmap_pg_dtor(void *handle);
+static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset,
+    int prot, vm_page_t *mres);
+
+static struct cdev_pager_ops gntdev_gmap_pg_ops = {
+	.cdev_pg_fault = gntdev_gmap_pg_fault,
+	.cdev_pg_ctor = gntdev_gmap_pg_ctor,
+	.cdev_pg_dtor = gntdev_gmap_pg_dtor,
+};
+
+struct cleanup_data_struct {
+	struct mtx to_kill_grefs_mtx;
+	struct mtx to_kill_gmaps_mtx;
+	struct gref_list_head to_kill_grefs;
+	struct gmap_list_head to_kill_gmaps;
+};
+
+static struct cleanup_data_struct cleanup_data = {
+	.to_kill_grefs = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_grefs),
+	.to_kill_gmaps = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_gmaps),
+};
+MTX_SYSINIT(to_kill_grefs_mtx, &cleanup_data.to_kill_grefs_mtx,
+    "gntdev to_kill_grefs mutex", MTX_DEF);
+MTX_SYSINIT(to_kill_gmaps_mtx, &cleanup_data.to_kill_gmaps_mtx,
+    "gntdev to_kill_gmaps mutex", MTX_DEF);
+
+static void cleanup_function(void *arg, __unused int pending);
+static struct task cleanup_task = TASK_INITIALIZER(0, cleanup_function,
+    &cleanup_data);
+
+struct notify_data {
+	uint64_t index;
+	uint32_t action;
+	uint32_t event_channel_port;
+	xen_intr_handle_t notify_evtchn_handle;
+};
+
+static void notify(struct notify_data *notify, vm_page_t page);
+
+/*-------------------- Grant Allocation Methods -----------------------------*/
+
+struct gntdev_gref {
+	union gref_next_union {
+		STAILQ_ENTRY(gntdev_gref) list;
+		RB_ENTRY(gntdev_gref) tree;
+	} gref_next;
+	uint64_t file_index;
+	grant_ref_t gref_id;
+	vm_page_t page;
+	struct notify_data *notify;
+};
+
+static int
+gref_cmp(struct gntdev_gref *g1, struct gntdev_gref *g2)
+{
+
+	/*
+	 * A plain subtraction would be truncated to int and could
+	 * misorder offsets differing only in the high bits, so compare
+	 * explicitly.
+	 */
+	return (g1->file_index > g2->file_index ? 1 :
+	    (g1->file_index < g2->file_index ? -1 : 0));
+}
+
+RB_GENERATE_STATIC(gref_tree_head, gntdev_gref, gref_next.tree, gref_cmp);
+
+/*
+ * Traverse the device-wide list of to-be-deleted grant allocations, and
+ * destroy those to which all accesses, both local mmaps and foreign
+ * maps, have ended.
+ */
+static void
+gref_list_dtor(struct cleanup_data_struct *cleanup_data)
+{
+	struct gref_list_head tmp_grefs;
+	struct gntdev_gref *gref, *gref_tmp, *gref_previous;
+
+	STAILQ_INIT(&tmp_grefs);
+	mtx_lock(&cleanup_data->to_kill_grefs_mtx);
+	STAILQ_SWAP(&cleanup_data->to_kill_grefs, &tmp_grefs, gntdev_gref);
+	mtx_unlock(&cleanup_data->to_kill_grefs_mtx);
+
+	gref_previous = NULL;
+	STAILQ_FOREACH_SAFE(gref, &tmp_grefs, gref_next.list, gref_tmp) {
+		if (gref->page && gref->page->object == NULL) {
+			if (gref->notify) {
+				notify(gref->notify, gref->page);
+			}
+			if (gref->gref_id != GRANT_REF_INVALID) {
+				if (gnttab_query_foreign_access(gref->gref_id))
+					continue;
+				if (gnttab_end_foreign_access_ref(gref->gref_id)
+				    == 0)
+					continue;
+				gnttab_free_grant_reference(gref->gref_id);
+			}
+			vm_page_unwire(gref->page, PQ_NONE);
+			vm_page_free(gref->page);
+			gref->page = NULL;
+		}
+		if (gref->page == NULL) {
+			if (gref_previous == NULL)
+				STAILQ_REMOVE_HEAD(&tmp_grefs, gref_next.list);
+			else
+				STAILQ_REMOVE_AFTER(&tmp_grefs, gref_previous,
+				    gref_next.list);
+			if (gref->notify)
+				free(gref->notify, M_GNTDEV);
+			free(gref, M_GNTDEV);
+		} else
+			gref_previous = gref;
+	}
+
+	if (!STAILQ_EMPTY(&tmp_grefs)) {
+		mtx_lock(&cleanup_data->to_kill_grefs_mtx);
+		STAILQ_CONCAT(&cleanup_data->to_kill_grefs, &tmp_grefs);
+		mtx_unlock(&cleanup_data->to_kill_grefs_mtx);
+	}
+}
+
+/*
+ * Find `count` contiguous allocated grants for a given userspace
+ * program, starting at the given file-offset (index).
+ */
+static struct gntdev_gref*
+gntdev_find_grefs(struct per_user_data *priv_user,
+    uint64_t index, uint32_t count)
+{
+	struct gntdev_gref find_gref, *gref, *gref_start = NULL;
+
+	find_gref.file_index = index;
+
+	mtx_lock(&priv_user->user_data_lock);
+	gref_start = RB_FIND(gref_tree_head, &priv_user->gref_tree, &find_gref);
+	for (gref = gref_start; gref != NULL && count > 0; gref =
+	    RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) {
+		if (index != gref->file_index)
+			break;
+		index += PAGE_SIZE;
+		count--;
+	}
+	mtx_unlock(&priv_user->user_data_lock);
+
+	if (count)
+		return (NULL);
+	return (gref_start);
+}
+
+/*
+ * IOCTL_GNTDEV_ALLOC_GREF
+ * Allocate the required number of wired pages for the request, grant
+ * foreign access to the physical frames backing these pages, and add
+ * details about this allocation to the per-user private data, so that
+ * these pages can be mmapped by the userspace program.
+ */
+static int
+gntdev_alloc_gref(struct ioctl_gntdev_alloc_gref *arg)
+{
+	int i, error, readonly;
+	uint32_t initialized;
+	uint64_t file_offset;
+	struct gntdev_gref *grefs;
+	struct per_user_data *priv_user;
+
+	readonly = !(arg->flags & GNTDEV_ALLOC_FLAG_WRITABLE);
+
+	error = devfs_get_cdevpriv((void**) &priv_user);
+	if (error != 0)
+		return (EINVAL);
+
+	/* Cleanup grefs and free pages. */
+	taskqueue_enqueue(taskqueue_thread, &cleanup_task);
+
+	/* Get file offset for this request. */
+	file_offset = get_file_offset(priv_user, arg->count);
+
+	/* Allocate grefs. */
+	grefs = malloc(sizeof(*grefs) * arg->count, M_GNTDEV, M_WAITOK);
+
+	for (i = 0; i < arg->count; i++) {
+		grefs[i].file_index = file_offset + (uint64_t)i * PAGE_SIZE;
+		grefs[i].gref_id = GRANT_REF_INVALID;
+		grefs[i].notify = NULL;
+		grefs[i].page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
+		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+		if (grefs[i].page == NULL) {
+			log(LOG_ERR, "Page allocation failed.");
+			error = ENOMEM;
+			break;
+		}
+		if ((grefs[i].page->flags & PG_ZERO) == 0) {
+			/*
+			 * Zero the allocated page, as we don't want to
+			 * leak our memory to other domains.
+			 */
+			pmap_zero_page(grefs[i].page);
+		}
+		grefs[i].page->valid = VM_PAGE_BITS_ALL;
+
+		error = gnttab_grant_foreign_access(arg->domid,
+		    (VM_PAGE_TO_PHYS(grefs[i].page) >> PAGE_SHIFT),
+		    readonly, &grefs[i].gref_id);
+		if (error != 0) {
+			log(LOG_ERR, "Grant Table Hypercall failed.");
+			break;
+		}
+	}
+
+	if (error != 0) {
+		/*
+		 * If the target domain maps the gref (by guessing the
+		 * gref-id), then we can't clean it up yet and we have to
+		 * leave the page in place so as to not leak our memory
+		 * to that domain.  Add it to a global list to be cleaned
+		 * up later.  Only the entries up to and including the one
+		 * that failed were initialized above, so only queue those.
+		 */
+		initialized = i + 1;
+		mtx_lock(&cleanup_data.to_kill_grefs_mtx);
+		for (i = 0; i < initialized; i++)
+			STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs,
+			    &grefs[i], gref_next.list);
+		mtx_unlock(&cleanup_data.to_kill_grefs_mtx);
+
+		taskqueue_enqueue(taskqueue_thread, &cleanup_task);
+
+		return (error);
+	}
+
+	/* Copy the output values. */
+	arg->index = file_offset;
+	for (i = 0; i < arg->count; i++)
+		arg->gref_ids[i] = grefs[i].gref_id;
+
+	/* Modify the per user private data. */
+	mtx_lock(&priv_user->user_data_lock);
+	for (i = 0; i < arg->count; i++)
+		RB_INSERT(gref_tree_head, &priv_user->gref_tree, &grefs[i]);
+	mtx_unlock(&priv_user->user_data_lock);
+
+	return (error);
+}
+
+/*
+ * IOCTL_GNTDEV_DEALLOC_GREF
+ * Remove grant allocation information from the per-user private data, so
+ * that it can no longer be mmapped by the userspace program, and add it
+ * to the device-wide list of to-be-deleted grants.
+ */
+static int
+gntdev_dealloc_gref(struct ioctl_gntdev_dealloc_gref *arg)
+{
+	int error;
+	uint32_t count;
+	struct gntdev_gref *gref, *gref_tmp;
+	struct per_user_data *priv_user;
+
+	error = devfs_get_cdevpriv((void**) &priv_user);
+	if (error != 0)
+		return (EINVAL);
+
+	gref = gntdev_find_grefs(priv_user, arg->index, arg->count);
+	if (gref == NULL) {
+		log(LOG_ERR, "Can't find requested grant-refs.");
+		return (EINVAL);
+	}
+
+	/* Remove the grefs from user private data. */
+	count = arg->count;
+	mtx_lock(&priv_user->user_data_lock);
+	mtx_lock(&cleanup_data.to_kill_grefs_mtx);
+	for (; gref != NULL && count > 0; gref = gref_tmp) {
+		gref_tmp = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref);
+		RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref);
+		STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref,
+		    gref_next.list);
+		count--;
+	}
+	mtx_unlock(&cleanup_data.to_kill_grefs_mtx);
+	mtx_unlock(&priv_user->user_data_lock);
+
+	taskqueue_enqueue(taskqueue_thread, &cleanup_task);
+
+	return (0);
+}
+
+/*-------------------- Grant Mapping Methods --------------------------------*/
+
+struct gntdev_gmap_map {
+	vm_object_t mem;
+	struct resource *pseudo_phys_res;
+	int pseudo_phys_res_id;
+	vm_paddr_t phys_base_addr;
+};
+
+struct gntdev_gmap {
+	union gmap_next_union {
+		STAILQ_ENTRY(gntdev_gmap) list;
+		RB_ENTRY(gntdev_gmap) tree;
+	} gmap_next;
+	uint64_t file_index;
+	uint32_t count;
+	struct gnttab_map_grant_ref *grant_map_ops;
+	struct gntdev_gmap_map *map;
+	struct notify_data *notify;
+};
+
+static int
+gmap_cmp(struct gntdev_gmap *g1, struct gntdev_gmap *g2)
+{
+
+	/* See gref_cmp() for why a plain subtraction is not used here. */
+	return (g1->file_index > g2->file_index ? 1 :
+	    (g1->file_index < g2->file_index ? -1 : 0));
+}
+
+RB_GENERATE_STATIC(gmap_tree_head, gntdev_gmap, gmap_next.tree, gmap_cmp);
+
+/*
+ * Traverse the device-wide list of to-be-deleted grant mappings, and if
+ * a region is no longer mmapped by anyone, free the memory used to
+ * store information about the mapping.
+ */
+static void
+gmap_list_dtor(struct cleanup_data_struct *cleanup_data)
+{
+	struct gmap_list_head tmp_gmaps;
+	struct gntdev_gmap *gmap, *gmap_tmp, *gmap_previous;
+
+	STAILQ_INIT(&tmp_gmaps);
+	mtx_lock(&cleanup_data->to_kill_gmaps_mtx);
+	STAILQ_SWAP(&cleanup_data->to_kill_gmaps, &tmp_gmaps, gntdev_gmap);
+	mtx_unlock(&cleanup_data->to_kill_gmaps_mtx);
+
+	gmap_previous = NULL;
+	STAILQ_FOREACH_SAFE(gmap, &tmp_gmaps, gmap_next.list, gmap_tmp) {
+		if (gmap->map == NULL) {
+			if (gmap_previous == NULL)
+				STAILQ_REMOVE_HEAD(&tmp_gmaps, gmap_next.list);
+			else
+				STAILQ_REMOVE_AFTER(&tmp_gmaps, gmap_previous,
+				    gmap_next.list);
+
+			if (gmap->notify)
+				free(gmap->notify, M_GNTDEV);
+			free(gmap->grant_map_ops, M_GNTDEV);
+			free(gmap, M_GNTDEV);
+		} else
+			gmap_previous = gmap;
+	}
+
+	if (!STAILQ_EMPTY(&tmp_gmaps)) {
+		mtx_lock(&cleanup_data->to_kill_gmaps_mtx);
+		STAILQ_CONCAT(&cleanup_data->to_kill_gmaps, &tmp_gmaps);
+		mtx_unlock(&cleanup_data->to_kill_gmaps_mtx);
+	}
+}
+
+/*
+ * Find mapped grants for a given userspace program, by file-offset (index)
+ * and count, as supplied during the map-ioctl.
+ */
+static struct gntdev_gmap*
+gntdev_find_gmap(struct per_user_data *priv_user,
+    uint64_t index, uint32_t count)
+{
+	struct gntdev_gmap find_gmap, *gmap;
+
+	find_gmap.file_index = index;
+
+	mtx_lock(&priv_user->user_data_lock);
+	gmap = RB_FIND(gmap_tree_head, &priv_user->gmap_tree, &find_gmap);
+	mtx_unlock(&priv_user->user_data_lock);
+
+	if (gmap != NULL && gmap->count == count)
+		return (gmap);
+	return (NULL);
+}
+
+/*
+ * Remove the pages from the mgtdevice pager, call the unmap hypercall,
+ * and free the xenmem resource.  This function is called during the
+ * destruction of the mgtdevice pager, which happens when all mmaps to
+ * it have been removed, and the unmap-ioctl has been performed.
+ */
+static int
+notify_unmap_cleanup(struct gntdev_gmap *gmap)
+{
+	int error, rc, i, count;
+	vm_page_t m;
+	struct gnttab_unmap_grant_ref *unmap_ops;
+
+	unmap_ops = malloc(sizeof(struct gnttab_unmap_grant_ref) * gmap->count,
+	    M_GNTDEV, M_WAITOK);
+
+	/* Enumerate freeable maps. */
+	count = 0;
+	for (i = 0; i < gmap->count; i++) {
+		if (gmap->grant_map_ops[i].handle != -1) {
+			unmap_ops[count].handle = gmap->grant_map_ops[i].handle;
+			unmap_ops[count].host_addr =
+			    gmap->grant_map_ops[i].host_addr;
+			unmap_ops[count].dev_bus_addr = 0;
+			count++;
+		}
+	}
+
+	/* Perform notification. */
+	if (count > 0 && gmap->notify) {
+		vm_page_t page;
+		uint64_t page_offset;
+
+		page_offset = gmap->notify->index - gmap->file_index;
+		page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + page_offset);
+		notify(gmap->notify, page);
+	}
+
+	/* Free the pages. */
+	VM_OBJECT_WLOCK(gmap->map->mem);
+retry:
+	for (i = 0; i < gmap->count; i++) {
+		m = vm_page_lookup(gmap->map->mem, i);
+		if (m == NULL)
+			continue;
+		if (vm_page_sleep_if_busy(m, "pcmdum"))
+			goto retry;
+		cdev_pager_free_page(gmap->map->mem, m);
+	}
+	VM_OBJECT_WUNLOCK(gmap->map->mem);
+
+	/* Perform unmap hypercall. */
+	error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+	    unmap_ops, count);
+
+	for (i = 0; i < gmap->count; i++) {
+		gmap->grant_map_ops[i].handle = -1;
+		gmap->grant_map_ops[i].host_addr = 0;
+	}
+
+	if (gmap->map) {
+		/*
+		 * Use a separate variable, so that a failure of the unmap
+		 * hypercall above is not masked by the xenmem_free() result.
+		 */
+		rc = xenmem_free(gntdev_dev, gmap->map->pseudo_phys_res_id,
+		    gmap->map->pseudo_phys_res);
+		KASSERT(rc == 0,
+		    ("Unable to release memory resource: %d", rc));
+
+		free(gmap->map, M_GNTDEV);
+		gmap->map = NULL;
+	}
+
+	free(unmap_ops, M_GNTDEV);
+
+	return (error);
+}
+
+/*
+ * IOCTL_GNTDEV_MAP_GRANT_REF
+ * Populate structures for mapping the grant reference in the per-user
+ * private data.  The actual resource allocation and map hypercall are
+ * performed during the mmap.
+ */
+static int
+gntdev_map_grant_ref(struct ioctl_gntdev_map_grant_ref *arg)
+{
+	int i, error;
+	struct gntdev_gmap *gmap;
+	struct per_user_data *priv_user;
+
+	error = devfs_get_cdevpriv((void**) &priv_user);
+	if (error != 0)
+		return (EINVAL);
+
+	gmap = malloc(sizeof(*gmap), M_GNTDEV, M_WAITOK | M_ZERO);
+
+	gmap->count = arg->count;
+	gmap->file_index = get_file_offset(priv_user, arg->count);
+	gmap->grant_map_ops =
+	    malloc(sizeof(struct gnttab_map_grant_ref) * arg->count,
+	    M_GNTDEV, M_WAITOK | M_ZERO);
+
+	for (i = 0; i < arg->count; i++) {
+		gmap->grant_map_ops[i].dom = arg->refs[i].domid;
+		gmap->grant_map_ops[i].ref = arg->refs[i].ref;
+		gmap->grant_map_ops[i].handle = -1;
+		gmap->grant_map_ops[i].flags = GNTMAP_host_map;
+	}
+
+	mtx_lock(&priv_user->user_data_lock);
+	RB_INSERT(gmap_tree_head, &priv_user->gmap_tree, gmap);
+	mtx_unlock(&priv_user->user_data_lock);
+
+	arg->index = gmap->file_index;
+
+	return (error);
+}
+
+/*
+ * IOCTL_GNTDEV_UNMAP_GRANT_REF
+ * Remove the map information from the per-user private data and add it
+ * to the device-wide list of mappings to be deleted.  A reference to
+ * the mgtdevice pager is also dropped, the reason for which is
+ * explained in mmap_gmap().
+ */
+static int
+gntdev_unmap_grant_ref(struct ioctl_gntdev_unmap_grant_ref *arg)
+{
+	int error;
+	struct gntdev_gmap *gmap;
+	struct per_user_data *priv_user;
+
+	error = devfs_get_cdevpriv((void**) &priv_user);
+	if (error != 0)
+		return (EINVAL);
+
+	gmap = gntdev_find_gmap(priv_user, arg->index, arg->count);
+	if (gmap == NULL) {
+		log(LOG_ERR, "Can't find requested grant-map.");
+		return (EINVAL);
+	}
+
+	mtx_lock(&priv_user->user_data_lock);
+	mtx_lock(&cleanup_data.to_kill_gmaps_mtx);
+	RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap);
+	STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, gmap_next.list);
+	mtx_unlock(&cleanup_data.to_kill_gmaps_mtx);
+	mtx_unlock(&priv_user->user_data_lock);
+
+	if (gmap->map)
+		vm_object_deallocate(gmap->map->mem);
+
+	taskqueue_enqueue(taskqueue_thread, &cleanup_task);
+
+	return (0);
+}
+
+/*
+ * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR
+ * Get file-offset and count for a given mapping, from the virtual address
+ * where the mapping is mmapped.
+ * Please note that this only works for grants mapped by this domain, not
+ * for grants allocated by it; count doesn't make much sense for allocated
+ * grants.  Also, because this function is present in the Linux gntdev
+ * device, but not in the Linux gntalloc one, most userspace code only
+ * uses it for mapped grants.
+ */
+static int
+gntdev_get_offset_for_vaddr(struct ioctl_gntdev_get_offset_for_vaddr *arg,
+    struct thread *td)
+{
+	int error;
+	vm_map_t map;
+	vm_map_entry_t entry;
+	vm_object_t mem;
+	vm_pindex_t pindex;
+	vm_prot_t prot;
+	boolean_t wired;
+	vm_size_t entry_size;
+	struct gntdev_gmap *gmap;
+
+	map = &td->td_proc->p_vmspace->vm_map;
+	error = vm_map_lookup(&map, arg->vaddr, VM_PROT_NONE, &entry,
+	    &mem, &pindex, &prot, &wired);
+	if (error != KERN_SUCCESS)
+		return (EINVAL);
+	/*
+	 * Snapshot the entry size before dropping the map lock; the
+	 * entry must not be dereferenced afterwards.
+	 */
+	entry_size = entry->end - entry->start;
+	vm_map_lookup_done(map, entry);
+
+	if ((mem->type != OBJT_MGTDEVICE) ||
+	    (mem->un_pager.devp.ops != &gntdev_gmap_pg_ops))
+		return (EINVAL);
+
+	gmap = mem->handle;
+	if (gmap == NULL || entry_size != (gmap->count * PAGE_SIZE))
+		return (EINVAL);
+
+	arg->count = gmap->count;
+	arg->offset = gmap->file_index;
+	return (0);
+}
+
+/*-------------------- Grant Mapping Pager ----------------------------------*/
+
+static int
+gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+
+	return (0);
+}
+
+static void
+gntdev_gmap_pg_dtor(void *handle)
+{
+
+	notify_unmap_cleanup((struct gntdev_gmap *)handle);
+}
+
+static int
+gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot,
+    vm_page_t *mres)
+{
+	struct gntdev_gmap *gmap = object->handle;
+	vm_pindex_t pidx, ridx;
+	vm_page_t page, oldm;
+	vm_ooffset_t relative_offset;
+
+	if (gmap->map == NULL)
+		return (VM_PAGER_FAIL);
+
+	relative_offset = offset - gmap->file_index;
+
+	pidx = OFF_TO_IDX(offset);
+	ridx = OFF_TO_IDX(relative_offset);
+	if (ridx >= gmap->count ||
+	    gmap->grant_map_ops[ridx].status != GNTST_okay)
+		return (VM_PAGER_FAIL);
+
+	page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + relative_offset);
+	if (page == NULL)
+		return (VM_PAGER_FAIL);
+
+	KASSERT((page->flags & PG_FICTITIOUS) != 0,
+	    ("not fictitious %p", page));
+	KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page));
+	KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page));
+
+	if (*mres != NULL) {
+		oldm = *mres;
+		vm_page_lock(oldm);
+		vm_page_free(oldm);
+		vm_page_unlock(oldm);
+		*mres = NULL;
+	}
+
+	vm_page_insert(page, object, pidx);
+	page->valid = VM_PAGE_BITS_ALL;
+	vm_page_xbusy(page);
+	*mres = page;
+	return (VM_PAGER_OK);
+}
+
+/*------------------ Grant Table Methods ------------------------------------*/
+
+static void
+notify(struct notify_data *notify, vm_page_t page)
+{
+	if (notify->action & UNMAP_NOTIFY_CLEAR_BYTE) {
+		uint8_t *mem;
+		uint64_t offset;
+
+		offset = notify->index & (PAGE_SIZE - 1);
+		mem = (uint8_t *)pmap_quick_enter_page(page);
+		mem[offset] = 0;
+		pmap_quick_remove_page((vm_offset_t)mem);
+	}
+	if (notify->action & UNMAP_NOTIFY_SEND_EVENT) {
+		xen_intr_signal(notify->notify_evtchn_handle);
+		xen_intr_unbind(&notify->notify_evtchn_handle);
+	}
+	notify->action = 0;
+}
+
+/*
+ * Helper to copy new arguments from the notify ioctl into
+ * the existing notify data.
+ */
+static int
+copy_notify_helper(struct notify_data *destination,
+    struct ioctl_gntdev_unmap_notify *source)
+{
+	xen_intr_handle_t handlep = NULL;
+
+	/*
+	 * "Get" before "Put"ting previous reference, as we might be
+	 * holding the last reference to the event channel port.
+	 */
+	if (source->action & UNMAP_NOTIFY_SEND_EVENT)
+		if (xen_intr_get_evtchn_from_port(source->event_channel_port,
+		    &handlep) != 0)
+			return (EINVAL);
+
+	if (destination->action & UNMAP_NOTIFY_SEND_EVENT)
+		xen_intr_unbind(&destination->notify_evtchn_handle);
+
+	destination->action = source->action;
+	destination->event_channel_port = source->event_channel_port;
+	destination->index = source->index;
+	destination->notify_evtchn_handle = handlep;
+
+	return (0);
+}
+
+/*
+ * IOCTL_GNTDEV_SET_UNMAP_NOTIFY
+ * Set unmap notification inside the appropriate grant.  It sends a
+ * notification when the grant is completely munmapped by this domain
+ * and ready for destruction.
+ */
+static int
+gntdev_set_unmap_notify(struct ioctl_gntdev_unmap_notify *arg)
+{
+	int error;
+	uint64_t index;
+	struct per_user_data *priv_user;
+	struct gntdev_gref *gref = NULL;
+	struct gntdev_gmap *gmap;
+
+	error = devfs_get_cdevpriv((void**) &priv_user);
+	if (error != 0)
+		return (EINVAL);
+
+	if (arg->action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
+		return (EINVAL);
+
+	index = arg->index & ~(PAGE_SIZE - 1);
+	gref = gntdev_find_grefs(priv_user, index, 1);
+	if (gref) {
+		/* Allocate the notify data, not the smaller ioctl struct. */
+		if (gref->notify == NULL)
+			gref->notify = malloc(sizeof(*gref->notify),
+			    M_GNTDEV, M_WAITOK | M_ZERO);
+		return (copy_notify_helper(gref->notify, arg));
+	}
+
+	error = EINVAL;
+	mtx_lock(&priv_user->user_data_lock);
+	RB_FOREACH(gmap, gmap_tree_head, &priv_user->gmap_tree) {
+		if (arg->index >= gmap->file_index &&
+		    arg->index < gmap->file_index + gmap->count * PAGE_SIZE) {
+			if (gmap->notify == NULL)
+				gmap->notify = malloc(sizeof(*gmap->notify),
+				    M_GNTDEV, M_WAITOK | M_ZERO);
+			error = copy_notify_helper(gmap->notify, arg);
+			break;
+		}
+	}
+	mtx_unlock(&priv_user->user_data_lock);
+
+	return (error);
+}
+
+/*------------------ Gntdev Char Device Methods -----------------------------*/
+
+static void
+cleanup_function(void *arg, __unused int pending)
+{
+
+	gref_list_dtor((struct cleanup_data_struct *) arg);
+	gmap_list_dtor((struct cleanup_data_struct *) arg);
+}
+
+static void
+per_user_data_dtor(void *arg)
+{
+	struct gntdev_gref *gref, *gref_tmp;
+	struct gntdev_gmap *gmap, *gmap_tmp;
+	struct per_user_data *priv_user;
+
+	priv_user = (struct per_user_data *) arg;
+
+	mtx_lock(&priv_user->user_data_lock);
+
+	mtx_lock(&cleanup_data.to_kill_grefs_mtx);
+	RB_FOREACH_SAFE(gref, gref_tree_head, &priv_user->gref_tree, gref_tmp) {
+		RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref);
+		STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref,
+		    gref_next.list);
+	}
+	mtx_unlock(&cleanup_data.to_kill_grefs_mtx);
+
+	mtx_lock(&cleanup_data.to_kill_gmaps_mtx);
+	RB_FOREACH_SAFE(gmap, gmap_tree_head, &priv_user->gmap_tree, gmap_tmp) {
+		RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap);
+		STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap,
+		    gmap_next.list);
+		if (gmap->map)
+			vm_object_deallocate(gmap->map->mem);
+	}
+	mtx_unlock(&cleanup_data.to_kill_gmaps_mtx);
+
+	mtx_unlock(&priv_user->user_data_lock);
+
+	taskqueue_enqueue(taskqueue_thread, &cleanup_task);
+
+	mtx_destroy(&priv_user->user_data_lock);
+	free(priv_user, M_GNTDEV);
+}
+
+static int
+gntdev_open(struct cdev *dev, int flag, int otyp, struct thread *td)
+{
+	int error;
+	struct per_user_data *priv_user;
+
+	priv_user = malloc(sizeof(*priv_user), M_GNTDEV, M_WAITOK | M_ZERO);
+	RB_INIT(&priv_user->gref_tree);
+	RB_INIT(&priv_user->gmap_tree);
+	mtx_init(&priv_user->user_data_lock,
+	    "per user data mutex", NULL, MTX_DEF);
+
+	error = devfs_set_cdevpriv(priv_user, per_user_data_dtor);
+	if (error != 0)
+		per_user_data_dtor(priv_user);
+
+	return (error);
+}
+
+static int
+gntdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
+    int fflag, struct thread *td)
+{
+	int error;
+
+	switch (cmd) {
+	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
+		error = gntdev_set_unmap_notify(
+		    (struct ioctl_gntdev_unmap_notify*) data);
+		break;
+	case IOCTL_GNTDEV_ALLOC_GREF:
+		error = gntdev_alloc_gref(
+		    (struct ioctl_gntdev_alloc_gref*) data);
+		break;
+	case IOCTL_GNTDEV_DEALLOC_GREF:
+		error = gntdev_dealloc_gref(
+		    (struct ioctl_gntdev_dealloc_gref*) data);
+		break;
+	case IOCTL_GNTDEV_MAP_GRANT_REF:
+		error = gntdev_map_grant_ref(
+		    (struct ioctl_gntdev_map_grant_ref*) data);
+		break;
+	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+		error = gntdev_unmap_grant_ref(
+		    (struct ioctl_gntdev_unmap_grant_ref*) data);
+		break;
+	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+		error = gntdev_get_offset_for_vaddr(
+		    (struct ioctl_gntdev_get_offset_for_vaddr*) data, td);
+		break;
+	default:
+		error = ENOSYS;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * MMAP an allocated grant into user memory.
+ * Please note that the grants must not already be mmapped;
+ * otherwise this function will fail.
+ */
+static int
+mmap_gref(struct per_user_data *priv_user, struct gntdev_gref *gref_start,
+    uint32_t count, vm_size_t size, struct vm_object **object)
+{
+	vm_object_t mem_obj;
+	struct gntdev_gref *gref;
+
+	mem_obj = vm_object_allocate(OBJT_PHYS, size);
+	if (mem_obj == NULL)
+		return (ENOMEM);
+
+	mtx_lock(&priv_user->user_data_lock);
+	VM_OBJECT_WLOCK(mem_obj);
+	for (gref = gref_start; gref != NULL && count > 0; gref =
+	    RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) {
+		if (gref->page->object)
+			break;
+
+		vm_page_insert(gref->page, mem_obj,
+		    OFF_TO_IDX(gref->file_index));
+
+		count--;
+	}
+	VM_OBJECT_WUNLOCK(mem_obj);
+	mtx_unlock(&priv_user->user_data_lock);
+
+	if (count) {
+		vm_object_deallocate(mem_obj);
+		return (EINVAL);
+	}
+
+	*object = mem_obj;
+
+	return (0);
+}
+
+/*
+ * MMAP a mapped grant into user memory.
+ */
+static int
+mmap_gmap(struct per_user_data *priv_user, struct gntdev_gmap *gmap_start,
+    vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot)
+{
+	int i, error;
+
+	/*
+	 * The grant map hypercall might already be done.
+	 * If that is the case, increase a reference to the
+	 * vm object and return the already allocated object.
+	 */
+	if (gmap_start->map) {
+		vm_object_reference(gmap_start->map->mem);
+		*object = gmap_start->map->mem;
+		return (0);
+	}
+
+	gmap_start->map = malloc(sizeof(*(gmap_start->map)), M_GNTDEV,
+	    M_WAITOK | M_ZERO);
+
+	/* Allocate the xen pseudo physical memory resource. */
+	gmap_start->map->pseudo_phys_res_id = 0;
+	gmap_start->map->pseudo_phys_res = xenmem_alloc(gntdev_dev,
+	    &gmap_start->map->pseudo_phys_res_id, size);
+	if (gmap_start->map->pseudo_phys_res == NULL) {
+		free(gmap_start->map, M_GNTDEV);
+		gmap_start->map = NULL;
+		return (ENOMEM);
+	}
+	gmap_start->map->phys_base_addr =
+	    rman_get_start(gmap_start->map->pseudo_phys_res);
+
+	/* Allocate the mgtdevice pager. */
+	gmap_start->map->mem = cdev_pager_allocate(gmap_start, OBJT_MGTDEVICE,
+	    &gntdev_gmap_pg_ops, size, nprot, *offset, NULL);
+	if (gmap_start->map->mem == NULL) {
+		xenmem_free(gntdev_dev, gmap_start->map->pseudo_phys_res_id,
+		    gmap_start->map->pseudo_phys_res);
+		free(gmap_start->map, M_GNTDEV);
+		gmap_start->map = NULL;
+		return (ENOMEM);
+	}
+
+	for (i = 0; i < gmap_start->count; i++) {
+		gmap_start->grant_map_ops[i].host_addr =
+		    gmap_start->map->phys_base_addr + i * PAGE_SIZE;
+
+		if ((nprot & PROT_WRITE) == 0)
+			gmap_start->grant_map_ops[i].flags |= GNTMAP_readonly;
+	}
+	/* Make the MAP hypercall. */
+	error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+	    gmap_start->grant_map_ops, gmap_start->count);
+	if (error != 0) {
+		/*
+		 * Deallocate pager.
+		 * Pager deallocation will automatically take care of
+		 * xenmem deallocation, etc.
+		 */
+		vm_object_deallocate(gmap_start->map->mem);
+
+		return (EINVAL);
+	}
+
+	/* Retry EAGAIN maps. */
+	for (i = 0; i < gmap_start->count; i++) {
+		int delay = 1;
+		while (delay < 256 &&
+		    gmap_start->grant_map_ops[i].status == GNTST_eagain) {
+			HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+			    &gmap_start->grant_map_ops[i], 1);
+			/* An sbintime_t delay requires pause_sbt(). */
+			pause_sbt("gntmap", delay * SBT_1MS, 0, 0);
+			delay++;
+		}
+		if (gmap_start->grant_map_ops[i].status == GNTST_eagain)
+			gmap_start->grant_map_ops[i].status = GNTST_bad_page;
+
+		if (gmap_start->grant_map_ops[i].status != GNTST_okay) {
+			/*
+			 * Deallocate pager.
+			 * Pager deallocation will automatically take care of
+			 * xenmem deallocation, notification, unmap hypercall,
+			 * etc.
+			 */
+			vm_object_deallocate(gmap_start->map->mem);
+
+			return (EINVAL);
+		}
+	}
+
+	/*
+	 * Add a reference to the vm object.  We do not want
+	 * the vm object to be deleted when all the mmaps are
+	 * unmapped, because it may still be re-mmapped.  Instead,
+	 * we want the object to be deleted when, along with the
+	 * munmaps, we have also processed the unmap-ioctl.
+	 */
+	vm_object_reference(gmap_start->map->mem);
+
+	*object = gmap_start->map->mem;
+
+	return (0);
+}
+
+static int
+gntdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
+    struct vm_object **object, int nprot)
+{
+	int error;
+	uint32_t count;
+	struct gntdev_gref *gref_start;
+	struct gntdev_gmap *gmap_start;
+	struct per_user_data *priv_user;
+
+	error = devfs_get_cdevpriv((void**) &priv_user);
+	if (error != 0)
+		return (EINVAL);
+
+	count = OFF_TO_IDX(size);
+
+	gref_start = gntdev_find_grefs(priv_user, *offset, count);
+	if (gref_start) {
+		error = mmap_gref(priv_user, gref_start, count, size, object);
+		return (error);
+	}
+
+	gmap_start = gntdev_find_gmap(priv_user, *offset, count);
+	if (gmap_start) {
+		error = mmap_gmap(priv_user, gmap_start, offset, size, object,
+		    nprot);
+		return (error);
+	}
+
+	return (EINVAL);
+}
+
+/*------------------ Private Device Attachment Functions --------------------*/
+static void
+gntdev_identify(driver_t *driver, device_t parent)
+{
+
+	KASSERT((xen_domain()),
+	    ("Trying to attach gntdev device on non Xen domain"));
+
+	if (BUS_ADD_CHILD(parent, 0, "gntdev", 0) == NULL)
+		panic("unable to attach gntdev user-space device");
+}
+
+static int
+gntdev_probe(device_t dev)
+{
+
+	gntdev_dev = dev;
+	device_set_desc(dev, "Xen grant-table user-space device");
+	return (BUS_PROBE_NOWILDCARD);
+}
+
+static int
+gntdev_attach(device_t dev)
+{
+
+	make_dev_credf(MAKEDEV_ETERNAL, &gntdev_devsw, 0, NULL, UID_ROOT,
+	    GID_WHEEL, 0600, "xen/gntdev");
+	return (0);
+}
+
+/*-------------------- Private Device Attachment Data -----------------------*/
+static device_method_t gntdev_methods[] = {
+	DEVMETHOD(device_identify, gntdev_identify),
+	DEVMETHOD(device_probe, gntdev_probe),
+	DEVMETHOD(device_attach, gntdev_attach),
+	DEVMETHOD_END
+};
+
+static driver_t gntdev_driver = {
+	"gntdev",
+	gntdev_methods,
+	0,
+};
+
+devclass_t gntdev_devclass;
+
+DRIVER_MODULE(gntdev, xenpv, gntdev_driver, gntdev_devclass, 0, 0);
+MODULE_DEPEND(gntdev, xenpv, 1, 1, 1);
Index: sys/xen/gntdev.h
===================================================================
--- /dev/null
+++ sys/xen/gntdev.h
@@ -0,0 +1,191 @@
+/*-
+ * Copyright (c) 2016 Akshay Jaggi
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * gntdev.h
+ *
+ * Interface to /dev/xen/gntdev.
+ *
+ * This device provides the user with two kinds of functionality:
+ * 1. Grant Allocation
+ *    Allocate a page of our own memory, and share it with a foreign domain.
+ * 2. Grant Mapping
+ *    Map a grant allocated by a foreign domain, into our own memory.
+ *
+ *
+ * Grant Allocation
+ *
+ * Steps to allocate a grant:
+ * 1. Do an `IOCTL_GNTDEV_ALLOC_GREF` ioctl, with
+ *    - `domid`, as the domain-id of the foreign domain
+ *    - `flags`, ORed with GNTDEV_ALLOC_FLAG_WRITABLE if you want the foreign
+ *      domain to have write access to the shared memory
+ *    - `count`, with the number of pages to share with the foreign domain
+ *
+ *    Ensure that the structure you allocate has enough memory to store
+ *    all the allocated grant-refs, i.e., you need to allocate
+ *    (sizeof(struct ioctl_gntdev_alloc_gref) + (count - 1)*sizeof(uint32_t))
+ *    bytes of memory.
+ *
+ * 2. Call mmap() with the offset given in `index` after a successful ioctl.
+ *    This will give you access to the granted pages.
+ *
+ * Note:
+ * 1. The grant is not removed until all three of the following conditions
+ *    are met:
+ *    - The region is not mmapped.  That is, munmap() has been called if
+ *      the region was mmapped previously.
+ *    - The IOCTL_GNTDEV_DEALLOC_GREF ioctl has been performed.  After you
+ *      perform this ioctl, you can no longer mmap or set notify on
+ *      the grant.
+ *    - The foreign domain has stopped using the grant.
+ * 2. Granted pages can only belong to one mmap region.
+ * 3. Every page of granted memory is a unit in itself.  What this means
+ *    is that you can set an unmap notification for each of the granted
+ *    pages, individually; you can mmap and dealloc-ioctl a contiguous
+ *    range of allocated grants (even if the alloc-ioctls were performed
+ *    individually), etc.
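+ *
+ * As a sketch of the sequence above (error checking omitted; the peer
+ * domain-id `dom` and how the ref is advertised, e.g. via xenstore, are
+ * assumed to be arranged elsewhere), sharing one writable page could
+ * look like:
+ *
+ *	int fd = open("/dev/xen/gntdev", O_RDWR);
+ *	uint32_t count = 1;
+ *	struct ioctl_gntdev_alloc_gref *arg = malloc(sizeof(*arg) +
+ *	    (count - 1) * sizeof(uint32_t));
+ *	arg->domid = dom;
+ *	arg->flags = GNTDEV_ALLOC_FLAG_WRITABLE;
+ *	arg->count = count;
+ *	ioctl(fd, IOCTL_GNTDEV_ALLOC_GREF, arg);
+ *	// arg->gref_ids[0] now holds the grant-ref to advertise to the
+ *	// peer; arg->index is the offset to hand to mmap(2).
+ *	void *shared = mmap(NULL, count * getpagesize(),
+ *	    PROT_READ | PROT_WRITE, MAP_SHARED, fd, arg->index);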
+ *
+ *
+ * Grant Mapping
+ *
+ * Steps to map a grant:
+ * 1. Do an `IOCTL_GNTDEV_MAP_GRANT_REF` ioctl, with
+ *    - `count`, as the number of foreign grants to map
+ *    - `refs[i].domid`, as the domain-id of the foreign domain
+ *    - `refs[i].ref`, as the grant-ref for the grant to be mapped
+ *
+ * 2. Call mmap() with the offset given in `index` after a successful ioctl.
+ *    This will give you access to the mapped pages.
+ *
+ * Note:
+ * 1. The map hypercall is not made until the region is mmapped.
+ * 2. The unit is defined by the map ioctl.  This means that only one
+ *    unmap notification can be set on a group of pages that were
+ *    mapped together in one ioctl, and also that no single mmapping
+ *    spanning several grant-maps is possible.
+ * 3. You can mmap the same grant-map region multiple times.
+ * 4. The grant is not unmapped until both of the following conditions
+ *    are met:
+ *    - The region is not mmapped.  That is, munmap() has been called
+ *      as many times as the grant was mmapped.
+ *    - The IOCTL_GNTDEV_UNMAP_GRANT_REF ioctl has been called.
+ * 5. The IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR ioctl gives the index and
+ *    count of a grant-map from the virtual address of the location
+ *    where the grant is mmapped.
+ *
+ *
+ * IOCTL_GNTDEV_SET_UNMAP_NOTIFY
+ * This ioctl allows us to set notifications to be made when the grant is
+ * either unmapped (in the case of a mapped grant), or when it is ready to
+ * be deallocated by us, i.e., the grant is no longer mmapped and the
+ * dealloc ioctl has been called (in the case of an allocated grant).
+ * OR `action` with the required notification masks, and fill in the
+ * appropriate fields.
+ * - UNMAP_NOTIFY_CLEAR_BYTE clears the byte at `index`, where `index` is
+ *   the offset of the byte in the file address space.
+ * - UNMAP_NOTIFY_SEND_EVENT sends an event channel notification on
+ *   `event_channel_port`.
+ * In case of multiple notify ioctls, only the last one survives.
+ *
+ */
+
+#ifndef __XEN_GNTDEV_H__
+#define __XEN_GNTDEV_H__
+
+#include <sys/types.h>
+
+#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
+	_IOW('E', 0, struct ioctl_gntdev_unmap_notify)
+struct ioctl_gntdev_unmap_notify {
+	/* IN parameters */
+	uint64_t index;
+	uint32_t action;
+	uint32_t event_channel_port;
+};
+
+#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
+#define UNMAP_NOTIFY_SEND_EVENT 0x2
+
+/*-------------------- Grant Allocation IOCTLs ------------------------------*/
+
+#define IOCTL_GNTDEV_ALLOC_GREF \
+	_IOWR('E', 1, struct ioctl_gntdev_alloc_gref)
+struct ioctl_gntdev_alloc_gref {
+	/* IN parameters */
+	uint16_t domid;
+	uint16_t flags;
+	uint32_t count;
+	/* OUT parameters */
+	uint64_t index;
+	/* Variable OUT parameter */
+	uint32_t gref_ids[1];
+};
+
+#define GNTDEV_ALLOC_FLAG_WRITABLE 1
+
+#define IOCTL_GNTDEV_DEALLOC_GREF \
+	_IOW('E', 2, struct ioctl_gntdev_dealloc_gref)
+struct ioctl_gntdev_dealloc_gref {
+	/* IN parameters */
+	uint64_t index;
+	uint32_t count;
+};
+
+/*-------------------- Grant Mapping IOCTLs ---------------------------------*/
+
+struct ioctl_gntdev_grant_ref {
+	uint32_t domid;
+	uint32_t ref;
+};
+
+#define IOCTL_GNTDEV_MAP_GRANT_REF \
+	_IOWR('E', 3, struct ioctl_gntdev_map_grant_ref)
+struct ioctl_gntdev_map_grant_ref {
+	/* IN parameters */
+	uint32_t count;
+	uint32_t pad0;
+	/* OUT parameters */
+	uint64_t index;
+	/* Variable IN parameter */
+	struct ioctl_gntdev_grant_ref refs[1];
+};
+
+#define IOCTL_GNTDEV_UNMAP_GRANT_REF \
+	_IOW('E', 4, struct ioctl_gntdev_unmap_grant_ref)
+struct ioctl_gntdev_unmap_grant_ref {
+	/* IN parameters */
+	uint64_t index;
+	uint32_t count;
+};
+
+#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \
+	_IOWR('E', 5, struct ioctl_gntdev_get_offset_for_vaddr)
+struct ioctl_gntdev_get_offset_for_vaddr {
+	/* IN parameters */
+	uint64_t vaddr;
+	/* OUT parameters */
+	uint64_t offset;
+	uint32_t count;
+};
+
+#endif /* __XEN_GNTDEV_H__ */
\ No newline at end of file