Changeset View
Standalone View
sys/dev/xen/gntdev/gntdev.c
- This file was added.
/*- | |||||
* Copyright (c) 2016 Akshay Jaggi <jaggi@FreeBSD.org> | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
pfg: These two lines should be moved after the license. The license should be strictly as suggested… | |||||
Done Inline ActionsTo be clear, the lines before the copyright line are what should be moved, not the copyright line. imp: To be clear, the lines before the copyright line are what should be moved, not the copyright… | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
* | |||||
* gntdev.c | |||||
* | |||||
* Interface to /dev/xen/gntdev. | |||||
* | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/param.h> | |||||
#include <sys/systm.h> | |||||
#include <sys/uio.h> | |||||
#include <sys/bus.h> | |||||
Done Inline ActionsInstead of this, you can just add $FreeBSD$ in the comment, after the license. pfg: Instead of this, you can just add $FreeBSD$ in the comment, after the license. | |||||
Done Inline ActionsFor .c files, this is the correct way. For .h files, you are correct. imp: For .c files, this is the correct way. For .h files, you are correct. | |||||
Done Inline ActionsHmm.. then I guess I have screwed up more than once :(. pfg: Hmm.. then I guess I have screwed up more than once :(.
| |||||
#include <sys/malloc.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/mutex.h> | |||||
#include <sys/rwlock.h> | |||||
#include <sys/selinfo.h> | |||||
#include <sys/poll.h> | |||||
#include <sys/conf.h> | |||||
#include <sys/fcntl.h> | |||||
#include <sys/ioccom.h> | |||||
#include <sys/rman.h> | |||||
#include <sys/tree.h> | |||||
#include <sys/module.h> | |||||
#include <sys/proc.h> | |||||
#include <sys/bitset.h> | |||||
#include <sys/queue.h> | |||||
#include <sys/mman.h> | |||||
#include <sys/syslog.h> | |||||
#include <sys/taskqueue.h> | |||||
#include <vm/vm.h> | |||||
#include <vm/vm_param.h> | |||||
#include <vm/vm_extern.h> | |||||
#include <vm/vm_kern.h> | |||||
#include <vm/vm_page.h> | |||||
#include <vm/vm_map.h> | |||||
#include <vm/vm_object.h> | |||||
#include <vm/vm_pager.h> | |||||
#include <vm/vm_phys.h> | |||||
#include <machine/md_var.h> | |||||
#include <xen/xen-os.h> | |||||
#include <xen/hypervisor.h> | |||||
#include <xen/error.h> | |||||
#include <xen/xen_intr.h> | |||||
#include <xen/gnttab.h> | |||||
#include <xen/gntdev.h> | |||||
MALLOC_DEFINE(M_GNTDEV, "gntdev", "Xen grant-table user-space device"); | |||||
static d_open_t gntdev_open; | |||||
static d_ioctl_t gntdev_ioctl; | |||||
static d_mmap_single_t gntdev_mmap_single; | |||||
static struct cdevsw gntdev_devsw = { | |||||
.d_version = D_VERSION, | |||||
.d_open = gntdev_open, | |||||
.d_ioctl = gntdev_ioctl, | |||||
.d_mmap_single = gntdev_mmap_single, | |||||
.d_name = "gntdev", | |||||
}; | |||||
static device_t gntdev_dev = NULL; | |||||
struct gntdev_gref; | |||||
struct gntdev_gmap; | |||||
STAILQ_HEAD(gref_list_head, gntdev_gref); | |||||
STAILQ_HEAD(gmap_list_head, gntdev_gmap); | |||||
RB_HEAD(gref_tree_head, gntdev_gref); | |||||
RB_HEAD(gmap_tree_head, gntdev_gmap); | |||||
struct per_user_data { | |||||
struct mtx user_data_lock; | |||||
struct gref_tree_head gref_tree; | |||||
struct gmap_tree_head gmap_tree; | |||||
uint64_t file_offset; | |||||
}; | |||||
/* | |||||
* Get offset into the file which will be used while mmapping the | |||||
* appropriate pages by the userspace program. | |||||
*/ | |||||
static uint64_t | |||||
get_file_offset(struct per_user_data *priv_user, uint32_t count) | |||||
{ | |||||
uint64_t file_offset; | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
file_offset = priv_user->file_offset; | |||||
priv_user->file_offset += count * PAGE_SIZE; | |||||
roygerUnsubmitted Not Done Inline ActionsNow that I look again into this, isn't this a little bit too simplistic? At some point priv_user->file_offset is going to overflow, returning into 0, and then we risk overlapping with existing regions. IMHO, you should use something like a region manager (rangeset), that can keep track of start and sizes of the allocated regions, it's going to add a little bit more of logic, but at least we will be on the safe side. royger: Now that I look again into this, isn't this a little bit too simplistic? At some point… | |||||
akshay1994.leo_gmail.comAuthorUnsubmitted Not Done Inline ActionsAye Aye! This was an improvement I left on for something later. (Linux uses similar simplistic logic for alloc, but perhaps something more complicated for map). akshay1994.leo_gmail.com: Aye Aye! This was an improvement I left on for something later. (Linux uses similar simplistic… | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
return (file_offset); | |||||
} | |||||
static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, | |||||
vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); | |||||
static void gntdev_gmap_pg_dtor(void *handle); | |||||
static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, | |||||
int prot, vm_page_t *mres); | |||||
static struct cdev_pager_ops gntdev_gmap_pg_ops = { | |||||
.cdev_pg_fault = gntdev_gmap_pg_fault, | |||||
.cdev_pg_ctor = gntdev_gmap_pg_ctor, | |||||
.cdev_pg_dtor = gntdev_gmap_pg_dtor, | |||||
}; | |||||
struct cleanup_data_struct { | |||||
struct mtx to_kill_grefs_mtx; | |||||
struct mtx to_kill_gmaps_mtx; | |||||
struct gref_list_head to_kill_grefs; | |||||
struct gmap_list_head to_kill_gmaps; | |||||
}; | |||||
static struct cleanup_data_struct cleanup_data = { | |||||
.to_kill_grefs = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_grefs), | |||||
.to_kill_gmaps = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_gmaps), | |||||
}; | |||||
MTX_SYSINIT(to_kill_grefs_mtx, &cleanup_data.to_kill_grefs_mtx, | |||||
"gntdev to_kill_grefs mutex", MTX_DEF); | |||||
MTX_SYSINIT(to_kill_gmaps_mtx, &cleanup_data.to_kill_gmaps_mtx, | |||||
"gntdev to_kill_gmaps mutex", MTX_DEF); | |||||
static void cleanup_function(void *arg, __unused int pending); | |||||
static struct task cleanup_task = TASK_INITIALIZER(0, cleanup_function, | |||||
&cleanup_data); | |||||
struct notify_data { | |||||
uint64_t index; | |||||
uint32_t action; | |||||
uint32_t event_channel_port; | |||||
Done Inline ActionsThe bracket should be on the same line as the iterator. royger: The bracket should be on the same line as the iterator. | |||||
xen_intr_handle_t notify_evtchn_handle; | |||||
Done Inline ActionsWith the addition of the RB tree now the structure requires two different handlers, one for the RB tree and another one for the TAILQ, which is rather unfortunate (also taking into account that there might be hundreds of those on a busy Dom0. Could you please benchmark if the usage of the RB tree makes any difference from a performance point of view? In order to test it you should create a NULL memdisk (all reads will return 0, writes are ignored) using: # mdconfig -a -t null -s 10g Attach it to a guest using the Qdisk backend and then run fio (or any other disk benchmark) against the raw block device. You should test the pre-RB and the RB versions of the gnttab device and decide if the addition of the RB tree is worth it or not. royger: With the addition of the RB tree now the structure requires two different handlers, one for the… | |||||
Not Done Inline ActionsConsistently better performance using RB tree. I guess we should keep it :) P.S. - And even better performance using (RB + Async) :) :) with_RB4 KBDownload
with_RB_run24 KBDownload
with_RB_run34 KBDownload
without_RB_run34 KBDownload
without_RB_run24 KBDownload
without_RB4 KBDownload
fio_test.sh839 BDownload
with_RB_and_Async5 KBDownload akshay1994.leo_gmail.com: Consistently better performance using RB tree. I guess we should keep it :)
P.S. - And even… | |||||
Done Inline ActionsGreat, it looks marginally better. FWIW, in FreeBSD (and other projects), when you give performance figures you usually have to use a table of some sort, with the relevant data, and the stddev. Without this data results could be considered useless. If you want to give this a try, there's a very good cli tool in FreeBSD called 'ministat(1)', that will give you a nice graph and will tell you whether there's a performance benefit or it's just an statistic illusion. IMHO it's not necessary in this case, but I would recommend that you give it a try if you want/can. royger: Great, it looks marginally better.
FWIW, in FreeBSD (and other projects), when you give… | |||||
Not Done Inline ActionsNoted. I'll keep it in mind from next time onwards. akshay1994.leo_gmail.com: Noted. I'll keep it in mind from next time onwards. | |||||
Done Inline ActionsIn order to reduce the size of this struct, you should put gref_list_next and gref_tree_next inside of a union, since AFAICT, you will never have a gref added to both the cleanup tree and a RB tree (same applies to gntdev_gmap). And the struct definitions should all be at the top of the file. Having functions interleaved with struct definitions makes the code harder to read IMHO (I might be convinced otherwise if you think it's better the other way around). royger: In order to reduce the size of this struct, you should put gref_list_next and gref_tree_next… | |||||
Not Done Inline ActionsI've shifted both iterator structures inside a union. I'm a little inclined towards keeping the struct definitions in the middle because the code is divided into segments, as follows:
The DS defined in (2) has no use in any of the other parts of the code, and the same is true for the DS defined in (3). Most of their use is inside their respective sections, and thus I defined them at the beginning of their respective sections. akshay1994.leo_gmail.com: I've shifted both iterator structures inside a union.
I'm a little inclined towards keeping… | |||||
}; | |||||
Done Inline ActionsSame here, this should be on the same line as the if (there are a bunch of those, so I'm going to stop commenting on each one of them). royger: Same here, this should be on the same line as the if (there are a bunch of those, so I'm going… | |||||
static void notify(struct notify_data *notify, vm_page_t page); | |||||
/*-------------------- Grant Allocation Methods -----------------------------*/ | |||||
struct gntdev_gref { | |||||
union gref_next_union { | |||||
STAILQ_ENTRY(gntdev_gref) list; | |||||
RB_ENTRY(gntdev_gref) tree; | |||||
} gref_next; | |||||
uint64_t file_index; | |||||
grant_ref_t gref_id; | |||||
vm_page_t page; | |||||
struct notify_data *notify; | |||||
}; | |||||
static int | |||||
gref_cmp(struct gntdev_gref *g1, struct gntdev_gref *g2) | |||||
{ | |||||
return (g1->file_index - g2->file_index); | |||||
} | |||||
RB_GENERATE_STATIC(gref_tree_head, gntdev_gref, gref_next.tree, gref_cmp); | |||||
/* | |||||
Done Inline ActionsSince this now runs in a taskqueue you should use a STAILQ list, and only hold the mutex in order to pick the first element from the queue and remove it. The unmap and cleanup should be done without holding the mutex in order to prevent contention. If the element can not be removed you will have to put it into a local queue that will be appended to the global one once processing has finished. You will have to do it that way in order to prevent iterating forever over grants that cannot be unmapped. royger: Since this now runs in a taskqueue you should use a STAILQ list, and only hold the mutex in… | |||||
Not Done Inline ActionsDone, captain! akshay1994.leo_gmail.com: Done, captain! | |||||
* Traverse over the device-list of to-be-deleted grants allocated, and | |||||
* if all accesses, both local mmaps and foreign maps, to them have ended, | |||||
* destroy them. | |||||
*/ | |||||
static void | |||||
gref_list_dtor(struct cleanup_data_struct *cleanup_data) | |||||
{ | |||||
struct gref_list_head tmp_grefs; | |||||
struct gntdev_gref *gref, *gref_tmp, *gref_previous; | |||||
STAILQ_INIT(&tmp_grefs); | |||||
Done Inline ActionsWould it be better to use a red-black tree in order to store the list of grants from a performance point of view? I expect that the list is going to be mostly accessed in order to search for granted regions rather than adding or removing them. There's an example of a red-black tree usage in sys/vm/vm_phys.c, search for the RB_* macros :). royger: Would it be better to use a red-black tree in order to store the list of grants from a… | |||||
Not Done Inline ActionsIt might be an overkill, given that we access the list only twice or thrice for every allocated grant (mmap, maybe notify, and dealloc). Linux uses a list. akshay1994.leo_gmail.com: It might be an overkill, given that we access the list only twice or thrice for every allocated… | |||||
mtx_lock(&cleanup_data->to_kill_grefs_mtx); | |||||
STAILQ_SWAP(&cleanup_data->to_kill_grefs, &tmp_grefs, gntdev_gref); | |||||
mtx_unlock(&cleanup_data->to_kill_grefs_mtx); | |||||
gref_previous = NULL; | |||||
STAILQ_FOREACH_SAFE(gref, &tmp_grefs, gref_next.list, gref_tmp) { | |||||
if (gref->page && gref->page->object == NULL) { | |||||
if (gref->notify) { | |||||
notify(gref->notify, gref->page); | |||||
} | |||||
if (gref->gref_id != GRANT_REF_INVALID) { | |||||
if (gnttab_query_foreign_access(gref->gref_id)) | |||||
continue; | |||||
if (gnttab_end_foreign_access_ref(gref->gref_id) | |||||
== 0) | |||||
continue; | |||||
gnttab_free_grant_reference(gref->gref_id); | |||||
} | |||||
vm_page_unwire(gref->page, PQ_NONE); | |||||
vm_page_free(gref->page); | |||||
gref->page = NULL; | |||||
} | |||||
if (gref->page == NULL) { | |||||
if (gref_previous == NULL) | |||||
STAILQ_REMOVE_HEAD(&tmp_grefs, gref_next.list); | |||||
else | |||||
STAILQ_REMOVE_AFTER(&tmp_grefs, gref_previous, | |||||
gref_next.list); | |||||
if (gref->notify) | |||||
free(gref->notify, M_GNTDEV); | |||||
free(gref, M_GNTDEV); | |||||
} | |||||
else | |||||
gref_previous = gref; | |||||
} | |||||
if (!STAILQ_EMPTY(&tmp_grefs)) { | |||||
Done Inline ActionsSo you do the cleanup of expired grefs when new ones are mapped, shouldn't it be better to defer the cleanup to some background task, so that it doesn't interfere with the allocation of new grefs? Take a look at the TASKQUEUE(9) man page, it allows to schedule asynchronous task execution. royger: So you do the cleanup of expired grefs when new ones are mapped, shouldn't it be better to… | |||||
Not Done Inline ActionsI guess it should be rare that some domain keeps an allocated grant mapped, even when access from this side has ended. This function is always called when any grant is deallocated, but just to kill phantom grants still alive, I call it here. akshay1994.leo_gmail.com: I guess it should be rare that some domain keeps an allocated grant mapped, even when access… | |||||
mtx_lock(&cleanup_data->to_kill_grefs_mtx); | |||||
STAILQ_CONCAT(&cleanup_data->to_kill_grefs, &tmp_grefs); | |||||
mtx_unlock(&cleanup_data->to_kill_grefs_mtx); | |||||
} | |||||
} | |||||
/* | |||||
* Find count number of contiguous allocated grants for a given userspace | |||||
* program by file-offset (index). | |||||
Done Inline Actionsgref cannot be NULL, because you are suing M_WAITOK, so you can remove this check (or turn it into a KASSERT if you are paranoid :)) royger: gref cannot be NULL, because you are suing M_WAITOK, so you can remove this check (or turn it… | |||||
*/ | |||||
static struct gntdev_gref* | |||||
gntdev_find_grefs(struct per_user_data *priv_user, | |||||
uint64_t index, uint32_t count) | |||||
{ | |||||
struct gntdev_gref find_gref, *gref, *gref_start = NULL; | |||||
find_gref.file_index = index; | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
gref_start = RB_FIND(gref_tree_head, &priv_user->gref_tree, &find_gref); | |||||
for (gref = gref_start; gref != NULL && count > 0; gref = | |||||
RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) { | |||||
if (index != gref->file_index) | |||||
break; | |||||
index += PAGE_SIZE; | |||||
count--; | |||||
} | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
if (count) | |||||
return (NULL); | |||||
return (gref_start); | |||||
} | |||||
/* | |||||
* IOCTL_GNTDEV_ALLOC_GREF | |||||
* Allocate required number of wired pages for the request, grant foreign | |||||
* access to the physical frames for these pages, and add details about | |||||
* this allocation to the per user private data, so that these pages can | |||||
* be mmapped by the userspace program. | |||||
*/ | |||||
static int | |||||
gntdev_alloc_gref(struct ioctl_gntdev_alloc_gref *arg) | |||||
{ | |||||
int i, error, readonly; | |||||
uint64_t file_offset; | |||||
struct gntdev_gref *grefs; | |||||
struct per_user_data *priv_user; | |||||
readonly = !(arg->flags & GNTDEV_ALLOC_FLAG_WRITABLE); | |||||
error = devfs_get_cdevpriv((void**) &priv_user); | |||||
if (error != 0) | |||||
return (EINVAL); | |||||
Done Inline ActionsSame here, you should probably look into making gref_list_dtor a task that you can schedule in the background, because AFAICT there's no obvious reason that it should be explicitly done at this point. royger: Same here, you should probably look into making gref_list_dtor a task that you can schedule in… | |||||
Not Done Inline ActionsSince this is an error, mostly only free() calls will happen here. akshay1994.leo_gmail.com: Since this is an error, mostly only free() calls will happen here. | |||||
/* Cleanup grefs and free pages. */ | |||||
taskqueue_enqueue(taskqueue_thread, &cleanup_task); | |||||
/* Get file offset for this request. */ | |||||
file_offset = get_file_offset(priv_user, arg->count); | |||||
/* Allocate grefs. */ | |||||
Done Inline ActionsYou should not declare variables here, instead do it at the top of the function (didn't you get a complain from the compiler here?) royger: You should not declare variables here, instead do it at the top of the function (didn't you get… | |||||
grefs = malloc(sizeof(*grefs) * arg->count, M_GNTDEV, M_WAITOK); | |||||
for (i = 0; i < arg->count; i++) { | |||||
Done Inline ActionsNo need for the brackets here. royger: No need for the brackets here. | |||||
grefs[i].file_index = file_offset + i * PAGE_SIZE; | |||||
grefs[i].gref_id = GRANT_REF_INVALID; | |||||
grefs[i].notify = NULL; | |||||
grefs[i].page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | |||||
| VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); | |||||
if (grefs[i].page == NULL) { | |||||
log(LOG_ERR, "Page allocation failed."); | |||||
error = ENOMEM; | |||||
break; | |||||
} | |||||
if ((grefs[i].page->flags & PG_ZERO) == 0) { | |||||
/* | |||||
* Zero the allocated page, as we don't want to | |||||
* leak our memory to other domains. | |||||
*/ | |||||
pmap_zero_page(grefs[i].page); | |||||
} | |||||
grefs[i].page->valid = VM_PAGE_BITS_ALL; | |||||
error = gnttab_grant_foreign_access(arg->domid, | |||||
(VM_PAGE_TO_PHYS(grefs[i].page) >> PAGE_SHIFT), | |||||
readonly, &grefs[i].gref_id); | |||||
if (error != 0) { | |||||
log(LOG_ERR, "Grant Table Hypercall failed."); | |||||
break; | |||||
} | |||||
Done Inline Actionsgref == NULL (gref is not a boolean, so you should not use !). royger: gref == NULL (gref is not a boolean, so you should not use !). | |||||
} | |||||
if (error != 0) { | |||||
/* | |||||
* If target domain maps the gref (by guessing the gref-id), | |||||
* then we can't clean it up yet and we have to leave the | |||||
* page in place so as to not leak our memory to that domain. | |||||
* Add it to a global list to be cleaned up later. | |||||
*/ | |||||
mtx_lock(&cleanup_data.to_kill_grefs_mtx); | |||||
for (i = 0; i < arg->count; i++) | |||||
STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, | |||||
&grefs[i], gref_next.list); | |||||
mtx_unlock(&cleanup_data.to_kill_grefs_mtx); | |||||
taskqueue_enqueue(taskqueue_thread, &cleanup_task); | |||||
return (error); | |||||
} | |||||
/* Copy the output values. */ | |||||
arg->index = file_offset; | |||||
for (i = 0; i < arg->count; i++) | |||||
arg->gref_ids[i] = grefs[i].gref_id; | |||||
/* Modify the per user private data. */ | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
for (i = 0; i < arg->count; i++) | |||||
RB_INSERT(gref_tree_head, &priv_user->gref_tree, &grefs[i]); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
return (error); | |||||
} | |||||
/* | |||||
* IOCTL_GNTDEV_DEALLOC_GREF | |||||
* Remove grant allocation information from the per user private data, so | |||||
* that it can't be mmapped anymore by the userspace program, and add it | |||||
* to the to-be-deleted grants global device-list. | |||||
*/ | |||||
static int | |||||
gntdev_dealloc_gref(struct ioctl_gntdev_dealloc_gref *arg) | |||||
{ | |||||
int error; | |||||
uint32_t count; | |||||
struct gntdev_gref *gref, *gref_tmp; | |||||
struct per_user_data *priv_user; | |||||
error = devfs_get_cdevpriv((void**) &priv_user); | |||||
if (error != 0) | |||||
return (EINVAL); | |||||
gref = gntdev_find_grefs(priv_user, arg->index, arg->count); | |||||
if (gref == NULL) { | |||||
log(LOG_ERR, "Can't find requested grant-refs."); | |||||
return (EINVAL); | |||||
} | |||||
/* Remove the grefs from user private data. */ | |||||
count = arg->count; | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
mtx_lock(&cleanup_data.to_kill_grefs_mtx); | |||||
for (; gref != NULL && count > 0; gref = gref_tmp) { | |||||
gref_tmp = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref); | |||||
RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref); | |||||
STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref, | |||||
gref_next.list); | |||||
count--; | |||||
} | |||||
mtx_unlock(&cleanup_data.to_kill_grefs_mtx); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
taskqueue_enqueue(taskqueue_thread, &cleanup_task); | |||||
return (0); | |||||
} | |||||
/*-------------------- Grant Mapping Methods --------------------------------*/ | |||||
struct gntdev_gmap_map { | |||||
vm_object_t mem; | |||||
struct resource *pseudo_phys_res; | |||||
int pseudo_phys_res_id; | |||||
vm_paddr_t phys_base_addr; | |||||
}; | |||||
struct gntdev_gmap { | |||||
union gmap_next_union { | |||||
STAILQ_ENTRY(gntdev_gmap) list; | |||||
RB_ENTRY(gntdev_gmap) tree; | |||||
} gmap_next; | |||||
uint64_t file_index; | |||||
uint32_t count; | |||||
struct gnttab_map_grant_ref *grant_map_ops; | |||||
struct gntdev_gmap_map *map; | |||||
struct notify_data *notify; | |||||
}; | |||||
static int | |||||
gmap_cmp(struct gntdev_gmap *g1, struct gntdev_gmap *g2) | |||||
{ | |||||
return (g1->file_index - g2->file_index); | |||||
} | |||||
Done Inline Actionsmalloc with M_WAITOK cannot fail (there are a couple more below which I'm not going to comment on). royger: malloc with M_WAITOK cannot fail (there are a couple more below which I'm not going to comment… | |||||
RB_GENERATE_STATIC(gmap_tree_head, gntdev_gmap, gmap_next.tree, gmap_cmp); | |||||
/* | |||||
* Traverse over the device-list of to-be-deleted grant mappings, and if | |||||
* the region is no longer mmapped by anyone, free the memory used to | |||||
* store information about the mapping. | |||||
*/ | |||||
static void | |||||
gmap_list_dtor(struct cleanup_data_struct *cleanup_data) | |||||
{ | |||||
struct gmap_list_head tmp_gmaps; | |||||
struct gntdev_gmap *gmap, *gmap_tmp, *gmap_previous; | |||||
STAILQ_INIT(&tmp_gmaps); | |||||
mtx_lock(&cleanup_data->to_kill_gmaps_mtx); | |||||
STAILQ_SWAP(&cleanup_data->to_kill_gmaps, &tmp_gmaps, gntdev_gmap); | |||||
mtx_unlock(&cleanup_data->to_kill_gmaps_mtx); | |||||
gmap_previous = NULL; | |||||
STAILQ_FOREACH_SAFE(gmap, &tmp_gmaps, gmap_next.list, gmap_tmp) { | |||||
if (gmap->map == NULL) { | |||||
if (gmap_previous == NULL) | |||||
STAILQ_REMOVE_HEAD(&tmp_gmaps, gmap_next.list); | |||||
else | |||||
STAILQ_REMOVE_AFTER(&tmp_gmaps, gmap_previous, | |||||
gmap_next.list); | |||||
if (gmap->notify) | |||||
free(gmap->notify, M_GNTDEV); | |||||
free(gmap->grant_map_ops, M_GNTDEV); | |||||
free(gmap, M_GNTDEV); | |||||
} | |||||
else | |||||
gmap_previous = gmap; | |||||
} | |||||
if (!STAILQ_EMPTY(&tmp_gmaps)) { | |||||
mtx_lock(&cleanup_data->to_kill_gmaps_mtx); | |||||
STAILQ_CONCAT(&cleanup_data->to_kill_gmaps, &tmp_gmaps); | |||||
mtx_unlock(&cleanup_data->to_kill_gmaps_mtx); | |||||
} | |||||
} | |||||
Done Inline ActionsWhy do you need to set handle to -1 and host_addr to 0, aren't you just unmapping the grants? royger: Why do you need to set handle to -1 and host_addr to 0, aren't you just unmapping the grants? | |||||
Not Done Inline ActionsThis function might be called in case the Map Hyper-call fails partially (say half of the grants are mapped and the rest are not) (line 924). In that case, I enumerate over the successful maps, and un-map them. Since the user can retry the mmap in such a case, to redo the hyper-call, I update the internal data-structures to represent the fact that none of the grants are mapped anymore. After this, the state of the internal data structures is same as it was before the mmap had happened. akshay1994.leo_gmail.com: This function might be called in case the Map Hyper-call fails partially (say half of the… | |||||
/* | |||||
* Find mapped grants for a given userspace program, by file-offset (index) | |||||
* and count, as supplied during the map-ioctl. | |||||
*/ | |||||
static struct gntdev_gmap* | |||||
gntdev_find_gmap(struct per_user_data *priv_user, | |||||
uint64_t index, uint32_t count) | |||||
{ | |||||
struct gntdev_gmap find_gmap, *gmap; | |||||
find_gmap.file_index = index; | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
gmap = RB_FIND(gmap_tree_head, &priv_user->gmap_tree, &find_gmap); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
if (gmap != NULL && gmap->count == count) | |||||
return (gmap); | |||||
return (NULL); | |||||
} | |||||
/* | |||||
* Remove the pages from the mgtdevice pager, call the unmap hypercall, | |||||
* free the xenmem resource. This function is called during the | |||||
* destruction of the mgtdevice pager, which happens when all mmaps to | |||||
* it have been removed, and the unmap-ioctl has been performed. | |||||
*/ | |||||
static int | |||||
notify_unmap_cleanup(struct gntdev_gmap *gmap) | |||||
{ | |||||
int error, i, count; | |||||
vm_page_t m; | |||||
struct gnttab_unmap_grant_ref *unmap_ops; | |||||
unmap_ops = malloc(sizeof(struct gnttab_unmap_grant_ref) * gmap->count, | |||||
M_GNTDEV, M_WAITOK); | |||||
/* Enumerate freeable maps. */ | |||||
count = 0; | |||||
for (i = 0; i < gmap->count; i++) { | |||||
if (gmap->grant_map_ops[i].handle != -1) { | |||||
unmap_ops[count].handle = gmap->grant_map_ops[i].handle; | |||||
unmap_ops[count].host_addr = | |||||
gmap->grant_map_ops[i].host_addr; | |||||
unmap_ops[count].dev_bus_addr = 0; | |||||
count++; | |||||
} | |||||
} | |||||
/* Perform notification. */ | |||||
if (count > 0 && gmap->notify) { | |||||
vm_page_t page; | |||||
uint64_t page_offset; | |||||
page_offset = gmap->notify->index - gmap->file_index; | |||||
page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + page_offset); | |||||
notify(gmap->notify, page); | |||||
} | |||||
/* Free the pages. */ | |||||
VM_OBJECT_WLOCK(gmap->map->mem); | |||||
retry: | |||||
for (i = 0; i < gmap->count; i++) { | |||||
m = vm_page_lookup(gmap->map->mem, i); | |||||
if (m == NULL) | |||||
continue; | |||||
if (vm_page_sleep_if_busy(m, "pcmdum")) | |||||
goto retry; | |||||
cdev_pager_free_page(gmap->map->mem, m); | |||||
} | |||||
VM_OBJECT_WUNLOCK(gmap->map->mem); | |||||
/* Perform unmap hypercall. */ | |||||
error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, | |||||
unmap_ops, count); | |||||
for (i = 0; i < gmap->count; i++) { | |||||
gmap->grant_map_ops[i].handle = -1; | |||||
gmap->grant_map_ops[i].host_addr = 0; | |||||
} | |||||
if (gmap->map) { | |||||
error = xenmem_free(gntdev_dev, gmap->map->pseudo_phys_res_id, | |||||
gmap->map->pseudo_phys_res); | |||||
KASSERT(error == 0, | |||||
("Unable to release memory resource: %d", error)); | |||||
free(gmap->map, M_GNTDEV); | |||||
gmap->map = NULL; | |||||
} | |||||
free(unmap_ops, M_GNTDEV); | |||||
return (error); | |||||
} | |||||
/* | |||||
* IOCTL_GNTDEV_MAP_GRANT_REF | |||||
* Populate structures for mapping the grant reference in the per user | |||||
* private data. Actual resource allocation and map hypercall is performed | |||||
* during the mmap. | |||||
*/ | |||||
static int | |||||
gntdev_map_grant_ref(struct ioctl_gntdev_map_grant_ref *arg) | |||||
{ | |||||
int i, error; | |||||
struct gntdev_gmap *gmap; | |||||
struct per_user_data *priv_user; | |||||
error = devfs_get_cdevpriv((void**) &priv_user); | |||||
if (error != 0) | |||||
return (EINVAL); | |||||
gmap = malloc(sizeof(*gmap), M_GNTDEV, M_WAITOK | M_ZERO); | |||||
gmap->count = arg->count; | |||||
gmap->file_index = get_file_offset(priv_user, arg->count); | |||||
gmap->grant_map_ops = | |||||
malloc(sizeof(struct gnttab_map_grant_ref) * arg->count, | |||||
M_GNTDEV, M_WAITOK | M_ZERO); | |||||
for (i = 0; i < arg->count; i++) { | |||||
gmap->grant_map_ops[i].dom = arg->refs[i].domid; | |||||
gmap->grant_map_ops[i].ref = arg->refs[i].ref; | |||||
gmap->grant_map_ops[i].handle = -1; | |||||
gmap->grant_map_ops[i].flags = GNTMAP_host_map; | |||||
} | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
RB_INSERT(gmap_tree_head, &priv_user->gmap_tree, gmap); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
arg->index = gmap->file_index; | |||||
return (error); | |||||
} | |||||
/* | |||||
* IOCTL_GNTDEV_UNMAP_GRANT_REF | |||||
* Remove the map information from the per user private data and add it | |||||
* to the global device-list of mappings to be deleted. A reference to | |||||
* the mgtdevice pager is also decreased, the reason for which is | |||||
* explained in mmap_gmap(). | |||||
*/ | |||||
static int | |||||
gntdev_unmap_grant_ref(struct ioctl_gntdev_unmap_grant_ref *arg) | |||||
{ | |||||
int error; | |||||
struct gntdev_gmap *gmap; | |||||
struct per_user_data *priv_user; | |||||
error = devfs_get_cdevpriv((void**) &priv_user); | |||||
if (error != 0) | |||||
return (EINVAL); | |||||
gmap = gntdev_find_gmap(priv_user, arg->index, arg->count); | |||||
if (gmap == NULL) { | |||||
log(LOG_ERR, "Can't find requested grant-map."); | |||||
return (EINVAL); | |||||
} | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
mtx_lock(&cleanup_data.to_kill_gmaps_mtx); | |||||
Done Inline ActionsIt seems like indentation is off here, but maybe it's just phabricator. Could you make sure you indent the lines that you have to break (because they are too long) using 4 spaces. royger: It seems like indentation is off here, but maybe it's just phabricator. Could you make sure you… | |||||
RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap); | |||||
STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, gmap_next.list); | |||||
mtx_unlock(&cleanup_data.to_kill_gmaps_mtx); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
if (gmap->map) | |||||
vm_object_deallocate(gmap->map->mem); | |||||
taskqueue_enqueue(taskqueue_thread, &cleanup_task); | |||||
return (0); | |||||
} | |||||
/* | |||||
* IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR | |||||
* Get file-offset and count for a given mapping, from the virtual address | |||||
* where the mapping is mmapped. | |||||
* Please note, this only works for grants mapped by this domain, and not | |||||
* grants allocated. Count doesn't make much sense in reference to grants | |||||
* allocated. Also, because this function is present in the linux gntdev | |||||
* device, but not in the linux gntalloc one, most userspace code only use | |||||
* it for mapped grants. | |||||
*/ | |||||
static int | |||||
gntdev_get_offset_for_vaddr(struct ioctl_gntdev_get_offset_for_vaddr *arg, | |||||
struct thread *td) | |||||
{ | |||||
int error; | |||||
vm_map_t map; | |||||
vm_map_entry_t entry; | |||||
vm_object_t mem; | |||||
vm_pindex_t pindex; | |||||
vm_prot_t prot; | |||||
boolean_t wired; | |||||
struct gntdev_gmap *gmap; | |||||
map = &td->td_proc->p_vmspace->vm_map; | |||||
error = vm_map_lookup(&map, arg->vaddr, VM_PROT_NONE, &entry, | |||||
&mem, &pindex, &prot, &wired); | |||||
if (error != KERN_SUCCESS) | |||||
Done Inline ActionsDon't mix declarations with code, this should be declared at the beginning of the function. royger: Don't mix declarations with code, this should be declared at the beginning of the function. | |||||
return (EINVAL); | |||||
vm_map_lookup_done(map, entry); | |||||
if ((mem->type != OBJT_MGTDEVICE) || | |||||
(mem->un_pager.devp.ops != &gntdev_gmap_pg_ops)) | |||||
return (EINVAL); | |||||
gmap = mem->handle; | |||||
if (gmap == NULL || | |||||
(entry->end - entry->start) != (gmap->count * PAGE_SIZE)) | |||||
return (EINVAL); | |||||
arg->count = gmap->count; | |||||
arg->offset = gmap->file_index; | |||||
return (0); | |||||
} | |||||
/*-------------------- Grant Mapping Pager ----------------------------------*/ | |||||
static int | |||||
gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, | |||||
vm_ooffset_t foff, struct ucred *cred, u_short *color) | |||||
{ | |||||
return (0); | |||||
} | |||||
static void | |||||
gntdev_gmap_pg_dtor(void *handle) | |||||
{ | |||||
notify_unmap_cleanup((struct gntdev_gmap *)handle); | |||||
} | |||||
static int | |||||
gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, | |||||
vm_page_t *mres) | |||||
{ | |||||
struct gntdev_gmap *gmap = object->handle; | |||||
vm_pindex_t pidx, ridx; | |||||
vm_page_t page, oldm; | |||||
vm_ooffset_t relative_offset; | |||||
if (gmap->map == NULL) | |||||
return (VM_PAGER_FAIL); | |||||
relative_offset = offset - gmap->file_index; | |||||
pidx = OFF_TO_IDX(offset); | |||||
ridx = OFF_TO_IDX(relative_offset); | |||||
if (ridx >= gmap->count || | |||||
gmap->grant_map_ops[ridx].status != GNTST_okay) | |||||
return (VM_PAGER_FAIL); | |||||
page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + relative_offset); | |||||
if (page == NULL) | |||||
return (VM_PAGER_FAIL); | |||||
KASSERT((page->flags & PG_FICTITIOUS) != 0, | |||||
("not fictitious %p", page)); | |||||
KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); | |||||
KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page)); | |||||
if (*mres != NULL) { | |||||
oldm = *mres; | |||||
vm_page_lock(oldm); | |||||
vm_page_free(oldm); | |||||
vm_page_unlock(oldm); | |||||
*mres = NULL; | |||||
} | |||||
vm_page_insert(page, object, pidx); | |||||
page->valid = VM_PAGE_BITS_ALL; | |||||
vm_page_xbusy(page); | |||||
*mres = page; | |||||
return (VM_PAGER_OK); | |||||
} | |||||
/*------------------ Grant Table Methods ------------------------------------*/ | |||||
static void | |||||
notify(struct notify_data *notify, vm_page_t page) | |||||
{ | |||||
if (notify->action & UNMAP_NOTIFY_CLEAR_BYTE) { | |||||
uint8_t *mem; | |||||
uint64_t offset; | |||||
offset = notify->index & (PAGE_SIZE - 1); | |||||
roygerUnsubmitted Done Inline ActionsShouldn't there be some check to make sure you don't write past the mapped region? (Maybe it's somewhere else, but I'm not able to find it). royger: Shouldn't there be some check to make sure you don't write past the mapped region? (Maybe it's… | |||||
akshay1994.leo_gmail.comAuthorUnsubmitted Not Done Inline ActionsYes. The check is already present. akshay1994.leo_gmail.com: Yes. The check is already present.
The check is done when choosing the gmap/gref to make the… | |||||
mem = (uint8_t *)pmap_quick_enter_page(page); | |||||
Done Inline ActionsAll the cases are indented one level more than required. "switch" and "case" should be at the same indent level. royger: All the cases are indented one level more than required. "switch" and "case" should be at the… | |||||
mem[offset] = 0; | |||||
pmap_quick_remove_page((vm_offset_t)mem); | |||||
} | |||||
if (notify->action & UNMAP_NOTIFY_SEND_EVENT) { | |||||
xen_intr_signal(notify->notify_evtchn_handle); | |||||
xen_intr_unbind(¬ify->notify_evtchn_handle); | |||||
} | |||||
notify->action = 0; | |||||
} | |||||
/* | |||||
* Helper to copy new arguments from the notify ioctl into | |||||
* the existing notify data. | |||||
*/ | |||||
static int | |||||
copy_notify_helper(struct notify_data *destination, | |||||
struct ioctl_gntdev_unmap_notify *source) | |||||
{ | |||||
xen_intr_handle_t handlep = NULL; | |||||
/* | |||||
* "Get" before "Put"ting previous reference, as we might be | |||||
* holding the last reference to the event channel port. | |||||
*/ | |||||
if (source->action & UNMAP_NOTIFY_SEND_EVENT) | |||||
if (xen_intr_get_evtchn_from_port(source->event_channel_port, | |||||
&handlep) != 0) | |||||
return (EINVAL); | |||||
if (destination->action & UNMAP_NOTIFY_SEND_EVENT) | |||||
xen_intr_unbind(&destination->notify_evtchn_handle); | |||||
destination->action = source->action; | |||||
destination->event_channel_port = source->event_channel_port; | |||||
destination->index = source->index; | |||||
destination->notify_evtchn_handle = handlep; | |||||
return (0); | |||||
} | |||||
/* | |||||
* IOCTL_GNTDEV_SET_UNMAP_NOTIFY | |||||
* Set unmap notification inside the appropriate grant. It sends a | |||||
* notification when the grant is completely munmapped by this domain | |||||
* and ready for destruction. | |||||
*/ | |||||
static int | |||||
gntdev_set_unmap_notify(struct ioctl_gntdev_unmap_notify *arg) | |||||
{ | |||||
int error; | |||||
uint64_t index; | |||||
struct per_user_data *priv_user; | |||||
struct gntdev_gref *gref = NULL; | |||||
struct gntdev_gmap *gmap; | |||||
error = devfs_get_cdevpriv((void**) &priv_user); | |||||
if (error != 0) | |||||
return (EINVAL); | |||||
if (arg->action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) | |||||
return (EINVAL); | |||||
index = arg->index & ~(PAGE_SIZE - 1); | |||||
roygerUnsubmitted Not Done Inline Actionss/(PAGE_SIZE - 1)/PAGE_MASK royger: s/(PAGE_SIZE - 1)/PAGE_MASK | |||||
gref = gntdev_find_grefs(priv_user, index, 1); | |||||
if (gref) { | |||||
if (gref->notify == NULL) | |||||
gref->notify = malloc(sizeof(*arg), M_GNTDEV, | |||||
M_WAITOK | M_ZERO); | |||||
return (copy_notify_helper(gref->notify, arg)); | |||||
} | |||||
Done Inline ActionsYou should create two separate helpers here, one for the gnt map and one for the gnt share cases, in order to help the readability of the code. royger: You should create two separate helpers here, one for the gnt map and one for the gnt share… | |||||
error = EINVAL; | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
RB_FOREACH(gmap, gmap_tree_head, &priv_user->gmap_tree) { | |||||
if (arg->index >= gmap->file_index && | |||||
arg->index < gmap->file_index + gmap->count * PAGE_SIZE) { | |||||
if (gmap->notify == NULL) | |||||
gmap->notify = malloc(sizeof(*arg), M_GNTDEV, | |||||
M_WAITOK | M_ZERO); | |||||
error = copy_notify_helper(gmap->notify, arg); | |||||
break; | |||||
} | |||||
} | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
return (error); | |||||
} | |||||
/*------------------ Gntdev Char Device Methods -----------------------------*/ | |||||
static void | |||||
cleanup_function(void *arg, __unused int pending) | |||||
{ | |||||
gref_list_dtor((struct cleanup_data_struct *) arg); | |||||
gmap_list_dtor((struct cleanup_data_struct *) arg); | |||||
} | |||||
static void | |||||
per_user_data_dtor(void *arg) | |||||
{ | |||||
struct gntdev_gref *gref, *gref_tmp; | |||||
struct gntdev_gmap *gmap, *gmap_tmp; | |||||
struct per_user_data *priv_user; | |||||
priv_user = (struct per_user_data *) arg; | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
mtx_lock(&cleanup_data.to_kill_grefs_mtx); | |||||
RB_FOREACH_SAFE(gref, gref_tree_head, &priv_user->gref_tree, gref_tmp) { | |||||
RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref); | |||||
STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref, | |||||
gref_next.list); | |||||
} | |||||
mtx_unlock(&cleanup_data.to_kill_grefs_mtx); | |||||
mtx_lock(&cleanup_data.to_kill_gmaps_mtx); | |||||
RB_FOREACH_SAFE(gmap, gmap_tree_head, &priv_user->gmap_tree, gmap_tmp) { | |||||
RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap); | |||||
STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, | |||||
gmap_next.list); | |||||
if (gmap->map) | |||||
vm_object_deallocate(gmap->map->mem); | |||||
} | |||||
mtx_unlock(&cleanup_data.to_kill_gmaps_mtx); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
taskqueue_enqueue(taskqueue_thread, &cleanup_task); | |||||
mtx_destroy(&priv_user->user_data_lock); | |||||
free(priv_user, M_GNTDEV); | |||||
} | |||||
static int | |||||
gntdev_open(struct cdev *dev, int flag, int otyp, struct thread *td) | |||||
{ | |||||
int error; | |||||
struct per_user_data *priv_user; | |||||
priv_user = malloc(sizeof(*priv_user), M_GNTDEV, M_WAITOK | M_ZERO); | |||||
RB_INIT(&priv_user->gref_tree); | |||||
RB_INIT(&priv_user->gmap_tree); | |||||
mtx_init(&priv_user->user_data_lock, | |||||
"per user data mutex", NULL, MTX_DEF); | |||||
error = devfs_set_cdevpriv(priv_user, per_user_data_dtor); | |||||
if (error != 0) | |||||
per_user_data_dtor(priv_user); | |||||
return (error); | |||||
} | |||||
static int | |||||
gntdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, | |||||
int fflag, struct thread *td) | |||||
{ | |||||
int error; | |||||
switch (cmd) { | |||||
case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: | |||||
error = gntdev_set_unmap_notify( | |||||
Done Inline ActionsThis comments (here and below) don't follow the coding style, check style(9). royger: This comments (here and below) don't follow the coding style, check style(9). | |||||
(struct ioctl_gntdev_unmap_notify*) data); | |||||
break; | |||||
case IOCTL_GNTDEV_ALLOC_GREF: | |||||
error = gntdev_alloc_gref( | |||||
(struct ioctl_gntdev_alloc_gref*) data); | |||||
break; | |||||
case IOCTL_GNTDEV_DEALLOC_GREF: | |||||
error = gntdev_dealloc_gref( | |||||
Done Inline ActionsI don't see any reason for this ioctl to exists, it's not implemented and will never be. Should be removed from the header too. royger: I don't see any reason for this ioctl to exists, it's not implemented and will never be. Should… | |||||
Not Done Inline ActionsRemoved. :) akshay1994.leo_gmail.com: Removed. :) | |||||
(struct ioctl_gntdev_dealloc_gref*) data); | |||||
break; | |||||
case IOCTL_GNTDEV_MAP_GRANT_REF: | |||||
error = gntdev_map_grant_ref( | |||||
(struct ioctl_gntdev_map_grant_ref*) data); | |||||
break; | |||||
case IOCTL_GNTDEV_UNMAP_GRANT_REF: | |||||
error = gntdev_unmap_grant_ref( | |||||
(struct ioctl_gntdev_unmap_grant_ref*) data); | |||||
break; | |||||
case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: | |||||
error = gntdev_get_offset_for_vaddr( | |||||
(struct ioctl_gntdev_get_offset_for_vaddr*) data, td); | |||||
break; | |||||
default: | |||||
error = ENOSYS; | |||||
break; | |||||
} | |||||
return (error); | |||||
} | |||||
/* | |||||
* MMAP an allocated grant into user memory. | |||||
* Please note, that the grants must not already be mmapped, otherwise | |||||
* this function will fail. | |||||
*/ | |||||
static int | |||||
mmap_gref(struct per_user_data *priv_user, struct gntdev_gref *gref_start, | |||||
uint32_t count, vm_size_t size, struct vm_object **object) | |||||
{ | |||||
vm_object_t mem_obj; | |||||
struct gntdev_gref *gref; | |||||
mem_obj = vm_object_allocate(OBJT_PHYS, size); | |||||
if (mem_obj == NULL) | |||||
return (ENOMEM); | |||||
mtx_lock(&priv_user->user_data_lock); | |||||
VM_OBJECT_WLOCK(mem_obj); | |||||
for (gref = gref_start; gref != NULL && count > 0; gref = | |||||
RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) { | |||||
if (gref->page->object) | |||||
break; | |||||
vm_page_insert(gref->page, mem_obj, | |||||
OFF_TO_IDX(gref->file_index)); | |||||
count--; | |||||
} | |||||
VM_OBJECT_WUNLOCK(mem_obj); | |||||
mtx_unlock(&priv_user->user_data_lock); | |||||
if (count) { | |||||
Done Inline ActionsIf the function doesn't have any local variables an empty newline should be left at the top (here and below). royger: If the function doesn't have any local variables an empty newline should be left at the top… | |||||
vm_object_deallocate(mem_obj); | |||||
return (EINVAL); | |||||
} | |||||
*object = mem_obj; | |||||
return (0); | |||||
} | |||||
/* | |||||
* MMAP a mapped grant into user memory. | |||||
*/ | |||||
static int | |||||
mmap_gmap(struct per_user_data *priv_user, struct gntdev_gmap *gmap_start, | |||||
vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) | |||||
{ | |||||
int i, error; | |||||
/* | |||||
* The grant map hypercall might already be done. | |||||
* If that is the case, increase a reference to the | |||||
* vm object and return the already allocated object. | |||||
*/ | |||||
if (gmap_start->map) { | |||||
vm_object_reference(gmap_start->map->mem); | |||||
*object = gmap_start->map->mem; | |||||
return (0); | |||||
} | |||||
gmap_start->map = malloc(sizeof(*(gmap_start->map)), M_GNTDEV, | |||||
M_WAITOK | M_ZERO); | |||||
/* Allocate the xen pseudo physical memory resource. */ | |||||
gmap_start->map->pseudo_phys_res_id = 0; | |||||
gmap_start->map->pseudo_phys_res = xenmem_alloc(gntdev_dev, | |||||
&gmap_start->map->pseudo_phys_res_id, size); | |||||
if (gmap_start->map->pseudo_phys_res == NULL) { | |||||
free(gmap_start->map, M_GNTDEV); | |||||
gmap_start->map = NULL; | |||||
return (ENOMEM); | |||||
} | |||||
gmap_start->map->phys_base_addr = | |||||
rman_get_start(gmap_start->map->pseudo_phys_res); | |||||
/* Allocate the mgtdevice pager. */ | |||||
gmap_start->map->mem = cdev_pager_allocate(gmap_start, OBJT_MGTDEVICE, | |||||
&gntdev_gmap_pg_ops, size, nprot, *offset, NULL); | |||||
if (gmap_start->map->mem == NULL) { | |||||
xenmem_free(gntdev_dev, gmap_start->map->pseudo_phys_res_id, | |||||
gmap_start->map->pseudo_phys_res); | |||||
free(gmap_start->map, M_GNTDEV); | |||||
gmap_start->map = NULL; | |||||
return (ENOMEM); | |||||
} | |||||
for(i = 0; i < gmap_start->count; i++) { | |||||
gmap_start->grant_map_ops[i].host_addr = | |||||
gmap_start->map->phys_base_addr + i * PAGE_SIZE; | |||||
if ((nprot & PROT_WRITE) == 0) | |||||
gmap_start->grant_map_ops[i].flags |= GNTMAP_readonly; | |||||
} | |||||
/* Make the MAP hypercall. */ | |||||
error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |||||
gmap_start->grant_map_ops, gmap_start->count); | |||||
if (error != 0) { | |||||
/* | |||||
* Deallocate pager. | |||||
* Pager deallocation will automatically take care of | |||||
* xenmem deallocation, etc. | |||||
*/ | |||||
vm_object_deallocate(gmap_start->map->mem); | |||||
return (EINVAL); | |||||
} | |||||
/* Retry EAGAIN maps. */ | |||||
for (i = 0; i < gmap_start->count; i++) { | |||||
int delay = 1; | |||||
while (delay < 256 && | |||||
gmap_start->grant_map_ops[i].status == GNTST_eagain) { | |||||
HYPERVISOR_grant_table_op( GNTTABOP_map_grant_ref, | |||||
&gmap_start->grant_map_ops[i], 1); | |||||
pause(("gntmap"), delay * SBT_1MS); | |||||
delay++; | |||||
} | |||||
if (gmap_start->grant_map_ops[i].status == GNTST_eagain) | |||||
gmap_start->grant_map_ops[i].status = GNTST_bad_page; | |||||
if (gmap_start->grant_map_ops[i].status != GNTST_okay) { | |||||
/* | |||||
* Deallocate pager. | |||||
* Pager deallocation will automatically take care of | |||||
* xenmem deallocation, notification, unmap hypercall, | |||||
* etc. | |||||
*/ | |||||
vm_object_deallocate(gmap_start->map->mem); | |||||
return (EINVAL); | |||||
} | |||||
} | |||||
/* | |||||
* Add a reference to the vm object. We do not want | |||||
* the vm object to be deleted when all the mmaps are | |||||
* unmapped, because it may be re-mmapped. Instead, | |||||
* we want the object to be deleted, when along with | |||||
* munmaps, we have also processed the unmap-ioctl. | |||||
*/ | |||||
vm_object_reference(gmap_start->map->mem); | |||||
*object = gmap_start->map->mem; | |||||
return (0); | |||||
} | |||||
static int | |||||
gntdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, | |||||
struct vm_object **object, int nprot) | |||||
{ | |||||
int error; | |||||
uint32_t count; | |||||
struct gntdev_gref *gref_start; | |||||
struct gntdev_gmap *gmap_start; | |||||
struct per_user_data *priv_user; | |||||
error = devfs_get_cdevpriv((void**) &priv_user); | |||||
if (error != 0) | |||||
return (EINVAL); | |||||
count = OFF_TO_IDX(size); | |||||
gref_start = gntdev_find_grefs(priv_user, *offset, count); | |||||
if (gref_start) { | |||||
error = mmap_gref(priv_user, gref_start, count, size, object); | |||||
return (error); | |||||
} | |||||
gmap_start = gntdev_find_gmap(priv_user, *offset, count); | |||||
if (gmap_start) { | |||||
error = mmap_gmap(priv_user, gmap_start, offset, size, object, | |||||
nprot); | |||||
return (error); | |||||
} | |||||
return (EINVAL); | |||||
} | |||||
/*------------------ Private Device Attachment Functions --------------------*/ | |||||
static void | |||||
gntdev_identify(driver_t *driver, device_t parent) | |||||
{ | |||||
KASSERT((xen_domain()), | |||||
("Trying to attach gntdev device on non Xen domain")); | |||||
if (BUS_ADD_CHILD(parent, 0, "gntdev", 0) == NULL) | |||||
panic("unable to attach gntdev user-space device"); | |||||
} | |||||
static int | |||||
gntdev_probe(device_t dev) | |||||
{ | |||||
gntdev_dev = dev; | |||||
device_set_desc(dev, "Xen grant-table user-space device"); | |||||
return (BUS_PROBE_NOWILDCARD); | |||||
} | |||||
static int | |||||
gntdev_attach(device_t dev) | |||||
{ | |||||
make_dev_credf(MAKEDEV_ETERNAL, &gntdev_devsw, 0, NULL, UID_ROOT, | |||||
GID_WHEEL, 0600, "xen/gntdev"); | |||||
return (0); | |||||
} | |||||
/*-------------------- Private Device Attachment Data -----------------------*/ | |||||
static device_method_t gntdev_methods[] = { | |||||
DEVMETHOD(device_identify, gntdev_identify), | |||||
DEVMETHOD(device_probe, gntdev_probe), | |||||
DEVMETHOD(device_attach, gntdev_attach), | |||||
DEVMETHOD_END | |||||
}; | |||||
static driver_t gntdev_driver = { | |||||
"gntdev", | |||||
gntdev_methods, | |||||
0, | |||||
}; | |||||
devclass_t gntdev_devclass; | |||||
DRIVER_MODULE(gntdev, xenpv, gntdev_driver, gntdev_devclass, 0, 0); | |||||
MODULE_DEPEND(gntdev, xenpv, 1, 1, 1); |
These two lines should be moved after the license. The license should be strictly as suggested here:
https://www.freebsd.org/doc/en/articles/committers-guide/pref-license.html