Index: sys/fs/cuse/cuse.c
===================================================================
--- sys/fs/cuse/cuse.c
+++ sys/fs/cuse/cuse.c
@@ -1,6 +1,6 @@
 /* $FreeBSD$ */
 /*-
- * Copyright (c) 2010-2013 Hans Petter Selasky. All rights reserved.
+ * Copyright (c) 2010-2017 Hans Petter Selasky. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -57,6 +58,9 @@
 
 #include
 #include
+#include
+#include
+#include
 
 #include
 #include
@@ -69,8 +73,6 @@
  */
 MODULE_VERSION(cuse4bsd, 1);
 
-#define	NBUSY	((uint8_t *)1)
-
 #ifdef FEATURE
 FEATURE(cuse, "Userspace character devices");
 #endif
@@ -94,10 +96,16 @@
 };
 
 struct cuse_memory {
+	TAILQ_ENTRY(cuse_memory) entry;
 	struct cuse_server *owner;
 	uint8_t *virtaddr;
 	uint32_t page_count;
-	uint32_t is_allocated;
+	uint32_t alloc_nr;
+	uint32_t state;
+#define	CUSE_MEM_ALLOCATED (1 << 0)
+#define	CUSE_MEM_SERVER_BUSY (1 << 1)
+#define	CUSE_MEM_CLIENT_BUSY (1 << 2)
+#define	CUSE_MEM_OBJECT_BUSY (1 << 3)
 };
 
 struct cuse_server_dev {
@@ -112,6 +120,7 @@
 	TAILQ_HEAD(, cuse_client_command) head;
 	TAILQ_HEAD(, cuse_server_dev) hdev;
 	TAILQ_HEAD(, cuse_client) hcli;
+	TAILQ_HEAD(, cuse_memory) hmem;
 	struct cv cv;
 	struct selinfo selinfo;
 	pid_t	pid;
@@ -128,8 +137,8 @@
 
 	uint8_t	ioctl_buffer[CUSE_BUFFER_MAX] __aligned(4);
 
-	int	fflags;		/* file flags */
-	int	cflags;		/* client flags */
+	int	fflags;			/* file flags */
+	int	cflags;			/* client flags */
 #define	CUSE_CLI_IS_CLOSING 0x01
 #define	CUSE_CLI_KNOTE_NEED_READ 0x02
 #define	CUSE_CLI_KNOTE_NEED_WRITE 0x04
@@ -140,15 +149,15 @@
 #define	CUSE_CLIENT_CLOSING(pcc) \
     ((pcc)->cflags & CUSE_CLI_IS_CLOSING)
 
-static MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
+static	MALLOC_DEFINE(M_CUSE, "cuse", "CUSE memory");
 
 static TAILQ_HEAD(, cuse_server) cuse_server_head;
 static struct mtx cuse_mtx;
 static struct cdev *cuse_dev;
 static struct cuse_server *cuse_alloc_unit[CUSE_DEVICES_MAX];
 static int cuse_alloc_unit_id[CUSE_DEVICES_MAX];
-static struct cuse_memory cuse_mem[CUSE_ALLOC_UNIT_MAX];
 
+static void cuse_server_unref(struct cuse_server *pcs);
 static void cuse_server_wakeup_all_client_locked(struct cuse_server *pcs);
 static void cuse_client_kqfilter_read_detach(struct knote *kn);
 static void cuse_client_kqfilter_write_detach(struct knote *kn);
@@ -173,7 +182,7 @@
 static d_read_t cuse_client_read;
 static d_write_t cuse_client_write;
 static d_poll_t cuse_client_poll;
-static d_mmap_t cuse_client_mmap;
+static d_mmap_single_t cuse_client_mmap_single;
 static d_kqfilter_t cuse_client_kqfilter;
 
 static struct cdevsw cuse_client_devsw = {
@@ -186,7 +195,7 @@
 	.d_read = cuse_client_read,
 	.d_write = cuse_client_write,
 	.d_poll = cuse_client_poll,
-	.d_mmap = cuse_client_mmap,
+	.d_mmap_single = cuse_client_mmap_single,
 	.d_kqfilter = cuse_client_kqfilter,
 };
 
@@ -196,7 +205,7 @@
 static d_read_t cuse_server_read;
 static d_write_t cuse_server_write;
 static d_poll_t cuse_server_poll;
-static d_mmap_t cuse_server_mmap;
+static d_mmap_single_t cuse_server_mmap_single;
 
 static struct cdevsw cuse_server_devsw = {
 	.d_version = D_VERSION,
@@ -208,7 +217,7 @@
 	.d_read = cuse_server_read,
 	.d_write = cuse_server_write,
 	.d_poll = cuse_server_poll,
-	.d_mmap = cuse_server_mmap,
+	.d_mmap_single = cuse_server_mmap_single,
 };
 
 static void cuse_client_is_closing(struct cuse_client *);
@@ -252,7 +261,6 @@
 	    (CUSE_VERSION >> 16) & 0xFF, (CUSE_VERSION >> 8) & 0xFF,
 	    (CUSE_VERSION >> 0) & 0xFF);
 }
-
 SYSINIT(cuse_kern_init, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_init, 0);
 
 static void
@@ -280,7 +288,6 @@
 
 	mtx_destroy(&cuse_mtx);
 }
-
 SYSUNINIT(cuse_kern_uninit, SI_SUB_DEVFS, SI_ORDER_ANY, cuse_kern_uninit, 0);
 
 static int
@@ -396,77 +403,120 @@
 }
 
 static void
-cuse_server_free_memory(struct cuse_server *pcs)
+cuse_memory_update_state(struct cuse_memory *mem, uint32_t clrstate, uint32_t setstate)
 {
-	struct cuse_memory *mem;
-	uint32_t n;
 
-	for (n = 0; n != CUSE_ALLOC_UNIT_MAX; n++) {
-		mem = &cuse_mem[n];
+	cuse_lock();
+	mem->state &= ~clrstate;
+	mem->state |= setstate;
+	if (mem->state == 0) {
+		struct cuse_server *pcs = mem->owner;
 
-		/* this memory is never freed */
-		if (mem->owner == pcs) {
-			mem->owner = NULL;
-			mem->is_allocated = 0;
-		}
+		cuse_unlock();
+
+		/* last user is gone - free */
+		free(mem->virtaddr, M_CUSE);
+		free(mem, M_CUSE);
+
+		/* drop the refcount on the parent server */
+		cuse_server_unref(pcs);
+	} else {
+		cuse_unlock();
 	}
 }
 
 static int
-cuse_server_alloc_memory(struct cuse_server *pcs,
-    struct cuse_memory *mem, uint32_t page_count)
+cuse_server_alloc_memory(struct cuse_server *pcs, uint32_t alloc_nr,
+    uint32_t page_count)
 {
+	struct cuse_memory *temp;
+	struct cuse_memory *mem;
 	void *ptr;
 	int error;
 
-	cuse_lock();
+	mem = malloc(sizeof(*mem), M_CUSE, M_WAITOK | M_ZERO);
+	if (mem == NULL)
+		return (ENOMEM);
 
-	if (mem->virtaddr == NBUSY) {
+	ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO);
+	if (ptr == NULL) {
+		error = ENOMEM;
+		goto error_0;
+	}
+	cuse_lock();
+	/* check if allocation number already exists */
+	TAILQ_FOREACH(temp, &pcs->hmem, entry) {
+		if (temp->alloc_nr == alloc_nr)
+			break;
+	}
+	if (temp != NULL) {
 		cuse_unlock();
-		return (EBUSY);
+		error = EBUSY;
+		goto error_1;
 	}
-	if (mem->virtaddr != NULL) {
-		if (mem->is_allocated != 0) {
-			cuse_unlock();
-			return (EBUSY);
-		}
-		if (mem->page_count == page_count) {
-			mem->is_allocated = 1;
-			mem->owner = pcs;
-			cuse_unlock();
-			return (0);
-		}
+	/* try to get a reference on the server */
+	pcs->refs++;
+	if (pcs->refs < 0) {
+		/* refcount overflow */
+		pcs->refs--;
 		cuse_unlock();
-		return (EBUSY);
+		error = ERANGE;
+		goto error_1;
 	}
-	memset(mem, 0, sizeof(*mem));
+	mem->virtaddr = ptr;
+	mem->page_count = page_count;
+	mem->alloc_nr = alloc_nr;
+	mem->owner = pcs;
+	mem->state = CUSE_MEM_ALLOCATED;
+	TAILQ_INSERT_TAIL(&pcs->hmem, mem, entry);
+	cuse_unlock();
 
-	mem->virtaddr = NBUSY;
+	return (0);
 
-	cuse_unlock();
+error_1:
+	free(ptr, M_CUSE);
+error_0:
+	free(mem, M_CUSE);
+	return (error);
+}
 
-	ptr = malloc(page_count * PAGE_SIZE, M_CUSE, M_WAITOK | M_ZERO);
-	if (ptr == NULL)
-		error = ENOMEM;
-	else
-		error = 0;
+static int
+cuse_server_free_memory(struct cuse_server *pcs, uint32_t alloc_nr)
+{
+	struct cuse_memory *mem;
 
 	cuse_lock();
-
-	if (error) {
-		mem->virtaddr = NULL;
+	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
+		if (mem->alloc_nr == alloc_nr)
+			break;
+	}
+	if (mem == NULL) {
 		cuse_unlock();
-		return (error);
+		return (EINVAL);
 	}
-	mem->virtaddr = ptr;
-	mem->page_count = page_count;
-	mem->is_allocated = 1;
-	mem->owner = pcs;
+	TAILQ_REMOVE(&pcs->hmem, mem, entry);
 	cuse_unlock();
 
+	cuse_memory_update_state(mem, CUSE_MEM_ALLOCATED, 0);
+
 	return (0);
 }
 
+static void
+cuse_server_free_all_memory(struct cuse_server *pcs)
+{
+	struct cuse_memory *mem;
+
+	cuse_lock();
+	while ((mem = TAILQ_FIRST(&pcs->hmem)) != NULL) {
+		TAILQ_REMOVE(&pcs->hmem, mem, entry);
+		cuse_unlock();
+		cuse_memory_update_state(mem, CUSE_MEM_ALLOCATED, 0);
+		cuse_lock();
+	}
+	cuse_unlock();
+}
+
 static int
 cuse_client_get(struct cuse_client **ppcc)
 {
@@ -644,9 +694,8 @@
 }
 
 static void
-cuse_server_free(void *arg)
+cuse_server_unref(struct cuse_server *pcs)
 {
-	struct cuse_server *pcs = arg;
 	struct cuse_server_dev *pcsd;
 
 	cuse_lock();
@@ -670,8 +719,6 @@
 		cuse_lock();
 	}
 
-	cuse_server_free_memory(pcs);
-
 	knlist_clear(&pcs->selinfo.si_note, 1);
 	knlist_destroy(&pcs->selinfo.si_note);
 
@@ -684,6 +731,18 @@
 	free(pcs, M_CUSE);
 }
 
+static void
+cuse_server_free(void *arg)
+{
+	struct cuse_server *pcs = arg;
+
+	/* avoid deadlock freeing up resources */
+	cuse_server_free_all_memory(pcs);
+
+	/* drop refcount */
+	cuse_server_unref(pcs);
+}
+
 static int
 cuse_server_open(struct cdev *dev, int fflags, int devtype, struct thread *td)
 {
@@ -698,13 +757,13 @@
 		free(pcs, M_CUSE);
 		return (ENOMEM);
 	}
-
 	/* store current process ID */
 	pcs->pid = curproc->p_pid;
 
 	TAILQ_INIT(&pcs->head);
 	TAILQ_INIT(&pcs->hdev);
 	TAILQ_INIT(&pcs->hcli);
+	TAILQ_INIT(&pcs->hmem);
 
 	cv_init(&pcs->cv, "cuse-server-cv");
 
@@ -1091,12 +1150,12 @@
 			error = ENOMEM;
 			break;
 		}
-		if (pai->page_count > CUSE_ALLOC_PAGES_MAX) {
+		if (pai->page_count >= CUSE_ALLOC_PAGES_MAX) {
 			error = ENOMEM;
 			break;
 		}
 		error = cuse_server_alloc_memory(pcs,
-		    &cuse_mem[pai->alloc_nr], pai->page_count);
+		    pai->alloc_nr, pai->page_count);
 		break;
 
 	case CUSE_IOCTL_FREE_MEMORY:
@@ -1106,16 +1165,7 @@
 			error = ENOMEM;
 			break;
 		}
-		/* we trust the character device driver in this case */
-
-		cuse_lock();
-		if (cuse_mem[pai->alloc_nr].owner == pcs) {
-			cuse_mem[pai->alloc_nr].is_allocated = 0;
-			cuse_mem[pai->alloc_nr].owner = NULL;
-		} else {
-			error = EINVAL;
-		}
-		cuse_unlock();
+		error = cuse_server_free_memory(pcs, pai->alloc_nr);
 		break;
 
 	case CUSE_IOCTL_GET_SIG:
@@ -1274,49 +1324,132 @@
 }
 
 static int
-cuse_server_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
+cuse_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
+    vm_page_t *mres)
+{
+	struct cuse_memory *mem;
+	vm_paddr_t paddr;
+	vm_page_t page;
+
+	mem = vm_obj->handle;
+	/*
+	 * The buffer comes from malloc(9), which does not guarantee
+	 * physically contiguous pages. Translate the virtual address
+	 * of the faulting page instead of offsetting the base address.
+	 */
+	paddr = vtophys(mem->virtaddr + offset);
+
+	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+		/*
+		 * If the passed in result page is a fake page, update
+		 * it with the new physical address.
+		 */
+		page = *mres;
+		vm_page_updatefake(page, paddr, vm_obj->memattr);
+	} else {
+		/*
+		 * Replace the passed in "mres" page with our own fake
+		 * page and free up the original page.
+		 */
+		VM_OBJECT_WUNLOCK(vm_obj);
+		page = vm_page_getfake(paddr, vm_obj->memattr);
+		VM_OBJECT_WLOCK(vm_obj);
+
+		vm_page_replace_checked(page, vm_obj,
+		    (*mres)->pindex, *mres);
+
+		vm_page_lock(*mres);
+		vm_page_free(*mres);
+		vm_page_unlock(*mres);
+
+		*mres = page;
+	}
+	page->valid = VM_PAGE_BITS_ALL;
+	return (VM_PAGER_OK);
+}
+
+static int
+cuse_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+	*color = 0;
+	return (0);
+}
+
+static void
+cuse_pager_dtor(void *handle)
+{
+	struct cuse_memory *mem = handle;
+
+	/*
+	 * This function is only called when all references to the VM
+	 * object associated with the "handle" are gone.
+	 */
+	cuse_memory_update_state(mem, CUSE_MEM_OBJECT_BUSY, 0);
+}
+
+static struct cdev_pager_ops cuse_pager_ops = {
+	.cdev_pg_fault = cuse_pager_fault,
+	.cdev_pg_ctor = cuse_pager_ctor,
+	.cdev_pg_dtor = cuse_pager_dtor
+};
+
+static int
+cuse_server_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
+    vm_size_t size, struct vm_object **object, int nprot)
 {
-	uint32_t page_nr = offset / PAGE_SIZE;
+	uint32_t page_nr = *offset / PAGE_SIZE;
 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
 	struct cuse_memory *mem;
 	struct cuse_server *pcs;
-	uint8_t *ptr;
 	int error;
 
-	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
-		return (ENOMEM);
-
 	error = cuse_server_get(&pcs);
 	if (error != 0)
-		pcs = NULL;
+		return (error);
 
 	cuse_lock();
-	mem = &cuse_mem[alloc_nr];
-
-	/* try to enforce slight ownership */
-	if ((pcs != NULL) && (mem->owner != pcs)) {
-		cuse_unlock();
-		return (EINVAL);
-	}
-	if (mem->virtaddr == NULL) {
-		cuse_unlock();
-		return (ENOMEM);
+	/* lookup memory structure */
+	TAILQ_FOREACH(mem, &pcs->hmem, entry) {
+		if (mem->alloc_nr == alloc_nr)
+			break;
 	}
-	if (mem->virtaddr == NBUSY) {
+	if (mem == NULL) {
 		cuse_unlock();
 		return (ENOMEM);
 	}
+	/* verify page offset */
 	page_nr %= CUSE_ALLOC_PAGES_MAX;
-
 	if (page_nr >= mem->page_count) {
 		cuse_unlock();
 		return (ENXIO);
 	}
-	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
+	/* verify mmap size */
+	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
+	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
+		cuse_unlock();
+		return (EINVAL);
+	}
+	/* serialize */
+	if (mem->state & CUSE_MEM_SERVER_BUSY) {
+		cuse_unlock();
+		return (EBUSY);
+	}
+	mem->state |= CUSE_MEM_SERVER_BUSY;
 	cuse_unlock();
 
-	*paddr = vtophys(ptr);
+	*object = cdev_pager_allocate(mem, OBJT_DEVICE, &cuse_pager_ops,
+	    size, nprot, page_nr * PAGE_SIZE, curthread->td_ucred);
 
+	cuse_memory_update_state(mem, CUSE_MEM_SERVER_BUSY,
+	    *object != NULL ? CUSE_MEM_OBJECT_BUSY : 0);
+
+	if (*object == NULL)
+		return (ENXIO);
+
+	/* set new VM object offset to use */
+	*offset = page_nr * PAGE_SIZE;
+
+	/* success */
 	return (0);
 }
 
@@ -1349,7 +1482,7 @@
 	free(pcc, M_CUSE);
 
 	/* drop reference on server */
-	cuse_server_free(pcs);
+	cuse_server_unref(pcs);
 }
 
 static int
@@ -1392,13 +1525,13 @@
 	pcc = malloc(sizeof(*pcc), M_CUSE, M_WAITOK | M_ZERO);
 	if (pcc == NULL) {
 		/* drop reference on server */
-		cuse_server_free(pcs);
+		cuse_server_unref(pcs);
 		return (ENOMEM);
 	}
 	if (devfs_set_cdevpriv(pcc, &cuse_client_free)) {
 		printf("Cuse: Cannot set cdevpriv.\n");
 		/* drop reference on server */
-		cuse_server_free(pcs);
+		cuse_server_unref(pcs);
 		free(pcc, M_CUSE);
 		return (ENOMEM);
 	}
@@ -1548,7 +1681,6 @@
 			error = ENOMEM;
 			break;
 		}
-
 		len = uio->uio_iov->iov_len;
 
 		cuse_lock();
@@ -1608,7 +1740,6 @@
 			error = ENOMEM;
 			break;
 		}
-
 		len = uio->uio_iov->iov_len;
 
 		cuse_lock();
@@ -1751,59 +1882,69 @@
 	}
 	return (revents);
 
- pollnval:
+pollnval:
 	/* XXX many clients don't understand POLLNVAL */
 	return (events & (POLLHUP | POLLPRI | POLLIN |
 	    POLLRDNORM | POLLOUT | POLLWRNORM));
 }
 
 static int
-cuse_client_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr)
+cuse_client_mmap_single(struct cdev *dev, vm_ooffset_t *offset,
+    vm_size_t size, struct vm_object **object, int nprot)
 {
-	uint32_t page_nr = offset / PAGE_SIZE;
+	uint32_t page_nr = *offset / PAGE_SIZE;
 	uint32_t alloc_nr = page_nr / CUSE_ALLOC_PAGES_MAX;
 	struct cuse_memory *mem;
-	struct cuse_server *pcs;
 	struct cuse_client *pcc;
-	uint8_t *ptr;
 	int error;
 
-	if (alloc_nr >= CUSE_ALLOC_UNIT_MAX)
-		return (ENOMEM);
-
 	error = cuse_client_get(&pcc);
 	if (error != 0)
-		pcs = NULL;
-	else
-		pcs = pcc->server;
+		return (error);
 
 	cuse_lock();
-	mem = &cuse_mem[alloc_nr];
-
-	/* try to enforce slight ownership */
-	if ((pcs != NULL) && (mem->owner != pcs)) {
-		cuse_unlock();
-		return (EINVAL);
-	}
-	if (mem->virtaddr == NULL) {
-		cuse_unlock();
-		return (ENOMEM);
+	/* lookup memory structure */
+	TAILQ_FOREACH(mem, &pcc->server->hmem, entry) {
+		if (mem->alloc_nr == alloc_nr)
+			break;
 	}
-	if (mem->virtaddr == NBUSY) {
+	if (mem == NULL) {
 		cuse_unlock();
 		return (ENOMEM);
 	}
+	/* verify page offset */
 	page_nr %= CUSE_ALLOC_PAGES_MAX;
-
 	if (page_nr >= mem->page_count) {
 		cuse_unlock();
 		return (ENXIO);
 	}
-	ptr = mem->virtaddr + (page_nr * PAGE_SIZE);
+	/* verify mmap size */
+	if ((size % PAGE_SIZE) != 0 || (size < PAGE_SIZE) ||
+	    (size > ((mem->page_count - page_nr) * PAGE_SIZE))) {
+		cuse_unlock();
+		return (EINVAL);
+	}
+	/* serialize */
+	if (mem->state & CUSE_MEM_CLIENT_BUSY) {
+		cuse_unlock();
+		return (EBUSY);
+	}
+	mem->state |= CUSE_MEM_CLIENT_BUSY;
 	cuse_unlock();
 
-	*paddr = vtophys(ptr);
+	*object = cdev_pager_allocate(mem, OBJT_DEVICE, &cuse_pager_ops,
+	    size, nprot, page_nr * PAGE_SIZE, curthread->td_ucred);
+
+	cuse_memory_update_state(mem, CUSE_MEM_CLIENT_BUSY,
+	    *object != NULL ? CUSE_MEM_OBJECT_BUSY : 0);
+
+	if (*object == NULL)
+		return (ENXIO);
+
+	/* set new VM object offset to use */
+	*offset = page_nr * PAGE_SIZE;
 
+	/* success */
 	return (0);
 }
 
Index: sys/fs/cuse/cuse_ioctl.h
===================================================================
--- sys/fs/cuse/cuse_ioctl.h
+++ sys/fs/cuse/cuse_ioctl.h
@@ -35,6 +35,7 @@
 #define	CUSE_BUF_MIN_PTR 0x10000UL
 #define	CUSE_BUF_MAX_PTR 0x20000UL
 #define	CUSE_ALLOC_UNIT_MAX 128		/* units */
+/* All memory allocations must be less than the following limit */
 #define	CUSE_ALLOC_PAGES_MAX (((16UL * 1024UL * 1024UL) + PAGE_SIZE - 1) / PAGE_SIZE)
 
 struct cuse_dev;