diff --git a/sys/arm/nvidia/drm2/tegra_bo.c b/sys/arm/nvidia/drm2/tegra_bo.c index 08cd3de6a3fe..346118b78c2b 100644 --- a/sys/arm/nvidia/drm2/tegra_bo.c +++ b/sys/arm/nvidia/drm2/tegra_bo.c @@ -1,362 +1,362 @@ /*- * Copyright (c) 2015 Michal Meloun * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void tegra_bo_destruct(struct tegra_bo *bo) { vm_page_t m; size_t size; int i; if (bo->cdev_pager == NULL) return; size = round_page(bo->gem_obj.size); if (bo->vbase != 0) pmap_qremove(bo->vbase, bo->npages); VM_OBJECT_WLOCK(bo->cdev_pager); for (i = 0; i < bo->npages; i++) { m = bo->m[i]; vm_page_busy_acquire(m, 0); - cdev_pager_free_page(bo->cdev_pager, m); + cdev_mgtdev_pager_free_page(bo->cdev_pager, m); m->flags &= ~PG_FICTITIOUS; vm_page_unwire_noq(m); vm_page_free(m); } VM_OBJECT_WUNLOCK(bo->cdev_pager); vm_object_deallocate(bo->cdev_pager); if (bo->vbase != 0) vmem_free(kernel_arena, bo->vbase, size); } static void tegra_bo_free_object(struct drm_gem_object *gem_obj) { struct tegra_bo *bo; bo = container_of(gem_obj, struct tegra_bo, gem_obj); drm_gem_free_mmap_offset(gem_obj); drm_gem_object_release(gem_obj); tegra_bo_destruct(bo); free(bo->m, DRM_MEM_DRIVER); free(bo, DRM_MEM_DRIVER); } static int tegra_bo_alloc_contig(size_t npages, u_long alignment, vm_memattr_t memattr, vm_page_t **ret_page) { vm_page_t m; int err, i, tries; vm_paddr_t low, high, boundary; low = 0; high = -1UL; boundary = 0; tries = 0; retry: m = vm_page_alloc_noobj_contig(VM_ALLOC_WIRED | VM_ALLOC_ZERO, npages, low, high, alignment, boundary, memattr); if (m == NULL) { if (tries < 3) { err = vm_page_reclaim_contig(0, npages, low, high, alignment, boundary); if (err == ENOMEM) vm_wait(NULL); else if (err != 0) return (ENOMEM); tries++; goto retry; } return (ENOMEM); } for (i = 0; i < npages; i++, m++) { m->valid = VM_PAGE_BITS_ALL; (*ret_page)[i] = m; } return (0); } /* Initialize pager and insert all object pages to it*/ static int tegra_bo_init_pager(struct tegra_bo *bo) { vm_page_t m; size_t size; int i; size = round_page(bo->gem_obj.size); bo->pbase = VM_PAGE_TO_PHYS(bo->m[0]); if (vmem_alloc(kernel_arena, size, M_WAITOK | M_BESTFIT, &bo->vbase)) return (ENOMEM); VM_OBJECT_WLOCK(bo->cdev_pager); for (i = 0; i < bo->npages; i++) { m = bo->m[i]; /* * XXX This is a temporary hack. * We need pager suitable for paging (mmap) managed * real (non-fictitious) pages. * - managed pages are needed for clean module unload. * - aliasing fictitious page to real one is bad, * pmap cannot handle this situation without issues * It expects that * paddr = PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(paddr)) * for every single page passed to pmap. */ m->oflags &= ~VPO_UNMANAGED; m->flags |= PG_FICTITIOUS; if (vm_page_insert(m, bo->cdev_pager, i) != 0) return (EINVAL); } VM_OBJECT_WUNLOCK(bo->cdev_pager); pmap_qenter(bo->vbase, bo->m, bo->npages); return (0); } /* Allocate memory for frame buffer */ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo) { size_t size; int rv; size = bo->gem_obj.size; bo->npages = atop(size); bo->m = malloc(sizeof(vm_page_t *) * bo->npages, DRM_MEM_DRIVER, M_WAITOK | M_ZERO); rv = tegra_bo_alloc_contig(bo->npages, PAGE_SIZE, VM_MEMATTR_WRITE_COMBINING, &(bo->m)); if (rv != 0) { DRM_WARNING("Cannot allocate memory for gem object.\n"); return (rv); } rv = tegra_bo_init_pager(bo); if (rv != 0) { DRM_WARNING("Cannot initialize gem object pager.\n"); return (rv); } return (0); } int tegra_bo_create(struct drm_device *drm, size_t size, struct tegra_bo **res_bo) { struct tegra_bo *bo; int rv; if (size <= 0) return (-EINVAL); bo = malloc(sizeof(*bo), DRM_MEM_DRIVER, M_WAITOK | M_ZERO); size = round_page(size); rv = drm_gem_object_init(drm, &bo->gem_obj, size); if (rv != 0) { free(bo, DRM_MEM_DRIVER); return (rv); } rv = drm_gem_create_mmap_offset(&bo->gem_obj); if (rv != 0) { drm_gem_object_release(&bo->gem_obj); free(bo, DRM_MEM_DRIVER); return (rv); } bo->cdev_pager = cdev_pager_allocate(&bo->gem_obj, OBJT_MGTDEVICE, drm->driver->gem_pager_ops, size, 0, 0, NULL); rv = tegra_bo_alloc(drm, bo); if (rv != 0) { tegra_bo_free_object(&bo->gem_obj); return (rv); } *res_bo = bo; return (0); } static int tegra_bo_create_with_handle(struct drm_file *file, struct drm_device *drm, size_t size, uint32_t *handle, struct tegra_bo **res_bo) { int rv; struct tegra_bo *bo; rv = tegra_bo_create(drm, size, &bo); if (rv != 0) return (rv); rv = drm_gem_handle_create(file, &bo->gem_obj, handle); if (rv != 0) { tegra_bo_free_object(&bo->gem_obj); drm_gem_object_release(&bo->gem_obj); return (rv); } drm_gem_object_unreference_unlocked(&bo->gem_obj); *res_bo = bo; return (0); } static int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm_dev, struct drm_mode_create_dumb *args) { struct tegra_drm *drm; struct tegra_bo *bo; int rv; drm = container_of(drm_dev, struct tegra_drm, drm_dev); args->pitch= (args->width * args->bpp + 7) / 8; args->pitch = roundup(args->pitch, drm->pitch_align); args->size = args->pitch * args->height; rv = tegra_bo_create_with_handle(file, drm_dev, args->size, &args->handle, &bo); return (rv); } static int tegra_bo_dumb_map_offset(struct drm_file *file_priv, struct drm_device *drm_dev, uint32_t handle, uint64_t *offset) { struct drm_gem_object *gem_obj; int rv; DRM_LOCK(drm_dev); gem_obj = drm_gem_object_lookup(drm_dev, file_priv, handle); if (gem_obj == NULL) { device_printf(drm_dev->dev, "Object not found\n"); DRM_UNLOCK(drm_dev); return (-EINVAL); } rv = drm_gem_create_mmap_offset(gem_obj); if (rv != 0) goto fail; *offset = DRM_GEM_MAPPING_OFF(gem_obj->map_list.key) | DRM_GEM_MAPPING_KEY; drm_gem_object_unreference(gem_obj); DRM_UNLOCK(drm_dev); return (0); fail: drm_gem_object_unreference(gem_obj); DRM_UNLOCK(drm_dev); return (rv); } static int tegra_bo_dumb_destroy(struct drm_file *file_priv, struct drm_device *drm_dev, unsigned int handle) { int rv; rv = drm_gem_handle_delete(file_priv, handle); return (rv); } /* * mmap support */ static int tegra_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { #ifdef DRM_PAGER_DEBUG DRM_DEBUG("object %p offset %jd prot %d mres %p\n", vm_obj, (intmax_t)offset, prot, mres); #endif return (VM_PAGER_FAIL); } static int tegra_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { if (color != NULL) *color = 0; return (0); } static void tegra_gem_pager_dtor(void *handle) { } static struct cdev_pager_ops tegra_gem_pager_ops = { .cdev_pg_fault = tegra_gem_pager_fault, .cdev_pg_ctor = tegra_gem_pager_ctor, .cdev_pg_dtor = tegra_gem_pager_dtor }; /* Fill up relevant fields in drm_driver ops */ void tegra_bo_driver_register(struct drm_driver *drm_drv) { drm_drv->gem_free_object = tegra_bo_free_object; drm_drv->gem_pager_ops = &tegra_gem_pager_ops; drm_drv->dumb_create = tegra_bo_dumb_create; drm_drv->dumb_map_offset = tegra_bo_dumb_map_offset; drm_drv->dumb_destroy = tegra_bo_dumb_destroy; } diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index d4f8e75a3251..25243382f9ea 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -1,553 +1,553 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io) * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ DEFINE_IDR(mtrr_idr); static MALLOC_DEFINE(M_LKMTRR, "idr", "Linux MTRR compat"); extern int pat_works; #endif void si_meminfo(struct sysinfo *si) { si->totalram = physmem; si->freeram = vm_free_count(); si->totalhigh = 0; si->freehigh = 0; si->mem_unit = PAGE_SIZE; } void * linux_page_address(struct page *page) { if (page->object != kernel_object) { return (PMAP_HAS_DMAP ? ((void *)(uintptr_t)PHYS_TO_DMAP(page_to_phys(page))) : NULL); } return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS + IDX_TO_OFF(page->pindex))); } struct page * linux_alloc_pages(gfp_t flags, unsigned int order) { struct page *page; if (PMAP_HAS_DMAP) { unsigned long npages = 1UL << order; int req = VM_ALLOC_WIRED; if ((flags & M_ZERO) != 0) req |= VM_ALLOC_ZERO; if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc_noobj(req); if (page == NULL) return (NULL); } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; retry: page = vm_page_alloc_noobj_contig(req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (page == NULL) { if (flags & M_WAITOK) { int err = vm_page_reclaim_contig(req, npages, 0, pmax, PAGE_SIZE, 0); if (err == ENOMEM) vm_wait(NULL); else if (err != 0) return (NULL); flags &= ~M_WAITOK; goto retry; } return (NULL); } } } else { vm_offset_t vaddr; vaddr = linux_alloc_kmem(flags, order); if (vaddr == 0) return (NULL); page = virt_to_page((void *)vaddr); KASSERT(vaddr == (vm_offset_t)page_address(page), ("Page address mismatch")); } return (page); } static void _linux_free_kmem(vm_offset_t addr, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; kmem_free((void *)addr, size); } void linux_free_pages(struct page *page, unsigned int order) { if (PMAP_HAS_DMAP) { unsigned long npages = 1UL << order; unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; if (vm_page_unwire_noq(pgo)) vm_page_free(pgo); } } else { vm_offset_t vaddr; vaddr = (vm_offset_t)page_address(page); _linux_free_kmem(vaddr, order); } } vm_offset_t linux_alloc_kmem(gfp_t flags, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; void *addr; if ((flags & GFP_DMA32) == 0) { addr = kmem_malloc(size, flags & GFP_NATIVE_MASK); } else { addr = kmem_alloc_contig(size, flags & GFP_NATIVE_MASK, 0, BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } return ((vm_offset_t)addr); } void linux_free_kmem(vm_offset_t addr, unsigned int order) { KASSERT((addr & ~PAGE_MASK) == 0, ("%s: addr %p is not page aligned", __func__, (void *)addr)); if (addr >= VM_MIN_KERNEL_ADDRESS && addr < VM_MAX_KERNEL_ADDRESS) { _linux_free_kmem(addr, order); } else { vm_page_t page; page = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(addr)); linux_free_pages(page, order); } } static int linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages, int write, struct page **pages) { vm_prot_t prot; size_t len; int count; prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; len = ptoa((vm_offset_t)nr_pages); count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages); return (count == -1 ? -EFAULT : nr_pages); } int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { vm_map_t map; vm_page_t *mp; vm_offset_t va; vm_offset_t end; vm_prot_t prot; int count; if (nr_pages == 0 || in_interrupt()) return (0); MPASS(pages != NULL); map = &curthread->td_proc->p_vmspace->vm_map; end = start + ptoa((vm_offset_t)nr_pages); if (!vm_map_range_valid(map, start, end)) return (-EINVAL); prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; for (count = 0, mp = pages, va = start; va < end; mp++, va += PAGE_SIZE, count++) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) break; if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } return (count); } long get_user_pages_remote(struct task_struct *task, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &task->task_thread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } long lkpi_get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages) { vm_map_t map; map = &curthread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } int is_vmalloc_addr(const void *addr) { return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL); } vm_fault_t lkpi_vmf_insert_pfn_prot_locked(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t prot) { vm_object_t vm_obj = vma->vm_obj; vm_object_t tmp_obj; vm_page_t page; vm_pindex_t pindex; VM_OBJECT_ASSERT_WLOCKED(vm_obj); pindex = OFF_TO_IDX(addr - vma->vm_start); if (vma->vm_pfn_count == 0) vma->vm_pfn_first = pindex; MPASS(pindex <= OFF_TO_IDX(vma->vm_end)); retry: page = vm_page_grab(vm_obj, pindex, VM_ALLOC_NOCREAT); if (page == NULL) { page = PHYS_TO_VM_PAGE(IDX_TO_OFF(pfn)); if (!vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL)) goto retry; if (page->object != NULL) { tmp_obj = page->object; vm_page_xunbusy(page); VM_OBJECT_WUNLOCK(vm_obj); VM_OBJECT_WLOCK(tmp_obj); if (page->object == tmp_obj && vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL)) { KASSERT(page->object == tmp_obj, ("page has changed identity")); KASSERT((page->oflags & VPO_UNMANAGED) == 0, ("page does not belong to shmem")); vm_pager_page_unswapped(page); if (pmap_page_is_mapped(page)) { vm_page_xunbusy(page); VM_OBJECT_WUNLOCK(tmp_obj); printf("%s: page rename failed: page " "is mapped\n", __func__); VM_OBJECT_WLOCK(vm_obj); return (VM_FAULT_NOPAGE); } vm_page_remove(page); } VM_OBJECT_WUNLOCK(tmp_obj); VM_OBJECT_WLOCK(vm_obj); goto retry; } if (vm_page_insert(page, vm_obj, pindex)) { vm_page_xunbusy(page); return (VM_FAULT_OOM); } vm_page_valid(page); } pmap_page_set_memattr(page, pgprot2cachemode(prot)); vma->vm_pfn_count++; return (VM_FAULT_NOPAGE); } int lkpi_remap_pfn_range(struct vm_area_struct *vma, unsigned long start_addr, unsigned long start_pfn, unsigned long size, pgprot_t prot) { vm_object_t vm_obj; unsigned long addr, pfn; int err = 0; vm_obj = vma->vm_obj; VM_OBJECT_WLOCK(vm_obj); for (addr = start_addr, pfn = start_pfn; addr < start_addr + size; addr += PAGE_SIZE) { vm_fault_t ret; retry: ret = lkpi_vmf_insert_pfn_prot_locked(vma, addr, pfn, prot); if ((ret & VM_FAULT_OOM) != 0) { VM_OBJECT_WUNLOCK(vm_obj); vm_wait(NULL); VM_OBJECT_WLOCK(vm_obj); goto retry; } if ((ret & VM_FAULT_ERROR) != 0) { err = -EFAULT; break; } pfn++; } VM_OBJECT_WUNLOCK(vm_obj); if (unlikely(err)) { zap_vma_ptes(vma, start_addr, (pfn - start_pfn) << PAGE_SHIFT); return (err); } return (0); } int lkpi_io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size) { pgprot_t prot; int ret; prot = cachemode2protval(iomap->attr); ret = lkpi_remap_pfn_range(vma, addr, pfn, size, prot); return (ret); } /* * Although FreeBSD version of unmap_mapping_range has semantics and types of * parameters compatible with Linux version, the values passed in are different * @obj should match to vm_private_data field of vm_area_struct returned by * mmap file operation handler, see linux_file_mmap_single() sources * @holelen should match to size of area to be munmapped. */ void lkpi_unmap_mapping_range(void *obj, loff_t const holebegin __unused, loff_t const holelen, int even_cows __unused) { vm_object_t devobj; vm_page_t page; int i, page_count; devobj = cdev_pager_lookup(obj); if (devobj != NULL) { page_count = OFF_TO_IDX(holelen); VM_OBJECT_WLOCK(devobj); retry: for (i = 0; i < page_count; i++) { page = vm_page_lookup(devobj, i); if (page == NULL) continue; if (!vm_page_busy_acquire(page, VM_ALLOC_WAITFAIL)) goto retry; - cdev_pager_free_page(devobj, page); + cdev_mgtdev_pager_free_page(devobj, page); } VM_OBJECT_WUNLOCK(devobj); vm_object_deallocate(devobj); } } int lkpi_arch_phys_wc_add(unsigned long base, unsigned long size) { #ifdef __i386__ struct mem_range_desc *mrdesc; int error, id, act; /* If PAT is available, do nothing */ if (pat_works) return (0); mrdesc = malloc(sizeof(*mrdesc), M_LKMTRR, M_WAITOK); mrdesc->mr_base = base; mrdesc->mr_len = size; mrdesc->mr_flags = MDF_WRITECOMBINE; strlcpy(mrdesc->mr_owner, "drm", sizeof(mrdesc->mr_owner)); act = MEMRANGE_SET_UPDATE; error = mem_range_attr_set(mrdesc, &act); if (error == 0) { error = idr_get_new(&mtrr_idr, mrdesc, &id); MPASS(idr_find(&mtrr_idr, id) == mrdesc); if (error != 0) { act = MEMRANGE_SET_REMOVE; mem_range_attr_set(mrdesc, &act); } } if (error != 0) { free(mrdesc, M_LKMTRR); pr_warn( "Failed to add WC MTRR for [%p-%p]: %d; " "performance may suffer\n", (void *)base, (void *)(base + size - 1), error); } else pr_warn("Successfully added WC MTRR for [%p-%p]\n", (void *)base, (void *)(base + size - 1)); return (error != 0 ? -error : id + __MTRR_ID_BASE); #else return (0); #endif } void lkpi_arch_phys_wc_del(int reg) { #ifdef __i386__ struct mem_range_desc *mrdesc; int act; /* Check if arch_phys_wc_add() failed. */ if (reg < __MTRR_ID_BASE) return; mrdesc = idr_find(&mtrr_idr, reg - __MTRR_ID_BASE); MPASS(mrdesc != NULL); idr_remove(&mtrr_idr, reg - __MTRR_ID_BASE); act = MEMRANGE_SET_REMOVE; mem_range_attr_set(mrdesc, &act); free(mrdesc, M_LKMTRR); #endif } /* * This is a highly simplified version of the Linux page_frag_cache. * We only support up-to 1 single page as fragment size and we will * always return a full page. This may be wasteful on small objects * but the only known consumer (mt76) is either asking for a half-page * or a full page. If this was to become a problem we can implement * a more elaborate version. */ void * linuxkpi_page_frag_alloc(struct page_frag_cache *pfc, size_t fragsz, gfp_t gfp) { vm_page_t pages; if (fragsz == 0) return (NULL); KASSERT(fragsz <= PAGE_SIZE, ("%s: fragsz %zu > PAGE_SIZE not yet " "supported", __func__, fragsz)); pages = alloc_pages(gfp, flsl(howmany(fragsz, PAGE_SIZE) - 1)); if (pages == NULL) return (NULL); pfc->va = linux_page_address(pages); /* Passed in as "count" to __page_frag_cache_drain(). Unused by us. */ pfc->pagecnt_bias = 0; return (pfc->va); } void linuxkpi_page_frag_free(void *addr) { vm_page_t page; page = virt_to_page(addr); linux_free_pages(page, 0); } void linuxkpi__page_frag_cache_drain(struct page *page, size_t count __unused) { linux_free_pages(page, 0); } diff --git a/sys/dev/drm2/ttm/ttm_bo_vm.c b/sys/dev/drm2/ttm/ttm_bo_vm.c index 4f6c66382453..e543b8dfb993 100644 --- a/sys/dev/drm2/ttm/ttm_bo_vm.c +++ b/sys/dev/drm2/ttm/ttm_bo_vm.c @@ -1,555 +1,555 @@ /************************************************************************** * * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ /* * Authors: Thomas Hellstrom */ /* * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. */ #include #include "opt_vm.h" #include #include #include #include #include #include #include #define TTM_BO_VM_NUM_PREFAULT 16 RB_GENERATE(ttm_bo_device_buffer_objects, ttm_buffer_object, vm_rb, ttm_bo_cmp_rb_tree_items); int ttm_bo_cmp_rb_tree_items(struct ttm_buffer_object *a, struct ttm_buffer_object *b) { if (a->vm_node->start < b->vm_node->start) { return (-1); } else if (a->vm_node->start > b->vm_node->start) { return (1); } else { return (0); } } static struct ttm_buffer_object *ttm_bo_vm_lookup_rb(struct ttm_bo_device *bdev, unsigned long page_start, unsigned long num_pages) { unsigned long cur_offset; struct ttm_buffer_object *bo; struct ttm_buffer_object *best_bo = NULL; bo = RB_ROOT(&bdev->addr_space_rb); while (bo != NULL) { cur_offset = bo->vm_node->start; if (page_start >= cur_offset) { best_bo = bo; if (page_start == cur_offset) break; bo = RB_RIGHT(bo, vm_rb); } else bo = RB_LEFT(bo, vm_rb); } if (unlikely(best_bo == NULL)) return NULL; if (unlikely((best_bo->vm_node->start + best_bo->num_pages) < (page_start + num_pages))) return NULL; return best_bo; } static int ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct ttm_buffer_object *bo = vm_obj->handle; struct ttm_bo_device *bdev = bo->bdev; struct ttm_tt *ttm = NULL; vm_page_t m, m1; int ret; int retval = VM_PAGER_OK; struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; vm_object_pip_add(vm_obj, 1); if (*mres != NULL) { (void)vm_page_remove(*mres); } retry: VM_OBJECT_WUNLOCK(vm_obj); m = NULL; reserve: ret = ttm_bo_reserve(bo, false, false, false, 0); if (unlikely(ret != 0)) { if (ret == -EBUSY) { kern_yield(PRI_USER); goto reserve; } } if (bdev->driver->fault_reserve_notify) { ret = bdev->driver->fault_reserve_notify(bo); switch (ret) { case 0: break; case -EBUSY: case -ERESTARTSYS: case -EINTR: kern_yield(PRI_USER); goto reserve; default: retval = VM_PAGER_ERROR; goto out_unlock; } } /* * Wait for buffer data in transit, due to a pipelined * move. */ mtx_lock(&bdev->fence_lock); if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) { /* * Here, the behavior differs between Linux and FreeBSD. * * On Linux, the wait is interruptible (3rd argument to * ttm_bo_wait). There must be some mechanism to resume * page fault handling, once the signal is processed. * * On FreeBSD, the wait is uninteruptible. This is not a * problem as we can't end up with an unkillable process * here, because the wait will eventually time out. * * An example of this situation is the Xorg process * which uses SIGALRM internally. The signal could * interrupt the wait, causing the page fault to fail * and the process to receive SIGSEGV. */ ret = ttm_bo_wait(bo, false, false, false); mtx_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { retval = VM_PAGER_ERROR; goto out_unlock; } } else mtx_unlock(&bdev->fence_lock); ret = ttm_mem_io_lock(man, true); if (unlikely(ret != 0)) { retval = VM_PAGER_ERROR; goto out_unlock; } ret = ttm_mem_io_reserve_vm(bo); if (unlikely(ret != 0)) { retval = VM_PAGER_ERROR; goto out_io_unlock; } /* * Strictly, we're not allowed to modify vma->vm_page_prot here, * since the mmap_sem is only held in read mode. However, we * modify only the caching bits of vma->vm_page_prot and * consider those bits protected by * the bo->mutex, as we should be the only writers. * There shouldn't really be any readers of these bits except * within vm_insert_mixed()? fork? * * TODO: Add a list of vmas to the bo, and change the * vma->vm_page_prot when the object changes caching policy, with * the correct locks held. */ if (!bo->mem.bus.is_iomem) { /* Allocate all page at once, most common usage */ ttm = bo->ttm; if (ttm->bdev->driver->ttm_tt_populate(ttm)) { retval = VM_PAGER_ERROR; goto out_io_unlock; } } if (bo->mem.bus.is_iomem) { m = PHYS_TO_VM_PAGE(bo->mem.bus.base + bo->mem.bus.offset + offset); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("physical address %#jx not fictitious", (uintmax_t)(bo->mem.bus.base + bo->mem.bus.offset + offset))); pmap_page_set_memattr(m, ttm_io_prot(bo->mem.placement)); } else { ttm = bo->ttm; m = ttm->pages[OFF_TO_IDX(offset)]; if (unlikely(!m)) { retval = VM_PAGER_ERROR; goto out_io_unlock; } pmap_page_set_memattr(m, (bo->mem.placement & TTM_PL_FLAG_CACHED) ? VM_MEMATTR_WRITE_BACK : ttm_io_prot(bo->mem.placement)); } VM_OBJECT_WLOCK(vm_obj); if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) { ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; } m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); /* XXX This looks like it should just be vm_page_replace? */ if (m1 == NULL) { if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) { vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(vm_obj); vm_wait(vm_obj); VM_OBJECT_WLOCK(vm_obj); ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; } } else { KASSERT(m == m1, ("inconsistent insert bo %p m %p m1 %p offset %jx", bo, m, m1, (uintmax_t)offset)); } vm_page_valid(m); if (*mres != NULL) { KASSERT(*mres != m, ("losing %p %p", *mres, m)); vm_page_xunbusy(*mres); vm_page_free(*mres); } *mres = m; out_io_unlock1: ttm_mem_io_unlock(man); out_unlock1: ttm_bo_unreserve(bo); vm_object_pip_wakeup(vm_obj); return (retval); out_io_unlock: VM_OBJECT_WLOCK(vm_obj); goto out_io_unlock1; out_unlock: VM_OBJECT_WLOCK(vm_obj); goto out_unlock1; } static int ttm_bo_vm_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { /* * On Linux, a reference to the buffer object is acquired here. * The reason is that this function is not called when the * mmap() is initialized, but only when a process forks for * instance. Therefore on Linux, the reference on the bo is * acquired either in ttm_bo_mmap() or ttm_bo_vm_open(). It's * then released in ttm_bo_vm_close(). * * Here, this function is called during mmap() initialization. * Thus, the reference acquired in ttm_bo_mmap_single() is * sufficient. */ *color = 0; return (0); } static void ttm_bo_vm_dtor(void *handle) { struct ttm_buffer_object *bo = handle; ttm_bo_unref(&bo); } static struct cdev_pager_ops ttm_pager_ops = { .cdev_pg_fault = ttm_bo_vm_fault, .cdev_pg_ctor = ttm_bo_vm_ctor, .cdev_pg_dtor = ttm_bo_vm_dtor }; int ttm_bo_mmap_single(struct ttm_bo_device *bdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **obj_res, int nprot) { struct ttm_bo_driver *driver; struct ttm_buffer_object *bo; struct vm_object *vm_obj; int ret; rw_wlock(&bdev->vm_lock); bo = ttm_bo_vm_lookup_rb(bdev, OFF_TO_IDX(*offset), OFF_TO_IDX(size)); if (likely(bo != NULL)) refcount_acquire(&bo->kref); rw_wunlock(&bdev->vm_lock); if (unlikely(bo == NULL)) { printf("[TTM] Could not find buffer object to map\n"); return (-EINVAL); } driver = bo->bdev->driver; if (unlikely(!driver->verify_access)) { ret = -EPERM; goto out_unref; } ret = driver->verify_access(bo); if (unlikely(ret != 0)) goto out_unref; vm_obj = cdev_pager_allocate(bo, OBJT_MGTDEVICE, &ttm_pager_ops, size, nprot, 0, curthread->td_ucred); if (vm_obj == NULL) { ret = -EINVAL; goto out_unref; } /* * Note: We're transferring the bo reference to vm_obj->handle here. */ *offset = 0; *obj_res = vm_obj; return 0; out_unref: ttm_bo_unref(&bo); return ret; } void ttm_bo_release_mmap(struct ttm_buffer_object *bo) { vm_object_t vm_obj; vm_page_t m; int i; vm_obj = cdev_pager_lookup(bo); if (vm_obj == NULL) return; VM_OBJECT_WLOCK(vm_obj); retry: for (i = 0; i < bo->num_pages; i++) { m = vm_page_lookup(vm_obj, i); if (m == NULL) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; - cdev_pager_free_page(vm_obj, m); + cdev_mgtdev_pager_free_page(vm_obj, m); } VM_OBJECT_WUNLOCK(vm_obj); vm_object_deallocate(vm_obj); } #if 0 int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) { if (vma->vm_pgoff != 0) return -EACCES; vma->vm_ops = &ttm_bo_vm_ops; vma->vm_private_data = ttm_bo_reference(bo); vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND; return 0; } ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write) { struct ttm_buffer_object *bo; struct ttm_bo_driver *driver; struct ttm_bo_kmap_obj map; unsigned long dev_offset = (*f_pos >> PAGE_SHIFT); unsigned long kmap_offset; unsigned long kmap_end; unsigned long kmap_num; size_t io_size; unsigned int page_offset; char *virtual; int ret; bool no_wait = false; bool dummy; read_lock(&bdev->vm_lock); bo = ttm_bo_vm_lookup_rb(bdev, dev_offset, 1); if (likely(bo != NULL)) ttm_bo_reference(bo); read_unlock(&bdev->vm_lock); if (unlikely(bo == NULL)) return -EFAULT; driver = bo->bdev->driver; if (unlikely(!driver->verify_access)) { ret = -EPERM; goto out_unref; } ret = driver->verify_access(bo, filp); if (unlikely(ret != 0)) goto out_unref; kmap_offset = dev_offset - bo->vm_node->start; if (unlikely(kmap_offset >= bo->num_pages)) { ret = -EFBIG; goto out_unref; } page_offset = *f_pos & ~PAGE_MASK; io_size = bo->num_pages - kmap_offset; io_size = (io_size << PAGE_SHIFT) - page_offset; if (count < io_size) io_size = count; kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT; kmap_num = kmap_end - kmap_offset + 1; ret = ttm_bo_reserve(bo, true, no_wait, false, 0); switch (ret) { case 0: break; case -EBUSY: ret = -EAGAIN; goto out_unref; default: goto out_unref; } ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map); if (unlikely(ret != 0)) { ttm_bo_unreserve(bo); goto out_unref; } virtual = ttm_kmap_obj_virtual(&map, &dummy); virtual += page_offset; if (write) ret = copy_from_user(virtual, wbuf, io_size); else ret = copy_to_user(rbuf, virtual, io_size); ttm_bo_kunmap(&map); ttm_bo_unreserve(bo); ttm_bo_unref(&bo); if (unlikely(ret != 0)) return -EFBIG; *f_pos += io_size; return io_size; out_unref: ttm_bo_unref(&bo); return ret; } ssize_t ttm_bo_fbdev_io(struct ttm_buffer_object *bo, const char __user *wbuf, char __user *rbuf, size_t count, loff_t *f_pos, bool write) { struct ttm_bo_kmap_obj map; unsigned long kmap_offset; unsigned long kmap_end; unsigned long kmap_num; size_t io_size; unsigned int page_offset; char *virtual; int ret; bool no_wait = false; bool dummy; kmap_offset = (*f_pos >> PAGE_SHIFT); if (unlikely(kmap_offset >= bo->num_pages)) return -EFBIG; page_offset = *f_pos & ~PAGE_MASK; io_size = bo->num_pages - kmap_offset; io_size = (io_size << PAGE_SHIFT) - page_offset; if (count < io_size) io_size = count; kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT; kmap_num = kmap_end - kmap_offset + 1; ret = ttm_bo_reserve(bo, true, no_wait, false, 0); switch (ret) { case 0: break; case -EBUSY: return -EAGAIN; default: return ret; } ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map); if (unlikely(ret != 0)) { ttm_bo_unreserve(bo); return ret; } virtual = ttm_kmap_obj_virtual(&map, &dummy); virtual += page_offset; if (write) ret = copy_from_user(virtual, wbuf, io_size); else ret = copy_to_user(rbuf, virtual, io_size); ttm_bo_kunmap(&map); ttm_bo_unreserve(bo); ttm_bo_unref(&bo); if (unlikely(ret != 0)) return ret; *f_pos += io_size; return io_size; } #endif diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c index 4530feb1c76d..49f8aefad62e 100644 --- a/sys/dev/xen/gntdev/gntdev.c +++ b/sys/dev/xen/gntdev/gntdev.c @@ -1,1279 +1,1279 @@ /*- * Copyright (c) 2016 Akshay Jaggi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * gntdev.c * * Interface to /dev/xen/gntdev. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_GNTDEV, "gntdev", "Xen grant-table user-space device"); #define MAX_OFFSET_COUNT ((0xffffffffffffffffull >> PAGE_SHIFT) + 1) static d_open_t gntdev_open; static d_ioctl_t gntdev_ioctl; static d_mmap_single_t gntdev_mmap_single; static struct cdevsw gntdev_devsw = { .d_version = D_VERSION, .d_open = gntdev_open, .d_ioctl = gntdev_ioctl, .d_mmap_single = gntdev_mmap_single, .d_name = "gntdev", }; static device_t gntdev_dev = NULL; struct gntdev_gref; struct gntdev_gmap; STAILQ_HEAD(gref_list_head, gntdev_gref); STAILQ_HEAD(gmap_list_head, gntdev_gmap); RB_HEAD(gref_tree_head, gntdev_gref); RB_HEAD(gmap_tree_head, gntdev_gmap); struct file_offset_struct { RB_ENTRY(file_offset_struct) next; uint64_t file_offset; uint64_t count; }; static int offset_cmp(struct file_offset_struct *f1, struct file_offset_struct *f2) { return (f1->file_offset - f2->file_offset); } RB_HEAD(file_offset_head, file_offset_struct); RB_GENERATE_STATIC(file_offset_head, file_offset_struct, next, offset_cmp); struct per_user_data { struct mtx user_data_lock; struct gref_tree_head gref_tree; struct gmap_tree_head gmap_tree; struct file_offset_head file_offset; }; /* * Get offset into the file which will be used while mmapping the * appropriate pages by the userspace program. */ static int get_file_offset(struct per_user_data *priv_user, uint32_t count, uint64_t *file_offset) { struct file_offset_struct *offset, *offset_tmp; if (count == 0) return (EINVAL); mtx_lock(&priv_user->user_data_lock); RB_FOREACH_SAFE(offset, file_offset_head, &priv_user->file_offset, offset_tmp) { if (offset->count >= count) { offset->count -= count; *file_offset = offset->file_offset + offset->count * PAGE_SIZE; if (offset->count == 0) { RB_REMOVE(file_offset_head, &priv_user->file_offset, offset); free(offset, M_GNTDEV); } mtx_unlock(&priv_user->user_data_lock); return (0); } } mtx_unlock(&priv_user->user_data_lock); return (ENOSPC); } static void put_file_offset(struct per_user_data *priv_user, uint32_t count, uint64_t file_offset) { struct file_offset_struct *offset, *offset_nxt, *offset_prv; offset = malloc(sizeof(*offset), M_GNTDEV, M_WAITOK | M_ZERO); offset->file_offset = file_offset; offset->count = count; mtx_lock(&priv_user->user_data_lock); RB_INSERT(file_offset_head, &priv_user->file_offset, offset); offset_nxt = RB_NEXT(file_offset_head, &priv_user->file_offset, offset); offset_prv = RB_PREV(file_offset_head, &priv_user->file_offset, offset); if (offset_nxt != NULL && offset_nxt->file_offset == offset->file_offset + offset->count * PAGE_SIZE) { offset->count += offset_nxt->count; RB_REMOVE(file_offset_head, &priv_user->file_offset, offset_nxt); free(offset_nxt, M_GNTDEV); } if (offset_prv != NULL && offset->file_offset == offset_prv->file_offset + offset_prv->count * PAGE_SIZE) { offset_prv->count += offset->count; RB_REMOVE(file_offset_head, &priv_user->file_offset, offset); free(offset, M_GNTDEV); } mtx_unlock(&priv_user->user_data_lock); } static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); static void gntdev_gmap_pg_dtor(void *handle); static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres); static struct cdev_pager_ops gntdev_gmap_pg_ops = { .cdev_pg_fault = gntdev_gmap_pg_fault, .cdev_pg_ctor = gntdev_gmap_pg_ctor, .cdev_pg_dtor = gntdev_gmap_pg_dtor, }; struct cleanup_data_struct { struct mtx to_kill_grefs_mtx; struct mtx to_kill_gmaps_mtx; struct gref_list_head to_kill_grefs; struct gmap_list_head to_kill_gmaps; }; static struct cleanup_data_struct cleanup_data = { .to_kill_grefs = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_grefs), .to_kill_gmaps = STAILQ_HEAD_INITIALIZER(cleanup_data.to_kill_gmaps), }; MTX_SYSINIT(to_kill_grefs_mtx, &cleanup_data.to_kill_grefs_mtx, "gntdev to_kill_grefs mutex", MTX_DEF); MTX_SYSINIT(to_kill_gmaps_mtx, &cleanup_data.to_kill_gmaps_mtx, "gntdev to_kill_gmaps mutex", MTX_DEF); static void cleanup_function(void *arg, __unused int pending); static struct task cleanup_task = TASK_INITIALIZER(0, cleanup_function, &cleanup_data); struct notify_data { uint64_t index; uint32_t action; uint32_t event_channel_port; xen_intr_handle_t notify_evtchn_handle; }; static void notify(struct notify_data *notify, vm_page_t page); /*-------------------- Grant Allocation Methods -----------------------------*/ struct gntdev_gref { union gref_next_union { STAILQ_ENTRY(gntdev_gref) list; RB_ENTRY(gntdev_gref) tree; } gref_next; uint64_t file_index; grant_ref_t gref_id; vm_page_t page; struct notify_data *notify; }; static int gref_cmp(struct gntdev_gref *g1, struct gntdev_gref *g2) { return (g1->file_index - g2->file_index); } RB_GENERATE_STATIC(gref_tree_head, gntdev_gref, gref_next.tree, gref_cmp); /* * Traverse over the device-list of to-be-deleted grants allocated, and * if all accesses, both local mmaps and foreign maps, to them have ended, * destroy them. */ static void gref_list_dtor(struct cleanup_data_struct *cleanup_data) { struct gref_list_head tmp_grefs; struct gntdev_gref *gref, *gref_tmp, *gref_previous; STAILQ_INIT(&tmp_grefs); mtx_lock(&cleanup_data->to_kill_grefs_mtx); STAILQ_SWAP(&cleanup_data->to_kill_grefs, &tmp_grefs, gntdev_gref); mtx_unlock(&cleanup_data->to_kill_grefs_mtx); gref_previous = NULL; STAILQ_FOREACH_SAFE(gref, &tmp_grefs, gref_next.list, gref_tmp) { if (gref->page && gref->page->object == NULL) { if (gref->notify) { notify(gref->notify, gref->page); } if (gref->gref_id != GRANT_REF_INVALID) { if (gnttab_query_foreign_access(gref->gref_id)) continue; if (gnttab_end_foreign_access_ref(gref->gref_id) == 0) continue; gnttab_free_grant_reference(gref->gref_id); } vm_page_unwire_noq(gref->page); vm_page_free(gref->page); gref->page = NULL; } if (gref->page == NULL) { if (gref_previous == NULL) STAILQ_REMOVE_HEAD(&tmp_grefs, gref_next.list); else STAILQ_REMOVE_AFTER(&tmp_grefs, gref_previous, gref_next.list); if (gref->notify) free(gref->notify, M_GNTDEV); free(gref, M_GNTDEV); } else gref_previous = gref; } if (!STAILQ_EMPTY(&tmp_grefs)) { mtx_lock(&cleanup_data->to_kill_grefs_mtx); STAILQ_CONCAT(&cleanup_data->to_kill_grefs, &tmp_grefs); mtx_unlock(&cleanup_data->to_kill_grefs_mtx); } } /* * Find count number of contiguous allocated grants for a given userspace * program by file-offset (index). */ static struct gntdev_gref* gntdev_find_grefs(struct per_user_data *priv_user, uint64_t index, uint32_t count) { struct gntdev_gref find_gref, *gref, *gref_start = NULL; find_gref.file_index = index; mtx_lock(&priv_user->user_data_lock); gref_start = RB_FIND(gref_tree_head, &priv_user->gref_tree, &find_gref); for (gref = gref_start; gref != NULL && count > 0; gref = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) { if (index != gref->file_index) break; index += PAGE_SIZE; count--; } mtx_unlock(&priv_user->user_data_lock); if (count) return (NULL); return (gref_start); } /* * IOCTL_GNTDEV_ALLOC_GREF * Allocate required number of wired pages for the request, grant foreign * access to the physical frames for these pages, and add details about * this allocation to the per user private data, so that these pages can * be mmapped by the userspace program. */ static int gntdev_alloc_gref(struct ioctl_gntdev_alloc_gref *arg) { uint32_t i; int error, readonly; uint64_t file_offset; struct gntdev_gref *grefs; struct per_user_data *priv_user; readonly = !(arg->flags & GNTDEV_ALLOC_FLAG_WRITABLE); error = devfs_get_cdevpriv((void**) &priv_user); if (error != 0) return (EINVAL); /* Cleanup grefs and free pages. */ taskqueue_enqueue(taskqueue_thread, &cleanup_task); /* Get file offset for this request. */ error = get_file_offset(priv_user, arg->count, &file_offset); if (error != 0) return (error); /* Allocate grefs. */ grefs = malloc(sizeof(*grefs) * arg->count, M_GNTDEV, M_WAITOK); for (i = 0; i < arg->count; i++) { grefs[i].file_index = file_offset + i * PAGE_SIZE; grefs[i].gref_id = GRANT_REF_INVALID; grefs[i].notify = NULL; grefs[i].page = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (grefs[i].page == NULL) { log(LOG_ERR, "Page allocation failed."); error = ENOMEM; break; } grefs[i].page->valid = VM_PAGE_BITS_ALL; error = gnttab_grant_foreign_access(arg->domid, (VM_PAGE_TO_PHYS(grefs[i].page) >> PAGE_SHIFT), readonly, &grefs[i].gref_id); if (error != 0) { log(LOG_ERR, "Grant Table Hypercall failed."); break; } } /* Copy the output values. */ arg->index = file_offset; for (i = 0; error == 0 && i < arg->count; i++) { if (suword32(&arg->gref_ids[i], grefs[i].gref_id) != 0) error = EFAULT; } if (error != 0) { /* * If target domain maps the gref (by guessing the gref-id), * then we can't clean it up yet and we have to leave the * page in place so as to not leak our memory to that domain. * Add it to a global list to be cleaned up later. */ mtx_lock(&cleanup_data.to_kill_grefs_mtx); for (i = 0; i < arg->count; i++) STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, &grefs[i], gref_next.list); mtx_unlock(&cleanup_data.to_kill_grefs_mtx); taskqueue_enqueue(taskqueue_thread, &cleanup_task); return (error); } /* Modify the per user private data. */ mtx_lock(&priv_user->user_data_lock); for (i = 0; i < arg->count; i++) RB_INSERT(gref_tree_head, &priv_user->gref_tree, &grefs[i]); mtx_unlock(&priv_user->user_data_lock); return (error); } /* * IOCTL_GNTDEV_DEALLOC_GREF * Remove grant allocation information from the per user private data, so * that it can't be mmapped anymore by the userspace program, and add it * to the to-be-deleted grants global device-list. */ static int gntdev_dealloc_gref(struct ioctl_gntdev_dealloc_gref *arg) { int error; uint32_t count; struct gntdev_gref *gref, *gref_tmp; struct per_user_data *priv_user; error = devfs_get_cdevpriv((void**) &priv_user); if (error != 0) return (EINVAL); gref = gntdev_find_grefs(priv_user, arg->index, arg->count); if (gref == NULL) { log(LOG_ERR, "Can't find requested grant-refs."); return (EINVAL); } /* Remove the grefs from user private data. */ count = arg->count; mtx_lock(&priv_user->user_data_lock); mtx_lock(&cleanup_data.to_kill_grefs_mtx); for (; gref != NULL && count > 0; gref = gref_tmp) { gref_tmp = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref); RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref); STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref, gref_next.list); count--; } mtx_unlock(&cleanup_data.to_kill_grefs_mtx); mtx_unlock(&priv_user->user_data_lock); taskqueue_enqueue(taskqueue_thread, &cleanup_task); put_file_offset(priv_user, arg->count, arg->index); return (0); } /*-------------------- Grant Mapping Methods --------------------------------*/ struct gntdev_gmap_map { vm_object_t mem; struct resource *pseudo_phys_res; int pseudo_phys_res_id; vm_paddr_t phys_base_addr; }; struct gntdev_gmap { union gmap_next_union { STAILQ_ENTRY(gntdev_gmap) list; RB_ENTRY(gntdev_gmap) tree; } gmap_next; uint64_t file_index; uint32_t count; struct gnttab_map_grant_ref *grant_map_ops; struct gntdev_gmap_map *map; struct notify_data *notify; }; static int gmap_cmp(struct gntdev_gmap *g1, struct gntdev_gmap *g2) { return (g1->file_index - g2->file_index); } RB_GENERATE_STATIC(gmap_tree_head, gntdev_gmap, gmap_next.tree, gmap_cmp); /* * Traverse over the device-list of to-be-deleted grant mappings, and if * the region is no longer mmapped by anyone, free the memory used to * store information about the mapping. */ static void gmap_list_dtor(struct cleanup_data_struct *cleanup_data) { struct gmap_list_head tmp_gmaps; struct gntdev_gmap *gmap, *gmap_tmp, *gmap_previous; STAILQ_INIT(&tmp_gmaps); mtx_lock(&cleanup_data->to_kill_gmaps_mtx); STAILQ_SWAP(&cleanup_data->to_kill_gmaps, &tmp_gmaps, gntdev_gmap); mtx_unlock(&cleanup_data->to_kill_gmaps_mtx); gmap_previous = NULL; STAILQ_FOREACH_SAFE(gmap, &tmp_gmaps, gmap_next.list, gmap_tmp) { if (gmap->map == NULL) { if (gmap_previous == NULL) STAILQ_REMOVE_HEAD(&tmp_gmaps, gmap_next.list); else STAILQ_REMOVE_AFTER(&tmp_gmaps, gmap_previous, gmap_next.list); if (gmap->notify) free(gmap->notify, M_GNTDEV); free(gmap->grant_map_ops, M_GNTDEV); free(gmap, M_GNTDEV); } else gmap_previous = gmap; } if (!STAILQ_EMPTY(&tmp_gmaps)) { mtx_lock(&cleanup_data->to_kill_gmaps_mtx); STAILQ_CONCAT(&cleanup_data->to_kill_gmaps, &tmp_gmaps); mtx_unlock(&cleanup_data->to_kill_gmaps_mtx); } } /* * Find mapped grants for a given userspace program, by file-offset (index) * and count, as supplied during the map-ioctl. */ static struct gntdev_gmap* gntdev_find_gmap(struct per_user_data *priv_user, uint64_t index, uint32_t count) { struct gntdev_gmap find_gmap, *gmap; find_gmap.file_index = index; mtx_lock(&priv_user->user_data_lock); gmap = RB_FIND(gmap_tree_head, &priv_user->gmap_tree, &find_gmap); mtx_unlock(&priv_user->user_data_lock); if (gmap != NULL && gmap->count == count) return (gmap); return (NULL); } /* * Remove the pages from the mgtdevice pager, call the unmap hypercall, * free the xenmem resource. This function is called during the * destruction of the mgtdevice pager, which happens when all mmaps to * it have been removed, and the unmap-ioctl has been performed. */ static int notify_unmap_cleanup(struct gntdev_gmap *gmap) { uint32_t i; int error, count; vm_page_t m; struct gnttab_unmap_grant_ref *unmap_ops; unmap_ops = malloc(sizeof(struct gnttab_unmap_grant_ref) * gmap->count, M_GNTDEV, M_WAITOK); /* Enumerate freeable maps. */ count = 0; for (i = 0; i < gmap->count; i++) { if (gmap->grant_map_ops[i].handle != -1) { unmap_ops[count].handle = gmap->grant_map_ops[i].handle; unmap_ops[count].host_addr = gmap->grant_map_ops[i].host_addr; unmap_ops[count].dev_bus_addr = 0; count++; } } /* Perform notification. */ if (count > 0 && gmap->notify) { vm_page_t page; uint64_t page_offset; page_offset = gmap->notify->index - gmap->file_index; page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + page_offset); notify(gmap->notify, page); } /* Free the pages. */ VM_OBJECT_WLOCK(gmap->map->mem); retry: for (i = 0; i < gmap->count; i++) { m = vm_page_lookup(gmap->map->mem, i); if (m == NULL) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; - cdev_pager_free_page(gmap->map->mem, m); + cdev_mgtdev_pager_free_page(gmap->map->mem, m); } VM_OBJECT_WUNLOCK(gmap->map->mem); /* Perform unmap hypercall. */ error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); for (i = 0; i < gmap->count; i++) { gmap->grant_map_ops[i].handle = -1; gmap->grant_map_ops[i].host_addr = 0; } if (gmap->map) { error = xenmem_free(gntdev_dev, gmap->map->pseudo_phys_res_id, gmap->map->pseudo_phys_res); KASSERT(error == 0, ("Unable to release memory resource: %d", error)); free(gmap->map, M_GNTDEV); gmap->map = NULL; } free(unmap_ops, M_GNTDEV); return (error); } /* * IOCTL_GNTDEV_MAP_GRANT_REF * Populate structures for mapping the grant reference in the per user * private data. Actual resource allocation and map hypercall is performed * during the mmap. */ static int gntdev_map_grant_ref(struct ioctl_gntdev_map_grant_ref *arg) { uint32_t i; int error; struct gntdev_gmap *gmap; struct per_user_data *priv_user; error = devfs_get_cdevpriv((void**) &priv_user); if (error != 0) return (EINVAL); gmap = malloc(sizeof(*gmap), M_GNTDEV, M_WAITOK | M_ZERO); gmap->count = arg->count; gmap->grant_map_ops = malloc(sizeof(struct gnttab_map_grant_ref) * arg->count, M_GNTDEV, M_WAITOK | M_ZERO); for (i = 0; i < arg->count; i++) { struct ioctl_gntdev_grant_ref ref; error = copyin(&arg->refs[i], &ref, sizeof(ref)); if (error != 0) { free(gmap->grant_map_ops, M_GNTDEV); free(gmap, M_GNTDEV); return (error); } gmap->grant_map_ops[i].dom = ref.domid; gmap->grant_map_ops[i].ref = ref.ref; gmap->grant_map_ops[i].handle = -1; gmap->grant_map_ops[i].flags = GNTMAP_host_map; } error = get_file_offset(priv_user, arg->count, &gmap->file_index); if (error != 0) { free(gmap->grant_map_ops, M_GNTDEV); free(gmap, M_GNTDEV); return (error); } mtx_lock(&priv_user->user_data_lock); RB_INSERT(gmap_tree_head, &priv_user->gmap_tree, gmap); mtx_unlock(&priv_user->user_data_lock); arg->index = gmap->file_index; return (error); } /* * IOCTL_GNTDEV_UNMAP_GRANT_REF * Remove the map information from the per user private data and add it * to the global device-list of mappings to be deleted. A reference to * the mgtdevice pager is also decreased, the reason for which is * explained in mmap_gmap(). */ static int gntdev_unmap_grant_ref(struct ioctl_gntdev_unmap_grant_ref *arg) { int error; struct gntdev_gmap *gmap; struct per_user_data *priv_user; error = devfs_get_cdevpriv((void**) &priv_user); if (error != 0) return (EINVAL); gmap = gntdev_find_gmap(priv_user, arg->index, arg->count); if (gmap == NULL) { log(LOG_ERR, "Can't find requested grant-map."); return (EINVAL); } mtx_lock(&priv_user->user_data_lock); mtx_lock(&cleanup_data.to_kill_gmaps_mtx); RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap); STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, gmap_next.list); mtx_unlock(&cleanup_data.to_kill_gmaps_mtx); mtx_unlock(&priv_user->user_data_lock); if (gmap->map) vm_object_deallocate(gmap->map->mem); taskqueue_enqueue(taskqueue_thread, &cleanup_task); put_file_offset(priv_user, arg->count, arg->index); return (0); } /* * IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR * Get file-offset and count for a given mapping, from the virtual address * where the mapping is mmapped. * Please note, this only works for grants mapped by this domain, and not * grants allocated. Count doesn't make much sense in reference to grants * allocated. Also, because this function is present in the linux gntdev * device, but not in the linux gntalloc one, most userspace code only use * it for mapped grants. */ static int gntdev_get_offset_for_vaddr(struct ioctl_gntdev_get_offset_for_vaddr *arg, struct thread *td) { int error; vm_map_t map; vm_map_entry_t entry; vm_object_t mem; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; struct gntdev_gmap *gmap; int rc; map = &td->td_proc->p_vmspace->vm_map; error = vm_map_lookup(&map, arg->vaddr, VM_PROT_NONE, &entry, &mem, &pindex, &prot, &wired); if (error != KERN_SUCCESS) return (EINVAL); if ((mem->type != OBJT_MGTDEVICE) || (mem->un_pager.devp.ops != &gntdev_gmap_pg_ops)) { rc = EINVAL; goto out; } gmap = mem->handle; if (gmap == NULL || (entry->end - entry->start) != (gmap->count * PAGE_SIZE)) { rc = EINVAL; goto out; } arg->count = gmap->count; arg->offset = gmap->file_index; rc = 0; out: vm_map_lookup_done(map, entry); return (rc); } /*-------------------- Grant Mapping Pager ----------------------------------*/ static int gntdev_gmap_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { return (0); } static void gntdev_gmap_pg_dtor(void *handle) { notify_unmap_cleanup((struct gntdev_gmap *)handle); } static int gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct gntdev_gmap *gmap = object->handle; vm_pindex_t pidx, ridx; vm_page_t page; vm_ooffset_t relative_offset; if (gmap->map == NULL) return (VM_PAGER_FAIL); relative_offset = offset - gmap->file_index; pidx = OFF_TO_IDX(offset); ridx = OFF_TO_IDX(relative_offset); if (ridx >= gmap->count || gmap->grant_map_ops[ridx].status != GNTST_okay) return (VM_PAGER_FAIL); page = PHYS_TO_VM_PAGE(gmap->map->phys_base_addr + relative_offset); if (page == NULL) return (VM_PAGER_FAIL); KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); KASSERT(vm_page_wired(page), ("page %p is not wired", page)); KASSERT(!vm_page_busied(page), ("page %p is busy", page)); vm_page_busy_acquire(page, 0); vm_page_valid(page); if (*mres != NULL) vm_page_replace(page, object, pidx, *mres); else vm_page_insert(page, object, pidx); *mres = page; return (VM_PAGER_OK); } /*------------------ Grant Table Methods ------------------------------------*/ static void notify(struct notify_data *notify, vm_page_t page) { if (notify->action & UNMAP_NOTIFY_CLEAR_BYTE) { uint8_t *mem; uint64_t offset; offset = notify->index & PAGE_MASK; mem = (uint8_t *)pmap_quick_enter_page(page); mem[offset] = 0; pmap_quick_remove_page((vm_offset_t)mem); } if (notify->action & UNMAP_NOTIFY_SEND_EVENT) { xen_intr_signal(notify->notify_evtchn_handle); xen_intr_unbind(¬ify->notify_evtchn_handle); } notify->action = 0; } /* * Helper to copy new arguments from the notify ioctl into * the existing notify data. */ static int copy_notify_helper(struct notify_data *destination, struct ioctl_gntdev_unmap_notify *source) { xen_intr_handle_t handlep = NULL; /* * "Get" before "Put"ting previous reference, as we might be * holding the last reference to the event channel port. */ if (source->action & UNMAP_NOTIFY_SEND_EVENT) if (xen_intr_get_evtchn_from_port(source->event_channel_port, &handlep) != 0) return (EINVAL); if (destination->action & UNMAP_NOTIFY_SEND_EVENT) xen_intr_unbind(&destination->notify_evtchn_handle); destination->action = source->action; destination->event_channel_port = source->event_channel_port; destination->index = source->index; destination->notify_evtchn_handle = handlep; return (0); } /* * IOCTL_GNTDEV_SET_UNMAP_NOTIFY * Set unmap notification inside the appropriate grant. It sends a * notification when the grant is completely munmapped by this domain * and ready for destruction. */ static int gntdev_set_unmap_notify(struct ioctl_gntdev_unmap_notify *arg) { int error; uint64_t index; struct per_user_data *priv_user; struct gntdev_gref *gref = NULL; struct gntdev_gmap *gmap; error = devfs_get_cdevpriv((void**) &priv_user); if (error != 0) return (EINVAL); if (arg->action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) return (EINVAL); index = arg->index & ~PAGE_MASK; gref = gntdev_find_grefs(priv_user, index, 1); if (gref) { if (gref->notify == NULL) gref->notify = malloc(sizeof(*arg), M_GNTDEV, M_WAITOK | M_ZERO); return (copy_notify_helper(gref->notify, arg)); } error = EINVAL; mtx_lock(&priv_user->user_data_lock); RB_FOREACH(gmap, gmap_tree_head, &priv_user->gmap_tree) { if (arg->index >= gmap->file_index && arg->index < gmap->file_index + gmap->count * PAGE_SIZE) { if (gmap->notify == NULL) gmap->notify = malloc(sizeof(*arg), M_GNTDEV, M_WAITOK | M_ZERO); error = copy_notify_helper(gmap->notify, arg); break; } } mtx_unlock(&priv_user->user_data_lock); return (error); } /*------------------ Gntdev Char Device Methods -----------------------------*/ static void cleanup_function(void *arg, __unused int pending) { gref_list_dtor((struct cleanup_data_struct *) arg); gmap_list_dtor((struct cleanup_data_struct *) arg); } static void per_user_data_dtor(void *arg) { struct gntdev_gref *gref, *gref_tmp; struct gntdev_gmap *gmap, *gmap_tmp; struct file_offset_struct *offset, *offset_tmp; struct per_user_data *priv_user; priv_user = (struct per_user_data *) arg; mtx_lock(&priv_user->user_data_lock); mtx_lock(&cleanup_data.to_kill_grefs_mtx); RB_FOREACH_SAFE(gref, gref_tree_head, &priv_user->gref_tree, gref_tmp) { RB_REMOVE(gref_tree_head, &priv_user->gref_tree, gref); STAILQ_INSERT_TAIL(&cleanup_data.to_kill_grefs, gref, gref_next.list); } mtx_unlock(&cleanup_data.to_kill_grefs_mtx); mtx_lock(&cleanup_data.to_kill_gmaps_mtx); RB_FOREACH_SAFE(gmap, gmap_tree_head, &priv_user->gmap_tree, gmap_tmp) { RB_REMOVE(gmap_tree_head, &priv_user->gmap_tree, gmap); STAILQ_INSERT_TAIL(&cleanup_data.to_kill_gmaps, gmap, gmap_next.list); if (gmap->map) vm_object_deallocate(gmap->map->mem); } mtx_unlock(&cleanup_data.to_kill_gmaps_mtx); RB_FOREACH_SAFE(offset, file_offset_head, &priv_user->file_offset, offset_tmp) { RB_REMOVE(file_offset_head, &priv_user->file_offset, offset); free(offset, M_GNTDEV); } mtx_unlock(&priv_user->user_data_lock); taskqueue_enqueue(taskqueue_thread, &cleanup_task); mtx_destroy(&priv_user->user_data_lock); free(priv_user, M_GNTDEV); } static int gntdev_open(struct cdev *dev, int flag, int otyp, struct thread *td) { int error; struct per_user_data *priv_user; struct file_offset_struct *offset; priv_user = malloc(sizeof(*priv_user), M_GNTDEV, M_WAITOK | M_ZERO); RB_INIT(&priv_user->gref_tree); RB_INIT(&priv_user->gmap_tree); RB_INIT(&priv_user->file_offset); offset = malloc(sizeof(*offset), M_GNTDEV, M_WAITOK | M_ZERO); offset->file_offset = 0; offset->count = MAX_OFFSET_COUNT; RB_INSERT(file_offset_head, &priv_user->file_offset, offset); mtx_init(&priv_user->user_data_lock, "per user data mutex", NULL, MTX_DEF); error = devfs_set_cdevpriv(priv_user, per_user_data_dtor); if (error != 0) per_user_data_dtor(priv_user); return (error); } static int gntdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { int error; switch (cmd) { case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: error = gntdev_set_unmap_notify( (struct ioctl_gntdev_unmap_notify*) data); break; case IOCTL_GNTDEV_ALLOC_GREF: error = gntdev_alloc_gref( (struct ioctl_gntdev_alloc_gref*) data); break; case IOCTL_GNTDEV_DEALLOC_GREF: error = gntdev_dealloc_gref( (struct ioctl_gntdev_dealloc_gref*) data); break; case IOCTL_GNTDEV_MAP_GRANT_REF: error = gntdev_map_grant_ref( (struct ioctl_gntdev_map_grant_ref*) data); break; case IOCTL_GNTDEV_UNMAP_GRANT_REF: error = gntdev_unmap_grant_ref( (struct ioctl_gntdev_unmap_grant_ref*) data); break; case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR: error = gntdev_get_offset_for_vaddr( (struct ioctl_gntdev_get_offset_for_vaddr*) data, td); break; default: error = ENOSYS; break; } return (error); } /* * MMAP an allocated grant into user memory. * Please note, that the grants must not already be mmapped, otherwise * this function will fail. */ static int mmap_gref(struct per_user_data *priv_user, struct gntdev_gref *gref_start, uint32_t count, vm_size_t size, struct vm_object **object) { vm_object_t mem_obj; struct gntdev_gref *gref; mem_obj = vm_pager_allocate(OBJT_PHYS, NULL, size, VM_PROT_ALL, 0, curthread->td_ucred); if (mem_obj == NULL) return (ENOMEM); mtx_lock(&priv_user->user_data_lock); VM_OBJECT_WLOCK(mem_obj); for (gref = gref_start; gref != NULL && count > 0; gref = RB_NEXT(gref_tree_head, &priv_user->gref_tree, gref)) { if (gref->page->object) break; vm_page_insert(gref->page, mem_obj, OFF_TO_IDX(gref->file_index)); count--; } VM_OBJECT_WUNLOCK(mem_obj); mtx_unlock(&priv_user->user_data_lock); if (count) { vm_object_deallocate(mem_obj); return (EINVAL); } *object = mem_obj; return (0); } /* * MMAP a mapped grant into user memory. */ static int mmap_gmap(struct per_user_data *priv_user, struct gntdev_gmap *gmap_start, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { uint32_t i; int error; /* * The grant map hypercall might already be done. * If that is the case, increase a reference to the * vm object and return the already allocated object. */ if (gmap_start->map) { vm_object_reference(gmap_start->map->mem); *object = gmap_start->map->mem; return (0); } gmap_start->map = malloc(sizeof(*(gmap_start->map)), M_GNTDEV, M_WAITOK | M_ZERO); /* Allocate the xen pseudo physical memory resource. */ gmap_start->map->pseudo_phys_res_id = 0; gmap_start->map->pseudo_phys_res = xenmem_alloc(gntdev_dev, &gmap_start->map->pseudo_phys_res_id, size); if (gmap_start->map->pseudo_phys_res == NULL) { free(gmap_start->map, M_GNTDEV); gmap_start->map = NULL; return (ENOMEM); } gmap_start->map->phys_base_addr = rman_get_start(gmap_start->map->pseudo_phys_res); /* Allocate the mgtdevice pager. */ gmap_start->map->mem = cdev_pager_allocate(gmap_start, OBJT_MGTDEVICE, &gntdev_gmap_pg_ops, size, nprot, *offset, NULL); if (gmap_start->map->mem == NULL) { xenmem_free(gntdev_dev, gmap_start->map->pseudo_phys_res_id, gmap_start->map->pseudo_phys_res); free(gmap_start->map, M_GNTDEV); gmap_start->map = NULL; return (ENOMEM); } for (i = 0; i < gmap_start->count; i++) { gmap_start->grant_map_ops[i].host_addr = gmap_start->map->phys_base_addr + i * PAGE_SIZE; if ((nprot & PROT_WRITE) == 0) gmap_start->grant_map_ops[i].flags |= GNTMAP_readonly; } /* Make the MAP hypercall. */ error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, gmap_start->grant_map_ops, gmap_start->count); if (error != 0) { /* * Deallocate pager. * Pager deallocation will automatically take care of * xenmem deallocation, etc. */ vm_object_deallocate(gmap_start->map->mem); return (EINVAL); } /* Retry EAGAIN maps. */ for (i = 0; i < gmap_start->count; i++) { int delay = 1; while (delay < 256 && gmap_start->grant_map_ops[i].status == GNTST_eagain) { HYPERVISOR_grant_table_op( GNTTABOP_map_grant_ref, &gmap_start->grant_map_ops[i], 1); pause(("gntmap"), delay * SBT_1MS); delay++; } if (gmap_start->grant_map_ops[i].status == GNTST_eagain) gmap_start->grant_map_ops[i].status = GNTST_bad_page; if (gmap_start->grant_map_ops[i].status != GNTST_okay) { /* * Deallocate pager. * Pager deallocation will automatically take care of * xenmem deallocation, notification, unmap hypercall, * etc. */ vm_object_deallocate(gmap_start->map->mem); return (EINVAL); } } /* * Add a reference to the vm object. We do not want * the vm object to be deleted when all the mmaps are * unmapped, because it may be re-mmapped. Instead, * we want the object to be deleted, when along with * munmaps, we have also processed the unmap-ioctl. */ vm_object_reference(gmap_start->map->mem); *object = gmap_start->map->mem; return (0); } static int gntdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { int error; uint32_t count; struct gntdev_gref *gref_start; struct gntdev_gmap *gmap_start; struct per_user_data *priv_user; error = devfs_get_cdevpriv((void**) &priv_user); if (error != 0) return (EINVAL); count = OFF_TO_IDX(size); gref_start = gntdev_find_grefs(priv_user, *offset, count); if (gref_start) { error = mmap_gref(priv_user, gref_start, count, size, object); return (error); } gmap_start = gntdev_find_gmap(priv_user, *offset, count); if (gmap_start) { error = mmap_gmap(priv_user, gmap_start, offset, size, object, nprot); return (error); } return (EINVAL); } /*------------------ Private Device Attachment Functions --------------------*/ static void gntdev_identify(driver_t *driver, device_t parent) { KASSERT((xen_domain()), ("Trying to attach gntdev device on non Xen domain")); if (BUS_ADD_CHILD(parent, 0, "gntdev", 0) == NULL) panic("unable to attach gntdev user-space device"); } static int gntdev_probe(device_t dev) { gntdev_dev = dev; device_set_desc(dev, "Xen grant-table user-space device"); return (BUS_PROBE_NOWILDCARD); } static int gntdev_attach(device_t dev) { make_dev_credf(MAKEDEV_ETERNAL, &gntdev_devsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "xen/gntdev"); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t gntdev_methods[] = { DEVMETHOD(device_identify, gntdev_identify), DEVMETHOD(device_probe, gntdev_probe), DEVMETHOD(device_attach, gntdev_attach), DEVMETHOD_END }; static driver_t gntdev_driver = { "gntdev", gntdev_methods, 0, }; DRIVER_MODULE(gntdev, xenpv, gntdev_driver, 0, 0); MODULE_DEPEND(gntdev, xenpv, 1, 1, 1); diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c index 02e268b23d42..c04ac287183b 100644 --- a/sys/dev/xen/privcmd/privcmd.c +++ b/sys/dev/xen/privcmd/privcmd.c @@ -1,608 +1,608 @@ /* * Copyright (c) 2014 Roger Pau MonnĂ© * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_PRIVCMD, "privcmd_dev", "Xen privcmd user-space device"); #define MAX_DMOP_BUFFERS 16 struct privcmd_map { vm_object_t mem; vm_size_t size; struct resource *pseudo_phys_res; int pseudo_phys_res_id; vm_paddr_t phys_base_addr; boolean_t mapped; BITSET_DEFINE_VAR() *err; }; static d_ioctl_t privcmd_ioctl; static d_open_t privcmd_open; static d_mmap_single_t privcmd_mmap_single; static struct cdevsw privcmd_devsw = { .d_version = D_VERSION, .d_ioctl = privcmd_ioctl, .d_mmap_single = privcmd_mmap_single, .d_open = privcmd_open, .d_name = "privcmd", }; static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); static void privcmd_pg_dtor(void *handle); static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres); static struct cdev_pager_ops privcmd_pg_ops = { .cdev_pg_fault = privcmd_pg_fault, .cdev_pg_ctor = privcmd_pg_ctor, .cdev_pg_dtor = privcmd_pg_dtor, }; struct per_user_data { domid_t dom; }; static device_t privcmd_dev = NULL; /*------------------------- Privcmd Pager functions --------------------------*/ static int privcmd_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { return (0); } static void privcmd_pg_dtor(void *handle) { struct xen_remove_from_physmap rm = { .domid = DOMID_SELF }; struct privcmd_map *map = handle; int error __diagused; vm_size_t i; vm_page_t m; /* * Remove the mappings from the used pages. This will remove the * underlying p2m bindings in Xen second stage translation. */ if (map->mapped == true) { VM_OBJECT_WLOCK(map->mem); retry: for (i = 0; i < map->size; i++) { m = vm_page_lookup(map->mem, i); if (m == NULL) continue; if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; - cdev_pager_free_page(map->mem, m); + cdev_mgtdev_pager_free_page(map->mem, m); } VM_OBJECT_WUNLOCK(map->mem); for (i = 0; i < map->size; i++) { rm.gpfn = atop(map->phys_base_addr) + i; HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &rm); } free(map->err, M_PRIVCMD); } error = xenmem_free(privcmd_dev, map->pseudo_phys_res_id, map->pseudo_phys_res); KASSERT(error == 0, ("Unable to release memory resource: %d", error)); free(map, M_PRIVCMD); } static int privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct privcmd_map *map = object->handle; vm_pindex_t pidx; vm_page_t page; if (map->mapped != true) return (VM_PAGER_FAIL); pidx = OFF_TO_IDX(offset); if (pidx >= map->size || BIT_ISSET(map->size, pidx, map->err)) return (VM_PAGER_FAIL); page = PHYS_TO_VM_PAGE(map->phys_base_addr + offset); if (page == NULL) return (VM_PAGER_FAIL); KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); KASSERT(vm_page_wired(page), ("page %p not wired", page)); KASSERT(!vm_page_busied(page), ("page %p is busy", page)); vm_page_busy_acquire(page, 0); vm_page_valid(page); if (*mres != NULL) vm_page_replace(page, object, pidx, *mres); else vm_page_insert(page, object, pidx); *mres = page; return (VM_PAGER_OK); } /*----------------------- Privcmd char device methods ------------------------*/ static int privcmd_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, vm_object_t *object, int nprot) { struct privcmd_map *map; map = malloc(sizeof(*map), M_PRIVCMD, M_WAITOK | M_ZERO); map->size = OFF_TO_IDX(size); map->pseudo_phys_res_id = 0; map->pseudo_phys_res = xenmem_alloc(privcmd_dev, &map->pseudo_phys_res_id, size); if (map->pseudo_phys_res == NULL) { free(map, M_PRIVCMD); return (ENOMEM); } map->phys_base_addr = rman_get_start(map->pseudo_phys_res); map->mem = cdev_pager_allocate(map, OBJT_MGTDEVICE, &privcmd_pg_ops, size, nprot, *offset, NULL); if (map->mem == NULL) { xenmem_free(privcmd_dev, map->pseudo_phys_res_id, map->pseudo_phys_res); free(map, M_PRIVCMD); return (ENOMEM); } *object = map->mem; return (0); } static struct privcmd_map * setup_virtual_area(struct thread *td, unsigned long addr, unsigned long num) { vm_map_t map; vm_map_entry_t entry; vm_object_t mem; vm_pindex_t pindex; vm_prot_t prot; boolean_t wired; struct privcmd_map *umap; int error; if ((num == 0) || ((addr & PAGE_MASK) != 0)) return NULL; map = &td->td_proc->p_vmspace->vm_map; error = vm_map_lookup(&map, addr, VM_PROT_NONE, &entry, &mem, &pindex, &prot, &wired); if (error != KERN_SUCCESS || (entry->start != addr) || (entry->end != addr + (num * PAGE_SIZE))) return NULL; vm_map_lookup_done(map, entry); if ((mem->type != OBJT_MGTDEVICE) || (mem->un_pager.devp.ops != &privcmd_pg_ops)) return NULL; umap = mem->handle; /* Allocate a bitset to store broken page mappings. */ umap->err = BITSET_ALLOC(num, M_PRIVCMD, M_WAITOK | M_ZERO); return umap; } static int privcmd_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int mode, struct thread *td) { int error; unsigned int i; void *data; const struct per_user_data *u; error = devfs_get_cdevpriv(&data); if (error != 0) return (EINVAL); /* * Constify user-data to prevent unintended changes to the restriction * limits. */ u = data; switch (cmd) { case IOCTL_PRIVCMD_HYPERCALL: { struct ioctl_privcmd_hypercall *hcall; hcall = (struct ioctl_privcmd_hypercall *)arg; /* Forbid hypercalls if restricted. */ if (u->dom != DOMID_INVALID) { error = EPERM; break; } #ifdef __amd64__ /* * The hypervisor page table walker will refuse to access * user-space pages if SMAP is enabled, so temporary disable it * while performing the hypercall. */ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) stac(); #endif error = privcmd_hypercall(hcall->op, hcall->arg[0], hcall->arg[1], hcall->arg[2], hcall->arg[3], hcall->arg[4]); #ifdef __amd64__ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) clac(); #endif if (error >= 0) { hcall->retval = error; error = 0; } else { error = xen_translate_error(error); hcall->retval = 0; } break; } case IOCTL_PRIVCMD_MMAPBATCH: { struct ioctl_privcmd_mmapbatch *mmap; struct xen_add_to_physmap_batch add; xen_ulong_t *idxs; xen_pfn_t *gpfns; int *errs; unsigned int index; struct privcmd_map *umap; uint16_t num; mmap = (struct ioctl_privcmd_mmapbatch *)arg; if (u->dom != DOMID_INVALID && u->dom != mmap->dom) { error = EPERM; break; } umap = setup_virtual_area(td, mmap->addr, mmap->num); if (umap == NULL) { error = EINVAL; break; } add.domid = DOMID_SELF; add.space = XENMAPSPACE_gmfn_foreign; add.u.foreign_domid = mmap->dom; /* * The 'size' field in the xen_add_to_physmap_range only * allows for UINT16_MAX mappings in a single hypercall. */ num = MIN(mmap->num, UINT16_MAX); idxs = malloc(sizeof(*idxs) * num, M_PRIVCMD, M_WAITOK); gpfns = malloc(sizeof(*gpfns) * num, M_PRIVCMD, M_WAITOK); errs = malloc(sizeof(*errs) * num, M_PRIVCMD, M_WAITOK); set_xen_guest_handle(add.idxs, idxs); set_xen_guest_handle(add.gpfns, gpfns); set_xen_guest_handle(add.errs, errs); for (index = 0; index < mmap->num; index += num) { num = MIN(mmap->num - index, UINT16_MAX); add.size = num; error = copyin(&mmap->arr[index], idxs, sizeof(idxs[0]) * num); if (error != 0) goto mmap_out; for (i = 0; i < num; i++) gpfns[i] = atop(umap->phys_base_addr + (i + index) * PAGE_SIZE); bzero(errs, sizeof(*errs) * num); error = HYPERVISOR_memory_op( XENMEM_add_to_physmap_batch, &add); if (error != 0) { error = xen_translate_error(error); goto mmap_out; } for (i = 0; i < num; i++) { if (errs[i] != 0) { errs[i] = xen_translate_error(errs[i]); /* Mark the page as invalid. */ BIT_SET(mmap->num, index + i, umap->err); } } error = copyout(errs, &mmap->err[index], sizeof(errs[0]) * num); if (error != 0) goto mmap_out; } umap->mapped = true; mmap_out: free(idxs, M_PRIVCMD); free(gpfns, M_PRIVCMD); free(errs, M_PRIVCMD); if (!umap->mapped) free(umap->err, M_PRIVCMD); break; } case IOCTL_PRIVCMD_MMAP_RESOURCE: { struct ioctl_privcmd_mmapresource *mmap; struct xen_mem_acquire_resource adq; xen_pfn_t *gpfns; struct privcmd_map *umap; mmap = (struct ioctl_privcmd_mmapresource *)arg; if (u->dom != DOMID_INVALID && u->dom != mmap->dom) { error = EPERM; break; } bzero(&adq, sizeof(adq)); adq.domid = mmap->dom; adq.type = mmap->type; adq.id = mmap->id; /* Shortcut for getting the resource size. */ if (mmap->addr == 0 && mmap->num == 0) { error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq); if (error != 0) error = xen_translate_error(error); else mmap->num = adq.nr_frames; break; } umap = setup_virtual_area(td, mmap->addr, mmap->num); if (umap == NULL) { error = EINVAL; break; } adq.nr_frames = mmap->num; adq.frame = mmap->idx; gpfns = malloc(sizeof(*gpfns) * mmap->num, M_PRIVCMD, M_WAITOK); for (i = 0; i < mmap->num; i++) gpfns[i] = atop(umap->phys_base_addr) + i; set_xen_guest_handle(adq.frame_list, gpfns); error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &adq); if (error != 0) error = xen_translate_error(error); else umap->mapped = true; free(gpfns, M_PRIVCMD); if (!umap->mapped) free(umap->err, M_PRIVCMD); break; } case IOCTL_PRIVCMD_DM_OP: { const struct ioctl_privcmd_dmop *dmop; struct privcmd_dmop_buf *bufs; struct xen_dm_op_buf *hbufs; dmop = (struct ioctl_privcmd_dmop *)arg; if (u->dom != DOMID_INVALID && u->dom != dmop->dom) { error = EPERM; break; } if (dmop->num == 0) break; if (dmop->num > MAX_DMOP_BUFFERS) { error = E2BIG; break; } bufs = malloc(sizeof(*bufs) * dmop->num, M_PRIVCMD, M_WAITOK); error = copyin(dmop->ubufs, bufs, sizeof(*bufs) * dmop->num); if (error != 0) { free(bufs, M_PRIVCMD); break; } hbufs = malloc(sizeof(*hbufs) * dmop->num, M_PRIVCMD, M_WAITOK); for (i = 0; i < dmop->num; i++) { set_xen_guest_handle(hbufs[i].h, bufs[i].uptr); hbufs[i].size = bufs[i].size; } #ifdef __amd64__ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) stac(); #endif error = HYPERVISOR_dm_op(dmop->dom, dmop->num, hbufs); #ifdef __amd64__ if (cpu_stdext_feature & CPUID_STDEXT_SMAP) clac(); #endif if (error != 0) error = xen_translate_error(error); free(bufs, M_PRIVCMD); free(hbufs, M_PRIVCMD); break; } case IOCTL_PRIVCMD_RESTRICT: { struct per_user_data *u; domid_t dom; dom = *(domid_t *)arg; error = devfs_get_cdevpriv((void **)&u); if (error != 0) break; if (u->dom != DOMID_INVALID && u->dom != dom) { error = -EINVAL; break; } u->dom = dom; break; } default: error = ENOSYS; break; } return (error); } static void user_release(void *arg) { free(arg, M_PRIVCMD); } static int privcmd_open(struct cdev *dev, int flag, int otyp, struct thread *td) { struct per_user_data *u; int error; u = malloc(sizeof(*u), M_PRIVCMD, M_WAITOK); u->dom = DOMID_INVALID; /* Assign the allocated per_user_data to this open instance. */ error = devfs_set_cdevpriv(u, user_release); if (error != 0) { free(u, M_PRIVCMD); } return (error); } /*------------------ Private Device Attachment Functions --------------------*/ static void privcmd_identify(driver_t *driver, device_t parent) { KASSERT(xen_domain(), ("Trying to attach privcmd device on non Xen domain")); if (BUS_ADD_CHILD(parent, 0, "privcmd", 0) == NULL) panic("unable to attach privcmd user-space device"); } static int privcmd_probe(device_t dev) { privcmd_dev = dev; device_set_desc(dev, "Xen privileged interface user-space device"); return (BUS_PROBE_NOWILDCARD); } static int privcmd_attach(device_t dev) { make_dev_credf(MAKEDEV_ETERNAL, &privcmd_devsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600, "xen/privcmd"); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t privcmd_methods[] = { DEVMETHOD(device_identify, privcmd_identify), DEVMETHOD(device_probe, privcmd_probe), DEVMETHOD(device_attach, privcmd_attach), DEVMETHOD_END }; static driver_t privcmd_driver = { "privcmd", privcmd_methods, 0, }; DRIVER_MODULE(privcmd, xenpv, privcmd_driver, 0, 0); MODULE_DEPEND(privcmd, xenpv, 1, 1, 1); diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 4f8651411851..a5be05efc6d9 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -1,502 +1,514 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void dev_pager_init(void); static vm_object_t dev_pager_alloc(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); static void dev_pager_dealloc(vm_object_t); static int dev_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); static void dev_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); static boolean_t dev_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); static void dev_pager_free_page(vm_object_t object, vm_page_t m); static int dev_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type, vm_prot_t, vm_pindex_t *first, vm_pindex_t *last); /* list of device pager objects */ static struct pagerlst dev_pager_object_list; /* protect list manipulation */ static struct mtx dev_pager_mtx; const struct pagerops devicepagerops = { .pgo_kvme_type = KVME_TYPE_DEVICE, .pgo_init = dev_pager_init, .pgo_alloc = dev_pager_alloc, .pgo_dealloc = dev_pager_dealloc, .pgo_getpages = dev_pager_getpages, .pgo_putpages = dev_pager_putpages, .pgo_haspage = dev_pager_haspage, }; const struct pagerops mgtdevicepagerops = { .pgo_kvme_type = KVME_TYPE_MGTDEVICE, .pgo_alloc = dev_pager_alloc, .pgo_dealloc = dev_pager_dealloc, .pgo_getpages = dev_pager_getpages, .pgo_putpages = dev_pager_putpages, .pgo_haspage = dev_pager_haspage, .pgo_populate = dev_pager_populate, }; static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); static void old_dev_pager_dtor(void *handle); static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres); static const struct cdev_pager_ops old_dev_pager_ops = { .cdev_pg_ctor = old_dev_pager_ctor, .cdev_pg_dtor = old_dev_pager_dtor, .cdev_pg_fault = old_dev_pager_fault }; static void dev_pager_init(void) { TAILQ_INIT(&dev_pager_object_list); mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF); } vm_object_t cdev_pager_lookup(void *handle) { vm_object_t object; again: mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object != NULL && object->un_pager.devp.dev == NULL) { msleep(&object->un_pager.devp.dev, &dev_pager_mtx, PVM | PDROP, "cdplkp", 0); vm_object_deallocate(object); goto again; } mtx_unlock(&dev_pager_mtx); return (object); } vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, const struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { vm_object_t object; vm_pindex_t pindex; if (tp != OBJT_DEVICE && tp != OBJT_MGTDEVICE) return (NULL); KASSERT(tp == OBJT_MGTDEVICE || ops->cdev_pg_populate == NULL, ("populate on unmanaged device pager")); /* * Offset should be page aligned. */ if (foff & PAGE_MASK) return (NULL); /* * Treat the mmap(2) file offset as an unsigned value for a * device mapping. This, in effect, allows a user to pass all * possible off_t values as the mapping cookie to the driver. At * this point, we know that both foff and size are a multiple * of the page size. Do a check to avoid wrap. */ size = round_page(size); pindex = OFF_TO_IDX(foff) + OFF_TO_IDX(size); if (pindex > OBJ_MAX_SIZE || pindex < OFF_TO_IDX(foff) || pindex < OFF_TO_IDX(size)) return (NULL); again: mtx_lock(&dev_pager_mtx); /* * Look up pager, creating as necessary. */ object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object == NULL) { vm_object_t object1; /* * Allocate object and associate it with the pager. Initialize * the object's pg_color based upon the physical address of the * device's memory. */ mtx_unlock(&dev_pager_mtx); object1 = vm_object_allocate(tp, pindex); mtx_lock(&dev_pager_mtx); object = vm_pager_object_lookup(&dev_pager_object_list, handle); if (object != NULL) { object1->type = OBJT_DEAD; vm_object_deallocate(object1); object1 = NULL; if (object->un_pager.devp.dev == NULL) { msleep(&object->un_pager.devp.dev, &dev_pager_mtx, PVM | PDROP, "cdplkp", 0); vm_object_deallocate(object); goto again; } /* * We raced with other thread while allocating object. */ if (pindex > object->size) object->size = pindex; KASSERT(object->type == tp, ("Inconsistent device pager type %p %d", object, tp)); KASSERT(object->un_pager.devp.ops == ops, ("Inconsistent devops %p %p", object, ops)); } else { u_short color; object = object1; object1 = NULL; object->handle = handle; object->un_pager.devp.ops = ops; TAILQ_INIT(&object->un_pager.devp.devp_pglist); TAILQ_INSERT_TAIL(&dev_pager_object_list, object, pager_object_list); mtx_unlock(&dev_pager_mtx); if (ops->cdev_pg_populate != NULL) vm_object_set_flag(object, OBJ_POPULATE); if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0) { mtx_lock(&dev_pager_mtx); TAILQ_REMOVE(&dev_pager_object_list, object, pager_object_list); wakeup(&object->un_pager.devp.dev); mtx_unlock(&dev_pager_mtx); object->type = OBJT_DEAD; vm_object_deallocate(object); object = NULL; mtx_lock(&dev_pager_mtx); } else { mtx_lock(&dev_pager_mtx); object->flags |= OBJ_COLORED; object->pg_color = color; object->un_pager.devp.dev = handle; wakeup(&object->un_pager.devp.dev); } } MPASS(object1 == NULL); } else { if (object->un_pager.devp.dev == NULL) { msleep(&object->un_pager.devp.dev, &dev_pager_mtx, PVM | PDROP, "cdplkp", 0); vm_object_deallocate(object); goto again; } if (pindex > object->size) object->size = pindex; KASSERT(object->type == tp, ("Inconsistent device pager type %p %d", object, tp)); } mtx_unlock(&dev_pager_mtx); return (object); } static vm_object_t dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred) { return (cdev_pager_allocate(handle, OBJT_DEVICE, &old_dev_pager_ops, size, prot, foff, cred)); } void cdev_pager_free_page(vm_object_t object, vm_page_t m) { - VM_OBJECT_ASSERT_WLOCKED(object); - if (object->type == OBJT_MGTDEVICE) { - KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); - pmap_remove_all(m); - (void)vm_page_remove(m); - } else if (object->type == OBJT_DEVICE) + if (object->type == OBJT_MGTDEVICE) + cdev_mgtdev_pager_free_page(object, m); + else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); + else + KASSERT(false, + ("Invalid device type obj %p m %p", object, m)); +} + +void +cdev_mgtdev_pager_free_page(vm_object_t object, vm_page_t m) +{ + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((object->type == OBJT_MGTDEVICE && + (m->oflags & VPO_UNMANAGED) == 0), + ("Unmanaged device or page obj %p m %p", object, m)); + pmap_remove_all(m); + (void)vm_page_remove(m); } static void dev_pager_free_page(vm_object_t object, vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->type == OBJT_DEVICE && (m->oflags & VPO_UNMANAGED) != 0), ("Managed device or page obj %p m %p", object, m)); TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, plinks.q); vm_page_putfake(m); } static void dev_pager_dealloc(vm_object_t object) { vm_page_t m; VM_OBJECT_WUNLOCK(object); object->un_pager.devp.ops->cdev_pg_dtor(object->un_pager.devp.dev); mtx_lock(&dev_pager_mtx); TAILQ_REMOVE(&dev_pager_object_list, object, pager_object_list); mtx_unlock(&dev_pager_mtx); VM_OBJECT_WLOCK(object); if (object->type == OBJT_DEVICE) { /* * Free up our fake pages. */ while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) != NULL) { if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) continue; dev_pager_free_page(object, m); } } object->handle = NULL; object->type = OBJT_DEAD; } static int dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int *rbehind, int *rahead) { int error; /* Since our haspage reports zero after/before, the count is 1. */ KASSERT(count == 1, ("%s: count %d", __func__, count)); if (object->un_pager.devp.ops->cdev_pg_fault == NULL) return (VM_PAGER_FAIL); VM_OBJECT_WLOCK(object); error = object->un_pager.devp.ops->cdev_pg_fault(object, IDX_TO_OFF(ma[0]->pindex), PROT_READ, &ma[0]); VM_OBJECT_ASSERT_WLOCKED(object); if (error == VM_PAGER_OK) { KASSERT((object->type == OBJT_DEVICE && (ma[0]->oflags & VPO_UNMANAGED) != 0) || (object->type == OBJT_MGTDEVICE && (ma[0]->oflags & VPO_UNMANAGED) == 0), ("Wrong page type %p %p", ma[0], object)); if (object->type == OBJT_DEVICE) { TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, ma[0], plinks.q); } if (rbehind) *rbehind = 0; if (rahead) *rahead = 0; } VM_OBJECT_WUNLOCK(object); return (error); } static int dev_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) { VM_OBJECT_ASSERT_WLOCKED(object); if (object->un_pager.devp.ops->cdev_pg_populate == NULL) return (VM_PAGER_FAIL); return (object->un_pager.devp.ops->cdev_pg_populate(object, pidx, fault_type, max_prot, first, last)); } static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { vm_paddr_t paddr; vm_page_t m_paddr, page; struct cdev *dev; struct cdevsw *csw; struct file *fpop; struct thread *td; vm_memattr_t memattr, memattr1; int ref, ret; memattr = object->memattr; VM_OBJECT_WUNLOCK(object); dev = object->handle; csw = dev_refthread(dev, &ref); if (csw == NULL) { VM_OBJECT_WLOCK(object); return (VM_PAGER_FAIL); } td = curthread; fpop = td->td_fpop; td->td_fpop = NULL; ret = csw->d_mmap(dev, offset, &paddr, prot, &memattr); td->td_fpop = fpop; dev_relthread(dev, ref); if (ret != 0) { printf( "WARNING: dev_pager_getpage: map function returns error %d", ret); VM_OBJECT_WLOCK(object); return (VM_PAGER_FAIL); } /* If "paddr" is a real page, perform a sanity check on "memattr". */ if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL && (memattr1 = pmap_page_get_memattr(m_paddr)) != memattr) { /* * For the /dev/mem d_mmap routine to return the * correct memattr, pmap_page_get_memattr() needs to * be called, which we do there. */ if ((csw->d_flags & D_MEM) == 0) { printf("WARNING: Device driver %s has set " "\"memattr\" inconsistently (drv %u pmap %u).\n", csw->d_name, memattr, memattr1); } memattr = memattr1; } if (((*mres)->flags & PG_FICTITIOUS) != 0) { /* * If the passed in result page is a fake page, update it with * the new physical address. */ page = *mres; VM_OBJECT_WLOCK(object); vm_page_updatefake(page, paddr, memattr); } else { /* * Replace the passed in reqpage page with our own fake page and * free up the all of the original pages. */ page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); vm_page_replace(page, object, (*mres)->pindex, *mres); *mres = page; } vm_page_valid(page); return (VM_PAGER_OK); } static void dev_pager_putpages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { panic("dev_pager_putpage called"); } static boolean_t dev_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after) { if (before != NULL) *before = 0; if (after != NULL) *after = 0; return (TRUE); } static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { struct cdev *dev; struct cdevsw *csw; vm_memattr_t dummy; vm_ooffset_t off; vm_paddr_t paddr; unsigned int npages; int ref; /* * Make sure this device can be mapped. */ dev = handle; csw = dev_refthread(dev, &ref); if (csw == NULL) return (ENXIO); /* * Check that the specified range of the device allows the desired * protection. * * XXX assumes VM_PROT_* == PROT_* */ npages = OFF_TO_IDX(size); paddr = 0; /* Make paddr initialized for the case of size == 0. */ for (off = foff; npages--; off += PAGE_SIZE) { if (csw->d_mmap(dev, off, &paddr, (int)prot, &dummy) != 0) { dev_relthread(dev, ref); return (EINVAL); } } dev_ref(dev); dev_relthread(dev, ref); *color = atop(paddr) - OFF_TO_IDX(off - PAGE_SIZE); return (0); } static void old_dev_pager_dtor(void *handle) { dev_rel(handle); } diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h index 7d6b2e96b38c..d30bf349e411 100644 --- a/sys/vm/vm_pager.h +++ b/sys/vm/vm_pager.h @@ -1,322 +1,323 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1990 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Pager routine interface definition. */ #ifndef _VM_PAGER_ #define _VM_PAGER_ #include TAILQ_HEAD(pagerlst, vm_object); struct vnode; typedef void pgo_init_t(void); typedef vm_object_t pgo_alloc_t(void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); typedef void pgo_dealloc_t(vm_object_t); typedef int pgo_getpages_t(vm_object_t, vm_page_t *, int, int *, int *); typedef void pgo_getpages_iodone_t(void *, vm_page_t *, int, int); typedef int pgo_getpages_async_t(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); typedef void pgo_putpages_t(vm_object_t, vm_page_t *, int, int, int *); typedef boolean_t pgo_haspage_t(vm_object_t, vm_pindex_t, int *, int *); typedef int pgo_populate_t(vm_object_t, vm_pindex_t, int, vm_prot_t, vm_pindex_t *, vm_pindex_t *); typedef void pgo_pageunswapped_t(vm_page_t); typedef void pgo_writecount_t(vm_object_t, vm_offset_t, vm_offset_t); typedef void pgo_set_writeable_dirty_t(vm_object_t); typedef bool pgo_mightbedirty_t(vm_object_t); typedef void pgo_getvp_t(vm_object_t object, struct vnode **vpp, bool *vp_heldp); typedef void pgo_freespace_t(vm_object_t object, vm_pindex_t start, vm_size_t size); typedef void pgo_page_inserted_t(vm_object_t object, vm_page_t m); typedef void pgo_page_removed_t(vm_object_t object, vm_page_t m); typedef boolean_t pgo_can_alloc_page_t(vm_object_t object, vm_pindex_t pindex); struct pagerops { int pgo_kvme_type; pgo_init_t *pgo_init; /* Initialize pager. */ pgo_alloc_t *pgo_alloc; /* Allocate pager. */ pgo_dealloc_t *pgo_dealloc; /* Disassociate. */ pgo_getpages_t *pgo_getpages; /* Get (read) page. */ pgo_getpages_async_t *pgo_getpages_async; /* Get page asyncly. */ pgo_putpages_t *pgo_putpages; /* Put (write) page. */ pgo_haspage_t *pgo_haspage; /* Query page. */ pgo_populate_t *pgo_populate; /* Bulk spec pagein. */ pgo_pageunswapped_t *pgo_pageunswapped; pgo_writecount_t *pgo_update_writecount; pgo_writecount_t *pgo_release_writecount; pgo_set_writeable_dirty_t *pgo_set_writeable_dirty; pgo_mightbedirty_t *pgo_mightbedirty; pgo_getvp_t *pgo_getvp; pgo_freespace_t *pgo_freespace; pgo_page_inserted_t *pgo_page_inserted; pgo_page_removed_t *pgo_page_removed; pgo_can_alloc_page_t *pgo_can_alloc_page; }; extern const struct pagerops defaultpagerops; extern const struct pagerops swappagerops; extern const struct pagerops vnodepagerops; extern const struct pagerops devicepagerops; extern const struct pagerops physpagerops; extern const struct pagerops sgpagerops; extern const struct pagerops mgtdevicepagerops; extern const struct pagerops swaptmpfspagerops; /* * get/put return values * OK operation was successful * BAD specified data was out of the accepted range * FAIL specified data was in range, but doesn't exist * PEND operations was initiated but not completed * ERROR error while accessing data that is in range and exists * AGAIN temporary resource shortage prevented operation from happening */ #define VM_PAGER_OK 0 #define VM_PAGER_BAD 1 #define VM_PAGER_FAIL 2 #define VM_PAGER_PEND 3 #define VM_PAGER_ERROR 4 #define VM_PAGER_AGAIN 5 #define VM_PAGER_PUT_SYNC 0x0001 #define VM_PAGER_PUT_INVAL 0x0002 #define VM_PAGER_PUT_NOREUSE 0x0004 #define VM_PAGER_CLUSTER_OK 0x0008 #ifdef _KERNEL extern const struct pagerops *pagertab[] __read_mostly; extern struct mtx_padalign pbuf_mtx; /* * Number of pages that pbuf buffer can store in b_pages. * It is +1 to allow for unaligned data buffer of maxphys size. */ #define PBUF_PAGES (atop(maxphys) + 1) vm_object_t vm_pager_allocate(objtype_t, void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t, struct ucred *); void vm_pager_bufferinit(void); void vm_pager_deallocate(vm_object_t); int vm_pager_get_pages(vm_object_t, vm_page_t *, int, int *, int *); int vm_pager_get_pages_async(vm_object_t, vm_page_t *, int, int *, int *, pgo_getpages_iodone_t, void *); void vm_pager_init(void); vm_object_t vm_pager_object_lookup(struct pagerlst *, void *); static __inline void vm_pager_put_pages(vm_object_t object, vm_page_t *m, int count, int flags, int *rtvals) { VM_OBJECT_ASSERT_WLOCKED(object); (*pagertab[object->type]->pgo_putpages) (object, m, count, flags, rtvals); } /* * vm_pager_haspage * * Check to see if an object's pager has the requested page. The * object's pager will also set before and after to give the caller * some idea of the number of pages before and after the requested * page can be I/O'd efficiently. * * The object must be locked. */ static __inline boolean_t vm_pager_has_page(vm_object_t object, vm_pindex_t offset, int *before, int *after) { boolean_t ret; VM_OBJECT_ASSERT_LOCKED(object); ret = (*pagertab[object->type]->pgo_haspage) (object, offset, before, after); return (ret); } static __inline int vm_pager_populate(vm_object_t object, vm_pindex_t pidx, int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last) { MPASS((object->flags & OBJ_POPULATE) != 0); MPASS(pidx < object->size); MPASS(blockcount_read(&object->paging_in_progress) > 0); return ((*pagertab[object->type]->pgo_populate)(object, pidx, fault_type, max_prot, first, last)); } /* * vm_pager_page_unswapped * * Destroy swap associated with the page. * * XXX: A much better name would be "vm_pager_page_dirtied()" * XXX: It is not obvious if this could be profitably used by any * XXX: pagers besides the swap_pager or if it should even be a * XXX: generic pager_op in the first place. */ static __inline void vm_pager_page_unswapped(vm_page_t m) { pgo_pageunswapped_t *method; method = pagertab[m->object->type]->pgo_pageunswapped; if (method != NULL) method(m); } static __inline void vm_pager_update_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { pgo_writecount_t *method; method = pagertab[object->type]->pgo_update_writecount; if (method != NULL) method(object, start, end); } static __inline void vm_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end) { pgo_writecount_t *method; method = pagertab[object->type]->pgo_release_writecount; if (method != NULL) method(object, start, end); } static __inline void vm_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp) { pgo_getvp_t *method; *vpp = NULL; if (vp_heldp != NULL) *vp_heldp = false; method = pagertab[object->type]->pgo_getvp; if (method != NULL) method(object, vpp, vp_heldp); } static __inline void vm_pager_freespace(vm_object_t object, vm_pindex_t start, vm_size_t size) { pgo_freespace_t *method; method = pagertab[object->type]->pgo_freespace; if (method != NULL) method(object, start, size); } static __inline void vm_pager_page_inserted(vm_object_t object, vm_page_t m) { pgo_page_inserted_t *method; method = pagertab[object->type]->pgo_page_inserted; if (method != NULL) method(object, m); } static __inline void vm_pager_page_removed(vm_object_t object, vm_page_t m) { pgo_page_removed_t *method; method = pagertab[object->type]->pgo_page_removed; if (method != NULL) method(object, m); } static __inline bool vm_pager_can_alloc_page(vm_object_t object, vm_pindex_t pindex) { pgo_can_alloc_page_t *method; method = pagertab[object->type]->pgo_can_alloc_page; return (method != NULL ? method(object, pindex) : true); } int vm_pager_alloc_dyn_type(struct pagerops *ops, int base_type); void vm_pager_free_dyn_type(objtype_t type); struct cdev_pager_ops { int (*cdev_pg_fault)(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres); int (*cdev_pg_populate)(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last); int (*cdev_pg_ctor)(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color); void (*cdev_pg_dtor)(void *handle); }; vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp, const struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred); vm_object_t cdev_pager_lookup(void *handle); void cdev_pager_free_page(vm_object_t object, vm_page_t m); +void cdev_mgtdev_pager_free_page(vm_object_t object, vm_page_t m); struct phys_pager_ops { int (*phys_pg_getpages)(vm_object_t vm_obj, vm_page_t *m, int count, int *rbehind, int *rahead); int (*phys_pg_populate)(vm_object_t vm_obj, vm_pindex_t pidx, int fault_type, vm_prot_t max_prot, vm_pindex_t *first, vm_pindex_t *last); boolean_t (*phys_pg_haspage)(vm_object_t obj, vm_pindex_t pindex, int *before, int *after); void (*phys_pg_ctor)(vm_object_t vm_obj, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred); void (*phys_pg_dtor)(vm_object_t vm_obj); }; extern const struct phys_pager_ops default_phys_pg_ops; vm_object_t phys_pager_allocate(void *handle, const struct phys_pager_ops *ops, void *data, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred); #endif /* _KERNEL */ #endif /* _VM_PAGER_ */