diff --git a/sys/compat/linuxkpi/common/include/linux/fs.h b/sys/compat/linuxkpi/common/include/linux/fs.h index 31ab194e5b74..12f3a46f3b98 100644 --- a/sys/compat/linuxkpi/common/include/linux/fs.h +++ b/sys/compat/linuxkpi/common/include/linux/fs.h @@ -1,291 +1,312 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. - * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. + * Copyright (c) 2013-2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_FS_H_ #define _LINUX_FS_H_ #include #include #include #include #include #include #include #include #include #include #include struct module; struct kiocb; struct iovec; struct dentry; struct page; struct file_lock; struct pipe_inode_info; struct vm_area_struct; struct poll_table_struct; struct files_struct; struct pfs_node; #define inode vnode #define i_cdev v_rdev #define i_private v_data #define S_IRUGO (S_IRUSR | S_IRGRP | S_IROTH) #define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH) typedef struct files_struct *fl_owner_t; struct dentry { struct inode *d_inode; struct pfs_node *d_pfs_node; }; struct file_operations; struct linux_file_wait_queue { struct wait_queue wq; struct wait_queue_head *wqh; atomic_t state; #define LINUX_FWQ_STATE_INIT 0 #define LINUX_FWQ_STATE_NOT_READY 1 #define LINUX_FWQ_STATE_QUEUED 2 #define LINUX_FWQ_STATE_READY 3 #define LINUX_FWQ_STATE_MAX 4 }; struct linux_file { struct file *_file; const struct file_operations *f_op; void *private_data; int f_flags; int f_mode; /* Just starting mode. */ struct dentry *f_dentry; struct dentry f_dentry_store; struct selinfo f_selinfo; struct sigio *f_sigio; struct vnode *f_vnode; #define f_inode f_vnode volatile u_int f_count; /* anonymous shmem object */ vm_object_t f_shmem; /* kqfilter support */ int f_kqflags; #define LINUX_KQ_FLAG_HAS_READ (1 << 0) #define LINUX_KQ_FLAG_HAS_WRITE (1 << 1) #define LINUX_KQ_FLAG_NEED_READ (1 << 2) #define LINUX_KQ_FLAG_NEED_WRITE (1 << 3) /* protects f_selinfo.si_note */ spinlock_t f_kqlock; struct linux_file_wait_queue f_wait_queue; }; #define file linux_file #define fasync_struct sigio * #define fasync_helper(fd, filp, on, queue) \ ({ \ if ((on)) \ *(queue) = &(filp)->f_sigio; \ else \ *(queue) = NULL; \ 0; \ }) #define kill_fasync(queue, sig, pollstat) \ do { \ if (*(queue) != NULL) \ pgsigio(*(queue), (sig), 0); \ } while (0) typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct file_operations { struct module *owner; ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long); int (*mmap)(struct file *, struct vm_area_struct *); int (*open)(struct inode *, struct file *); int (*release)(struct inode *, struct file *); int (*fasync)(int, struct file *, int); /* Although not supported in FreeBSD, to align with Linux code * we are adding llseek() only when it is mapped to no_llseek which returns * an illegal seek error */ loff_t (*llseek)(struct file *, loff_t, int); #if 0 /* We do not support these methods. Don't permit them to compile. */ loff_t (*llseek)(struct file *, loff_t, int); ssize_t (*aio_read)(struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write)(struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir)(struct file *, void *, filldir_t); int (*ioctl)(struct inode *, struct file *, unsigned int, unsigned long); long (*compat_ioctl)(struct file *, unsigned int, unsigned long); int (*flush)(struct file *, fl_owner_t id); int (*fsync)(struct file *, struct dentry *, int datasync); int (*aio_fsync)(struct kiocb *, int datasync); int (*lock)(struct file *, int, struct file_lock *); ssize_t (*sendpage)(struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock)(struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int (*setlease)(struct file *, long, struct file_lock **); #endif }; #define fops_get(fops) (fops) #define replace_fops(f, fops) ((f)->f_op = (fops)) #define FMODE_READ FREAD #define FMODE_WRITE FWRITE #define FMODE_EXEC FEXEC int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops); int __register_chrdev_p(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops, uid_t uid, gid_t gid, int mode); void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name); static inline void unregister_chrdev(unsigned int major, const char *name) { __unregister_chrdev(major, 0, 256, name); } static inline int register_chrdev(unsigned int major, const char *name, const struct file_operations *fops) { return (__register_chrdev(major, 0, 256, name, fops)); } static inline int register_chrdev_p(unsigned int major, const char *name, const struct file_operations *fops, uid_t uid, gid_t gid, int mode) { return (__register_chrdev_p(major, 0, 256, name, fops, uid, gid, mode)); } static inline int register_chrdev_region(dev_t dev, unsigned range, const char *name) { return 0; } static inline void unregister_chrdev_region(dev_t dev, unsigned range) { return; } static inline int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, const char *name) { return 0; } /* No current support for seek op in FreeBSD */ static inline int nonseekable_open(struct inode *inode, struct file *filp) { return 0; } extern unsigned int linux_iminor(struct inode *); #define iminor(...) linux_iminor(__VA_ARGS__) static inline struct linux_file * get_file(struct linux_file *f) { refcount_acquire(f->_file == NULL ? &f->f_count : &f->_file->f_count); return (f); } static inline struct inode * igrab(struct inode *inode) { int error; error = vget(inode, 0, curthread); if (error) return (NULL); return (inode); } static inline void iput(struct inode *inode) { vrele(inode); } static inline loff_t no_llseek(struct file *file, loff_t offset, int whence) { return (-ESPIPE); } static inline loff_t noop_llseek(struct linux_file *file, loff_t offset, int whence) { return (file->_file->f_offset); } +/* Shared memory support */ +unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t); +struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t); +struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long); +void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t); + +#define invalidate_mapping_pages(...) \ + linux_invalidate_mapping_pages(__VA_ARGS__) + +#define shmem_read_mapping_page(...) \ + linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0) + +#define shmem_read_mapping_page_gfp(...) \ + linux_shmem_read_mapping_page_gfp(__VA_ARGS__) + +#define shmem_file_setup(...) \ + linux_shmem_file_setup(__VA_ARGS__) + +#define shmem_truncate_range(...) \ + linux_shmem_truncate_range(__VA_ARGS__) + #endif /* _LINUX_FS_H_ */ diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index ba58211232e7..2a3e33d3d2f3 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -1,291 +1,396 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org) * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #if defined(__amd64__) || defined(__aarch64__) || defined(__riscv) #define LINUXKPI_HAVE_DMAP #else #undef LINUXKPI_HAVE_DMAP #endif void * linux_page_address(struct page *page) { if (page->object != kmem_object && page->object != kernel_object) { #ifdef LINUXKPI_HAVE_DMAP return ((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page))); #else return (NULL); #endif } return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS + IDX_TO_OFF(page->pindex))); } vm_page_t linux_alloc_pages(gfp_t flags, unsigned int order) { #ifdef LINUXKPI_HAVE_DMAP unsigned long npages = 1UL << order; int req = (flags & M_ZERO) ? (VM_ALLOC_ZERO | VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL) : (VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL); vm_page_t page; if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc(NULL, 0, req); if (page == NULL) return (NULL); } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; retry: page = vm_page_alloc_contig(NULL, 0, req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (page == NULL) { if (flags & M_WAITOK) { if (!vm_page_reclaim_contig(req, npages, 0, pmax, PAGE_SIZE, 0)) { VM_WAIT; } flags &= ~M_WAITOK; goto retry; } return (NULL); } } if (flags & M_ZERO) { unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; if ((pgo->flags & PG_ZERO) == 0) pmap_zero_page(pgo); } } #else vm_offset_t vaddr; vm_page_t page; vaddr = linux_alloc_kmem(flags, order); if (vaddr == 0) return (NULL); page = PHYS_TO_VM_PAGE(vtophys((void *)vaddr)); KASSERT(vaddr == (vm_offset_t)page_address(page), ("Page address mismatch")); #endif return (page); } void linux_free_pages(vm_page_t page, unsigned int order) { #ifdef LINUXKPI_HAVE_DMAP unsigned long npages = 1UL << order; unsigned long x; for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; vm_page_lock(pgo); vm_page_free(pgo); vm_page_unlock(pgo); } #else vm_offset_t vaddr; vaddr = (vm_offset_t)page_address(page); linux_free_kmem(vaddr, order); #endif } vm_offset_t linux_alloc_kmem(gfp_t flags, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; vm_offset_t addr; if ((flags & GFP_DMA32) == 0) { addr = kmem_malloc(kmem_arena, size, flags & GFP_NATIVE_MASK); } else { addr = kmem_alloc_contig(kmem_arena, size, flags & GFP_NATIVE_MASK, 0, BUS_SPACE_MAXADDR_32BIT, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); } return (addr); } void linux_free_kmem(vm_offset_t addr, unsigned int order) { size_t size = ((size_t)PAGE_SIZE) << order; kmem_free(kmem_arena, addr, size); } static int linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages, int write, struct page **pages) { vm_prot_t prot; size_t len; int count; int i; prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; len = ((size_t)nr_pages) << PAGE_SHIFT; count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages); if (count == -1) return (-EFAULT); for (i = 0; i != nr_pages; i++) { struct page *pg = pages[i]; vm_page_lock(pg); vm_page_wire(pg); vm_page_unlock(pg); } return (nr_pages); } int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { vm_map_t map; vm_page_t *mp; vm_offset_t va; vm_offset_t end; vm_prot_t prot; int count; if (nr_pages == 0 || in_interrupt()) return (0); MPASS(pages != NULL); va = start; map = &curthread->td_proc->p_vmspace->vm_map; end = start + (((size_t)nr_pages) << PAGE_SHIFT); if (start < vm_map_min(map) || end > vm_map_max(map)) return (-EINVAL); prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ; for (count = 0, mp = pages, va = start; va < end; mp++, va += PAGE_SIZE, count++) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) break; vm_page_lock(*mp); vm_page_wire(*mp); vm_page_unlock(*mp); if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } return (count); } long get_user_pages_remote(struct task_struct *task, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &task->task_thread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } long get_user_pages(unsigned long start, unsigned long nr_pages, int gup_flags, struct page **pages, struct vm_area_struct **vmas) { vm_map_t map; map = &curthread->td_proc->p_vmspace->vm_map; return (linux_get_user_pages_internal(map, start, nr_pages, !!(gup_flags & FOLL_WRITE), pages)); } int is_vmalloc_addr(const void *addr) { return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL); } + +struct page * +linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp) +{ + vm_page_t page; + int rv; + + if ((gfp & GFP_NOWAIT) != 0) + panic("GFP_NOWAIT is unimplemented"); + + VM_OBJECT_WLOCK(obj); + page = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED); + if (page->valid != VM_PAGE_BITS_ALL) { + vm_page_xbusy(page); + if (vm_pager_has_page(obj, pindex, NULL, NULL)) { + rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL); + if (rv != VM_PAGER_OK) { + vm_page_lock(page); + vm_page_unwire(page, PQ_NONE); + vm_page_free(page); + vm_page_unlock(page); + VM_OBJECT_WUNLOCK(obj); + return (ERR_PTR(-EINVAL)); + } + MPASS(page->valid == VM_PAGE_BITS_ALL); + } else { + pmap_zero_page(page); + page->valid = VM_PAGE_BITS_ALL; + page->dirty = 0; + } + vm_page_xunbusy(page); + } + vm_page_lock(page); + vm_page_hold(page); + vm_page_unlock(page); + VM_OBJECT_WUNLOCK(obj); + return (page); +} + +struct linux_file * +linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags) +{ + struct fileobj { + struct linux_file file __aligned(sizeof(void *)); + struct vnode vnode __aligned(sizeof(void *)); + }; + struct fileobj *fileobj; + struct linux_file *filp; + struct vnode *vp; + int error; + + fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL); + if (fileobj == NULL) { + error = -ENOMEM; + goto err_0; + } + filp = &fileobj->file; + vp = &fileobj->vnode; + + filp->f_count = 1; + filp->f_vnode = vp; + filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size, + VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred); + if (filp->f_shmem == NULL) { + error = -ENOMEM; + goto err_1; + } + return (filp); +err_1: + kfree(filp); +err_0: + return (ERR_PTR(error)); +} + +static vm_ooffset_t +linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start, + vm_pindex_t end, int flags) +{ + int start_count, end_count; + + VM_OBJECT_WLOCK(obj); + start_count = obj->resident_page_count; + vm_object_page_remove(obj, start, end, flags); + end_count = obj->resident_page_count; + VM_OBJECT_WUNLOCK(obj); + return (start_count - end_count); +} + +unsigned long +linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end) +{ + + return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY)); +} + +void +linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend) +{ + vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1); + vm_pindex_t end = OFF_TO_IDX(lend + 1); + + (void) linux_invalidate_mapping_pages_sub(obj, start, end, 0); +}