Index: sys/dev/drm/drm_bufs.c =================================================================== --- sys/dev/drm/drm_bufs.c +++ sys/dev/drm/drm_bufs.c @@ -1067,12 +1067,12 @@ vaddr = round_page((vm_offset_t)vms->vm_daddr + MAXDSIZ); #if __FreeBSD_version >= 600023 - retcode = vm_mmap(&vms->vm_map, &vaddr, size, PROT_READ | PROT_WRITE, - VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE, + retcode = vm_mmap(&vms->vm_map, &vaddr, size, VM_PROT_READ | + VM_PROT_WRITE, VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE, dev->devnode, foff); #else - retcode = vm_mmap(&vms->vm_map, &vaddr, size, PROT_READ | PROT_WRITE, - VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, + retcode = vm_mmap(&vms->vm_map, &vaddr, size, VM_PROT_READ | + VM_PROT_WRITE, VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, SLIST_FIRST(&dev->devnode->si_hlist), foff); #endif if (retcode) Index: sys/dev/drm2/drm_bufs.c =================================================================== --- sys/dev/drm2/drm_bufs.c +++ sys/dev/drm2/drm_bufs.c @@ -1635,12 +1635,12 @@ goto done; } retcode = vm_mmap(&vms->vm_map, &virtual, map->size, - PROT_READ | PROT_WRITE, VM_PROT_ALL, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE, file_priv->minor->device, token); } else { retcode = vm_mmap(&vms->vm_map, &virtual, dma->byte_count, - PROT_READ | PROT_WRITE, VM_PROT_ALL, + VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE, file_priv->minor->device, 0); } Index: sys/fs/devfs/devfs_vnops.c =================================================================== --- sys/fs/devfs/devfs_vnops.c +++ sys/fs/devfs/devfs_vnops.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,10 @@ #include +#include +#include +#include + static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data"); struct mtx devfs_de_interlock; @@ -1738,6 +1743,65 @@ return (error); } +static int +devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, + vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, + struct thread *td) +{ + struct cdev *dev; + struct cdevsw *dsw; + struct mount *mp; + struct vnode *vp; + struct file *fpop; + vm_object_t object; + vm_prot_t maxprot; + int error, ref; + + vp = fp->f_vnode; + + /* + * Ensure that file and memory protections are + * compatible. + */ + mp = vp->v_mount; + if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) + maxprot = VM_PROT_NONE; + else + maxprot = VM_PROT_EXECUTE; + if ((fp->f_flag & FREAD) != 0) + maxprot |= VM_PROT_READ; + else if ((prot & VM_PROT_READ) != 0) + return (EACCES); + + /* + * Character devices always share mappings, so + * require a writable fd for writable mappings. + */ + if ((fp->f_flag & FWRITE) != 0) + maxprot |= VM_PROT_WRITE; + else if ((prot & VM_PROT_WRITE) != 0) + return (EACCES); + maxprot &= cap_maxprot; + + fpop = td->td_fpop; + error = devfs_fp_check(fp, &dev, &dsw, &ref); + if (error != 0) + return (error); + + error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff, + &object); + td->td_fpop = fpop; + dev_relthread(dev, ref); + if (error != 0) + return (error); + + error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, + foff, FALSE, td); + if (error != 0) + vm_object_deallocate(object); + return (error); +} + dev_t dev2udev(struct cdev *x) { @@ -1760,6 +1824,7 @@ .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, + .fo_mmap = devfs_mmap_f, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; Index: sys/kern/subr_uio.c =================================================================== --- sys/kern/subr_uio.c +++ sys/kern/subr_uio.c @@ -417,7 +417,7 @@ /* round size up to page boundry */ size = (vm_size_t)round_page(sz); - error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE, + error = vm_mmap(&vms->vm_map, addr, size, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0); return (error); Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -127,6 +127,7 @@ static fo_chown_t shm_chown; static fo_seek_t shm_seek; static fo_fill_kinfo_t shm_fill_kinfo; +static fo_mmap_t shm_mmap; /* File descriptor operations. */ static struct fileops shm_ops = { @@ -143,6 +144,7 @@ .fo_sendfile = vn_sendfile, .fo_seek = shm_seek, .fo_fill_kinfo = shm_fill_kinfo, + .fo_mmap = shm_mmap, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -851,15 +853,37 @@ return (error); } -/* - * mmap() helper to validate mmap() requests against shm object state - * and give mmap() the vm_object to use for the mapping. - */ int -shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff, - vm_object_t *obj) +shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize, + vm_prot_t prot, vm_prot_t cap_maxprot, int flags, + vm_ooffset_t foff, struct thread *td) { + struct shmfd *shmfd; + vm_prot_t maxprot; + int error; + + shmfd = fp->f_data; + maxprot = VM_PROT_NONE; + + /* FREAD should always be set. */ + if ((fp->f_flag & FREAD) != 0) + maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; + if ((fp->f_flag & FWRITE) != 0) + maxprot |= VM_PROT_WRITE; + + /* Don't permit shared writable mappings on read-only descriptors. */ + if ((flags & MAP_SHARED) != 0 && + (maxprot & VM_PROT_WRITE) == 0 && + (prot & VM_PROT_WRITE) != 0) + return (EACCES); + maxprot &= cap_maxprot; +#ifdef MAC + error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags); + if (error != 0) + return (error); +#endif + /* * XXXRW: This validation is probably insufficient, and subject to * sign errors. It should be fixed. @@ -872,7 +896,11 @@ vfs_timestamp(&shmfd->shm_atime); mtx_unlock(&shm_timestamp_lock); vm_object_reference(shmfd->shm_object); - *obj = shmfd->shm_object; + + error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags, + shmfd->shm_object, foff, FALSE, td); + if (error != 0) + vm_object_deallocate(shmfd->shm_object); return (0); } Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,7 @@ #include #include #include +#include static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; @@ -90,6 +92,7 @@ static fo_kqfilter_t vn_kqfilter; static fo_stat_t vn_statfile; static fo_close_t vn_closefile; +static fo_mmap_t vn_mmap; struct fileops vnops = { .fo_read = vn_io_fault, @@ -105,6 +108,7 @@ .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, + .fo_mmap = vn_mmap, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; @@ -2358,3 +2362,95 @@ kif->kf_un.kf_file.kf_file_rdev = va.va_rdev; return (0); } + +int +vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, + vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, + struct thread *td) +{ +#ifdef HWPMC_HOOKS + struct pmckern_map_in pkm; +#endif + struct mount *mp; + struct vnode *vp; + vm_object_t object; + vm_prot_t maxprot; + boolean_t writecounted; + int error; + +#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ + defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) + /* + * POSIX shared-memory objects are defined to have + * kernel persistence, and are not defined to support + * read(2)/write(2) -- or even open(2). Thus, we can + * use MAP_ASYNC to trade on-disk coherence for speed. + * The shm_open(3) library routine turns on the FPOSIXSHM + * flag to request this behavior. + */ + if ((fp->f_flag & FPOSIXSHM) != 0) + flags |= MAP_NOSYNC; +#endif + vp = fp->f_vnode; + + /* + * Ensure that file and memory protections are + * compatible. Note that we only worry about + * writability if mapping is shared; in this case, + * current and max prot are dictated by the open file. + * XXX use the vnode instead? Problem is: what + * credentials do we use for determination? What if + * proc does a setuid? + */ + mp = vp->v_mount; + if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) + maxprot = VM_PROT_NONE; + else + maxprot = VM_PROT_EXECUTE; + if ((fp->f_flag & FREAD) != 0) + maxprot |= VM_PROT_READ; + else if ((prot & VM_PROT_READ) != 0) + return (EACCES); + + /* + * If we are sharing potential changes via MAP_SHARED and we + * are trying to get write permission although we opened it + * without asking for it, bail out. + */ + if ((flags & MAP_SHARED) != 0) { + if ((fp->f_flag & FWRITE) != 0) + maxprot |= VM_PROT_WRITE; + else if ((prot & VM_PROT_WRITE) != 0) + return (EACCES); + } else { + maxprot |= VM_PROT_WRITE; + cap_maxprot |= VM_PROT_WRITE; + } + maxprot &= cap_maxprot; + + writecounted = FALSE; + error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, + &foff, &object, &writecounted); + if (error != 0) + return (error); + error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, + foff, writecounted, td); + if (error != 0) { + /* + * If this mapping was accounted for in the vnode's + * writecount, then undo that now. + */ + if (writecounted) + vnode_pager_release_writecount(object, 0, size); + vm_object_deallocate(object); + } +#ifdef HWPMC_HOOKS + /* Inform hwpmc(4) if an executable is being mapped. */ + if (error == 0 && (prot & VM_PROT_EXECUTE) != 0) { + pkm.pm_file = vp; + pkm.pm_address = (uintptr_t) addr; + PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); + } +#endif + return (error); +} Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -42,6 +42,7 @@ #include #include #include +#include struct filedesc; struct stat; @@ -115,6 +116,9 @@ struct thread *td); typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp); +typedef int fo_mmap_t(struct file *fp, vm_map_t map, vm_offset_t *addr, + vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, + int flags, vm_ooffset_t foff, struct thread *td); typedef int fo_flags_t; struct fileops { @@ -131,6 +135,7 @@ fo_sendfile_t *fo_sendfile; fo_seek_t *fo_seek; fo_fill_kinfo_t *fo_fill_kinfo; + fo_mmap_t *fo_mmap; fo_flags_t fo_flags; /* DFLAG_* below */ }; @@ -391,6 +396,18 @@ return ((*fp->f_ops->fo_fill_kinfo)(fp, kif, fdp)); } +static __inline int +fo_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, + vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, + struct thread *td) +{ + + if (fp->f_ops->fo_mmap == NULL) + return (ENODEV); + return ((*fp->f_ops->fo_mmap)(fp, map, addr, size, prot, cap_maxprot, + flags, foff, td)); +} + #endif /* _KERNEL */ #endif /* !SYS_FILE_H */ Index: sys/sys/mman.h =================================================================== --- sys/sys/mman.h +++ sys/sys/mman.h @@ -230,8 +230,6 @@ #endif #ifdef _KERNEL -int shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff, - vm_object_t *obj); int shm_map(struct file *fp, size_t size, off_t offset, void **memp); int shm_unmap(struct file *fp, void *mem, size_t size); Index: sys/vm/vm_extern.h =================================================================== --- sys/vm/vm_extern.h +++ sys/vm/vm_extern.h @@ -40,6 +40,8 @@ struct vmem; #ifdef _KERNEL +struct cdev; +struct cdevsw; /* These operate on kernel virtual addresses only. */ vm_offset_t kva_alloc(vm_size_t); @@ -81,10 +83,18 @@ int fault_flags, vm_page_t *m_hold); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count); -int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int); +int vm_forkproc(struct thread *, struct proc *, struct thread *, + struct vmspace *, int); void vm_waitproc(struct proc *); -int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t); +int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, + objtype_t, void *, vm_ooffset_t); +int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, + vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *); int vm_mmap_to_errno(int rv); +int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, + int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *); +int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, + struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); void vm_set_page_size(void); void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t); typedef int (*pmap_pinit_t)(struct pmap *pmap); Index: sys/vm/vm_mmap.c =================================================================== --- sys/vm/vm_mmap.c +++ sys/vm/vm_mmap.c @@ -100,13 +100,6 @@ #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) #endif -static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, - int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); -static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, - int *, struct cdev *, vm_ooffset_t *, vm_object_t *); -static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, - int *, struct shmfd *, vm_ooffset_t, vm_object_t *); - #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; @@ -197,16 +190,10 @@ struct thread *td; struct mmap_args *uap; { -#ifdef HWPMC_HOOKS - struct pmckern_map_in pkm; -#endif struct file *fp; - struct vnode *vp; vm_offset_t addr; vm_size_t size, pageoff; - vm_prot_t cap_maxprot, maxprot; - void *handle; - objtype_t handle_type; + vm_prot_t cap_maxprot; int align, error, flags, prot; off_t pos; struct vmspace *vms = td->td_proc->p_vmspace; @@ -334,14 +321,22 @@ lim_max(td->td_proc, RLIMIT_DATA)); PROC_UNLOCK(td->td_proc); } - if (flags & MAP_ANON) { + if (size == 0) { + /* + * Return success without mapping anything for old + * binaries that request a page-aligned mapping of + * length 0. For modern binaries, this function + * returns an error earlier. + */ + error = 0; + } else if (flags & MAP_ANON) { /* * Mapping blank space is trivial. + * + * This relies on VM_PROT_* matching PROT_*. */ - handle = NULL; - handle_type = OBJT_DEFAULT; - maxprot = VM_PROT_ALL; - cap_maxprot = VM_PROT_ALL; + error = vm_mmap_object(&vms->vm_map, &addr, size, prot, + VM_PROT_ALL, flags, NULL, pos, FALSE, td); } else { /* * Mapping file, get fp for validation and don't let the @@ -366,93 +361,12 @@ error = EINVAL; goto done; } - if (fp->f_type == DTYPE_SHM) { - handle = fp->f_data; - handle_type = OBJT_SWAP; - maxprot = VM_PROT_NONE; - - /* FREAD should always be set. */ - if (fp->f_flag & FREAD) - maxprot |= VM_PROT_EXECUTE | VM_PROT_READ; - if (fp->f_flag & FWRITE) - maxprot |= VM_PROT_WRITE; - goto map; - } - if (fp->f_type != DTYPE_VNODE) { - error = ENODEV; - goto done; - } -#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ - defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) - /* - * POSIX shared-memory objects are defined to have - * kernel persistence, and are not defined to support - * read(2)/write(2) -- or even open(2). Thus, we can - * use MAP_ASYNC to trade on-disk coherence for speed. - * The shm_open(3) library routine turns on the FPOSIXSHM - * flag to request this behavior. - */ - if (fp->f_flag & FPOSIXSHM) - flags |= MAP_NOSYNC; -#endif - vp = fp->f_vnode; - /* - * Ensure that file and memory protections are - * compatible. Note that we only worry about - * writability if mapping is shared; in this case, - * current and max prot are dictated by the open file. - * XXX use the vnode instead? Problem is: what - * credentials do we use for determination? What if - * proc does a setuid? - */ - if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC) - maxprot = VM_PROT_NONE; - else - maxprot = VM_PROT_EXECUTE; - if (fp->f_flag & FREAD) { - maxprot |= VM_PROT_READ; - } else if (prot & PROT_READ) { - error = EACCES; - goto done; - } - /* - * If we are sharing potential changes (either via - * MAP_SHARED or via the implicit sharing of character - * device mappings), and we are trying to get write - * permission although we opened it without asking - * for it, bail out. - */ - if ((flags & MAP_SHARED) != 0) { - if ((fp->f_flag & FWRITE) != 0) { - maxprot |= VM_PROT_WRITE; - } else if ((prot & PROT_WRITE) != 0) { - error = EACCES; - goto done; - } - } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) { - maxprot |= VM_PROT_WRITE; - cap_maxprot |= VM_PROT_WRITE; - } - handle = (void *)vp; - handle_type = OBJT_VNODE; - } -map: - td->td_fpop = fp; - maxprot &= cap_maxprot; - /* This relies on VM_PROT_* matching PROT_*. */ - error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, - flags, handle_type, handle, pos); - td->td_fpop = NULL; -#ifdef HWPMC_HOOKS - /* inform hwpmc(4) if an executable is being mapped */ - if (error == 0 && handle_type == OBJT_VNODE && - (prot & PROT_EXEC)) { - pkm.pm_file = handle; - pkm.pm_address = (uintptr_t) addr; - PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm); + /* This relies on VM_PROT_* matching PROT_*. */ + error = fo_mmap(fp, &vms->vm_map, &addr, size, prot, + cap_maxprot, flags, pos, td); } -#endif + if (error == 0) td->td_retval[0] = (register_t) (addr + pageoff); done: @@ -1311,9 +1225,6 @@ * * Helper function for vm_mmap. Perform sanity check specific for mmap * operations on vnodes. - * - * For VCHR vnodes, the vnode lock is held over the call to - * vm_mmap_cdev() to keep vp->v_rdev valid. */ int vm_mmap_vnode(struct thread *td, vm_size_t objsize, @@ -1360,12 +1271,6 @@ *writecounted = TRUE; vnode_pager_update_writecount(obj, 0, objsize); } - } else if (vp->v_type == VCHR) { - error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp, - vp->v_rdev, foffp, objp); - if (error == 0) - goto mark_atime; - goto done; } else { error = EINVAL; goto done; @@ -1373,13 +1278,14 @@ if ((error = VOP_GETATTR(vp, &va, cred))) goto done; #ifdef MAC - error = mac_vnode_check_mmap(cred, vp, prot, flags); + /* This relies on VM_PROT_* matching PROT_*. */ + error = mac_vnode_check_mmap(cred, vp, (int)prot, flags); if (error != 0) goto done; #endif if ((flags & MAP_SHARED) != 0) { if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) { - if (prot & PROT_WRITE) { + if (prot & VM_PROT_WRITE) { error = EPERM; goto done; } @@ -1414,7 +1320,6 @@ *objp = obj; *flagsp = flags; -mark_atime: vfs_mark_atime(vp, cred); done: @@ -1435,21 +1340,18 @@ * operations on cdevs. */ int -vm_mmap_cdev(struct thread *td, vm_size_t objsize, - vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, - struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp) +vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot, + vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw, + vm_ooffset_t *foff, vm_object_t *objp) { vm_object_t obj; - struct cdevsw *dsw; - int error, flags, ref; + int error, flags; flags = *flagsp; - dsw = dev_refthread(cdev, &ref); - if (dsw == NULL) - return (ENXIO); if (dsw->d_flags & D_MMAP_ANON) { - dev_relthread(cdev, ref); + *objp = NULL; + *foff = 0; *maxprotp = VM_PROT_ALL; *flagsp |= MAP_ANON; return (0); @@ -1458,24 +1360,18 @@ * cdevs do not provide private mappings of any kind. */ if ((*maxprotp & VM_PROT_WRITE) == 0 && - (prot & PROT_WRITE) != 0) { - dev_relthread(cdev, ref); + (prot & VM_PROT_WRITE) != 0) return (EACCES); - } - if (flags & (MAP_PRIVATE|MAP_COPY)) { - dev_relthread(cdev, ref); + if (flags & (MAP_PRIVATE|MAP_COPY)) return (EINVAL); - } /* * Force device mappings to be shared. */ flags |= MAP_SHARED; #ifdef MAC_XXX - error = mac_cdev_check_mmap(td->td_ucred, cdev, prot); - if (error != 0) { - dev_relthread(cdev, ref); + error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot); + if (error != 0) return (error); - } #endif /* * First, try d_mmap_single(). If that is not implemented @@ -1487,7 +1383,6 @@ * XXX assumes VM_PROT_* == PROT_* */ error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot); - dev_relthread(cdev, ref); if (error != ENODEV) return (error); obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff, @@ -1500,59 +1395,89 @@ } /* - * vm_mmap_shm() - * - * MPSAFE + * vm_mmap() * - * Helper function for vm_mmap. Perform sanity check specific for mmap - * operations on shm file descriptors. + * Internal version of mmap used by exec, sys5 shared memory, and + * various device drivers. Handle is either a vnode pointer, a + * character device, or NULL for MAP_ANON. */ int -vm_mmap_shm(struct thread *td, vm_size_t objsize, - vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, - struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp) +vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, + vm_prot_t maxprot, int flags, + objtype_t handle_type, void *handle, + vm_ooffset_t foff) { + vm_object_t object; + struct thread *td = curthread; int error; + boolean_t writecounted; - if ((*flagsp & MAP_SHARED) != 0 && - (*maxprotp & VM_PROT_WRITE) == 0 && - (prot & PROT_WRITE) != 0) - return (EACCES); -#ifdef MAC - error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp); - if (error != 0) - return (error); -#endif - error = shm_mmap(shmfd, objsize, foff, objp); + if (size == 0) + return (EINVAL); + + size = round_page(size); + writecounted = FALSE; + + /* + * Lookup/allocate object. + */ + switch (handle_type) { + case OBJT_DEVICE: { + struct cdevsw *dsw; + struct cdev *cdev; + int ref; + + cdev = handle; + dsw = dev_refthread(cdev, &ref); + if (dsw == NULL) + return (ENXIO); + error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev, + dsw, &foff, &object); + dev_relthread(cdev, ref); + break; + } + case OBJT_VNODE: + error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, + handle, &foff, &object, &writecounted); + break; + case OBJT_DEFAULT: + if (handle == NULL) { + error = 0; + break; + } + /* FALLTHROUGH */ + default: + error = EINVAL; + break; + } if (error) return (error); - return (0); + + error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, + foff, writecounted, td); + if (error != 0 && object != NULL) { + /* + * If this mapping was accounted for in the vnode's + * writecount, then undo that now. + */ + if (writecounted) + vnode_pager_release_writecount(object, 0, size); + vm_object_deallocate(object); + } + return (error); } /* - * vm_mmap() - * - * MPSAFE - * - * Internal version of mmap. Currently used by mmap, exec, and sys5 - * shared memory. Handle is either a vnode pointer or NULL for MAP_ANON. + * Internal version of mmap that maps a specific VM object into an + * map. Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap. */ int -vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, - vm_prot_t maxprot, int flags, - objtype_t handle_type, void *handle, - vm_ooffset_t foff) +vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, + vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff, + boolean_t writecounted, struct thread *td) { boolean_t fitit; - vm_object_t object = NULL; - struct thread *td = curthread; int docow, error, findspace, rv; - boolean_t writecounted; - - if (size == 0) - return (0); - - size = round_page(size); if (map == &td->td_proc->p_vmspace->vm_map) { PROC_LOCK(td->td_proc); @@ -1586,11 +1511,11 @@ /* * We currently can only deal with page aligned file offsets. - * The check is here rather than in the syscall because the - * kernel calls this function internally for other mmaping - * operations (such as in exec) and non-aligned offsets will - * cause pmap inconsistencies...so we want to be sure to - * disallow this in all cases. + * The mmap() system call already enforces this by subtracting + * the page offset from the file offset, but checking here + * catches errors in device drivers (e.g. d_single_mmap() + * callbacks or other internal mapping requests (such as in + * exec). */ if (foff & PAGE_MASK) return (EINVAL); @@ -1603,44 +1528,11 @@ return (EINVAL); fitit = FALSE; } - writecounted = FALSE; - /* - * Lookup/allocate object. - */ - switch (handle_type) { - case OBJT_DEVICE: - error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, - handle, &foff, &object); - break; - case OBJT_VNODE: - error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, - handle, &foff, &object, &writecounted); - break; - case OBJT_SWAP: - error = vm_mmap_shm(td, size, prot, &maxprot, &flags, - handle, foff, &object); - break; - case OBJT_DEFAULT: - if (handle == NULL) { - error = 0; - break; - } - /* FALLTHROUGH */ - default: - error = EINVAL; - break; - } - if (error) - return (error); if (flags & MAP_ANON) { - object = NULL; + if (object != NULL || foff != 0) + return (EINVAL); docow = 0; - /* - * Unnamed anonymous regions always start at 0. - */ - if (handle == 0) - foff = 0; } else if (flags & MAP_PREFAULT_READ) docow = MAP_PREFAULT; else @@ -1693,19 +1585,6 @@ VM_MAP_WIRE_USER | ((flags & MAP_STACK) ? VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES)); } - } else { - /* - * If this mapping was accounted for in the vnode's - * writecount, then undo that now. - */ - if (writecounted) - vnode_pager_release_writecount(object, 0, size); - /* - * Lose the object reference. Will destroy the - * object if it's an unnamed anonymous mapping - * or named anonymous without other references. - */ - vm_object_deallocate(object); } return (vm_mmap_to_errno(rv)); }