Index: sys/compat/linfdescfs/linfdesc.h =================================================================== --- /dev/null +++ sys/compat/linfdescfs/linfdesc.h @@ -0,0 +1,38 @@ +/*- + * Copyright (c) 2017 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_FDESCFS_H_
+#define _LINUX_FDESCFS_H_
+
+/* Mutex taken around the linfdescfs vnode hash and the per-mount flags. */
+extern struct mtx linfdesc_hashmtx;
+
+/* VFS init/uninit hooks referenced from the linfdescfs vfsops table. */
+extern vfs_init_t linfdesc_init;
+extern vfs_uninit_t linfdesc_uninit;
+
+/*
+ * Look up, or create, the vnode identified by a hash index on a mount.
+ * Arguments, in order: node type, file descriptor number, hash index,
+ * mount point, and the location that receives the resulting vnode.
+ */
+extern int linfdesc_allocvp(fdntype, unsigned, int, struct mount *,
+    struct vnode **);
+#endif /* _LINUX_FDESCFS_H_ */
Index: sys/compat/linfdescfs/linfdesc_vfsops.c
===================================================================
--- /dev/null
+++ sys/compat/linfdescfs/linfdesc_vfsops.c
@@ -0,0 +1,209 @@
+/*-
+ * Copyright (c) 2017 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Linux /proc/self/fd Filesystem
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+static MALLOC_DEFINE(M_LFDESCMNT, "linfdesc_mount", "FDESC mount structure");
+
+static vfs_cmount_t	linfdesc_cmount;
+static vfs_mount_t	linfdesc_mount;
+static vfs_unmount_t	linfdesc_unmount;
+static vfs_statfs_t	linfdesc_statfs;
+static vfs_root_t	linfdesc_root;
+
+/*
+ * Compatibility shim for old mount(2) system call.
+ *
+ * The `data' argument is not used; the request is handed straight to
+ * kernel_mount() together with the caller's mount flags.
+ */
+static int
+linfdesc_cmount(struct mntarg *ma, void *data, uint64_t flags)
+{
+
+	return (kernel_mount(ma, flags));
+}
+
+/*
+ * Mount the per-process file descriptors
+ *
+ * Fails with EPERM when the caller's prison does not allow fdescfs mounts
+ * and with EOPNOTSUPP for update or root-filesystem mounts.  Otherwise the
+ * per-mount structure is allocated and the root vnode is created, marked
+ * as a directory root, recorded in f_root and unlocked.
+ */
+static int
+linfdesc_mount(struct mount *mp)
+{
+	struct fdescmount *fmp;
+	struct thread *td = curthread;
+	struct vnode *rvp;
+	int error;
+
+	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_FDESCFS))
+		return (EPERM);
+
+	if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS))
+		return (EOPNOTSUPP);
+
+	fmp = malloc(sizeof(*fmp), M_LFDESCMNT, M_WAITOK);
+	/* linfdesc_allocvp() inspects mnt_data, so publish it first. */
+	mp->mnt_data = fmp;
+	fmp->flags = 0;
+	error = linfdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp);
+	if (error != 0) {
+		/*
+		 * Detach the structure from the mount before releasing it,
+		 * the same order linfdesc_unmount() uses, so mnt_data never
+		 * points at freed memory.
+		 */
+		mp->mnt_data = NULL;
+		free(fmp, M_LFDESCMNT);
+		return (error);
+	}
+	rvp->v_type = VDIR;
+	rvp->v_vflag |= VV_ROOT;
+	fmp->f_root = rvp;
+	VOP_UNLOCK(rvp, 0);
+	vfs_getnewfsid(mp);
+
+	vfs_mountedfrom(mp, "linfdescfs");
+	return (0);
+}
+
+/*
+ * Unmount the filesystem.
+ *
+ * For a forced unmount the FMNT_UNMOUNTF flag is raised under the hash
+ * mutex, which makes linfdesc_allocvp() refuse further vnode requests,
+ * and vflush() is asked to close vnodes forcibly.  Returns the vflush()
+ * error, or 0 once the mount structure has been released.
+ */
+static int
+linfdesc_unmount(struct mount *mp, int mntflags)
+{
+	struct fdescmount *fmp;
+	int error, flags;
+
+	flags = 0;
+	fmp = mp->mnt_data;
+	if (mntflags & MNT_FORCE) {
+		mtx_lock(&linfdesc_hashmtx);
+		fmp->flags |= FMNT_UNMOUNTF;
+		mtx_unlock(&linfdesc_hashmtx);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 *
+	 * There is 1 extra root vnode reference corresponding
+	 * to f_root.
+	 */
+	if ((error = vflush(mp, 1, flags, curthread)) != 0)
+		return (error);
+
+	mp->mnt_data = NULL;
+	free(fmp, M_LFDESCMNT);
+	return (0);
+}
+
+/*
+ * Hand out the root vnode recorded at mount time.  The `flags' argument
+ * is not consulted; the vnode is always requested exclusively locked.
+ */
+static int
+linfdesc_root(struct mount *mp, int flags, struct vnode **vpp)
+{
+	struct vnode *vp;
+
+	/*
+	 * Return locked reference to root.
+	 */
+	vp = VFSTOFDESC(mp)->f_root;
+	vget(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Report filesystem statistics for the calling process: f_files is the
+ * descriptor limit plus one and f_ffree the number of unused descriptor
+ * slots below that limit.  Block counts are fixed values.
+ */
+static int
+linfdesc_statfs(struct mount *mp, struct statfs *sbp)
+{
+	struct thread *td = curthread;
+	struct filedesc *fdp;
+	int lim, i, last, freefd;
+	uint64_t limit;
+
+	/*
+	 * Compute number of free file descriptors.
+	 * [ Strange results will ensue if the open file
+	 * limit is ever reduced below the current number
+	 * of open files... ]
+	 */
+	lim = lim_cur(td, RLIMIT_NOFILE);
+	fdp = td->td_proc->p_fd;
+	FILEDESC_SLOCK(fdp);
+	/* The effective limit is the smaller of the rlimit and racct limit. */
+	limit = racct_get_limit(td->td_proc, RACCT_NOFILE);
+	if (lim > limit)
+		lim = limit;
+	last = min(fdp->fd_nfiles, lim);
+	freefd = 0;
+	for (i = fdp->fd_freefile; i < last; i++)
+		if (fdp->fd_ofiles[i].fde_file == NULL)
+			freefd++;
+
+	/*
+	 * Adjust for the fact that the fdesc array may not
+	 * have been fully allocated yet.
+	 */
+	if (fdp->fd_nfiles < lim)
+		freefd += (lim - fdp->fd_nfiles);
+	FILEDESC_SUNLOCK(fdp);
+
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = lim + 1;		/* Allow for "." */
+	sbp->f_ffree = freefd;		/* See comments above */
+	return (0);
+}
+
+/* VFS operations table registered for linfdescfs by VFS_SET() below. */
+static struct vfsops linfdesc_vfsops = {
+	.vfs_cmount =	linfdesc_cmount,
+	.vfs_init =	linfdesc_init,
+	.vfs_mount =	linfdesc_mount,
+	.vfs_root =	linfdesc_root,
+	.vfs_statfs =	linfdesc_statfs,
+	.vfs_uninit =	linfdesc_uninit,
+	.vfs_unmount =	linfdesc_unmount,
+};
+
+VFS_SET(linfdesc_vfsops, linfdescfs, VFCF_SYNTHETIC | VFCF_JAIL);
+#if defined(__amd64__)
+MODULE_DEPEND(linfdescfs, linux_common, 1, 1, 1);
+#else
+MODULE_DEPEND(linfdescfs, linux, 1, 1, 1);
+#endif
Index: sys/compat/linfdescfs/linfdesc_vnops.c
===================================================================
--- /dev/null
+++ sys/compat/linfdescfs/linfdesc_vnops.c
@@ -0,0 +1,640 @@
+/*-
+ * Copyright (c) 2017 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer
+ *    in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/* Size argument handed to hashinit() when the vnode hash is created. */
+#define	NFDCACHE 4
+/* Select the hash chain for node index `ix' by masking with linfdhash. */
+#define	FD_NHASH(ix) \
+	(&linfdhashtbl[(ix) & linfdhash])
+/* Hash chains of fdescnodes; allocated in linfdesc_init(). */
+static LIST_HEAD(fdhashhead, fdescnode) *linfdhashtbl;
+/* Hash mask filled in by hashinit(). */
+static u_long linfdhash;
+
+/* Taken around every access to the hash chains in this file. */
+struct mtx linfdesc_hashmtx;
+
+static vop_getattr_t	linfdesc_getattr;
+static vop_lookup_t	linfdesc_lookup;
+static vop_open_t	linfdesc_open;
+static vop_readdir_t	linfdesc_readdir;
+static vop_readlink_t	linfdesc_readlink;
+static vop_reclaim_t	linfdesc_reclaim;
+static vop_setattr_t	linfdesc_setattr;
+
+/*
+ * Vnode operations for linfdescfs.  Anything not listed here is resolved
+ * through .vop_default, i.e. default_vnodeops.
+ */
+static struct vop_vector linfdesc_vnodeops = {
+	.vop_default =	&default_vnodeops,
+
+	.vop_access =	VOP_NULL,
+	.vop_getattr =	linfdesc_getattr,
+	.vop_lookup =	linfdesc_lookup,
+	.vop_open =	linfdesc_open,
+	.vop_pathconf =	vop_stdpathconf,
+	.vop_readdir =	linfdesc_readdir,
+	.vop_readlink =	linfdesc_readlink,
+	.vop_reclaim =	linfdesc_reclaim,
+	.vop_setattr =	linfdesc_setattr,
+};
+
+static void linfdesc_insmntque_dtr(struct vnode *, void *);
+static void linfdesc_remove_entry(struct fdescnode *);
+
+/*
+ * Initialise cache headers
+ *
+ * Sets up the hash mutex and allocates the vnode hash table; the `vfsp'
+ * argument is not used.  Always returns 0.
+ */
+int
+linfdesc_init(struct vfsconf *vfsp)
+{
+
+	mtx_init(&linfdesc_hashmtx, "linfdescfs_hash", NULL, MTX_DEF);
+	linfdhashtbl = hashinit(NFDCACHE, M_CACHE, &linfdhash);
+	return (0);
+}
+
+/*
+ * Uninit ready for unload.
+ */
+int
+linfdesc_uninit(struct vfsconf *vfsp)
+{
+
+	/* Release the hash table first, then the mutex that guarded it. */
+	hashdestroy(linfdhashtbl, M_CACHE, linfdhash);
+	mtx_destroy(&linfdesc_hashmtx);
+	return (0);
+}
+
+/*
+ * If allocating vnode fails, call this.
+ *
+ * Passed to insmntque1() as its destructor: the half-constructed vnode
+ * is killed with vgone() and released with vput().  `arg' is unused.
+ */
+static void
+linfdesc_insmntque_dtr(struct vnode *vp, void *arg)
+{
+
+	vgone(vp);
+	vput(vp);
+}
+
+/*
+ * Remove an entry from the hash if it exists.
+ *
+ * The chain is walked under the hash mutex; a node that was never
+ * inserted is simply not found and nothing happens.
+ */
+static void
+linfdesc_remove_entry(struct fdescnode *fd)
+{
+	struct fdhashhead *fc;
+	struct fdescnode *fd2;
+
+	fc = FD_NHASH(fd->fd_ix);
+	mtx_lock(&linfdesc_hashmtx);
+	LIST_FOREACH(fd2, fc, fd_hash) {
+		if (fd == fd2) {
+			LIST_REMOVE(fd, fd_hash);
+			break;
+		}
+	}
+	mtx_unlock(&linfdesc_hashmtx);
+}
+
+/*
+ * Find the vnode with hash index `ix' on mount `mp', creating it when it
+ * does not exist yet.
+ *
+ * ftype and fd_fd are stored in a newly created node.  On success 0 is
+ * returned and *vpp holds the exclusively locked vnode.  -1 is returned
+ * when the mount has no private data or a forced unmount is in progress;
+ * other non-zero values are errors from getnewvnode(), insmntque1() or
+ * vget().
+ */
+int
+linfdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp,
+    struct vnode **vpp)
+{
+	struct fdescmount *fmp;
+	struct fdhashhead *fc;
+	struct fdescnode *fd, *fd2;
+	struct vnode *vp, *vp2;
+	struct thread *td;
+	int error;
+
+	td = curthread;
+	fc = FD_NHASH(ix);
+loop:
+	mtx_lock(&linfdesc_hashmtx);
+	/*
+	 * If a forced unmount is progressing, we need to drop it. The flags are
+	 * protected by the hashmtx.
+	 */
+	fmp = mp->mnt_data;
+	if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
+		mtx_unlock(&linfdesc_hashmtx);
+		return (-1);
+	}
+
+	LIST_FOREACH(fd, fc, fd_hash) {
+		if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
+			/* Get reference to vnode in case it's being free'd */
+			vp = fd->fd_vnode;
+			VI_LOCK(vp);
+			mtx_unlock(&linfdesc_hashmtx);
+			/* A failed vget() restarts the whole search. */
+			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td))
+				goto loop;
+			*vpp = vp;
+			return (0);
+		}
+	}
+	mtx_unlock(&linfdesc_hashmtx);
+
+	fd = malloc(sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+
+	error = getnewvnode("linfdescfs", mp, &linfdesc_vnodeops, &vp);
+	if (error != 0) {
+		free(fd, M_TEMP);
+		return (error);
+	}
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	vp->v_data = fd;
+	fd->fd_vnode = vp;
+	fd->fd_type = ftype;
+	fd->fd_fd = fd_fd;
+	fd->fd_ix = ix;
+	/* On failure the destructor callback has already disposed of vp. */
+	error = insmntque1(vp, mp, linfdesc_insmntque_dtr, NULL);
+	if (error != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+
+	/* Make sure that someone didn't beat us when inserting the vnode. */
+	mtx_lock(&linfdesc_hashmtx);
+	/*
+	 * If a forced unmount is progressing, we need to drop it. The flags are
+	 * protected by the hashmtx.
+	 */
+	fmp = mp->mnt_data;
+	if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) {
+		mtx_unlock(&linfdesc_hashmtx);
+		vgone(vp);
+		vput(vp);
+		*vpp = NULLVP;
+		return (-1);
+	}
+
+	LIST_FOREACH(fd2, fc, fd_hash) {
+		if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) {
+			/* Get reference to vnode in case it's being free'd */
+			vp2 = fd2->fd_vnode;
+			VI_LOCK(vp2);
+			mtx_unlock(&linfdesc_hashmtx);
+			error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td);
+			/* Someone beat us, dec use count and wait for reclaim */
+			vgone(vp);
+			vput(vp);
+			/* If we didn't get it, return no vnode. */
+			if (error != 0)
+				vp2 = NULLVP;
+			*vpp = vp2;
+			return (error);
+		}
+	}
+
+	/* If we came here, we can insert it safely. */
+	LIST_INSERT_HEAD(fc, fd, fd_hash);
+	mtx_unlock(&linfdesc_hashmtx);
+	*vpp = vp;
+	return (0);
+}
+
+/* Arguments carried through vn_vget_ino_gen() to linfdesc_get_ino_alloc(). */
+struct linfdesc_get_ino_args {
+	fdntype ftype;
+	unsigned fd_fd;
+	int ix;
+	struct file *fp;
+	struct thread *td;
+};
+
+/*
+ * Callback for vn_vget_ino_gen(): allocate the vnode described by `arg'
+ * and drop the file reference taken by linfdesc_lookup().  `lkflags' is
+ * not consulted.
+ */
+static int
+linfdesc_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
+    struct vnode **rvp)
+{
+	struct linfdesc_get_ino_args *a;
+	int error;
+
+	a = arg;
+	error = linfdesc_allocvp(a->ftype, a->fd_fd, a->ix, mp, rvp);
+	fdrop(a->fp, a->td);
+	return (error);
+}
+
+/*
+ * Look up a name in the root directory.
+ *
+ * ap->a_dvp is the directory being searched and ap->a_cnp the name to
+ * locate.  "." yields the directory itself; any other name must be the
+ * decimal number, without leading zeroes, of a descriptor open in the
+ * calling thread's process.  DELETE and RENAME of the last component
+ * fail with EROFS, lookups in a non-root node with ENOTDIR and malformed
+ * names with ENOENT.  On failure *ap->a_vpp is set to NULL.
+ */
+static int
+linfdesc_lookup(struct vop_lookup_args *ap)
+{
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	char *pname = cnp->cn_nameptr;
+	struct thread *td = cnp->cn_thread;
+	struct linux_emuldata *em;
+	struct file *fp;
+	struct linfdesc_get_ino_args arg;
+	cap_rights_t rights;
+	int nlen = cnp->cn_namelen;
+	u_int fd, digit;
+	int error;
+	struct vnode *fvp;
+
+	if ((cnp->cn_flags & ISLASTCN) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
+		error = EROFS;
+		goto bad;
+	}
+
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);
+		return (0);
+	}
+
+	if (VTOFDESC(dvp)->fd_type != Froot) {
+		error = ENOTDIR;
+		goto bad;
+	}
+
+	fd = 0;
+	/* the only time a leading 0 is acceptable is if it's "0" */
+	if (*pname == '0' && nlen != 1) {
+		error = ENOENT;
+		goto bad;
+	}
+	while (nlen--) {
+		if (*pname < '0' || *pname > '9') {
+			error = ENOENT;
+			goto bad;
+		}
+		digit = *pname++ - '0';
+		/*
+		 * Reject values that do not fit in a u_int.  The bound is
+		 * checked before multiplying because a wrapped product can
+		 * still be larger than the previous value.
+		 */
+		if (fd > (~0U - digit) / 10) {
+			error = ENOENT;
+			goto bad;
+		}
+		fd = 10 * fd + digit;
+	}
+
+	/*
+	 * No rights to check since 'fp' isn't actually used.
+	 */
+	if ((error = fget(td, fd, cap_rights_init(&rights), &fp)) != 0)
+		goto bad;
+
+	/* Check if we're looking up ourselves. */
+	if (VTOFDESC(dvp)->fd_ix == FD_DESC + fd) {
+		/*
+		 * In case we're holding the last reference to the file, the dvp
+		 * will be re-acquired.
+		 */
+		vhold(dvp);
+		VOP_UNLOCK(dvp, 0);
+		fdrop(fp, td);
+
+		/* Re-acquire the lock afterwards. */
+		vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE);
+		vdrop(dvp);
+		fvp = dvp;
+		if ((dvp->v_iflag & VI_DOOMED) != 0)
+			error = ENOENT;
+	} else {
+		/*
+		 * Unlock our root node (dvp) when doing this, since we might
+		 * deadlock since the vnode might be locked by another thread
+		 * and the root vnode lock will be obtained afterwards (in case
+		 * we're looking up the fd of the root vnode), which will be the
+		 * opposite lock order. Vhold the root vnode first so we don't
+		 * lose it.
+		 */
+		arg.ftype = Fdesc;
+		arg.fd_fd = fd;
+		arg.ix = FD_DESC + fd;
+		arg.fp = fp;
+		arg.td = td;
+		error = vn_vget_ino_gen(dvp, linfdesc_get_ino_alloc, &arg,
+		    LK_EXCLUSIVE, &fvp);
+	}
+
+	if (error != 0)
+		goto bad;
+	/*
+	 * Linux processes see a symbolic link only while the thread is
+	 * flagged LINUX_TD_READLINK; everyone else always sees a link.
+	 */
+	if (SV_CURPROC_ABI() == SV_ABI_LINUX) {
+		em = em_find(td);
+		KASSERT(em != NULL, ("linfdesc_lookup: emuldata not found.\n"));
+		if (em->flags & LINUX_TD_READLINK)
+			fvp->v_type = VLNK;
+		else
+			fvp->v_type = VNON;
+	} else
+		fvp->v_type = VLNK;
+
+	*vpp = fvp;
+	return (0);
+
+bad:
+	*vpp = NULL;
+	return (error);
+}
+
+/*
+ * Open a node.  The root directory opens normally; a descriptor node
+ * records its descriptor number in the thread and fails with ENODEV.
+ */
+static int
+linfdesc_open(struct vop_open_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+
+	if (VTOFDESC(vp)->fd_type == Froot)
+		return (0);
+
+	/*
+	 * XXX Kludge: set td->td_dupfd to contain the value of the file
+	 * descriptor being sought for duplication. The error return ensures
+	 * that the vnode for this device will be released by vn_open. Open
+	 * will detect this special error and take the actions in dupfdopen.
+	 * Other callers of vn_open or VOP_OPEN will simply report the
+	 * error.
+	 */
+	ap->a_td->td_dupfd = VTOFDESC(vp)->fd_fd;
+	return (ENODEV);
+}
+
+/*
+ * Fill in attributes for a node: the root is reported as a directory,
+ * a descriptor node as a symbolic link.  All timestamps are the boot
+ * time.  The vnode's v_type is updated to match the reported type.
+ */
+static int
+linfdesc_getattr(struct vop_getattr_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct timeval boottime;
+
+	getboottime(&boottime);
+	vap->va_fileid = VTOFDESC(vp)->fd_ix;
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_blocksize = DEV_BSIZE;
+	vap->va_atime.tv_sec = boottime.tv_sec;
+	vap->va_atime.tv_nsec = 0;
+	vap->va_mtime = vap->va_atime;
+	vap->va_ctime = vap->va_mtime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_bytes = 0;
+	vap->va_filerev = 0;
+
+	switch (VTOFDESC(vp)->fd_type) {
+	case Froot:
+		vap->va_type = VDIR;
+		vap->va_nlink = 2;
+		vap->va_size = DEV_BSIZE;
+		vap->va_rdev = NODEV;
+		vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+		break;
+
+	case Fdesc:
+		vap->va_type = VLNK;
+		vap->va_nlink = 1;
+		vap->va_size = 0;
+		vap->va_rdev = makedev(0, vap->va_fileid);
+		vap->va_mode = S_IRUSR|S_IRGRP|S_IROTH;
+		break;
+
+	default:
+		panic("linfdesc_getattr");
+		break;
+	}
+
+	vp->v_type = vap->va_type;
+	return (0);
+}
+
+/*
+ * Set attributes on the object behind a descriptor node by forwarding
+ * the request to the descriptor's own vnode.  The root node is refused
+ * with EACCES.
+ */
+static int
+linfdesc_setattr(struct vop_setattr_args *ap)
+{
+	struct thread *td = curthread;
+	struct vattr *vap = ap->a_vap;
+	struct vnode *vp;
+	struct mount *mp;
+	struct file *fp;
+	cap_rights_t rights;
+	unsigned fd;
+	int error;
+
+	/*
+	 * Can't mess with the root vnode
+	 */
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot)
+		return (EACCES);
+
+	fd = VTOFDESC(ap->a_vp)->fd_fd;
+
+	/*
+	 * Allow setattr where there is an underlying vnode.
+	 */
+	error = getvnode(td, fd,
+	    cap_rights_init(&rights, CAP_EXTATTR_SET), &fp);
+	if (error != 0) {
+		/*
+		 * getvnode() returns EINVAL if the file descriptor is not
+		 * backed by a vnode.  Silently drop all changes except
+		 * chflags(2) in this case.
+		 */
+		if (error == EINVAL) {
+			if (vap->va_flags != VNOVAL)
+				error = EOPNOTSUPP;
+			else
+				error = 0;
+		}
+		return (error);
+	}
+	vp = fp->f_vnode;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) == 0) {
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+		error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred);
+		VOP_UNLOCK(vp, 0);
+		vn_finished_write(mp);
+	}
+	fdrop(fp, td);
+	return (error);
+}
+
+#define UIO_MX _GENERIC_DIRLEN(10) /* number of symbols in INT_MAX printout */
+
+/*
+ * Read the root directory.  Every entry occupies UIO_MX bytes, so the
+ * offset must be a multiple of UIO_MX and the buffer must hold at least
+ * one entry, otherwise EINVAL is returned.  Entries 0 and 1 are "." and
+ * ".."; entry i >= 2 names descriptor i - 2 when that slot is in use.
+ */
+static int
+linfdesc_readdir(struct vop_readdir_args *ap)
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	struct dirent d;
+	struct dirent *dp = &d;
+	int error, i, off, fcnt;
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Froot)
+		panic("linfdesc_readdir: not dir");
+
+	if (ap->a_ncookies != NULL)
+		*ap->a_ncookies = 0;
+
+	off = (int)uio->uio_offset;
+	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
+	    uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	i = (u_int)off / UIO_MX;
+	fdp = uio->uio_td->td_proc->p_fd;
+	error = 0;
+
+	fcnt = i - 2;		/* The first two nodes are `.' and `..' */
+	FILEDESC_SLOCK(fdp);
+	while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
+		/* A zero d_namlen after the switch marks "nothing to emit". */
+		bzero((caddr_t)dp, UIO_MX);
+		switch (i) {
+		case 0:	/* `.' */
+		case 1: /* `..' */
+			dp->d_fileno = i + FD_ROOT;
+			dp->d_namlen = i + 1;
+			dp->d_reclen = UIO_MX;
+			bcopy("..", dp->d_name, dp->d_namlen);
+			dp->d_name[i + 1] = '\0';
+			dp->d_type = DT_DIR;
+			break;
+		default:
+			if (fdp->fd_ofiles[fcnt].fde_file == NULL)
+				break;
+			dp->d_namlen = sprintf(dp->d_name, "%d", fcnt);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_LNK;
+			dp->d_fileno = i + FD_DESC;
+			break;
+		}
+		if (dp->d_namlen != 0) {
+			/*
+			 * And ship to userland
+			 *
+			 * The descriptor table lock is dropped around the
+			 * copy and re-taken afterwards.
+			 */
+			FILEDESC_SUNLOCK(fdp);
+			error = uiomove(dp, UIO_MX, uio);
+			if (error != 0)
+				goto done;
+			FILEDESC_SLOCK(fdp);
+		}
+		i++;
+		fcnt++;
+	}
+	FILEDESC_SUNLOCK(fdp);
+
+done:
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+/*
+ * Reclaim a vnode: unhook its node from the hash and free the private
+ * data hanging off v_data.
+ */
+static int
+linfdesc_reclaim(struct vop_reclaim_args *ap)
+{
+	struct vnode *vp;
+	struct fdescnode *fd;
+
+	vp = ap->a_vp;
+	fd = VTOFDESC(vp);
+	linfdesc_remove_entry(fd);
+	free(vp->v_data, M_TEMP);
+	vp->v_data = NULL;
+	return (0);
+}
+
+/*
+ * Read the target of a descriptor link.  For vnode-backed descriptors
+ * this is the path reported by vn_fullpath(); other descriptor types
+ * yield a fixed Linux-style placeholder string.  The vnode is unlocked
+ * while the descriptor is examined and re-locked in its previous mode
+ * before returning.
+ */
+static int
+linfdesc_readlink(struct vop_readlink_args *va)
+{
+	struct vnode *vp, *vn = va->a_vp;
+	struct thread *td = curthread;
+	struct uio *uio = va->a_uio;
+	struct filedesc *fdp;
+	cap_rights_t rights;
+	struct file *fp;
+	char *freepath, *fullpath;
+	size_t pathlen;
+	unsigned fd_fd;
+	int error, locked;
+
+	if (VTOFDESC(vn)->fd_type != Fdesc)
+		panic("linfdesc_readlink: not fdescfs link");
+
+	/*
+	 * Fetch the descriptor number while the vnode is still locked.
+	 * linfdesc_reclaim() frees v_data, and the hold taken below only
+	 * keeps the vnode itself around, so the node must not be
+	 * dereferenced after VOP_UNLOCK().
+	 */
+	fd_fd = VTOFDESC(vn)->fd_fd;
+
+	vhold(vn);
+	locked = VOP_ISLOCKED(vn);
+	VOP_UNLOCK(vn, 0);
+
+	fdp = td->td_proc->p_fd;
+	error = fget_unlocked(fdp, fd_fd,
+	    cap_rights_init(&rights), &fp, NULL);
+	if (error != 0)
+		goto out;
+
+	freepath = NULL;
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		vp = fp->f_vnode;
+		vref(vp);
+		error = vn_fullpath(td, vp, &fullpath, &freepath);
+		vrele(vp);
+		break;
+
+	case DTYPE_SOCKET:
+		fullpath = "socket:[0]";
+		break;
+
+	case DTYPE_PIPE:
+		fullpath = "pipe:[0]";
+		break;
+
+	case DTYPE_LINUXEFD:
+		fullpath = "anon_inode:[eventpoll]";
+		break;
+
+	default:
+		fullpath = "anon_inode:[unknown]";
+		break;
+	}
+
+	if (error == 0) {
+		/* The terminating NUL is not copied out. */
+		pathlen = strnlen(fullpath, MAXPATHLEN);
+		error = uiomove(fullpath, pathlen, uio);
+	}
+	if (freepath != NULL)
+		free(freepath, M_TEMP);
+	fdrop(fp, td);
+
+out:
+	vn_lock(vn, locked | LK_RETRY);
+	vdrop(vn);
+	return (error);
+}
Index: sys/compat/linux/linux_emul.h
===================================================================
--- sys/compat/linux/linux_emul.h
+++ sys/compat/linux/linux_emul.h
@@ -47,6 +47,9 @@
 	struct linux_robust_list_head	*robust_futexes;
 };
 
+/* thread emuldata flags */
+#define	LINUX_TD_READLINK	0x00000001
+
 struct linux_emuldata	*em_find(struct thread *);
 
 void	linux_proc_init(struct thread *, struct thread *, int);
Index: sys/compat/linux/linux_file.c
===================================================================
--- sys/compat/linux/linux_file.c
+++ sys/compat/linux/linux_file.c
@@ -39,6 +39,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -60,6 +61,7 @@
 #include
 #include
 #endif
+#include
 #include
 #include
 #include
@@ -833,6 +835,7 @@
 int
 linux_readlink(struct thread *td, struct linux_readlink_args *args)
 {
+	struct linux_emuldata *em;
 	char *name;
 	int error;
 
@@ -843,15 +846,21 @@
 		printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf,
 		    args->count);
 #endif
+	em = em_find(td);
+	KASSERT(em != NULL, ("linux_readlink: emuldata not found.\n"));
+	em->flags |= LINUX_TD_READLINK;
+
 	error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE,
 	    args->buf, UIO_USERSPACE, args->count);
 	LFREEPATH(name);
+	em->flags &= ~LINUX_TD_READLINK;
 	return (error);
 }
 
 int
 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
 {
+	struct linux_emuldata *em;
 	char *name;
 	int error, dfd;
 
@@ -863,10 +872,14 @@
 		printf(ARGS(readlinkat, "%s, %p, %d"), name, (void *)args->buf,
 		    args->bufsiz);
 #endif
+	em = em_find(td);
+	KASSERT(em != NULL, ("linux_readlinkat: emuldata not found.\n"));
+	em->flags |= LINUX_TD_READLINK;
 
 	error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf,
 	    UIO_USERSPACE, args->bufsiz);
 	LFREEPATH(name);
+	em->flags &= ~LINUX_TD_READLINK;
 	return (error);
 }
 
Index: sys/modules/linfdescfs/Makefile
===================================================================
--- /dev/null
+++ sys/modules/linfdescfs/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../compat/linfdescfs
+
+KMOD=	linfdescfs
+SRCS=	vnode_if.h \
+	linfdesc_vfsops.c linfdesc_vnops.c
+
+.include