Index: head/sys/compat/linux/linux_file.c =================================================================== --- head/sys/compat/linux/linux_file.c (revision 283414) +++ head/sys/compat/linux/linux_file.c (revision 283415) @@ -1,1629 +1,1638 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include int linux_creat(struct thread *td, struct linux_creat_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(creat)) printf(ARGS(creat, "%s, %d"), path, args->mode); #endif error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); LFREEPATH(path); return (error); } static int linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode) { cap_rights_t rights; struct proc *p = td->td_proc; struct file *fp; int fd; int bsd_flags, error; bsd_flags = 0; switch (l_flags & LINUX_O_ACCMODE) { case LINUX_O_WRONLY: bsd_flags |= O_WRONLY; break; case LINUX_O_RDWR: bsd_flags |= O_RDWR; break; default: bsd_flags |= O_RDONLY; } if (l_flags & LINUX_O_NDELAY) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_O_APPEND) bsd_flags |= O_APPEND; if (l_flags & LINUX_O_SYNC) bsd_flags |= O_FSYNC; if (l_flags & LINUX_O_NONBLOCK) bsd_flags |= O_NONBLOCK; if (l_flags & LINUX_FASYNC) bsd_flags |= O_ASYNC; if (l_flags & LINUX_O_CREAT) bsd_flags |= O_CREAT; if (l_flags & LINUX_O_TRUNC) bsd_flags |= O_TRUNC; if (l_flags & LINUX_O_EXCL) bsd_flags |= O_EXCL; if (l_flags & LINUX_O_NOCTTY) bsd_flags |= O_NOCTTY; if (l_flags & LINUX_O_DIRECT) bsd_flags |= O_DIRECT; if (l_flags & LINUX_O_NOFOLLOW) bsd_flags |= O_NOFOLLOW; if (l_flags & LINUX_O_DIRECTORY) bsd_flags |= O_DIRECTORY; /* XXX LINUX_O_NOATIME: unable to be easily implemented. */ error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode); if (error != 0) goto done; if (bsd_flags & O_NOCTTY) goto done; /* * XXX In between kern_open() and fget(), another process * having the same filedesc could use that fd without * checking below. */ fd = td->td_retval[0]; if (fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp) == 0) { if (fp->f_type != DTYPE_VNODE) { fdrop(fp, td); goto done; } sx_slock(&proctree_lock); PROC_LOCK(p); if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); /* XXXPJD: Verify if TIOCSCTTY is allowed. */ (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); } else { PROC_UNLOCK(p); sx_sunlock(&proctree_lock); } fdrop(fp, td); } done: #ifdef DEBUG if (ldebug(open)) printf(LMSG("open returns error %d"), error); #endif LFREEPATH(path); return (error); } int linux_openat(struct thread *td, struct linux_openat_args *args) { char *path; int dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (args->flags & LINUX_O_CREAT) LCONVPATH_AT(td, args->filename, &path, 1, dfd); else LCONVPATH_AT(td, args->filename, &path, 0, dfd); #ifdef DEBUG if (ldebug(openat)) printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd, path, args->flags, args->mode); #endif return (linux_common_open(td, dfd, path, args->flags, args->mode)); } int linux_open(struct thread *td, struct linux_open_args *args) { char *path; if (args->flags & LINUX_O_CREAT) LCONVPATHCREAT(td, args->path, &path); else LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(open)) printf(ARGS(open, "%s, 0x%x, 0x%x"), path, args->flags, args->mode); #endif return (linux_common_open(td, AT_FDCWD, path, args->flags, args->mode)); } int linux_lseek(struct thread *td, struct linux_lseek_args *args) { struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ tmp_args; int error; #ifdef DEBUG if (ldebug(lseek)) printf(ARGS(lseek, "%d, %ld, %d"), args->fdes, (long)args->off, args->whence); #endif tmp_args.fd = args->fdes; tmp_args.offset = (off_t)args->off; tmp_args.whence = args->whence; error = sys_lseek(td, &tmp_args); return error; } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_llseek(struct thread *td, struct linux_llseek_args *args) { struct lseek_args bsd_args; int error; off_t off; #ifdef DEBUG if (ldebug(llseek)) printf(ARGS(llseek, "%d, %d:%d, %d"), args->fd, args->ohigh, args->olow, args->whence); #endif off = (args->olow) | (((off_t) args->ohigh) << 32); bsd_args.fd = args->fd; bsd_args.offset = off; bsd_args.whence = args->whence; if ((error = sys_lseek(td, &bsd_args))) return error; if ((error = copyout(td->td_retval, args->res, sizeof (off_t)))) return error; td->td_retval[0] = 0; return 0; } int linux_readdir(struct thread *td, struct linux_readdir_args *args) { struct linux_getdents_args lda; lda.fd = args->fd; lda.dent = args->dent; lda.count = 1; return linux_getdents(td, &lda); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * Note that linux_getdents(2) and linux_getdents64(2) have the same * arguments. They only differ in the definition of struct dirent they * operate on. We use this to common the code, with the exception of * accessing struct dirent. Note that linux_readdir(2) is implemented * by means of linux_getdents(2). In this case we never operate on * struct dirent64 and thus don't need to handle it... */ struct l_dirent { l_ulong d_ino; l_off_t d_off; l_ushort d_reclen; char d_name[LINUX_NAME_MAX + 1]; }; struct l_dirent64 { uint64_t d_ino; int64_t d_off; l_ushort d_reclen; u_char d_type; char d_name[LINUX_NAME_MAX + 1]; }; /* * Linux uses the last byte in the dirent buffer to store d_type, * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes. */ #define LINUX_RECLEN(namlen) \ roundup((offsetof(struct l_dirent, d_name) + (namlen) + 2), \ sizeof(l_ulong)) #define LINUX_RECLEN64(namlen) \ roundup((offsetof(struct l_dirent64, d_name) + (namlen) + 1), \ sizeof(uint64_t)) #define LINUX_MAXRECLEN max(LINUX_RECLEN(LINUX_NAME_MAX), \ LINUX_RECLEN64(LINUX_NAME_MAX)) #define LINUX_DIRBLKSIZ 512 static int getdents_common(struct thread *td, struct linux_getdents64_args *args, int is64bit) { struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen=0; /* Linux-format */ caddr_t lbuf; /* Linux-format */ cap_rights_t rights; struct file *fp; struct uio auio; struct iovec aiov; off_t off; struct l_dirent *linux_dirent; struct l_dirent64 *linux_dirent64; int buflen, error, eofflag, nbytes, justone; u_long *cookies = NULL, *cookiep; int ncookies; nbytes = args->count; if (nbytes == 1) { /* readdir(2) case. Always struct dirent. */ if (is64bit) return (EINVAL); nbytes = sizeof(*linux_dirent); justone = 1; } else justone = 0; error = getvnode(td->td_proc->p_fd, args->fd, cap_rights_init(&rights, CAP_READ), &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } off = foffset_lock(fp, 0); vp = fp->f_vnode; if (vp->v_type != VDIR) { foffset_unlock(fp, off, 0); fdrop(fp, td); return (EINVAL); } buflen = max(LINUX_DIRBLKSIZ, nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); lbuf = malloc(LINUX_MAXRECLEN, M_TEMP, M_WAITOK | M_ZERO); vn_lock(vp, LK_SHARED | LK_RETRY); aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = buflen; auio.uio_offset = off; #ifdef MAC /* * Do directory search MAC check using non-cached credentials. */ if ((error = mac_vnode_check_readdir(td->td_ucred, vp))) goto out; #endif /* MAC */ if ((error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies))) goto out; inp = buf; outp = (caddr_t)args->dirent; resid = nbytes; if ((len = buflen - auio.uio_resid) <= 0) goto eof; cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { bdp = (struct dirent *) inp; len -= bdp->d_reclen; inp += bdp->d_reclen; cookiep++; ncookies--; } } while (len > 0) { if (cookiep && ncookies == 0) break; bdp = (struct dirent *) inp; reclen = bdp->d_reclen; if (reclen & 3) { error = EFAULT; goto out; } if (bdp->d_fileno == 0) { inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; len -= reclen; continue; } linuxreclen = (is64bit) ? LINUX_RECLEN64(bdp->d_namlen) : LINUX_RECLEN(bdp->d_namlen); if (reclen > len || resid < linuxreclen) { outp++; break; } if (justone) { /* readdir(2) case. */ linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = (l_off_t)linuxreclen; linux_dirent->d_reclen = (l_ushort)bdp->d_namlen; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)); error = copyout(linux_dirent, outp, linuxreclen); } if (is64bit) { linux_dirent64 = (struct l_dirent64*)lbuf; linux_dirent64->d_ino = bdp->d_fileno; linux_dirent64->d_off = (cookiep) ? (l_off_t)*cookiep : (l_off_t)(off + reclen); linux_dirent64->d_reclen = (l_ushort)linuxreclen; linux_dirent64->d_type = bdp->d_type; strlcpy(linux_dirent64->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent64, d_name)); error = copyout(linux_dirent64, outp, linuxreclen); } else if (!justone) { linux_dirent = (struct l_dirent*)lbuf; linux_dirent->d_ino = bdp->d_fileno; linux_dirent->d_off = (cookiep) ? (l_off_t)*cookiep : (l_off_t)(off + reclen); linux_dirent->d_reclen = (l_ushort)linuxreclen; /* * Copy d_type to last byte of l_dirent buffer */ lbuf[linuxreclen-1] = bdp->d_type; strlcpy(linux_dirent->d_name, bdp->d_name, linuxreclen - offsetof(struct l_dirent, d_name)-1); error = copyout(linux_dirent, outp, linuxreclen); } if (error) goto out; inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; outp += linuxreclen; resid -= linuxreclen; len -= reclen; if (justone) break; } if (outp == (caddr_t)args->dirent) { nbytes = resid; goto eof; } if (justone) nbytes = resid + linuxreclen; eof: td->td_retval[0] = nbytes - resid; out: free(cookies, M_TEMP); VOP_UNLOCK(vp, 0); foffset_unlock(fp, off, 0); fdrop(fp, td); free(buf, M_TEMP); free(lbuf, M_TEMP); return (error); } int linux_getdents(struct thread *td, struct linux_getdents_args *args) { #ifdef DEBUG if (ldebug(getdents)) printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count); #endif return (getdents_common(td, (struct linux_getdents64_args*)args, 0)); } int linux_getdents64(struct thread *td, struct linux_getdents64_args *args) { #ifdef DEBUG if (ldebug(getdents64)) printf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count); #endif return (getdents_common(td, args, 1)); } /* * These exist mainly for hooks for doing /compat/linux translation. */ int linux_access(struct thread *td, struct linux_access_args *args) { char *path; int error; /* linux convention */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(access)) printf(ARGS(access, "%s, %d"), path, args->amode); #endif error = kern_accessat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); return (error); } int linux_faccessat(struct thread *td, struct linux_faccessat_args *args) { char *path; int error, dfd, flag; if (args->flag & ~LINUX_AT_EACCESS) return (EINVAL); /* linux convention */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(access)) printf(ARGS(access, "%s, %d"), path, args->amode); #endif flag = (args->flag & LINUX_AT_EACCESS) == 0 ? 0 : AT_EACCESS; error = kern_accessat(td, dfd, path, UIO_SYSSPACE, flag, args->amode); LFREEPATH(path); return (error); } int linux_unlink(struct thread *td, struct linux_unlink_args *args) { char *path; int error; struct stat st; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(unlink)) printf(ARGS(unlink, "%s"), path); #endif error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0); if (error == EPERM) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st, NULL) == 0) { if (S_ISDIR(st.st_mode)) error = EISDIR; } } LFREEPATH(path); return (error); } int linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args) { char *path; int error, dfd; struct stat st; if (args->flag & ~LINUX_AT_REMOVEDIR) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); #ifdef DEBUG if (ldebug(unlinkat)) printf(ARGS(unlinkat, "%s"), path); #endif if (args->flag & LINUX_AT_REMOVEDIR) error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE); else error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE, 0); if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) { /* Introduce POSIX noncompliant behaviour of Linux */ if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path, UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode)) error = EISDIR; } LFREEPATH(path); return (error); } int linux_chdir(struct thread *td, struct linux_chdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(chdir)) printf(ARGS(chdir, "%s"), path); #endif error = kern_chdir(td, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } int linux_chmod(struct thread *td, struct linux_chmod_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(chmod)) printf(ARGS(chmod, "%s, %d"), path, args->mode); #endif error = kern_fchmodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } int linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(fchmodat)) printf(ARGS(fchmodat, "%s, %d"), path, args->mode); #endif error = kern_fchmodat(td, dfd, path, UIO_SYSSPACE, args->mode, 0); LFREEPATH(path); return (error); } int linux_mkdir(struct thread *td, struct linux_mkdir_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); #ifdef DEBUG if (ldebug(mkdir)) printf(ARGS(mkdir, "%s, %d"), path, args->mode); #endif error = kern_mkdirat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } int linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->pathname, &path, dfd); #ifdef DEBUG if (ldebug(mkdirat)) printf(ARGS(mkdirat, "%s, %d"), path, args->mode); #endif error = kern_mkdirat(td, dfd, path, UIO_SYSSPACE, args->mode); LFREEPATH(path); return (error); } int linux_rmdir(struct thread *td, struct linux_rmdir_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(rmdir)) printf(ARGS(rmdir, "%s"), path); #endif error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE); LFREEPATH(path); return (error); } int linux_rename(struct thread *td, struct linux_rename_args *args) { char *from, *to; int error; LCONVPATHEXIST(td, args->from, &from); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(from); return (error); } #ifdef DEBUG if (ldebug(rename)) printf(ARGS(rename, "%s, %s"), from, to); #endif error = kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } int linux_renameat(struct thread *td, struct linux_renameat_args *args) { char *from, *to; int error, olddfd, newdfd; olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &from, olddfd); /* Expand LCONVPATHCREATE so that `from' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(from); return (error); } #ifdef DEBUG if (ldebug(renameat)) printf(ARGS(renameat, "%s, %s"), from, to); #endif error = kern_renameat(td, olddfd, from, newdfd, to, UIO_SYSSPACE); LFREEPATH(from); LFREEPATH(to); return (error); } int linux_symlink(struct thread *td, struct linux_symlink_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(symlink)) printf(ARGS(symlink, "%s, %s"), path, to); #endif error = kern_symlinkat(td, path, AT_FDCWD, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args) { char *path, *to; int error, dfd; dfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &path, dfd); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, dfd); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(symlinkat)) printf(ARGS(symlinkat, "%s, %s"), path, to); #endif error = kern_symlinkat(td, path, dfd, to, UIO_SYSSPACE); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_readlink(struct thread *td, struct linux_readlink_args *args) { char *name; int error; LCONVPATHEXIST(td, args->name, &name); #ifdef DEBUG if (ldebug(readlink)) printf(ARGS(readlink, "%s, %p, %d"), name, (void *)args->buf, args->count); #endif error = kern_readlinkat(td, AT_FDCWD, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->count); LFREEPATH(name); return (error); } int linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args) { char *name; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->path, &name, dfd); #ifdef DEBUG if (ldebug(readlinkat)) printf(ARGS(readlinkat, "%s, %p, %d"), name, (void *)args->buf, args->bufsiz); #endif error = kern_readlinkat(td, dfd, name, UIO_SYSSPACE, args->buf, UIO_USERSPACE, args->bufsiz); LFREEPATH(name); return (error); } int linux_truncate(struct thread *td, struct linux_truncate_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(truncate)) printf(ARGS(truncate, "%s, %ld"), path, (long)args->length); #endif error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_truncate64(struct thread *td, struct linux_truncate64_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(truncate64)) printf(ARGS(truncate64, "%s, %jd"), path, args->length); #endif error = kern_truncate(td, path, UIO_SYSSPACE, args->length); LFREEPATH(path); return (error); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ + int linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args) { struct ftruncate_args /* { int fd; int pad; off_t length; } */ nuap; nuap.fd = args->fd; nuap.length = args->length; return (sys_ftruncate(td, &nuap)); } int linux_link(struct thread *td, struct linux_link_args *args) { char *path, *to; int error; LCONVPATHEXIST(td, args->path, &path); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->to, UIO_USERSPACE, &to, 1, AT_FDCWD); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(link)) printf(ARGS(link, "%s, %s"), path, to); #endif error = kern_linkat(td, AT_FDCWD, AT_FDCWD, path, to, UIO_SYSSPACE, FOLLOW); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_linkat(struct thread *td, struct linux_linkat_args *args) { char *path, *to; int error, olddfd, newdfd, follow; if (args->flag & ~LINUX_AT_SYMLINK_FOLLOW) return (EINVAL); olddfd = (args->olddfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->olddfd; newdfd = (args->newdfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->newdfd; LCONVPATHEXIST_AT(td, args->oldname, &path, olddfd); /* Expand LCONVPATHCREATE so that `path' can be freed on errors */ error = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &to, 1, newdfd); if (to == NULL) { LFREEPATH(path); return (error); } #ifdef DEBUG if (ldebug(linkat)) printf(ARGS(linkat, "%i, %s, %i, %s, %i"), args->olddfd, path, args->newdfd, to, args->flag); #endif follow = (args->flag & LINUX_AT_SYMLINK_FOLLOW) == 0 ? NOFOLLOW : FOLLOW; error = kern_linkat(td, olddfd, newdfd, path, to, UIO_SYSSPACE, follow); LFREEPATH(path); LFREEPATH(to); return (error); } int linux_fdatasync(td, uap) struct thread *td; struct linux_fdatasync_args *uap; { struct fsync_args bsd; bsd.fd = uap->fd; return sys_fsync(td, &bsd); } int linux_pread(td, uap) struct thread *td; struct linux_pread_args *uap; { struct pread_args bsd; cap_rights_t rights; struct vnode *vp; int error; bsd.fd = uap->fd; bsd.buf = uap->buf; bsd.nbyte = uap->nbyte; bsd.offset = uap->offset; error = sys_pread(td, &bsd); if (error == 0) { /* This seems to violate POSIX but linux does it */ error = fgetvp(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &vp); if (error != 0) return (error); if (vp->v_type == VDIR) { vrele(vp); return (EISDIR); } vrele(vp); } return (error); } int linux_pwrite(td, uap) struct thread *td; struct linux_pwrite_args *uap; { struct pwrite_args bsd; bsd.fd = uap->fd; bsd.buf = uap->buf; bsd.nbyte = uap->nbyte; bsd.offset = uap->offset; return sys_pwrite(td, &bsd); } int linux_mount(struct thread *td, struct linux_mount_args *args) { char fstypename[MFSNAMELEN]; char mntonname[MNAMELEN], mntfromname[MNAMELEN]; int error; int fsflags; error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1, NULL); if (error) return (error); error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL); if (error) return (error); error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL); if (error) return (error); #ifdef DEBUG if (ldebug(mount)) printf(ARGS(mount, "%s, %s, %s"), fstypename, mntfromname, mntonname); #endif if (strcmp(fstypename, "ext2") == 0) { strcpy(fstypename, "ext2fs"); } else if (strcmp(fstypename, "proc") == 0) { strcpy(fstypename, "linprocfs"); } else if (strcmp(fstypename, "vfat") == 0) { strcpy(fstypename, "msdosfs"); } fsflags = 0; if ((args->rwflag & 0xffff0000) == 0xc0ed0000) { /* * Linux SYNC flag is not included; the closest equivalent * FreeBSD has is !ASYNC, which is our default. */ if (args->rwflag & LINUX_MS_RDONLY) fsflags |= MNT_RDONLY; if (args->rwflag & LINUX_MS_NOSUID) fsflags |= MNT_NOSUID; if (args->rwflag & LINUX_MS_NOEXEC) fsflags |= MNT_NOEXEC; if (args->rwflag & LINUX_MS_REMOUNT) fsflags |= MNT_UPDATE; } error = kernel_vmount(fsflags, "fstype", fstypename, "fspath", mntonname, "from", mntfromname, NULL); return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_oldumount(struct thread *td, struct linux_oldumount_args *args) { struct linux_umount_args args2; args2.path = args->path; args2.flags = 0; return (linux_umount(td, &args2)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_umount(struct thread *td, struct linux_umount_args *args) { struct unmount_args bsd; bsd.path = args->path; bsd.flags = args->flags; /* XXX correct? */ return (sys_unmount(td, &bsd)); } /* * fcntl family of syscalls */ struct l_flock { l_short l_type; l_short l_whence; l_off_t l_start; l_off_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_off_t)bsd_flock->l_start; linux_flock->l_len = (l_off_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_flock64 { l_short l_type; l_short l_whence; l_loff_t l_start; l_loff_t l_len; l_pid_t l_pid; } #if defined(__amd64__) && defined(COMPAT_LINUX32) __packed #endif ; static void linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; bsd_flock->l_sysid = 0; } static void bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (l_loff_t)bsd_flock->l_start; linux_flock->l_len = (l_loff_t)bsd_flock->l_len; linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid; } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int -fcntl_common(struct thread *td, struct linux_fcntl64_args *args) +fcntl_common(struct thread *td, struct linux_fcntl_args *args) { struct l_flock linux_flock; struct flock bsd_flock; cap_rights_t rights; struct file *fp; long arg; int error, result; switch (args->cmd) { case LINUX_F_DUPFD: return (kern_fcntl(td, args->fd, F_DUPFD, args->arg)); case LINUX_F_GETFD: return (kern_fcntl(td, args->fd, F_GETFD, 0)); case LINUX_F_SETFD: return (kern_fcntl(td, args->fd, F_SETFD, args->arg)); case LINUX_F_GETFL: error = kern_fcntl(td, args->fd, F_GETFL, 0); result = td->td_retval[0]; td->td_retval[0] = 0; if (result & O_RDONLY) td->td_retval[0] |= LINUX_O_RDONLY; if (result & O_WRONLY) td->td_retval[0] |= LINUX_O_WRONLY; if (result & O_RDWR) td->td_retval[0] |= LINUX_O_RDWR; if (result & O_NDELAY) td->td_retval[0] |= LINUX_O_NONBLOCK; if (result & O_APPEND) td->td_retval[0] |= LINUX_O_APPEND; if (result & O_FSYNC) td->td_retval[0] |= LINUX_O_SYNC; if (result & O_ASYNC) td->td_retval[0] |= LINUX_FASYNC; #ifdef LINUX_O_NOFOLLOW if (result & O_NOFOLLOW) td->td_retval[0] |= LINUX_O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (result & O_DIRECT) td->td_retval[0] |= LINUX_O_DIRECT; #endif return (error); case LINUX_F_SETFL: arg = 0; if (args->arg & LINUX_O_NDELAY) arg |= O_NONBLOCK; if (args->arg & LINUX_O_APPEND) arg |= O_APPEND; if (args->arg & LINUX_O_SYNC) arg |= O_FSYNC; if (args->arg & LINUX_FASYNC) arg |= O_ASYNC; #ifdef LINUX_O_NOFOLLOW if (args->arg & LINUX_O_NOFOLLOW) arg |= O_NOFOLLOW; #endif #ifdef LINUX_O_DIRECT if (args->arg & LINUX_O_DIRECT) arg |= O_DIRECT; #endif return (kern_fcntl(td, args->fd, F_SETFL, arg)); case LINUX_F_GETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); case LINUX_F_GETOWN: return (kern_fcntl(td, args->fd, F_GETOWN, 0)); case LINUX_F_SETOWN: /* * XXX some Linux applications depend on F_SETOWN having no * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ error = fget(td, args->fd, cap_rights_init(&rights, CAP_FCNTL), &fp); if (error) return (error); if (fp->f_type == DTYPE_PIPE) { fdrop(fp, td); return (EINVAL); } fdrop(fp, td); return (kern_fcntl(td, args->fd, F_SETOWN, args->arg)); } return (EINVAL); } int linux_fcntl(struct thread *td, struct linux_fcntl_args *args) { - struct linux_fcntl64_args args64; #ifdef DEBUG if (ldebug(fcntl)) printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd); #endif - args64.fd = args->fd; - args64.cmd = args->cmd; - args64.arg = args->arg; - return (fcntl_common(td, &args64)); + return (fcntl_common(td, args)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) { struct l_flock64 linux_flock; struct flock bsd_flock; + struct linux_fcntl_args fcntl_args; int error; #ifdef DEBUG if (ldebug(fcntl64)) printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd); #endif switch (args->cmd) { case LINUX_F_GETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock); if (error) return (error); bsd_to_linux_flock64(&bsd_flock, &linux_flock); return (copyout(&linux_flock, (void *)args->arg, sizeof(linux_flock))); case LINUX_F_SETLK64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLK, (intptr_t)&bsd_flock)); case LINUX_F_SETLKW64: error = copyin((void *)args->arg, &linux_flock, sizeof(linux_flock)); if (error) return (error); linux_to_bsd_flock64(&linux_flock, &bsd_flock); return (kern_fcntl(td, args->fd, F_SETLKW, (intptr_t)&bsd_flock)); } - return (fcntl_common(td, args)); + fcntl_args.fd = args->fd; + fcntl_args.cmd = args->cmd; + fcntl_args.arg = args->arg; + return (fcntl_common(td, &fcntl_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_chown(struct thread *td, struct linux_chown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(chown)) printf(ARGS(chown, "%s, %d, %d"), path, args->uid, args->gid); #endif error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, 0); LFREEPATH(path); return (error); } int linux_fchownat(struct thread *td, struct linux_fchownat_args *args) { char *path; int error, dfd, flag; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(fchownat)) printf(ARGS(fchownat, "%s, %d, %d"), path, args->uid, args->gid); #endif flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) == 0 ? 0 : AT_SYMLINK_NOFOLLOW; error = kern_fchownat(td, dfd, path, UIO_SYSSPACE, args->uid, args->gid, flag); LFREEPATH(path); return (error); } int linux_lchown(struct thread *td, struct linux_lchown_args *args) { char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(lchown)) printf(ARGS(lchown, "%s, %d, %d"), path, args->uid, args->gid); #endif error = kern_fchownat(td, AT_FDCWD, path, UIO_SYSSPACE, args->uid, args->gid, AT_SYMLINK_NOFOLLOW); LFREEPATH(path); return (error); } static int convert_fadvice(int advice) { switch (advice) { case LINUX_POSIX_FADV_NORMAL: return (POSIX_FADV_NORMAL); case LINUX_POSIX_FADV_RANDOM: return (POSIX_FADV_RANDOM); case LINUX_POSIX_FADV_SEQUENTIAL: return (POSIX_FADV_SEQUENTIAL); case LINUX_POSIX_FADV_WILLNEED: return (POSIX_FADV_WILLNEED); case LINUX_POSIX_FADV_DONTNEED: return (POSIX_FADV_DONTNEED); case LINUX_POSIX_FADV_NOREUSE: return (POSIX_FADV_NOREUSE); default: return (-1); } } int linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args) { int advice; advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) { int advice; advice = convert_fadvice(args->advice); if (advice == -1) return (EINVAL); return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_pipe(struct thread *td, struct linux_pipe_args *args) { int fildes[2]; int error; #ifdef DEBUG if (ldebug(pipe)) printf(ARGS(pipe, "*")); #endif error = kern_pipe2(td, fildes, 0); if (error) return (error); /* XXX: Close descriptors on error. */ return (copyout(fildes, args->pipefds, sizeof(fildes))); } int linux_pipe2(struct thread *td, struct linux_pipe2_args *args) { int fildes[2]; int error, flags; #ifdef DEBUG if (ldebug(pipe2)) printf(ARGS(pipe2, "*, %d"), args->flags); #endif if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0) return (EINVAL); flags = 0; if ((args->flags & LINUX_O_NONBLOCK) != 0) flags |= O_NONBLOCK; if ((args->flags & LINUX_O_CLOEXEC) != 0) flags |= O_CLOEXEC; error = kern_pipe2(td, fildes, flags); if (error) return (error); /* XXX: Close descriptors on error. */ return (copyout(fildes, args->pipefds, sizeof(fildes))); } int linux_dup3(struct thread *td, struct linux_dup3_args *args) { int cmd; intptr_t newfd; if (args->oldfd == args->newfd) return (EINVAL); if ((args->flags & ~LINUX_O_CLOEXEC) != 0) return (EINVAL); if (args->flags & LINUX_O_CLOEXEC) cmd = F_DUP2FD_CLOEXEC; else cmd = F_DUP2FD; newfd = args->newfd; return (kern_fcntl(td, args->oldfd, cmd, newfd)); } Index: head/sys/compat/linux/linux_misc.c =================================================================== --- head/sys/compat/linux/linux_misc.c (revision 283414) +++ head/sys/compat/linux/linux_misc.c (revision 283415) @@ -1,2284 +1,2288 @@ /*- * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #if defined(__i386__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalbig; l_ulong freebig; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; struct l_pselect6arg { l_uintptr_t ss; l_size_t ss_len; }; int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; vm_object_t object; int i, j; struct timespec ts; getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = sysinfo.totalram - vm_cnt.v_wire_count * PAGE_SIZE; sysinfo.sharedram = 0; mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) if (object->shadow_count > 1) sysinfo.sharedram += object->resident_page_count; mtx_unlock(&vm_object_list_mtx); sysinfo.sharedram *= PAGE_SIZE; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* The following are only present in newer Linux kernels. */ sysinfo.totalbig = 0; sysinfo.freebig = 0; sysinfo.mem_unit = 1; return (copyout(&sysinfo, args->info, sizeof(sysinfo))); } int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; int error; #ifdef DEBUG if (ldebug(alarm)) printf(ARGS(alarm, "%u"), args->secs); #endif secs = args->secs; if (secs > INT_MAX) secs = INT_MAX; it.it_value.tv_sec = (long) secs; it.it_value.tv_usec = 0; it.it_interval.tv_sec = 0; it.it_interval.tv_usec = 0; error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); if (error) return (error); if (timevalisset(&old_it.it_value)) { if (old_it.it_value.tv_usec != 0) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; } return (0); } int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; vm_offset_t new, old; struct obreak_args /* { char * nsize; } */ tmp; #ifdef DEBUG if (ldebug(brk)) printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); #endif old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (vm_offset_t)args->dsend; tmp.nsize = (char *)new; if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) td->td_retval[0] = (long)new; else td->td_retval[0] = (long)old; return (0); } #if defined(__i386__) /* XXX: what about amd64/linux32? */ int linux_uselib(struct thread *td, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; struct vattr attr; vm_offset_t vmaddr; unsigned long file_offset; unsigned long bss_size; char *library; ssize_t aresid; int error, locked, writecount; LCONVPATHEXIST(td, args->library, &library); #ifdef DEBUG if (ldebug(uselib)) printf(ARGS(uselib, "%s"), library); #endif a_out = NULL; locked = 0; vp = NULL; NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, library, td); error = namei(&ni); LFREEPATH(library); if (error) goto cleanup; vp = ni.ni_vp; NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. * XXX: The code below largely duplicates exec_check_permissions(). */ locked = 1; /* Writable? */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) goto cleanup; if (writecount != 0) { error = ETXTBSY; goto cleanup; } /* Executable? */ error = VOP_GETATTR(vp, &attr, td->td_ucred); if (error) goto cleanup; if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { /* EACCESS is what exec(2) returns. */ error = ENOEXEC; goto cleanup; } /* Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; goto cleanup; } /* Can we access it? */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) goto cleanup; /* * XXX: This should use vn_open() so that it is properly authorized, * and to reduce code redundancy all over the place here. * XXX: Not really, it duplicates far more of exec_check_permissions() * than vn_open(). */ #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VREAD); if (error) goto cleanup; #endif error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error) goto cleanup; /* Pull in executable header into exec_map */ error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); if (error) goto cleanup; /* Is it a Linux binary ? */ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* * While we are here, we should REALLY do some more checks */ /* Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* Check various fields in header for validity/bounds. */ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* * text/data/bss must not exceed limits * XXX - this is not complete. it should check current usage PLUS * the resources needed by this library. */ PROC_LOCK(td->td_proc); if (a_out->a_text > maxtsiz || a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA) || racct_set(td->td_proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(td->td_proc); error = ENOMEM; goto cleanup; } PROC_UNLOCK(td->td_proc); /* * Prevent more writers. * XXX: Note that if any of the VM operations fail below we don't * clear this flag. */ VOP_SET_TEXT(vp); /* * Lock no longer needed */ locked = 0; VOP_UNLOCK(vp, 0); /* * Check if file_offset page aligned. Currently we cannot handle * misalinged file offsets, and so we read in the entire image * (what a waste). */ if (file_offset & PAGE_MASK) { #ifdef DEBUG printf("uselib: Non page aligned binary %lu\n", file_offset); #endif /* Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, td->td_ucred, NOCRED, &aresid, td); if (error != 0) goto cleanup; if (aresid != 0) { error = ENOEXEC; goto cleanup; } } else { #ifdef DEBUG printf("uselib: Page aligned binary %lu\n", file_offset); #endif /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * Map it all into the process's space as a single * copy-on-write "data" segment. */ error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); if (error) goto cleanup; } #ifdef DEBUG printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], ((long *)vmaddr)[1]); #endif if (bss_size != 0) { /* Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* allocate some 'anon' space */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: /* Unlock vnode if needed */ if (locked) VOP_UNLOCK(vp, 0); /* Release the temporary mapping. */ if (a_out) kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); return (error); } #endif /* __i386__ */ int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; #ifdef DEBUG if (ldebug(select)) printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, (void *)args->readfds, (void *)args->writefds, (void *)args->exceptfds, (void *)args->timeout); #endif /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; #ifdef DEBUG if (ldebug(select)) printf(LMSG("incoming timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. */ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, sizeof(l_int) * 8); #ifdef DEBUG if (ldebug(select)) printf(LMSG("real select returns %d"), error); #endif if (error) goto select_out; if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); #ifdef DEBUG if (ldebug(select)) printf(LMSG("outgoing timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(<v, args->timeout, sizeof(ltv)))) goto select_out; } select_out: #ifdef DEBUG if (ldebug(select)) printf(LMSG("select_out -> %d"), error); #endif return (error); } int linux_mremap(struct thread *td, struct linux_mremap_args *args) { struct munmap_args /* { void *addr; size_t len; } */ bsd_args; int error = 0; #ifdef DEBUG if (ldebug(mremap)) printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), (void *)(uintptr_t)args->addr, (unsigned long)args->old_len, (unsigned long)args->new_len, (unsigned long)args->flags); #endif if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return (ENOMEM); } if (args->new_len < args->old_len) { bsd_args.addr = (caddr_t)((uintptr_t)args->addr + args->new_len); bsd_args.len = args->old_len - args->new_len; error = sys_munmap(td, &bsd_args); } td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; return (error); } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { struct msync_args bsd_args; bsd_args.addr = (caddr_t)(uintptr_t)args->addr; bsd_args.len = (uintptr_t)args->len; bsd_args.flags = args->fl & ~LINUX_MS_SYNC; return (sys_msync(td, &bsd_args)); } int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; #ifdef DEBUG if (ldebug(time)) printf(ARGS(time, "*")); #endif microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return (error); td->td_retval[0] = tm; return (0); } struct l_times_argv { l_clock_t tms_utime; l_clock_t tms_stime; l_clock_t tms_cutime; l_clock_t tms_cstime; }; /* * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK * auxiliary vector entry. */ #define CLK_TCK 100 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ CONVNTCK(r) : CONVOTCK(r)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; #ifdef DEBUG if (ldebug(times)) printf(ARGS(times, "*")); #endif if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return (error); } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return (0); } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; #ifdef DEBUG if (ldebug(newuname)) printf(ARGS(newuname, "*")); #endif linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); return (copyout(&utsname, args->buf, sizeof(utsname))); } -#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utime)) printf(ARGS(utime, "%s, *"), fname); #endif if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utimes)) printf(ARGS(utimes, "%s, *"), fname); #endif if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); #ifdef DEBUG if (ldebug(futimesat)) printf(ARGS(futimesat, "%s, *"), fname); #endif if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } -#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_common_wait(struct thread *td, int pid, int *status, int options, struct rusage *ru) { int error, tmpstat; error = kern_wait(td, pid, &tmpstat, options, ru); if (error) return (error); if (status) { tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); error = copyout(&tmpstat, status, sizeof(int)); } return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { int options; #ifdef DEBUG if (ldebug(waitpid)) printf(ARGS(waitpid, "%d, %p, %d"), args->pid, (void *)args->status, args->options); #endif /* * this is necessary because the test in kern_wait doesn't work * because we mess with the options here */ if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) return (EINVAL); options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) options |= WLINUXCLONE; return (linux_common_wait(td, args->pid, args->status, options, NULL)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct rusage ru, *rup; #ifdef DEBUG if (ldebug(wait4)) printf(ARGS(wait4, "%d, %p, %d, %p"), args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) options |= WLINUXCLONE; if (args->rusage != NULL) rup = &ru; else rup = NULL; error = linux_common_wait(td, args->pid, args->status, options, rup); if (error != 0) return (error); if (args->rusage != NULL) error = linux_copyout_rusage(&ru, args->rusage); return (error); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { int status, options, sig; struct __wrusage wru; siginfo_t siginfo; l_siginfo_t lsi; idtype_t idtype; struct proc *p; int error; options = 0; linux_to_bsd_waitopts(args->options, &options); if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) return (EINVAL); if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) return (EINVAL); switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; break; case LINUX_P_PID: if (args->id <= 0) return (EINVAL); idtype = P_PID; break; case LINUX_P_PGID: if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; default: return (EINVAL); } error = kern_wait6(td, idtype, args->id, &status, options, &wru, &siginfo); if (error != 0) return (error); if (args->rusage != NULL) { error = linux_copyout_rusage(&wru.wru_children, args->rusage); if (error != 0) return (error); } if (args->info != NULL) { p = td->td_proc; if (td->td_retval[0] == 0) bzero(&lsi, sizeof(lsi)); else { sig = BSD_TO_LINUX_SIGNAL(siginfo.si_signo); siginfo_to_lsiginfo(&siginfo, &lsi, sig); } error = copyout(&lsi, args->info, sizeof(lsi)); } td->td_retval[0] = 0; return (error); } int linux_mknod(struct thread *td, struct linux_mknod_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); #ifdef DEBUG if (ldebug(mknod)) printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(mknodat)) printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, dfd, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! */ int linux_personality(struct thread *td, struct linux_personality_args *args) { #ifdef DEBUG if (ldebug(personality)) printf(ARGS(personality, "%lu"), (unsigned long)args->per); #endif if (args->per != 0) return (EINVAL); /* Yes Jim, it's still a Linux... */ td->td_retval[0] = 0; return (0); } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; #ifdef DEBUG if (ldebug(setitimer)) printf(ARGS(setitimer, "%p, %p"), (void *)uap->itv, (void *)uap->oitv); #endif if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); #ifdef DEBUG if (ldebug(setitimer)) { printf("setitimer: value: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); printf("setitimer: interval: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); } #endif error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; #ifdef DEBUG if (ldebug(getitimer)) printf(ARGS(getitimer, "%p"), (void *)uap->itv); #endif error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { struct setpriority_args bsd_args; bsd_args.which = PRIO_PROCESS; bsd_args.who = 0; /* current process */ bsd_args.prio = args->inc; return (sys_setpriority(td, &bsd_args)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; newcred = crget(); p = td->td_proc; PROC_LOCK(p); oldcred = crcopysafe(p, newcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { PROC_UNLOCK(p); crfree(newcred); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_TEMP); return (error); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. */ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_TEMP, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); free(linux_gidset, M_TEMP); if (error) return (error); td->td_retval[0] = ngrp; return (0); } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; #ifdef DEBUG if (ldebug(setrlimit)) printf(ARGS(setrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct proc *p = td->td_proc; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(old_getrlimit)) printf(ARGS(old_getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); PROC_LOCK(p); lim_rlimit(p, which, &bsd_rlim); PROC_UNLOCK(p); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) { struct l_rlimit rlim; struct proc *p = td->td_proc; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(getrlimit)) printf(ARGS(getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); PROC_LOCK(p); lim_rlimit(p, which, &bsd_rlim); PROC_UNLOCK(p); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_param sched_param; struct thread *tdt; int error, policy; #ifdef DEBUG if (ldebug(sched_setscheduler)) printf(ARGS(sched_setscheduler, "%d, %d, %p"), args->pid, args->policy, (const void *)args->param); #endif switch (args->policy) { case LINUX_SCHED_OTHER: policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: policy = SCHED_FIFO; break; case LINUX_SCHED_RR: policy = SCHED_RR; break; default: return (EINVAL); } error = copyin(args->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setscheduler(td, tdt, policy, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { struct thread *tdt; int error, policy; #ifdef DEBUG if (ldebug(sched_getscheduler)) printf(ARGS(sched_getscheduler, "%d"), args->pid); #endif tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return (error); } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_max)) printf(ARGS(sched_get_priority_max, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_max(td, &bsd)); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_min)) printf(ARGS(sched_get_priority_min, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_min(td, &bsd)); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; #ifdef DEBUG if (ldebug(reboot)) printf(ARGS(reboot, "0x%x"), args->cmd); #endif if (args->magic1 != REBOOT_MAGIC1) return (EINVAL); switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return (EINVAL); } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return (EINVAL); } return (sys_reboot(td, &bsd_args)); } /* * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that * are assumed to be preserved. The following lightweight syscalls fixes * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c * * linux_getpid() - MP SAFE * linux_getgid() - MP SAFE * linux_getuid() - MP SAFE */ int linux_getpid(struct thread *td, struct linux_getpid_args *args) { #ifdef DEBUG if (ldebug(getpid)) printf(ARGS(getpid, "")); #endif td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { struct linux_emuldata *em; #ifdef DEBUG if (ldebug(gettid)) printf(ARGS(gettid, "")); #endif em = em_find(td); KASSERT(em != NULL, ("gettid: emuldata not found.\n")); td->td_retval[0] = em->em_tid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { #ifdef DEBUG if (ldebug(getppid)) printf(ARGS(getppid, "")); #endif PROC_LOCK(td->td_proc); td->td_retval[0] = td->td_proc->p_pptr->p_pid; PROC_UNLOCK(td->td_proc); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { #ifdef DEBUG if (ldebug(getgid)) printf(ARGS(getgid, "")); #endif td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { #ifdef DEBUG if (ldebug(getuid)) printf(ARGS(getuid, "")); #endif td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { struct getsid_args bsd; #ifdef DEBUG if (ldebug(getsid)) printf(ARGS(getsid, "%i"), args->pid); #endif bsd.pid = args->pid; return (sys_getsid(td, &bsd)); } int linux_nosys(struct thread *td, struct nosys_args *ignore) { return (ENOSYS); } int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { struct getpriority_args bsd_args; int error; #ifdef DEBUG if (ldebug(getpriority)) printf(ARGS(getpriority, "%i, %i"), args->which, args->who); #endif bsd_args.which = args->which; bsd_args.who = args->who; error = sys_getpriority(td, &bsd_args); td->td_retval[0] = 20 - td->td_retval[0]; return (error); } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(sethostname)) printf(ARGS(sethostname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(setdomainname)) printf(ARGS(setdomainname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { #ifdef DEBUG if (ldebug(exit_group)) printf(ARGS(exit_group, "%i"), args->error_code); #endif LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, args->error_code); /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesnt occur often. */ exit1(td, W_EXITCODE(args->error_code, 0)); /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION 0x19980330 struct l_user_cap_header { l_int version; l_int pid; }; struct l_user_cap_data { l_int effective; l_int permitted; l_int inheritable; }; int linux_capget(struct thread *td, struct linux_capget_args *args) { struct l_user_cap_header luch; struct l_user_cap_data lucd; int error; if (args->hdrp == NULL) return (EFAULT); error = copyin(args->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); if (luch.version != _LINUX_CAPABILITY_VERSION) { luch.version = _LINUX_CAPABILITY_VERSION; error = copyout(&luch, args->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); if (args->datap) { /* * The current implementation doesn't support setting * a capability (it's essentially a stub) so indicate * that no capabilities are currently set or available * to request. */ bzero (&lucd, sizeof(lucd)); error = copyout(&lucd, args->datap, sizeof(lucd)); } return (error); } int linux_capset(struct thread *td, struct linux_capset_args *args) { struct l_user_cap_header luch; struct l_user_cap_data lucd; int error; if (args->hdrp == NULL || args->datap == NULL) return (EFAULT); error = copyin(args->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); if (luch.version != _LINUX_CAPABILITY_VERSION) { luch.version = _LINUX_CAPABILITY_VERSION; error = copyout(&luch, args->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); error = copyin(args->datap, &lucd, sizeof(lucd)); if (error != 0) return (error); /* We currently don't support setting any capabilities. */ if (lucd.effective || lucd.permitted || lucd.inheritable) { linux_msg(td, "capset effective=0x%x, permitted=0x%x, " "inheritable=0x%x is not implemented", (int)lucd.effective, (int)lucd.permitted, (int)lucd.inheritable); return (EPERM); } return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; struct linux_emuldata *em; int pdeath_signal; #ifdef DEBUG if (ldebug(prctl)) printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, args->arg2, args->arg3, args->arg4, args->arg5); #endif switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); em->pdeath_signal = args->arg2; break; case LINUX_PR_GET_PDEATHSIG: em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); pdeath_signal = em->pdeath_signal; error = copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal)); break; case LINUX_PR_GET_KEEPCAPS: /* * Indicate that we always clear the effective and * permitted capability sets when the user id becomes * non-zero (actually the capability sets are simply * always zero in the current implementation). */ td->td_retval[0] = 0; break; case LINUX_PR_SET_KEEPCAPS: /* * Ignore requests to keep the effective and permitted * capability sets when the user id becomes non-zero. */ break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. */ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; default: error = EINVAL; break; } return (error); } int linux_sched_setparam(struct thread *td, struct linux_sched_setparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error; #ifdef DEBUG if (ldebug(sched_setparam)) printf(ARGS(sched_setparam, "%d, *"), uap->pid); #endif error = copyin(uap->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setparam(td, tdt, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getparam(struct thread *td, struct linux_sched_getparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error; #ifdef DEBUG if (ldebug(sched_getparam)) printf(ARGS(sched_getparam, "%d, *"), uap->pid); #endif tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); PROC_UNLOCK(tdt->td_proc); if (error == 0) error = copyout(&sched_param, uap->param, sizeof(sched_param)); return (error); } /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; struct thread *tdt; struct cpuset_getaffinity_args cga; #ifdef DEBUG if (ldebug(sched_getaffinity)) printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, args->len); #endif if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); cga.level = CPU_LEVEL_WHICH; cga.which = CPU_WHICH_TID; cga.id = tdt->td_tid; cga.cpusetsize = sizeof(cpuset_t); cga.mask = (cpuset_t *) args->user_mask_ptr; if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) td->td_retval[0] = sizeof(cpuset_t); return (error); } /* * Set affinity of a process. */ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct cpuset_setaffinity_args csa; struct thread *tdt; #ifdef DEBUG if (ldebug(sched_setaffinity)) printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, args->len); #endif if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); csa.level = CPU_LEVEL_WHICH; csa.which = CPU_WHICH_TID; csa.id = tdt->td_tid; csa.cpusetsize = sizeof(cpuset_t); csa.mask = (cpuset_t *) args->user_mask_ptr; return (sys_cpuset_setaffinity(td, &csa)); } struct linux_rlimit64 { uint64_t rlim_cur; uint64_t rlim_max; }; int linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) { struct rlimit rlim, nrlim; struct linux_rlimit64 lrlim; struct proc *p; u_int which; int flags; int error; #ifdef DEBUG if (ldebug(prlimit64)) printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, args->resource, (void *)args->new, (void *)args->old); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); if (args->new != NULL) { /* * Note. Unlike FreeBSD where rlim is signed 64-bit Linux * rlim is unsigned 64-bit. FreeBSD treats negative limits * as INFINITY so we do not need a conversion even. */ error = copyin(args->new, &nrlim, sizeof(nrlim)); if (error != 0) return (error); } flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget(args->pid, flags, &p); if (error != 0) return (error); if (args->old != NULL) { PROC_LOCK(p); lim_rlimit(p, which, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur == RLIM_INFINITY) lrlim.rlim_cur = LINUX_RLIM_INFINITY; else lrlim.rlim_cur = rlim.rlim_cur; if (rlim.rlim_max == RLIM_INFINITY) lrlim.rlim_max = LINUX_RLIM_INFINITY; else lrlim.rlim_max = rlim.rlim_max; error = copyout(&lrlim, args->old, sizeof(lrlim)); if (error != 0) goto out; } if (args->new != NULL) error = kern_proc_setrlimit(td, p, which, &nrlim); out: PRELE(p); return (error); } int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; struct l_timespec lts; struct timespec uts; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; ssp = NULL; if (args->sig != NULL) { error = copyin(args->sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); if (lpse6.ss_len != sizeof(l_ss)) return (EINVAL); if (lpse6.ss != 0) { error = copyin(PTRIN(lpse6.ss), &l_ss, sizeof(l_ss)); if (error != 0) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } } /* * Currently glibc changes nanosecond number to microsecond. * This mean losing precision but for now it is hardly seen. */ if (args->tsp != NULL) { error = copyin(args->tsp, <s, sizeof(lts)); if (error != 0) return (error); uts.tv_sec = lts.tv_sec; uts.tv_nsec = lts.tv_nsec; TIMESPEC_TO_TIMEVAL(&utv, &uts); if (itimerfix(&utv)) return (EINVAL); microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_pselect(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, ssp, sizeof(l_int) * 8); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0] != 0) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, &uts); lts.tv_sec = uts.tv_sec; lts.tv_nsec = uts.tv_nsec; error = copyout(<s, args->tsp, sizeof(lts)); } return (error); } int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; struct l_timespec lts; struct thread *tdt; int error; /* * According to man in case the invalid pid specified * EINVAL should be returned. */ if (uap->pid < 0) return (EINVAL); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, &ts); PROC_UNLOCK(tdt->td_proc); if (error != 0) return (error); lts.tv_sec = ts.tv_sec; lts.tv_nsec = ts.tv_nsec; return (copyout(<s, uap->interval, sizeof(lts))); } /* * In case when the Linux thread is the initial thread in * the thread group thread id is equal to the process id. * Glibc depends on this magic (assert in pthread_getattr_np.c). */ struct thread * linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) { struct linux_emuldata *em; struct thread *tdt; struct proc *p; tdt = NULL; if (tid == 0 || tid == td->td_tid) { tdt = td; PROC_LOCK(tdt->td_proc); } else if (tid > PID_MAX) tdt = tdfind(tid, pid); else { /* * Initial thread where the tid equal to the pid. */ p = pfind(tid); if (p != NULL) { if (SV_PROC_ABI(p) != SV_ABI_LINUX) { /* * p is not a Linuxulator process. */ PROC_UNLOCK(p); return (NULL); } FOREACH_THREAD_IN_PROC(p, tdt) { em = em_find(tdt); if (tid == em->em_tid) return (tdt); } PROC_UNLOCK(p); } return (NULL); } return (tdt); } void linux_to_bsd_waitopts(int options, int *bsdopts) { if (options & LINUX_WNOHANG) *bsdopts |= WNOHANG; if (options & LINUX_WUNTRACED) *bsdopts |= WUNTRACED; if (options & LINUX_WEXITED) *bsdopts |= WEXITED; if (options & LINUX_WCONTINUED) *bsdopts |= WCONTINUED; if (options & LINUX_WNOWAIT) *bsdopts |= WNOWAIT; if (options & __WCLONE) *bsdopts |= WLINUXCLONE; } Index: head/sys/compat/linux/linux_signal.c =================================================================== --- head/sys/compat/linux/linux_signal.c (revision 283414) +++ head/sys/compat/linux/linux_signal.c (revision 283415) @@ -1,808 +1,812 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "opt_compat.h" #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include static int linux_do_tkill(struct thread *td, struct thread *tdt, ksiginfo_t *ksi); static void sicode_to_lsicode(int si_code, int *lsi_code); +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) void linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) { int b, l; SIGEMPTYSET(*bss); bss->__bits[0] = lss->__bits[0] & ~((1U << LINUX_SIGTBLSZ) - 1); bss->__bits[1] = lss->__bits[1]; for (l = 1; l <= LINUX_SIGTBLSZ; l++) { if (LINUX_SIGISMEMBER(*lss, l)) { b = linux_to_bsd_signal[_SIG_IDX(l)]; if (b) SIGADDSET(*bss, b); } } } void bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss) { int b, l; LINUX_SIGEMPTYSET(*lss); lss->__bits[0] = bss->__bits[0] & ~((1U << LINUX_SIGTBLSZ) - 1); lss->__bits[1] = bss->__bits[1]; for (b = 1; b <= LINUX_SIGTBLSZ; b++) { if (SIGISMEMBER(*bss, b)) { l = bsd_to_linux_signal[_SIG_IDX(b)]; if (l) LINUX_SIGADDSET(*lss, l); } } } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static void linux_to_bsd_sigaction(l_sigaction_t *lsa, struct sigaction *bsa) { linux_to_bsd_sigset(&lsa->lsa_mask, &bsa->sa_mask); bsa->sa_handler = PTRIN(lsa->lsa_handler); bsa->sa_flags = 0; if (lsa->lsa_flags & LINUX_SA_NOCLDSTOP) bsa->sa_flags |= SA_NOCLDSTOP; if (lsa->lsa_flags & LINUX_SA_NOCLDWAIT) bsa->sa_flags |= SA_NOCLDWAIT; if (lsa->lsa_flags & LINUX_SA_SIGINFO) bsa->sa_flags |= SA_SIGINFO; if (lsa->lsa_flags & LINUX_SA_ONSTACK) bsa->sa_flags |= SA_ONSTACK; if (lsa->lsa_flags & LINUX_SA_RESTART) bsa->sa_flags |= SA_RESTART; if (lsa->lsa_flags & LINUX_SA_ONESHOT) bsa->sa_flags |= SA_RESETHAND; if (lsa->lsa_flags & LINUX_SA_NOMASK) bsa->sa_flags |= SA_NODEFER; } static void bsd_to_linux_sigaction(struct sigaction *bsa, l_sigaction_t *lsa) { bsd_to_linux_sigset(&bsa->sa_mask, &lsa->lsa_mask); #ifdef COMPAT_LINUX32 lsa->lsa_handler = (uintptr_t)bsa->sa_handler; #else lsa->lsa_handler = bsa->sa_handler; #endif lsa->lsa_restorer = 0; /* unsupported */ lsa->lsa_flags = 0; if (bsa->sa_flags & SA_NOCLDSTOP) lsa->lsa_flags |= LINUX_SA_NOCLDSTOP; if (bsa->sa_flags & SA_NOCLDWAIT) lsa->lsa_flags |= LINUX_SA_NOCLDWAIT; if (bsa->sa_flags & SA_SIGINFO) lsa->lsa_flags |= LINUX_SA_SIGINFO; if (bsa->sa_flags & SA_ONSTACK) lsa->lsa_flags |= LINUX_SA_ONSTACK; if (bsa->sa_flags & SA_RESTART) lsa->lsa_flags |= LINUX_SA_RESTART; if (bsa->sa_flags & SA_RESETHAND) lsa->lsa_flags |= LINUX_SA_ONESHOT; if (bsa->sa_flags & SA_NODEFER) lsa->lsa_flags |= LINUX_SA_NOMASK; } int linux_do_sigaction(struct thread *td, int linux_sig, l_sigaction_t *linux_nsa, l_sigaction_t *linux_osa) { struct sigaction act, oact, *nsa, *osa; int error, sig; if (!LINUX_SIG_VALID(linux_sig)) return (EINVAL); osa = (linux_osa != NULL) ? &oact : NULL; if (linux_nsa != NULL) { nsa = &act; linux_to_bsd_sigaction(linux_nsa, nsa); } else nsa = NULL; if (linux_sig <= LINUX_SIGTBLSZ) sig = linux_to_bsd_signal[_SIG_IDX(linux_sig)]; else sig = linux_sig; error = kern_sigaction(td, sig, nsa, osa, 0); if (error) return (error); if (linux_osa != NULL) bsd_to_linux_sigaction(osa, linux_osa); return (0); } - +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_signal(struct thread *td, struct linux_signal_args *args) { l_sigaction_t nsa, osa; int error; #ifdef DEBUG if (ldebug(signal)) printf(ARGS(signal, "%d, %p"), args->sig, (void *)(uintptr_t)args->handler); #endif nsa.lsa_handler = args->handler; nsa.lsa_flags = LINUX_SA_ONESHOT | LINUX_SA_NOMASK; LINUX_SIGEMPTYSET(nsa.lsa_mask); error = linux_do_sigaction(td, args->sig, &nsa, &osa); td->td_retval[0] = (int)(intptr_t)osa.lsa_handler; return (error); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_rt_sigaction(struct thread *td, struct linux_rt_sigaction_args *args) { l_sigaction_t nsa, osa; int error; #ifdef DEBUG if (ldebug(rt_sigaction)) printf(ARGS(rt_sigaction, "%ld, %p, %p, %ld"), (long)args->sig, (void *)args->act, (void *)args->oact, (long)args->sigsetsize); #endif if (args->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); if (args->act != NULL) { error = copyin(args->act, &nsa, sizeof(l_sigaction_t)); if (error) return (error); } error = linux_do_sigaction(td, args->sig, args->act ? &nsa : NULL, args->oact ? &osa : NULL); if (args->oact != NULL && !error) { error = copyout(&osa, args->oact, sizeof(l_sigaction_t)); } return (error); } static int linux_do_sigprocmask(struct thread *td, int how, l_sigset_t *new, l_sigset_t *old) { sigset_t omask, nmask; sigset_t *nmaskp; int error; td->td_retval[0] = 0; switch (how) { case LINUX_SIG_BLOCK: how = SIG_BLOCK; break; case LINUX_SIG_UNBLOCK: how = SIG_UNBLOCK; break; case LINUX_SIG_SETMASK: how = SIG_SETMASK; break; default: return (EINVAL); } if (new != NULL) { linux_to_bsd_sigset(new, &nmask); nmaskp = &nmask; } else nmaskp = NULL; error = kern_sigprocmask(td, how, nmaskp, &omask, 0); if (error == 0 && old != NULL) bsd_to_linux_sigset(&omask, old); return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args) { l_osigset_t mask; l_sigset_t set, oset; int error; #ifdef DEBUG if (ldebug(sigprocmask)) printf(ARGS(sigprocmask, "%d, *, *"), args->how); #endif if (args->mask != NULL) { error = copyin(args->mask, &mask, sizeof(l_osigset_t)); if (error) return (error); LINUX_SIGEMPTYSET(set); set.__bits[0] = mask; } error = linux_do_sigprocmask(td, args->how, args->mask ? &set : NULL, args->omask ? &oset : NULL); if (args->omask != NULL && !error) { mask = oset.__bits[0]; error = copyout(&mask, args->omask, sizeof(l_osigset_t)); } return (error); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_rt_sigprocmask(struct thread *td, struct linux_rt_sigprocmask_args *args) { l_sigset_t set, oset; int error; #ifdef DEBUG if (ldebug(rt_sigprocmask)) printf(ARGS(rt_sigprocmask, "%d, %p, %p, %ld"), args->how, (void *)args->mask, (void *)args->omask, (long)args->sigsetsize); #endif if (args->sigsetsize != sizeof(l_sigset_t)) return EINVAL; if (args->mask != NULL) { error = copyin(args->mask, &set, sizeof(l_sigset_t)); if (error) return (error); } error = linux_do_sigprocmask(td, args->how, args->mask ? &set : NULL, args->omask ? &oset : NULL); if (args->omask != NULL && !error) { error = copyout(&oset, args->omask, sizeof(l_sigset_t)); } return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sgetmask(struct thread *td, struct linux_sgetmask_args *args) { struct proc *p = td->td_proc; l_sigset_t mask; #ifdef DEBUG if (ldebug(sgetmask)) printf(ARGS(sgetmask, "")); #endif PROC_LOCK(p); bsd_to_linux_sigset(&td->td_sigmask, &mask); PROC_UNLOCK(p); td->td_retval[0] = mask.__bits[0]; return (0); } int linux_ssetmask(struct thread *td, struct linux_ssetmask_args *args) { struct proc *p = td->td_proc; l_sigset_t lset; sigset_t bset; #ifdef DEBUG if (ldebug(ssetmask)) printf(ARGS(ssetmask, "%08lx"), (unsigned long)args->mask); #endif PROC_LOCK(p); bsd_to_linux_sigset(&td->td_sigmask, &lset); td->td_retval[0] = lset.__bits[0]; LINUX_SIGEMPTYSET(lset); lset.__bits[0] = args->mask; linux_to_bsd_sigset(&lset, &bset); td->td_sigmask = bset; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (0); } -/* - * MPSAFE - */ int linux_sigpending(struct thread *td, struct linux_sigpending_args *args) { struct proc *p = td->td_proc; sigset_t bset; l_sigset_t lset; l_osigset_t mask; #ifdef DEBUG if (ldebug(sigpending)) printf(ARGS(sigpending, "*")); #endif PROC_LOCK(p); bset = p->p_siglist; SIGSETOR(bset, td->td_siglist); SIGSETAND(bset, td->td_sigmask); PROC_UNLOCK(p); bsd_to_linux_sigset(&bset, &lset); mask = lset.__bits[0]; return (copyout(&mask, args->mask, sizeof(mask))); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * MPSAFE */ int linux_rt_sigpending(struct thread *td, struct linux_rt_sigpending_args *args) { struct proc *p = td->td_proc; sigset_t bset; l_sigset_t lset; if (args->sigsetsize > sizeof(lset)) return EINVAL; /* NOT REACHED */ #ifdef DEBUG if (ldebug(rt_sigpending)) printf(ARGS(rt_sigpending, "*")); #endif PROC_LOCK(p); bset = p->p_siglist; SIGSETOR(bset, td->td_siglist); SIGSETAND(bset, td->td_sigmask); PROC_UNLOCK(p); bsd_to_linux_sigset(&bset, &lset); return (copyout(&lset, args->set, args->sigsetsize)); } /* * MPSAFE */ int linux_rt_sigtimedwait(struct thread *td, struct linux_rt_sigtimedwait_args *args) { int error, sig; l_timeval ltv; struct timeval tv; struct timespec ts, *tsa; l_sigset_t lset; sigset_t bset; l_siginfo_t linfo; ksiginfo_t info; #ifdef DEBUG if (ldebug(rt_sigtimedwait)) printf(ARGS(rt_sigtimedwait, "*")); #endif if (args->sigsetsize != sizeof(l_sigset_t)) return (EINVAL); if ((error = copyin(args->mask, &lset, sizeof(lset)))) return (error); linux_to_bsd_sigset(&lset, &bset); tsa = NULL; if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) return (error); #ifdef DEBUG if (ldebug(rt_sigtimedwait)) printf(LMSG("linux_rt_sigtimedwait: " "incoming timeout (%d/%d)\n"), ltv.tv_sec, ltv.tv_usec); #endif tv.tv_sec = (long)ltv.tv_sec; tv.tv_usec = (suseconds_t)ltv.tv_usec; if (itimerfix(&tv)) { /* * The timeout was invalid. Convert it to something * valid that will act as it does under Linux. */ tv.tv_sec += tv.tv_usec / 1000000; tv.tv_usec %= 1000000; if (tv.tv_usec < 0) { tv.tv_sec -= 1; tv.tv_usec += 1000000; } if (tv.tv_sec < 0) timevalclear(&tv); #ifdef DEBUG if (ldebug(rt_sigtimedwait)) printf(LMSG("linux_rt_sigtimedwait: " "converted timeout (%jd/%ld)\n"), (intmax_t)tv.tv_sec, tv.tv_usec); #endif } TIMEVAL_TO_TIMESPEC(&tv, &ts); tsa = &ts; } error = kern_sigtimedwait(td, bset, &info, tsa); #ifdef DEBUG if (ldebug(rt_sigtimedwait)) printf(LMSG("linux_rt_sigtimedwait: " "sigtimedwait returning (%d)\n"), error); #endif if (error) return (error); sig = BSD_TO_LINUX_SIGNAL(info.ksi_signo); if (args->ptr) { memset(&linfo, 0, sizeof(linfo)); ksiginfo_to_lsiginfo(&info, &linfo, sig); error = copyout(&linfo, args->ptr, sizeof(linfo)); } if (error == 0) td->td_retval[0] = sig; return (error); } int linux_kill(struct thread *td, struct linux_kill_args *args) { struct kill_args /* { int pid; int signum; } */ tmp; #ifdef DEBUG if (ldebug(kill)) printf(ARGS(kill, "%d, %d"), args->pid, args->signum); #endif /* * Allow signal 0 as a means to check for privileges */ if (!LINUX_SIG_VALID(args->signum) && args->signum != 0) return (EINVAL); if (args->signum > 0 && args->signum <= LINUX_SIGTBLSZ) tmp.signum = linux_to_bsd_signal[_SIG_IDX(args->signum)]; else tmp.signum = args->signum; tmp.pid = args->pid; return (sys_kill(td, &tmp)); } static int linux_do_tkill(struct thread *td, struct thread *tdt, ksiginfo_t *ksi) { struct proc *p; int error; p = tdt->td_proc; AUDIT_ARG_SIGNUM(ksi->ksi_signo); AUDIT_ARG_PID(p->p_pid); AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, ksi->ksi_signo); if (error != 0 || ksi->ksi_signo == 0) goto out; tdksignal(tdt, ksi->ksi_signo, ksi); out: PROC_UNLOCK(p); return (error); } int linux_tgkill(struct thread *td, struct linux_tgkill_args *args) { struct thread *tdt; ksiginfo_t ksi; int sig; #ifdef DEBUG if (ldebug(tgkill)) printf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig); #endif if (args->pid <= 0 || args->tgid <=0) return (EINVAL); /* * Allow signal 0 as a means to check for privileges */ if (!LINUX_SIG_VALID(args->sig) && args->sig != 0) return (EINVAL); if (args->sig > 0 && args->sig <= LINUX_SIGTBLSZ) sig = linux_to_bsd_signal[_SIG_IDX(args->sig)]; else sig = args->sig; tdt = linux_tdfind(td, args->pid, args->tgid); if (tdt == NULL) return (ESRCH); ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_LWP; ksi.ksi_errno = 0; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid; return (linux_do_tkill(td, tdt, &ksi)); } /* * Deprecated since 2.5.75. Replaced by tgkill(). */ int linux_tkill(struct thread *td, struct linux_tkill_args *args) { struct thread *tdt; ksiginfo_t ksi; int sig; #ifdef DEBUG if (ldebug(tkill)) printf(ARGS(tkill, "%i, %i"), args->tid, args->sig); #endif if (args->tid <= 0) return (EINVAL); if (!LINUX_SIG_VALID(args->sig)) return (EINVAL); if (args->sig > 0 && args->sig <= LINUX_SIGTBLSZ) sig = linux_to_bsd_signal[_SIG_IDX(args->sig)]; else sig = args->sig; tdt = linux_tdfind(td, args->tid, -1); if (tdt == NULL) return (ESRCH); ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_LWP; ksi.ksi_errno = 0; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid; return (linux_do_tkill(td, tdt, &ksi)); } void ksiginfo_to_lsiginfo(const ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig) { siginfo_to_lsiginfo(&ksi->ksi_info, lsi, sig); } static void sicode_to_lsicode(int si_code, int *lsi_code) { switch (si_code) { case SI_USER: *lsi_code = LINUX_SI_USER; break; case SI_KERNEL: *lsi_code = LINUX_SI_KERNEL; break; case SI_QUEUE: *lsi_code = LINUX_SI_QUEUE; break; case SI_TIMER: *lsi_code = LINUX_SI_TIMER; break; case SI_MESGQ: *lsi_code = LINUX_SI_MESGQ; break; case SI_ASYNCIO: *lsi_code = LINUX_SI_ASYNCIO; break; case SI_LWP: *lsi_code = LINUX_SI_TKILL; break; default: *lsi_code = si_code; break; } } void siginfo_to_lsiginfo(const siginfo_t *si, l_siginfo_t *lsi, l_int sig) { /* sig alredy converted */ lsi->lsi_signo = sig; sicode_to_lsicode(si->si_code, &lsi->lsi_code); switch (si->si_code) { case SI_LWP: lsi->lsi_pid = si->si_pid; lsi->lsi_uid = si->si_uid; break; case SI_TIMER: lsi->lsi_int = si->si_value.sival_int; lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); lsi->lsi_tid = si->si_timerid; break; case SI_QUEUE: lsi->lsi_pid = si->si_pid; lsi->lsi_uid = si->si_uid; lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); break; case SI_ASYNCIO: lsi->lsi_int = si->si_value.sival_int; lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); break; default: switch (sig) { case LINUX_SIGPOLL: /* XXX si_fd? */ lsi->lsi_band = si->si_band; break; case LINUX_SIGCHLD: lsi->lsi_errno = 0; lsi->lsi_pid = si->si_pid; lsi->lsi_uid = si->si_uid; if (si->si_code == CLD_STOPPED) lsi->lsi_status = BSD_TO_LINUX_SIGNAL(si->si_status); else if (si->si_code == CLD_CONTINUED) lsi->lsi_status = BSD_TO_LINUX_SIGNAL(SIGCONT); else lsi->lsi_status = si->si_status; break; case LINUX_SIGBUS: case LINUX_SIGILL: case LINUX_SIGFPE: case LINUX_SIGSEGV: lsi->lsi_addr = PTROUT(si->si_addr); break; default: lsi->lsi_pid = si->si_pid; lsi->lsi_uid = si->si_uid; if (sig >= LINUX_SIGRTMIN) { lsi->lsi_int = si->si_value.sival_int; lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); } break; } break; } } void lsiginfo_to_ksiginfo(const l_siginfo_t *lsi, ksiginfo_t *ksi, int sig) { ksi->ksi_signo = sig; ksi->ksi_code = lsi->lsi_code; /* XXX. Convert. */ ksi->ksi_pid = lsi->lsi_pid; ksi->ksi_uid = lsi->lsi_uid; ksi->ksi_status = lsi->lsi_status; ksi->ksi_addr = PTRIN(lsi->lsi_addr); ksi->ksi_info.si_value.sival_int = lsi->lsi_int; } int linux_rt_sigqueueinfo(struct thread *td, struct linux_rt_sigqueueinfo_args *args) { l_siginfo_t linfo; struct proc *p; ksiginfo_t ksi; int error; int sig; if (!LINUX_SIG_VALID(args->sig)) return (EINVAL); error = copyin(args->info, &linfo, sizeof(linfo)); if (error != 0) return (error); if (linfo.lsi_code >= 0) return (EPERM); if (args->sig > 0 && args->sig <= LINUX_SIGTBLSZ) sig = linux_to_bsd_signal[_SIG_IDX(args->sig)]; else sig = args->sig; error = ESRCH; if ((p = pfind(args->pid)) != NULL || (p = zpfind(args->pid)) != NULL) { error = p_cansignal(td, p, sig); if (error != 0) { PROC_UNLOCK(p); return (error); } ksiginfo_init(&ksi); lsiginfo_to_ksiginfo(&linfo, &ksi, sig); error = tdsendsignal(p, NULL, sig, &ksi); PROC_UNLOCK(p); } return (error); } Index: head/sys/compat/linux/linux_socket.c =================================================================== --- head/sys/compat/linux/linux_socket.c (revision 283414) +++ head/sys/compat/linux/linux_socket.c (revision 283415) @@ -1,1609 +1,1610 @@ /*- * Copyright (c) 1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* XXX we use functions that might not exist. */ #include "opt_compat.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #include #endif #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include static int linux_to_bsd_domain(int); /* * Reads a linux sockaddr and does any necessary translation. * Linux sockaddrs don't have a length field, only a family. * Copy the osockaddr structure pointed to by osa to kernel, adjust * family and convert to sockaddr. */ static int linux_getsockaddr(struct sockaddr **sap, const struct osockaddr *osa, int salen) { struct sockaddr *sa; struct osockaddr *kosa; #ifdef INET6 struct sockaddr_in6 *sin6; int oldv6size; #endif char *name; int bdom, error, hdrlen, namelen; if (salen < 2 || salen > UCHAR_MAX || !osa) return (EINVAL); #ifdef INET6 oldv6size = 0; /* * Check for old (pre-RFC2553) sockaddr_in6. We may accept it * if it's a v4-mapped address, so reserve the proper space * for it. */ if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) { salen += sizeof(uint32_t); oldv6size = 1; } #endif kosa = malloc(salen, M_SONAME, M_WAITOK); if ((error = copyin(osa, kosa, salen))) goto out; bdom = linux_to_bsd_domain(kosa->sa_family); if (bdom == -1) { error = EAFNOSUPPORT; goto out; } #ifdef INET6 /* * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6, * which lacks the scope id compared with RFC2553 one. If we detect * the situation, reject the address and write a message to system log. * * Still accept addresses for which the scope id is not used. */ if (oldv6size) { if (bdom == AF_INET6) { sin6 = (struct sockaddr_in6 *)kosa; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) || (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) && !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) && !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) && !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) { sin6->sin6_scope_id = 0; } else { log(LOG_DEBUG, "obsolete pre-RFC2553 sockaddr_in6 rejected\n"); error = EINVAL; goto out; } } else salen -= sizeof(uint32_t); } #endif if (bdom == AF_INET) { if (salen < sizeof(struct sockaddr_in)) { error = EINVAL; goto out; } salen = sizeof(struct sockaddr_in); } if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) { hdrlen = offsetof(struct sockaddr_un, sun_path); name = ((struct sockaddr_un *)kosa)->sun_path; if (*name == '\0') { /* * Linux abstract namespace starts with a NULL byte. * XXX We do not support abstract namespace yet. */ namelen = strnlen(name + 1, salen - hdrlen - 1) + 1; } else namelen = strnlen(name, salen - hdrlen); salen = hdrlen + namelen; if (salen > sizeof(struct sockaddr_un)) { error = ENAMETOOLONG; goto out; } } sa = (struct sockaddr *)kosa; sa->sa_family = bdom; sa->sa_len = salen; *sap = sa; return (0); out: free(kosa, M_SONAME); return (error); } static int linux_to_bsd_domain(int domain) { switch (domain) { case LINUX_AF_UNSPEC: return (AF_UNSPEC); case LINUX_AF_UNIX: return (AF_LOCAL); case LINUX_AF_INET: return (AF_INET); case LINUX_AF_INET6: return (AF_INET6); case LINUX_AF_AX25: return (AF_CCITT); case LINUX_AF_IPX: return (AF_IPX); case LINUX_AF_APPLETALK: return (AF_APPLETALK); } return (-1); } static int bsd_to_linux_domain(int domain) { switch (domain) { case AF_UNSPEC: return (LINUX_AF_UNSPEC); case AF_LOCAL: return (LINUX_AF_UNIX); case AF_INET: return (LINUX_AF_INET); case AF_INET6: return (LINUX_AF_INET6); case AF_CCITT: return (LINUX_AF_AX25); case AF_IPX: return (LINUX_AF_IPX); case AF_APPLETALK: return (LINUX_AF_APPLETALK); } return (-1); } static int linux_to_bsd_sockopt_level(int level) { switch (level) { case LINUX_SOL_SOCKET: return (SOL_SOCKET); } return (level); } static int bsd_to_linux_sockopt_level(int level) { switch (level) { case SOL_SOCKET: return (LINUX_SOL_SOCKET); } return (level); } static int linux_to_bsd_ip_sockopt(int opt) { switch (opt) { case LINUX_IP_TOS: return (IP_TOS); case LINUX_IP_TTL: return (IP_TTL); case LINUX_IP_OPTIONS: return (IP_OPTIONS); case LINUX_IP_MULTICAST_IF: return (IP_MULTICAST_IF); case LINUX_IP_MULTICAST_TTL: return (IP_MULTICAST_TTL); case LINUX_IP_MULTICAST_LOOP: return (IP_MULTICAST_LOOP); case LINUX_IP_ADD_MEMBERSHIP: return (IP_ADD_MEMBERSHIP); case LINUX_IP_DROP_MEMBERSHIP: return (IP_DROP_MEMBERSHIP); case LINUX_IP_HDRINCL: return (IP_HDRINCL); } return (-1); } static int linux_to_bsd_so_sockopt(int opt) { switch (opt) { case LINUX_SO_DEBUG: return (SO_DEBUG); case LINUX_SO_REUSEADDR: return (SO_REUSEADDR); case LINUX_SO_TYPE: return (SO_TYPE); case LINUX_SO_ERROR: return (SO_ERROR); case LINUX_SO_DONTROUTE: return (SO_DONTROUTE); case LINUX_SO_BROADCAST: return (SO_BROADCAST); case LINUX_SO_SNDBUF: return (SO_SNDBUF); case LINUX_SO_RCVBUF: return (SO_RCVBUF); case LINUX_SO_KEEPALIVE: return (SO_KEEPALIVE); case LINUX_SO_OOBINLINE: return (SO_OOBINLINE); case LINUX_SO_LINGER: return (SO_LINGER); case LINUX_SO_PEERCRED: return (LOCAL_PEERCRED); case LINUX_SO_RCVLOWAT: return (SO_RCVLOWAT); case LINUX_SO_SNDLOWAT: return (SO_SNDLOWAT); case LINUX_SO_RCVTIMEO: return (SO_RCVTIMEO); case LINUX_SO_SNDTIMEO: return (SO_SNDTIMEO); case LINUX_SO_TIMESTAMP: return (SO_TIMESTAMP); case LINUX_SO_ACCEPTCONN: return (SO_ACCEPTCONN); } return (-1); } static int linux_to_bsd_tcp_sockopt(int opt) { switch (opt) { case LINUX_TCP_NODELAY: return (TCP_NODELAY); case LINUX_TCP_MAXSEG: return (TCP_MAXSEG); case LINUX_TCP_KEEPIDLE: return (TCP_KEEPIDLE); case LINUX_TCP_KEEPINTVL: return (TCP_KEEPINTVL); case LINUX_TCP_KEEPCNT: return (TCP_KEEPCNT); case LINUX_TCP_MD5SIG: return (TCP_MD5SIG); } return (-1); } static int linux_to_bsd_msg_flags(int flags) { int ret_flags = 0; if (flags & LINUX_MSG_OOB) ret_flags |= MSG_OOB; if (flags & LINUX_MSG_PEEK) ret_flags |= MSG_PEEK; if (flags & LINUX_MSG_DONTROUTE) ret_flags |= MSG_DONTROUTE; if (flags & LINUX_MSG_CTRUNC) ret_flags |= MSG_CTRUNC; if (flags & LINUX_MSG_TRUNC) ret_flags |= MSG_TRUNC; if (flags & LINUX_MSG_DONTWAIT) ret_flags |= MSG_DONTWAIT; if (flags & LINUX_MSG_EOR) ret_flags |= MSG_EOR; if (flags & LINUX_MSG_WAITALL) ret_flags |= MSG_WAITALL; if (flags & LINUX_MSG_NOSIGNAL) ret_flags |= MSG_NOSIGNAL; #if 0 /* not handled */ if (flags & LINUX_MSG_PROXY) ; if (flags & LINUX_MSG_FIN) ; if (flags & LINUX_MSG_SYN) ; if (flags & LINUX_MSG_CONFIRM) ; if (flags & LINUX_MSG_RST) ; if (flags & LINUX_MSG_ERRQUEUE) ; #endif return ret_flags; } /* * If bsd_to_linux_sockaddr() or linux_to_bsd_sockaddr() faults, then the * native syscall will fault. Thus, we don't really need to check the * return values for these functions. */ static int bsd_to_linux_sockaddr(struct sockaddr *arg) { struct sockaddr sa; size_t sa_len = sizeof(struct sockaddr); int error; if ((error = copyin(arg, &sa, sa_len))) return (error); *(u_short *)&sa = sa.sa_family; error = copyout(&sa, arg, sa_len); return (error); } static int linux_to_bsd_sockaddr(struct sockaddr *arg, int len) { struct sockaddr sa; size_t sa_len = sizeof(struct sockaddr); int error; if ((error = copyin(arg, &sa, sa_len))) return (error); sa.sa_family = *(sa_family_t *)&sa; sa.sa_len = len; error = copyout(&sa, arg, sa_len); return (error); } static int linux_sa_put(struct osockaddr *osa) { struct osockaddr sa; int error, bdom; /* * Only read/write the osockaddr family part, the rest is * not changed. */ error = copyin(osa, &sa, sizeof(sa.sa_family)); if (error) return (error); bdom = bsd_to_linux_domain(sa.sa_family); if (bdom == -1) return (EINVAL); sa.sa_family = bdom; error = copyout(&sa, osa, sizeof(sa.sa_family)); if (error) return (error); return (0); } static int linux_to_bsd_cmsg_type(int cmsg_type) { switch (cmsg_type) { case LINUX_SCM_RIGHTS: return (SCM_RIGHTS); case LINUX_SCM_CREDENTIALS: return (SCM_CREDS); } return (-1); } static int bsd_to_linux_cmsg_type(int cmsg_type) { switch (cmsg_type) { case SCM_RIGHTS: return (LINUX_SCM_RIGHTS); case SCM_CREDS: return (LINUX_SCM_CREDENTIALS); } return (-1); } static int linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr) { if (lhdr->msg_controllen > INT_MAX) return (ENOBUFS); bhdr->msg_name = PTRIN(lhdr->msg_name); bhdr->msg_namelen = lhdr->msg_namelen; bhdr->msg_iov = PTRIN(lhdr->msg_iov); bhdr->msg_iovlen = lhdr->msg_iovlen; bhdr->msg_control = PTRIN(lhdr->msg_control); /* * msg_controllen is skipped since BSD and LINUX control messages * are potentially different sizes (e.g. the cred structure used * by SCM_CREDS is different between the two operating system). * * The caller can set it (if necessary) after converting all the * control messages. */ bhdr->msg_flags = linux_to_bsd_msg_flags(lhdr->msg_flags); return (0); } static int bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr) { lhdr->msg_name = PTROUT(bhdr->msg_name); lhdr->msg_namelen = bhdr->msg_namelen; lhdr->msg_iov = PTROUT(bhdr->msg_iov); lhdr->msg_iovlen = bhdr->msg_iovlen; lhdr->msg_control = PTROUT(bhdr->msg_control); /* * msg_controllen is skipped since BSD and LINUX control messages * are potentially different sizes (e.g. the cred structure used * by SCM_CREDS is different between the two operating system). * * The caller can set it (if necessary) after converting all the * control messages. */ /* msg_flags skipped */ return (0); } static int linux_set_socket_flags(struct thread *td, int s, int flags) { int error; if (flags & LINUX_SOCK_NONBLOCK) { error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK); if (error) return (error); } if (flags & LINUX_SOCK_CLOEXEC) { error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC); if (error) return (error); } return (0); } static int linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg) { struct sockaddr *to; int error; if (mp->msg_name != NULL) { error = linux_getsockaddr(&to, mp->msg_name, mp->msg_namelen); if (error) return (error); mp->msg_name = to; } else to = NULL; error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control, segflg); if (to) free(to, M_SONAME); return (error); } /* Return 0 if IP_HDRINCL is set for the given socket. */ static int linux_check_hdrincl(struct thread *td, int s) { int error, optval; socklen_t size_val; size_val = sizeof(optval); error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL, &optval, UIO_SYSSPACE, &size_val); if (error) return (error); return (optval == 0); } /* * Updated sendto() when IP_HDRINCL is set: * tweak endian-dependent fields in the IP packet. */ static int linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args) { /* * linux_ip_copysize defines how many bytes we should copy * from the beginning of the IP packet before we customize it for BSD. * It should include all the fields we modify (ip_len and ip_off). */ #define linux_ip_copysize 8 struct ip *packet; struct msghdr msg; struct iovec aiov[1]; int error; /* Check that the packet isn't too big or too small. */ if (linux_args->len < linux_ip_copysize || linux_args->len > IP_MAXPACKET) return (EINVAL); packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK); /* Make kernel copy of the packet to be sent */ if ((error = copyin(PTRIN(linux_args->msg), packet, linux_args->len))) goto goout; /* Convert fields from Linux to BSD raw IP socket format */ packet->ip_len = linux_args->len; packet->ip_off = ntohs(packet->ip_off); /* Prepare the msghdr and iovec structures describing the new packet */ msg.msg_name = PTRIN(linux_args->to); msg.msg_namelen = linux_args->tolen; msg.msg_iov = aiov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_flags = 0; aiov[0].iov_base = (char *)packet; aiov[0].iov_len = linux_args->len; error = linux_sendit(td, linux_args->s, &msg, linux_args->flags, NULL, UIO_SYSSPACE); goout: free(packet, M_TEMP); return (error); } int linux_socket(struct thread *td, struct linux_socket_args *args) { struct socket_args /* { int domain; int type; int protocol; } */ bsd_args; int retval_socket, socket_flags; bsd_args.protocol = args->protocol; socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK; if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) return (EINVAL); bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK; if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX) return (EINVAL); bsd_args.domain = linux_to_bsd_domain(args->domain); if (bsd_args.domain == -1) return (EAFNOSUPPORT); retval_socket = sys_socket(td, &bsd_args); if (retval_socket) return (retval_socket); retval_socket = linux_set_socket_flags(td, td->td_retval[0], socket_flags); if (retval_socket) { (void)kern_close(td, td->td_retval[0]); goto out; } if (bsd_args.type == SOCK_RAW && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0) && bsd_args.domain == PF_INET) { /* It's a raw IP socket: set the IP_HDRINCL option. */ int hdrincl; hdrincl = 1; /* We ignore any error returned by kern_setsockopt() */ kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL, &hdrincl, UIO_SYSSPACE, sizeof(hdrincl)); } #ifdef INET6 /* * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default * and some apps depend on this. So, set V6ONLY to 0 for Linux apps. * For simplicity we do this unconditionally of the net.inet6.ip6.v6only * sysctl value. */ if (bsd_args.domain == PF_INET6) { int v6only; v6only = 0; /* We ignore any error returned by setsockopt() */ kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY, &v6only, UIO_SYSSPACE, sizeof(v6only)); } #endif out: return (retval_socket); } int linux_bind(struct thread *td, struct linux_bind_args *args) { struct sockaddr *sa; int error; error = linux_getsockaddr(&sa, PTRIN(args->name), args->namelen); if (error) return (error); error = kern_bindat(td, AT_FDCWD, args->s, sa); free(sa, M_SONAME); if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in)) return (EINVAL); return (error); } int linux_connect(struct thread *td, struct linux_connect_args *args) { cap_rights_t rights; struct socket *so; struct sockaddr *sa; u_int fflag; int error; error = linux_getsockaddr(&sa, (struct osockaddr *)PTRIN(args->name), args->namelen); if (error) return (error); error = kern_connectat(td, AT_FDCWD, args->s, sa); free(sa, M_SONAME); if (error != EISCONN) return (error); /* * Linux doesn't return EISCONN the first time it occurs, * when on a non-blocking socket. Instead it returns the * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD. * * XXXRW: Instead of using fgetsock(), check that it is a * socket and use the file descriptor reference instead of * creating a new one. */ error = fgetsock(td, args->s, cap_rights_init(&rights, CAP_CONNECT), &so, &fflag); if (error == 0) { error = EISCONN; if (fflag & FNONBLOCK) { SOCK_LOCK(so); if (so->so_emuldata == 0) error = so->so_error; so->so_emuldata = (void *)1; SOCK_UNLOCK(so); } fputsock(so); } return (error); } int linux_listen(struct thread *td, struct linux_listen_args *args) { struct listen_args /* { int s; int backlog; } */ bsd_args; bsd_args.s = args->s; bsd_args.backlog = args->backlog; return (sys_listen(td, &bsd_args)); } static int linux_accept_common(struct thread *td, int s, l_uintptr_t addr, l_uintptr_t namelen, int flags) { struct accept_args /* { int s; struct sockaddr * __restrict name; socklen_t * __restrict anamelen; } */ bsd_args; int error; if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) return (EINVAL); bsd_args.s = s; /* XXX: */ bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr); bsd_args.anamelen = PTRIN(namelen);/* XXX */ error = sys_accept(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name); if (error) { if (error == EFAULT && namelen != sizeof(struct sockaddr_in)) return (EINVAL); return (error); } /* * linux appears not to copy flags from the parent socket to the * accepted one, so we must clear the flags in the new descriptor * and apply the requested flags. */ error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0); if (error) goto out; error = linux_set_socket_flags(td, td->td_retval[0], flags); if (error) goto out; if (addr) error = linux_sa_put(PTRIN(addr)); out: if (error) { (void)kern_close(td, td->td_retval[0]); td->td_retval[0] = 0; } return (error); } int linux_accept(struct thread *td, struct linux_accept_args *args) { return (linux_accept_common(td, args->s, args->addr, args->namelen, 0)); } int linux_accept4(struct thread *td, struct linux_accept4_args *args) { return (linux_accept_common(td, args->s, args->addr, args->namelen, args->flags)); } int linux_getsockname(struct thread *td, struct linux_getsockname_args *args) { struct getsockname_args /* { int fdes; struct sockaddr * __restrict asa; socklen_t * __restrict alen; } */ bsd_args; int error; bsd_args.fdes = args->s; /* XXX: */ bsd_args.asa = (struct sockaddr * __restrict)PTRIN(args->addr); bsd_args.alen = PTRIN(args->namelen); /* XXX */ error = sys_getsockname(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa); if (error) return (error); error = linux_sa_put(PTRIN(args->addr)); if (error) return (error); return (0); } int linux_getpeername(struct thread *td, struct linux_getpeername_args *args) { struct getpeername_args /* { int fdes; caddr_t asa; int *alen; } */ bsd_args; int error; bsd_args.fdes = args->s; bsd_args.asa = (struct sockaddr *)PTRIN(args->addr); bsd_args.alen = (socklen_t *)PTRIN(args->namelen); error = sys_getpeername(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.asa); if (error) return (error); error = linux_sa_put(PTRIN(args->addr)); if (error) return (error); return (0); } int linux_socketpair(struct thread *td, struct linux_socketpair_args *args) { struct socketpair_args /* { int domain; int type; int protocol; int *rsv; } */ bsd_args; int error, socket_flags; int sv[2]; bsd_args.domain = linux_to_bsd_domain(args->domain); if (bsd_args.domain != PF_LOCAL) return (EAFNOSUPPORT); socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK; if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) return (EINVAL); bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK; if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX) return (EINVAL); if (args->protocol != 0 && args->protocol != PF_UNIX) /* * Use of PF_UNIX as protocol argument is not right, * but Linux does it. * Do not map PF_UNIX as its Linux value is identical * to FreeBSD one. */ return (EPROTONOSUPPORT); else bsd_args.protocol = 0; bsd_args.rsv = (int *)PTRIN(args->rsv); error = kern_socketpair(td, bsd_args.domain, bsd_args.type, bsd_args.protocol, sv); if (error) return (error); error = linux_set_socket_flags(td, sv[0], socket_flags); if (error) goto out; error = linux_set_socket_flags(td, sv[1], socket_flags); if (error) goto out; error = copyout(sv, bsd_args.rsv, 2 * sizeof(int)); out: if (error) { (void)kern_close(td, sv[0]); (void)kern_close(td, sv[1]); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct linux_send_args { int s; l_uintptr_t msg; int len; int flags; }; static int linux_send(struct thread *td, struct linux_send_args *args) { struct sendto_args /* { int s; caddr_t buf; int len; int flags; caddr_t to; int tolen; } */ bsd_args; bsd_args.s = args->s; bsd_args.buf = (caddr_t)PTRIN(args->msg); bsd_args.len = args->len; bsd_args.flags = args->flags; bsd_args.to = NULL; bsd_args.tolen = 0; return sys_sendto(td, &bsd_args); } struct linux_recv_args { int s; l_uintptr_t msg; int len; int flags; }; static int linux_recv(struct thread *td, struct linux_recv_args *args) { struct recvfrom_args /* { int s; caddr_t buf; int len; int flags; struct sockaddr *from; socklen_t fromlenaddr; } */ bsd_args; bsd_args.s = args->s; bsd_args.buf = (caddr_t)PTRIN(args->msg); bsd_args.len = args->len; bsd_args.flags = linux_to_bsd_msg_flags(args->flags); bsd_args.from = NULL; bsd_args.fromlenaddr = 0; return (sys_recvfrom(td, &bsd_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_sendto(struct thread *td, struct linux_sendto_args *args) { struct msghdr msg; struct iovec aiov; int error; if (linux_check_hdrincl(td, args->s) == 0) /* IP_HDRINCL set, tweak the packet before sending */ return (linux_sendto_hdrincl(td, args)); msg.msg_name = PTRIN(args->to); msg.msg_namelen = args->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_flags = 0; aiov.iov_base = PTRIN(args->msg); aiov.iov_len = args->len; error = linux_sendit(td, args->s, &msg, args->flags, NULL, UIO_USERSPACE); return (error); } int linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args) { struct recvfrom_args /* { int s; caddr_t buf; size_t len; int flags; struct sockaddr * __restrict from; socklen_t * __restrict fromlenaddr; } */ bsd_args; size_t len; int error; if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t)))) return (error); bsd_args.s = args->s; bsd_args.buf = PTRIN(args->buf); bsd_args.len = args->len; bsd_args.flags = linux_to_bsd_msg_flags(args->flags); /* XXX: */ bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from); bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */ linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len); error = sys_recvfrom(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from); if (error) return (error); if (args->from) { error = linux_sa_put((struct osockaddr *) PTRIN(args->from)); if (error) return (error); } return (0); } int linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) { struct cmsghdr *cmsg; struct cmsgcred cmcred; struct mbuf *control; struct msghdr msg; struct l_cmsghdr linux_cmsg; struct l_cmsghdr *ptr_cmsg; struct l_msghdr linux_msg; struct iovec *iov; socklen_t datalen; struct sockaddr *sa; sa_family_t sa_family; void *data; int error; error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg)); if (error) return (error); /* * Some Linux applications (ping) define a non-NULL control data * pointer, but a msg_controllen of 0, which is not allowed in the * FreeBSD system call interface. NULL the msg_control pointer in * order to handle this case. This should be checked, but allows the * Linux ping to work. */ if (PTRIN(linux_msg.msg_control) != NULL && linux_msg.msg_controllen == 0) linux_msg.msg_control = PTROUT(NULL); error = linux_to_bsd_msghdr(&msg, &linux_msg); if (error) return (error); #ifdef COMPAT_LINUX32 error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen, &iov, EMSGSIZE); #else error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); #endif if (error) return (error); control = NULL; cmsg = NULL; if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) { error = kern_getsockname(td, args->s, &sa, &datalen); if (error) goto bad; sa_family = sa->sa_family; free(sa, M_SONAME); error = ENOBUFS; cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO); control = m_get(M_WAITOK, MT_CONTROL); if (control == NULL) goto bad; do { error = copyin(ptr_cmsg, &linux_cmsg, sizeof(struct l_cmsghdr)); if (error) goto bad; error = EINVAL; if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr)) goto bad; /* * Now we support only SCM_RIGHTS and SCM_CRED, * so return EINVAL in any other cmsg_type */ cmsg->cmsg_type = linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type); cmsg->cmsg_level = linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level); if (cmsg->cmsg_type == -1 || cmsg->cmsg_level != SOL_SOCKET) goto bad; /* * Some applications (e.g. pulseaudio) attempt to * send ancillary data even if the underlying protocol * doesn't support it which is not allowed in the * FreeBSD system call interface. */ if (sa_family != AF_UNIX) continue; data = LINUX_CMSG_DATA(ptr_cmsg); datalen = linux_cmsg.cmsg_len - L_CMSG_HDRSZ; switch (cmsg->cmsg_type) { case SCM_RIGHTS: break; case SCM_CREDS: data = &cmcred; datalen = sizeof(cmcred); /* * The lower levels will fill in the structure */ bzero(data, datalen); break; } cmsg->cmsg_len = CMSG_LEN(datalen); error = ENOBUFS; if (!m_append(control, CMSG_HDRSZ, (c_caddr_t)cmsg)) goto bad; if (!m_append(control, datalen, (c_caddr_t)data)) goto bad; } while ((ptr_cmsg = LINUX_CMSG_NXTHDR(&linux_msg, ptr_cmsg))); if (m_length(control, NULL) == 0) { m_freem(control); control = NULL; } } msg.msg_iov = iov; msg.msg_flags = 0; error = linux_sendit(td, args->s, &msg, args->flags, control, UIO_USERSPACE); bad: free(iov, M_IOV); if (cmsg) free(cmsg, M_TEMP); return (error); } int linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) { struct cmsghdr *cm; struct cmsgcred *cmcred; struct msghdr msg; struct l_cmsghdr *linux_cmsg = NULL; struct l_ucred linux_ucred; socklen_t datalen, outlen; struct l_msghdr linux_msg; struct iovec *iov, *uiov; struct mbuf *control = NULL; struct mbuf **controlp; caddr_t outbuf; void *data; int error, i, fd, fds, *fdp; error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg)); if (error) return (error); error = linux_to_bsd_msghdr(&msg, &linux_msg); if (error) return (error); #ifdef COMPAT_LINUX32 error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen, &iov, EMSGSIZE); #else error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); #endif if (error) return (error); if (msg.msg_name) { error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name, msg.msg_namelen); if (error) goto bad; } uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp); msg.msg_iov = uiov; if (error) goto bad; error = bsd_to_linux_msghdr(&msg, &linux_msg); if (error) goto bad; if (linux_msg.msg_name) { error = bsd_to_linux_sockaddr((struct sockaddr *) PTRIN(linux_msg.msg_name)); if (error) goto bad; } if (linux_msg.msg_name && linux_msg.msg_namelen > 2) { error = linux_sa_put(PTRIN(linux_msg.msg_name)); if (error) goto bad; } outbuf = PTRIN(linux_msg.msg_control); outlen = 0; if (control) { linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO); msg.msg_control = mtod(control, struct cmsghdr *); msg.msg_controllen = control->m_len; cm = CMSG_FIRSTHDR(&msg); while (cm != NULL) { linux_cmsg->cmsg_type = bsd_to_linux_cmsg_type(cm->cmsg_type); linux_cmsg->cmsg_level = bsd_to_linux_sockopt_level(cm->cmsg_level); if (linux_cmsg->cmsg_type == -1 || cm->cmsg_level != SOL_SOCKET) { error = EINVAL; goto bad; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; switch (cm->cmsg_type) { case SCM_RIGHTS: if (args->flags & LINUX_MSG_CMSG_CLOEXEC) { fds = datalen / sizeof(int); fdp = data; for (i = 0; i < fds; i++) { fd = *fdp++; (void)kern_fcntl(td, fd, F_SETFD, FD_CLOEXEC); } } break; case SCM_CREDS: /* * Currently LOCAL_CREDS is never in * effect for Linux so no need to worry * about sockcred */ if (datalen != sizeof(*cmcred)) { error = EMSGSIZE; goto bad; } cmcred = (struct cmsgcred *)data; bzero(&linux_ucred, sizeof(linux_ucred)); linux_ucred.pid = cmcred->cmcred_pid; linux_ucred.uid = cmcred->cmcred_uid; linux_ucred.gid = cmcred->cmcred_gid; data = &linux_ucred; datalen = sizeof(linux_ucred); break; } if (outlen + LINUX_CMSG_LEN(datalen) > linux_msg.msg_controllen) { if (outlen == 0) { error = EMSGSIZE; goto bad; } else { linux_msg.msg_flags |= LINUX_MSG_CTRUNC; goto out; } } linux_cmsg->cmsg_len = LINUX_CMSG_LEN(datalen); error = copyout(linux_cmsg, outbuf, L_CMSG_HDRSZ); if (error) goto bad; outbuf += L_CMSG_HDRSZ; error = copyout(data, outbuf, datalen); if (error) goto bad; outbuf += LINUX_CMSG_ALIGN(datalen); outlen += LINUX_CMSG_LEN(datalen); cm = CMSG_NXTHDR(&msg, cm); } } out: linux_msg.msg_controllen = outlen; error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg)); bad: free(iov, M_IOV); m_freem(control); free(linux_cmsg, M_TEMP); return (error); } int linux_shutdown(struct thread *td, struct linux_shutdown_args *args) { struct shutdown_args /* { int s; int how; } */ bsd_args; bsd_args.s = args->s; bsd_args.how = args->how; return (sys_shutdown(td, &bsd_args)); } int linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) { struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ bsd_args; l_timeval linux_tv; struct timeval tv; int error, name; bsd_args.s = args->s; bsd_args.level = linux_to_bsd_sockopt_level(args->level); switch (bsd_args.level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(args->optname); switch (name) { case SO_RCVTIMEO: /* FALLTHROUGH */ case SO_SNDTIMEO: error = copyin(PTRIN(args->optval), &linux_tv, sizeof(linux_tv)); if (error) return (error); tv.tv_sec = linux_tv.tv_sec; tv.tv_usec = linux_tv.tv_usec; return (kern_setsockopt(td, args->s, bsd_args.level, name, &tv, UIO_SYSSPACE, sizeof(tv))); /* NOTREACHED */ break; default: break; } break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(args->optname); break; case IPPROTO_TCP: name = linux_to_bsd_tcp_sockopt(args->optname); break; default: name = -1; break; } if (name == -1) return (ENOPROTOOPT); bsd_args.name = name; bsd_args.val = PTRIN(args->optval); bsd_args.valsize = args->optlen; if (name == IPV6_NEXTHOP) { linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.val, bsd_args.valsize); error = sys_setsockopt(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val); } else error = sys_setsockopt(td, &bsd_args); return (error); } int linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args) { struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ bsd_args; l_timeval linux_tv; struct timeval tv; socklen_t tv_len, xulen; struct xucred xu; struct l_ucred lxu; int error, name; bsd_args.s = args->s; bsd_args.level = linux_to_bsd_sockopt_level(args->level); switch (bsd_args.level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(args->optname); switch (name) { case SO_RCVTIMEO: /* FALLTHROUGH */ case SO_SNDTIMEO: tv_len = sizeof(tv); error = kern_getsockopt(td, args->s, bsd_args.level, name, &tv, UIO_SYSSPACE, &tv_len); if (error) return (error); linux_tv.tv_sec = tv.tv_sec; linux_tv.tv_usec = tv.tv_usec; return (copyout(&linux_tv, PTRIN(args->optval), sizeof(linux_tv))); /* NOTREACHED */ break; case LOCAL_PEERCRED: if (args->optlen != sizeof(lxu)) return (EINVAL); xulen = sizeof(xu); error = kern_getsockopt(td, args->s, bsd_args.level, name, &xu, UIO_SYSSPACE, &xulen); if (error) return (error); /* * XXX Use 0 for pid as the FreeBSD does not cache peer pid. */ lxu.pid = 0; lxu.uid = xu.cr_uid; lxu.gid = xu.cr_gid; return (copyout(&lxu, PTRIN(args->optval), sizeof(lxu))); /* NOTREACHED */ break; default: break; } break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(args->optname); break; case IPPROTO_TCP: name = linux_to_bsd_tcp_sockopt(args->optname); break; default: name = -1; break; } if (name == -1) return (EINVAL); bsd_args.name = name; bsd_args.val = PTRIN(args->optval); bsd_args.avalsize = PTRIN(args->optlen); if (name == IPV6_NEXTHOP) { error = sys_getsockopt(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.val); } else error = sys_getsockopt(td, &bsd_args); return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) + /* Argument list sizes for linux_socketcall */ #define LINUX_AL(x) ((x) * sizeof(l_ulong)) static const unsigned char lxs_args[] = { LINUX_AL(0) /* unused*/, LINUX_AL(3) /* socket */, LINUX_AL(3) /* bind */, LINUX_AL(3) /* connect */, LINUX_AL(2) /* listen */, LINUX_AL(3) /* accept */, LINUX_AL(3) /* getsockname */, LINUX_AL(3) /* getpeername */, LINUX_AL(4) /* socketpair */, LINUX_AL(4) /* send */, LINUX_AL(4) /* recv */, LINUX_AL(6) /* sendto */, LINUX_AL(6) /* recvfrom */, LINUX_AL(2) /* shutdown */, LINUX_AL(5) /* setsockopt */, LINUX_AL(5) /* getsockopt */, LINUX_AL(3) /* sendmsg */, LINUX_AL(3) /* recvmsg */, LINUX_AL(4) /* accept4 */ }; #define LINUX_AL_SIZE sizeof(lxs_args) / sizeof(lxs_args[0]) - 1 -#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_socketcall(struct thread *td, struct linux_socketcall_args *args) { l_ulong a[6]; void *arg; int error; if (args->what < LINUX_SOCKET || args->what > LINUX_AL_SIZE) return (EINVAL); error = copyin(PTRIN(args->args), a, lxs_args[args->what]); if (error) return (error); arg = a; switch (args->what) { case LINUX_SOCKET: return (linux_socket(td, arg)); case LINUX_BIND: return (linux_bind(td, arg)); case LINUX_CONNECT: return (linux_connect(td, arg)); case LINUX_LISTEN: return (linux_listen(td, arg)); case LINUX_ACCEPT: return (linux_accept(td, arg)); case LINUX_GETSOCKNAME: return (linux_getsockname(td, arg)); case LINUX_GETPEERNAME: return (linux_getpeername(td, arg)); case LINUX_SOCKETPAIR: return (linux_socketpair(td, arg)); case LINUX_SEND: return (linux_send(td, arg)); case LINUX_RECV: return (linux_recv(td, arg)); case LINUX_SENDTO: return (linux_sendto(td, arg)); case LINUX_RECVFROM: return (linux_recvfrom(td, arg)); case LINUX_SHUTDOWN: return (linux_shutdown(td, arg)); case LINUX_SETSOCKOPT: return (linux_setsockopt(td, arg)); case LINUX_GETSOCKOPT: return (linux_getsockopt(td, arg)); case LINUX_SENDMSG: return (linux_sendmsg(td, arg)); case LINUX_RECVMSG: return (linux_recvmsg(td, arg)); case LINUX_ACCEPT4: return (linux_accept4(td, arg)); } uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what); return (ENOSYS); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ Index: head/sys/compat/linux/linux_stats.c =================================================================== --- head/sys/compat/linux/linux_stats.c (revision 283414) +++ head/sys/compat/linux/linux_stats.c (revision 283415) @@ -1,626 +1,630 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #define LINUX_SHMFS_MAGIC 0x01021994 static void translate_vnhook_major_minor(struct vnode *vp, struct stat *sb) { int major, minor; if (vp->v_type == VCHR && vp->v_rdev != NULL && linux_driver_get_major_minor(devtoname(vp->v_rdev), &major, &minor) == 0) { sb->st_rdev = (major << 8 | minor); } } static int linux_kern_statat(struct thread *td, int flag, int fd, char *path, enum uio_seg pathseg, struct stat *sbp) { return (kern_statat(td, flag, fd, path, pathseg, sbp, translate_vnhook_major_minor)); } static int linux_kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) { return (linux_kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); } static int linux_kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) { return (linux_kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, sbp)); } /* * XXX: This was removed from newstat_copyout(), and almost identical * XXX: code was in stat64_copyout(). findcdev() needs to be replaced * XXX: with something that does lookup and locking properly. * XXX: When somebody fixes this: please try to avoid duplicating it. */ #if 0 static void disk_foo(struct somestat *tbuf) { struct cdevsw *cdevsw; struct cdev *dev; /* Lie about disk drives which are character devices * in FreeBSD but block devices under Linux. */ if (S_ISCHR(tbuf.st_mode) && (dev = findcdev(buf->st_rdev)) != NULL) { cdevsw = dev_refthread(dev); if (cdevsw != NULL) { if (cdevsw->d_flags & D_DISK) { tbuf.st_mode &= ~S_IFMT; tbuf.st_mode |= S_IFBLK; /* XXX this may not be quite right */ /* Map major number to 0 */ tbuf.st_dev = minor(buf->st_dev) & 0xf; tbuf.st_rdev = buf->st_rdev & 0xff; } dev_relthread(dev); } } } #endif static void translate_fd_major_minor(struct thread *td, int fd, struct stat *buf) { struct file *fp; struct vnode *vp; int major, minor; /* * No capability rights required here. */ if ((!S_ISCHR(buf->st_mode) && !S_ISBLK(buf->st_mode)) || fget(td, fd, 0, &fp) != 0) return; vp = fp->f_vnode; if (vp != NULL && vp->v_rdev != NULL && linux_driver_get_major_minor(devtoname(vp->v_rdev), &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } else if (fp->f_type == DTYPE_PTS) { struct tty *tp = fp->f_data; /* Convert the numbers for the slave device. */ if (linux_driver_get_major_minor(devtoname(tp->t_dev), &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } } fdrop(fp, td); } static int newstat_copyout(struct stat *buf, void *ubuf) { struct l_newstat tbuf; bzero(&tbuf, sizeof(tbuf)); tbuf.st_dev = minor(buf->st_dev) | (major(buf->st_dev) << 8); tbuf.st_ino = buf->st_ino; tbuf.st_mode = buf->st_mode; tbuf.st_nlink = buf->st_nlink; tbuf.st_uid = buf->st_uid; tbuf.st_gid = buf->st_gid; tbuf.st_rdev = buf->st_rdev; tbuf.st_size = buf->st_size; tbuf.st_atim.tv_sec = buf->st_atim.tv_sec; tbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; tbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; tbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; tbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; tbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; tbuf.st_blksize = buf->st_blksize; tbuf.st_blocks = buf->st_blocks; return (copyout(&tbuf, ubuf, sizeof(tbuf))); } int linux_newstat(struct thread *td, struct linux_newstat_args *args) { struct stat buf; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(newstat)) printf(ARGS(newstat, "%s, *"), path); #endif error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); LFREEPATH(path); if (error) return (error); return (newstat_copyout(&buf, args->buf)); } int linux_newlstat(struct thread *td, struct linux_newlstat_args *args) { struct stat sb; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(newlstat)) printf(ARGS(newlstat, "%s, *"), path); #endif error = linux_kern_lstat(td, path, UIO_SYSSPACE, &sb); LFREEPATH(path); if (error) return (error); return (newstat_copyout(&sb, args->buf)); } int linux_newfstat(struct thread *td, struct linux_newfstat_args *args) { struct stat buf; int error; #ifdef DEBUG if (ldebug(newfstat)) printf(ARGS(newfstat, "%d, *"), args->fd); #endif error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = newstat_copyout(&buf, args->buf); return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int stat_copyout(struct stat *buf, void *ubuf) { struct l_stat lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = buf->st_dev; lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; if (buf->st_size < (quad_t)1 << 32) lbuf.st_size = buf->st_size; else lbuf.st_size = -2; lbuf.st_atim.tv_sec = buf->st_atim.tv_sec; lbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; lbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; lbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; lbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; lbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; lbuf.st_flags = buf->st_flags; lbuf.st_gen = buf->st_gen; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } int linux_stat(struct thread *td, struct linux_stat_args *args) { struct stat buf; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(stat)) printf(ARGS(stat, "%s, *"), path); #endif error = linux_kern_stat(td, path, UIO_SYSSPACE, &buf); if (error) { LFREEPATH(path); return (error); } LFREEPATH(path); return(stat_copyout(&buf, args->up)); } int linux_lstat(struct thread *td, struct linux_lstat_args *args) { struct stat buf; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(lstat)) printf(ARGS(lstat, "%s, *"), path); #endif error = linux_kern_lstat(td, path, UIO_SYSSPACE, &buf); if (error) { LFREEPATH(path); return (error); } LFREEPATH(path); return(stat_copyout(&buf, args->up)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ struct l_statfs { l_long f_type; l_long f_bsize; l_long f_blocks; l_long f_bfree; l_long f_bavail; l_long f_files; l_long f_ffree; l_fsid_t f_fsid; l_long f_namelen; l_long f_spare[6]; }; #define LINUX_CODA_SUPER_MAGIC 0x73757245L #define LINUX_EXT2_SUPER_MAGIC 0xEF53L #define LINUX_HPFS_SUPER_MAGIC 0xf995e849L #define LINUX_ISOFS_SUPER_MAGIC 0x9660L #define LINUX_MSDOS_SUPER_MAGIC 0x4d44L #define LINUX_NCP_SUPER_MAGIC 0x564cL #define LINUX_NFS_SUPER_MAGIC 0x6969L #define LINUX_NTFS_SUPER_MAGIC 0x5346544EL #define LINUX_PROC_SUPER_MAGIC 0x9fa0L #define LINUX_UFS_SUPER_MAGIC 0x00011954L /* XXX - UFS_MAGIC in Linux */ #define LINUX_DEVFS_SUPER_MAGIC 0x1373L static long bsd_to_linux_ftype(const char *fstypename) { int i; static struct {const char *bsd_name; long linux_type;} b2l_tbl[] = { {"ufs", LINUX_UFS_SUPER_MAGIC}, {"cd9660", LINUX_ISOFS_SUPER_MAGIC}, {"nfs", LINUX_NFS_SUPER_MAGIC}, {"ext2fs", LINUX_EXT2_SUPER_MAGIC}, {"procfs", LINUX_PROC_SUPER_MAGIC}, {"msdosfs", LINUX_MSDOS_SUPER_MAGIC}, {"ntfs", LINUX_NTFS_SUPER_MAGIC}, {"nwfs", LINUX_NCP_SUPER_MAGIC}, {"hpfs", LINUX_HPFS_SUPER_MAGIC}, {"coda", LINUX_CODA_SUPER_MAGIC}, {"devfs", LINUX_DEVFS_SUPER_MAGIC}, {NULL, 0L}}; for (i = 0; b2l_tbl[i].bsd_name != NULL; i++) if (strcmp(b2l_tbl[i].bsd_name, fstypename) == 0) return (b2l_tbl[i].linux_type); return (0L); } static void bsd_to_linux_statfs(struct statfs *bsd_statfs, struct l_statfs *linux_statfs) { linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; } int linux_statfs(struct thread *td, struct linux_statfs_args *args) { struct l_statfs linux_statfs; struct statfs bsd_statfs; char *path; int error, dev_shm; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(statfs)) printf(ARGS(statfs, "%s, *"), path); #endif dev_shm = 0; error = kern_statfs(td, path, UIO_SYSSPACE, &bsd_statfs); if (strncmp(path, "/dev/shm", sizeof("/dev/shm") - 1) == 0) dev_shm = (path[8] == '\0' || (path[8] == '/' && path[9] == '\0')); LFREEPATH(path); if (error) return (error); bsd_to_linux_statfs(&bsd_statfs, &linux_statfs); if (dev_shm) linux_statfs.f_type = LINUX_SHMFS_MAGIC; return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static void bsd_to_linux_statfs64(struct statfs *bsd_statfs, struct l_statfs64 *linux_statfs) { linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; } int linux_statfs64(struct thread *td, struct linux_statfs64_args *args) { struct l_statfs64 linux_statfs; struct statfs bsd_statfs; char *path; int error; if (args->bufsize != sizeof(struct l_statfs64)) return EINVAL; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(statfs64)) printf(ARGS(statfs64, "%s, *"), path); #endif error = kern_statfs(td, path, UIO_SYSSPACE, &bsd_statfs); LFREEPATH(path); if (error) return (error); bsd_to_linux_statfs64(&bsd_statfs, &linux_statfs); return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) { struct l_statfs linux_statfs; struct statfs bsd_statfs; int error; #ifdef DEBUG if (ldebug(fstatfs)) printf(ARGS(fstatfs, "%d, *"), args->fd); #endif error = kern_fstatfs(td, args->fd, &bsd_statfs); if (error) return error; bsd_to_linux_statfs(&bsd_statfs, &linux_statfs); return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } struct l_ustat { l_daddr_t f_tfree; l_ino_t f_tinode; char f_fname[6]; char f_fpack[6]; }; int linux_ustat(struct thread *td, struct linux_ustat_args *args) { #ifdef DEBUG if (ldebug(ustat)) printf(ARGS(ustat, "%d, *"), args->dev); #endif return (EOPNOTSUPP); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int stat64_copyout(struct stat *buf, void *ubuf) { struct l_stat64 lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = minor(buf->st_dev) | (major(buf->st_dev) << 8); lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; lbuf.st_size = buf->st_size; lbuf.st_atim.tv_sec = buf->st_atim.tv_sec; lbuf.st_atim.tv_nsec = buf->st_atim.tv_nsec; lbuf.st_mtim.tv_sec = buf->st_mtim.tv_sec; lbuf.st_mtim.tv_nsec = buf->st_mtim.tv_nsec; lbuf.st_ctim.tv_sec = buf->st_ctim.tv_sec; lbuf.st_ctim.tv_nsec = buf->st_ctim.tv_nsec; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; /* * The __st_ino field makes all the difference. In the Linux kernel * it is conditionally compiled based on STAT64_HAS_BROKEN_ST_INO, * but without the assignment to __st_ino the runtime linker refuses * to mmap(2) any shared libraries. I guess it's broken alright :-) */ lbuf.__st_ino = buf->st_ino; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } int linux_stat64(struct thread *td, struct linux_stat64_args *args) { struct stat buf; char *filename; int error; LCONVPATHEXIST(td, args->filename, &filename); #ifdef DEBUG if (ldebug(stat64)) printf(ARGS(stat64, "%s, *"), filename); #endif error = linux_kern_stat(td, filename, UIO_SYSSPACE, &buf); LFREEPATH(filename); if (error) return (error); return (stat64_copyout(&buf, args->statbuf)); } int linux_lstat64(struct thread *td, struct linux_lstat64_args *args) { struct stat sb; char *filename; int error; LCONVPATHEXIST(td, args->filename, &filename); #ifdef DEBUG if (ldebug(lstat64)) printf(ARGS(lstat64, "%s, *"), args->filename); #endif error = linux_kern_lstat(td, filename, UIO_SYSSPACE, &sb); LFREEPATH(filename); if (error) return (error); return (stat64_copyout(&sb, args->statbuf)); } int linux_fstat64(struct thread *td, struct linux_fstat64_args *args) { struct stat buf; int error; #ifdef DEBUG if (ldebug(fstat64)) printf(ARGS(fstat64, "%d, *"), args->fd); #endif error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = stat64_copyout(&buf, args->statbuf); return (error); } int linux_fstatat64(struct thread *td, struct linux_fstatat64_args *args) { char *path; int error, dfd, flag; struct stat buf; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); #ifdef DEBUG if (ldebug(fstatat64)) printf(ARGS(fstatat64, "%i, %s, %i"), args->dfd, path, args->flag); #endif error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); if (!error) error = stat64_copyout(&buf, args->statbuf); LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */