diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 3efc93969419..cc777f2bf830 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -1,4243 +1,4242 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ffclock.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ktrace.h" #define __ELF_WORD_SIZE 32 #ifdef COMPAT_FREEBSD11 #define _WANT_FREEBSD11_KEVENT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef INET #include #endif #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #endif #include #include #include #include #include #include #include #include int compat_freebsd_32bit = 1; static void register_compat32_feature(void *arg) { if (!compat_freebsd_32bit) return; FEATURE_ADD("compat_freebsd32", "Compatible with 32-bit FreeBSD"); FEATURE_ADD("compat_freebsd_32bit", "Compatible with 32-bit FreeBSD (legacy feature name)"); } SYSINIT(freebsd32, SI_SUB_EXEC, SI_ORDER_ANY, register_compat32_feature, NULL); struct ptrace_io_desc32 { int piod_op; uint32_t piod_offs; uint32_t piod_addr; uint32_t piod_len; }; struct ptrace_vm_entry32 { int pve_entry; int pve_timestamp; uint32_t pve_start; uint32_t pve_end; uint32_t pve_offset; u_int pve_prot; u_int pve_pathlen; int32_t pve_fileid; u_int pve_fsid; uint32_t pve_path; }; #ifdef __amd64__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); CTASSERT(sizeof(struct bintime32) == 12); #else CTASSERT(sizeof(struct timeval32) == 16); CTASSERT(sizeof(struct timespec32) == 16); CTASSERT(sizeof(struct itimerval32) == 32); CTASSERT(sizeof(struct bintime32) == 16); #endif CTASSERT(sizeof(struct ostatfs32) == 256); #ifdef __amd64__ CTASSERT(sizeof(struct rusage32) == 72); #else CTASSERT(sizeof(struct rusage32) == 88); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); #ifdef __amd64__ CTASSERT(sizeof(struct kevent32) == 56); #else CTASSERT(sizeof(struct kevent32) == 64); #endif CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifdef __amd64__ CTASSERT(sizeof(struct stat32) == 208); CTASSERT(sizeof(struct freebsd11_stat32) == 96); #else CTASSERT(sizeof(struct stat32) == 224); CTASSERT(sizeof(struct freebsd11_stat32) == 120); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct __wrusage32 wru32; struct __wrusage wru, *wrup; struct __siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct ostatfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_OMNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_OMNAMELEN)); } #endif int freebsd32_getfsstat(struct thread *td, struct freebsd32_getfsstat_args *uap) { size_t count; int error; if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) return (EINVAL); error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, UIO_USERSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct ostatfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct ostatfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD11 int freebsd11_freebsd32_getfsstat(struct thread *td, struct freebsd11_freebsd32_getfsstat_args *uap) { return(kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, const char *fname, - enum uio_seg segflg, uint32_t *argv, uint32_t *envv) + uint32_t *argv, uint32_t *envv) { char *argp, *envp; uint32_t *p32, arg; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ - error = exec_args_add_fname(args, fname, segflg); + error = exec_args_add_fname(args, fname, UIO_USERSPACE); if (error != 0) goto err_exit; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = exec_args_add_arg(args, argp, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = exec_args_add_env(args, envp, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); - error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, - uap->argv, uap->envv); + error = freebsd32_exec_copyin_args(&eargs, uap->fname, uap->argv, + uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); - error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, - uap->argv, uap->envv); + error = freebsd32_exec_copyin_args(&eargs, NULL, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int freebsd32_mknodat(struct thread *td, struct freebsd32_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, PAIR32TO64(dev_t, uap->dev))); } int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ) != 0) prot |= PROT_EXEC; #endif return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len, prot, 0)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, &(struct mmap_req){ .mr_hint = (uintptr_t)uap->addr, .mr_len = uap->len, .mr_prot = prot, .mr_flags = uap->flags, .mr_fd = uap->fd, .mr_pos = PAIR32TO64(off_t, uap->pos), })); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, &(struct mmap_req){ .mr_hint = (uintptr_t)uap->addr, .mr_len = uap->len, .mr_prot = prot, .mr_flags = uap->flags, .mr_fd = uap->fd, .mr_pos = PAIR32TO64(off_t, uap->pos), })); } #endif #ifdef COMPAT_43 int ofreebsd32_mmap(struct thread *td, struct ofreebsd32_mmap_args *uap) { return (kern_ommap(td, (uintptr_t)uap->addr, uap->len, uap->prot, uap->flags, uap->fd, uap->pos)); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } static void freebsd32_kevent_to_kevent32(const struct kevent *kevp, struct kevent32 *ks32) { uint64_t e; int j; CP(*kevp, *ks32, ident); CP(*kevp, *ks32, filter); CP(*kevp, *ks32, flags); CP(*kevp, *ks32, fflags); #if BYTE_ORDER == LITTLE_ENDIAN ks32->data1 = kevp->data; ks32->data2 = kevp->data >> 32; #else ks32->data1 = kevp->data >> 32; ks32->data2 = kevp->data; #endif PTROUT_CP(*kevp, *ks32, udata); for (j = 0; j < nitems(kevp->ext); j++) { e = kevp->ext[j]; #if BYTE_ORDER == LITTLE_ENDIAN ks32->ext64[2 * j] = e; ks32->ext64[2 * j + 1] = e >> 32; #else ks32->ext64[2 * j] = e >> 32; ks32->ext64[2 * j + 1] = e; #endif } } void freebsd32_kinfo_knote_to_32(const struct kinfo_knote *kin, struct kinfo_knote32 *kin32) { memset(kin32, 0, sizeof(*kin32)); CP(*kin, *kin32, knt_kq_fd); freebsd32_kevent_to_kevent32(&kin->knt_event, &kin32->knt_event); CP(*kin, *kin32, knt_status); CP(*kin, *kin32, knt_extdata); switch (kin->knt_extdata) { case KNOTE_EXTDATA_NONE: break; case KNOTE_EXTDATA_VNODE: CP(*kin, *kin32, knt_vnode.knt_vnode_type); #if BYTE_ORDER == LITTLE_ENDIAN kin32->knt_vnode.knt_vnode_fsid[0] = kin->knt_vnode. knt_vnode_fsid; kin32->knt_vnode.knt_vnode_fsid[1] = kin->knt_vnode. knt_vnode_fsid >> 32; kin32->knt_vnode.knt_vnode_fileid[0] = kin->knt_vnode. knt_vnode_fileid; kin32->knt_vnode.knt_vnode_fileid[1] = kin->knt_vnode. knt_vnode_fileid >> 32; #else kin32->knt_vnode.knt_vnode_fsid[1] = kin->knt_vnode. knt_vnode_fsid; kin32->knt_vnode.knt_vnode_fsid[0] = kin->knt_vnode. knt_vnode_fsid >> 32; kin32->knt_vnode.knt_vnode_fileid[1] = kin->knt_vnode. knt_vnode_fileid; kin32->knt_vnode.knt_vnode_fileid[0] = kin->knt_vnode. knt_vnode_fileid >> 32; #endif memcpy(kin32->knt_vnode.knt_vnode_fullpath, kin->knt_vnode.knt_vnode_fullpath, PATH_MAX); break; case KNOTE_EXTDATA_PIPE: #if BYTE_ORDER == LITTLE_ENDIAN kin32->knt_pipe.knt_pipe_ino[0] = kin->knt_pipe.knt_pipe_ino; kin32->knt_pipe.knt_pipe_ino[1] = kin->knt_pipe. knt_pipe_ino >> 32; #else kin32->knt_pipe.knt_pipe_ino[1] = kin->knt_pipe.knt_pipe_ino; kin32->knt_pipe.knt_pipe_ino[0] = kin->knt_pipe. knt_pipe_ino >> 32; #endif break; } } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) freebsd32_kevent_to_kevent32(&kevp[i], &ks32[i]); error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); kevp[i].data = PAIR32TO64(uint64_t, ks32[i].data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { #if BYTE_ORDER == LITTLE_ENDIAN e = ks32[i].ext64[2 * j + 1]; e <<= 32; e += ks32[i].ext64[2 * j]; #else e = ks32[i].ext64[2 * j]; e <<= 32; e += ks32[i].ext64[2 * j + 1]; #endif kevp[i].ext[j] = e; } } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent_copyout, .k_copyin = freebsd32_kevent_copyin, }; #ifdef KTRACE struct kevent32 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct kevent32)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct kevent32)); #endif return (error); } #ifdef COMPAT_FREEBSD11 static int freebsd32_kevent11_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct freebsd11_kevent32 ks32[KQ_NEVENTS]; int i, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent11_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct freebsd11_kevent32 ks32[KQ_NEVENTS]; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) kevp[i].ext[j] = 0; } done: return (error); } int freebsd11_freebsd32_kevent(struct thread *td, struct freebsd11_freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent11_copyout, .k_copyin = freebsd32_kevent11_copyin, }; #ifdef KTRACE struct freebsd11_kevent32 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("freebsd11_kevent32", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct freebsd11_kevent32)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("freebsd11_kevent32", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct freebsd11_kevent32)); #endif return (error); } #endif int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error == 0) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static void ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl, struct ptrace_lwpinfo32 *pl32) { bzero(pl32, sizeof(*pl32)); pl32->pl_lwpid = pl->pl_lwpid; pl32->pl_event = pl->pl_event; pl32->pl_flags = pl->pl_flags; pl32->pl_sigmask = pl->pl_sigmask; pl32->pl_siglist = pl->pl_siglist; siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo); strcpy(pl32->pl_tdname, pl->pl_tdname); pl32->pl_child_pid = pl->pl_child_pid; pl32->pl_syscall_code = pl->pl_syscall_code; pl32->pl_syscall_narg = pl->pl_syscall_narg; } static void ptrace_sc_ret_to32(const struct ptrace_sc_ret *psr, struct ptrace_sc_ret32 *psr32) { bzero(psr32, sizeof(*psr32)); psr32->sr_retval[0] = psr->sr_retval[0]; psr32->sr_retval[1] = psr->sr_retval[1]; psr32->sr_error = psr->sr_error; } int freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap) { union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct ptrace_coredump pc; struct ptrace_sc_remote sr; struct dbreg32 dbreg; struct fpreg32 fpreg; struct reg32 reg; struct iovec vec; register_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret psr; int ptevents; } r; union { struct ptrace_io_desc32 piod; struct ptrace_lwpinfo32 pl; struct ptrace_vm_entry32 pve; struct ptrace_coredump32 pc; struct ptrace_sc_remote32 sr; uint32_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret32 psr; struct iovec32 vec; } r32; syscallarg_t pscr_args[nitems(td->td_sa.args)]; u_int pscr_args32[nitems(td->td_sa.args)]; void *addr; int data, error, i; if (!allow_ptrace) return (ENOSYS); error = 0; AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; data = uap->data; switch (uap->req) { case PT_GET_EVENT_MASK: case PT_GET_SC_ARGS: case PT_GET_SC_RET: break; case PT_LWPINFO: if (uap->data > sizeof(r32.pl)) return (EINVAL); /* * Pass size of native structure in 'data'. Truncate * if necessary to avoid siginfo. */ data = sizeof(r.pl); if (uap->data < offsetof(struct ptrace_lwpinfo32, pl_siginfo) + sizeof(struct __siginfo32)) data = offsetof(struct ptrace_lwpinfo, pl_siginfo); break; case PT_GETREGS: bzero(&r.reg, sizeof(r.reg)); break; case PT_GETFPREGS: bzero(&r.fpreg, sizeof(r.fpreg)); break; case PT_GETDBREGS: bzero(&r.dbreg, sizeof(r.dbreg)); break; case PT_SETREGS: error = copyin(uap->addr, &r.reg, sizeof(r.reg)); break; case PT_SETFPREGS: error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg)); break; case PT_SETDBREGS: error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg)); break; case PT_GETREGSET: case PT_SETREGSET: error = copyin(uap->addr, &r32.vec, sizeof(r32.vec)); if (error != 0) break; r.vec.iov_len = r32.vec.iov_len; r.vec.iov_base = PTRIN(r32.vec.iov_base); break; case PT_SET_EVENT_MASK: if (uap->data != sizeof(r.ptevents)) error = EINVAL; else error = copyin(uap->addr, &r.ptevents, uap->data); break; case PT_IO: error = copyin(uap->addr, &r32.piod, sizeof(r32.piod)); if (error) break; CP(r32.piod, r.piod, piod_op); PTRIN_CP(r32.piod, r.piod, piod_offs); PTRIN_CP(r32.piod, r.piod, piod_addr); CP(r32.piod, r.piod, piod_len); break; case PT_VM_ENTRY: error = copyin(uap->addr, &r32.pve, sizeof(r32.pve)); if (error) break; CP(r32.pve, r.pve, pve_entry); CP(r32.pve, r.pve, pve_timestamp); CP(r32.pve, r.pve, pve_start); CP(r32.pve, r.pve, pve_end); CP(r32.pve, r.pve, pve_offset); CP(r32.pve, r.pve, pve_prot); CP(r32.pve, r.pve, pve_pathlen); CP(r32.pve, r.pve, pve_fileid); CP(r32.pve, r.pve, pve_fsid); PTRIN_CP(r32.pve, r.pve, pve_path); break; case PT_COREDUMP: if (uap->data != sizeof(r32.pc)) error = EINVAL; else error = copyin(uap->addr, &r32.pc, uap->data); CP(r32.pc, r.pc, pc_fd); CP(r32.pc, r.pc, pc_flags); r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit); data = sizeof(r.pc); break; case PT_SC_REMOTE: if (uap->data != sizeof(r32.sr)) { error = EINVAL; break; } error = copyin(uap->addr, &r32.sr, uap->data); if (error != 0) break; CP(r32.sr, r.sr, pscr_syscall); CP(r32.sr, r.sr, pscr_nargs); if (r.sr.pscr_nargs > nitems(td->td_sa.args)) { error = EINVAL; break; } error = copyin(PTRIN(r32.sr.pscr_args), pscr_args32, sizeof(u_int) * r32.sr.pscr_nargs); if (error != 0) break; for (i = 0; i < r32.sr.pscr_nargs; i++) pscr_args[i] = pscr_args32[i]; r.sr.pscr_args = pscr_args; break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: CP(r.pve, r32.pve, pve_entry); CP(r.pve, r32.pve, pve_timestamp); CP(r.pve, r32.pve, pve_start); CP(r.pve, r32.pve, pve_end); CP(r.pve, r32.pve, pve_offset); CP(r.pve, r32.pve, pve_prot); CP(r.pve, r32.pve, pve_pathlen); CP(r.pve, r32.pve, pve_fileid); CP(r.pve, r32.pve, pve_fsid); error = copyout(&r32.pve, uap->addr, sizeof(r32.pve)); break; case PT_IO: CP(r.piod, r32.piod, piod_len); error = copyout(&r32.piod, uap->addr, sizeof(r32.piod)); break; case PT_GETREGS: error = copyout(&r.reg, uap->addr, sizeof(r.reg)); break; case PT_GETFPREGS: error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg)); break; case PT_GETDBREGS: error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg)); break; case PT_GETREGSET: r32.vec.iov_len = r.vec.iov_len; error = copyout(&r32.vec, uap->addr, sizeof(r32.vec)); break; case PT_GET_EVENT_MASK: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.ptevents, uap->addr, uap->data); break; case PT_LWPINFO: ptrace_lwpinfo_to32(&r.pl, &r32.pl); error = copyout(&r32.pl, uap->addr, uap->data); break; case PT_GET_SC_ARGS: for (i = 0; i < nitems(r.args); i++) r32.args[i] = (uint32_t)r.args[i]; error = copyout(r32.args, uap->addr, MIN(uap->data, sizeof(r32.args))); break; case PT_GET_SC_RET: ptrace_sc_ret_to32(&r.psr, &r32.psr); error = copyout(&r32.psr, uap->addr, MIN(uap->data, sizeof(r32.psr))); break; case PT_SC_REMOTE: ptrace_sc_ret_to32(&r.sr.pscr_ret, &r32.sr.pscr_ret); error = copyout(&r32.sr.pscr_ret, uap->addr + offsetof(struct ptrace_sc_remote32, pscr_ret), sizeof(r32.psr)); break; } return (error); } int freebsd32_copyinuio(const struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); uio = allocuio(iovcnt); iov = uio->uio_iov; for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { freeuio(uio); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { freeuio(uio); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); freeuio(auio); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); freeuio(auio); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); freeuio(auio); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); freeuio(auio); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(const struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static size_t freebsd32_cmsg_convert(const struct cmsghdr *cm, void *data, socklen_t datalen) { size_t copylen; union { struct timespec32 ts; struct timeval32 tv; struct bintime32 bt; } tmp32; union { struct timespec ts; struct timeval tv; struct bintime bt; } *in; in = data; copylen = 0; switch (cm->cmsg_level) { case SOL_SOCKET: switch (cm->cmsg_type) { case SCM_TIMESTAMP: TV_CP(*in, tmp32, tv); copylen = sizeof(tmp32.tv); break; case SCM_BINTIME: BT_CP(*in, tmp32, bt); copylen = sizeof(tmp32.bt); break; case SCM_REALTIME: case SCM_MONOTONIC: TS_CP(*in, tmp32, ts); copylen = sizeof(tmp32.ts); break; default: break; } default: break; } if (copylen == 0) return (datalen); KASSERT((datalen >= copylen), ("corrupted cmsghdr")); bcopy(&tmp32, data, copylen); return (copylen); } static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen, datalen_out, oldclen; int error; caddr_t ctlbuf; int len, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; msg->msg_controllen = 0; ctlbuf = msg->msg_control; for (m = control; m != NULL && len > 0; m = m->m_next) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; datalen_out = freebsd32_cmsg_convert(cm, data, datalen); /* * Copy out the message header. Preserve the native * message size in case we need to inspect the message * contents later. */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); goto exit; } oldclen = cm->cmsg_len; cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen_out; error = copyout(cm, ctlbuf, copylen); cm->cmsg_len = oldclen; if (error != 0) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); copylen = datalen_out; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); break; } /* Copy out the message data. */ error = copyout(data, ctlbuf, copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } msg->msg_controllen += FREEBSD32_CMSG_SPACE(datalen_out); } } if (len == 0 && m != NULL) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); } exit: return (error); } int freebsd32_recvmsg(struct thread *td, struct freebsd32_recvmsg_args *uap) { struct msghdr msg; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov((void *)msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } return (error); } #ifdef COMPAT_43 int ofreebsd32_recvmsg(struct thread *td, struct ofreebsd32_recvmsg_args *uap) { return (ENOSYS); } #endif /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct cmsghdr *cm; struct mbuf *m; void *in, *in1, *md; u_int msglen, outlen; int error; /* Enforce the size limit of the native implementation. */ if (buflen > MCLBYTES) return (EINVAL); in = malloc(buflen, M_TEMP, M_WAITOK); error = copyin(buf, in, buflen); if (error != 0) goto out; /* * Make a pass over the input buffer to determine the amount of space * required for 64 bit-aligned copies of the control messages. */ in1 = in; outlen = 0; while (buflen > 0) { if (buflen < sizeof(*cm)) { error = EINVAL; break; } cm = (struct cmsghdr *)in1; if (cm->cmsg_len < FREEBSD32_ALIGN(sizeof(*cm)) || cm->cmsg_len > buflen) { error = EINVAL; break; } msglen = FREEBSD32_ALIGN(cm->cmsg_len); if (msglen < cm->cmsg_len) { error = EINVAL; break; } /* The native ABI permits the final padding to be omitted. */ if (msglen > buflen) msglen = buflen; buflen -= msglen; in1 = (char *)in1 + msglen; outlen += CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen - FREEBSD32_ALIGN(sizeof(*cm))); } if (error != 0) goto out; /* * Allocate up to MJUMPAGESIZE space for the re-aligned and * re-padded control messages. This allows a full MCLBYTES of * 32-bit sized and aligned messages to fit and avoids an ABI * mismatch with the native implementation. */ m = m_get2(outlen, M_WAITOK, MT_CONTROL, 0); if (m == NULL) { error = EINVAL; goto out; } m->m_len = outlen; md = mtod(m, void *); /* * Make a second pass over input messages, copying them into the output * buffer. */ in1 = in; while (outlen > 0) { /* Copy the message header and align the length field. */ cm = md; memcpy(cm, in1, sizeof(*cm)); msglen = cm->cmsg_len - FREEBSD32_ALIGN(sizeof(*cm)); cm->cmsg_len = CMSG_ALIGN(sizeof(*cm)) + msglen; /* Copy the message body. */ in1 = (char *)in1 + FREEBSD32_ALIGN(sizeof(*cm)); md = (char *)md + CMSG_ALIGN(sizeof(*cm)); memcpy(md, in1, msglen); in1 = (char *)in1 + FREEBSD32_ALIGN(msglen); md = (char *)md + CMSG_ALIGN(msglen); KASSERT(outlen >= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen), ("outlen %u underflow, msglen %u", outlen, msglen)); outlen -= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen); } *mp = m; out: free(in, M_TEMP); return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov((void *)msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } #ifdef COMPAT_43 int ofreebsd32_sendmsg(struct thread *td, struct ofreebsd32_sendmsg_args *uap) { return (ENOSYS); } #endif int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct ostatfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct ostatfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct ostatfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_truncate(struct thread *td, struct ofreebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } #endif int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_ftruncate(struct thread *td, struct ofreebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, uap->length)); } int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif #if defined(COMPAT_FREEBSD11) int freebsd11_freebsd32_getdirentries(struct thread *td, struct freebsd11_freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #endif /* COMPAT_FREEBSD11 */ #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init_one(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) (void)copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) freeuio(hdr_uio); if (trl_uio) freeuio(trl_uio); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { #ifndef __amd64__ /* * 32-bit architectures other than i386 have 64-bit time_t. This * results in struct timespec32 with 12 bytes for tv_sec and tv_nsec, * and 4 bytes of padding. Zero the padding holes in struct stat32. */ bzero(&out->st_atim, sizeof(out->st_atim)); bzero(&out->st_mtim, sizeof(out->st_mtim)); bzero(&out->st_ctim, sizeof(out->st_ctim)); bzero(&out->st_birthtim, sizeof(out->st_birthtim)); #endif CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); CP(*in, *out, st_filerev); CP(*in, *out, st_bsdflags); TS_CP(*in, *out, st_birthtim); out->st_padding1 = 0; #ifdef __STAT32_TIME_T_EXT out->st_atim_ext = 0; out->st_mtim_ext = 0; out->st_ctim_ext = 0; out->st_btim_ext = 0; #endif bzero(out->st_spare, sizeof(out->st_spare)); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { bzero(out, sizeof(*out)); CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); out->st_size = MIN(in->st_size, INT32_MAX); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->sb, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->sb, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap) { struct stat sb; struct stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #if defined(COMPAT_FREEBSD11) extern int ino64_trunc_error; static int freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out) { #ifndef __amd64__ /* * 32-bit architectures other than i386 have 64-bit time_t. This * results in struct timespec32 with 12 bytes for tv_sec and tv_nsec, * and 4 bytes of padding. Zero the padding holes in freebsd11_stat32. */ bzero(&out->st_atim, sizeof(out->st_atim)); bzero(&out->st_mtim, sizeof(out->st_mtim)); bzero(&out->st_ctim, sizeof(out->st_ctim)); bzero(&out->st_birthtim, sizeof(out->st_birthtim)); #endif CP(*in, *out, st_ino); if (in->st_ino != out->st_ino) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_ino = UINT32_MAX; break; } } CP(*in, *out, st_nlink); if (in->st_nlink != out->st_nlink) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_nlink = UINT16_MAX; break; } } out->st_dev = in->st_dev; if (out->st_dev != in->st_dev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } CP(*in, *out, st_mode); CP(*in, *out, st_uid); CP(*in, *out, st_gid); out->st_rdev = in->st_rdev; if (out->st_rdev != in->st_rdev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_lspare = 0; bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim), sizeof(*out) - offsetof(struct freebsd11_stat32, st_birthtim) - sizeof(out->st_birthtim)); return (0); } int freebsd11_freebsd32_stat(struct thread *td, struct freebsd11_freebsd32_stat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstat(struct thread *td, struct freebsd11_freebsd32_fstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_fstat(td, uap->fd, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstatat(struct thread *td, struct freebsd11_freebsd32_fstatat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->buf, sizeof (sb32)); return (error); } int freebsd11_freebsd32_lstat(struct thread *td, struct freebsd11_freebsd32_lstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fhstat(struct thread *td, struct freebsd11_freebsd32_fhstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } static int freebsd11_cvtnstat32(struct stat *sb, struct nstat32 *nsb32) { struct nstat nsb; int error; error = freebsd11_cvtnstat(sb, &nsb); if (error != 0) return (error); bzero(nsb32, sizeof(*nsb32)); CP(nsb, *nsb32, st_dev); CP(nsb, *nsb32, st_ino); CP(nsb, *nsb32, st_mode); CP(nsb, *nsb32, st_nlink); CP(nsb, *nsb32, st_uid); CP(nsb, *nsb32, st_gid); CP(nsb, *nsb32, st_rdev); CP(nsb, *nsb32, st_atim.tv_sec); CP(nsb, *nsb32, st_atim.tv_nsec); CP(nsb, *nsb32, st_mtim.tv_sec); CP(nsb, *nsb32, st_mtim.tv_nsec); CP(nsb, *nsb32, st_ctim.tv_sec); CP(nsb, *nsb32, st_ctim.tv_nsec); CP(nsb, *nsb32, st_size); CP(nsb, *nsb32, st_blocks); CP(nsb, *nsb32, st_blksize); CP(nsb, *nsb32, st_flags); CP(nsb, *nsb32, st_gen); CP(nsb, *nsb32, st_birthtim.tv_sec); CP(nsb, *nsb32, st_birthtim.tv_nsec); return (0); } int freebsd11_freebsd32_nstat(struct thread *td, struct freebsd11_freebsd32_nstat_args *uap) { struct stat sb; struct nstat32 nsb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtnstat32(&sb, &nsb); if (error != 0) error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } int freebsd11_freebsd32_nlstat(struct thread *td, struct freebsd11_freebsd32_nlstat_args *uap) { struct stat sb; struct nstat32 nsb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtnstat32(&sb, &nsb); if (error == 0) error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } int freebsd11_freebsd32_nfstat(struct thread *td, struct freebsd11_freebsd32_nfstat_args *uap) { struct nstat32 nub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error != 0) return (error); error = freebsd11_cvtnstat32(&ub, &nub); if (error == 0) error = copyout(&nub, uap->sb, sizeof(nub)); return (error); } #endif int freebsd32___sysctl(struct thread *td, struct freebsd32___sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error) return (error); if (uap->oldlenp != NULL && suword32(uap->oldlenp, j) != 0) error = EFAULT; return (error); } int freebsd32___sysctlbyname(struct thread *td, struct freebsd32___sysctlbyname_args *uap) { size_t oldlen, rv; int error; uint32_t tmp; if (uap->oldlenp != NULL) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { error = oldlen = 0; } if (error != 0) return (EFAULT); error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old, &oldlen, uap->new, uap->newlen, &rv, SCTL_MASK32, 1); if (error != 0) return (error); if (uap->oldlenp != NULL && suword32(uap->oldlenp, rv) != 0) error = EFAULT; return (error); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); freeuio(auio); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } freeuio(auio); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { uint32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } struct sigvec32 { uint32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } struct sigstack32 { uint32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { return (freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } int freebsd32_clock_nanosleep(struct thread *td, struct freebsd32_clock_nanosleep_args *uap) { int error; error = freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error, error2; error = copyin(ua_rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, ua_rmtp, sizeof(rmt32)); if (error2 != 0) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_timerfd_gettime(struct thread *td, struct freebsd32_timerfd_gettime_args *uap) { struct itimerspec curr_value; struct itimerspec32 curr_value32; int error; error = kern_timerfd_gettime(td, uap->fd, &curr_value); if (error == 0) { CP(curr_value, curr_value32, it_value.tv_sec); CP(curr_value, curr_value32, it_value.tv_nsec); CP(curr_value, curr_value32, it_interval.tv_sec); CP(curr_value, curr_value32, it_interval.tv_nsec); error = copyout(&curr_value32, uap->curr_value, sizeof(curr_value32)); } return (error); } int freebsd32_timerfd_settime(struct thread *td, struct freebsd32_timerfd_settime_args *uap) { struct itimerspec new_value, old_value; struct itimerspec32 new_value32, old_value32; int error; error = copyin(uap->new_value, &new_value32, sizeof(new_value32)); if (error != 0) return (error); CP(new_value32, new_value, it_value.tv_sec); CP(new_value32, new_value, it_value.tv_nsec); CP(new_value32, new_value, it_interval.tv_sec); CP(new_value32, new_value, it_interval.tv_nsec); if (uap->old_value == NULL) { error = kern_timerfd_settime(td, uap->fd, uap->flags, &new_value, NULL); } else { error = kern_timerfd_settime(td, uap->fd, uap->flags, &new_value, &old_value); if (error == 0) { CP(old_value, old_value32, it_value.tv_sec); CP(old_value, old_value32, it_value.tv_nsec); CP(old_value, old_value32, it_interval.tv_sec); CP(old_value, old_value32, it_interval.tv_nsec); error = copyout(&old_value32, uap->old_value, sizeof(old_value32)); } } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct __siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } #ifndef _FREEBSD32_SYSPROTO_H_ struct freebsd32_sigqueue_args { pid_t pid; int signum; /* union sigval32 */ int value; }; #endif int freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap) { union sigval sv; /* * On 32-bit ABIs, sival_int and sival_ptr are the same. * On 64-bit little-endian ABIs, the low bits are the same. * In 64-bit big-endian ABIs, sival_int overlaps with * sival_ptr's HIGH bits. We choose to support sival_int * rather than sival_ptr in this case as it seems to be * more common. */ bzero(&sv, sizeof(sv)); sv.sival_int = (uint32_t)(uint64_t)uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct __siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct __siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct __siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct __siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } static int copyin32_set(const void *u, void *k, size_t size) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ int rv; struct bitset *kb = k; int *p; rv = copyin(u, k, size); if (rv != 0) return (rv); p = (int *)kb->__bits; /* Loop through swapping words. * `size' is in bytes, we need bits. */ for (int i = 0; i < __bitset_words(size * 8); i++) { int tmp = p[0]; p[0] = p[1]; p[1] = tmp; p += 2; } return (0); #else return (copyin(u, k, size)); #endif } static int copyout32_set(const void *k, void *u, size_t size) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ const struct bitset *kb = k; struct bitset *ub = u; const int *kp = (const int *)kb->__bits; int *up = (int *)ub->__bits; int rv; for (int i = 0; i < __bitset_words(CPU_SETSIZE); i++) { /* `size' is in bytes, we need bits. */ for (int i = 0; i < __bitset_words(size * 8); i++) { rv = suword32(up, kp[1]); if (rv == 0) rv = suword32(up + 1, kp[0]); if (rv != 0) return (EFAULT); } } return (0); #else return (copyout(k, u, size)); #endif } static const struct cpuset_copy_cb cpuset_copy32_cb = { .cpuset_copyin = copyin32_set, .cpuset_copyout = copyout32_set }; int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { return (user_cpuset_getaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask, &cpuset_copy32_cb)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { return (user_cpuset_setaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask, &cpuset_copy32_cb)); } int freebsd32_cpuset_getdomain(struct thread *td, struct freebsd32_cpuset_getdomain_args *uap) { return (kern_cpuset_getdomain(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy, &cpuset_copy32_cb)); } int freebsd32_cpuset_setdomain(struct thread *td, struct freebsd32_cpuset_setdomain_args *uap) { return (kern_cpuset_setdomain(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy, &cpuset_copy32_cb)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); freeuio(auio); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_module_handler(struct module *mod, int what, void *arg) { return (kern_syscall_module_handler(freebsd32_sysent, mod, what, arg)); } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { return (kern_syscall_helper_register(freebsd32_sysent, sd, flags)); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { return (kern_syscall_helper_unregister(freebsd32_sysent, sd)); } int freebsd32_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { struct sysentvec *sysent; int argc, envc, i; uint32_t *vectp; char *stringp; uintptr_t destp, ustringp; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int error, szsigcode; sysent = imgp->sysent; arginfo = (struct freebsd32_ps_strings *)PROC_PS_STRINGS(imgp->proc); imgp->ps_strings = arginfo; destp = (uintptr_t)arginfo; /* * Install sigcode. */ if (!PROC_HAS_SHP(imgp->proc)) { szsigcode = *sysent->sv_szsigcode; destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); error = copyout(sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) { execpath_len = strlen(imgp->execpath) + 1; destp -= execpath_len; imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes32, imgp->pagesizes, sizeof(pagesizes32)); if (error != 0) return (error); imgp->pagesizeslen = sizeof(pagesizes32); /* * Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ destp -= AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(uint32_t)); } vectp = (uint32_t *)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword32(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword32(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat *stat; struct kld_file_stat32 *stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld_file_stat_1_32) && version != sizeof(struct kld_file_stat32)) return (EINVAL); stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO); stat32 = malloc(sizeof(*stat32), M_TEMP, M_WAITOK | M_ZERO); error = kern_kldstat(td, uap->fileid, stat); if (error == 0) { bcopy(&stat->name[0], &stat32->name[0], sizeof(stat->name)); CP(*stat, *stat32, refs); CP(*stat, *stat32, id); PTROUT_CP(*stat, *stat32, address); CP(*stat, *stat32, size); bcopy(&stat->pathname[0], &stat32->pathname[0], sizeof(stat->pathname)); stat32->version = version; error = copyout(stat32, uap->stat, version); } free(stat, M_TEMP); free(stat32, M_TEMP); return (error); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags, signum; if (uap->com >= PROC_PROCCTL_MD_MIN) return (cpu_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, PTRIN(uap->data))); switch (uap->com) { case PROC_ASLR_CTL: case PROC_PROTMAX_CTL: case PROC_SPROTECT: case PROC_STACKGAP_CTL: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: case PROC_NO_NEW_PRIVS_CTL: case PROC_WXMAP_CTL: case PROC_LOGSIGEXIT_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_ASLR_STATUS: case PROC_PROTMAX_STATUS: case PROC_STACKGAP_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: case PROC_NO_NEW_PRIVS_STATUS: case PROC_WXMAP_STATUS: case PROC_LOGSIGEXIT_STATUS: data = &flags; break; case PROC_PDEATHSIG_CTL: error = copyin(uap->data, &signum, sizeof(signum)); if (error != 0) return (error); data = &signum; break; case PROC_PDEATHSIG_STATUS: data = &signum; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_ASLR_STATUS: case PROC_PROTMAX_STATUS: case PROC_STACKGAP_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: case PROC_NO_NEW_PRIVS_STATUS: case PROC_WXMAP_STATUS: case PROC_LOGSIGEXIT_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; case PROC_PDEATHSIG_STATUS: if (error == 0) error = copyout(&signum, uap->data, sizeof(signum)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { intptr_t tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: case F_KINFO: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } int freebsd32_sched_rr_get_interval(struct thread *td, struct freebsd32_sched_rr_get_interval_args *uap) { struct timespec ts; struct timespec32 ts32; int error; error = kern_sched_rr_get_interval(td, uap->pid, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->interval, sizeof(ts32)); } return (error); } static void timex_to_32(struct timex32 *dst, struct timex *src) { CP(*src, *dst, modes); CP(*src, *dst, offset); CP(*src, *dst, freq); CP(*src, *dst, maxerror); CP(*src, *dst, esterror); CP(*src, *dst, status); CP(*src, *dst, constant); CP(*src, *dst, precision); CP(*src, *dst, tolerance); CP(*src, *dst, ppsfreq); CP(*src, *dst, jitter); CP(*src, *dst, shift); CP(*src, *dst, stabil); CP(*src, *dst, jitcnt); CP(*src, *dst, calcnt); CP(*src, *dst, errcnt); CP(*src, *dst, stbcnt); } static void timex_from_32(struct timex *dst, struct timex32 *src) { CP(*src, *dst, modes); CP(*src, *dst, offset); CP(*src, *dst, freq); CP(*src, *dst, maxerror); CP(*src, *dst, esterror); CP(*src, *dst, status); CP(*src, *dst, constant); CP(*src, *dst, precision); CP(*src, *dst, tolerance); CP(*src, *dst, ppsfreq); CP(*src, *dst, jitter); CP(*src, *dst, shift); CP(*src, *dst, stabil); CP(*src, *dst, jitcnt); CP(*src, *dst, calcnt); CP(*src, *dst, errcnt); CP(*src, *dst, stbcnt); } int freebsd32_ntp_adjtime(struct thread *td, struct freebsd32_ntp_adjtime_args *uap) { struct timex tx; struct timex32 tx32; int error, retval; error = copyin(uap->tp, &tx32, sizeof(tx32)); if (error == 0) { timex_from_32(&tx, &tx32); error = kern_ntp_adjtime(td, &tx, &retval); if (error == 0) { timex_to_32(&tx32, &tx); error = copyout(&tx32, uap->tp, sizeof(tx32)); if (error == 0) td->td_retval[0] = retval; } } return (error); } #ifdef FFCLOCK extern struct mtx ffclock_mtx; extern struct ffclock_estimate ffclock_estimate; extern int8_t ffclock_updated; int freebsd32_ffclock_setestimate(struct thread *td, struct freebsd32_ffclock_setestimate_args *uap) { struct ffclock_estimate cest; struct ffclock_estimate32 cest32; int error; /* Reuse of PRIV_CLOCK_SETTIME. */ if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0) return (error); if ((error = copyin(uap->cest, &cest32, sizeof(struct ffclock_estimate32))) != 0) return (error); CP(cest.update_time, cest32.update_time, sec); memcpy(&cest.update_time.frac, &cest32.update_time.frac, sizeof(uint64_t)); CP(cest, cest32, update_ffcount); CP(cest, cest32, leapsec_next); CP(cest, cest32, period); CP(cest, cest32, errb_abs); CP(cest, cest32, errb_rate); CP(cest, cest32, status); CP(cest, cest32, leapsec_total); CP(cest, cest32, leapsec); mtx_lock(&ffclock_mtx); memcpy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate)); ffclock_updated++; mtx_unlock(&ffclock_mtx); return (error); } int freebsd32_ffclock_getestimate(struct thread *td, struct freebsd32_ffclock_getestimate_args *uap) { struct ffclock_estimate cest; struct ffclock_estimate32 cest32; int error; mtx_lock(&ffclock_mtx); memcpy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); mtx_unlock(&ffclock_mtx); CP(cest32.update_time, cest.update_time, sec); memcpy(&cest32.update_time.frac, &cest.update_time.frac, sizeof(uint64_t)); CP(cest32, cest, update_ffcount); CP(cest32, cest, leapsec_next); CP(cest32, cest, period); CP(cest32, cest, errb_abs); CP(cest32, cest, errb_rate); CP(cest32, cest, status); CP(cest32, cest, leapsec_total); CP(cest32, cest, leapsec); error = copyout(&cest32, uap->cest, sizeof(struct ffclock_estimate32)); return (error); } #else /* !FFCLOCK */ int freebsd32_ffclock_setestimate(struct thread *td, struct freebsd32_ffclock_setestimate_args *uap) { return (ENOSYS); } int freebsd32_ffclock_getestimate(struct thread *td, struct freebsd32_ffclock_getestimate_args *uap) { return (ENOSYS); } #endif /* FFCLOCK */ #ifdef COMPAT_43 int ofreebsd32_sethostid(struct thread *td, struct ofreebsd32_sethostid_args *uap) { int name[] = { CTL_KERN, KERN_HOSTID }; long hostid; hostid = uap->hostid; return (kernel_sysctl(td, name, nitems(name), NULL, NULL, &hostid, sizeof(hostid), NULL, 0)); } #endif int freebsd32_setcred(struct thread *td, struct freebsd32_setcred_args *uap) { /* Last argument is 'is_32bit'. */ return (user_setcred(td, uap->flags, uap->wcred, uap->size, true)); } diff --git a/sys/compat/freebsd32/freebsd32_util.h b/sys/compat/freebsd32/freebsd32_util.h index 4ac314f4391e..1ae016814329 100644 --- a/sys/compat/freebsd32/freebsd32_util.h +++ b/sys/compat/freebsd32/freebsd32_util.h @@ -1,132 +1,132 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1998-1999 Andrew Gallatin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef _COMPAT_FREEBSD32_FREEBSD32_UTIL_H_ #define _COMPAT_FREEBSD32_FREEBSD32_UTIL_H_ #include #include #include #include #include #include #include struct freebsd32_ps_strings { uint32_t ps_argvstr; /* first of 0 or more argument strings */ int ps_nargvstr; /* the number of argument strings */ uint32_t ps_envstr; /* first of 0 or more environment strings */ int ps_nenvstr; /* the number of environment strings */ }; #if defined(__amd64__) #include #endif #define FREEBSD32_PS_STRINGS \ (FREEBSD32_USRSTACK - sizeof(struct freebsd32_ps_strings)) extern struct sysent freebsd32_sysent[]; #define SYSCALL32_MODULE(name, offset, new_sysent, evh, arg) \ static struct syscall_module_data name##_syscall32_mod = { \ evh, arg, offset, new_sysent, { 0, NULL } \ }; \ \ static moduledata_t name##32_mod = { \ "sys32/" #name, \ syscall32_module_handler, \ &name##_syscall32_mod \ }; \ DECLARE_MODULE(name##32, name##32_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE) #define SYSCALL32_MODULE_HELPER(syscallname) \ static int syscallname##_syscall32 = FREEBSD32_SYS_##syscallname; \ static struct sysent syscallname##_sysent32 = { \ (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ (sy_call_t *)& syscallname \ }; \ SYSCALL32_MODULE(syscallname, \ & syscallname##_syscall32, & syscallname##_sysent32,\ NULL, NULL); #define SYSCALL32_INIT_HELPER_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = FREEBSD32_SYS_##syscallname \ } #define SYSCALL32_INIT_HELPER_COMPAT_F(syscallname, flags) { \ .new_sysent = { \ .sy_narg = (sizeof(struct syscallname ## _args ) \ / sizeof(register_t)), \ .sy_call = (sy_call_t *)& sys_ ## syscallname, \ .sy_flags = (flags) \ }, \ .syscall_no = FREEBSD32_SYS_##syscallname \ } #define SYSCALL32_INIT_HELPER(syscallname) \ SYSCALL32_INIT_HELPER_F(syscallname, 0) #define SYSCALL32_INIT_HELPER_COMPAT(syscallname) \ SYSCALL32_INIT_HELPER_COMPAT_F(syscallname, 0) int syscall32_module_handler(struct module *mod, int what, void *arg); int syscall32_helper_register(struct syscall_helper_data *sd, int flags); int syscall32_helper_unregister(struct syscall_helper_data *sd); struct iovec32; struct rusage32; int freebsd32_copyout_strings(struct image_params *imgp, uintptr_t *stack_base); int freebsd32_copyiniov(struct iovec32 *iovp, u_int iovcnt, struct iovec **iov, int error); int freebsd32_copyinuio(const struct iovec32 *iovp, u_int iovcnt, struct uio **uiop); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32); struct image_args; int freebsd32_exec_copyin_args(struct image_args *args, const char *fname, - enum uio_seg segflg, uint32_t *argv, uint32_t *envv); + uint32_t *argv, uint32_t *envv); struct kinfo_knote; struct kinfo_knote32; void freebsd32_kinfo_knote_to_32(const struct kinfo_knote *kin, struct kinfo_knote32 *kin32); extern int compat_freebsd_32bit; #endif /* !_COMPAT_FREEBSD32_FREEBSD32_UTIL_H_ */ diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index e789586160b6..b88f1451f1a2 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -1,3095 +1,3095 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #include #include #include int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalhigh; l_ulong freehigh; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; struct l_pselect6arg { l_uintptr_t ss; l_size_t ss_len; }; static int linux_utimensat_lts_to_ts(struct l_timespec *, struct timespec *); #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, struct timespec *); #endif static int linux_common_utimensat(struct thread *, int, const char *, struct timespec *, int); static int linux_common_pselect6(struct thread *, l_int, l_fd_set *, l_fd_set *, l_fd_set *, struct timespec *, l_uintptr_t *); static int linux_common_ppoll(struct thread *, struct pollfd *, uint32_t, struct timespec *, l_sigset_t *, l_size_t); static int linux_pollin(struct thread *, struct pollfd *, struct pollfd *, u_int); static int linux_pollout(struct thread *, struct pollfd *, struct pollfd *, u_int); int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; int i, j; struct timespec ts; bzero(&sysinfo, sizeof(sysinfo)); getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; /* * sharedram counts pages allocated to named, swap-backed objects such * as shared memory segments and tmpfs files. There is no cheap way to * compute this, so just leave the field unpopulated. Linux itself only * started setting this field in the 3.x timeframe. */ sysinfo.sharedram = 0; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* * Platforms supported by the emulation layer do not have a notion of * high memory. */ sysinfo.totalhigh = 0; sysinfo.freehigh = 0; sysinfo.mem_unit = 1; return (copyout(&sysinfo, args->info, sizeof(sysinfo))); } #ifdef LINUX_LEGACY_SYSCALLS int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; int error __diagused; secs = args->secs; /* * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 * to match kern_setitimer()'s limit to avoid error from it. * * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit * platforms. */ if (secs > INT32_MAX / 2) secs = INT32_MAX / 2; it.it_value.tv_sec = secs; it.it_value.tv_usec = 0; timevalclear(&it.it_interval); error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); KASSERT(error == 0, ("kern_setitimer returns %d", error)); if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || old_it.it_value.tv_usec >= 500000) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; return (0); } #endif int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; uintptr_t new, old; old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (uintptr_t)args->dsend; if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) td->td_retval[0] = (register_t)new; else td->td_retval[0] = (register_t)old; return (0); } #ifdef LINUX_LEGACY_SYSCALLS int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. */ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, LINUX_NFDBITS); if (error) goto select_out; if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(<v, args->timeout, sizeof(ltv)))) goto select_out; } select_out: return (error); } #endif int linux_mremap(struct thread *td, struct linux_mremap_args *args) { uintptr_t addr; size_t len; int error = 0; if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return (ENOMEM); } if (args->new_len < args->old_len) { addr = args->addr + args->new_len; len = args->old_len - args->new_len; error = kern_munmap(td, addr, len); } td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; return (error); } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { return (kern_msync(td, args->addr, args->len, args->fl & ~LINUX_MS_SYNC)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int linux_madvise(struct thread *td, struct linux_madvise_args *uap) { return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); } int linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) { #if defined(LINUX_ARCHWANT_MMAP2PGOFF) /* * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is * implemented with mmap2 syscall and the offset is represented in * multiples of page size. */ return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); #else return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, uap->flags, uap->fd, uap->pgoff)); #endif } #ifdef LINUX_LEGACY_SYSCALLS int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return (error); td->td_retval[0] = tm; return (0); } #endif struct l_times_argv { l_clock_t tms_utime; l_clock_t tms_stime; l_clock_t tms_cutime; l_clock_t tms_cstime; }; /* * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK * auxiliary vector entry. */ #define CLK_TCK 100 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ CONVNTCK(r) : CONVOTCK(r)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return (error); } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return (0); } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } #if defined(__amd64__) /* * On amd64, Linux uname(2) needs to return "x86_64" * for both 64-bit and 32-bit applications. On 32-bit, * the string returned by getauxval(AT_PLATFORM) needs * to remain "i686", though. */ #if defined(COMPAT_LINUX32) if (linux32_emulate_i386) strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); else #endif strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); #elif defined(__aarch64__) strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); #elif defined(__i386__) strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); #endif return (copyout(&utsname, args->buf, sizeof(utsname))); } struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; #ifdef LINUX_LEGACY_SYSCALLS int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; int error; if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut)) != 0) return (error); tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, tvp, UIO_SYSSPACE)); } #endif #ifdef LINUX_LEGACY_SYSCALLS int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; int error; if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) return (error); tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, tvp, UIO_SYSSPACE)); } #endif static int linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) { if (l_times->tv_nsec != LINUX_UTIME_OMIT && l_times->tv_nsec != LINUX_UTIME_NOW && (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) return (EINVAL); times->tv_sec = l_times->tv_sec; switch (l_times->tv_nsec) { case LINUX_UTIME_OMIT: times->tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times->tv_nsec = UTIME_NOW; break; default: times->tv_nsec = l_times->tv_nsec; } return (0); } static int linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, struct timespec *timesp, int lflags) { int dfd, flags = 0; dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) return (EINVAL); if (timesp != NULL) { /* This breaks POSIX, but is what the Linux kernel does * _on purpose_ (documented in the man page for utimensat(2)), * so we must follow that behaviour. */ if (timesp[0].tv_nsec == UTIME_OMIT && timesp[1].tv_nsec == UTIME_OMIT) return (0); } if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) flags |= AT_SYMLINK_NOFOLLOW; if (lflags & LINUX_AT_EMPTY_PATH) flags |= AT_EMPTY_PATH; if (pathname != NULL) return (kern_utimensat(td, dfd, pathname, UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); if (lflags != 0) return (EINVAL); return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); } int linux_utimensat(struct thread *td, struct linux_utimensat_args *args) { struct l_timespec l_times[2]; struct timespec times[2], *timesp; int error; if (args->times != NULL) { error = copyin(args->times, l_times, sizeof(l_times)); if (error != 0) return (error); error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); if (error != 0) return (error); error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); if (error != 0) return (error); timesp = times; } else timesp = NULL; return (linux_common_utimensat(td, args->dfd, args->pathname, timesp, args->flags)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) { /* Zero out the padding in compat mode. */ l_times->tv_nsec &= 0xFFFFFFFFUL; if (l_times->tv_nsec != LINUX_UTIME_OMIT && l_times->tv_nsec != LINUX_UTIME_NOW && (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) return (EINVAL); times->tv_sec = l_times->tv_sec; switch (l_times->tv_nsec) { case LINUX_UTIME_OMIT: times->tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times->tv_nsec = UTIME_NOW; break; default: times->tv_nsec = l_times->tv_nsec; } return (0); } int linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) { struct l_timespec64 l_times[2]; struct timespec times[2], *timesp; int error; if (args->times64 != NULL) { error = copyin(args->times64, l_times, sizeof(l_times)); if (error != 0) return (error); error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); if (error != 0) return (error); error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); if (error != 0) return (error); timesp = times; } else timesp = NULL; return (linux_common_utimensat(td, args->dfd, args->pathname, timesp, args->flags)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #ifdef LINUX_LEGACY_SYSCALLS int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) return (error); tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, tvp, UIO_SYSSPACE)); } #endif static int linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, int options, void *rup, l_siginfo_t *infop) { l_siginfo_t lsi; siginfo_t siginfo; struct __wrusage wru; int error, status, tmpstat, sig; error = kern_wait6(td, idtype, id, &status, options, rup != NULL ? &wru : NULL, &siginfo); if (error == 0 && statusp) { tmpstat = status & 0xffff; if (WIFSIGNALED(tmpstat)) { tmpstat = (tmpstat & 0xffffff80) | bsd_to_linux_signal(WTERMSIG(tmpstat)); } else if (WIFSTOPPED(tmpstat)) { tmpstat = (tmpstat & 0xffff00ff) | (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) if (WSTOPSIG(status) == SIGTRAP) { tmpstat = linux_ptrace_status(td, siginfo.si_pid, tmpstat); } #endif } else if (WIFCONTINUED(tmpstat)) { tmpstat = 0xffff; } error = copyout(&tmpstat, statusp, sizeof(int)); } if (error == 0 && rup != NULL) error = linux_copyout_rusage(&wru.wru_self, rup); if (error == 0 && infop != NULL && td->td_retval[0] != 0) { sig = bsd_to_linux_signal(siginfo.si_signo); siginfo_to_lsiginfo(&siginfo, &lsi, sig); error = copyout(&lsi, infop, sizeof(lsi)); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { struct linux_wait4_args wait4_args = { .pid = args->pid, .status = args->status, .options = args->options, .rusage = NULL, }; return (linux_wait4(td, &wait4_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_wait4(struct thread *td, struct linux_wait4_args *args) { struct proc *p; int options, id, idtype; if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); /* -INT_MIN is not defined. */ if (args->pid == INT_MIN) return (ESRCH); options = 0; linux_to_bsd_waitopts(args->options, &options); /* * For backward compatibility we implicitly add flags WEXITED * and WTRAPPED here. */ options |= WEXITED | WTRAPPED; if (args->pid == WAIT_ANY) { idtype = P_ALL; id = 0; } else if (args->pid < 0) { idtype = P_PGID; id = (id_t)-args->pid; } else if (args->pid == 0) { idtype = P_PGID; p = td->td_proc; PROC_LOCK(p); id = p->p_pgid; PROC_UNLOCK(p); } else { idtype = P_PID; id = (id_t)args->pid; } return (linux_common_wait(td, idtype, id, args->status, options, args->rusage, NULL)); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { idtype_t idtype; int error, options; struct proc *p; pid_t id; if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); options = 0; linux_to_bsd_waitopts(args->options, &options); id = args->id; switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; break; case LINUX_P_PID: if (args->id <= 0) return (EINVAL); idtype = P_PID; break; case LINUX_P_PGID: if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { p = td->td_proc; PROC_LOCK(p); id = p->p_pgid; PROC_UNLOCK(p); } else if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; case LINUX_P_PIDFD: LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); return (ENOSYS); default: return (EINVAL); } error = linux_common_wait(td, idtype, id, NULL, options, args->rusage, args->info); td->td_retval[0] = 0; return (error); } #ifdef LINUX_LEGACY_SYSCALLS int linux_mknod(struct thread *td, struct linux_mknod_args *args) { int error; switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, args->mode, linux_decode_dev(args->dev)); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } return (error); } #endif int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, args->mode, linux_decode_dev(args->dev)); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! */ int linux_personality(struct thread *td, struct linux_personality_args *args) { struct linux_pemuldata *pem; struct proc *p = td->td_proc; uint32_t old; PROC_LOCK(p); pem = pem_find(p); old = pem->persona; if (args->per != 0xffffffff) pem->persona = args->per; PROC_UNLOCK(p); td->td_retval[0] = old; return (0); } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; newcred = crget(); crextend(newcred, ngrp + 1); p = td->td_proc; PROC_LOCK(p); oldcred = p->p_ucred; crcopy(newcred, oldcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { PROC_UNLOCK(p); crfree(newcred); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); return (error); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. */ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); free(linux_gidset, M_LINUX); if (error) return (error); td->td_retval[0] = ngrp; return (0); } static bool linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim) { ssize_t size; int res, error; if (linux_dummy_rlimits == 0) return (false); switch (resource) { case LINUX_RLIMIT_LOCKS: case LINUX_RLIMIT_RTTIME: rlim->rlim_cur = LINUX_RLIM_INFINITY; rlim->rlim_max = LINUX_RLIM_INFINITY; return (true); case LINUX_RLIMIT_NICE: case LINUX_RLIMIT_RTPRIO: rlim->rlim_cur = 0; rlim->rlim_max = 0; return (true); case LINUX_RLIMIT_SIGPENDING: error = kernel_sysctlbyname(td, "kern.sigqueue.max_pending_per_proc", &res, &size, 0, 0, 0, 0); if (error != 0) return (false); rlim->rlim_cur = res; rlim->rlim_max = res; return (true); case LINUX_RLIMIT_MSGQUEUE: error = kernel_sysctlbyname(td, "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); if (error != 0) return (false); rlim->rlim_cur = res; rlim->rlim_max = res; return (true); default: return (false); } } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { rlim.rlim_cur = bsd_rlim.rlim_cur; rlim.rlim_max = bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { rlim.rlim_cur = bsd_rlim.rlim_cur; rlim.rlim_max = bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_param sched_param; struct thread *tdt; int error, policy; switch (args->policy) { case LINUX_SCHED_OTHER: policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: policy = SCHED_FIFO; break; case LINUX_SCHED_RR: policy = SCHED_RR; break; default: return (EINVAL); } error = copyin(args->param, &sched_param, sizeof(sched_param)); if (error) return (error); if (linux_map_sched_prio) { switch (policy) { case SCHED_OTHER: if (sched_param.sched_priority != 0) return (EINVAL); sched_param.sched_priority = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; break; case SCHED_FIFO: case SCHED_RR: if (sched_param.sched_priority < 1 || sched_param.sched_priority >= LINUX_MAX_RT_PRIO) return (EINVAL); /* * Map [1, LINUX_MAX_RT_PRIO - 1] to * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). */ sched_param.sched_priority = (sched_param.sched_priority - 1) * (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / (LINUX_MAX_RT_PRIO - 1); break; } } tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setscheduler(td, tdt, policy, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { struct thread *tdt; int error, policy; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return (error); } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; if (linux_map_sched_prio) { switch (args->policy) { case LINUX_SCHED_OTHER: td->td_retval[0] = 0; return (0); case LINUX_SCHED_FIFO: case LINUX_SCHED_RR: td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; return (0); default: return (EINVAL); } } switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_max(td, &bsd)); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; if (linux_map_sched_prio) { switch (args->policy) { case LINUX_SCHED_OTHER: td->td_retval[0] = 0; return (0); case LINUX_SCHED_FIFO: case LINUX_SCHED_RR: td->td_retval[0] = 1; return (0); default: return (EINVAL); } } switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_min(td, &bsd)); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; if (args->magic1 != REBOOT_MAGIC1) return (EINVAL); switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return (EINVAL); } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return (EINVAL); } return (sys_reboot(td, &bsd_args)); } int linux_getpid(struct thread *td, struct linux_getpid_args *args) { td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { struct linux_emuldata *em; em = em_find(td); KASSERT(em != NULL, ("gettid: emuldata not found.\n")); td->td_retval[0] = em->em_tid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { td->td_retval[0] = kern_getppid(td); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { return (kern_getsid(td, args->pid)); } int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { int error; error = kern_getpriority(td, args->which, args->who); td->td_retval[0] = 20 - td->td_retval[0]; return (error); } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) { int name[2]; name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, args->error_code); /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesnt occur often. */ exit1(td, args->error_code, 0); /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION_1 0x19980330 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 struct l_user_cap_header { l_int version; l_int pid; }; struct l_user_cap_data { l_int effective; l_int permitted; l_int inheritable; }; int linux_capget(struct thread *td, struct linux_capget_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, u32s; if (uap->hdrp == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); if (uap->datap) { /* * The current implementation doesn't support setting * a capability (it's essentially a stub) so indicate * that no capabilities are currently set or available * to request. */ memset(&lucd, 0, u32s * sizeof(lucd[0])); error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); } return (error); } int linux_capset(struct thread *td, struct linux_capset_args *uap) { struct l_user_cap_header luch; struct l_user_cap_data lucd[2]; int error, i, u32s; if (uap->hdrp == NULL || uap->datap == NULL) return (EFAULT); error = copyin(uap->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); switch (luch.version) { case _LINUX_CAPABILITY_VERSION_1: u32s = 1; break; case _LINUX_CAPABILITY_VERSION_2: case _LINUX_CAPABILITY_VERSION_3: u32s = 2; break; default: luch.version = _LINUX_CAPABILITY_VERSION_1; error = copyout(&luch, uap->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); if (error != 0) return (error); /* We currently don't support setting any capabilities. */ for (i = 0; i < u32s; i++) { if (lucd[i].effective || lucd[i].permitted || lucd[i].inheritable) { linux_msg(td, "capset[%d] effective=0x%x, permitted=0x%x, " "inheritable=0x%x is not implemented", i, (int)lucd[i].effective, (int)lucd[i].permitted, (int)lucd[i].inheritable); return (EPERM); } } return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size, arg; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; int pdeath_signal, trace_state; switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); pdeath_signal = linux_to_bsd_signal(args->arg2); return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, &pdeath_signal)); case LINUX_PR_GET_PDEATHSIG: error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, &pdeath_signal); if (error != 0) return (error); pdeath_signal = bsd_to_linux_signal(pdeath_signal); return (copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal))); /* * In Linux, this flag controls if set[gu]id processes can coredump. * There are additional semantics imposed on processes that cannot * coredump: * - Such processes can not be ptraced. * - There are some semantics around ownership of process-related files * in the /proc namespace. * * In FreeBSD, we can (and by default, do) disable setuid coredump * system-wide with 'sugid_coredump.' We control tracability on a * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). * By happy coincidence, P2_NOTRACE also prevents coredumping. So the * procctl is roughly analogous to Linux's DUMPABLE. * * So, proxy these knobs to the corresponding PROC_TRACE setting. */ case LINUX_PR_GET_DUMPABLE: error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, &trace_state); if (error != 0) return (error); td->td_retval[0] = (trace_state != -1); return (0); case LINUX_PR_SET_DUMPABLE: /* * It is only valid for userspace to set one of these two * flags, and only one at a time. */ switch (args->arg2) { case LINUX_SUID_DUMP_DISABLE: trace_state = PROC_TRACE_CTL_DISABLE_EXEC; break; case LINUX_SUID_DUMP_USER: trace_state = PROC_TRACE_CTL_ENABLE; break; default: return (EINVAL); } return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, &trace_state)); case LINUX_PR_GET_KEEPCAPS: /* * Indicate that we always clear the effective and * permitted capability sets when the user id becomes * non-zero (actually the capability sets are simply * always zero in the current implementation). */ td->td_retval[0] = 0; break; case LINUX_PR_SET_KEEPCAPS: /* * Ignore requests to keep the effective and permitted * capability sets when the user id becomes non-zero. */ break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a Linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. */ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; case LINUX_PR_GET_SECCOMP: case LINUX_PR_SET_SECCOMP: /* * Same as returned by Linux without CONFIG_SECCOMP enabled. */ error = EINVAL; break; case LINUX_PR_CAPBSET_READ: #if 0 /* * This makes too much noise with Ubuntu Focal. */ linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", (int)args->arg2); #endif error = EINVAL; break; case LINUX_PR_SET_CHILD_SUBREAPER: if (args->arg2 == 0) { return (kern_procctl(td, P_PID, 0, PROC_REAP_RELEASE, NULL)); } return (kern_procctl(td, P_PID, 0, PROC_REAP_ACQUIRE, NULL)); case LINUX_PR_SET_NO_NEW_PRIVS: arg = args->arg2 == 1 ? PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; error = kern_procctl(td, P_PID, p->p_pid, PROC_NO_NEW_PRIVS_CTL, &arg); break; case LINUX_PR_SET_PTRACER: linux_msg(td, "unsupported prctl PR_SET_PTRACER"); error = EINVAL; break; default: linux_msg(td, "unsupported prctl option %d", args->option); error = EINVAL; break; } return (error); } int linux_sched_setparam(struct thread *td, struct linux_sched_setparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error, policy; error = copyin(uap->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); if (linux_map_sched_prio) { error = kern_sched_getscheduler(td, tdt, &policy); if (error) goto out; switch (policy) { case SCHED_OTHER: if (sched_param.sched_priority != 0) { error = EINVAL; goto out; } sched_param.sched_priority = PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; break; case SCHED_FIFO: case SCHED_RR: if (sched_param.sched_priority < 1 || sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { error = EINVAL; goto out; } /* * Map [1, LINUX_MAX_RT_PRIO - 1] to * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). */ sched_param.sched_priority = (sched_param.sched_priority - 1) * (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / (LINUX_MAX_RT_PRIO - 1); break; } } error = kern_sched_setparam(td, tdt, &sched_param); out: PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getparam(struct thread *td, struct linux_sched_getparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error, policy; tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); if (error) { PROC_UNLOCK(tdt->td_proc); return (error); } if (linux_map_sched_prio) { error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); if (error) return (error); switch (policy) { case SCHED_OTHER: sched_param.sched_priority = 0; break; case SCHED_FIFO: case SCHED_RR: /* * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). */ sched_param.sched_priority = (sched_param.sched_priority * (LINUX_MAX_RT_PRIO - 1) + (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; break; } } else PROC_UNLOCK(tdt->td_proc); error = copyout(&sched_param, uap->param, sizeof(sched_param)); return (error); } /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { struct thread *tdt; cpuset_t *mask; size_t size; int error; id_t tid; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); tid = tdt->td_tid; PROC_UNLOCK(tdt->td_proc); mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); size = min(args->len, sizeof(cpuset_t)); error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, size, mask); if (error == ERANGE) error = EINVAL; if (error == 0) error = copyout(mask, args->user_mask_ptr, size); if (error == 0) td->td_retval[0] = size; free(mask, M_LINUX); return (error); } /* * Set affinity of a process. */ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct thread *tdt; cpuset_t *mask; int cpu, error; size_t len; id_t tid; tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); tid = tdt->td_tid; PROC_UNLOCK(tdt->td_proc); len = min(args->len, sizeof(cpuset_t)); mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); error = copyin(args->user_mask_ptr, mask, len); if (error != 0) goto out; /* Linux ignore high bits */ CPU_FOREACH_ISSET(cpu, mask) if (cpu > mp_maxid) CPU_CLR(cpu, mask); error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, tid, mask); if (error == EDEADLK) error = EINVAL; out: free(mask, M_TEMP); return (error); } struct linux_rlimit64 { uint64_t rlim_cur; uint64_t rlim_max; }; int linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) { struct rlimit rlim, nrlim; struct linux_rlimit64 lrlim; struct proc *p; u_int which; int flags; int error; if (args->new == NULL && args->old != NULL) { if (linux_get_dummy_limit(td, args->resource, &rlim)) { lrlim.rlim_cur = rlim.rlim_cur; lrlim.rlim_max = rlim.rlim_max; return (copyout(&lrlim, args->old, sizeof(lrlim))); } } if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); if (args->new != NULL) { /* * Note. Unlike FreeBSD where rlim is signed 64-bit Linux * rlim is unsigned 64-bit. FreeBSD treats negative limits * as INFINITY so we do not need a conversion even. */ error = copyin(args->new, &nrlim, sizeof(nrlim)); if (error != 0) return (error); } flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; if (args->pid == 0) { p = td->td_proc; PHOLD(p); } else { error = pget(args->pid, flags, &p); if (error != 0) return (error); } if (args->old != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur == RLIM_INFINITY) lrlim.rlim_cur = LINUX_RLIM_INFINITY; else lrlim.rlim_cur = rlim.rlim_cur; if (rlim.rlim_max == RLIM_INFINITY) lrlim.rlim_max = LINUX_RLIM_INFINITY; else lrlim.rlim_max = rlim.rlim_max; error = copyout(&lrlim, args->old, sizeof(lrlim)); if (error != 0) goto out; } if (args->new != NULL) error = kern_proc_setrlimit(td, p, which, &nrlim); out: PRELE(p); return (error); } int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { struct timespec ts, *tsp; int error; if (args->tsp != NULL) { error = linux_get_timespec(&ts, args->tsp); if (error != 0) return (error); tsp = &ts; } else tsp = NULL; error = linux_common_pselect6(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tsp, args->sig); if (args->tsp != NULL) linux_put_timespec(&ts, args->tsp); return (error); } static int linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, l_uintptr_t *sig) { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; sigset_t *ssp; sigset_t ss; int error; ssp = NULL; if (sig != NULL) { error = copyin(sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); error = linux_copyin_sigset(td, PTRIN(lpse6.ss), lpse6.ss_len, &ss, &ssp); if (error != 0) return (error); } else ssp = NULL; /* * Currently glibc changes nanosecond number to microsecond. * This mean losing precision but for now it is hardly seen. */ if (tsp != NULL) { TIMESPEC_TO_TIMEVAL(&utv, tsp); if (itimerfix(&utv)) return (EINVAL); microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_pselect(td, nfds, readfds, writefds, exceptfds, tvp, ssp, LINUX_NFDBITS); if (tsp != NULL) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, tsp); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_pselect6_time64(struct thread *td, struct linux_pselect6_time64_args *args) { struct timespec ts, *tsp; int error; if (args->tsp != NULL) { error = linux_get_timespec64(&ts, args->tsp); if (error != 0) return (error); tsp = &ts; } else tsp = NULL; error = linux_common_pselect6(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tsp, args->sig); if (args->tsp != NULL) linux_put_timespec64(&ts, args->tsp); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_ppoll(struct thread *td, struct linux_ppoll_args *args) { struct timespec uts, *tsp; int error; if (args->tsp != NULL) { error = linux_get_timespec(&uts, args->tsp); if (error != 0) return (error); tsp = &uts; } else tsp = NULL; error = linux_common_ppoll(td, args->fds, args->nfds, tsp, args->sset, args->ssize); if (error == 0 && args->tsp != NULL) error = linux_put_timespec(&uts, args->tsp); return (error); } static int linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) { struct timespec ts0, ts1; struct pollfd stackfds[32]; struct pollfd *kfds; sigset_t *ssp; sigset_t ss; int error; if (kern_poll_maxfds(nfds)) return (EINVAL); if (sset != NULL) { error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); if (error != 0) return (error); } else ssp = NULL; if (tsp != NULL) nanotime(&ts0); if (nfds > nitems(stackfds)) kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else kfds = stackfds; error = linux_pollin(td, kfds, fds, nfds); if (error != 0) goto out; error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); if (error == 0) error = linux_pollout(td, kfds, fds, nfds); if (error == 0 && tsp != NULL) { if (td->td_retval[0]) { nanotime(&ts1); timespecsub(&ts1, &ts0, &ts1); timespecsub(tsp, &ts1, tsp); if (tsp->tv_sec < 0) timespecclear(tsp); } else timespecclear(tsp); } out: if (nfds > nitems(stackfds)) free(kfds, M_TEMP); return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) { struct timespec uts, *tsp; int error; if (args->tsp != NULL) { error = linux_get_timespec64(&uts, args->tsp); if (error != 0) return (error); tsp = &uts; } else tsp = NULL; error = linux_common_ppoll(td, args->fds, args->nfds, tsp, args->sset, args->ssize); if (error == 0 && args->tsp != NULL) error = linux_put_timespec64(&uts, args->tsp); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) { int error; u_int i; error = copyin(ufds, fds, nfd * sizeof(*fds)); if (error != 0) return (error); for (i = 0; i < nfd; i++) { if (fds->events != 0) linux_to_bsd_poll_events(td, fds->fd, fds->events, &fds->events); fds++; } return (0); } static int linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) { int error = 0; u_int i, n = 0; for (i = 0; i < nfd; i++) { if (fds->revents != 0) { bsd_to_linux_poll_events(fds->revents, &fds->revents); n++; } error = copyout(&fds->revents, &ufds->revents, sizeof(ufds->revents)); if (error) return (error); fds++; ufds++; } td->td_retval[0] = n; return (0); } static int linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, struct timespec *ts) { struct thread *tdt; int error; /* * According to man in case the invalid pid specified * EINVAL should be returned. */ if (pid < 0) return (EINVAL); tdt = linux_tdfind(td, pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, ts); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; int error; error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); if (error != 0) return (error); return (linux_put_timespec(&ts, uap->interval)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sched_rr_get_interval_time64(struct thread *td, struct linux_sched_rr_get_interval_time64_args *uap) { struct timespec ts; int error; error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); if (error != 0) return (error); return (linux_put_timespec64(&ts, uap->interval)); } #endif /* * In case when the Linux thread is the initial thread in * the thread group thread id is equal to the process id. * Glibc depends on this magic (assert in pthread_getattr_np.c). */ struct thread * linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) { struct linux_emuldata *em; struct thread *tdt; struct proc *p; tdt = NULL; if (tid == 0 || tid == td->td_tid) { if (pid != -1 && td->td_proc->p_pid != pid) return (NULL); PROC_LOCK(td->td_proc); return (td); } else if (tid > PID_MAX) return (tdfind(tid, pid)); /* * Initial thread where the tid equal to the pid. */ p = pfind(tid); if (p != NULL) { if (SV_PROC_ABI(p) != SV_ABI_LINUX || (pid != -1 && tid != pid)) { /* * p is not a Linuxulator process. */ PROC_UNLOCK(p); return (NULL); } FOREACH_THREAD_IN_PROC(p, tdt) { em = em_find(tdt); if (tid == em->em_tid) return (tdt); } PROC_UNLOCK(p); } return (NULL); } void linux_to_bsd_waitopts(int options, int *bsdopts) { if (options & LINUX_WNOHANG) *bsdopts |= WNOHANG; if (options & LINUX_WUNTRACED) *bsdopts |= WUNTRACED; if (options & LINUX_WEXITED) *bsdopts |= WEXITED; if (options & LINUX_WCONTINUED) *bsdopts |= WCONTINUED; if (options & LINUX_WNOWAIT) *bsdopts |= WNOWAIT; if (options & __WCLONE) *bsdopts |= WLINUXCLONE; } int linux_getrandom(struct thread *td, struct linux_getrandom_args *args) { struct uio uio; struct iovec iov; int error; if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) return (EINVAL); if (args->count > INT_MAX) args->count = INT_MAX; iov.iov_base = args->buf; iov.iov_len = args->count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = iov.iov_len; uio.uio_segflg = UIO_USERSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); if (error == 0) td->td_retval[0] = args->count - uio.uio_resid; return (error); } int linux_mincore(struct thread *td, struct linux_mincore_args *args) { /* Needs to be page-aligned */ if (args->start & PAGE_MASK) return (EINVAL); return (kern_mincore(td, args->start, args->len, args->vec)); } #define SYSLOG_TAG "<6>" int linux_syslog(struct thread *td, struct linux_syslog_args *args) { char buf[128], *src, *dst; u_int seq; int buflen, error; if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { linux_msg(td, "syslog unsupported type 0x%x", args->type); return (EINVAL); } if (args->len < 6) { td->td_retval[0] = 0; return (0); } error = priv_check(td, PRIV_MSGBUF); if (error) return (error); mtx_lock(&msgbuf_lock); msgbuf_peekbytes(msgbufp, NULL, 0, &seq); mtx_unlock(&msgbuf_lock); dst = args->buf; error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); /* The -1 is to skip the trailing '\0'. */ dst += sizeof(SYSLOG_TAG) - 1; while (error == 0) { mtx_lock(&msgbuf_lock); buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); mtx_unlock(&msgbuf_lock); if (buflen == 0) break; for (src = buf; src < buf + buflen && error == 0; src++) { if (*src == '\0') continue; if (dst >= args->buf + args->len) goto out; error = copyout(src, dst, 1); dst++; if (*src == '\n' && *(src + 1) != '<' && dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); dst += sizeof(SYSLOG_TAG) - 1; } } } out: td->td_retval[0] = dst - args->buf; return (error); } int linux_getcpu(struct thread *td, struct linux_getcpu_args *args) { int cpu, error, node; cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ error = 0; node = cpuid_to_pcpu[cpu]->pc_domain; if (args->cpu != NULL) error = copyout(&cpu, args->cpu, sizeof(l_int)); if (args->node != NULL) error = copyout(&node, args->node, sizeof(l_int)); return (error); } #if defined(__i386__) || defined(__amd64__) int linux_poll(struct thread *td, struct linux_poll_args *args) { struct timespec ts, *tsp; if (args->timeout != INFTIM) { if (args->timeout < 0) return (EINVAL); ts.tv_sec = args->timeout / 1000; ts.tv_nsec = (args->timeout % 1000) * 1000000; tsp = &ts; } else tsp = NULL; return (linux_common_ppoll(td, args->fds, args->nfds, tsp, NULL, 0)); } #endif /* __i386__ || __amd64__ */ int linux_seccomp(struct thread *td, struct linux_seccomp_args *args) { switch (args->op) { case LINUX_SECCOMP_GET_ACTION_AVAIL: return (EOPNOTSUPP); default: /* * Ignore unknown operations, just like Linux kernel built * without CONFIG_SECCOMP. */ return (EINVAL); } } /* * Custom version of exec_copyin_args(), to copy out argument and environment * strings from the old process address space into the temporary string buffer. * Based on freebsd32_exec_copyin_args. */ static int linux_exec_copyin_args(struct image_args *args, const char *fname, - enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv) + l_uintptr_t *argv, l_uintptr_t *envv) { char *argp, *envp; l_uintptr_t *ptr, arg; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ - error = exec_args_add_fname(args, fname, segflg); + error = exec_args_add_fname(args, fname, UIO_USERSPACE); if (error != 0) goto err_exit; /* * extract arguments first */ ptr = argv; for (;;) { error = copyin(ptr++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = exec_args_add_arg(args, argp, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * This comment is from Linux do_execveat_common: * When argv is empty, add an empty string ("") as argv[0] to * ensure confused userspace programs that start processing * from argv[1] won't end up walking envp. */ if (args->argc == 0 && (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) goto err_exit; /* * extract environment strings */ if (envv) { ptr = envv; for (;;) { error = copyin(ptr++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = exec_args_add_env(args, envp, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; int error; LINUX_CTR(execve); - error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE, - args->argp, args->envp); + error = linux_exec_copyin_args(&eargs, args->path, args->argp, + args->envp); if (error == 0) error = linux_common_execve(td, &eargs); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } static void linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) { struct rtprio rtp2; pri_to_rtp(td1, &rtp2); if (rtp2.type < rtp->type || (rtp2.type == rtp->type && rtp2.prio < rtp->prio)) { rtp->type = rtp2.type; rtp->prio = rtp2.prio; } } #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX static int linux_rtprio2ioprio(struct rtprio *rtp) { int ioprio, prio; switch (rtp->type) { case RTP_PRIO_IDLE: prio = RTP_PRIO_MIN; ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); break; case RTP_PRIO_NORMAL: prio = rtp->prio / LINUX_PRIO_DIVIDER; ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); break; case RTP_PRIO_REALTIME: prio = rtp->prio / LINUX_PRIO_DIVIDER; ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); break; default: prio = RTP_PRIO_MIN; ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); break; } return (ioprio); } static int linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) { switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { case LINUX_IOPRIO_CLASS_IDLE: rtp->prio = RTP_PRIO_MIN; rtp->type = RTP_PRIO_IDLE; break; case LINUX_IOPRIO_CLASS_BE: rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; rtp->type = RTP_PRIO_NORMAL; break; case LINUX_IOPRIO_CLASS_RT: rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; rtp->type = RTP_PRIO_REALTIME; break; default: return (EINVAL); } return (0); } #undef LINUX_PRIO_DIVIDER int linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) { struct thread *td1; struct rtprio rtp; struct pgrp *pg; struct proc *p; int error, found; p = NULL; td1 = NULL; error = 0; found = 0; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; switch (args->which) { case LINUX_IOPRIO_WHO_PROCESS: if (args->who == 0) { td1 = td; p = td1->td_proc; PROC_LOCK(p); } else if (args->who > PID_MAX) { td1 = linux_tdfind(td, args->who, -1); if (td1 != NULL) p = td1->td_proc; } else p = pfind(args->who); if (p == NULL) return (ESRCH); if ((error = p_cansee(td, p))) { PROC_UNLOCK(p); break; } if (td1 != NULL) { pri_to_rtp(td1, &rtp); } else { FOREACH_THREAD_IN_PROC(p, td1) { linux_up_rtprio_if(td1, &rtp); } } found++; PROC_UNLOCK(p); break; case LINUX_IOPRIO_WHO_PGRP: sx_slock(&proctree_lock); if (args->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(args->who); if (pg == NULL) { sx_sunlock(&proctree_lock); error = ESRCH; break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansee(td, p) == 0) { FOREACH_THREAD_IN_PROC(p, td1) { linux_up_rtprio_if(td1, &rtp); found++; } } PROC_UNLOCK(p); } PGRP_UNLOCK(pg); break; case LINUX_IOPRIO_WHO_USER: if (args->who == 0) args->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p->p_ucred->cr_uid == args->who && p_cansee(td, p) == 0) { FOREACH_THREAD_IN_PROC(p, td1) { linux_up_rtprio_if(td1, &rtp); found++; } } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } if (error == 0) { if (found != 0) td->td_retval[0] = linux_rtprio2ioprio(&rtp); else error = ESRCH; } return (error); } int linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) { struct thread *td1; struct rtprio rtp; struct pgrp *pg; struct proc *p; int error; if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) return (error); /* Attempts to set high priorities (REALTIME) require su privileges. */ if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) return (error); p = NULL; td1 = NULL; switch (args->which) { case LINUX_IOPRIO_WHO_PROCESS: if (args->who == 0) { td1 = td; p = td1->td_proc; PROC_LOCK(p); } else if (args->who > PID_MAX) { td1 = linux_tdfind(td, args->who, -1); if (td1 != NULL) p = td1->td_proc; } else p = pfind(args->who); if (p == NULL) return (ESRCH); if ((error = p_cansched(td, p))) { PROC_UNLOCK(p); break; } if (td1 != NULL) { error = rtp_to_pri(&rtp, td1); } else { FOREACH_THREAD_IN_PROC(p, td1) { if ((error = rtp_to_pri(&rtp, td1)) != 0) break; } } PROC_UNLOCK(p); break; case LINUX_IOPRIO_WHO_PGRP: sx_slock(&proctree_lock); if (args->who == 0) { pg = td->td_proc->p_pgrp; PGRP_LOCK(pg); } else { pg = pgfind(args->who); if (pg == NULL) { sx_sunlock(&proctree_lock); error = ESRCH; break; } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pg->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p_cansched(td, p) == 0) { FOREACH_THREAD_IN_PROC(p, td1) { if ((error = rtp_to_pri(&rtp, td1)) != 0) break; } } PROC_UNLOCK(p); if (error != 0) break; } PGRP_UNLOCK(pg); break; case LINUX_IOPRIO_WHO_USER: if (args->who == 0) args->who = td->td_ucred->cr_uid; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && p->p_ucred->cr_uid == args->who && p_cansched(td, p) == 0) { FOREACH_THREAD_IN_PROC(p, td1) { if ((error = rtp_to_pri(&rtp, td1)) != 0) break; } } PROC_UNLOCK(p); if (error != 0) break; } sx_sunlock(&allproc_lock); break; default: error = EINVAL; break; } return (error); } /* The only flag is O_NONBLOCK */ #define B2L_MQ_FLAGS(bflags) ((bflags) != 0 ? LINUX_O_NONBLOCK : 0) #define L2B_MQ_FLAGS(lflags) ((lflags) != 0 ? O_NONBLOCK : 0) int linux_mq_open(struct thread *td, struct linux_mq_open_args *args) { struct mq_attr attr; int error, flags; flags = linux_common_openflags(args->oflag); if ((flags & O_ACCMODE) == O_ACCMODE || (flags & O_EXEC) != 0) return (EINVAL); flags = FFLAGS(flags); if ((flags & O_CREAT) != 0 && args->attr != NULL) { error = copyin(args->attr, &attr, sizeof(attr)); if (error != 0) return (error); attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); } return (kern_kmq_open(td, args->name, flags, args->mode, args->attr != NULL ? &attr : NULL)); } int linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) { struct kmq_unlink_args bsd_args = { .path = PTRIN(args->name) }; return (sys_kmq_unlink(td, &bsd_args)); } int linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) { struct timespec ts, *abs_timeout; int error; if (args->abs_timeout == NULL) abs_timeout = NULL; else { error = linux_get_timespec(&ts, args->abs_timeout); if (error != 0) return (error); abs_timeout = &ts; } return (kern_kmq_timedsend(td, args->mqd, PTRIN(args->msg_ptr), args->msg_len, args->msg_prio, abs_timeout)); } int linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) { struct timespec ts, *abs_timeout; int error; if (args->abs_timeout == NULL) abs_timeout = NULL; else { error = linux_get_timespec(&ts, args->abs_timeout); if (error != 0) return (error); abs_timeout = &ts; } return (kern_kmq_timedreceive(td, args->mqd, PTRIN(args->msg_ptr), args->msg_len, args->msg_prio, abs_timeout)); } int linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) { struct sigevent ev, *evp; struct l_sigevent l_ev; int error; if (args->sevp == NULL) evp = NULL; else { error = copyin(args->sevp, &l_ev, sizeof(l_ev)); if (error != 0) return (error); error = linux_convert_l_sigevent(&l_ev, &ev); if (error != 0) return (error); evp = &ev; } return (kern_kmq_notify(td, args->mqd, evp)); } int linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) { struct mq_attr attr, oattr; int error; if (args->attr != NULL) { error = copyin(args->attr, &attr, sizeof(attr)); if (error != 0) return (error); attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); } error = kern_kmq_setattr(td, args->mqd, args->attr != NULL ? &attr : NULL, &oattr); if (error == 0 && args->oattr != NULL) { oattr.mq_flags = B2L_MQ_FLAGS(oattr.mq_flags); bzero(oattr.__reserved, sizeof(oattr.__reserved)); error = copyout(&oattr, args->oattr, sizeof(oattr)); } return (error); } MODULE_DEPEND(linux, mqueuefs, 1, 1, 1); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index f6d0201f9b37..a943ec339e75 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1,2105 +1,2102 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU", "Location of process' ps_strings structure"); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU", "Top of process stack"); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_stackprot, "I", "Stack memory permissions"); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, "Process' command line characters cache limit"); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int core_dump_can_intr = 1; SYSCTL_INT(_kern, OID_AUTO, core_dump_can_intr, CTLFLAG_RWTUN, &core_dump_can_intr, 0, "Core dumping interruptible with SIGKILL"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; vm_offset_t ps_strings; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)PROC_PS_STRINGS(p); return (SYSCTL_OUT(req, &val, sizeof(val))); } #endif ps_strings = PROC_PS_STRINGS(p); return (SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings))); } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; vm_offset_t val; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val32; val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop); return (SYSCTL_OUT(req, &val32, sizeof(val32))); } #endif val = round_page(p->p_vmspace->vm_stacktop); return (SYSCTL_OUT(req, &val, sizeof(val))); } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); - error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, - uap->argv, uap->envv); + error = exec_copyin_args(&args, uap->fname, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; }; #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); - error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, - uap->argv, uap->envv); + error = exec_copyin_args(&args, NULL, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); - error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, - uap->argv, uap->envv); + error = exec_copyin_args(&args, uap->fname, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == EJUSTRETURN) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } exec_cleanup(td, oldvmspace); } /* * kern_execve() has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { TSEXEC(td->td_proc->p_pid, args->begin_argv); AUDIT_ARG_ARGV(args->begin_argv, args->argc, exec_args_get_begin_envv(args) - args->begin_argv); AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc, args->endp - exec_args_get_begin_envv(args)); #ifdef KTRACE if (KTRPOINT(td, KTR_ARGS)) { ktrdata(KTR_ARGS, args->begin_argv, exec_args_get_begin_envv(args) - args->begin_argv); } if (KTRPOINT(td, KTR_ENVS)) { ktrdata(KTR_ENVS, exec_args_get_begin_envv(args), args->endp - exec_args_get_begin_envv(args)); } #endif /* Must have at least one argument. */ if (args->argc == 0) { exec_free_args(args); return (EINVAL); } return (do_execve(td, args, mac_p, oldvmspace)); } static void execve_nosetid(struct image_params *imgp) { imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace) { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; uintptr_t stack_base; struct image_params image_params, *imgp; struct vattr attr; struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct ktr_io_params *kiop; #endif struct vnode *oldtextvp, *newtextvp; struct vnode *oldtextdvp, *newtextdvp; char *oldbinname, *newbinname; bool credential_changing; #ifdef MAC struct label *interpvplabel = NULL; bool will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif int error, i, orig_osrel; uint32_t orig_fctl0; Elf_Brandinfo *orig_brandinfo; size_t freepath_size; static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; oldtextvp = oldtextdvp = NULL; newtextvp = newtextdvp = NULL; newbinname = oldbinname = NULL; #ifdef KTRACE kiop = NULL; #endif /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; orig_osrel = p->p_osrel; orig_fctl0 = p->p_fctl0; orig_brandinfo = p->p_elf_brandinfo; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE if (CAP_TRACING(td)) ktrcapfail(CAPFAIL_NAMEI, args->fname); /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif /* * Translate the file name. namei() returns a vnode * pointer in ni_vp among other things. */ NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW | AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE, args->fname); error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; newtextdvp = nd.ni_dvp; nd.ni_dvp = NULL; newbinname = malloc(nd.ni_cnd.cn_namelen + 1, M_PARGS, M_WAITOK); memcpy(newbinname, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen); newbinname[nd.ni_cnd.cn_namelen] = '\0'; imgp->vp = newtextvp; /* * Do the best to calculate the full path to the image file. */ if (args->fname[0] == '/') { imgp->execpath = args->fname; } else { VOP_UNLOCK(imgp->vp); freepath_size = MAXPATHLEN; if (vn_fullpath_hardlink(newtextvp, newtextdvp, newbinname, nd.ni_cnd.cn_namelen, &imgp->execpath, &imgp->freepath, &freepath_size) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } } else if (imgp->interpreter_vp) { /* * An image activator has already provided an open vnode */ newtextvp = imgp->interpreter_vp; imgp->interpreter_vp = NULL; if (vn_fullpath(newtextvp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(newtextvp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * If the descriptors was not opened with O_PATH, then * we require that it was opened with O_EXEC or * O_RDONLY. In either case, exec_check_permissions() * below checks _current_ file access mode regardless * of the permissions additionally checked at the * open(2). */ error = fgetvp_exec(td, args->fd, &cap_fexecve_rights, &newtextvp); if (error != 0) goto exec_fail; if (vn_fullpath(newtextvp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(newtextvp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions. Also 'opens' file and sets its vnode to * text mode. */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; imgp->proc->p_fctl0 = 0; imgp->proc->p_elf_brandinfo = NULL; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = false; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp) != 0; credential_changing |= will_transition; #endif /* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */ if (credential_changing) imgp->proc->p_pdeathsig = 0; if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ error = -1; for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL) continue; error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) error = ENOEXEC; goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * The text reference needs to be removed for scripts. * There is a short period before we determine that * something is a script where text reference is active. * The vnode lock is held over this entire period * so nothing should illegitimately be blocked. */ MPASS(imgp->textset); VOP_UNSET_TEXT_CHECKED(newtextvp); imgp->textset = false; /* free name buffer and old vnode */ #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = false; } vput(newtextvp); imgp->vp = newtextvp = NULL; if (args->fname != NULL) { if (newtextdvp != NULL) { vrele(newtextdvp); newtextdvp = NULL; } NDFREE_PNBUF(&nd); free(newbinname, M_PARGS); newbinname = NULL; } vm_object_deallocate(imgp->object); imgp->object = NULL; execve_nosetid(imgp); imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ if (imgp->interpreter_vp) { args->fname = NULL; } else { args->fname = imgp->interpreter_name; } goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Copy out strings (args and env) and initialize stack base. */ error = (*p->p_sysent->sv_copyout_strings)(imgp, &stack_base); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * Stack setup. */ error = (*p->p_sysent->sv_fixup)(&stack_base, imgp); if (error != 0) { vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* * For security and other reasons, the file descriptor table cannot be * shared after an exec. */ fdunshare(td); pdunshare(td); /* close files on exec */ fdcloseexec(td); /* * Malloc things before we need locks. */ i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0) p->p_flag2 &= ~P2_STKGAP_DISABLE; p->p_flag2 &= ~(P2_MEMBAR_PRIVE | P2_MEMBAR_PRIVE_SYNCORE | P2_MEMBAR_GLOBE); if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); /* STOPs are no longer ignored, arrange for AST */ signotify(td); } if ((imgp->sysent->sv_setid_allowed != NULL && !(*imgp->sysent->sv_setid_allowed)(td, imgp)) || (p->p_flag2 & P2_NO_NEW_PRIVS) != 0) execve_nosetid(imgp); /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE kiop = ktrprocexec(p); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in kern.proc.pathname. This vnode was * referenced by namei() or by fexecve variant of fname handling. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; oldtextdvp = p->p_textdvp; p->p_textdvp = newtextdvp; newtextdvp = NULL; oldbinname = p->p_binname; p->p_binname = newbinname; newbinname = NULL; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; PROC_UNLOCK(p); #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { VOP_UNLOCK(imgp->vp); pe.pm_credentialschanged = credential_changing; pe.pm_baseaddr = imgp->reloc_base; pe.pm_dynaddr = imgp->et_dyn_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } #endif /* Set values passed into the program in registers. */ (*p->p_sysent->sv_setregs)(td, imgp, stack_base); VOP_MMAPPED(imgp->vp); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (error != 0) { p->p_osrel = orig_osrel; p->p_fctl0 = orig_fctl0; p->p_elf_brandinfo = orig_brandinfo; } if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (imgp->textset) VOP_UNSET_TEXT_CHECKED(imgp->vp); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp); if (args->fname != NULL) NDFREE_PNBUF(&nd); if (newtextdvp != NULL) vrele(newtextdvp); free(newbinname, M_PARGS); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { if (p->p_ptevents & PTRACE_EXEC) { PROC_LOCK(p); if (p->p_ptevents & PTRACE_EXEC) td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); } } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); if (oldtextdvp != NULL) vrele(oldtextdvp); free(oldbinname, M_PARGS); #ifdef KTRACE ktr_io_params_free(kiop); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exec_cleanup(td, oldvmspace); exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif /* * We don't want cpu_set_syscall_retval() to overwrite any of * the register values put in place by exec_setregs(). * Implementations of cpu_set_syscall_retval() will leave * registers unmodified when returning EJUSTRETURN. */ return (error == 0 ? EJUSTRETURN : error); } void exec_cleanup(struct thread *td, struct vmspace *oldvmspace) { if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(td->td_proc->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } int exec_map_first_page(struct image_params *imgp) { vm_object_t object; vm_page_t m; int error; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); #if VM_NRESERVLEVEL > 0 if ((object->flags & OBJ_COLORED) == 0) { VM_OBJECT_WLOCK(object); vm_object_color(object, 0); VM_OBJECT_WUNLOCK(object); } #endif error = vm_page_grab_valid_unlocked(&m, object, 0, VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) | VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); if (error != VM_PAGER_OK) return (EIO); imgp->firstpage = sf_buf_alloc(m, 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(struct image_params *imgp) { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_unwire(m, PQ_ACTIVE); } } void exec_onexec_old(struct thread *td) { sigfastblock_clear(td); umtx_exec(td->td_proc); } /* * This is an optimization which removes the unmanaged shared page * mapping. In combination with pmap_remove_pages(), which cleans all * managed mappings in the process' vmspace pmap, no work will be left * for pmap_remove(min, max). */ void exec_free_abi_mappings(struct proc *p) { struct vmspace *vmspace; vmspace = p->p_vmspace; if (refcount_load(&vmspace->vm_refcnt) != 1) return; if (!PROC_HAS_SHP(p)) return; pmap_remove(vmspace_pmap(vmspace), vmspace->vm_shp_base, vmspace->vm_shp_base + p->p_sysent->sv_shared_page_len); } /* * Run down the current address space and install a new one. */ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; struct thread *td = curthread; vm_offset_t sv_minuser; vm_map_t map; imgp->vmspace_destroyed = true; imgp->sysent = sv; if (p->p_sysent->sv_onexec_old != NULL) p->p_sysent->sv_onexec_old(td); itimers_exec(p); EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (refcount_load(&vmspace->vm_refcnt) == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser && cpu_exec_vmspace_reuse(p, map)) { exec_free_abi_mappings(p); shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); /* * An exec terminates mlockall(MCL_FUTURE). * ASLR and W^X states must be re-evaluated. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } map->flags |= imgp->map_flags; return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0); } /* * Compute the stack size limit and map the main process stack. * Map the shared page. */ int exec_map_stack(struct image_params *imgp) { struct rlimit rlim_stack; struct sysentvec *sv; struct proc *p; vm_map_t map; struct vmspace *vmspace; vm_offset_t stack_addr, stack_top; vm_offset_t sharedpage_addr; u_long ssiz; int error, find_space, stack_off; vm_prot_t stack_prot; vm_object_t obj; p = imgp->proc; sv = p->p_sysent; if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } vmspace = p->p_vmspace; map = &vmspace->vm_map; stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot; if ((map->flags & MAP_ASLR_STACK) != 0) { stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + lim_max(curthread, RLIMIT_DATA)); find_space = VMFS_ANY_SPACE; } else { stack_addr = sv->sv_usrstack - ssiz; find_space = VMFS_NO_SPACE; } error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz, sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL, MAP_STACK_AREA); if (error != KERN_SUCCESS) { uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x " "failed, mach error %d errno %d\n", (uintmax_t)ssiz, stack_prot, error, vm_mmap_to_errno(error)); return (vm_mmap_to_errno(error)); } stack_top = stack_addr + ssiz; if ((map->flags & MAP_ASLR_STACK) != 0) { /* Randomize within the first page of the stack. */ arc4rand(&stack_off, sizeof(stack_off), 0); stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *)); } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj == NULL) { sharedpage_addr = 0; goto out; } /* * If randomization is disabled then the shared page will * be mapped at address specified in sysentvec. * Otherwise any address above .data section can be selected. * Same logic is used for stack address randomization. * If the address randomization is applied map a guard page * at the top of UVA. */ vm_object_reference(obj); if ((imgp->imgp_flags & IMGP_ASLR_SHARED_PAGE) != 0) { sharedpage_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + lim_max(curthread, RLIMIT_DATA)); error = vm_map_fixed(map, NULL, 0, sv->sv_maxuser - PAGE_SIZE, PAGE_SIZE, VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD); if (error != KERN_SUCCESS) { /* * This is not fatal, so let's just print a warning * and continue. */ uprintf("%s: Mapping guard page at the top of UVA failed" " mach error %d errno %d", __func__, error, vm_mmap_to_errno(error)); } error = vm_map_find(map, obj, 0, &sharedpage_addr, sv->sv_shared_page_len, sv->sv_maxuser, VMFS_ANY_SPACE, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); } else { sharedpage_addr = sv->sv_shared_page_base; vm_map_fixed(map, obj, 0, sharedpage_addr, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); } if (error != KERN_SUCCESS) { uprintf("%s: mapping shared page at addr: %p" "failed, mach error %d errno %d\n", __func__, (void *)sharedpage_addr, error, vm_mmap_to_errno(error)); vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } out: /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_maxsaddr = (char *)stack_addr; vmspace->vm_stacktop = stack_top; vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_shp_base = sharedpage_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, const char *fname, - enum uio_seg segflg, char **argv, char **envv) + char **argv, char **envv) { u_long arg, env; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ - error = exec_args_add_fname(args, fname, segflg); + error = exec_args_add_fname(args, fname, UIO_USERSPACE); if (error != 0) goto err_exit; /* * extract arguments first */ for (;;) { error = fueword(argv++, &arg); if (error == -1) { error = EFAULT; goto err_exit; } if (arg == 0) break; error = exec_args_add_arg(args, (char *)(uintptr_t)arg, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &env); if (error == -1) { error = EFAULT; goto err_exit; } if (env == 0) break; error = exec_args_add_env(args, (char *)(uintptr_t)env, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } struct exec_args_kva { vm_offset_t addr; u_int gen; SLIST_ENTRY(exec_args_kva) next; }; DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva); static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; static struct mtx exec_args_kva_mtx; static u_int exec_args_gen; static void exec_prealloc_args_kva(void *arg __unused) { struct exec_args_kva *argkva; u_int i; SLIST_INIT(&exec_args_kva_freelist); mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } } SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); static vm_offset_t exec_alloc_args_kva(void **cookie) { struct exec_args_kva *argkva; argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_PTR(exec_args_kva)); if (argkva == NULL) { mtx_lock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) (void)mtx_sleep(&exec_args_kva_freelist, &exec_args_kva_mtx, 0, "execkva", 0); SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); mtx_unlock(&exec_args_kva_mtx); } kasan_mark((void *)argkva->addr, exec_map_entry_size, exec_map_entry_size, 0); *(struct exec_args_kva **)cookie = argkva; return (argkva->addr); } static void exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) { vm_offset_t base; base = argkva->addr; kasan_mark((void *)argkva->addr, 0, exec_map_entry_size, KASAN_EXEC_ARGS_FREED); if (argkva->gen != gen) { (void)vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE); argkva->gen = gen; } if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), (uintptr_t)NULL, (uintptr_t)argkva)) { mtx_lock(&exec_args_kva_mtx); SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); wakeup_one(&exec_args_kva_freelist); mtx_unlock(&exec_args_kva_mtx); } } static void exec_free_args_kva(void *cookie) { exec_release_args_kva(cookie, exec_args_gen); } static void exec_args_kva_lowmem(void *arg __unused, int flags __unused) { SLIST_HEAD(, exec_args_kva) head; struct exec_args_kva *argkva; u_int gen; int i; gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; /* * Force an madvise of each KVA range. Any currently allocated ranges * will have MADV_FREE applied once they are freed. */ SLIST_INIT(&head); mtx_lock(&exec_args_kva_mtx); SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); mtx_unlock(&exec_args_kva_mtx); while ((argkva = SLIST_FIRST(&head)) != NULL) { SLIST_REMOVE_HEAD(&head, next); exec_release_args_kva(argkva, gen); } CPU_FOREACH(i) { argkva = (void *)atomic_readandclear_ptr( (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); if (argkva != NULL) exec_release_args_kva(argkva, gen); } } EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, EVENTHANDLER_PRI_ANY); /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)exec_alloc_args_kva(&args->bufkva); return (0); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } } /* * A set to functions to fill struct image args. * * NOTE: exec_args_add_fname() must be called (possibly with a NULL * fname) before the other functions. All exec_args_add_arg() calls must * be made before any exec_args_add_env() calls. exec_args_adjust_args() * may be called any time after exec_args_add_fname(). * * exec_args_add_fname() - install path to be executed * exec_args_add_arg() - append an argument string * exec_args_add_env() - append an env string * exec_args_adjust_args() - adjust location of the argument list to * allow new arguments to be prepended */ int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg) { int error; size_t length; KASSERT(args->fname == NULL, ("fname already appended")); KASSERT(args->endp == NULL, ("already appending to args")); if (fname != NULL) { args->fname = args->buf; error = segflg == UIO_SYSSPACE ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); } else length = 0; /* Set up for _arg_*()/_env_*() */ args->endp = args->buf + length; /* begin_argv must be set and kept updated */ args->begin_argv = args->endp; KASSERT(exec_map_entry_size - length >= ARG_MAX, ("too little space remaining for arguments %zu < %zu", exec_map_entry_size - length, (size_t)ARG_MAX)); args->stringspace = ARG_MAX; return (0); } static int exec_args_add_str(struct image_args *args, const char *str, enum uio_seg segflg, int *countp) { int error; size_t length; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); error = (segflg == UIO_SYSSPACE) ? copystr(str, args->endp, args->stringspace, &length) : copyinstr(str, args->endp, args->stringspace, &length); if (error != 0) return (error == ENAMETOOLONG ? E2BIG : error); args->stringspace -= length; args->endp += length; (*countp)++; return (0); } int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg) { KASSERT(args->envc == 0, ("appending args after env")); return (exec_args_add_str(args, argp, segflg, &args->argc)); } int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg) { if (args->envc == 0) args->begin_envv = args->endp; return (exec_args_add_str(args, envp, segflg, &args->envc)); } int exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend) { ssize_t offset; KASSERT(args->endp != NULL, ("endp not initialized")); KASSERT(args->begin_argv != NULL, ("begin_argp not initialized")); offset = extend - consume; if (args->stringspace < offset) return (E2BIG); memmove(args->begin_argv + extend, args->begin_argv + consume, args->endp - args->begin_argv + consume); if (args->envc > 0) args->begin_envv += offset; args->endp += offset; args->stringspace -= offset; return (0); } char * exec_args_get_begin_envv(struct image_args *args) { KASSERT(args->endp != NULL, ("endp not initialized")); if (args->envc > 0) return (args->begin_envv); return (args->endp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ int exec_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc; char **vectp; char *stringp; uintptr_t destp, ustringp; struct ps_strings *arginfo; struct proc *p; struct sysentvec *sysent; size_t execpath_len; int error, szsigcode; char canary[sizeof(long) * 8]; p = imgp->proc; sysent = p->p_sysent; destp = PROC_PS_STRINGS(p); arginfo = imgp->ps_strings = (void *)destp; /* * Install sigcode. */ if (sysent->sv_shared_page_base == 0 && sysent->sv_szsigcode != NULL) { szsigcode = *(sysent->sv_szsigcode); destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); error = copyout(sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) { execpath_len = strlen(imgp->execpath) + 1; destp -= execpath_len; destp = rounddown2(destp, sizeof(void *)); imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ imgp->pagesizeslen = sizeof(pagesizes[0]) * MAXPAGESIZES; destp -= imgp->pagesizeslen; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes, imgp->pagesizes, imgp->pagesizeslen); if (error != 0) return (error); /* * Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ destp -= AT_COUNT * sizeof(Elf_Auxinfo); destp = rounddown2(destp, sizeof(void *)); } vectp = (char **)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(struct image_params *imgp) { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. * * Add a text reference now so no one can write to the * executable while we're activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ error = VOP_SET_TEXT(vp); if (error != 0) return (error); imgp->textset = true; /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = true; return (error); } /* * Exec handler registration */ int exec_register(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; u_int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(const struct execsw *execsw_arg) { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } /* * Write out a core segment to the compression stream. */ static int compress_chunk(struct coredump_params *cp, char *base, char *buf, size_t len) { size_t chunk_len; int error; error = 0; while (len > 0) { chunk_len = MIN(len, CORE_BUF_SIZE); /* * We can get EFAULT error here. * In that case zero out the current chunk of the segment. */ error = copyin(base, buf, chunk_len); if (error != 0) bzero(buf, chunk_len); error = compressor_write(cp->comp, buf, chunk_len); if (error != 0) break; base += chunk_len; len -= chunk_len; } return (error); } int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, size_t *resid) { return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base), len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, cp->active_cred, cp->file_cred, resid, cp->td)); } int core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, void *tmpbuf) { vm_map_t map; struct mount *mp; size_t resid, runlen; int error; bool success; KASSERT((uintptr_t)base % PAGE_SIZE == 0, ("%s: user address %p is not page-aligned", __func__, base)); if (cp->comp != NULL) return (compress_chunk(cp, base, tmpbuf, len)); error = 0; map = &cp->td->td_proc->p_vmspace->vm_map; for (; len > 0; base += runlen, offset += runlen, len -= runlen) { /* * Attempt to page in all virtual pages in the range. If a * virtual page is not backed by the pager, it is represented as * a hole in the file. This can occur with zero-filled * anonymous memory or truncated files, for example. */ for (runlen = 0; runlen < len; runlen += PAGE_SIZE) { if (core_dump_can_intr && curproc_sigkilled()) return (EINTR); error = vm_fault(map, (uintptr_t)base + runlen, VM_PROT_READ, VM_FAULT_NOFILL, NULL); if (runlen == 0) success = error == KERN_SUCCESS; else if ((error == KERN_SUCCESS) != success) break; } if (success) { error = core_write(cp, base, runlen, offset, UIO_USERSPACE, &resid); if (error != 0) { if (error != EFAULT) break; /* * EFAULT may be returned if the user mapping * could not be accessed, e.g., because a mapped * file has been truncated. Skip the page if no * progress was made, to protect against a * hypothetical scenario where vm_fault() was * successful but core_write() returns EFAULT * anyway. */ runlen -= resid; if (runlen == 0) { success = false; runlen = PAGE_SIZE; } } } if (!success) { error = vn_start_write(cp->vp, &mp, V_WAIT); if (error != 0) break; vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY); error = vn_truncate_locked(cp->vp, offset + runlen, false, cp->td->td_ucred); VOP_UNLOCK(cp->vp); vn_finished_write(mp); if (error != 0) break; } } return (error); } /* * Drain into a core file. */ int sbuf_drain_core_output(void *arg, const char *data, int len) { struct coredump_params *cp; struct proc *p; int error, locked; cp = arg; p = cp->td->td_proc; /* * Some kern_proc out routines that print to this sbuf may * call us with the process lock held. Draining with the * non-sleepable lock held is unsafe. The lock is needed for * those routines when dumping a live process. In our case we * can safely release the lock before draining and acquire * again after. */ locked = PROC_LOCKED(p); if (locked) PROC_UNLOCK(p); if (cp->comp != NULL) error = compressor_write(cp->comp, __DECONST(char *, data), len); else error = core_write(cp, __DECONST(void *, data), len, cp->offset, UIO_SYSSPACE, NULL); if (locked) PROC_LOCK(p); if (error != 0) return (-error); cp->offset += len; return (len); } diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index 4f0c8aff9f42..c1c94a2eabfd 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -1,128 +1,127 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_IMGACT_H_ #define _SYS_IMGACT_H_ #include #include #define MAXSHELLCMDLEN PAGE_SIZE struct ucred; struct image_args { char *buf; /* pointer to string buffer */ void *bufkva; /* cookie for string buffer KVA */ char *begin_argv; /* beginning of argv in buf */ char *begin_envv; /* (interal use only) beginning of envv in buf, * access with exec_args_get_begin_envv(). */ char *endp; /* current `end' pointer of arg & env strings */ char *fname; /* pointer to filename of executable (system space) */ char *fname_buf; /* pointer to optional malloc(M_TEMP) buffer */ int stringspace; /* space left in arg & env buffer */ int argc; /* count of argument strings */ int envc; /* count of environment strings */ int fd; /* file descriptor of the executable */ }; struct image_params { struct proc *proc; /* our process */ struct label *execlabel; /* optional exec label */ struct vnode *vp; /* pointer to vnode of file to exec */ struct vm_object *object; /* The vm object for this vp */ struct vattr *attr; /* attributes of file */ const char *image_header; /* header of file to exec */ unsigned long entry_addr; /* entry address of target executable */ unsigned long reloc_base; /* load address of image */ unsigned long et_dyn_addr; /* PIE load base */ char *interpreter_name; /* name of the interpreter */ void *auxargs; /* ELF Auxinfo structure pointer */ struct sf_buf *firstpage; /* first page that we mapped */ void *ps_strings; /* pointer to ps_string (user space) */ struct image_args *args; /* system call arguments */ struct sysentvec *sysent; /* system entry vector */ void *argv; /* pointer to argv (user space) */ void *envv; /* pointer to envv (user space) */ char *execpath; void *execpathp; char *freepath; void *canary; int canarylen; void *pagesizes; int pagesizeslen; vm_prot_t stack_prot; u_long stack_sz; struct ucred *newcred; /* new credentials if changing */ #define IMGACT_SHELL 0x1 #define IMGACT_BINMISC 0x2 unsigned char interpreted; /* mask of interpreters that have run */ bool credential_setid; /* true if becoming setid */ bool vmspace_destroyed; /* we've blown away original vm space */ bool opened; /* we have opened executable vnode */ bool textset; u_int map_flags; #define IMGP_ASLR_SHARED_PAGE 0x1 uint32_t imgp_flags; struct vnode *interpreter_vp; /* vnode of the interpreter */ }; #ifdef _KERNEL struct sysentvec; struct thread; struct vmspace; int exec_alloc_args(struct image_args *); int exec_args_add_arg(struct image_args *args, const char *argp, enum uio_seg segflg); int exec_args_add_env(struct image_args *args, const char *envp, enum uio_seg segflg); int exec_args_add_fname(struct image_args *args, const char *fname, enum uio_seg segflg); int exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend); char *exec_args_get_begin_envv(struct image_args *args); int exec_check_permissions(struct image_params *); void exec_cleanup(struct thread *td, struct vmspace *); int exec_copyout_strings(struct image_params *, uintptr_t *); void exec_free_args(struct image_args *); int exec_map_stack(struct image_params *); int exec_new_vmspace(struct image_params *, struct sysentvec *); void exec_setregs(struct thread *, struct image_params *, uintptr_t); int exec_shell_imgact(struct image_params *); -int exec_copyin_args(struct image_args *, const char *, enum uio_seg, - char **, char **); +int exec_copyin_args(struct image_args *, const char *, char **, char **); int pre_execve(struct thread *td, struct vmspace **oldvmspace); void post_execve(struct thread *td, int error, struct vmspace *oldvmspace); #endif #endif /* !_SYS_IMGACT_H_ */