Index: head/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 313280) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 313281) @@ -1,3127 +1,3115 @@ /*- * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); #ifndef __mips__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifndef __mips__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); CTASSERT(sizeof(struct kevent32) == 20); CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifndef __mips__ CTASSERT(sizeof(struct stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD10 int freebsd10_freebsd32_pipe(struct thread *td, struct freebsd10_freebsd32_pipe_args *uap) { return (freebsd10_pipe(td, (struct freebsd10_pipe_args*)uap)); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; size_t length; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = copyinstr(argp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = copyinstr(envp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); post_execve(td, error, oldvmspace); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } post_execve(td, error, oldvmspace); return (error); } int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { struct mprotect_args ap; ap.addr = PTRIN(uap->addr); ap.len = uap->len; ap.prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (ap.prot & PROT_READ) != 0) ap.prot |= PROT_EXEC; #endif return (sys_mprotect(td, &ap)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { struct mmap_args ap; vm_offset_t addr = (vm_offset_t) uap->addr; vm_size_t len = uap->len; int prot = uap->prot; int flags = uap->flags; int fd = uap->fd; off_t pos = PAIR32TO64(off_t,uap->pos); #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif ap.addr = (void *) addr; ap.len = len; ap.prot = prot; ap.flags = flags; ap.fd = fd; ap.pos = pos; return (sys_mmap(td, &ap)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { struct freebsd32_mmap_args ap; ap.addr = uap->addr; ap.len = uap->len; ap.prot = uap->prot; ap.flags = uap->flags; ap.fd = uap->fd; ap.pos1 = uap->pos1; ap.pos2 = uap->pos2; return (freebsd32_mmap(td, &ap)); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { uap, freebsd32_kevent_copyout, freebsd32_kevent_copyin}; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); return (error); } int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error) return (error); if (uap->rusage != NULL) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; m = control; ctlbuf = msg->msg_control; while (m && len > 0) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; /* Adjust message length */ cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen; /* Copy cmsghdr */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(cm,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (len <= 0) break; /* Copy data */ copylen = datalen; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(data,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m = m->m_next; } msg->msg_controllen = (len <= 0) ? maxlen : ctlbuf - (caddr_t)msg->msg_control; exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) m_freem(control); return (error); } /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct mbuf *m; void *md; u_int idx, len, msglen; int error; buflen = FREEBSD32_ALIGN(buflen); if (buflen > MCLBYTES) return (EINVAL); /* * Iterate over the buffer and get the length of each message * in there. This has 32-bit alignment and padding. Use it to * determine the length of these messages when using 64-bit * alignment and padding. */ idx = 0; len = 0; while (idx < buflen) { error = copyin(buf + idx, &msglen, sizeof(msglen)); if (error) return (error); if (msglen < sizeof(struct cmsghdr)) return (EINVAL); msglen = FREEBSD32_ALIGN(msglen); if (idx + msglen > buflen) return (EINVAL); idx += msglen; msglen += CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); len += CMSG_ALIGN(msglen); } if (len > MCLBYTES) return (EINVAL); m = m_get(M_WAITOK, MT_CONTROL); if (len > MLEN) MCLGET(m, M_WAITOK); m->m_len = len; md = mtod(m, void *); while (buflen > 0) { error = copyin(buf, md, sizeof(struct cmsghdr)); if (error) break; msglen = *(u_int *)md; msglen = FREEBSD32_ALIGN(msglen); /* Modify the message length to account for alignment. */ *(u_int *)md = msglen + CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); md = (char *)md + CMSG_ALIGN(sizeof(struct cmsghdr)); buf += FREEBSD32_ALIGN(sizeof(struct cmsghdr)); buflen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); msglen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); if (msglen > 0) { error = copyin(buf, md, msglen); if (error) break; md = (char *)md + CMSG_ALIGN(msglen); buf += msglen; buflen -= msglen; } } if (error) m_free(m); else *mp = m; return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif int freebsd32_getdirentries(struct thread *td, struct freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); CP(*in, *out, st_size); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif int freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub, NULL); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } int freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error && error != ENOMEM) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(uap->rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (uap->rmtp && !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) return (EFAULT); error = kern_nanosleep(td, &rqt, &rmt); if (error && uap->rmtp) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, uap->rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { - struct cpuset_getaffinity_args ap; - ap.level = uap->level; - ap.which = uap->which; - ap.id = PAIR32TO64(id_t,uap->id); - ap.cpusetsize = uap->cpusetsize; - ap.mask = uap->mask; - - return (sys_cpuset_getaffinity(td, &ap)); + return (kern_cpuset_getaffinity(td, uap->level, uap->which, + PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { - struct cpuset_setaffinity_args ap; - ap.level = uap->level; - ap.which = uap->which; - ap.id = PAIR32TO64(id_t,uap->id); - ap.cpusetsize = uap->cpusetsize; - ap.mask = uap->mask; - - return (sys_cpuset_setaffinity(td, &ap)); + return (kern_cpuset_setaffinity(td, uap->level, uap->which, + PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags) { if ((flags & ~SY_THR_STATIC) != 0) return (EINVAL); if (*offset == NO_SYSCALL) { int i; for (i = 1; i < SYS_MAXSYSCALL; ++i) if (freebsd32_sysent[i].sy_call == (sy_call_t *)lkmnosys) break; if (i == SYS_MAXSYSCALL) return (ENFILE); *offset = i; } else if (*offset < 0 || *offset >= SYS_MAXSYSCALL) return (EINVAL); else if (freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmnosys && freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmressys) return (EEXIST); *old_sysent = freebsd32_sysent[*offset]; freebsd32_sysent[*offset] = *new_sysent; atomic_store_rel_32(&freebsd32_sysent[*offset].sy_thrcnt, flags); return (0); } int syscall32_deregister(int *offset, struct sysent *old_sysent) { if (*offset == 0) return (0); freebsd32_sysent[*offset] = *old_sysent; return (0); } int syscall32_module_handler(struct module *mod, int what, void *arg) { struct syscall_module_data *data = (struct syscall_module_data*)arg; modspecific_t ms; int error; switch (what) { case MOD_LOAD: error = syscall32_register(data->offset, data->new_sysent, &data->old_sysent, SY_THR_STATIC_KLD); if (error) { /* Leave a mark so we know to safely unload below. */ data->offset = NULL; return error; } ms.intval = *data->offset; MOD_XLOCK; module_setspecific(mod, &ms); MOD_XUNLOCK; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); case MOD_UNLOAD: /* * MOD_LOAD failed, so just return without calling the * chained handler since we didn't pass along the MOD_LOAD * event. */ if (data->offset == NULL) return (0); if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return (error); } error = syscall32_deregister(data->offset, &data->old_sysent); return (error); default: error = EOPNOTSUPP; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); } } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { struct syscall_helper_data *sd1; int error; for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) { error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent, &sd1->old_sysent, flags); if (error != 0) { syscall32_helper_unregister(sd); return (error); } sd1->registered = 1; } return (0); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; for (sd1 = sd; sd1->registered != 0; sd1++) { syscall32_deregister(&sd1->syscall_no, &sd1->old_sysent); sd1->registered = 0; } return (0); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp; uintptr_t destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = destp; copyout(pagesizes32, (void *)destp, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) * sizeof(u_int32_t)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); } /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat stat; struct kld32_file_stat stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); error = kern_kldstat(td, uap->fileid, &stat); if (error != 0) return (error); bcopy(&stat.name[0], &stat32.name[0], sizeof(stat.name)); CP(stat, stat32, refs); CP(stat, stat32, id); PTROUT_CP(stat, stat32, address); CP(stat, stat32, size); bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags; switch (uap->com) { case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } Index: head/sys/compat/linux/linux_misc.c =================================================================== --- head/sys/compat/linux/linux_misc.c (revision 313280) +++ head/sys/compat/linux/linux_misc.c (revision 313281) @@ -1,2522 +1,2513 @@ /*- * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #if defined(__i386__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #include #include /** * Special DTrace provider for the linuxulator. * * In this file we define the provider for the entire linuxulator. All * modules (= files of the linuxulator) use it. * * We define a different name depending on the emulated bitsize, see * ../..//linux{,32}/linux.h, e.g.: * native bitsize = linuxulator * amd64, 32bit emulation = linuxulator32 */ LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalbig; l_ulong freebig; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; struct l_pselect6arg { l_uintptr_t ss; l_size_t ss_len; }; static int linux_utimensat_nsec_valid(l_long); int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; vm_object_t object; int i, j; struct timespec ts; bzero(&sysinfo, sizeof(sysinfo)); getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = sysinfo.totalram - vm_cnt.v_wire_count * PAGE_SIZE; sysinfo.sharedram = 0; mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) if (object->shadow_count > 1) sysinfo.sharedram += object->resident_page_count; mtx_unlock(&vm_object_list_mtx); sysinfo.sharedram *= PAGE_SIZE; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* The following are only present in newer Linux kernels. */ sysinfo.totalbig = 0; sysinfo.freebig = 0; sysinfo.mem_unit = 1; return (copyout(&sysinfo, args->info, sizeof(sysinfo))); } int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; u_int secs; int error; #ifdef DEBUG if (ldebug(alarm)) printf(ARGS(alarm, "%u"), args->secs); #endif secs = args->secs; /* * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 * to match kern_setitimer()'s limit to avoid error from it. * * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit * platforms. */ if (secs > INT32_MAX / 2) secs = INT32_MAX / 2; it.it_value.tv_sec = secs; it.it_value.tv_usec = 0; timevalclear(&it.it_interval); error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); KASSERT(error == 0, ("kern_setitimer returns %d", error)); if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || old_it.it_value.tv_usec >= 500000) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; return (0); } int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; vm_offset_t new, old; struct obreak_args /* { char * nsize; } */ tmp; #ifdef DEBUG if (ldebug(brk)) printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); #endif old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (vm_offset_t)args->dsend; tmp.nsize = (char *)new; if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) td->td_retval[0] = (long)new; else td->td_retval[0] = (long)old; return (0); } #if defined(__i386__) /* XXX: what about amd64/linux32? */ int linux_uselib(struct thread *td, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; struct vattr attr; vm_offset_t vmaddr; unsigned long file_offset; unsigned long bss_size; char *library; ssize_t aresid; int error, locked, writecount; LCONVPATHEXIST(td, args->library, &library); #ifdef DEBUG if (ldebug(uselib)) printf(ARGS(uselib, "%s"), library); #endif a_out = NULL; locked = 0; vp = NULL; NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, library, td); error = namei(&ni); LFREEPATH(library); if (error) goto cleanup; vp = ni.ni_vp; NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. * XXX: The code below largely duplicates exec_check_permissions(). */ locked = 1; /* Writable? */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) goto cleanup; if (writecount != 0) { error = ETXTBSY; goto cleanup; } /* Executable? */ error = VOP_GETATTR(vp, &attr, td->td_ucred); if (error) goto cleanup; if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { /* EACCESS is what exec(2) returns. */ error = ENOEXEC; goto cleanup; } /* Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; goto cleanup; } /* Can we access it? */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) goto cleanup; /* * XXX: This should use vn_open() so that it is properly authorized, * and to reduce code redundancy all over the place here. * XXX: Not really, it duplicates far more of exec_check_permissions() * than vn_open(). */ #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, VREAD); if (error) goto cleanup; #endif error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error) goto cleanup; /* Pull in executable header into exec_map */ error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); if (error) goto cleanup; /* Is it a Linux binary ? */ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* * While we are here, we should REALLY do some more checks */ /* Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* Check various fields in header for validity/bounds. */ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* * text/data/bss must not exceed limits * XXX - this is not complete. it should check current usage PLUS * the resources needed by this library. */ PROC_LOCK(td->td_proc); if (a_out->a_text > maxtsiz || a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || racct_set(td->td_proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(td->td_proc); error = ENOMEM; goto cleanup; } PROC_UNLOCK(td->td_proc); /* * Prevent more writers. * XXX: Note that if any of the VM operations fail below we don't * clear this flag. */ VOP_SET_TEXT(vp); /* * Lock no longer needed */ locked = 0; VOP_UNLOCK(vp, 0); /* * Check if file_offset page aligned. Currently we cannot handle * misalinged file offsets, and so we read in the entire image * (what a waste). */ if (file_offset & PAGE_MASK) { #ifdef DEBUG printf("uselib: Non page aligned binary %lu\n", file_offset); #endif /* Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, td->td_ucred, NOCRED, &aresid, td); if (error != 0) goto cleanup; if (aresid != 0) { error = ENOEXEC; goto cleanup; } } else { #ifdef DEBUG printf("uselib: Page aligned binary %lu\n", file_offset); #endif /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * Map it all into the process's space as a single * copy-on-write "data" segment. */ error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); if (error) goto cleanup; } #ifdef DEBUG printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], ((long *)vmaddr)[1]); #endif if (bss_size != 0) { /* Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* allocate some 'anon' space */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: /* Unlock vnode if needed */ if (locked) VOP_UNLOCK(vp, 0); /* Release the temporary mapping. */ if (a_out) kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); return (error); } #endif /* __i386__ */ int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; #ifdef DEBUG if (ldebug(select)) printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, (void *)args->readfds, (void *)args->writefds, (void *)args->exceptfds, (void *)args->timeout); #endif /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; #ifdef DEBUG if (ldebug(select)) printf(LMSG("incoming timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. */ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, LINUX_NFDBITS); #ifdef DEBUG if (ldebug(select)) printf(LMSG("real select returns %d"), error); #endif if (error) goto select_out; if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); #ifdef DEBUG if (ldebug(select)) printf(LMSG("outgoing timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(<v, args->timeout, sizeof(ltv)))) goto select_out; } select_out: #ifdef DEBUG if (ldebug(select)) printf(LMSG("select_out -> %d"), error); #endif return (error); } int linux_mremap(struct thread *td, struct linux_mremap_args *args) { struct munmap_args /* { void *addr; size_t len; } */ bsd_args; int error = 0; #ifdef DEBUG if (ldebug(mremap)) printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), (void *)(uintptr_t)args->addr, (unsigned long)args->old_len, (unsigned long)args->new_len, (unsigned long)args->flags); #endif if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return (ENOMEM); } if (args->new_len < args->old_len) { bsd_args.addr = (caddr_t)((uintptr_t)args->addr + args->new_len); bsd_args.len = args->old_len - args->new_len; error = sys_munmap(td, &bsd_args); } td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; return (error); } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { struct msync_args bsd_args; bsd_args.addr = (caddr_t)(uintptr_t)args->addr; bsd_args.len = (uintptr_t)args->len; bsd_args.flags = args->fl & ~LINUX_MS_SYNC; return (sys_msync(td, &bsd_args)); } int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; #ifdef DEBUG if (ldebug(time)) printf(ARGS(time, "*")); #endif microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return (error); td->td_retval[0] = tm; return (0); } struct l_times_argv { l_clock_t tms_utime; l_clock_t tms_stime; l_clock_t tms_cutime; l_clock_t tms_cstime; }; /* * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK * auxiliary vector entry. */ #define CLK_TCK 100 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ CONVNTCK(r) : CONVOTCK(r)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; #ifdef DEBUG if (ldebug(times)) printf(ARGS(times, "*")); #endif if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_STATLOCK(p); calcru(p, &utime, &stime); PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return (error); } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return (0); } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; #ifdef DEBUG if (ldebug(newuname)) printf(ARGS(newuname, "*")); #endif linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); return (copyout(&utsname, args->buf, sizeof(utsname))); } struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utime)) printf(ARGS(utime, "%s, *"), fname); #endif if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utimes)) printf(ARGS(utimes, "%s, *"), fname); #endif if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } static int linux_utimensat_nsec_valid(l_long nsec) { if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) return (0); if (nsec >= 0 && nsec <= 999999999) return (0); return (1); } int linux_utimensat(struct thread *td, struct linux_utimensat_args *args) { struct l_timespec l_times[2]; struct timespec times[2], *timesp = NULL; char *path = NULL; int error, dfd, flags = 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; #ifdef DEBUG if (ldebug(utimensat)) printf(ARGS(utimensat, "%d, *"), dfd); #endif if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); if (args->times != NULL) { error = copyin(args->times, l_times, sizeof(l_times)); if (error != 0) return (error); if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) return (EINVAL); times[0].tv_sec = l_times[0].tv_sec; switch (l_times[0].tv_nsec) { case LINUX_UTIME_OMIT: times[0].tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times[0].tv_nsec = UTIME_NOW; break; default: times[0].tv_nsec = l_times[0].tv_nsec; } times[1].tv_sec = l_times[1].tv_sec; switch (l_times[1].tv_nsec) { case LINUX_UTIME_OMIT: times[1].tv_nsec = UTIME_OMIT; break; case LINUX_UTIME_NOW: times[1].tv_nsec = UTIME_NOW; break; default: times[1].tv_nsec = l_times[1].tv_nsec; break; } timesp = times; /* This breaks POSIX, but is what the Linux kernel does * _on purpose_ (documented in the man page for utimensat(2)), * so we must follow that behaviour. */ if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) return (0); } if (args->pathname != NULL) LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); else if (args->flags != 0) return (EINVAL); if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) flags |= AT_SYMLINK_NOFOLLOW; if (path == NULL) error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); else { error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, UIO_SYSSPACE, flags); LFREEPATH(path); } return (error); } int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); #ifdef DEBUG if (ldebug(futimesat)) printf(ARGS(futimesat, "%s, *"), fname); #endif if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } int linux_common_wait(struct thread *td, int pid, int *status, int options, struct rusage *ru) { int error, tmpstat; error = kern_wait(td, pid, &tmpstat, options, ru); if (error) return (error); if (status) { tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | bsd_to_linux_signal(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); else if (WIFCONTINUED(tmpstat)) tmpstat = 0xffff; error = copyout(&tmpstat, status, sizeof(int)); } return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { struct linux_wait4_args wait4_args; #ifdef DEBUG if (ldebug(waitpid)) printf(ARGS(waitpid, "%d, %p, %d"), args->pid, (void *)args->status, args->options); #endif wait4_args.pid = args->pid; wait4_args.status = args->status; wait4_args.options = args->options; wait4_args.rusage = NULL; return (linux_wait4(td, &wait4_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options; struct rusage ru, *rup; #ifdef DEBUG if (ldebug(wait4)) printf(ARGS(wait4, "%d, %p, %d, %p"), args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); options = WEXITED; linux_to_bsd_waitopts(args->options, &options); if (args->rusage != NULL) rup = &ru; else rup = NULL; error = linux_common_wait(td, args->pid, args->status, options, rup); if (error != 0) return (error); if (args->rusage != NULL) error = linux_copyout_rusage(&ru, args->rusage); return (error); } int linux_waitid(struct thread *td, struct linux_waitid_args *args) { int status, options, sig; struct __wrusage wru; siginfo_t siginfo; l_siginfo_t lsi; idtype_t idtype; struct proc *p; int error; options = 0; linux_to_bsd_waitopts(args->options, &options); if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) return (EINVAL); if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) return (EINVAL); switch (args->idtype) { case LINUX_P_ALL: idtype = P_ALL; break; case LINUX_P_PID: if (args->id <= 0) return (EINVAL); idtype = P_PID; break; case LINUX_P_PGID: if (args->id <= 0) return (EINVAL); idtype = P_PGID; break; default: return (EINVAL); } error = kern_wait6(td, idtype, args->id, &status, options, &wru, &siginfo); if (error != 0) return (error); if (args->rusage != NULL) { error = linux_copyout_rusage(&wru.wru_children, args->rusage); if (error != 0) return (error); } if (args->info != NULL) { p = td->td_proc; if (td->td_retval[0] == 0) bzero(&lsi, sizeof(lsi)); else { sig = bsd_to_linux_signal(siginfo.si_signo); siginfo_to_lsiginfo(&siginfo, &lsi, sig); } error = copyout(&lsi, args->info, sizeof(lsi)); } td->td_retval[0] = 0; return (error); } int linux_mknod(struct thread *td, struct linux_mknod_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); #ifdef DEBUG if (ldebug(mknod)) printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, (uintmax_t)args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(mknodat)) printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, dfd, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! */ int linux_personality(struct thread *td, struct linux_personality_args *args) { struct linux_pemuldata *pem; struct proc *p = td->td_proc; uint32_t old; #ifdef DEBUG if (ldebug(personality)) printf(ARGS(personality, "%u"), args->per); #endif PROC_LOCK(p); pem = pem_find(p); old = pem->persona; if (args->per != 0xffffffff) pem->persona = args->per; PROC_UNLOCK(p); td->td_retval[0] = old; return (0); } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; #ifdef DEBUG if (ldebug(setitimer)) printf(ARGS(setitimer, "%p, %p"), (void *)uap->itv, (void *)uap->oitv); #endif if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); #ifdef DEBUG if (ldebug(setitimer)) { printf("setitimer: value: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); printf("setitimer: interval: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); } #endif error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; #ifdef DEBUG if (ldebug(getitimer)) printf(ARGS(getitimer, "%p"), (void *)uap->itv); #endif error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { struct setpriority_args bsd_args; bsd_args.which = PRIO_PROCESS; bsd_args.who = 0; /* current process */ bsd_args.prio = args->inc; return (sys_setpriority(td, &bsd_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; newcred = crget(); crextend(newcred, ngrp + 1); p = td->td_proc; PROC_LOCK(p); oldcred = p->p_ucred; crcopy(newcred, oldcred); /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { PROC_UNLOCK(p); crfree(newcred); goto out; } if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); proc_set_cred(p, newcred); PROC_UNLOCK(p); crfree(oldcred); error = 0; out: free(linux_gidset, M_LINUX); return (error); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t *linux_gidset; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. */ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); free(linux_gidset, M_LINUX); if (error) return (error); td->td_retval[0] = ngrp; return (0); } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; #ifdef DEBUG if (ldebug(setrlimit)) printf(ARGS(setrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(old_getrlimit)) printf(ARGS(old_getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) { struct l_rlimit rlim; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(getrlimit)) printf(ARGS(getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); lim_rlimit(td, which, &bsd_rlim); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_param sched_param; struct thread *tdt; int error, policy; #ifdef DEBUG if (ldebug(sched_setscheduler)) printf(ARGS(sched_setscheduler, "%d, %d, %p"), args->pid, args->policy, (const void *)args->param); #endif switch (args->policy) { case LINUX_SCHED_OTHER: policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: policy = SCHED_FIFO; break; case LINUX_SCHED_RR: policy = SCHED_RR; break; default: return (EINVAL); } error = copyin(args->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setscheduler(td, tdt, policy, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { struct thread *tdt; int error, policy; #ifdef DEBUG if (ldebug(sched_getscheduler)) printf(ARGS(sched_getscheduler, "%d"), args->pid); #endif tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getscheduler(td, tdt, &policy); PROC_UNLOCK(tdt->td_proc); switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return (error); } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_max)) printf(ARGS(sched_get_priority_max, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_max(td, &bsd)); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_min)) printf(ARGS(sched_get_priority_min, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return (EINVAL); } return (sys_sched_get_priority_min(td, &bsd)); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; #ifdef DEBUG if (ldebug(reboot)) printf(ARGS(reboot, "0x%x"), args->cmd); #endif if (args->magic1 != REBOOT_MAGIC1) return (EINVAL); switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return (EINVAL); } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return (EINVAL); } return (sys_reboot(td, &bsd_args)); } /* * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that * are assumed to be preserved. The following lightweight syscalls fixes * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c * * linux_getpid() - MP SAFE * linux_getgid() - MP SAFE * linux_getuid() - MP SAFE */ int linux_getpid(struct thread *td, struct linux_getpid_args *args) { #ifdef DEBUG if (ldebug(getpid)) printf(ARGS(getpid, "")); #endif td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { struct linux_emuldata *em; #ifdef DEBUG if (ldebug(gettid)) printf(ARGS(gettid, "")); #endif em = em_find(td); KASSERT(em != NULL, ("gettid: emuldata not found.\n")); td->td_retval[0] = em->em_tid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { #ifdef DEBUG if (ldebug(getppid)) printf(ARGS(getppid, "")); #endif td->td_retval[0] = kern_getppid(td); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { #ifdef DEBUG if (ldebug(getgid)) printf(ARGS(getgid, "")); #endif td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { #ifdef DEBUG if (ldebug(getuid)) printf(ARGS(getuid, "")); #endif td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { struct getsid_args bsd; #ifdef DEBUG if (ldebug(getsid)) printf(ARGS(getsid, "%i"), args->pid); #endif bsd.pid = args->pid; return (sys_getsid(td, &bsd)); } int linux_nosys(struct thread *td, struct nosys_args *ignore) { return (ENOSYS); } int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { struct getpriority_args bsd_args; int error; #ifdef DEBUG if (ldebug(getpriority)) printf(ARGS(getpriority, "%i, %i"), args->which, args->who); #endif bsd_args.which = args->which; bsd_args.who = args->who; error = sys_getpriority(td, &bsd_args); td->td_retval[0] = 20 - td->td_retval[0]; return (error); } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(sethostname)) printf(ARGS(sethostname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(setdomainname)) printf(ARGS(setdomainname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_NISDOMAINNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { #ifdef DEBUG if (ldebug(exit_group)) printf(ARGS(exit_group, "%i"), args->error_code); #endif LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, args->error_code); /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesnt occur often. */ exit1(td, args->error_code, 0); /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION 0x19980330 struct l_user_cap_header { l_int version; l_int pid; }; struct l_user_cap_data { l_int effective; l_int permitted; l_int inheritable; }; int linux_capget(struct thread *td, struct linux_capget_args *args) { struct l_user_cap_header luch; struct l_user_cap_data lucd; int error; if (args->hdrp == NULL) return (EFAULT); error = copyin(args->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); if (luch.version != _LINUX_CAPABILITY_VERSION) { luch.version = _LINUX_CAPABILITY_VERSION; error = copyout(&luch, args->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); if (args->datap) { /* * The current implementation doesn't support setting * a capability (it's essentially a stub) so indicate * that no capabilities are currently set or available * to request. */ bzero (&lucd, sizeof(lucd)); error = copyout(&lucd, args->datap, sizeof(lucd)); } return (error); } int linux_capset(struct thread *td, struct linux_capset_args *args) { struct l_user_cap_header luch; struct l_user_cap_data lucd; int error; if (args->hdrp == NULL || args->datap == NULL) return (EFAULT); error = copyin(args->hdrp, &luch, sizeof(luch)); if (error != 0) return (error); if (luch.version != _LINUX_CAPABILITY_VERSION) { luch.version = _LINUX_CAPABILITY_VERSION; error = copyout(&luch, args->hdrp, sizeof(luch)); if (error) return (error); return (EINVAL); } if (luch.pid) return (EPERM); error = copyin(args->datap, &lucd, sizeof(lucd)); if (error != 0) return (error); /* We currently don't support setting any capabilities. */ if (lucd.effective || lucd.permitted || lucd.inheritable) { linux_msg(td, "capset effective=0x%x, permitted=0x%x, " "inheritable=0x%x is not implemented", (int)lucd.effective, (int)lucd.permitted, (int)lucd.inheritable); return (EPERM); } return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; struct linux_emuldata *em; int pdeath_signal; #ifdef DEBUG if (ldebug(prctl)) printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, (uintmax_t)args->arg2, (uintmax_t)args->arg3, (uintmax_t)args->arg4, (uintmax_t)args->arg5); #endif switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); em->pdeath_signal = args->arg2; break; case LINUX_PR_GET_PDEATHSIG: em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); pdeath_signal = em->pdeath_signal; error = copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal)); break; case LINUX_PR_GET_KEEPCAPS: /* * Indicate that we always clear the effective and * permitted capability sets when the user id becomes * non-zero (actually the capability sets are simply * always zero in the current implementation). */ td->td_retval[0] = 0; break; case LINUX_PR_SET_KEEPCAPS: /* * Ignore requests to keep the effective and permitted * capability sets when the user id becomes non-zero. */ break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. */ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; default: error = EINVAL; break; } return (error); } int linux_sched_setparam(struct thread *td, struct linux_sched_setparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error; #ifdef DEBUG if (ldebug(sched_setparam)) printf(ARGS(sched_setparam, "%d, *"), uap->pid); #endif error = copyin(uap->param, &sched_param, sizeof(sched_param)); if (error) return (error); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_setparam(td, tdt, &sched_param); PROC_UNLOCK(tdt->td_proc); return (error); } int linux_sched_getparam(struct thread *td, struct linux_sched_getparam_args *uap) { struct sched_param sched_param; struct thread *tdt; int error; #ifdef DEBUG if (ldebug(sched_getparam)) printf(ARGS(sched_getparam, "%d, *"), uap->pid); #endif tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); PROC_UNLOCK(tdt->td_proc); if (error == 0) error = copyout(&sched_param, uap->param, sizeof(sched_param)); return (error); } /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; struct thread *tdt; - struct cpuset_getaffinity_args cga; #ifdef DEBUG if (ldebug(sched_getaffinity)) printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, args->len); #endif if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); - cga.level = CPU_LEVEL_WHICH; - cga.which = CPU_WHICH_TID; - cga.id = tdt->td_tid; - cga.cpusetsize = sizeof(cpuset_t); - cga.mask = (cpuset_t *) args->user_mask_ptr; - if ((error = sys_cpuset_getaffinity(td, &cga)) == 0) + error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, + tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); + if (error == 0) td->td_retval[0] = sizeof(cpuset_t); return (error); } /* * Set affinity of a process. */ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { - struct cpuset_setaffinity_args csa; struct thread *tdt; #ifdef DEBUG if (ldebug(sched_setaffinity)) printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, args->len); #endif if (args->len < sizeof(cpuset_t)) return (EINVAL); tdt = linux_tdfind(td, args->pid, -1); if (tdt == NULL) return (ESRCH); PROC_UNLOCK(tdt->td_proc); - csa.level = CPU_LEVEL_WHICH; - csa.which = CPU_WHICH_TID; - csa.id = tdt->td_tid; - csa.cpusetsize = sizeof(cpuset_t); - csa.mask = (cpuset_t *) args->user_mask_ptr; - return (sys_cpuset_setaffinity(td, &csa)); + return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, + tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); } struct linux_rlimit64 { uint64_t rlim_cur; uint64_t rlim_max; }; int linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) { struct rlimit rlim, nrlim; struct linux_rlimit64 lrlim; struct proc *p; u_int which; int flags; int error; #ifdef DEBUG if (ldebug(prlimit64)) printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, args->resource, (void *)args->new, (void *)args->old); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); if (args->new != NULL) { /* * Note. Unlike FreeBSD where rlim is signed 64-bit Linux * rlim is unsigned 64-bit. FreeBSD treats negative limits * as INFINITY so we do not need a conversion even. */ error = copyin(args->new, &nrlim, sizeof(nrlim)); if (error != 0) return (error); } flags = PGET_HOLD | PGET_NOTWEXIT; if (args->new != NULL) flags |= PGET_CANDEBUG; else flags |= PGET_CANSEE; error = pget(args->pid, flags, &p); if (error != 0) return (error); if (args->old != NULL) { PROC_LOCK(p); lim_rlimit_proc(p, which, &rlim); PROC_UNLOCK(p); if (rlim.rlim_cur == RLIM_INFINITY) lrlim.rlim_cur = LINUX_RLIM_INFINITY; else lrlim.rlim_cur = rlim.rlim_cur; if (rlim.rlim_max == RLIM_INFINITY) lrlim.rlim_max = LINUX_RLIM_INFINITY; else lrlim.rlim_max = rlim.rlim_max; error = copyout(&lrlim, args->old, sizeof(lrlim)); if (error != 0) goto out; } if (args->new != NULL) error = kern_proc_setrlimit(td, p, which, &nrlim); out: PRELE(p); return (error); } int linux_pselect6(struct thread *td, struct linux_pselect6_args *args) { struct timeval utv, tv0, tv1, *tvp; struct l_pselect6arg lpse6; struct l_timespec lts; struct timespec uts; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; ssp = NULL; if (args->sig != NULL) { error = copyin(args->sig, &lpse6, sizeof(lpse6)); if (error != 0) return (error); if (lpse6.ss_len != sizeof(l_ss)) return (EINVAL); if (lpse6.ss != 0) { error = copyin(PTRIN(lpse6.ss), &l_ss, sizeof(l_ss)); if (error != 0) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } } /* * Currently glibc changes nanosecond number to microsecond. * This mean losing precision but for now it is hardly seen. */ if (args->tsp != NULL) { error = copyin(args->tsp, <s, sizeof(lts)); if (error != 0) return (error); error = linux_to_native_timespec(&uts, <s); if (error != 0) return (error); TIMESPEC_TO_TIMEVAL(&utv, &uts); if (itimerfix(&utv)) return (EINVAL); microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_pselect(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp, ssp, LINUX_NFDBITS); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0] != 0) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); TIMEVAL_TO_TIMESPEC(&utv, &uts); native_to_linux_timespec(<s, &uts); error = copyout(<s, args->tsp, sizeof(lts)); } return (error); } int linux_ppoll(struct thread *td, struct linux_ppoll_args *args) { struct timespec ts0, ts1; struct l_timespec lts; struct timespec uts, *tsp; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; if (args->sset != NULL) { if (args->ssize != sizeof(l_ss)) return (EINVAL); error = copyin(args->sset, &l_ss, sizeof(l_ss)); if (error) return (error); linux_to_bsd_sigset(&l_ss, &ss); ssp = &ss; } else ssp = NULL; if (args->tsp != NULL) { error = copyin(args->tsp, <s, sizeof(lts)); if (error) return (error); error = linux_to_native_timespec(&uts, <s); if (error != 0) return (error); nanotime(&ts0); tsp = &uts; } else tsp = NULL; error = kern_poll(td, args->fds, args->nfds, tsp, ssp); if (error == 0 && args->tsp != NULL) { if (td->td_retval[0]) { nanotime(&ts1); timespecsub(&ts1, &ts0); timespecsub(&uts, &ts1); if (uts.tv_sec < 0) timespecclear(&uts); } else timespecclear(&uts); native_to_linux_timespec(<s, &uts); error = copyout(<s, args->tsp, sizeof(lts)); } return (error); } #if defined(DEBUG) || defined(KTR) /* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ #ifdef COMPAT_LINUX32 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL #else #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL #endif u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; static int linux_debug(int syscall, int toggle, int global) { if (global) { char c = toggle ? 0 : 0xff; memset(linux_debug_map, c, sizeof(linux_debug_map)); return (0); } if (syscall < 0 || syscall >= L_MAXSYSCALL) return (EINVAL); if (toggle) clrbit(linux_debug_map, syscall); else setbit(linux_debug_map, syscall); return (0); } #undef L_MAXSYSCALL /* * Usage: sysctl linux.debug=.<0/1> * * E.g.: sysctl linux.debug=21.0 * * As a special case, syscall "all" will apply to all syscalls globally. */ #define LINUX_MAX_DEBUGSTR 16 int linux_sysctl_debug(SYSCTL_HANDLER_ARGS) { char value[LINUX_MAX_DEBUGSTR], *p; int error, sysc, toggle; int global = 0; value[0] = '\0'; error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); if (error || req->newptr == NULL) return (error); for (p = value; *p != '\0' && *p != '.'; p++); if (*p == '\0') return (EINVAL); *p++ = '\0'; sysc = strtol(value, NULL, 0); toggle = strtol(p, NULL, 0); if (strcmp(value, "all") == 0) global = 1; error = linux_debug(sysc, toggle, global); return (error); } #endif /* DEBUG || KTR */ int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) { struct timespec ts; struct l_timespec lts; struct thread *tdt; int error; /* * According to man in case the invalid pid specified * EINVAL should be returned. */ if (uap->pid < 0) return (EINVAL); tdt = linux_tdfind(td, uap->pid, -1); if (tdt == NULL) return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, &ts); PROC_UNLOCK(tdt->td_proc); if (error != 0) return (error); native_to_linux_timespec(<s, &ts); return (copyout(<s, uap->interval, sizeof(lts))); } /* * In case when the Linux thread is the initial thread in * the thread group thread id is equal to the process id. * Glibc depends on this magic (assert in pthread_getattr_np.c). */ struct thread * linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) { struct linux_emuldata *em; struct thread *tdt; struct proc *p; tdt = NULL; if (tid == 0 || tid == td->td_tid) { tdt = td; PROC_LOCK(tdt->td_proc); } else if (tid > PID_MAX) tdt = tdfind(tid, pid); else { /* * Initial thread where the tid equal to the pid. */ p = pfind(tid); if (p != NULL) { if (SV_PROC_ABI(p) != SV_ABI_LINUX) { /* * p is not a Linuxulator process. */ PROC_UNLOCK(p); return (NULL); } FOREACH_THREAD_IN_PROC(p, tdt) { em = em_find(tdt); if (tid == em->em_tid) return (tdt); } PROC_UNLOCK(p); } return (NULL); } return (tdt); } void linux_to_bsd_waitopts(int options, int *bsdopts) { if (options & LINUX_WNOHANG) *bsdopts |= WNOHANG; if (options & LINUX_WUNTRACED) *bsdopts |= WUNTRACED; if (options & LINUX_WEXITED) *bsdopts |= WEXITED; if (options & LINUX_WCONTINUED) *bsdopts |= WCONTINUED; if (options & LINUX_WNOWAIT) *bsdopts |= WNOWAIT; if (options & __WCLONE) *bsdopts |= WLINUXCLONE; } Index: head/sys/kern/kern_cpuset.c =================================================================== --- head/sys/kern/kern_cpuset.c (revision 313280) +++ head/sys/kern/kern_cpuset.c (revision 313281) @@ -1,1307 +1,1322 @@ /*- * Copyright (c) 2008, Jeffrey Roberson * All rights reserved. * * Copyright (c) 2008 Nokia Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ /* * cpusets provide a mechanism for creating and manipulating sets of * processors for the purpose of constraining the scheduling of threads to * specific processors. * * Each process belongs to an identified set, by default this is set 1. Each * thread may further restrict the cpus it may run on to a subset of this * named set. This creates an anonymous set which other threads and processes * may not join by number. * * The named set is referred to herein as the 'base' set to avoid ambiguity. * This set is usually a child of a 'root' set while the anonymous set may * simply be referred to as a mask. In the syscall api these are referred to * as the ROOT, CPUSET, and MASK levels where CPUSET is called 'base' here. * * Threads inherit their set from their creator whether it be anonymous or * not. This means that anonymous sets are immutable because they may be * shared. To modify an anonymous set a new set is created with the desired * mask and the same parent as the existing anonymous set. This gives the * illusion of each thread having a private mask. * * Via the syscall apis a user may ask to retrieve or modify the root, base, * or mask that is discovered via a pid, tid, or setid. Modifying a set * modifies all numbered and anonymous child sets to comply with the new mask. * Modifying a pid or tid's mask applies only to that tid but must still * exist within the assigned parent set. * * A thread may not be assigned to a group separate from other threads in * the process. This is to remove ambiguity when the setid is queried with * a pid argument. There is no other technical limitation. * * This somewhat complex arrangement is intended to make it easy for * applications to query available processors and bind their threads to * specific processors while also allowing administrators to dynamically * reprovision by changing sets which apply to groups of processes. * * A simple application should not concern itself with sets at all and * rather apply masks to its own threads via CPU_WHICH_TID and a -1 id * meaning 'curthread'. It may query available cpus for that tid with a * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...). */ static uma_zone_t cpuset_zone; static struct mtx cpuset_lock; static struct setlist cpuset_ids; static struct unrhdr *cpuset_unr; static struct cpuset *cpuset_zero, *cpuset_default; /* Return the size of cpuset_t at the kernel level */ SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, sizeof(cpuset_t), "sizeof(cpuset_t)"); cpuset_t *cpuset_root; cpuset_t cpuset_domain[MAXMEMDOM]; /* * Acquire a reference to a cpuset, all pointers must be tracked with refs. */ struct cpuset * cpuset_ref(struct cpuset *set) { refcount_acquire(&set->cs_ref); return (set); } /* * Walks up the tree from 'set' to find the root. Returns the root * referenced. */ static struct cpuset * cpuset_refroot(struct cpuset *set) { for (; set->cs_parent != NULL; set = set->cs_parent) if (set->cs_flags & CPU_SET_ROOT) break; cpuset_ref(set); return (set); } /* * Find the first non-anonymous set starting from 'set'. Returns this set * referenced. May return the passed in set with an extra ref if it is * not anonymous. */ static struct cpuset * cpuset_refbase(struct cpuset *set) { if (set->cs_id == CPUSET_INVALID) set = set->cs_parent; cpuset_ref(set); return (set); } /* * Release a reference in a context where it is safe to allocate. */ void cpuset_rel(struct cpuset *set) { cpusetid_t id; if (refcount_release(&set->cs_ref) == 0) return; mtx_lock_spin(&cpuset_lock); LIST_REMOVE(set, cs_siblings); id = set->cs_id; if (id != CPUSET_INVALID) LIST_REMOVE(set, cs_link); mtx_unlock_spin(&cpuset_lock); cpuset_rel(set->cs_parent); uma_zfree(cpuset_zone, set); if (id != CPUSET_INVALID) free_unr(cpuset_unr, id); } /* * Deferred release must be used when in a context that is not safe to * allocate/free. This places any unreferenced sets on the list 'head'. */ static void cpuset_rel_defer(struct setlist *head, struct cpuset *set) { if (refcount_release(&set->cs_ref) == 0) return; mtx_lock_spin(&cpuset_lock); LIST_REMOVE(set, cs_siblings); if (set->cs_id != CPUSET_INVALID) LIST_REMOVE(set, cs_link); LIST_INSERT_HEAD(head, set, cs_link); mtx_unlock_spin(&cpuset_lock); } /* * Complete a deferred release. Removes the set from the list provided to * cpuset_rel_defer. */ static void cpuset_rel_complete(struct cpuset *set) { LIST_REMOVE(set, cs_link); cpuset_rel(set->cs_parent); uma_zfree(cpuset_zone, set); } /* * Find a set based on an id. Returns it with a ref. */ static struct cpuset * cpuset_lookup(cpusetid_t setid, struct thread *td) { struct cpuset *set; if (setid == CPUSET_INVALID) return (NULL); mtx_lock_spin(&cpuset_lock); LIST_FOREACH(set, &cpuset_ids, cs_link) if (set->cs_id == setid) break; if (set) cpuset_ref(set); mtx_unlock_spin(&cpuset_lock); KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__)); if (set != NULL && jailed(td->td_ucred)) { struct cpuset *jset, *tset; jset = td->td_ucred->cr_prison->pr_cpuset; for (tset = set; tset != NULL; tset = tset->cs_parent) if (tset == jset) break; if (tset == NULL) { cpuset_rel(set); set = NULL; } } return (set); } /* * Create a set in the space provided in 'set' with the provided parameters. * The set is returned with a single ref. May return EDEADLK if the set * will have no valid cpu based on restrictions from the parent. */ static int _cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask, cpusetid_t id) { if (!CPU_OVERLAP(&parent->cs_mask, mask)) return (EDEADLK); CPU_COPY(mask, &set->cs_mask); LIST_INIT(&set->cs_children); refcount_init(&set->cs_ref, 1); set->cs_flags = 0; mtx_lock_spin(&cpuset_lock); CPU_AND(&set->cs_mask, &parent->cs_mask); set->cs_id = id; set->cs_parent = cpuset_ref(parent); LIST_INSERT_HEAD(&parent->cs_children, set, cs_siblings); if (set->cs_id != CPUSET_INVALID) LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); mtx_unlock_spin(&cpuset_lock); return (0); } /* * Create a new non-anonymous set with the requested parent and mask. May * return failures if the mask is invalid or a new number can not be * allocated. */ static int cpuset_create(struct cpuset **setp, struct cpuset *parent, const cpuset_t *mask) { struct cpuset *set; cpusetid_t id; int error; id = alloc_unr(cpuset_unr); if (id == -1) return (ENFILE); *setp = set = uma_zalloc(cpuset_zone, M_WAITOK); error = _cpuset_create(set, parent, mask, id); if (error == 0) return (0); free_unr(cpuset_unr, id); uma_zfree(cpuset_zone, set); return (error); } /* * Recursively check for errors that would occur from applying mask to * the tree of sets starting at 'set'. Checks for sets that would become * empty as well as RDONLY flags. */ static int cpuset_testupdate(struct cpuset *set, cpuset_t *mask, int check_mask) { struct cpuset *nset; cpuset_t newmask; int error; mtx_assert(&cpuset_lock, MA_OWNED); if (set->cs_flags & CPU_SET_RDONLY) return (EPERM); if (check_mask) { if (!CPU_OVERLAP(&set->cs_mask, mask)) return (EDEADLK); CPU_COPY(&set->cs_mask, &newmask); CPU_AND(&newmask, mask); } else CPU_COPY(mask, &newmask); error = 0; LIST_FOREACH(nset, &set->cs_children, cs_siblings) if ((error = cpuset_testupdate(nset, &newmask, 1)) != 0) break; return (error); } /* * Applies the mask 'mask' without checking for empty sets or permissions. */ static void cpuset_update(struct cpuset *set, cpuset_t *mask) { struct cpuset *nset; mtx_assert(&cpuset_lock, MA_OWNED); CPU_AND(&set->cs_mask, mask); LIST_FOREACH(nset, &set->cs_children, cs_siblings) cpuset_update(nset, &set->cs_mask); return; } /* * Modify the set 'set' to use a copy of the mask provided. Apply this new * mask to restrict all children in the tree. Checks for validity before * applying the changes. */ static int cpuset_modify(struct cpuset *set, cpuset_t *mask) { struct cpuset *root; int error; error = priv_check(curthread, PRIV_SCHED_CPUSET); if (error) return (error); /* * In case we are called from within the jail * we do not allow modifying the dedicated root * cpuset of the jail but may still allow to * change child sets. */ if (jailed(curthread->td_ucred) && set->cs_flags & CPU_SET_ROOT) return (EPERM); /* * Verify that we have access to this set of * cpus. */ root = set->cs_parent; if (root && !CPU_SUBSET(&root->cs_mask, mask)) return (EINVAL); mtx_lock_spin(&cpuset_lock); error = cpuset_testupdate(set, mask, 0); if (error) goto out; CPU_COPY(mask, &set->cs_mask); cpuset_update(set, mask); out: mtx_unlock_spin(&cpuset_lock); return (error); } /* * Resolve the 'which' parameter of several cpuset apis. * * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid. Also * checks for permission via p_cansched(). * * For WHICH_SET returns a valid set with a new reference. * * -1 may be supplied for any argument to mean the current proc/thread or * the base set of the current thread. May fail with ESRCH/EPERM. */ int cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp, struct cpuset **setp) { struct cpuset *set; struct thread *td; struct proc *p; int error; *pp = p = NULL; *tdp = td = NULL; *setp = set = NULL; switch (which) { case CPU_WHICH_PID: if (id == -1) { PROC_LOCK(curproc); p = curproc; break; } if ((p = pfind(id)) == NULL) return (ESRCH); break; case CPU_WHICH_TID: if (id == -1) { PROC_LOCK(curproc); p = curproc; td = curthread; break; } td = tdfind(id, -1); if (td == NULL) return (ESRCH); p = td->td_proc; break; case CPU_WHICH_CPUSET: if (id == -1) { thread_lock(curthread); set = cpuset_refbase(curthread->td_cpuset); thread_unlock(curthread); } else set = cpuset_lookup(id, curthread); if (set) { *setp = set; return (0); } return (ESRCH); case CPU_WHICH_JAIL: { /* Find `set' for prison with given id. */ struct prison *pr; sx_slock(&allprison_lock); pr = prison_find_child(curthread->td_ucred->cr_prison, id); sx_sunlock(&allprison_lock); if (pr == NULL) return (ESRCH); cpuset_ref(pr->pr_cpuset); *setp = pr->pr_cpuset; mtx_unlock(&pr->pr_mtx); return (0); } case CPU_WHICH_IRQ: case CPU_WHICH_DOMAIN: return (0); default: return (EINVAL); } error = p_cansched(curthread, p); if (error) { PROC_UNLOCK(p); return (error); } if (td == NULL) td = FIRST_THREAD_IN_PROC(p); *pp = p; *tdp = td; return (0); } /* * Create an anonymous set with the provided mask in the space provided by * 'fset'. If the passed in set is anonymous we use its parent otherwise * the new set is a child of 'set'. */ static int cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask) { struct cpuset *parent; if (set->cs_id == CPUSET_INVALID) parent = set->cs_parent; else parent = set; if (!CPU_SUBSET(&parent->cs_mask, mask)) return (EDEADLK); return (_cpuset_create(fset, parent, mask, CPUSET_INVALID)); } /* * Handle two cases for replacing the base set or mask of an entire process. * * 1) Set is non-null and mask is null. This reparents all anonymous sets * to the provided set and replaces all non-anonymous td_cpusets with the * provided set. * 2) Mask is non-null and set is null. This replaces or creates anonymous * sets for every thread with the existing base as a parent. * * This is overly complicated because we can't allocate while holding a * spinlock and spinlocks must be held while changing and examining thread * state. */ static int cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask) { struct setlist freelist; struct setlist droplist; struct cpuset *tdset; struct cpuset *nset; struct thread *td; struct proc *p; int threads; int nfree; int error; /* * The algorithm requires two passes due to locking considerations. * * 1) Lookup the process and acquire the locks in the required order. * 2) If enough cpusets have not been allocated release the locks and * allocate them. Loop. */ LIST_INIT(&freelist); LIST_INIT(&droplist); nfree = 0; for (;;) { error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset); if (error) goto out; if (nfree >= p->p_numthreads) break; threads = p->p_numthreads; PROC_UNLOCK(p); for (; nfree < threads; nfree++) { nset = uma_zalloc(cpuset_zone, M_WAITOK); LIST_INSERT_HEAD(&freelist, nset, cs_link); } } PROC_LOCK_ASSERT(p, MA_OWNED); /* * Now that the appropriate locks are held and we have enough cpusets, * make sure the operation will succeed before applying changes. The * proc lock prevents td_cpuset from changing between calls. */ error = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); tdset = td->td_cpuset; /* * Verify that a new mask doesn't specify cpus outside of * the set the thread is a member of. */ if (mask) { if (tdset->cs_id == CPUSET_INVALID) tdset = tdset->cs_parent; if (!CPU_SUBSET(&tdset->cs_mask, mask)) error = EDEADLK; /* * Verify that a new set won't leave an existing thread * mask without a cpu to run on. It can, however, restrict * the set. */ } else if (tdset->cs_id == CPUSET_INVALID) { if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask)) error = EDEADLK; } thread_unlock(td); if (error) goto unlock_out; } /* * Replace each thread's cpuset while using deferred release. We * must do this because the thread lock must be held while operating * on the thread and this limits the type of operations allowed. */ FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); /* * If we presently have an anonymous set or are applying a * mask we must create an anonymous shadow set. That is * either parented to our existing base or the supplied set. * * If we have a base set with no anonymous shadow we simply * replace it outright. */ tdset = td->td_cpuset; if (tdset->cs_id == CPUSET_INVALID || mask) { nset = LIST_FIRST(&freelist); LIST_REMOVE(nset, cs_link); if (mask) error = cpuset_shadow(tdset, nset, mask); else error = _cpuset_create(nset, set, &tdset->cs_mask, CPUSET_INVALID); if (error) { LIST_INSERT_HEAD(&freelist, nset, cs_link); thread_unlock(td); break; } } else nset = cpuset_ref(set); cpuset_rel_defer(&droplist, tdset); td->td_cpuset = nset; sched_affinity(td); thread_unlock(td); } unlock_out: PROC_UNLOCK(p); out: while ((nset = LIST_FIRST(&droplist)) != NULL) cpuset_rel_complete(nset); while ((nset = LIST_FIRST(&freelist)) != NULL) { LIST_REMOVE(nset, cs_link); uma_zfree(cpuset_zone, nset); } return (error); } /* * Return a string representing a valid layout for a cpuset_t object. * It expects an incoming buffer at least sized as CPUSETBUFSIZ. */ char * cpusetobj_strprint(char *buf, const cpuset_t *set) { char *tbuf; size_t i, bytesp, bufsiz; tbuf = buf; bytesp = 0; bufsiz = CPUSETBUFSIZ; for (i = 0; i < (_NCPUWORDS - 1); i++) { bytesp = snprintf(tbuf, bufsiz, "%lx,", set->__bits[i]); bufsiz -= bytesp; tbuf += bytesp; } snprintf(tbuf, bufsiz, "%lx", set->__bits[_NCPUWORDS - 1]); return (buf); } /* * Build a valid cpuset_t object from a string representation. * It expects an incoming buffer at least sized as CPUSETBUFSIZ. */ int cpusetobj_strscan(cpuset_t *set, const char *buf) { u_int nwords; int i, ret; if (strlen(buf) > CPUSETBUFSIZ - 1) return (-1); /* Allow to pass a shorter version of the mask when necessary. */ nwords = 1; for (i = 0; buf[i] != '\0'; i++) if (buf[i] == ',') nwords++; if (nwords > _NCPUWORDS) return (-1); CPU_ZERO(set); for (i = 0; i < (nwords - 1); i++) { ret = sscanf(buf, "%lx,", &set->__bits[i]); if (ret == 0 || ret == -1) return (-1); buf = strstr(buf, ","); if (buf == NULL) return (-1); buf++; } ret = sscanf(buf, "%lx", &set->__bits[nwords - 1]); if (ret == 0 || ret == -1) return (-1); return (0); } /* * Apply an anonymous mask to a single thread. */ int cpuset_setthread(lwpid_t id, cpuset_t *mask) { struct cpuset *nset; struct cpuset *set; struct thread *td; struct proc *p; int error; nset = uma_zalloc(cpuset_zone, M_WAITOK); error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set); if (error) goto out; set = NULL; thread_lock(td); error = cpuset_shadow(td->td_cpuset, nset, mask); if (error == 0) { set = td->td_cpuset; td->td_cpuset = nset; sched_affinity(td); nset = NULL; } thread_unlock(td); PROC_UNLOCK(p); if (set) cpuset_rel(set); out: if (nset) uma_zfree(cpuset_zone, nset); return (error); } /* * Apply new cpumask to the ithread. */ int cpuset_setithread(lwpid_t id, int cpu) { struct cpuset *nset, *rset; struct cpuset *parent, *old_set; struct thread *td; struct proc *p; cpusetid_t cs_id; cpuset_t mask; int error; nset = uma_zalloc(cpuset_zone, M_WAITOK); rset = uma_zalloc(cpuset_zone, M_WAITOK); cs_id = CPUSET_INVALID; CPU_ZERO(&mask); if (cpu == NOCPU) CPU_COPY(cpuset_root, &mask); else CPU_SET(cpu, &mask); error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &old_set); if (error != 0 || ((cs_id = alloc_unr(cpuset_unr)) == CPUSET_INVALID)) goto out; /* cpuset_which() returns with PROC_LOCK held. */ old_set = td->td_cpuset; if (cpu == NOCPU) { /* * roll back to default set. We're not using cpuset_shadow() * here because we can fail CPU_SUBSET() check. This can happen * if default set does not contain all CPUs. */ error = _cpuset_create(nset, cpuset_default, &mask, CPUSET_INVALID); goto applyset; } if (old_set->cs_id == 1 || (old_set->cs_id == CPUSET_INVALID && old_set->cs_parent->cs_id == 1)) { /* * Current set is either default (1) or * shadowed version of default set. * * Allocate new root set to be able to shadow it * with any mask. */ error = _cpuset_create(rset, cpuset_zero, &cpuset_zero->cs_mask, cs_id); if (error != 0) { PROC_UNLOCK(p); goto out; } rset->cs_flags |= CPU_SET_ROOT; parent = rset; rset = NULL; cs_id = CPUSET_INVALID; } else { /* Assume existing set was already allocated by previous call */ parent = old_set; old_set = NULL; } error = cpuset_shadow(parent, nset, &mask); applyset: if (error == 0) { thread_lock(td); td->td_cpuset = nset; sched_affinity(td); thread_unlock(td); nset = NULL; } else old_set = NULL; PROC_UNLOCK(p); if (old_set != NULL) cpuset_rel(old_set); out: if (nset != NULL) uma_zfree(cpuset_zone, nset); if (rset != NULL) uma_zfree(cpuset_zone, rset); if (cs_id != CPUSET_INVALID) free_unr(cpuset_unr, cs_id); return (error); } /* * Creates system-wide cpusets and the cpuset for thread0 including two * sets: * * 0 - The root set which should represent all valid processors in the * system. It is initially created with a mask of all processors * because we don't know what processors are valid until cpuset_init() * runs. This set is immutable. * 1 - The default set which all processes are a member of until changed. * This allows an administrator to move all threads off of given cpus to * dedicate them to high priority tasks or save power etc. */ struct cpuset * cpuset_thread0(void) { struct cpuset *set; int error, i; cpuset_zone = uma_zcreate("cpuset", sizeof(struct cpuset), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE); /* * Create the root system set for the whole machine. Doesn't use * cpuset_create() due to NULL parent. */ set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); CPU_FILL(&set->cs_mask); LIST_INIT(&set->cs_children); LIST_INSERT_HEAD(&cpuset_ids, set, cs_link); set->cs_ref = 1; set->cs_flags = CPU_SET_ROOT; cpuset_zero = set; cpuset_root = &set->cs_mask; /* * Now derive a default, modifiable set from that to give out. */ set = uma_zalloc(cpuset_zone, M_WAITOK); error = _cpuset_create(set, cpuset_zero, &cpuset_zero->cs_mask, 1); KASSERT(error == 0, ("Error creating default set: %d\n", error)); cpuset_default = set; /* * Initialize the unit allocator. 0 and 1 are allocated above. */ cpuset_unr = new_unrhdr(2, INT_MAX, NULL); /* * If MD code has not initialized per-domain cpusets, place all * CPUs in domain 0. */ for (i = 0; i < MAXMEMDOM; i++) if (!CPU_EMPTY(&cpuset_domain[i])) goto domains_set; CPU_COPY(&all_cpus, &cpuset_domain[0]); domains_set: return (set); } /* * Create a cpuset, which would be cpuset_create() but * mark the new 'set' as root. * * We are not going to reparent the td to it. Use cpuset_setproc_update_set() * for that. * * In case of no error, returns the set in *setp locked with a reference. */ int cpuset_create_root(struct prison *pr, struct cpuset **setp) { struct cpuset *set; int error; KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__)); KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__)); error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask); if (error) return (error); KASSERT(*setp != NULL, ("[%s:%d] cpuset_create returned invalid data", __func__, __LINE__)); /* Mark the set as root. */ set = *setp; set->cs_flags |= CPU_SET_ROOT; return (0); } int cpuset_setproc_update_set(struct proc *p, struct cpuset *set) { int error; KASSERT(p != NULL, ("[%s:%d] invalid proc", __func__, __LINE__)); KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__)); cpuset_ref(set); error = cpuset_setproc(p->p_pid, set, NULL); if (error) return (error); cpuset_rel(set); return (0); } /* * This is called once the final set of system cpus is known. Modifies * the root set and all children and mark the root read-only. */ static void cpuset_init(void *arg) { cpuset_t mask; mask = all_cpus; if (cpuset_modify(cpuset_zero, &mask)) panic("Can't set initial cpuset mask.\n"); cpuset_zero->cs_flags |= CPU_SET_RDONLY; } SYSINIT(cpuset, SI_SUB_SMP, SI_ORDER_ANY, cpuset_init, NULL); #ifndef _SYS_SYSPROTO_H_ struct cpuset_args { cpusetid_t *setid; }; #endif int sys_cpuset(struct thread *td, struct cpuset_args *uap) { struct cpuset *root; struct cpuset *set; int error; thread_lock(td); root = cpuset_refroot(td->td_cpuset); thread_unlock(td); error = cpuset_create(&set, root, &root->cs_mask); cpuset_rel(root); if (error) return (error); error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id)); if (error == 0) error = cpuset_setproc(-1, set, NULL); cpuset_rel(set); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_setid_args { cpuwhich_t which; id_t id; cpusetid_t setid; }; #endif int sys_cpuset_setid(struct thread *td, struct cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, uap->id, uap->setid)); } int kern_cpuset_setid(struct thread *td, cpuwhich_t which, id_t id, cpusetid_t setid) { struct cpuset *set; int error; /* * Presently we only support per-process sets. */ if (which != CPU_WHICH_PID) return (EINVAL); set = cpuset_lookup(setid, td); if (set == NULL) return (ESRCH); error = cpuset_setproc(id, set, NULL); cpuset_rel(set); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_getid_args { cpulevel_t level; cpuwhich_t which; id_t id; cpusetid_t *setid; }; #endif int sys_cpuset_getid(struct thread *td, struct cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, uap->id, uap->setid)); } int kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpusetid_t *setid) { struct cpuset *nset; struct cpuset *set; struct thread *ttd; struct proc *p; cpusetid_t tmpid; int error; if (level == CPU_LEVEL_WHICH && which != CPU_WHICH_CPUSET) return (EINVAL); error = cpuset_which(which, id, &p, &ttd, &set); if (error) return (error); switch (which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_refbase(ttd->td_cpuset); thread_unlock(ttd); PROC_UNLOCK(p); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: case CPU_WHICH_DOMAIN: return (EINVAL); } switch (level) { case CPU_LEVEL_ROOT: nset = cpuset_refroot(set); cpuset_rel(set); set = nset; break; case CPU_LEVEL_CPUSET: break; case CPU_LEVEL_WHICH: break; } tmpid = set->cs_id; cpuset_rel(set); if (error == 0) error = copyout(&tmpid, setid, sizeof(id)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_getaffinity_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t cpusetsize; cpuset_t *mask; }; #endif int sys_cpuset_getaffinity(struct thread *td, struct cpuset_getaffinity_args *uap) { + + return (kern_cpuset_getaffinity(td, uap->level, uap->which, + uap->id, uap->cpusetsize, uap->mask)); +} + +int +kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, + id_t id, size_t cpusetsize, cpuset_t *maskp) +{ struct thread *ttd; struct cpuset *nset; struct cpuset *set; struct proc *p; cpuset_t *mask; int error; size_t size; - if (uap->cpusetsize < sizeof(cpuset_t) || - uap->cpusetsize > CPU_MAXSIZE / NBBY) + if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY) return (ERANGE); - size = uap->cpusetsize; + size = cpusetsize; mask = malloc(size, M_TEMP, M_WAITOK | M_ZERO); - error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); + error = cpuset_which(which, id, &p, &ttd, &set); if (error) goto out; - switch (uap->level) { + switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: - switch (uap->which) { + switch (which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); thread_unlock(ttd); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: case CPU_WHICH_DOMAIN: error = EINVAL; goto out; } - if (uap->level == CPU_LEVEL_ROOT) + if (level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); CPU_COPY(&nset->cs_mask, mask); cpuset_rel(nset); break; case CPU_LEVEL_WHICH: - switch (uap->which) { + switch (which) { case CPU_WHICH_TID: thread_lock(ttd); CPU_COPY(&ttd->td_cpuset->cs_mask, mask); thread_unlock(ttd); break; case CPU_WHICH_PID: FOREACH_THREAD_IN_PROC(p, ttd) { thread_lock(ttd); CPU_OR(mask, &ttd->td_cpuset->cs_mask); thread_unlock(ttd); } break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: CPU_COPY(&set->cs_mask, mask); break; case CPU_WHICH_IRQ: - error = intr_getaffinity(uap->id, mask); + error = intr_getaffinity(id, mask); break; case CPU_WHICH_DOMAIN: - if (uap->id < 0 || uap->id >= MAXMEMDOM) + if (id < 0 || id >= MAXMEMDOM) error = ESRCH; else - CPU_COPY(&cpuset_domain[uap->id], mask); + CPU_COPY(&cpuset_domain[id], mask); break; } break; default: error = EINVAL; break; } if (set) cpuset_rel(set); if (p) PROC_UNLOCK(p); if (error == 0) - error = copyout(mask, uap->mask, size); + error = copyout(mask, maskp, size); out: free(mask, M_TEMP); return (error); } #ifndef _SYS_SYSPROTO_H_ struct cpuset_setaffinity_args { cpulevel_t level; cpuwhich_t which; id_t id; size_t cpusetsize; const cpuset_t *mask; }; #endif int sys_cpuset_setaffinity(struct thread *td, struct cpuset_setaffinity_args *uap) { + + return (kern_cpuset_setaffinity(td, uap->level, uap->which, + uap->id, uap->cpusetsize, uap->mask)); +} + +int +kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, + id_t id, size_t cpusetsize, const cpuset_t *maskp) +{ struct cpuset *nset; struct cpuset *set; struct thread *ttd; struct proc *p; cpuset_t *mask; int error; - if (uap->cpusetsize < sizeof(cpuset_t) || - uap->cpusetsize > CPU_MAXSIZE / NBBY) + if (cpusetsize < sizeof(cpuset_t) || cpusetsize > CPU_MAXSIZE / NBBY) return (ERANGE); - mask = malloc(uap->cpusetsize, M_TEMP, M_WAITOK | M_ZERO); - error = copyin(uap->mask, mask, uap->cpusetsize); + mask = malloc(cpusetsize, M_TEMP, M_WAITOK | M_ZERO); + error = copyin(maskp, mask, cpusetsize); if (error) goto out; /* * Verify that no high bits are set. */ - if (uap->cpusetsize > sizeof(cpuset_t)) { + if (cpusetsize > sizeof(cpuset_t)) { char *end; char *cp; end = cp = (char *)&mask->__bits; - end += uap->cpusetsize; + end += cpusetsize; cp += sizeof(cpuset_t); while (cp != end) if (*cp++ != 0) { error = EINVAL; goto out; } } - switch (uap->level) { + switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: - error = cpuset_which(uap->which, uap->id, &p, &ttd, &set); + error = cpuset_which(which, id, &p, &ttd, &set); if (error) break; - switch (uap->which) { + switch (which) { case CPU_WHICH_TID: case CPU_WHICH_PID: thread_lock(ttd); set = cpuset_ref(ttd->td_cpuset); thread_unlock(ttd); PROC_UNLOCK(p); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: break; case CPU_WHICH_IRQ: case CPU_WHICH_DOMAIN: error = EINVAL; goto out; } - if (uap->level == CPU_LEVEL_ROOT) + if (level == CPU_LEVEL_ROOT) nset = cpuset_refroot(set); else nset = cpuset_refbase(set); error = cpuset_modify(nset, mask); cpuset_rel(nset); cpuset_rel(set); break; case CPU_LEVEL_WHICH: - switch (uap->which) { + switch (which) { case CPU_WHICH_TID: - error = cpuset_setthread(uap->id, mask); + error = cpuset_setthread(id, mask); break; case CPU_WHICH_PID: - error = cpuset_setproc(uap->id, NULL, mask); + error = cpuset_setproc(id, NULL, mask); break; case CPU_WHICH_CPUSET: case CPU_WHICH_JAIL: - error = cpuset_which(uap->which, uap->id, &p, - &ttd, &set); + error = cpuset_which(which, id, &p, &ttd, &set); if (error == 0) { error = cpuset_modify(set, mask); cpuset_rel(set); } break; case CPU_WHICH_IRQ: - error = intr_setaffinity(uap->id, mask); + error = intr_setaffinity(id, mask); break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } out: free(mask, M_TEMP); return (error); } #ifdef DDB void ddb_display_cpuset(const cpuset_t *set) { int cpu, once; for (once = 0, cpu = 0; cpu < CPU_SETSIZE; cpu++) { if (CPU_ISSET(cpu, set)) { if (once == 0) { db_printf("%d", cpu); once = 1; } else db_printf(",%d", cpu); } } if (once == 0) db_printf(""); } DB_SHOW_COMMAND(cpusets, db_show_cpusets) { struct cpuset *set; LIST_FOREACH(set, &cpuset_ids, cs_link) { db_printf("set=%p id=%-6u ref=%-6d flags=0x%04x parent id=%d\n", set, set->cs_id, set->cs_ref, set->cs_flags, (set->cs_parent != NULL) ? set->cs_parent->cs_id : 0); db_printf(" mask="); ddb_display_cpuset(&set->cs_mask); db_printf("\n"); if (db_pager_quit) break; } } #endif /* DDB */ Index: head/sys/sys/syscallsubr.h =================================================================== --- head/sys/sys/syscallsubr.h (revision 313280) +++ head/sys/sys/syscallsubr.h (revision 313281) @@ -1,275 +1,281 @@ /*- * Copyright (c) 2002 Ian Dowse. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_SYSCALLSUBR_H_ #define _SYS_SYSCALLSUBR_H_ #include #include #include #include #include +#include struct file; struct filecaps; enum idtype; struct itimerval; struct image_args; struct jail; struct kevent; struct kevent_copyops; struct kld_file_stat; struct ksiginfo; struct mbuf; struct msghdr; struct msqid_ds; struct pollfd; struct ogetdirentries_args; struct rlimit; struct rusage; union semun; struct sockaddr; struct stat; struct thr_param; struct sched_param; struct __wrusage; int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, u_int buflen, u_int path_max); int kern_accept(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, struct file **fp); int kern_accept4(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, int flags, struct file **fp); int kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode); int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta); int kern_alternate_path(struct thread *td, const char *prefix, const char *path, enum uio_seg pathseg, char **pathbuf, int create, int dirfd); int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds); int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights); int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg); int kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, clockid_t *clk_id); int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts); int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_close(struct thread *td, int fd); int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); +int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, + cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp); +int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, + cpuwhich_t which, id_t id, size_t cpusetsize, + const cpuset_t *maskp); int kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpusetid_t *setid); int kern_cpuset_setid(struct thread *td, cpuwhich_t which, id_t id, cpusetid_t setid); int kern_dup(struct thread *td, u_int mode, int flags, int old, int new); int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p); int kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, mode_t mode, int flag); int kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int uid, int gid, int flag); int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg); int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf); int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); int kern_fstat(struct thread *td, int fd, struct stat *sbp); int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); int kern_fsync(struct thread *td, int fd, bool fullsync); int kern_ftruncate(struct thread *td, int fd, off_t length); int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg); int kern_futimens(struct thread *td, int fd, struct timespec *tptr, enum uio_seg tptrseg); int kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, long *basep, ssize_t *residp, enum uio_seg bufseg); int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode); int kern_getitimer(struct thread *, u_int, struct itimerval *); int kern_getppid(struct thread *); int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getrusage(struct thread *td, int who, struct rusage *rup); int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t *valsize); int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data); int kern_jail(struct thread *td, struct jail *j); int kern_jail_get(struct thread *td, struct uio *options, int flags); int kern_jail_set(struct thread *td, struct uio *options, int flags); int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops); int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); int kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, enum uio_seg segflg, int follow); int kern_listen(struct thread *td, int s, int backlog); int kern_lseek(struct thread *td, int fd, off_t offset, int whence); int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, int mode); int kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int mode); int kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int mode, int dev); int kern_msgctl(struct thread *, int, int, struct msqid_ds *); int kern_msgsnd(struct thread *, int, const void *, size_t, int, long); int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff); int kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode); int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, u_long flags); int kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, struct filecaps *fcaps2); int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); int kern_posix_error(struct thread *td, int error); int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset); int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits); int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data); int kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, off_t offset); int kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count); int kern_readv(struct thread *td, int fd, struct uio *auio); int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp); int kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, enum uio_seg pathseg); int kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg); int kern_sched_getparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_getscheduler(struct thread *td, struct thread *targettd, int *policy); int kern_sched_setparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_setscheduler(struct thread *td, struct thread *targettd, int policy, struct sched_param *param); int kern_sched_rr_get_interval(struct thread *td, pid_t pid, struct timespec *ts); int kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd, struct timespec *ts); int kern_semctl(struct thread *td, int semid, int semnum, int cmd, union semun *arg, register_t *rval); int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits); int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg); int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups); int kern_setitimer(struct thread *, u_int, struct itimerval *, struct itimerval *); int kern_setrlimit(struct thread *, u_int, struct rlimit *); int kern_setsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t valsize); int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp); int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz); int kern_shutdown(struct thread *td, int s, int how); int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags); int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss); int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags); int kern_sigsuspend(struct thread *td, sigset_t mask); int kern_sigtimedwait(struct thread *td, sigset_t waitset, struct ksiginfo *ksi, struct timespec *timeout); int kern_socket(struct thread *td, int domain, int type, int protocol); int kern_statat(struct thread *td, int flag, int fd, char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)); int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, enum uio_seg segflg); int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id); int kern_ktimer_delete(struct thread *, int); int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval); int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val); int kern_ktimer_getoverrun(struct thread *td, int timer_id); int kern_thr_alloc(struct proc *, int pages, struct thread **); int kern_thr_exit(struct thread *td); int kern_thr_new(struct thread *td, struct thr_param *param); int kern_thr_suspend(struct thread *td, struct timespec *tsp); int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length); int kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, ino_t oldinum); int kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, int follow); int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rup); int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status, int options, struct __wrusage *wrup, siginfo_t *sip); int kern_writev(struct thread *td, int fd, struct uio *auio); int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv); /* flags for kern_sigaction */ #define KSA_OSIGSET 0x0001 /* uses osigact_t */ #define KSA_FREEBSD4 0x0002 /* uses ucontext4 */ #endif /* !_SYS_SYSCALLSUBR_H_ */