Index: head/sys/compat/freebsd32/freebsd32.h =================================================================== --- head/sys/compat/freebsd32/freebsd32.h (revision 367743) +++ head/sys/compat/freebsd32/freebsd32.h (revision 367744) @@ -1,391 +1,412 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _COMPAT_FREEBSD32_FREEBSD32_H_ #define _COMPAT_FREEBSD32_FREEBSD32_H_ #include #include #include #include /* * i386 is the only arch with a 32-bit time_t */ #ifdef __amd64__ typedef int32_t time32_t; #else typedef int64_t time32_t; #endif struct timeval32 { time32_t tv_sec; int32_t tv_usec; }; struct timespec32 { time32_t tv_sec; int32_t tv_nsec; }; struct itimerspec32 { struct timespec32 it_interval; struct timespec32 it_value; }; struct bintime32 { time32_t sec; uint32_t frac[2]; }; struct rusage32 { struct timeval32 ru_utime; struct timeval32 ru_stime; int32_t ru_maxrss; int32_t ru_ixrss; int32_t ru_idrss; int32_t ru_isrss; int32_t ru_minflt; int32_t ru_majflt; int32_t ru_nswap; int32_t ru_inblock; int32_t ru_oublock; int32_t ru_msgsnd; int32_t ru_msgrcv; int32_t ru_nsignals; int32_t ru_nvcsw; int32_t ru_nivcsw; }; struct wrusage32 { struct rusage32 wru_self; struct rusage32 wru_children; }; struct itimerval32 { struct timeval32 it_interval; struct timeval32 it_value; }; +struct umtx_time32 { + struct timespec32 _timeout; + uint32_t _flags; + uint32_t _clockid; +}; + +struct umtx_robust_lists_params_compat32 { + uint32_t robust_list_offset; + uint32_t robust_priv_list_offset; + uint32_t robust_inact_offset; +}; + +struct umutex32 { + volatile __lwpid_t m_owner; /* Owner of the mutex */ + __uint32_t m_flags; /* Flags of the mutex */ + __uint32_t m_ceilings[2]; /* Priority protect ceiling */ + __uint32_t m_rb_lnk; /* Robust linkage */ + __uint32_t m_pad; + __uint32_t m_spare[2]; +}; + #define FREEBSD4_MFSNAMELEN 16 #define FREEBSD4_MNAMELEN (88 - 2 * sizeof(int32_t)) /* 4.x version */ struct statfs32 { int32_t f_spare2; int32_t f_bsize; int32_t f_iosize; int32_t f_blocks; int32_t f_bfree; int32_t f_bavail; int32_t f_files; int32_t f_ffree; fsid_t f_fsid; uid_t f_owner; int32_t f_type; int32_t f_flags; int32_t f_syncwrites; int32_t f_asyncwrites; char f_fstypename[FREEBSD4_MFSNAMELEN]; char f_mntonname[FREEBSD4_MNAMELEN]; int32_t 
f_syncreads; int32_t f_asyncreads; int16_t f_spares1; char f_mntfromname[FREEBSD4_MNAMELEN]; int16_t f_spares2 __packed; int32_t f_spare[2]; }; struct iovec32 { u_int32_t iov_base; int iov_len; }; struct msghdr32 { u_int32_t msg_name; socklen_t msg_namelen; u_int32_t msg_iov; int msg_iovlen; u_int32_t msg_control; socklen_t msg_controllen; int msg_flags; }; #if defined(__amd64__) #define __STAT32_TIME_T_EXT 1 #endif struct stat32 { dev_t st_dev; ino_t st_ino; nlink_t st_nlink; mode_t st_mode; u_int16_t st_padding0; uid_t st_uid; gid_t st_gid; u_int32_t st_padding1; dev_t st_rdev; #ifdef __STAT32_TIME_T_EXT __int32_t st_atim_ext; #endif struct timespec32 st_atim; #ifdef __STAT32_TIME_T_EXT __int32_t st_mtim_ext; #endif struct timespec32 st_mtim; #ifdef __STAT32_TIME_T_EXT __int32_t st_ctim_ext; #endif struct timespec32 st_ctim; #ifdef __STAT32_TIME_T_EXT __int32_t st_btim_ext; #endif struct timespec32 st_birthtim; off_t st_size; int64_t st_blocks; u_int32_t st_blksize; u_int32_t st_flags; u_int64_t st_gen; u_int64_t st_spare[10]; }; struct freebsd11_stat32 { u_int32_t st_dev; u_int32_t st_ino; mode_t st_mode; u_int16_t st_nlink; uid_t st_uid; gid_t st_gid; u_int32_t st_rdev; struct timespec32 st_atim; struct timespec32 st_mtim; struct timespec32 st_ctim; off_t st_size; int64_t st_blocks; u_int32_t st_blksize; u_int32_t st_flags; u_int32_t st_gen; int32_t st_lspare; struct timespec32 st_birthtim; unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32)); unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32)); }; struct ostat32 { __uint16_t st_dev; __uint32_t st_ino; mode_t st_mode; __uint16_t st_nlink; __uint16_t st_uid; __uint16_t st_gid; __uint16_t st_rdev; __int32_t st_size; struct timespec32 st_atim; struct timespec32 st_mtim; struct timespec32 st_ctim; __int32_t st_blksize; __int32_t st_blocks; u_int32_t st_flags; __uint32_t st_gen; }; struct jail32_v0 { u_int32_t version; uint32_t path; uint32_t hostname; u_int32_t ip_number; }; struct jail32 { 
uint32_t version; uint32_t path; uint32_t hostname; uint32_t jailname; uint32_t ip4s; uint32_t ip6s; uint32_t ip4; uint32_t ip6; }; struct sigaction32 { u_int32_t sa_u; int sa_flags; sigset_t sa_mask; }; struct thr_param32 { uint32_t start_func; uint32_t arg; uint32_t stack_base; uint32_t stack_size; uint32_t tls_base; uint32_t tls_size; uint32_t child_tid; uint32_t parent_tid; int32_t flags; uint32_t rtp; uint32_t spare[3]; }; struct i386_ldt_args32 { uint32_t start; uint32_t descs; uint32_t num; }; struct mq_attr32 { int mq_flags; int mq_maxmsg; int mq_msgsize; int mq_curmsgs; int __reserved[4]; }; struct kinfo_proc32 { int ki_structsize; int ki_layout; uint32_t ki_args; uint32_t ki_paddr; uint32_t ki_addr; uint32_t ki_tracep; uint32_t ki_textvp; uint32_t ki_fd; uint32_t ki_vmspace; uint32_t ki_wchan; pid_t ki_pid; pid_t ki_ppid; pid_t ki_pgid; pid_t ki_tpgid; pid_t ki_sid; pid_t ki_tsid; short ki_jobc; short ki_spare_short1; uint32_t ki_tdev_freebsd11; sigset_t ki_siglist; sigset_t ki_sigmask; sigset_t ki_sigignore; sigset_t ki_sigcatch; uid_t ki_uid; uid_t ki_ruid; uid_t ki_svuid; gid_t ki_rgid; gid_t ki_svgid; short ki_ngroups; short ki_spare_short2; gid_t ki_groups[KI_NGROUPS]; uint32_t ki_size; int32_t ki_rssize; int32_t ki_swrss; int32_t ki_tsize; int32_t ki_dsize; int32_t ki_ssize; u_short ki_xstat; u_short ki_acflag; fixpt_t ki_pctcpu; u_int ki_estcpu; u_int ki_slptime; u_int ki_swtime; u_int ki_cow; u_int64_t ki_runtime; struct timeval32 ki_start; struct timeval32 ki_childtime; int ki_flag; int ki_kiflag; int ki_traceflag; char ki_stat; signed char ki_nice; char ki_lock; char ki_rqindex; u_char ki_oncpu_old; u_char ki_lastcpu_old; char ki_tdname[TDNAMLEN+1]; char ki_wmesg[WMESGLEN+1]; char ki_login[LOGNAMELEN+1]; char ki_lockname[LOCKNAMELEN+1]; char ki_comm[COMMLEN+1]; char ki_emul[KI_EMULNAMELEN+1]; char ki_loginclass[LOGINCLASSLEN+1]; char ki_moretdname[MAXCOMLEN-TDNAMLEN+1]; char ki_sparestrings[46]; int ki_spareints[KI_NSPARE_INT]; uint64_t ki_tdev; 
int ki_oncpu; int ki_lastcpu; int ki_tracer; int ki_flag2; int ki_fibnum; u_int ki_cr_flags; int ki_jid; int ki_numthreads; lwpid_t ki_tid; struct priority ki_pri; struct rusage32 ki_rusage; struct rusage32 ki_rusage_ch; uint32_t ki_pcb; uint32_t ki_kstack; uint32_t ki_udata; uint32_t ki_tdaddr; uint32_t ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ int ki_sparelongs[KI_NSPARE_LONG]; int ki_sflag; int ki_tdflags; }; struct kinfo_sigtramp32 { uint32_t ksigtramp_start; uint32_t ksigtramp_end; uint32_t ksigtramp_spare[4]; }; struct kld32_file_stat_1 { int version; /* set to sizeof(struct kld_file_stat_1) */ char name[MAXPATHLEN]; int refs; int id; uint32_t address; /* load address */ uint32_t size; /* size in bytes */ }; struct kld32_file_stat { int version; /* set to sizeof(struct kld_file_stat) */ char name[MAXPATHLEN]; int refs; int id; uint32_t address; /* load address */ uint32_t size; /* size in bytes */ char pathname[MAXPATHLEN]; }; struct procctl_reaper_pids32 { u_int rp_count; u_int rp_pad0[15]; uint32_t rp_pids; }; #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ Index: head/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 367743) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 367744) @@ -1,3767 +1,3776 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ktrace.h" #define __ELF_WORD_SIZE 32 #ifdef COMPAT_FREEBSD11 #define _WANT_FREEBSD11_KEVENT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef INET #include #endif #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #endif #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); struct ptrace_io_desc32 { int piod_op; uint32_t piod_offs; uint32_t piod_addr; uint32_t piod_len; }; struct ptrace_sc_ret32 { uint32_t sr_retval[2]; int sr_error; }; struct 
ptrace_vm_entry32 { int pve_entry; int pve_timestamp; uint32_t pve_start; uint32_t pve_end; uint32_t pve_offset; u_int pve_prot; u_int pve_pathlen; int32_t pve_fileid; u_int pve_fsid; uint32_t pve_path; }; #ifdef __amd64__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); CTASSERT(sizeof(struct bintime32) == 12); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifdef __amd64__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); #ifdef __amd64__ CTASSERT(sizeof(struct kevent32) == 56); #else CTASSERT(sizeof(struct kevent32) == 64); #endif CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifdef __amd64__ CTASSERT(sizeof(struct stat32) == 208); CTASSERT(sizeof(struct freebsd11_stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) 
error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, 
FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD10 int freebsd10_freebsd32_pipe(struct thread *td, struct freebsd10_freebsd32_pipe_args *uap) { return (freebsd10_pipe(td, (struct freebsd10_pipe_args*)uap)); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. 
*/ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = exec_args_add_arg(args, argp, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = exec_args_add_env(args, envp, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 
0 : error, td); return (error); } int freebsd32_mknodat(struct thread *td, struct freebsd32_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, PAIR32TO64(dev_t, uap->dev))); } int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ) != 0) prot |= PROT_EXEC; #endif return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len, prot)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); 
return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. 
*/ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); #if BYTE_ORDER == LITTLE_ENDIAN ks32[i].data1 = kevp[i].data; ks32[i].data2 = kevp[i].data >> 32; #else ks32[i].data1 = kevp[i].data >> 32; ks32[i].data2 = kevp[i].data; #endif PTROUT_CP(kevp[i], ks32[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { e = kevp[i].ext[j]; #if BYTE_ORDER == LITTLE_ENDIAN ks32[i].ext64[2 * j] = e; ks32[i].ext64[2 * j + 1] = e >> 32; #else ks32[i].ext64[2 * j] = e >> 32; ks32[i].ext64[2 * j + 1] = e; #endif } } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. 
*/ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); kevp[i].data = PAIR32TO64(uint64_t, ks32[i].data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { #if BYTE_ORDER == LITTLE_ENDIAN e = ks32[i].ext64[2 * j + 1]; e <<= 32; e += ks32[i].ext64[2 * j]; #else e = ks32[i].ext64[2 * j]; e <<= 32; e += ks32[i].ext64[2 * j + 1]; #endif kevp[i].ext[j] = e; } } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent_copyout, .k_copyin = freebsd32_kevent_copyin, }; #ifdef KTRACE struct kevent32 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct kevent32)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct kevent32)); #endif return (error); } #ifdef COMPAT_FREEBSD11 static int freebsd32_kevent11_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct kevent32_freebsd11 
ks32[KQ_NEVENTS]; int i, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent11_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct kevent32_freebsd11 ks32[KQ_NEVENTS]; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) kevp[i].ext[j] = 0; } done: return (error); } int freebsd11_freebsd32_kevent(struct thread *td, struct freebsd11_freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent11_copyout, .k_copyin = freebsd32_kevent11_copyin, }; #ifdef KTRACE struct kevent32_freebsd11 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32_freebsd11", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct kevent32_freebsd11)); #endif error = 
kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32_freebsd11", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct kevent32_freebsd11)); #endif return (error); } #endif int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error == 0) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static void ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl, struct ptrace_lwpinfo32 *pl32) { bzero(pl32, sizeof(*pl32)); pl32->pl_lwpid = pl->pl_lwpid; pl32->pl_event = pl->pl_event; pl32->pl_flags = pl->pl_flags; pl32->pl_sigmask = pl->pl_sigmask; pl32->pl_siglist = pl->pl_siglist; siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo); strcpy(pl32->pl_tdname, pl->pl_tdname); pl32->pl_child_pid = pl->pl_child_pid; pl32->pl_syscall_code = pl->pl_syscall_code; pl32->pl_syscall_narg = pl->pl_syscall_narg; } static void ptrace_sc_ret_to32(const struct ptrace_sc_ret *psr, struct ptrace_sc_ret32 *psr32) { bzero(psr32, sizeof(*psr32)); psr32->sr_retval[0] = psr->sr_retval[0]; psr32->sr_retval[1] = psr->sr_retval[1]; psr32->sr_error = psr->sr_error; } int freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap) { union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct dbreg32 dbreg; struct fpreg32 fpreg; 
struct reg32 reg; register_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret psr; int ptevents; } r; union { struct ptrace_io_desc32 piod; struct ptrace_lwpinfo32 pl; struct ptrace_vm_entry32 pve; uint32_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret32 psr; } r32; void *addr; int data, error = 0, i; AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; data = uap->data; switch (uap->req) { case PT_GET_EVENT_MASK: case PT_GET_SC_ARGS: case PT_GET_SC_RET: break; case PT_LWPINFO: if (uap->data > sizeof(r32.pl)) return (EINVAL); /* * Pass size of native structure in 'data'. Truncate * if necessary to avoid siginfo. */ data = sizeof(r.pl); if (uap->data < offsetof(struct ptrace_lwpinfo32, pl_siginfo) + sizeof(struct siginfo32)) data = offsetof(struct ptrace_lwpinfo, pl_siginfo); break; case PT_GETREGS: bzero(&r.reg, sizeof(r.reg)); break; case PT_GETFPREGS: bzero(&r.fpreg, sizeof(r.fpreg)); break; case PT_GETDBREGS: bzero(&r.dbreg, sizeof(r.dbreg)); break; case PT_SETREGS: error = copyin(uap->addr, &r.reg, sizeof(r.reg)); break; case PT_SETFPREGS: error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg)); break; case PT_SETDBREGS: error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg)); break; case PT_SET_EVENT_MASK: if (uap->data != sizeof(r.ptevents)) error = EINVAL; else error = copyin(uap->addr, &r.ptevents, uap->data); break; case PT_IO: error = copyin(uap->addr, &r32.piod, sizeof(r32.piod)); if (error) break; CP(r32.piod, r.piod, piod_op); PTRIN_CP(r32.piod, r.piod, piod_offs); PTRIN_CP(r32.piod, r.piod, piod_addr); CP(r32.piod, r.piod, piod_len); break; case PT_VM_ENTRY: error = copyin(uap->addr, &r32.pve, sizeof(r32.pve)); if (error) break; CP(r32.pve, r.pve, pve_entry); CP(r32.pve, r.pve, pve_timestamp); CP(r32.pve, r.pve, pve_start); CP(r32.pve, r.pve, pve_end); CP(r32.pve, r.pve, pve_offset); CP(r32.pve, r.pve, pve_prot); CP(r32.pve, r.pve, pve_pathlen); CP(r32.pve, r.pve, pve_fileid); CP(r32.pve, r.pve, pve_fsid); 
PTRIN_CP(r32.pve, r.pve, pve_path); break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: CP(r.pve, r32.pve, pve_entry); CP(r.pve, r32.pve, pve_timestamp); CP(r.pve, r32.pve, pve_start); CP(r.pve, r32.pve, pve_end); CP(r.pve, r32.pve, pve_offset); CP(r.pve, r32.pve, pve_prot); CP(r.pve, r32.pve, pve_pathlen); CP(r.pve, r32.pve, pve_fileid); CP(r.pve, r32.pve, pve_fsid); error = copyout(&r32.pve, uap->addr, sizeof(r32.pve)); break; case PT_IO: CP(r.piod, r32.piod, piod_len); error = copyout(&r32.piod, uap->addr, sizeof(r32.piod)); break; case PT_GETREGS: error = copyout(&r.reg, uap->addr, sizeof(r.reg)); break; case PT_GETFPREGS: error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg)); break; case PT_GETDBREGS: error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg)); break; case PT_GET_EVENT_MASK: /* NB: The size in uap->data is validated in kern_ptrace(). 
*/ error = copyout(&r.ptevents, uap->addr, uap->data); break; case PT_LWPINFO: ptrace_lwpinfo_to32(&r.pl, &r32.pl); error = copyout(&r32.pl, uap->addr, uap->data); break; case PT_GET_SC_ARGS: for (i = 0; i < nitems(r.args); i++) r32.args[i] = (uint32_t)r.args[i]; error = copyout(r32.args, uap->addr, MIN(uap->data, sizeof(r32.args))); break; case PT_GET_SC_RET: ptrace_sc_ret_to32(&r.psr, &r32.psr); error = copyout(&r32.psr, uap->addr, MIN(uap->data, sizeof(r32.psr))); break; } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio 
*auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) 
- 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static size_t freebsd32_cmsg_convert(const struct cmsghdr *cm, void *data, socklen_t datalen) { size_t copylen; union { struct timespec32 ts; struct timeval32 tv; struct bintime32 bt; } tmp32; union { struct timespec ts; struct timeval tv; struct bintime bt; } *in; in = data; copylen = 0; switch (cm->cmsg_level) { case SOL_SOCKET: switch (cm->cmsg_type) { case SCM_TIMESTAMP: TV_CP(*in, tmp32, tv); copylen = sizeof(tmp32.tv); break; case SCM_BINTIME: BT_CP(*in, tmp32, bt); copylen = sizeof(tmp32.bt); break; case SCM_REALTIME: case SCM_MONOTONIC: TS_CP(*in, tmp32, ts); copylen = sizeof(tmp32.ts); break; default: break; } default: break; } if (copylen == 0) return (datalen); KASSERT((datalen >= copylen), ("corrupted cmsghdr")); bcopy(&tmp32, data, copylen); return (copylen); } static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen, datalen_out, oldclen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; ctlbuf = msg->msg_control; for (m = control; m != NULL && len > 0; m = m->m_next) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; datalen_out = freebsd32_cmsg_convert(cm, data, datalen); /* * Copy out the message header. Preserve the native * message size in case we need to inspect the message * contents later. 
*/ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); goto exit; } oldclen = cm->cmsg_len; cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen_out; error = copyout(cm, ctlbuf, copylen); cm->cmsg_len = oldclen; if (error != 0) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); copylen = datalen_out; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); break; } /* Copy out the message data. */ error = copyout(data, ctlbuf, copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } msg->msg_controllen += FREEBSD32_CMSG_SPACE(datalen_out); } } if (len == 0 && m != NULL) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); } exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? 
&control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } return (error); } /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct cmsghdr *cm; struct mbuf *m; void *in, *in1, *md; u_int msglen, outlen; int error; if (buflen > MCLBYTES) return (EINVAL); in = malloc(buflen, M_TEMP, M_WAITOK); error = copyin(buf, in, buflen); if (error != 0) goto out; /* * Make a pass over the input buffer to determine the amount of space * required for 64 bit-aligned copies of the control messages. */ in1 = in; outlen = 0; while (buflen > 0) { if (buflen < sizeof(*cm)) { error = EINVAL; break; } cm = (struct cmsghdr *)in1; if (cm->cmsg_len < FREEBSD32_ALIGN(sizeof(*cm))) { error = EINVAL; break; } msglen = FREEBSD32_ALIGN(cm->cmsg_len); if (msglen > buflen || msglen < cm->cmsg_len) { error = EINVAL; break; } buflen -= msglen; in1 = (char *)in1 + msglen; outlen += CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen - FREEBSD32_ALIGN(sizeof(*cm))); } if (error == 0 && outlen > MCLBYTES) { /* * XXXMJ This implies that the upper limit on 32-bit aligned * control messages is less than MCLBYTES, and so we are not * perfectly compatible. However, there is no platform * guarantee that mbuf clusters larger than MCLBYTES can be * allocated. 
*/ error = EINVAL; } if (error != 0) goto out; m = m_get2(outlen, M_WAITOK, MT_CONTROL, 0); m->m_len = outlen; md = mtod(m, void *); /* * Make a second pass over input messages, copying them into the output * buffer. */ in1 = in; while (outlen > 0) { /* Copy the message header and align the length field. */ cm = md; memcpy(cm, in1, sizeof(*cm)); msglen = cm->cmsg_len - FREEBSD32_ALIGN(sizeof(*cm)); cm->cmsg_len = CMSG_ALIGN(sizeof(*cm)) + msglen; /* Copy the message body. */ in1 = (char *)in1 + FREEBSD32_ALIGN(sizeof(*cm)); md = (char *)md + CMSG_ALIGN(sizeof(*cm)); memcpy(md, in1, msglen); in1 = (char *)in1 + FREEBSD32_ALIGN(msglen); md = (char *)md + CMSG_ALIGN(msglen); KASSERT(outlen >= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen), ("outlen %u underflow, msglen %u", outlen, msglen)); outlen -= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen); } *mp = m; out: free(in, M_TEMP); return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args 
*uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } 
int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { 
error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, 
uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif #if defined(COMPAT_FREEBSD11) int freebsd11_freebsd32_getdirentries(struct thread *td, struct freebsd11_freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } int freebsd11_freebsd32_getdents(struct thread *td, struct freebsd11_freebsd32_getdents_args *uap) { struct freebsd11_freebsd32_getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (freebsd11_freebsd32_getdirentries(td, &ap)); 
} #endif /* COMPAT_FREEBSD11 */ #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); 
error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_padding0 = 0; out->st_padding1 = 0; #ifdef __STAT32_TIME_T_EXT out->st_atim_ext = 0; out->st_mtim_ext = 0; out->st_ctim_ext = 0; out->st_btim_ext = 0; #endif bzero(out->st_spare, sizeof(out->st_spare)); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { 
bzero(out, sizeof(*out)); CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); out->st_size = MIN(in->st_size, INT32_MAX); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub, NULL); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof 
(sb32)); return (error); } #endif int freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap) { struct stat sb; struct stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #if defined(COMPAT_FREEBSD11) extern int ino64_trunc_error; static int freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out) { CP(*in, *out, st_ino); if (in->st_ino != out->st_ino) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_ino = UINT32_MAX; break; } } CP(*in, *out, st_nlink); if (in->st_nlink != out->st_nlink) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_nlink = UINT16_MAX; break; } } out->st_dev = in->st_dev; if (out->st_dev != in->st_dev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } CP(*in, *out, st_mode); CP(*in, *out, st_uid); CP(*in, *out, st_gid); out->st_rdev = in->st_rdev; if (out->st_rdev != in->st_rdev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_lspare = 0; bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim), sizeof(*out) - offsetof(struct freebsd11_stat32, st_birthtim) - sizeof(out->st_birthtim)); return (0); } int freebsd11_freebsd32_stat(struct thread *td, struct freebsd11_freebsd32_stat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if 
(error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstat(struct thread *td, struct freebsd11_freebsd32_fstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_fstat(td, uap->fd, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstatat(struct thread *td, struct freebsd11_freebsd32_fstatat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->buf, sizeof (sb32)); return (error); } int freebsd11_freebsd32_lstat(struct thread *td, struct freebsd11_freebsd32_lstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fhstat(struct thread *td, struct freebsd11_freebsd32_fhstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #endif int freebsd32___sysctl(struct thread *td, struct freebsd32___sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = 
fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32___sysctlbyname(struct thread *td, struct freebsd32___sysctlbyname_args *uap) { size_t oldlen, rv; int error; uint32_t tmp; if (uap->oldlenp != NULL) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { error = oldlen = 0; } if (error != 0) return (EFAULT); error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old, &oldlen, uap->new, uap->newlen, &rv, SCTL_MASK32, 1); if (error != 0) return (error); if (uap->oldlenp != NULL) error = suword32(uap->oldlenp, rv); return (error); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. 
*/ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = 
NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = 
PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { return 
(freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } int freebsd32_clock_nanosleep(struct thread *td, struct freebsd32_clock_nanosleep_args *uap) { int error; error = freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error, error2; error = copyin(ua_rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, ua_rmtp, sizeof(rmt32)); if (error2 != 0) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } 
int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); 
bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } #ifndef _FREEBSD32_SYSPROTO_H_ struct freebsd32_sigqueue_args { pid_t pid; int signum; /* union sigval32 */ int value; }; #endif int freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap) { union sigval sv; /* * On 32-bit ABIs, sival_int and sival_ptr are the same. * On 64-bit little-endian ABIs, the low bits are the same. * In 64-bit big-endian ABIs, sival_int overlaps with * sival_ptr's HIGH bits. 
We choose to support sival_int
	 * rather than sival_ptr in this case as it seems to be
	 * more common.
	 */
	bzero(&sv, sizeof(sv));
	sv.sival_int = uap->value;
	return (kern_sigqueue(td, uap->pid, uap->signum, &sv));
}

/*
 * 32-bit sigtimedwait: wait for a signal from the set at uap->set, with
 * an optional timeout.
 *
 * A non-NULL uap->timeout is copied in as a struct timespec32 and
 * widened; a NULL one is passed to kern_sigtimedwait() as NULL.  When
 * uap->info is set, the resulting siginfo is converted with
 * siginfo_to_siginfo32() and copied out.  The signal number is stored in
 * td->td_retval[0] only if no error occurred, including the copyout.
 */
int
freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap)
{
	struct timespec32 ts32;
	struct timespec ts;
	struct timespec *timeout;
	sigset_t set;
	ksiginfo_t ksi;
	struct siginfo32 si32;
	int error;

	if (uap->timeout) {
		error = copyin(uap->timeout, &ts32, sizeof(ts32));
		if (error)
			return (error);
		ts.tv_sec = ts32.tv_sec;
		ts.tv_nsec = ts32.tv_nsec;
		timeout = &ts;
	} else
		timeout = NULL;

	error = copyin(uap->set, &set, sizeof(set));
	if (error)
		return (error);

	error = kern_sigtimedwait(td, set, &ksi, timeout);
	if (error)
		return (error);

	if (uap->info) {
		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
	}

	/* error is still 0 here when uap->info was not supplied. */
	if (error == 0)
		td->td_retval[0] = ksi.ksi_signo;
	return (error);
}

/*
 * MPSAFE
 */
/*
 * 32-bit sigwaitinfo: same flow as freebsd32_sigtimedwait() but always
 * passes a NULL timeout to kern_sigtimedwait().
 */
int
freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap)
{
	ksiginfo_t ksi;
	struct siginfo32 si32;
	sigset_t set;
	int error;

	error = copyin(uap->set, &set, sizeof(set));
	if (error)
		return (error);

	error = kern_sigtimedwait(td, set, &ksi, NULL);
	if (error)
		return (error);

	if (uap->info) {
		siginfo_to_siginfo32(&ksi.ksi_info, &si32);
		error = copyout(&si32, uap->info, sizeof(struct siginfo32));
	}
	if (error == 0)
		td->td_retval[0] = ksi.ksi_signo;
	return (error);
}

/*
 * The cpuset wrappers below forward their arguments unchanged to the
 * matching kern_cpuset_*() routine; the only translation is that uap->id
 * is passed through PAIR32TO64(id_t, ...).
 * NOTE(review): PAIR32TO64 is defined outside this view; presumably it
 * joins the two 32-bit halves of the id argument -- confirm.
 */
int
freebsd32_cpuset_setid(struct thread *td,
    struct freebsd32_cpuset_setid_args *uap)
{

	return (kern_cpuset_setid(td, uap->which,
	    PAIR32TO64(id_t, uap->id), uap->setid));
}

/* Forwards to kern_cpuset_getid(); see the note on the wrappers above. */
int
freebsd32_cpuset_getid(struct thread *td,
    struct freebsd32_cpuset_getid_args *uap)
{

	return (kern_cpuset_getid(td, uap->level, uap->which,
	    PAIR32TO64(id_t, uap->id), uap->setid));
}

/* Forwards to kern_cpuset_getaffinity() with the caller's mask buffer. */
int
freebsd32_cpuset_getaffinity(struct thread *td,
    struct freebsd32_cpuset_getaffinity_args *uap)
{

	return (kern_cpuset_getaffinity(td, uap->level, uap->which,
	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask));
}

/* Forwards to kern_cpuset_setaffinity() with the caller's mask buffer. */
int
freebsd32_cpuset_setaffinity(struct thread *td,
    struct freebsd32_cpuset_setaffinity_args *uap)
{

	return (kern_cpuset_setaffinity(td, uap->level, uap->which,
	    PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask));
}

/* Forwards to kern_cpuset_getdomain(), including the policy pointer. */
int
freebsd32_cpuset_getdomain(struct thread *td,
    struct freebsd32_cpuset_getdomain_args *uap)
{

	return (kern_cpuset_getdomain(td, uap->level, uap->which,
	    PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy));
}

/* Forwards to kern_cpuset_setdomain(), including the policy argument. */
int
freebsd32_cpuset_setdomain(struct thread *td,
    struct freebsd32_cpuset_setdomain_args *uap)
{

	return (kern_cpuset_setdomain(td, uap->level, uap->which,
	    PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy));
}

/*
 * 32-bit nmount: widen the caller's flags to 64 bits, record them for
 * audit, strip MNT_ROOTFS, and then validate the iovec count.
 */
int
freebsd32_nmount(struct thread *td,
    struct freebsd32_nmount_args /* {
	struct iovec *iovp;
	unsigned int iovcnt;
	int flags;
    } */ *uap)
{
	struct uio *auio;
	uint64_t flags;
	int error;

	/*
	 * Mount flags are now 64-bits. On 32-bit architectures only
	 * 32-bits are passed in, but from here on everything handles
	 * 64-bit flags correctly.
	 */
	flags = uap->flags;

	AUDIT_ARG_FFLAGS(flags);

	/*
	 * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
	 * userspace to set this flag, but we must filter it out if we want
	 * MNT_UPDATE on the root file system to work.
	 * MNT_ROOTFS should only be set by the kernel when mounting its
	 * root file system.
	 */
	flags &= ~MNT_ROOTFS;

	/*
	 * check that we have an even number of iovec's
	 * and that we have at least two options.
*/ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_module_handler(struct module *mod, int what, void *arg) { return (kern_syscall_module_handler(freebsd32_sysent, mod, what, arg)); } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { return (kern_syscall_helper_register(freebsd32_sysent, sd, flags)); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { return (kern_syscall_helper_unregister(freebsd32_sysent, sd)); } int freebsd32_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { int argc, envc, i; u_int32_t *vectp; char *stringp; uintptr_t destp, ustringp; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int error, szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. 
*/ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; imgp->ps_strings = arginfo; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); error = copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes32, imgp->pagesizes, sizeof(pagesizes32)); if (error != 0) return (error); imgp->pagesizeslen = sizeof(pagesizes32); /* * Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); ustringp = destp; if (imgp->sysent->sv_stackgap != NULL) imgp->sysent->sv_stackgap(imgp, &destp); if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. 
*/ destp -= AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(uint32_t)); } vectp = (uint32_t *)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword32(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. 
*/ for (; envc > 0; --envc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword32(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat *stat; struct kld32_file_stat *stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO); stat32 = malloc(sizeof(*stat32), M_TEMP, M_WAITOK | M_ZERO); error = kern_kldstat(td, uap->fileid, stat); if (error == 0) { bcopy(&stat->name[0], &stat32->name[0], sizeof(stat->name)); CP(*stat, *stat32, refs); CP(*stat, *stat32, id); PTROUT_CP(*stat, *stat32, address); CP(*stat, *stat32, size); bcopy(&stat->pathname[0], &stat32->pathname[0], sizeof(stat->pathname)); stat32->version = version; error = copyout(stat32, uap->stat, version); } free(stat, M_TEMP); free(stat32, M_TEMP); return (error); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, 
sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags, signum; if (uap->com >= PROC_PROCCTL_MD_MIN) return (cpu_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, PTRIN(uap->data))); switch (uap->com) { case PROC_ASLR_CTL: case PROC_PROTMAX_CTL: case PROC_SPROTECT: case PROC_STACKGAP_CTL: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_ASLR_STATUS: case PROC_PROTMAX_STATUS: case PROC_STACKGAP_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; break; case PROC_PDEATHSIG_CTL: error = copyin(uap->data, &signum, sizeof(signum)); if (error != 0) return (error); data = &signum; break; case PROC_PDEATHSIG_STATUS: data = &signum; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = 
copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_ASLR_STATUS: case PROC_PROTMAX_STATUS: case PROC_STACKGAP_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; case PROC_PDEATHSIG_STATUS: if (error == 0) error = copyout(&signum, uap->data, sizeof(signum)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } int freebsd32_sched_rr_get_interval(struct thread *td, struct freebsd32_sched_rr_get_interval_args *uap) { struct timespec ts; struct timespec32 ts32; int error; error = kern_sched_rr_get_interval(td, uap->pid, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->interval, sizeof(ts32)); } return (error); +} + +int +freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) +{ + + return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr, + uap->uaddr2, 
&umtx_native_ops32)); } Index: head/sys/kern/kern_umtx.c =================================================================== --- head/sys/kern/kern_umtx.c (revision 367743) +++ head/sys/kern/kern_umtx.c (revision 367744) @@ -1,4669 +1,4531 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2015, 2016 The FreeBSD Foundation * Copyright (c) 2004, David Xu * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_umtx_profiling.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #endif #define _UMUTEX_TRY 1 #define _UMUTEX_WAIT 2 #ifdef UMTX_PROFILING #define UPROF_PERC_BIGGER(w, f, sw, sf) \ (((w) > (sw)) || ((w) == (sw) && (f) > (sf))) #endif /* Priority inheritance mutex info. */ struct umtx_pi { /* Owner thread */ struct thread *pi_owner; /* Reference count */ int pi_refcount; /* List entry to link umtx holding by thread */ TAILQ_ENTRY(umtx_pi) pi_link; /* List entry in hash */ TAILQ_ENTRY(umtx_pi) pi_hashlink; /* List for waiters */ TAILQ_HEAD(,umtx_q) pi_blocked; /* Identify a userland lock object */ struct umtx_key pi_key; }; /* A userland synchronous object user. */ struct umtx_q { /* Linked list for the hash. */ TAILQ_ENTRY(umtx_q) uq_link; /* Umtx key. */ struct umtx_key uq_key; /* Umtx flags. */ int uq_flags; #define UQF_UMTXQ 0x0001 /* The thread waits on. */ struct thread *uq_thread; /* * Blocked on PI mutex. read can use chain lock * or umtx_lock, write must have both chain lock and * umtx_lock being hold. 
*/ struct umtx_pi *uq_pi_blocked; /* On blocked list */ TAILQ_ENTRY(umtx_q) uq_lockq; /* Thread contending with us */ TAILQ_HEAD(,umtx_pi) uq_pi_contested; /* Inherited priority from PP mutex */ u_char uq_inherited_pri; /* Spare queue ready to be reused */ struct umtxq_queue *uq_spare_queue; /* The queue we on */ struct umtxq_queue *uq_cur_queue; }; TAILQ_HEAD(umtxq_head, umtx_q); /* Per-key wait-queue */ struct umtxq_queue { struct umtxq_head head; struct umtx_key key; LIST_ENTRY(umtxq_queue) link; int length; }; LIST_HEAD(umtxq_list, umtxq_queue); /* Userland lock object's wait-queue chain */ struct umtxq_chain { /* Lock for this chain. */ struct mtx uc_lock; /* List of sleep queues. */ struct umtxq_list uc_queue[2]; #define UMTX_SHARED_QUEUE 0 #define UMTX_EXCLUSIVE_QUEUE 1 LIST_HEAD(, umtxq_queue) uc_spare_queue; /* Busy flag */ char uc_busy; /* Chain lock waiters */ int uc_waiters; /* All PI in the list */ TAILQ_HEAD(,umtx_pi) uc_pi_list; #ifdef UMTX_PROFILING u_int length; u_int max_length; #endif }; #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED) /* * Don't propagate time-sharing priority, there is a security reason, * a user can simply introduce PI-mutex, let thread A lock the mutex, * and let another thread B block on the mutex, because B is * sleeping, its priority will be boosted, this causes A's priority to * be boosted via priority propagating too and will never be lowered even * if it is using 100%CPU, this is unfair to other processes. */ #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\ (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\ PRI_MAX_TIMESHARE : (td)->td_user_pri) #define GOLDEN_RATIO_PRIME 2654404609U #ifndef UMTX_CHAINS #define UMTX_CHAINS 512 #endif #define UMTX_SHIFTS (__WORD_BIT - 9) #define GET_SHARE(flags) \ (((flags) & USYNC_PROCESS_SHARED) == 0 ? 
THREAD_SHARE : PROCESS_SHARE) #define BUSY_SPINS 200 struct abs_timeout { int clockid; bool is_abs_real; /* TIMER_ABSTIME && CLOCK_REALTIME* */ struct timespec cur; struct timespec end; }; -#ifdef COMPAT_FREEBSD32 -struct umutex32 { - volatile __lwpid_t m_owner; /* Owner of the mutex */ - __uint32_t m_flags; /* Flags of the mutex */ - __uint32_t m_ceilings[2]; /* Priority protect ceiling */ - __uint32_t m_rb_lnk; /* Robust linkage */ - __uint32_t m_pad; - __uint32_t m_spare[2]; -}; - _Static_assert(sizeof(struct umutex) == sizeof(struct umutex32), "umutex32"); _Static_assert(__offsetof(struct umutex, m_spare[0]) == __offsetof(struct umutex32, m_spare[0]), "m_spare32"); -#endif int umtx_shm_vnobj_persistent = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_vnode_persistent, CTLFLAG_RWTUN, &umtx_shm_vnobj_persistent, 0, "False forces destruction of umtx attached to file, on last close"); static int umtx_max_rb = 1000; SYSCTL_INT(_kern_ipc, OID_AUTO, umtx_max_robust, CTLFLAG_RWTUN, &umtx_max_rb, 0, "Maximum number of robust mutexes allowed for each thread"); static uma_zone_t umtx_pi_zone; static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS]; static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory"); static int umtx_pi_allocated; static SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "umtx debug"); SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD, &umtx_pi_allocated, 0, "Allocated umtx_pi"); static int umtx_verbose_rb = 1; SYSCTL_INT(_debug_umtx, OID_AUTO, robust_faults_verbose, CTLFLAG_RWTUN, &umtx_verbose_rb, 0, ""); #ifdef UMTX_PROFILING static long max_length; SYSCTL_LONG(_debug_umtx, OID_AUTO, max_length, CTLFLAG_RD, &max_length, 0, "max_length"); static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "umtx chain stats"); #endif static void abs_timeout_update(struct abs_timeout *timo); static void umtx_shm_init(void); static void umtxq_sysinit(void *); static void umtxq_hash(struct umtx_key *key); static 
struct umtxq_chain *umtxq_getchain(struct umtx_key *key); static void umtxq_lock(struct umtx_key *key); static void umtxq_unlock(struct umtx_key *key); static void umtxq_busy(struct umtx_key *key); static void umtxq_unbusy(struct umtx_key *key); static void umtxq_insert_queue(struct umtx_q *uq, int q); static void umtxq_remove_queue(struct umtx_q *uq, int q); static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *); static int umtxq_count(struct umtx_key *key); static struct umtx_pi *umtx_pi_alloc(int); static void umtx_pi_free(struct umtx_pi *pi); static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb); static void umtx_thread_cleanup(struct thread *td); static void umtx_exec_hook(void *arg __unused, struct proc *p __unused, struct image_params *imgp __unused); SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL); #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE) #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE) #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE) static struct mtx umtx_lock; #ifdef UMTX_PROFILING static void umtx_init_profiling(void) { struct sysctl_oid *chain_oid; char chain_name[10]; int i; for (i = 0; i < UMTX_CHAINS; ++i) { snprintf(chain_name, sizeof(chain_name), "%d", i); chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_umtx_chains), OID_AUTO, chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "umtx hash stats"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_length0", CTLFLAG_RD, &umtxq_chains[0][i].max_length, 0, NULL); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, "max_length1", CTLFLAG_RD, &umtxq_chains[1][i].max_length, 0, NULL); } } static int sysctl_debug_umtx_chains_peaks(SYSCTL_HANDLER_ARGS) { char buf[512]; struct sbuf sb; struct umtxq_chain *uc; u_int fract, i, j, tot, whole; u_int sf0, sf1, sf2, sf3, sf4; u_int si0, si1, si2, si3, si4; u_int 
sw0, sw1, sw2, sw3, sw4; sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); for (i = 0; i < 2; i++) { tot = 0; for (j = 0; j < UMTX_CHAINS; ++j) { uc = &umtxq_chains[i][j]; mtx_lock(&uc->uc_lock); tot += uc->max_length; mtx_unlock(&uc->uc_lock); } if (tot == 0) sbuf_printf(&sb, "%u) Empty ", i); else { sf0 = sf1 = sf2 = sf3 = sf4 = 0; si0 = si1 = si2 = si3 = si4 = 0; sw0 = sw1 = sw2 = sw3 = sw4 = 0; for (j = 0; j < UMTX_CHAINS; j++) { uc = &umtxq_chains[i][j]; mtx_lock(&uc->uc_lock); whole = uc->max_length * 100; mtx_unlock(&uc->uc_lock); fract = (whole % tot) * 100; if (UPROF_PERC_BIGGER(whole, fract, sw0, sf0)) { sf0 = fract; si0 = j; sw0 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw1, sf1)) { sf1 = fract; si1 = j; sw1 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw2, sf2)) { sf2 = fract; si2 = j; sw2 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw3, sf3)) { sf3 = fract; si3 = j; sw3 = whole; } else if (UPROF_PERC_BIGGER(whole, fract, sw4, sf4)) { sf4 = fract; si4 = j; sw4 = whole; } } sbuf_printf(&sb, "queue %u:\n", i); sbuf_printf(&sb, "1st: %u.%u%% idx: %u\n", sw0 / tot, sf0 / tot, si0); sbuf_printf(&sb, "2nd: %u.%u%% idx: %u\n", sw1 / tot, sf1 / tot, si1); sbuf_printf(&sb, "3rd: %u.%u%% idx: %u\n", sw2 / tot, sf2 / tot, si2); sbuf_printf(&sb, "4th: %u.%u%% idx: %u\n", sw3 / tot, sf3 / tot, si3); sbuf_printf(&sb, "5th: %u.%u%% idx: %u\n", sw4 / tot, sf4 / tot, si4); } } sbuf_trim(&sb); sbuf_finish(&sb); sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); sbuf_delete(&sb); return (0); } static int sysctl_debug_umtx_chains_clear(SYSCTL_HANDLER_ARGS) { struct umtxq_chain *uc; u_int i, j; int clear, error; clear = 0; error = sysctl_handle_int(oidp, &clear, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (clear != 0) { for (i = 0; i < 2; ++i) { for (j = 0; j < UMTX_CHAINS; ++j) { uc = &umtxq_chains[i][j]; mtx_lock(&uc->uc_lock); uc->length = 0; uc->max_length = 0; mtx_unlock(&uc->uc_lock); } } } return (0); } 
SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, clear, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_debug_umtx_chains_clear, "I", "Clear umtx chains statistics"); SYSCTL_PROC(_debug_umtx_chains, OID_AUTO, peaks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_debug_umtx_chains_peaks, "A", "Highest peaks in chains max length"); #endif static void umtxq_sysinit(void *arg __unused) { int i, j; umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); for (i = 0; i < 2; ++i) { for (j = 0; j < UMTX_CHAINS; ++j) { mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL, MTX_DEF | MTX_DUPOK); LIST_INIT(&umtxq_chains[i][j].uc_queue[0]); LIST_INIT(&umtxq_chains[i][j].uc_queue[1]); LIST_INIT(&umtxq_chains[i][j].uc_spare_queue); TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list); umtxq_chains[i][j].uc_busy = 0; umtxq_chains[i][j].uc_waiters = 0; #ifdef UMTX_PROFILING umtxq_chains[i][j].length = 0; umtxq_chains[i][j].max_length = 0; #endif } } #ifdef UMTX_PROFILING umtx_init_profiling(); #endif mtx_init(&umtx_lock, "umtx lock", NULL, MTX_DEF); EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL, EVENTHANDLER_PRI_ANY); umtx_shm_init(); } struct umtx_q * umtxq_alloc(void) { struct umtx_q *uq; uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO); uq->uq_spare_queue = malloc(sizeof(struct umtxq_queue), M_UMTX, M_WAITOK | M_ZERO); TAILQ_INIT(&uq->uq_spare_queue->head); TAILQ_INIT(&uq->uq_pi_contested); uq->uq_inherited_pri = PRI_MAX; return (uq); } void umtxq_free(struct umtx_q *uq) { MPASS(uq->uq_spare_queue != NULL); free(uq->uq_spare_queue, M_UMTX); free(uq, M_UMTX); } static inline void umtxq_hash(struct umtx_key *key) { unsigned n; n = (uintptr_t)key->info.both.a + key->info.both.b; key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS; } static inline struct umtxq_chain * umtxq_getchain(struct umtx_key *key) { if (key->type <= TYPE_SEM) return (&umtxq_chains[1][key->hash]); return 
(&umtxq_chains[0][key->hash]); } /* * Lock a chain. */ static inline void umtxq_lock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_lock(&uc->uc_lock); } /* * Unlock a chain. */ static inline void umtxq_unlock(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_unlock(&uc->uc_lock); } /* * Set chain to busy state when following operation * may be blocked (kernel mutex can not be used). */ static inline void umtxq_busy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); if (uc->uc_busy) { #ifdef SMP if (smp_cpus > 1) { int count = BUSY_SPINS; if (count > 0) { umtxq_unlock(key); while (uc->uc_busy && --count > 0) cpu_spinwait(); umtxq_lock(key); } } #endif while (uc->uc_busy) { uc->uc_waiters++; msleep(uc, &uc->uc_lock, 0, "umtxqb", 0); uc->uc_waiters--; } } uc->uc_busy = 1; } /* * Unbusy a chain. */ static inline void umtxq_unbusy(struct umtx_key *key) { struct umtxq_chain *uc; uc = umtxq_getchain(key); mtx_assert(&uc->uc_lock, MA_OWNED); KASSERT(uc->uc_busy != 0, ("not busy")); uc->uc_busy = 0; if (uc->uc_waiters) wakeup_one(uc); } static inline void umtxq_unbusy_unlocked(struct umtx_key *key) { umtxq_lock(key); umtxq_unbusy(key); umtxq_unlock(key); } static struct umtxq_queue * umtxq_queue_lookup(struct umtx_key *key, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(key); UMTXQ_LOCKED_ASSERT(uc); LIST_FOREACH(uh, &uc->uc_queue[q], link) { if (umtx_key_match(&uh->key, key)) return (uh); } return (NULL); } static inline void umtxq_insert_queue(struct umtx_q *uq, int q) { struct umtxq_queue *uh; struct umtxq_chain *uc; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); KASSERT((uq->uq_flags & UQF_UMTXQ) == 0, ("umtx_q is already on queue")); uh = umtxq_queue_lookup(&uq->uq_key, q); if (uh != NULL) { LIST_INSERT_HEAD(&uc->uc_spare_queue, uq->uq_spare_queue, link); } else { uh = uq->uq_spare_queue; uh->key = uq->uq_key; 
LIST_INSERT_HEAD(&uc->uc_queue[q], uh, link); #ifdef UMTX_PROFILING uc->length++; if (uc->length > uc->max_length) { uc->max_length = uc->length; if (uc->max_length > max_length) max_length = uc->max_length; } #endif } uq->uq_spare_queue = NULL; TAILQ_INSERT_TAIL(&uh->head, uq, uq_link); uh->length++; uq->uq_flags |= UQF_UMTXQ; uq->uq_cur_queue = uh; return; } static inline void umtxq_remove_queue(struct umtx_q *uq, int q) { struct umtxq_chain *uc; struct umtxq_queue *uh; uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); if (uq->uq_flags & UQF_UMTXQ) { uh = uq->uq_cur_queue; TAILQ_REMOVE(&uh->head, uq, uq_link); uh->length--; uq->uq_flags &= ~UQF_UMTXQ; if (TAILQ_EMPTY(&uh->head)) { KASSERT(uh->length == 0, ("inconsistent umtxq_queue length")); #ifdef UMTX_PROFILING uc->length--; #endif LIST_REMOVE(uh, link); } else { uh = LIST_FIRST(&uc->uc_spare_queue); KASSERT(uh != NULL, ("uc_spare_queue is empty")); LIST_REMOVE(uh, link); } uq->uq_spare_queue = uh; uq->uq_cur_queue = NULL; } } /* * Check if there are multiple waiters */ static int umtxq_count(struct umtx_key *key) { struct umtxq_queue *uh; UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) return (uh->length); return (0); } /* * Check if there are multiple PI waiters and returns first * waiter. */ static int umtxq_count_pi(struct umtx_key *key, struct umtx_q **first) { struct umtxq_queue *uh; *first = NULL; UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); uh = umtxq_queue_lookup(key, UMTX_SHARED_QUEUE); if (uh != NULL) { *first = TAILQ_FIRST(&uh->head); return (uh->length); } return (0); } /* * Wake up threads waiting on an userland object. 
*/ static int umtxq_signal_queue(struct umtx_key *key, int n_wake, int q) { struct umtxq_queue *uh; struct umtx_q *uq; int ret; ret = 0; UMTXQ_LOCKED_ASSERT(umtxq_getchain(key)); uh = umtxq_queue_lookup(key, q); if (uh != NULL) { while ((uq = TAILQ_FIRST(&uh->head)) != NULL) { umtxq_remove_queue(uq, q); wakeup(uq); if (++ret >= n_wake) return (ret); } } return (ret); } /* * Wake up specified thread. */ static inline void umtxq_signal_thread(struct umtx_q *uq) { UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key)); umtxq_remove(uq); wakeup(uq); } static inline int tstohz(const struct timespec *tsp) { struct timeval tv; TIMESPEC_TO_TIMEVAL(&tv, tsp); return tvtohz(&tv); } static void abs_timeout_init(struct abs_timeout *timo, int clockid, int absolute, const struct timespec *timeout) { timo->clockid = clockid; if (!absolute) { timo->is_abs_real = false; abs_timeout_update(timo); timespecadd(&timo->cur, timeout, &timo->end); } else { timo->end = *timeout; timo->is_abs_real = clockid == CLOCK_REALTIME || clockid == CLOCK_REALTIME_FAST || clockid == CLOCK_REALTIME_PRECISE; /* * If is_abs_real, umtxq_sleep will read the clock * after setting td_rtcgen; otherwise, read it here. 
*/ if (!timo->is_abs_real) { abs_timeout_update(timo); } } } static void abs_timeout_init2(struct abs_timeout *timo, const struct _umtx_time *umtxtime) { abs_timeout_init(timo, umtxtime->_clockid, (umtxtime->_flags & UMTX_ABSTIME) != 0, &umtxtime->_timeout); } static inline void abs_timeout_update(struct abs_timeout *timo) { kern_clock_gettime(curthread, timo->clockid, &timo->cur); } static int abs_timeout_gethz(struct abs_timeout *timo) { struct timespec tts; if (timespeccmp(&timo->end, &timo->cur, <=)) return (-1); timespecsub(&timo->end, &timo->cur, &tts); return (tstohz(&tts)); } static uint32_t umtx_unlock_val(uint32_t flags, bool rb) { if (rb) return (UMUTEX_RB_OWNERDEAD); else if ((flags & UMUTEX_NONCONSISTENT) != 0) return (UMUTEX_RB_NOTRECOV); else return (UMUTEX_UNOWNED); } /* * Put thread into sleep state, before sleeping, check if * thread was removed from umtx queue. */ static inline int umtxq_sleep(struct umtx_q *uq, const char *wmesg, struct abs_timeout *abstime) { struct umtxq_chain *uc; int error, timo; if (abstime != NULL && abstime->is_abs_real) { curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); abs_timeout_update(abstime); } uc = umtxq_getchain(&uq->uq_key); UMTXQ_LOCKED_ASSERT(uc); for (;;) { if (!(uq->uq_flags & UQF_UMTXQ)) { error = 0; break; } if (abstime != NULL) { timo = abs_timeout_gethz(abstime); if (timo < 0) { error = ETIMEDOUT; break; } } else timo = 0; error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo); if (error == EINTR || error == ERESTART) { umtxq_lock(&uq->uq_key); break; } if (abstime != NULL) { if (abstime->is_abs_real) curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation); abs_timeout_update(abstime); } umtxq_lock(&uq->uq_key); } curthread->td_rtcgen = 0; return (error); } /* * Convert userspace address into unique logical address. 
*/
/*
 * Fill in `key' for the userspace address `addr'.
 *
 * THREAD_SHARE keys are always private (vmspace + address).  For
 * PROCESS_SHARE and AUTO_SHARE the address is looked up in the current
 * process' VM map; the key becomes shared (object + offset, holding an
 * object reference) for PROCESS_SHARE, or for AUTO_SHARE when the map
 * entry's inheritance is VM_INHERIT_SHARE, and private otherwise.
 *
 * Returns 0 on success or EFAULT when the map lookup fails.
 */
int
umtx_key_get(const void *addr, int type, int share, struct umtx_key *key)
{
	struct thread *td = curthread;
	vm_map_t map;
	vm_map_entry_t entry;
	vm_pindex_t pindex;
	vm_prot_t prot;
	boolean_t wired;

	key->type = type;
	if (share == THREAD_SHARE) {
		key->shared = 0;
		key->info.private.vs = td->td_proc->p_vmspace;
		key->info.private.addr = (uintptr_t)addr;
	} else {
		MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
		map = &td->td_proc->p_vmspace->vm_map;
		if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
		    &entry, &key->info.shared.object, &pindex, &prot,
		    &wired) != KERN_SUCCESS) {
			return (EFAULT);
		}

		if ((share == PROCESS_SHARE) ||
		    (share == AUTO_SHARE &&
		    VM_INHERIT_SHARE == entry->inheritance)) {
			key->shared = 1;
			key->info.shared.offset = (vm_offset_t)addr -
			    entry->start + entry->offset;
			/* Dropped again by umtx_key_release(). */
			vm_object_reference(key->info.shared.object);
		} else {
			key->shared = 0;
			key->info.private.vs = td->td_proc->p_vmspace;
			key->info.private.addr = (uintptr_t)addr;
		}
		vm_map_lookup_done(map, entry);
	}

	umtxq_hash(key);
	return (0);
}

/*
 * Release key: drop the VM object reference held by a shared key.
 */
void
umtx_key_release(struct umtx_key *key)
{
	if (key->shared)
		vm_object_deallocate(key->info.shared.object);
}

/*
 * Fetch and compare value, sleep on the address if value is not changed.
 *
 * The thread is queued before the userspace word is read.  The word is
 * read as a long, or as a 32-bit value when compat32 is non-zero.
 * Returns 0 when the thread was dequeued by a waker, EFAULT when the
 * word cannot be read, or the umtxq_sleep() error with ERESTART mapped
 * to EINTR.
 */
static int
do_wait(struct thread *td, void *addr, u_long id,
    struct _umtx_time *timeout, int compat32, int is_private)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	u_long tmp;
	uint32_t tmp32;
	int error = 0;

	uq = td->td_umtxq;
	if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	umtxq_lock(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	/* The chain lock is not held across the userspace access. */
	if (compat32 == 0) {
		error = fueword(addr, &tmp);
		if (error != 0)
			error = EFAULT;
	} else {
		error = fueword32(addr, &tmp32);
		if (error == 0)
			tmp = tmp32;
		else
			error = EFAULT;
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0) {
		if (tmp == id)
			error = umtxq_sleep(uq, "uwait", timeout == NULL ?
			    NULL : &timo);
		/* Being dequeued means we were woken: report success. */
		if ((uq->uq_flags & UQF_UMTXQ) == 0)
			error = 0;
		else
			umtxq_remove(uq);
	} else if ((uq->uq_flags & UQF_UMTXQ) != 0) {
		umtxq_remove(uq);
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Wake up threads sleeping on the specified address.
 *
 * Returns 0, or the umtx_key_get() error when the key cannot be built.
 */
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
{
	struct umtx_key key;
	int ret;

	if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
	    is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
		return (ret);
	umtxq_lock(&key);
	umtxq_signal(&key, n_wake);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (0);
}

/*
 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * `mode' selects the variant: _UMUTEX_WAIT only waits until the owner
 * word shows no owner, _UMUTEX_TRY returns EBUSY instead of sleeping,
 * any other value acquires the mutex, sleeping as needed.
 */
static int
do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int mode)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t owner, old, id;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	error = 0;
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		rv = fueword32(&m->m_owner, &owner);
		if (rv == -1)
			return (EFAULT);
		if (mode == _UMUTEX_WAIT) {
			if (owner == UMUTEX_UNOWNED ||
			    owner == UMUTEX_CONTESTED ||
			    owner == UMUTEX_RB_OWNERDEAD ||
			    owner == UMUTEX_RB_NOTRECOV)
				return (0);
		} else {
			/*
			 * Robust mutex terminated.  Kernel duty is to
			 * return EOWNERDEAD to the userspace.  The
			 * umutex.m_flags UMUTEX_NONCONSISTENT is set
			 * by the common userspace code.
			 */
			if (owner == UMUTEX_RB_OWNERDEAD) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_RB_OWNERDEAD, &owner,
				    id | UMUTEX_CONTESTED);
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_RB_OWNERDEAD);
					return (EOWNERDEAD); /* success */
				}
				MPASS(rv == 1);
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
				continue;
			}
			if (owner == UMUTEX_RB_NOTRECOV)
				return (ENOTRECOVERABLE);

			/*
			 * Try the uncontested case.  This should be
			 * done in userland.
			 */
			rv = casueword32(&m->m_owner, UMUTEX_UNOWNED,
			    &owner, id);
			/* The address was invalid. */
			if (rv == -1)
				return (EFAULT);

			/* The acquire succeeded. */
			if (rv == 0) {
				MPASS(owner == UMUTEX_UNOWNED);
				return (0);
			}

			/*
			 * If no one owns it but it is contested try
			 * to acquire it.
			 */
			MPASS(rv == 1);
			if (owner == UMUTEX_CONTESTED) {
				rv = casueword32(&m->m_owner,
				    UMUTEX_CONTESTED, &owner,
				    id | UMUTEX_CONTESTED);
				/* The address was invalid. */
				if (rv == -1)
					return (EFAULT);
				if (rv == 0) {
					MPASS(owner == UMUTEX_CONTESTED);
					return (0);
				}
				if (rv == 1) {
					rv = thread_check_susp(td, false);
					if (rv != 0)
						return (rv);
				}

				/*
				 * If this failed the lock has
				 * changed, restart.
				 */
				continue;
			}

			/* rv == 1 but not contested, likely store failure */
			rv = thread_check_susp(td, false);
			if (rv != 0)
				return (rv);
		}

		if (mode == _UMUTEX_TRY)
			return (EBUSY);

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			return (error);

		if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
		    GET_SHARE(flags), &uq->uq_key)) != 0)
			return (error);

		/*
		 * Queue ourselves with the chain marked busy before the
		 * contested bit is published below.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old,
		    owner | UMUTEX_CONTESTED);

		/* The address was invalid or casueword failed to store. */
		if (rv == -1 || rv == 1) {
			umtxq_lock(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			if (rv == -1)
				return (EFAULT);
			if (rv == 1) {
				rv = thread_check_susp(td, false);
				if (rv != 0)
					return (rv);
			}
			continue;
		}

		/*
		 * We set the contested bit, sleep.  Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		MPASS(old == owner);
		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);

		/* A sleep error is kept and acted on after one more try. */
		if (error == 0)
			error = thread_check_susp(td, false);
	}

	return (0);
}

/*
 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
 *
 * Returns EPERM when the calling thread is not the owner recorded in
 * the owner word, EFAULT when the word cannot be accessed.
 */
static int
do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	uint32_t owner, old, id, newlock;
	int error, count;

	id = td->td_tid;

again:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	newlock = umtx_unlock_val(flags, rb);
	/* Uncontested: a single compare-and-set releases the mutex. */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, newlock);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
		MPASS(old == owner);
		return (0);
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */
	if (count > 1)
		newlock |= UMUTEX_CONTESTED;
	error = casueword32(&m->m_owner, owner, &old, newlock);
	/* One waiter is signalled whatever the outcome of the store. */
	umtxq_lock(&key);
	umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 1) {
		if (old != owner)
			return (EINVAL);
		error = thread_check_susp(td, false);
		if (error != 0)
			return (error);
		goto again;
	}
	return (0);
}

/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * Nothing is done while the owner word still names an owner.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
	struct umtx_key key;
	uint32_t owner;
	uint32_t flags;
	int error;
	int count;

again:
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != 0 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV)
		return (0);

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count(&key);
	umtxq_unlock(&key);

	/* With at most one waiter the contested bit can be cleared. */
	if (count <= 1 && owner != UMUTEX_RB_OWNERDEAD &&
	    owner != UMUTEX_RB_NOTRECOV) {
		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    UMUTEX_UNOWNED);
		if (error == -1) {
			error = EFAULT;
		} else if (error == 1) {
			umtxq_lock(&key);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			error = thread_check_susp(td, false);
			if (error != 0)
				return (error);
			goto again;
		}
	}

	umtxq_lock(&key);
	if (error == 0 && count != 0) {
		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
		    owner == UMUTEX_RB_OWNERDEAD ||
		    owner == UMUTEX_RB_NOTRECOV);
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Check if the mutex has waiters and tries to fix contention bit.
*/ static int do_wake2_umutex(struct thread *td, struct umutex *m, uint32_t flags) { struct umtx_key key; uint32_t owner, old; int type; int error; int count; switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST)) { case 0: case UMUTEX_ROBUST: type = TYPE_NORMAL_UMUTEX; break; case UMUTEX_PRIO_INHERIT: type = TYPE_PI_UMUTEX; break; case (UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST): type = TYPE_PI_ROBUST_UMUTEX; break; case UMUTEX_PRIO_PROTECT: type = TYPE_PP_UMUTEX; break; case (UMUTEX_PRIO_PROTECT | UMUTEX_ROBUST): type = TYPE_PP_ROBUST_UMUTEX; break; default: return (EINVAL); } if ((error = umtx_key_get(m, type, GET_SHARE(flags), &key)) != 0) return (error); owner = 0; umtxq_lock(&key); umtxq_busy(&key); count = umtxq_count(&key); umtxq_unlock(&key); error = fueword32(&m->m_owner, &owner); if (error == -1) error = EFAULT; /* * Only repair contention bit if there is a waiter, this means * the mutex is still being referenced by userland code, * otherwise don't update any memory. 
*/ while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 && (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) { error = casueword32(&m->m_owner, owner, &old, owner | UMUTEX_CONTESTED); if (error == -1) { error = EFAULT; break; } if (error == 0) { MPASS(old == owner); break; } owner = old; error = thread_check_susp(td, false); } umtxq_lock(&key); if (error == EFAULT) { umtxq_signal(&key, INT_MAX); } else if (count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 || owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV)) umtxq_signal(&key, 1); umtxq_unbusy(&key); umtxq_unlock(&key); umtx_key_release(&key); return (error); } static inline struct umtx_pi * umtx_pi_alloc(int flags) { struct umtx_pi *pi; pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags); TAILQ_INIT(&pi->pi_blocked); atomic_add_int(&umtx_pi_allocated, 1); return (pi); } static inline void umtx_pi_free(struct umtx_pi *pi) { uma_zfree(umtx_pi_zone, pi); atomic_add_int(&umtx_pi_allocated, -1); } /* * Adjust the thread's position on a pi_state after its priority has been * changed. */ static int umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td) { struct umtx_q *uq, *uq1, *uq2; struct thread *td1; mtx_assert(&umtx_lock, MA_OWNED); if (pi == NULL) return (0); uq = td->td_umtxq; /* * Check if the thread needs to be moved on the blocked chain. * It needs to be moved if either its priority is lower than * the previous thread or higher than the next thread. */ uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq); uq2 = TAILQ_NEXT(uq, uq_lockq); if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) || (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) { /* * Remove thread from blocked chain and determine where * it should be moved to. 
*/ TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq); TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) { td1 = uq1->uq_thread; MPASS(td1->td_proc->p_magic == P_MAGIC); if (UPRI(td1) > UPRI(td)) break; } if (uq1 == NULL) TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq); else TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq); } return (1); } static struct umtx_pi * umtx_pi_next(struct umtx_pi *pi) { struct umtx_q *uq_owner; if (pi->pi_owner == NULL) return (NULL); uq_owner = pi->pi_owner->td_umtxq; if (uq_owner == NULL) return (NULL); return (uq_owner->uq_pi_blocked); } /* * Floyd's Cycle-Finding Algorithm. */ static bool umtx_pi_check_loop(struct umtx_pi *pi) { struct umtx_pi *pi1; /* fast iterator */ mtx_assert(&umtx_lock, MA_OWNED); if (pi == NULL) return (false); pi1 = pi; for (;;) { pi = umtx_pi_next(pi); if (pi == NULL) break; pi1 = umtx_pi_next(pi1); if (pi1 == NULL) break; pi1 = umtx_pi_next(pi1); if (pi1 == NULL) break; if (pi == pi1) return (true); } return (false); } /* * Propagate priority when a thread is blocked on POSIX * PI mutex. */ static void umtx_propagate_priority(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; int pri; mtx_assert(&umtx_lock, MA_OWNED); pri = UPRI(td); uq = td->td_umtxq; pi = uq->uq_pi_blocked; if (pi == NULL) return; if (umtx_pi_check_loop(pi)) return; for (;;) { td = pi->pi_owner; if (td == NULL || td == curthread) return; MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); thread_lock(td); if (td->td_lend_user_pri > pri) sched_lend_user_prio(td, pri); else { thread_unlock(td); break; } thread_unlock(td); /* * Pick up the lock that td is blocked on. */ uq = td->td_umtxq; pi = uq->uq_pi_blocked; if (pi == NULL) break; /* Resort td on the list if needed. */ umtx_pi_adjust_thread(pi, td); } } /* * Unpropagate priority for a PI mutex when a thread blocked on * it is interrupted by signal or resumed by others. 
*/ static void umtx_repropagate_priority(struct umtx_pi *pi) { struct umtx_q *uq, *uq_owner; struct umtx_pi *pi2; int pri; mtx_assert(&umtx_lock, MA_OWNED); if (umtx_pi_check_loop(pi)) return; while (pi != NULL && pi->pi_owner != NULL) { pri = PRI_MAX; uq_owner = pi->pi_owner->td_umtxq; TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) { uq = TAILQ_FIRST(&pi2->pi_blocked); if (uq != NULL) { if (pri > UPRI(uq->uq_thread)) pri = UPRI(uq->uq_thread); } } if (pri > uq_owner->uq_inherited_pri) pri = uq_owner->uq_inherited_pri; thread_lock(pi->pi_owner); sched_lend_user_prio(pi->pi_owner, pri); thread_unlock(pi->pi_owner); if ((pi = uq_owner->uq_pi_blocked) != NULL) umtx_pi_adjust_thread(pi, uq_owner->uq_thread); } } /* * Insert a PI mutex into owned list. */ static void umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq_owner; uq_owner = owner->td_umtxq; mtx_assert(&umtx_lock, MA_OWNED); MPASS(pi->pi_owner == NULL); pi->pi_owner = owner; TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link); } /* * Disown a PI mutex, and remove it from the owned list. */ static void umtx_pi_disown(struct umtx_pi *pi) { mtx_assert(&umtx_lock, MA_OWNED); TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested, pi, pi_link); pi->pi_owner = NULL; } /* * Claim ownership of a PI mutex. */ static int umtx_pi_claim(struct umtx_pi *pi, struct thread *owner) { struct umtx_q *uq; int pri; mtx_lock(&umtx_lock); if (pi->pi_owner == owner) { mtx_unlock(&umtx_lock); return (0); } if (pi->pi_owner != NULL) { /* * userland may have already messed the mutex, sigh. */ mtx_unlock(&umtx_lock); return (EPERM); } umtx_pi_setowner(pi, owner); uq = TAILQ_FIRST(&pi->pi_blocked); if (uq != NULL) { pri = UPRI(uq->uq_thread); thread_lock(owner); if (pri < UPRI(owner)) sched_lend_user_prio(owner, pri); thread_unlock(owner); } mtx_unlock(&umtx_lock); return (0); } /* * Adjust a thread's order position in its blocked PI mutex, * this may result new priority propagating process. 
*/
/* NOTE(review): `oldpri' is not read anywhere in this function. */
void
umtx_pi_adjust(struct thread *td, u_char oldpri)
{
	struct umtx_q *uq;
	struct umtx_pi *pi;

	uq = td->td_umtxq;
	mtx_lock(&umtx_lock);
	/*
	 * Pick up the lock that td is blocked on.
	 */
	pi = uq->uq_pi_blocked;
	if (pi != NULL) {
		umtx_pi_adjust_thread(pi, td);
		umtx_repropagate_priority(pi);
	}
	mtx_unlock(&umtx_lock);
}

/*
 * Sleep on a PI mutex.
 *
 * Entered with the chain for uq's key locked and marked busy.  The
 * thread is queued, inserted into pi's blocked list in priority order
 * and its priority propagated before the chain is unbusied and the
 * thread sleeps.  After the sleep all of that is undone and the chain
 * lock is released.  Returns the umtxq_sleep() result.
 */
static int
umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi, uint32_t owner,
    const char *wmesg, struct abs_timeout *timo, bool shared)
{
	struct thread *td, *td1;
	struct umtx_q *uq1;
	int error, pri;
#ifdef INVARIANTS
	/* Only consulted by the KASSERT below. */
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
#endif
	error = 0;
	td = uq->uq_thread;
	KASSERT(td == curthread, ("inconsistent uq_thread"));
	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&uq->uq_key));
	KASSERT(uc->uc_busy != 0, ("umtx chain is not busy"));
	umtxq_insert(uq);
	mtx_lock(&umtx_lock);
	if (pi->pi_owner == NULL) {
		/*
		 * No owner recorded yet: look the owning thread up by id
		 * with umtx_lock dropped, then re-check under the lock
		 * since an owner may have been set in the meantime.
		 */
		mtx_unlock(&umtx_lock);
		td1 = tdfind(owner, shared ? -1 : td->td_proc->p_pid);
		mtx_lock(&umtx_lock);
		if (td1 != NULL) {
			if (pi->pi_owner == NULL)
				umtx_pi_setowner(pi, td1);
			/* Presumably pairs with a lock taken by tdfind(). */
			PROC_UNLOCK(td1->td_proc);
		}
	}

	/* Find the first blocked thread with a numerically greater UPRI. */
	TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
		pri = UPRI(uq1->uq_thread);
		if (pri > UPRI(td))
			break;
	}

	if (uq1 != NULL)
		TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
	else
		TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);

	uq->uq_pi_blocked = pi;
	thread_lock(td);
	td->td_flags |= TDF_UPIBLOCKED;
	thread_unlock(td);
	umtx_propagate_priority(td);
	mtx_unlock(&umtx_lock);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, wmesg, timo);
	umtxq_remove(uq);

	/* Undo the blocked state and recompute lent priorities. */
	mtx_lock(&umtx_lock);
	uq->uq_pi_blocked = NULL;
	thread_lock(td);
	td->td_flags &= ~TDF_UPIBLOCKED;
	thread_unlock(td);
	TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
	umtx_repropagate_priority(pi);
	mtx_unlock(&umtx_lock);
	umtxq_unlock(&uq->uq_key);

	return (error);
}

/*
 * Add reference count for a PI mutex.
*/
static void
umtx_pi_ref(struct umtx_pi *pi)
{

	UMTXQ_LOCKED_ASSERT(umtxq_getchain(&pi->pi_key));
	pi->pi_refcount++;
}

/*
 * Decrease reference count for a PI mutex, if the counter
 * is decreased to zero, its memory space is freed.
 *
 * On the last reference the mutex is also disowned (if it has an owner)
 * and removed from its chain's PI list.  Requires the chain lock.
 */
static void
umtx_pi_unref(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
	if (--pi->pi_refcount == 0) {
		mtx_lock(&umtx_lock);
		if (pi->pi_owner != NULL)
			umtx_pi_disown(pi);
		KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
		    ("blocked queue not empty"));
		mtx_unlock(&umtx_lock);
		TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
		umtx_pi_free(pi);
	}
}

/*
 * Find a PI mutex in hash table.
 *
 * Returns NULL when the chain has no entry matching `key'.
 */
static struct umtx_pi *
umtx_pi_lookup(struct umtx_key *key)
{
	struct umtxq_chain *uc;
	struct umtx_pi *pi;

	uc = umtxq_getchain(key);
	UMTXQ_LOCKED_ASSERT(uc);

	TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
		if (umtx_key_match(&pi->pi_key, key)) {
			return (pi);
		}
	}
	return (NULL);
}

/*
 * Insert a PI mutex into hash table.
 */
static inline void
umtx_pi_insert(struct umtx_pi *pi)
{
	struct umtxq_chain *uc;

	uc = umtxq_getchain(&pi->pi_key);
	UMTXQ_LOCKED_ASSERT(uc);
	TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
}

/*
 * Lock a PI mutex.
 *
 * A non-zero `try' makes the call return EBUSY instead of sleeping.
 * A reference on the kernel umtx_pi for the mutex is held for the
 * duration of the call.
 */
static int
do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	struct umtx_pi *pi, *new_pi;
	uint32_t id, old_owner, owner, old;
	int error, rv;

	id = td->td_tid;
	uq = td->td_umtxq;

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/*
	 * Find or create the umtx_pi for this key.  The first allocation
	 * attempt is made with M_NOWAIT while the chain lock is held; if
	 * it fails the lock is dropped for an M_WAITOK allocation and the
	 * lookup is repeated, since another thread may have inserted an
	 * entry in the meantime.
	 */
	umtxq_lock(&uq->uq_key);
	pi = umtx_pi_lookup(&uq->uq_key);
	if (pi == NULL) {
		new_pi = umtx_pi_alloc(M_NOWAIT);
		if (new_pi == NULL) {
			umtxq_unlock(&uq->uq_key);
			new_pi = umtx_pi_alloc(M_WAITOK);
			umtxq_lock(&uq->uq_key);
			pi = umtx_pi_lookup(&uq->uq_key);
			if (pi != NULL) {
				umtx_pi_free(new_pi);
				new_pi = NULL;
			}
		}
		if (new_pi != NULL) {
			new_pi->pi_key = uq->uq_key;
			umtx_pi_insert(new_pi);
			pi = new_pi;
		}
	}
	umtx_pi_ref(pi);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Care must be exercised when dealing with umtx structure.  It
	 * can fault on any access.
	 */
	for (;;) {
		/*
		 * Try the uncontested case.  This should be done in userland.
		 */
		rv = casueword32(&m->m_owner, UMUTEX_UNOWNED, &owner, id);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		/* The acquire succeeded. */
		if (rv == 0) {
			MPASS(owner == UMUTEX_UNOWNED);
			error = 0;
			break;
		}

		if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * Avoid overwriting a possible error from sleep due
		 * to the pending signal with suspension check result.
		 */
		if (error == 0) {
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		/* If no one owns it but it is contested try to acquire it. */
		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
			old_owner = owner;
			rv = casueword32(&m->m_owner, owner, &owner,
			    id | UMUTEX_CONTESTED);
			/* The address was invalid. */
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 1) {
				if (error == 0) {
					error = thread_check_susp(td, true);
					if (error != 0)
						break;
				}

				/*
				 * If this failed the lock could
				 * changed, restart.
				 */
				continue;
			}

			MPASS(rv == 0);
			MPASS(owner == old_owner);
			umtxq_lock(&uq->uq_key);
			umtxq_busy(&uq->uq_key);
			error = umtx_pi_claim(pi, td);
			umtxq_unbusy(&uq->uq_key);
			umtxq_unlock(&uq->uq_key);
			if (error != 0) {
				/*
				 * Since we're going to return an
				 * error, restore the m_owner to its
				 * previous, unowned state to avoid
				 * compounding the problem.
				 */
				(void)casuword32(&m->m_owner,
				    id | UMUTEX_CONTESTED, old_owner);
			}
			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
				error = EOWNERDEAD;
			break;
		}

		/* The owner word already names the calling thread. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			error = EDEADLK;
			break;
		}

		if (try != 0) {
			error = EBUSY;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/* The chain stays busy until umtxq_sleep_pi() unbusies it. */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Set the contested bit so that a release in user space
		 * knows to use the system call for unlock.  If this fails
		 * either some one else has acquired the lock or it has been
		 * released.
		 */
		rv = casueword32(&m->m_owner, owner, &old, owner |
		    UMUTEX_CONTESTED);

		/* The address was invalid. */
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		if (rv == 1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;

			/*
			 * The lock changed and we need to retry or we
			 * lost a race to the thread unlocking the
			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
			 * value for owner is impossible there.
			 */
			continue;
		}

		umtxq_lock(&uq->uq_key);

		/* We set the contested bit, sleep. */
		MPASS(old == owner);
		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
		    "umtxpi", timeout == NULL ? NULL : &timo,
		    (flags & USYNC_PROCESS_SHARED) != 0);
		/* A sleep error is kept while the acquire is retried once. */
		if (error != 0)
			continue;

		error = thread_check_susp(td, false);
		if (error != 0)
			break;
	}

	umtxq_lock(&uq->uq_key);
	umtx_pi_unref(pi);
	umtxq_unlock(&uq->uq_key);

	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PI mutex.
*/
/*
 * do_unlock_pi() returns 0 on success; EFAULT when an access to *m
 * faults; EPERM when the caller is not the owner recorded in m_owner or
 * in the kernel umtx_pi; EINVAL when the final compare-and-set finds an
 * owner word different from the one read earlier; otherwise an error from
 * umtx_key_get() or thread_check_susp().
 */
static int
do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq_first, *uq_first2, *uq_me;
	struct umtx_pi *pi, *pi2;
	uint32_t id, new_owner, old, owner;
	int count, error, pri;

	id = td->td_tid;

usrloop:
	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	new_owner = umtx_unlock_val(flags, rb);

	/* This should be done in userland */
	if ((owner & UMUTEX_CONTESTED) == 0) {
		error = casueword32(&m->m_owner, owner, &old, new_owner);
		if (error == -1)
			return (EFAULT);
		if (error == 1) {
			error = thread_check_susp(td, true);
			if (error != 0)
				return (error);
			goto usrloop;
		}
		if (old == owner)
			return (0);
		owner = old;
	}

	/* We should only ever be in here for contested locks */
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PI_ROBUST_UMUTEX : TYPE_PI_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	count = umtxq_count_pi(&key, &uq_first);
	if (uq_first != NULL) {
		mtx_lock(&umtx_lock);
		pi = uq_first->uq_pi_blocked;
		KASSERT(pi != NULL, ("pi == NULL?"));
		if (pi->pi_owner != td && !(rb && pi->pi_owner == NULL)) {
			mtx_unlock(&umtx_lock);
			umtxq_unbusy(&key);
			umtxq_unlock(&key);
			umtx_key_release(&key);
			/* userland messed up the mutex */
			return (EPERM);
		}
		uq_me = td->td_umtxq;
		if (pi->pi_owner == td)
			umtx_pi_disown(pi);
		/* Get the highest priority thread which is still sleeping. */
		uq_first = TAILQ_FIRST(&pi->pi_blocked);
		while (uq_first != NULL &&
		    (uq_first->uq_flags & UQF_UMTXQ) == 0) {
			uq_first = TAILQ_NEXT(uq_first, uq_lockq);
		}
		/*
		 * Recompute the priority to lend to this thread from the
		 * first blocked thread of every PI mutex it still has on
		 * its uq_pi_contested list.
		 */
		pri = PRI_MAX;
		TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
			uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
			if (uq_first2 != NULL) {
				if (pri > UPRI(uq_first2->uq_thread))
					pri = UPRI(uq_first2->uq_thread);
			}
		}
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
		if (uq_first)
			umtxq_signal_thread(uq_first);
	} else {
		pi = umtx_pi_lookup(&key);
		/*
		 * A umtx_pi can exist if a signal or timeout removed the
		 * last waiter from the umtxq, but there is still
		 * a thread in do_lock_pi() holding the umtx_pi.
		 */
		if (pi != NULL) {
			/*
			 * The umtx_pi can be unowned, such as when a thread
			 * has just entered do_lock_pi(), allocated the
			 * umtx_pi, and unlocked the umtxq.
			 * If the current thread owns it, it must disown it.
			 */
			mtx_lock(&umtx_lock);
			if (pi->pi_owner == td)
				umtx_pi_disown(pi);
			mtx_unlock(&umtx_lock);
		}
	}
	umtxq_unlock(&key);

	/*
	 * When unlocking the umtx, it must be marked as unowned if
	 * there is zero or one thread only waiting for it.
	 * Otherwise, it must be marked as contested.
	 */

	if (count > 1)
		new_owner |= UMUTEX_CONTESTED;
again:
	error = casueword32(&m->m_owner, owner, &old, new_owner);
	if (error == 1) {
		error = thread_check_susp(td, false);
		if (error == 0)
			goto again;
	}
	umtxq_unbusy_unlocked(&key);
	umtx_key_release(&key);
	if (error == -1)
		return (EFAULT);
	if (error == 0 && old != owner)
		return (EINVAL);
	return (error);
}

/*
 * Lock a PP mutex.
 *
 * td is the locking thread, m the userland mutex, flags the mutex flags
 * already fetched by the caller, timeout an optional timeout (NULL means
 * none) and try, when non-zero, makes the call fail with EBUSY instead
 * of sleeping.
 *
 * Returns 0 on success; EOWNERDEAD when the lock was taken over from
 * UMUTEX_RB_OWNERDEAD; EFAULT when an access to *m faults; EINVAL when
 * the stored ceiling is out of range or the thread's priority is above
 * it; ENOTRECOVERABLE or EBUSY as set in the loop below; otherwise an
 * error from umtx_key_get() or umtxq_sleep().
 */
static int
do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags,
    struct _umtx_time *timeout, int try)
{
	struct abs_timeout timo;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t ceiling;
	uint32_t owner, id;
	int error, pri, old_inherited_pri, su, rv;

	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	/* su is non-zero when the thread passes the PRIV_SCHED_RTPRIO check. */
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
	for (;;) {
		old_inherited_pri = uq->uq_inherited_pri;
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &ceiling);
		if (rv == -1) {
			error = EFAULT;
			goto out;
		}
		/*
		 * ceiling is unsigned, so a stored value larger than
		 * RTP_PRIO_MAX wraps around and is caught by this check.
		 */
		ceiling = RTP_PRIO_MAX - ceiling;
		if (ceiling > RTP_PRIO_MAX) {
			error = EINVAL;
			goto out;
		}

		mtx_lock(&umtx_lock);
		if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
			mtx_unlock(&umtx_lock);
			error = EINVAL;
			goto out;
		}
		if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
			uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
			thread_lock(td);
			if (uq->uq_inherited_pri < UPRI(td))
				sched_lend_user_prio(td, uq->uq_inherited_pri);
			thread_unlock(td);
		}
		mtx_unlock(&umtx_lock);

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		/* The address was invalid. */
		if (rv == -1) {
			error = EFAULT;
			break;
		}
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			error = 0;
			break;
		}
		/* rv == 1 */
		if (owner == UMUTEX_RB_OWNERDEAD) {
			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
			    &owner, id | UMUTEX_CONTESTED);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(owner == UMUTEX_RB_OWNERDEAD);
				error = EOWNERDEAD; /* success */
				break;
			}

			/*
			 * rv == 1, only check for suspension if we
			 * have not already caught a signal.  If we
			 * get an error from the check, the same
			 * condition is checked by the umtxq_sleep()
			 * call below, so we should obliterate the
			 * error to not skip the last loop iteration.
			 */
			if (error == 0) {
				error = thread_check_susp(td, false);
				if (error == 0) {
					if (try != 0)
						error = EBUSY;
					else
						continue;
				}
				error = 0;
			}
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
		}

		if (try != 0)
			error = EBUSY;

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", timeout == NULL ?
		    NULL : &timo);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Undo the priority change made above before retrying:
		 * restore uq_inherited_pri and lend the thread the smaller
		 * of that value and the priority of the first blocked
		 * thread on any PI mutex it has contested.
		 */
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

	/* On failure, restore the priority state the same way. */
	if (error != 0 && error != EOWNERDEAD) {
		mtx_lock(&umtx_lock);
		uq->uq_inherited_pri = old_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}

out:
	umtxq_unbusy_unlocked(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Unlock a PP mutex.
 *
 * Returns 0 on success; EFAULT when an access to *m faults; EPERM when
 * the caller is not the owner recorded in m_owner; EINVAL when
 * m_ceilings[1] is out of range; otherwise an error from copyin() or
 * umtx_key_get().
 */
static int
do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags, bool rb)
{
	struct umtx_key key;
	struct umtx_q *uq, *uq2;
	struct umtx_pi *pi;
	uint32_t id, owner, rceiling;
	int error, pri, new_inherited_pri, su;

	id = td->td_tid;
	uq = td->td_umtxq;
	su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);

	/*
	 * Make sure we own this mtx.
	 */
	error = fueword32(&m->m_owner, &owner);
	if (error == -1)
		return (EFAULT);

	if ((owner & ~UMUTEX_CONTESTED) != id)
		return (EPERM);

	error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
	if (error != 0)
		return (error);

	/* A stored value of all ones selects PRI_MAX as the new priority. */
	if (rceiling == -1)
		new_inherited_pri = PRI_MAX;
	else {
		rceiling = RTP_PRIO_MAX - rceiling;
		if (rceiling > RTP_PRIO_MAX)
			return (EINVAL);
		new_inherited_pri = PRI_MIN_REALTIME + rceiling;
	}

	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_unlock(&key);
	/*
	 * For priority protected mutex, always set unlocked state
	 * to UMUTEX_CONTESTED, so that userland always enters kernel
	 * to lock the mutex, it is necessary because thread priority
	 * has to be adjusted for such mutex.
	 */
	error = suword32(&m->m_owner, umtx_unlock_val(flags, rb) |
	    UMUTEX_CONTESTED);

	umtxq_lock(&key);
	if (error == 0)
		umtxq_signal(&key, 1);
	umtxq_unbusy(&key);
	umtxq_unlock(&key);

	if (error == -1)
		error = EFAULT;
	else {
		mtx_lock(&umtx_lock);
		if (su != 0)
			uq->uq_inherited_pri = new_inherited_pri;
		pri = PRI_MAX;
		TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
			uq2 = TAILQ_FIRST(&pi->pi_blocked);
			if (uq2 != NULL) {
				if (pri > UPRI(uq2->uq_thread))
					pri = UPRI(uq2->uq_thread);
			}
		}
		if (pri > uq->uq_inherited_pri)
			pri = uq->uq_inherited_pri;
		thread_lock(td);
		sched_lend_user_prio(td, pri);
		thread_unlock(td);
		mtx_unlock(&umtx_lock);
	}
	umtx_key_release(&key);
	return (error);
}

/*
 * Store a new value in m_ceilings[0] of a priority-protected mutex and,
 * when old_ceiling is not NULL, write the previous value there.
 *
 * Returns 0 on success; EINVAL when the mutex does not have
 * UMUTEX_PRIO_PROTECT set or ceiling exceeds RTP_PRIO_MAX; EFAULT when a
 * user-memory access faults; EOWNERDEAD or ENOTRECOVERABLE for the
 * corresponding robust owner values; otherwise an error from
 * umtx_key_get() or umtxq_sleep().
 */
static int
do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
    uint32_t *old_ceiling)
{
	struct umtx_q *uq;
	uint32_t flags, id, owner, save_ceiling;
	int error, rv, rv1;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((flags & UMUTEX_PRIO_PROTECT) == 0)
		return (EINVAL);
	if (ceiling > RTP_PRIO_MAX)
		return (EINVAL);
	id = td->td_tid;
	uq = td->td_umtxq;
	if ((error = umtx_key_get(m, (flags & UMUTEX_ROBUST) != 0 ?
	    TYPE_PP_ROBUST_UMUTEX : TYPE_PP_UMUTEX, GET_SHARE(flags),
	    &uq->uq_key)) != 0)
		return (error);
	for (;;) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		rv = fueword32(&m->m_ceilings[0], &save_ceiling);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		rv = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
		    id | UMUTEX_CONTESTED);
		if (rv == -1) {
			error = EFAULT;
			break;
		}

		/*
		 * The mutex was free and is now briefly owned by us:
		 * update the ceiling and release it again.
		 */
		if (rv == 0) {
			MPASS(owner == UMUTEX_CONTESTED);
			rv = suword32(&m->m_ceilings[0], ceiling);
			rv1 = suword32(&m->m_owner, UMUTEX_CONTESTED);
			error = (rv == 0 && rv1 == 0) ? 0: EFAULT;
			break;
		}

		/* The caller already owns the mutex; just store the ceiling. */
		if ((owner & ~UMUTEX_CONTESTED) == id) {
			rv = suword32(&m->m_ceilings[0], ceiling);
			error = rv == 0 ? 0 : EFAULT;
			break;
		}

		if (owner == UMUTEX_RB_OWNERDEAD) {
			error = EOWNERDEAD;
			break;
		} else if (owner == UMUTEX_RB_NOTRECOV) {
			error = ENOTRECOVERABLE;
			break;
		}

		/*
		 * If we caught a signal, we have retried and now
		 * exit immediately.
		 */
		if (error != 0)
			break;

		/*
		 * We set the contested bit, sleep. Otherwise the lock changed
		 * and we need to retry or we lost a race to the thread
		 * unlocking the umtx.
		 */
		umtxq_lock(&uq->uq_key);
		umtxq_insert(uq);
		umtxq_unbusy(&uq->uq_key);
		error = umtxq_sleep(uq, "umtxpp", NULL);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
	}
	umtxq_lock(&uq->uq_key);
	if (error == 0)
		umtxq_signal(&uq->uq_key, INT_MAX);
	umtxq_unbusy(&uq->uq_key);
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	if (error == 0 && old_ceiling != NULL) {
		rv = suword32(old_ceiling, save_ceiling);
		error = rv == 0 ? 0 : EFAULT;
	}
	return (error);
}

/*
 * Lock a userland POSIX mutex.
*/
/*
 * do_lock_umutex() reads m_flags and dispatches to the normal,
 * priority-inherit or priority-protect lock routine.  Having both
 * UMUTEX_PRIO_INHERIT and UMUTEX_PRIO_PROTECT set yields EINVAL; a fault
 * while reading the flags yields EFAULT.
 */
static int
do_lock_umutex(struct thread *td, struct umutex *m,
    struct _umtx_time *timeout, int mode)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		error = do_lock_normal(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_INHERIT:
		error = do_lock_pi(td, m, flags, timeout, mode);
		break;
	case UMUTEX_PRIO_PROTECT:
		error = do_lock_pp(td, m, flags, timeout, mode);
		break;
	default:
		return (EINVAL);
	}
	if (timeout == NULL) {
		/*
		 * Without a timeout an interrupted lock is reported as
		 * ERESTART, except in _UMUTEX_WAIT mode.
		 */
		if (error == EINTR && mode != _UMUTEX_WAIT)
			error = ERESTART;
	} else {
		/* Timed-locking is not restarted. */
		if (error == ERESTART)
			error = EINTR;
	}
	return (error);
}

/*
 * Unlock a userland POSIX mutex.
 *
 * Reads m_flags and dispatches to the matching unlock routine; returns
 * EFAULT if the flags cannot be read and EINVAL if both protocol bits
 * are set.
 */
static int
do_unlock_umutex(struct thread *td, struct umutex *m, bool rb)
{
	uint32_t flags;
	int error;

	error = fueword32(&m->m_flags, &flags);
	if (error == -1)
		return (EFAULT);

	switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
	case 0:
		return (do_unlock_normal(td, m, flags, rb));
	case UMUTEX_PRIO_INHERIT:
		return (do_unlock_pi(td, m, flags, rb));
	case UMUTEX_PRIO_PROTECT:
		return (do_unlock_pp(td, m, flags, rb));
	}

	return (EINVAL);
}

/*
 * Wait on a userland condition variable: queue the thread on cv, unlock
 * the user mutex m, then sleep until woken, interrupted or timed out.
 *
 * timeout may be NULL for no timeout.  wflags may carry CVWAIT_CLOCKID
 * (take the clock from cv->c_clockid instead of CLOCK_REALTIME) and
 * CVWAIT_ABSTIME (timeout is absolute).
 *
 * Returns 0 when the thread is no longer on the queue after the sleep;
 * EFAULT when reading c_flags or c_clockid faults; EINVAL for a rejected
 * clock id; otherwise an error from umtx_key_get(), do_unlock_umutex()
 * or umtxq_sleep(), with ERESTART converted to EINTR.
 */
static int
do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
    struct timespec *timeout, u_long wflags)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, clockid, hasw;
	int error;

	uq = td->td_umtxq;
	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if ((wflags & CVWAIT_CLOCKID) != 0) {
		error = fueword32(&cv->c_clockid, &clockid);
		if (error == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		if (clockid < CLOCK_REALTIME ||
		    clockid >= CLOCK_THREAD_CPUTIME_ID) {
			/* hmm, only HW clock id will work. */
			umtx_key_release(&uq->uq_key);
			return (EINVAL);
		}
	} else {
		clockid = CLOCK_REALTIME;
	}

	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);

	/*
	 * Set c_has_waiters to 1 before releasing user mutex, also
	 * don't modify cache line when unnecessary.
	 */
	error = fueword32(&cv->c_has_waiters, &hasw);
	if (error == 0 && hasw == 0)
		suword32(&cv->c_has_waiters, 1);

	umtxq_unbusy_unlocked(&uq->uq_key);

	error = do_unlock_umutex(td, m, false);

	if (timeout != NULL)
		abs_timeout_init(&timo, clockid, (wflags & CVWAIT_ABSTIME) != 0,
		    timeout);

	umtxq_lock(&uq->uq_key);
	/* Only sleep if the user mutex was unlocked successfully. */
	if (error == 0) {
		error = umtxq_sleep(uq, "ucond", timeout == NULL ?
		    NULL : &timo);
	}

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		/*
		 * This must be a timeout, an interruption by a signal, or
		 * a spurious wakeup; clear the c_has_waiters flag when
		 * necessary.
		 */
		umtxq_busy(&uq->uq_key);
		if ((uq->uq_flags & UQF_UMTXQ) != 0) {
			int oldlen = uq->uq_cur_queue->length;
			umtxq_remove(uq);
			/* This thread was the only entry on its queue. */
			if (oldlen == 1) {
				umtxq_unlock(&uq->uq_key);
				suword32(&cv->c_has_waiters, 0);
				umtxq_lock(&uq->uq_key);
			}
		}
		umtxq_unbusy(&uq->uq_key);
		if (error == ERESTART)
			error = EINTR;
	}

	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland condition variable.
*/
/*
 * do_cv_signal() wakes at most one waiter and clears c_has_waiters when
 * the number of queued waiters did not exceed the number woken.
 */
static int
do_cv_signal(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error, cnt, nwake;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	nwake = umtxq_signal(&key, 1);
	if (cnt <= nwake) {
		/* The chain lock is dropped around the user-memory store. */
		umtxq_unlock(&key);
		error = suword32(&cv->c_has_waiters, 0);
		if (error == -1)
			error = EFAULT;
		umtxq_lock(&key);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

/*
 * Wake every waiter on a userland condition variable and clear
 * c_has_waiters.
 */
static int
do_cv_broadcast(struct thread *td, struct ucond *cv)
{
	struct umtx_key key;
	int error;
	uint32_t flags;

	error = fueword32(&cv->c_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
		return (error);

	umtxq_lock(&key);
	umtxq_busy(&key);
	umtxq_signal(&key, INT_MAX);
	umtxq_unlock(&key);

	error = suword32(&cv->c_has_waiters, 0);
	if (error == -1)
		error = EFAULT;

	umtxq_unbusy_unlocked(&key);

	umtx_key_release(&key);
	return (error);
}

/*
 * Acquire a userland rwlock for reading.
 *
 * The reader is blocked by URWLOCK_WRITE_OWNER and, unless
 * URWLOCK_PREFER_READER is set in fflag or in the lock's rw_flags, also
 * by URWLOCK_WRITE_WAITERS.  timeout may be NULL for no timeout.
 *
 * Returns 0 on success; EFAULT when reading rw_flags, rw_state or
 * rw_blocked_readers, or a compare-and-set on rw_state, faults; EAGAIN
 * when the reader count is already
 * URWLOCK_MAX_READERS; otherwise an error from umtx_key_get(),
 * thread_check_susp() or umtxq_sleep(), with ERESTART converted to EINTR.
 */
static int
do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, wrflags;
	int32_t state, oldstate;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	wrflags = URWLOCK_WRITE_OWNER;
	if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
		wrflags |= URWLOCK_WRITE_WAITERS;

	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}

		/* try to lock it */
		while (!(state & wrflags)) {
			if (__predict_false(URWLOCK_READER_COUNT(state) ==
			    URWLOCK_MAX_READERS)) {
				umtx_key_release(&uq->uq_key);
				return (EAGAIN);
			}
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state + 1);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			state = oldstate;
		}

		if (error)
			break;

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * re-read the state, in case it changed between the try-lock above
		 * and the check below
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		/* set read contention bit */
		while (error == 0 && (state & wrflags) &&
		    !(state & URWLOCK_READ_WAITERS)) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_READ_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* state is changed while setting flags, restart */
		if (!(state & wrflags)) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
			continue;
		}

sleep:
		/*
		 * Contention bit is set, before sleeping, increase
		 * read waiter count.
		 */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers+1);

		while (state & wrflags) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert(uq);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "urdlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		/* decrease read waiter count, and may clear read contention bit */
		rv = fueword32(&rwlock->rw_blocked_readers,
		    &blocked_readers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
		if (blocked_readers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_READ_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				if (error1 != 0) {
					/* Keep an earlier error if there is one. */
					if (error == 0)
						error = error1;
					break;
				}
			}
		}

		umtxq_unbusy_unlocked(&uq->uq_key);
		if (error != 0)
			break;
	}
	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Acquire a userland rwlock for writing.
 *
 * The writer is blocked while URWLOCK_WRITE_OWNER is set or the reader
 * count is non-zero.  timeout may be NULL for no timeout.
 *
 * Returns 0 on success; EFAULT when reading rw_flags, rw_state,
 * rw_blocked_writers or rw_blocked_readers, or a compare-and-set on
 * rw_state, faults; otherwise an error
 * from umtx_key_get(), thread_check_susp() or umtxq_sleep(), with
 * ERESTART converted to EINTR.
 */
static int
do_rw_wrlock(struct thread *td, struct urwlock *rwlock,
    struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int32_t blocked_writers;
	int32_t blocked_readers;
	int error, error1, rv;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

	blocked_readers = 0;
	for (;;) {
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1) {
			umtx_key_release(&uq->uq_key);
			return (EFAULT);
		}
		while ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				umtx_key_release(&uq->uq_key);
				return (EFAULT);
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				umtx_key_release(&uq->uq_key);
				return (0);
			}
			state = oldstate;
			error = thread_check_susp(td, true);
			if (error != 0)
				break;
		}

		if (error) {
			/*
			 * Leaving with an error: if no writer owns or waits
			 * for the lock and the previous pass saw blocked
			 * readers, wake the shared queue before returning.
			 */
			if ((state & (URWLOCK_WRITE_OWNER |
			    URWLOCK_WRITE_WAITERS)) == 0 &&
			    blocked_readers != 0) {
				umtxq_lock(&uq->uq_key);
				umtxq_busy(&uq->uq_key);
				umtxq_signal_queue(&uq->uq_key, INT_MAX,
				    UMTX_SHARED_QUEUE);
				umtxq_unbusy(&uq->uq_key);
				umtxq_unlock(&uq->uq_key);
			}

			break;
		}

		/* grab monitor lock */
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);

		/*
		 * Re-read the state, in case it changed between the
		 * try-lock above and the check below.
		 */
		rv = fueword32(&rwlock->rw_state, &state);
		if (rv == -1)
			error = EFAULT;

		while (error == 0 && ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) &&
		    (state & URWLOCK_WRITE_WAITERS) == 0) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state | URWLOCK_WRITE_WAITERS);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
			if (rv == 0) {
				MPASS(oldstate == state);
				goto sleep;
			}
			state = oldstate;
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
		}
		if (error != 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			break;
		}

		/* The lock became free while setting the flag; retry. */
		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
		    URWLOCK_READER_COUNT(state) == 0) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = thread_check_susp(td, false);
			if (error != 0)
				break;
			continue;
		}
sleep:
		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers + 1);

		while ((state & URWLOCK_WRITE_OWNER) ||
		    URWLOCK_READER_COUNT(state) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unbusy(&uq->uq_key);

			error = umtxq_sleep(uq, "uwrlck", timeout == NULL ?
			    NULL : &timo);

			umtxq_busy(&uq->uq_key);
			umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
			umtxq_unlock(&uq->uq_key);
			if (error)
				break;
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				error = EFAULT;
				break;
			}
		}

		rv = fueword32(&rwlock->rw_blocked_writers,
		    &blocked_writers);
		if (rv == -1) {
			umtxq_unbusy_unlocked(&uq->uq_key);
			error = EFAULT;
			break;
		}
		suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
		if (blocked_writers == 1) {
			rv = fueword32(&rwlock->rw_state, &state);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
			for (;;) {
				rv = casueword32(&rwlock->rw_state, state,
				    &oldstate, state & ~URWLOCK_WRITE_WAITERS);
				if (rv == -1) {
					error = EFAULT;
					break;
				}
				if (rv == 0) {
					MPASS(oldstate == state);
					break;
				}
				state = oldstate;
				error1 = thread_check_susp(td, false);
				/*
				 * We are leaving the URWLOCK_WRITE_WAITERS
				 * behind, but this should not harm the
				 * correctness.
				 */
				if (error1 != 0) {
					if (error == 0)
						error = error1;
					break;
				}
			}
			rv = fueword32(&rwlock->rw_blocked_readers,
			    &blocked_readers);
			if (rv == -1) {
				umtxq_unbusy_unlocked(&uq->uq_key);
				error = EFAULT;
				break;
			}
		} else
			blocked_readers = 0;

		umtxq_unbusy_unlocked(&uq->uq_key);
	}

	umtx_key_release(&uq->uq_key);
	if (error == ERESTART)
		error = EINTR;
	return (error);
}

/*
 * Release a userland rwlock held for reading or writing and wake
 * waiters according to the waiter bits in rw_state.
 *
 * Returns 0 on success; EFAULT when an access to *rwlock faults; EPERM
 * when the state shows neither a write owner nor any reader; otherwise an
 * error from umtx_key_get() or thread_check_susp().
 */
static int
do_rw_unlock(struct thread *td, struct urwlock *rwlock)
{
	struct umtx_q *uq;
	uint32_t flags;
	int32_t state, oldstate;
	int error, rv, q, count;

	uq = td->td_umtxq;
	error = fueword32(&rwlock->rw_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	error = fueword32(&rwlock->rw_state, &state);
	if (error == -1) {
		error = EFAULT;
		goto out;
	}
	if (state & URWLOCK_WRITE_OWNER) {
		/* Write-locked: clear the owner bit. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state & ~URWLOCK_WRITE_OWNER);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else if (URWLOCK_READER_COUNT(state) != 0) {
		/* Read-locked: drop one reader. */
		for (;;) {
			rv = casueword32(&rwlock->rw_state, state,
			    &oldstate, state - 1);
			if (rv == -1) {
				error = EFAULT;
				goto out;
			}
			if (rv == 1) {
				state = oldstate;
				if (URWLOCK_READER_COUNT(oldstate) == 0) {
					error = EPERM;
					goto out;
				}
				error = thread_check_susp(td, true);
				if (error != 0)
					goto out;
			} else
				break;
		}
	} else {
		error = EPERM;
		goto out;
	}

	count = 0;

	/*
	 * Pick which queue to wake: one writer or all readers.  Writers
	 * are considered first unless the lock prefers readers.
	 */
	if (!(flags & URWLOCK_PREFER_READER)) {
		if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		} else if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		}
	} else {
		if (state & URWLOCK_READ_WAITERS) {
			count = INT_MAX;
			q = UMTX_SHARED_QUEUE;
		} else if (state & URWLOCK_WRITE_WAITERS) {
			count = 1;
			q = UMTX_EXCLUSIVE_QUEUE;
		}
	}

	if (count) {
		umtxq_lock(&uq->uq_key);
		umtxq_busy(&uq->uq_key);
		umtxq_signal_queue(&uq->uq_key, count, q);
		umtxq_unbusy(&uq->uq_key);
		umtxq_unlock(&uq->uq_key);
	}
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
/*
 * Wait on a struct _usem semaphore.
 *
 * Returns 0 when the thread was woken or did not need to sleep; EFAULT
 * when an access to *sem faults; otherwise an error from umtx_key_get(),
 * thread_check_susp() or umtxq_sleep().
 */
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t flags, count, count1;
	int error, rv, rv1;

	uq = td->td_umtxq;
	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);

	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

again:
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
	if (rv == 0)
		rv1 = fueword32(&sem->_count, &count);
	/*
	 * Do not sleep if an access faulted, if the count is already
	 * non-zero, or if the compare-and-set failed although
	 * _has_waiters read as zero.
	 */
	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) ||
	    (rv == 1 && count1 == 0)) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		if (rv == 1) {
			rv = thread_check_susp(td, true);
			if (rv == 0)
				goto again;
			error = rv;
			goto out;
		}
		if (rv == 0)
			rv = rv1;
		error = rv == -1 ? EFAULT : 0;
		goto out;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		/* A relative timeout cannot be restarted. */
		if (error == ERESTART && timeout != NULL &&
		    (timeout->_flags & UMTX_ABSTIME) == 0)
			error = EINTR;
	}
	umtxq_unlock(&uq->uq_key);
out:
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem_wake(struct thread *td, struct _usem *sem)
{
	struct umtx_key key;
	int error, cnt;
	uint32_t flags;

	error = fueword32(&sem->_flags, &flags);
	if (error == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * Check if count is greater than 0, this means the memory is
		 * still being referenced by user code, so we can safely
		 * update _has_waiters flag.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			error = suword32(&sem->_has_waiters, 0);
			umtxq_lock(&key);
			if (error == -1)
				error = EFAULT;
		}
		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}
#endif

/*
 * Wait on a struct _usem2 semaphore.
 *
 * Returns 0 when the count is non-zero or the thread was woken; EFAULT
 * when an access to sem->_count faults; otherwise an error from
 * umtx_key_get(), thread_check_susp() or umtxq_sleep().  For a relative
 * timeout that ends in EINTR, the remaining time is stored back into
 * timeout->_timeout.
 */
static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
	struct abs_timeout timo;
	struct umtx_q *uq;
	uint32_t count, flags;
	int error, rv;

	uq = td->td_umtxq;
	/* NOTE(review): the fuword32() result is not checked for a fault. */
	flags = fuword32(&sem->_flags);
	if (timeout != NULL)
		abs_timeout_init2(&timo, timeout);

again:
	error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
	if (error != 0)
		return (error);
	umtxq_lock(&uq->uq_key);
	umtxq_busy(&uq->uq_key);
	umtxq_insert(uq);
	umtxq_unlock(&uq->uq_key);
	rv = fueword32(&sem->_count, &count);
	if (rv == -1) {
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		return (EFAULT);
	}
	for (;;) {
		if (USEM_COUNT(count) != 0) {
			umtxq_lock(&uq->uq_key);
			umtxq_unbusy(&uq->uq_key);
			umtxq_remove(uq);
			umtxq_unlock(&uq->uq_key);
			umtx_key_release(&uq->uq_key);
			return (0);
		}
		if (count == USEM_HAS_WAITERS)
			break;
		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
		if (rv == 0)
			break;
		/* The compare-and-set did not succeed: dequeue and start over. */
		umtxq_lock(&uq->uq_key);
		umtxq_unbusy(&uq->uq_key);
		umtxq_remove(uq);
		umtxq_unlock(&uq->uq_key);
		umtx_key_release(&uq->uq_key);
		if (rv == -1)
			return (EFAULT);
		rv = thread_check_susp(td, true);
		if (rv != 0)
			return (rv);
		goto again;
	}
	umtxq_lock(&uq->uq_key);
	umtxq_unbusy(&uq->uq_key);

	error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);

	if ((uq->uq_flags & UQF_UMTXQ) == 0)
		error = 0;
	else {
		umtxq_remove(uq);
		if (timeout != NULL && (timeout->_flags & UMTX_ABSTIME) == 0) {
			/* A relative timeout cannot be restarted. */
			if (error == ERESTART)
				error = EINTR;
			if (error == EINTR) {
				abs_timeout_update(&timo);
				timespecsub(&timo.end, &timo.cur,
				    &timeout->_timeout);
			}
		}
	}
	umtxq_unlock(&uq->uq_key);
	umtx_key_release(&uq->uq_key);
	return (error);
}

/*
 * Signal a userland semaphore.
 */
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
	struct umtx_key key;
	int error, cnt, rv;
	uint32_t count, flags;

	rv = fueword32(&sem->_flags, &flags);
	if (rv == -1)
		return (EFAULT);
	if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
		return (error);
	umtxq_lock(&key);
	umtxq_busy(&key);
	cnt = umtxq_count(&key);
	if (cnt > 0) {
		/*
		 * If this was the last sleeping thread, clear the waiters
		 * flag in _count.
		 */
		if (cnt == 1) {
			umtxq_unlock(&key);
			rv = fueword32(&sem->_count, &count);
			while (rv != -1 && count & USEM_HAS_WAITERS) {
				rv = casueword32(&sem->_count, count, &count,
				    count & ~USEM_HAS_WAITERS);
				if (rv == 1) {
					rv = thread_check_susp(td, true);
					if (rv != 0)
						break;
				}
			}
			if (rv == -1)
				error = EFAULT;
			else if (rv > 0) {
				error = rv;
			}
			umtxq_lock(&key);
		}

		umtxq_signal(&key, 1);
	}
	umtxq_unbusy(&key);
	umtxq_unlock(&key);
	umtx_key_release(&key);
	return (error);
}

inline int
-umtx_copyin_timeout(const void *addr, struct timespec *tsp)
+umtx_copyin_timeout(const void *uaddr, struct timespec *tsp)
{
	int error;

-	error = copyin(addr, tsp, sizeof(struct timespec));
+	error = copyin(uaddr, tsp, sizeof(*tsp));
	if (error == 0) {
		if (tsp->tv_sec < 0 ||
		    tsp->tv_nsec >= 1000000000 ||
		    tsp->tv_nsec < 0)
			error = EINVAL;
	}
	return (error);
}

static inline int
-umtx_copyin_umtx_time(const void *addr, size_t size, struct _umtx_time *tp)
+umtx_copyin_umtx_time(const void *uaddr, size_t size, struct _umtx_time *tp)
{
	int error;

-	if (size <= sizeof(struct timespec)) {
+	if (size <= sizeof(tp->_timeout)) {
		tp->_clockid = CLOCK_REALTIME;
		tp->_flags = 0;
-		error = copyin(addr, &tp->_timeout, sizeof(struct timespec));
+		error = copyin(uaddr, &tp->_timeout, sizeof(tp->_timeout));
	} else
-		error =
copyin(addr, tp, sizeof(struct _umtx_time));
+		error = copyin(uaddr, tp, sizeof(*tp));
	if (error != 0)
		return (error);
	if (tp->_timeout.tv_sec < 0 ||
	    tp->_timeout.tv_nsec >= 1000000000 || tp->_timeout.tv_nsec < 0)
		return (EINVAL);
	return (0);
}

+/*
+ * Copy in robust-list parameters from the user address uaddr into *rb.
+ * A size larger than the structure is rejected with EINVAL; otherwise
+ * the result of copyin() is returned.
+ */
static int
-__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap)
+umtx_copyin_robust_lists(const void *uaddr, size_t size,
+    struct umtx_robust_lists_params *rb)
{

+	if (size > sizeof(*rb))
+		return (EINVAL);
+	/*
+	 * The destination is the caller's structure.  Passing &rb here
+	 * would overwrite the local pointer variable (and the stack
+	 * beyond it) and leave *rb unset.
+	 */
+	return (copyin(uaddr, rb, size));
+}
+
+static int
+umtx_copyout_timeout(void *uaddr, size_t sz, struct timespec *tsp)
+{
+
+	/*
+	 * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time)
+	 * and we're only called if sz >= sizeof(timespec) as supplied in the
+	 * copyops.
+	 */
+	KASSERT(sz >= sizeof(*tsp),
+	    ("umtx_copyops specifies incorrect sizes"));
+
+	return (copyout(tsp, uaddr, sizeof(*tsp)));
+}
+
+static int
+__umtx_op_unimpl(struct thread *td, struct _umtx_op_args *uap,
+    const struct umtx_copyops *ops __unused)
+{
+
	return (EOPNOTSUPP);
}

static int
-__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
+__umtx_op_wait(struct thread *td, struct _umtx_op_args *uap,
+    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
-		error = umtx_copyin_umtx_time(
+		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
		tm_p = &timeout;
	}
-	return (do_wait(td, uap->obj, uap->val, tm_p, 0, 0));
+	return (do_wait(td, uap->obj, uap->val, tm_p, ops->compat32, 0));
}

static int
-__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
+__umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap,
+    const struct umtx_copyops *ops)
{
	struct _umtx_time timeout, *tm_p;
	int error;

	if (uap->uaddr2 == NULL)
		tm_p = NULL;
	else {
-		error = umtx_copyin_umtx_time(
+		error = ops->copyin_umtx_time(
		    uap->uaddr2, (size_t)uap->uaddr1, &timeout);
		if (error != 0)
			return (error);
tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); } static int -__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time *tm_p, timeout; int error; if (uap->uaddr2 == NULL) tm_p = NULL; else { - error = umtx_copyin_umtx_time( + error = ops->copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); } static int -__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_wake(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (kern_umtx_wake(td, uap->obj, uap->val, 0)); } #define BATCH_SIZE 128 static int -__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_nwake_private_native(struct thread *td, struct _umtx_op_args *uap) { char *uaddrs[BATCH_SIZE], **upp; int count, error, i, pos, tocopy; upp = (char **)uap->obj; error = 0; for (count = uap->val, pos = 0; count > 0; count -= tocopy, pos += tocopy) { tocopy = MIN(count, BATCH_SIZE); error = copyin(upp + pos, uaddrs, tocopy * sizeof(char *)); if (error != 0) break; - for (i = 0; i < tocopy; ++i) + for (i = 0; i < tocopy; ++i) { kern_umtx_wake(td, uaddrs[i], INT_MAX, 1); + } maybe_yield(); } return (error); } static int -__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_nwake_private_compat32(struct thread *td, struct _umtx_op_args *uap) { + uint32_t uaddrs[BATCH_SIZE], *upp; + int count, error, i, pos, tocopy; + upp = (uint32_t *)uap->obj; + error = 0; + for (count = uap->val, pos = 0; count > 0; count -= tocopy, + pos += tocopy) { + tocopy = MIN(count, BATCH_SIZE); + error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); + if (error != 0) + break; + for (i = 0; i < tocopy; ++i) { + kern_umtx_wake(td, (void 
*)(uintptr_t)uaddrs[i], + INT_MAX, 1); + } + maybe_yield(); + } + return (error); +} + +static int +__umtx_op_nwake_private(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) +{ + + if (ops->compat32) + return (__umtx_op_nwake_private_compat32(td, uap)); + return (__umtx_op_nwake_private_native(td, uap)); +} + +static int +__umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) +{ + return (kern_umtx_wake(td, uap->obj, uap->val, 1)); } static int -__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { - error = umtx_copyin_umtx_time( + error = ops->copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_lock_umutex(td, uap->obj, tm_p, 0)); } static int -__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY)); } static int -__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) tm_p = NULL; else { - error = umtx_copyin_umtx_time( + error = ops->copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); } static int -__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_wake_umutex(td, uap->obj)); } static int -__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_unlock_umutex(td, uap->obj, false)); } static int -__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1)); } static int -__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct timespec *ts, timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) ts = NULL; else { - error = umtx_copyin_timeout(uap->uaddr2, &timeout); + error = ops->copyin_timeout(uap->uaddr2, &timeout); if (error != 0) return (error); ts = &timeout; } return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); } static int -__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_cv_signal(td, uap->obj)); } static int -__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_cv_broadcast(td, uap->obj)); } static int -__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) { error = do_rw_rdlock(td, uap->obj, uap->val, 0); } else { - error = umtx_copyin_umtx_time(uap->uaddr2, + error = ops->copyin_umtx_time(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); } return (error); } static int -__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time timeout; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) { error = do_rw_wrlock(td, uap->obj, 0); } else { - error = umtx_copyin_umtx_time(uap->uaddr2, + error = ops->copyin_umtx_time(uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); error = do_rw_wrlock(td, uap->obj, &timeout); } return (error); } static int -__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_rw_unlock(td, uap->obj)); } #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) static int -__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time *tm_p, timeout; int error; /* Allow a null timespec (wait forever). */ if (uap->uaddr2 == NULL) tm_p = NULL; else { - error = umtx_copyin_umtx_time( + error = ops->copyin_umtx_time( uap->uaddr2, (size_t)uap->uaddr1, &timeout); if (error != 0) return (error); tm_p = &timeout; } return (do_sem_wait(td, uap->obj, tm_p)); } static int -__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_sem_wake(td, uap->obj)); } #endif static int -__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_wake2_umutex(td, uap->obj, uap->val)); } static int -__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { struct _umtx_time *tm_p, timeout; size_t uasize; int error; /* Allow a null timespec (wait forever). 
*/ if (uap->uaddr2 == NULL) { uasize = 0; tm_p = NULL; } else { uasize = (size_t)uap->uaddr1; - error = umtx_copyin_umtx_time(uap->uaddr2, uasize, &timeout); + error = ops->copyin_umtx_time(uap->uaddr2, uasize, &timeout); if (error != 0) return (error); tm_p = &timeout; } error = do_sem2_wait(td, uap->obj, tm_p); if (error == EINTR && uap->uaddr2 != NULL && (timeout._flags & UMTX_ABSTIME) == 0 && - uasize >= sizeof(struct _umtx_time) + sizeof(struct timespec)) { - error = copyout(&timeout._timeout, - (struct _umtx_time *)uap->uaddr2 + 1, - sizeof(struct timespec)); + uasize >= ops->umtx_time_sz + ops->timespec_sz) { + error = ops->copyout_timeout( + (void *)((uintptr_t)uap->uaddr2 + ops->umtx_time_sz), + uasize - ops->umtx_time_sz, &timeout._timeout); if (error == 0) { error = EINTR; } } return (error); } static int -__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (do_sem2_wake(td, uap->obj)); } #define USHM_OBJ_UMTX(o) \ ((struct umtx_shm_obj_list *)(&(o)->umtx_data)) #define USHMF_REG_LINKED 0x0001 #define USHMF_OBJ_LINKED 0x0002 struct umtx_shm_reg { TAILQ_ENTRY(umtx_shm_reg) ushm_reg_link; LIST_ENTRY(umtx_shm_reg) ushm_obj_link; struct umtx_key ushm_key; struct ucred *ushm_cred; struct shmfd *ushm_obj; u_int ushm_refcnt; u_int ushm_flags; }; LIST_HEAD(umtx_shm_obj_list, umtx_shm_reg); TAILQ_HEAD(umtx_shm_reg_head, umtx_shm_reg); static uma_zone_t umtx_shm_reg_zone; static struct umtx_shm_reg_head umtx_shm_registry[UMTX_CHAINS]; static struct mtx umtx_shm_lock; static struct umtx_shm_reg_head umtx_shm_reg_delfree = TAILQ_HEAD_INITIALIZER(umtx_shm_reg_delfree); static void umtx_shm_free_reg(struct umtx_shm_reg *reg); static void umtx_shm_reg_delfree_tq(void *context __unused, int pending __unused) { struct umtx_shm_reg_head d; struct umtx_shm_reg *reg, *reg1; TAILQ_INIT(&d); mtx_lock(&umtx_shm_lock); TAILQ_CONCAT(&d, 
&umtx_shm_reg_delfree, ushm_reg_link); mtx_unlock(&umtx_shm_lock); TAILQ_FOREACH_SAFE(reg, &d, ushm_reg_link, reg1) { TAILQ_REMOVE(&d, reg, ushm_reg_link); umtx_shm_free_reg(reg); } } static struct task umtx_shm_reg_delfree_task = TASK_INITIALIZER(0, umtx_shm_reg_delfree_tq, NULL); static struct umtx_shm_reg * umtx_shm_find_reg_locked(const struct umtx_key *key) { struct umtx_shm_reg *reg; struct umtx_shm_reg_head *reg_head; KASSERT(key->shared, ("umtx_p_find_rg: private key")); mtx_assert(&umtx_shm_lock, MA_OWNED); reg_head = &umtx_shm_registry[key->hash]; TAILQ_FOREACH(reg, reg_head, ushm_reg_link) { KASSERT(reg->ushm_key.shared, ("non-shared key on reg %p %d", reg, reg->ushm_key.shared)); if (reg->ushm_key.info.shared.object == key->info.shared.object && reg->ushm_key.info.shared.offset == key->info.shared.offset) { KASSERT(reg->ushm_key.type == TYPE_SHM, ("TYPE_USHM")); KASSERT(reg->ushm_refcnt > 0, ("reg %p refcnt 0 onlist", reg)); KASSERT((reg->ushm_flags & USHMF_REG_LINKED) != 0, ("reg %p not linked", reg)); reg->ushm_refcnt++; return (reg); } } return (NULL); } static struct umtx_shm_reg * umtx_shm_find_reg(const struct umtx_key *key) { struct umtx_shm_reg *reg; mtx_lock(&umtx_shm_lock); reg = umtx_shm_find_reg_locked(key); mtx_unlock(&umtx_shm_lock); return (reg); } static void umtx_shm_free_reg(struct umtx_shm_reg *reg) { chgumtxcnt(reg->ushm_cred->cr_ruidinfo, -1, 0); crfree(reg->ushm_cred); shm_drop(reg->ushm_obj); uma_zfree(umtx_shm_reg_zone, reg); } static bool umtx_shm_unref_reg_locked(struct umtx_shm_reg *reg, bool force) { bool res; mtx_assert(&umtx_shm_lock, MA_OWNED); KASSERT(reg->ushm_refcnt > 0, ("ushm_reg %p refcnt 0", reg)); reg->ushm_refcnt--; res = reg->ushm_refcnt == 0; if (res || force) { if ((reg->ushm_flags & USHMF_REG_LINKED) != 0) { TAILQ_REMOVE(&umtx_shm_registry[reg->ushm_key.hash], reg, ushm_reg_link); reg->ushm_flags &= ~USHMF_REG_LINKED; } if ((reg->ushm_flags & USHMF_OBJ_LINKED) != 0) { LIST_REMOVE(reg, ushm_obj_link); 
reg->ushm_flags &= ~USHMF_OBJ_LINKED; } } return (res); } static void umtx_shm_unref_reg(struct umtx_shm_reg *reg, bool force) { vm_object_t object; bool dofree; if (force) { object = reg->ushm_obj->shm_object; VM_OBJECT_WLOCK(object); object->flags |= OBJ_UMTXDEAD; VM_OBJECT_WUNLOCK(object); } mtx_lock(&umtx_shm_lock); dofree = umtx_shm_unref_reg_locked(reg, force); mtx_unlock(&umtx_shm_lock); if (dofree) umtx_shm_free_reg(reg); } void umtx_shm_object_init(vm_object_t object) { LIST_INIT(USHM_OBJ_UMTX(object)); } void umtx_shm_object_terminated(vm_object_t object) { struct umtx_shm_reg *reg, *reg1; bool dofree; if (LIST_EMPTY(USHM_OBJ_UMTX(object))) return; dofree = false; mtx_lock(&umtx_shm_lock); LIST_FOREACH_SAFE(reg, USHM_OBJ_UMTX(object), ushm_obj_link, reg1) { if (umtx_shm_unref_reg_locked(reg, true)) { TAILQ_INSERT_TAIL(&umtx_shm_reg_delfree, reg, ushm_reg_link); dofree = true; } } mtx_unlock(&umtx_shm_lock); if (dofree) taskqueue_enqueue(taskqueue_thread, &umtx_shm_reg_delfree_task); } static int umtx_shm_create_reg(struct thread *td, const struct umtx_key *key, struct umtx_shm_reg **res) { struct umtx_shm_reg *reg, *reg1; struct ucred *cred; int error; reg = umtx_shm_find_reg(key); if (reg != NULL) { *res = reg; return (0); } cred = td->td_ucred; if (!chgumtxcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_UMTXP))) return (ENOMEM); reg = uma_zalloc(umtx_shm_reg_zone, M_WAITOK | M_ZERO); reg->ushm_refcnt = 1; bcopy(key, ®->ushm_key, sizeof(*key)); reg->ushm_obj = shm_alloc(td->td_ucred, O_RDWR, false); reg->ushm_cred = crhold(cred); error = shm_dotruncate(reg->ushm_obj, PAGE_SIZE); if (error != 0) { umtx_shm_free_reg(reg); return (error); } mtx_lock(&umtx_shm_lock); reg1 = umtx_shm_find_reg_locked(key); if (reg1 != NULL) { mtx_unlock(&umtx_shm_lock); umtx_shm_free_reg(reg); *res = reg1; return (0); } reg->ushm_refcnt++; TAILQ_INSERT_TAIL(&umtx_shm_registry[key->hash], reg, ushm_reg_link); LIST_INSERT_HEAD(USHM_OBJ_UMTX(key->info.shared.object), reg, 
ushm_obj_link); reg->ushm_flags = USHMF_REG_LINKED | USHMF_OBJ_LINKED; mtx_unlock(&umtx_shm_lock); *res = reg; return (0); } static int umtx_shm_alive(struct thread *td, void *addr) { vm_map_t map; vm_map_entry_t entry; vm_object_t object; vm_pindex_t pindex; vm_prot_t prot; int res, ret; boolean_t wired; map = &td->td_proc->p_vmspace->vm_map; res = vm_map_lookup(&map, (uintptr_t)addr, VM_PROT_READ, &entry, &object, &pindex, &prot, &wired); if (res != KERN_SUCCESS) return (EFAULT); if (object == NULL) ret = EINVAL; else ret = (object->flags & OBJ_UMTXDEAD) != 0 ? ENOTTY : 0; vm_map_lookup_done(map, entry); return (ret); } static void umtx_shm_init(void) { int i; umtx_shm_reg_zone = uma_zcreate("umtx_shm", sizeof(struct umtx_shm_reg), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mtx_init(&umtx_shm_lock, "umtxshm", NULL, MTX_DEF); for (i = 0; i < nitems(umtx_shm_registry); i++) TAILQ_INIT(&umtx_shm_registry[i]); } static int umtx_shm(struct thread *td, void *addr, u_int flags) { struct umtx_key key; struct umtx_shm_reg *reg; struct file *fp; int error, fd; if (__bitcount(flags & (UMTX_SHM_CREAT | UMTX_SHM_LOOKUP | UMTX_SHM_DESTROY| UMTX_SHM_ALIVE)) != 1) return (EINVAL); if ((flags & UMTX_SHM_ALIVE) != 0) return (umtx_shm_alive(td, addr)); error = umtx_key_get(addr, TYPE_SHM, PROCESS_SHARE, &key); if (error != 0) return (error); KASSERT(key.shared == 1, ("non-shared key")); if ((flags & UMTX_SHM_CREAT) != 0) { error = umtx_shm_create_reg(td, &key, ®); } else { reg = umtx_shm_find_reg(&key); if (reg == NULL) error = ESRCH; } umtx_key_release(&key); if (error != 0) return (error); KASSERT(reg != NULL, ("no reg")); if ((flags & UMTX_SHM_DESTROY) != 0) { umtx_shm_unref_reg(reg, true); } else { #if 0 #ifdef MAC error = mac_posixshm_check_open(td->td_ucred, reg->ushm_obj, FFLAGS(O_RDWR)); if (error == 0) #endif error = shm_access(reg->ushm_obj, td->td_ucred, FFLAGS(O_RDWR)); if (error == 0) #endif error = falloc_caps(td, &fp, &fd, O_CLOEXEC, NULL); if (error == 0) { 
shm_hold(reg->ushm_obj); finit(fp, FFLAGS(O_RDWR), DTYPE_SHM, reg->ushm_obj, &shm_ops); td->td_retval[0] = fd; fdrop(fp, td); } } umtx_shm_unref_reg(reg, false); return (error); } static int -__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap) +__umtx_op_shm(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops __unused) { return (umtx_shm(td, uap->uaddr1, uap->val)); } static int -umtx_robust_lists(struct thread *td, struct umtx_robust_lists_params *rbp) +__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *ops) { - - td->td_rb_list = rbp->robust_list_offset; - td->td_rbp_list = rbp->robust_priv_list_offset; - td->td_rb_inact = rbp->robust_inact_offset; - return (0); -} - -static int -__umtx_op_robust_lists(struct thread *td, struct _umtx_op_args *uap) -{ struct umtx_robust_lists_params rb; int error; - if (uap->val > sizeof(rb)) - return (EINVAL); bzero(&rb, sizeof(rb)); - error = copyin(uap->uaddr1, &rb, uap->val); + error = ops->copyin_robust_lists(uap->uaddr1, uap->val, &rb); if (error != 0) return (error); - return (umtx_robust_lists(td, &rb)); -} -typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap); + if (ops->compat32) + td->td_pflags2 |= TDP2_COMPAT32RB; + else if ((td->td_pflags2 & TDP2_COMPAT32RB) != 0) + return (EINVAL); -static const _umtx_op_func op_table[] = { - [UMTX_OP_RESERVED0] = __umtx_op_unimpl, - [UMTX_OP_RESERVED1] = __umtx_op_unimpl, - [UMTX_OP_WAIT] = __umtx_op_wait, - [UMTX_OP_WAKE] = __umtx_op_wake, - [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, - [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, - [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, - [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, - [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, - [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, - [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, - [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, - [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, - 
[UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, - [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, - [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, - [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, - [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, - [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, -#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) - [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, - [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, -#else - [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, - [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, -#endif - [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, - [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, - [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, - [UMTX_OP_SEM2_WAKE] = __umtx_op_sem2_wake, - [UMTX_OP_SHM] = __umtx_op_shm, - [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, -}; - -int -sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) -{ - - if ((unsigned)uap->op < nitems(op_table)) - return (*op_table[uap->op])(td, uap); - return (EINVAL); + td->td_rb_list = rb.robust_list_offset; + td->td_rbp_list = rb.robust_priv_list_offset; + td->td_rb_inact = rb.robust_inact_offset; + return (0); } #ifdef COMPAT_FREEBSD32 - -struct umtx_time32 { - struct timespec32 timeout; - uint32_t flags; - uint32_t clockid; -}; - static inline int -umtx_copyin_timeout32(void *addr, struct timespec *tsp) +umtx_copyin_timeout32(const void *uaddr, struct timespec *tsp) { struct timespec32 ts32; int error; - error = copyin(addr, &ts32, sizeof(struct timespec32)); + error = copyin(uaddr, &ts32, sizeof(ts32)); if (error == 0) { if (ts32.tv_sec < 0 || ts32.tv_nsec >= 1000000000 || ts32.tv_nsec < 0) error = EINVAL; else { - tsp->tv_sec = ts32.tv_sec; - tsp->tv_nsec = ts32.tv_nsec; + CP(ts32, *tsp, tv_sec); + CP(ts32, *tsp, tv_nsec); } } return (error); } static inline int -umtx_copyin_umtx_time32(const void *addr, size_t size, struct _umtx_time *tp) +umtx_copyin_umtx_time32(const void *uaddr, size_t size, struct _umtx_time *tp) { struct umtx_time32 t32; int 
error; - t32.clockid = CLOCK_REALTIME; - t32.flags = 0; - if (size <= sizeof(struct timespec32)) - error = copyin(addr, &t32.timeout, sizeof(struct timespec32)); + t32._clockid = CLOCK_REALTIME; + t32._flags = 0; + if (size <= sizeof(t32._timeout)) + error = copyin(uaddr, &t32._timeout, sizeof(t32._timeout)); else - error = copyin(addr, &t32, sizeof(struct umtx_time32)); + error = copyin(uaddr, &t32, sizeof(t32)); if (error != 0) return (error); - if (t32.timeout.tv_sec < 0 || - t32.timeout.tv_nsec >= 1000000000 || t32.timeout.tv_nsec < 0) + if (t32._timeout.tv_sec < 0 || + t32._timeout.tv_nsec >= 1000000000 || t32._timeout.tv_nsec < 0) return (EINVAL); - tp->_timeout.tv_sec = t32.timeout.tv_sec; - tp->_timeout.tv_nsec = t32.timeout.tv_nsec; - tp->_flags = t32.flags; - tp->_clockid = t32.clockid; + TS_CP(t32, *tp, _timeout); + CP(t32, *tp, _flags); + CP(t32, *tp, _clockid); return (0); } static int -__umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap) +umtx_copyin_robust_lists32(const void *uaddr, size_t size, + struct umtx_robust_lists_params *rbp) { - struct _umtx_time *tm_p, timeout; + struct umtx_robust_lists_params_compat32 rb32; int error; - if (uap->uaddr2 == NULL) - tm_p = NULL; - else { - error = umtx_copyin_umtx_time32(uap->uaddr2, - (size_t)uap->uaddr1, &timeout); - if (error != 0) - return (error); - tm_p = &timeout; - } - return (do_wait(td, uap->obj, uap->val, tm_p, 1, 0)); + if (size > sizeof(rb32)) + return (EINVAL); + bzero(&rb32, sizeof(rb32)); + error = copyin(uaddr, &rb32, size); + if (error != 0) + return (error); + CP(rb32, *rbp, robust_list_offset); + CP(rb32, *rbp, robust_priv_list_offset); + CP(rb32, *rbp, robust_inact_offset); + return (0); } static int -__umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) +umtx_copyout_timeout32(void *uaddr, size_t sz, struct timespec *tsp) { - struct _umtx_time *tm_p, timeout; - int error; + struct timespec32 remain32 = { + .tv_sec = tsp->tv_sec, + .tv_nsec = 
tsp->tv_nsec, + }; - /* Allow a null timespec (wait forever). */ - if (uap->uaddr2 == NULL) - tm_p = NULL; - else { - error = umtx_copyin_umtx_time32(uap->uaddr2, - (size_t)uap->uaddr1, &timeout); - if (error != 0) - return (error); - tm_p = &timeout; - } - return (do_lock_umutex(td, uap->obj, tm_p, 0)); -} + /* + * Should be guaranteed by the caller, sz == uaddr1 - sizeof(_umtx_time) + * and we're only called if sz >= sizeof(timespec) as supplied in the + * copyops. + */ + KASSERT(sz >= sizeof(remain32), + ("umtx_copyops specifies incorrect sizes")); -static int -__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct _umtx_time *tm_p, timeout; - int error; - - /* Allow a null timespec (wait forever). */ - if (uap->uaddr2 == NULL) - tm_p = NULL; - else { - error = umtx_copyin_umtx_time32(uap->uaddr2, - (size_t)uap->uaddr1, &timeout); - if (error != 0) - return (error); - tm_p = &timeout; - } - return (do_lock_umutex(td, uap->obj, tm_p, _UMUTEX_WAIT)); + return (copyout(&remain32, uaddr, sizeof(remain32))); } +#endif /* COMPAT_FREEBSD32 */ -static int -__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct timespec *ts, timeout; - int error; +typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap, + const struct umtx_copyops *umtx_ops); - /* Allow a null timespec (wait forever). */ - if (uap->uaddr2 == NULL) - ts = NULL; - else { - error = umtx_copyin_timeout32(uap->uaddr2, &timeout); - if (error != 0) - return (error); - ts = &timeout; - } - return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val)); -} - -static int -__umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct _umtx_time timeout; - int error; - - /* Allow a null timespec (wait forever). 
*/ - if (uap->uaddr2 == NULL) { - error = do_rw_rdlock(td, uap->obj, uap->val, 0); - } else { - error = umtx_copyin_umtx_time32(uap->uaddr2, - (size_t)uap->uaddr1, &timeout); - if (error != 0) - return (error); - error = do_rw_rdlock(td, uap->obj, uap->val, &timeout); - } - return (error); -} - -static int -__umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct _umtx_time timeout; - int error; - - /* Allow a null timespec (wait forever). */ - if (uap->uaddr2 == NULL) { - error = do_rw_wrlock(td, uap->obj, 0); - } else { - error = umtx_copyin_umtx_time32(uap->uaddr2, - (size_t)uap->uaddr1, &timeout); - if (error != 0) - return (error); - error = do_rw_wrlock(td, uap->obj, &timeout); - } - return (error); -} - -static int -__umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct _umtx_time *tm_p, timeout; - int error; - - if (uap->uaddr2 == NULL) - tm_p = NULL; - else { - error = umtx_copyin_umtx_time32( - uap->uaddr2, (size_t)uap->uaddr1,&timeout); - if (error != 0) - return (error); - tm_p = &timeout; - } - return (do_wait(td, uap->obj, uap->val, tm_p, 1, 1)); -} - -#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) -static int -__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct _umtx_time *tm_p, timeout; - int error; - - /* Allow a null timespec (wait forever). */ - if (uap->uaddr2 == NULL) - tm_p = NULL; - else { - error = umtx_copyin_umtx_time32(uap->uaddr2, - (size_t)uap->uaddr1, &timeout); - if (error != 0) - return (error); - tm_p = &timeout; - } - return (do_sem_wait(td, uap->obj, tm_p)); -} -#endif - -static int -__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct _umtx_time *tm_p, timeout; - size_t uasize; - int error; - - /* Allow a null timespec (wait forever). 
*/ - if (uap->uaddr2 == NULL) { - uasize = 0; - tm_p = NULL; - } else { - uasize = (size_t)uap->uaddr1; - error = umtx_copyin_umtx_time32(uap->uaddr2, uasize, &timeout); - if (error != 0) - return (error); - tm_p = &timeout; - } - error = do_sem2_wait(td, uap->obj, tm_p); - if (error == EINTR && uap->uaddr2 != NULL && - (timeout._flags & UMTX_ABSTIME) == 0 && - uasize >= sizeof(struct umtx_time32) + sizeof(struct timespec32)) { - struct timespec32 remain32 = { - .tv_sec = timeout._timeout.tv_sec, - .tv_nsec = timeout._timeout.tv_nsec - }; - error = copyout(&remain32, - (struct umtx_time32 *)uap->uaddr2 + 1, - sizeof(struct timespec32)); - if (error == 0) { - error = EINTR; - } - } - - return (error); -} - -static int -__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap) -{ - uint32_t uaddrs[BATCH_SIZE], *upp; - int count, error, i, pos, tocopy; - - upp = (uint32_t *)uap->obj; - error = 0; - for (count = uap->val, pos = 0; count > 0; count -= tocopy, - pos += tocopy) { - tocopy = MIN(count, BATCH_SIZE); - error = copyin(upp + pos, uaddrs, tocopy * sizeof(uint32_t)); - if (error != 0) - break; - for (i = 0; i < tocopy; ++i) - kern_umtx_wake(td, (void *)(uintptr_t)uaddrs[i], - INT_MAX, 1); - maybe_yield(); - } - return (error); -} - -struct umtx_robust_lists_params_compat32 { - uint32_t robust_list_offset; - uint32_t robust_priv_list_offset; - uint32_t robust_inact_offset; -}; - -static int -__umtx_op_robust_lists_compat32(struct thread *td, struct _umtx_op_args *uap) -{ - struct umtx_robust_lists_params rb; - struct umtx_robust_lists_params_compat32 rb32; - int error; - - if (uap->val > sizeof(rb32)) - return (EINVAL); - bzero(&rb, sizeof(rb)); - bzero(&rb32, sizeof(rb32)); - error = copyin(uap->uaddr1, &rb32, uap->val); - if (error != 0) - return (error); - rb.robust_list_offset = rb32.robust_list_offset; - rb.robust_priv_list_offset = rb32.robust_priv_list_offset; - rb.robust_inact_offset = rb32.robust_inact_offset; - return 
(umtx_robust_lists(td, &rb)); -} - -static const _umtx_op_func op_table_compat32[] = { +static const _umtx_op_func op_table[] = { [UMTX_OP_RESERVED0] = __umtx_op_unimpl, [UMTX_OP_RESERVED1] = __umtx_op_unimpl, - [UMTX_OP_WAIT] = __umtx_op_wait_compat32, + [UMTX_OP_WAIT] = __umtx_op_wait, [UMTX_OP_WAKE] = __umtx_op_wake, [UMTX_OP_MUTEX_TRYLOCK] = __umtx_op_trylock_umutex, - [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex_compat32, + [UMTX_OP_MUTEX_LOCK] = __umtx_op_lock_umutex, [UMTX_OP_MUTEX_UNLOCK] = __umtx_op_unlock_umutex, [UMTX_OP_SET_CEILING] = __umtx_op_set_ceiling, - [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait_compat32, + [UMTX_OP_CV_WAIT] = __umtx_op_cv_wait, [UMTX_OP_CV_SIGNAL] = __umtx_op_cv_signal, [UMTX_OP_CV_BROADCAST] = __umtx_op_cv_broadcast, - [UMTX_OP_WAIT_UINT] = __umtx_op_wait_compat32, - [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock_compat32, - [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock_compat32, + [UMTX_OP_WAIT_UINT] = __umtx_op_wait_uint, + [UMTX_OP_RW_RDLOCK] = __umtx_op_rw_rdlock, + [UMTX_OP_RW_WRLOCK] = __umtx_op_rw_wrlock, [UMTX_OP_RW_UNLOCK] = __umtx_op_rw_unlock, - [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private_compat32, + [UMTX_OP_WAIT_UINT_PRIVATE] = __umtx_op_wait_uint_private, [UMTX_OP_WAKE_PRIVATE] = __umtx_op_wake_private, - [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex_compat32, + [UMTX_OP_MUTEX_WAIT] = __umtx_op_wait_umutex, [UMTX_OP_MUTEX_WAKE] = __umtx_op_wake_umutex, #if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10) - [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait_compat32, + [UMTX_OP_SEM_WAIT] = __umtx_op_sem_wait, [UMTX_OP_SEM_WAKE] = __umtx_op_sem_wake, #else [UMTX_OP_SEM_WAIT] = __umtx_op_unimpl, [UMTX_OP_SEM_WAKE] = __umtx_op_unimpl, #endif - [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private32, + [UMTX_OP_NWAKE_PRIVATE] = __umtx_op_nwake_private, [UMTX_OP_MUTEX_WAKE2] = __umtx_op_wake2_umutex, - [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait_compat32, + [UMTX_OP_SEM2_WAIT] = __umtx_op_sem2_wait, [UMTX_OP_SEM2_WAKE] = 
__umtx_op_sem2_wake, [UMTX_OP_SHM] = __umtx_op_shm, - [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists_compat32, + [UMTX_OP_ROBUST_LISTS] = __umtx_op_robust_lists, }; +static const struct umtx_copyops umtx_native_ops = { + .copyin_timeout = umtx_copyin_timeout, + .copyin_umtx_time = umtx_copyin_umtx_time, + .copyin_robust_lists = umtx_copyin_robust_lists, + .copyout_timeout = umtx_copyout_timeout, + .timespec_sz = sizeof(struct timespec), + .umtx_time_sz = sizeof(struct _umtx_time), +}; + +#ifdef COMPAT_FREEBSD32 +const struct umtx_copyops umtx_native_ops32 = { + .copyin_timeout = umtx_copyin_timeout32, + .copyin_umtx_time = umtx_copyin_umtx_time32, + .copyin_robust_lists = umtx_copyin_robust_lists32, + .copyout_timeout = umtx_copyout_timeout32, + .timespec_sz = sizeof(struct timespec32), + .umtx_time_sz = sizeof(struct umtx_time32), + .compat32 = true, +}; +#endif + int -freebsd32__umtx_op(struct thread *td, struct freebsd32__umtx_op_args *uap) +kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, + void *uaddr1, void *uaddr2, const struct umtx_copyops *ops) { + struct _umtx_op_args uap = { + .obj = obj, + .op = op, + .val = val, + .uaddr1 = uaddr1, + .uaddr2 = uaddr2 + }; - if ((unsigned)uap->op < nitems(op_table_compat32)) { - return (*op_table_compat32[uap->op])(td, - (struct _umtx_op_args *)uap); - } - return (EINVAL); + if ((uap.op >= nitems(op_table))) + return (EINVAL); + return ((*op_table[uap.op])(td, &uap, ops)); } -#endif +int +sys__umtx_op(struct thread *td, struct _umtx_op_args *uap) +{ + + return (kern__umtx_op(td, uap->obj, uap->op, uap->val, uap->uaddr1, + uap->uaddr2, &umtx_native_ops)); +} + void umtx_thread_init(struct thread *td) { td->td_umtxq = umtxq_alloc(); td->td_umtxq->uq_thread = td; } void umtx_thread_fini(struct thread *td) { umtxq_free(td->td_umtxq); } /* * It will be called when new thread is created, e.g fork(). 
*/ void umtx_thread_alloc(struct thread *td) { struct umtx_q *uq; uq = td->td_umtxq; uq->uq_inherited_pri = PRI_MAX; KASSERT(uq->uq_flags == 0, ("uq_flags != 0")); KASSERT(uq->uq_thread == td, ("uq_thread != td")); KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL")); KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty")); } /* * exec() hook. * * Clear robust lists for all process' threads, not delaying the * cleanup to thread_exit hook, since the relevant address space is * destroyed right now. */ static void umtx_exec_hook(void *arg __unused, struct proc *p, struct image_params *imgp __unused) { struct thread *td; KASSERT(p == curproc, ("need curproc")); KASSERT((p->p_flag & P_HADTHREADS) == 0 || (p->p_flag & P_STOPPED_SINGLE) != 0, ("curproc must be single-threaded")); /* * There is no need to lock the list as only this thread can be * running. */ FOREACH_THREAD_IN_PROC(p, td) { KASSERT(td == curthread || ((td->td_flags & TDF_BOUNDARY) != 0 && TD_IS_SUSPENDED(td)), ("running thread %p %p", p, td)); umtx_thread_cleanup(td); td->td_rb_list = td->td_rbp_list = td->td_rb_inact = 0; } } /* * thread_exit() hook. 
*/ void umtx_thread_exit(struct thread *td) { umtx_thread_cleanup(td); } static int -umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res) +umtx_read_uptr(struct thread *td, uintptr_t ptr, uintptr_t *res, bool compat32) { u_long res1; #ifdef COMPAT_FREEBSD32 uint32_t res32; #endif int error; #ifdef COMPAT_FREEBSD32 - if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + if (compat32) { error = fueword32((void *)ptr, &res32); if (error == 0) res1 = res32; } else #endif { error = fueword((void *)ptr, &res1); } if (error == 0) *res = res1; else error = EFAULT; return (error); } static void -umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list) +umtx_read_rb_list(struct thread *td, struct umutex *m, uintptr_t *rb_list, + bool compat32) { #ifdef COMPAT_FREEBSD32 struct umutex32 m32; - if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { + if (compat32) { memcpy(&m32, m, sizeof(m32)); *rb_list = m32.m_rb_lnk; } else #endif *rb_list = m->m_rb_lnk; } static int -umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact) +umtx_handle_rb(struct thread *td, uintptr_t rbp, uintptr_t *rb_list, bool inact, + bool compat32) { struct umutex m; int error; KASSERT(td->td_proc == curproc, ("need current vmspace")); error = copyin((void *)rbp, &m, sizeof(m)); if (error != 0) return (error); if (rb_list != NULL) - umtx_read_rb_list(td, &m, rb_list); + umtx_read_rb_list(td, &m, rb_list, compat32); if ((m.m_flags & UMUTEX_ROBUST) == 0) return (EINVAL); if ((m.m_owner & ~UMUTEX_CONTESTED) != td->td_tid) /* inact is cleared after unlock, allow the inconsistency */ return (inact ? 
0 : EINVAL); return (do_unlock_umutex(td, (struct umutex *)rbp, true)); } static void umtx_cleanup_rb_list(struct thread *td, uintptr_t rb_list, uintptr_t *rb_inact, - const char *name) + const char *name, bool compat32) { int error, i; uintptr_t rbp; bool inact; if (rb_list == 0) return; - error = umtx_read_uptr(td, rb_list, &rbp); + error = umtx_read_uptr(td, rb_list, &rbp, compat32); for (i = 0; error == 0 && rbp != 0 && i < umtx_max_rb; i++) { if (rbp == *rb_inact) { inact = true; *rb_inact = 0; } else inact = false; - error = umtx_handle_rb(td, rbp, &rbp, inact); + error = umtx_handle_rb(td, rbp, &rbp, inact, compat32); } if (i == umtx_max_rb && umtx_verbose_rb) { uprintf("comm %s pid %d: reached umtx %smax rb %d\n", td->td_proc->p_comm, td->td_proc->p_pid, name, umtx_max_rb); } if (error != 0 && umtx_verbose_rb) { uprintf("comm %s pid %d: handling %srb error %d\n", td->td_proc->p_comm, td->td_proc->p_pid, name, error); } } /* * Clean up umtx data. */ static void umtx_thread_cleanup(struct thread *td) { struct umtx_q *uq; struct umtx_pi *pi; uintptr_t rb_inact; + bool compat32; /* * Disown pi mutexes. */ uq = td->td_umtxq; if (uq != NULL) { if (uq->uq_inherited_pri != PRI_MAX || !TAILQ_EMPTY(&uq->uq_pi_contested)) { mtx_lock(&umtx_lock); uq->uq_inherited_pri = PRI_MAX; while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) { pi->pi_owner = NULL; TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); } mtx_unlock(&umtx_lock); } sched_lend_user_prio_cond(td, PRI_MAX); } + compat32 = (td->td_pflags2 & TDP2_COMPAT32RB) != 0; + td->td_pflags2 &= ~TDP2_COMPAT32RB; + if (td->td_rb_inact == 0 && td->td_rb_list == 0 && td->td_rbp_list == 0) return; /* * Handle terminated robust mutexes. Must be done after * robust pi disown, otherwise unlock could see unowned * entries. 
*/ rb_inact = td->td_rb_inact; if (rb_inact != 0) - (void)umtx_read_uptr(td, rb_inact, &rb_inact); - umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, ""); - umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv "); + (void)umtx_read_uptr(td, rb_inact, &rb_inact, compat32); + umtx_cleanup_rb_list(td, td->td_rb_list, &rb_inact, "", compat32); + umtx_cleanup_rb_list(td, td->td_rbp_list, &rb_inact, "priv ", compat32); if (rb_inact != 0) - (void)umtx_handle_rb(td, rb_inact, NULL, true); + (void)umtx_handle_rb(td, rb_inact, NULL, true, compat32); } Index: head/sys/sys/proc.h =================================================================== --- head/sys/sys/proc.h (revision 367743) +++ head/sys/sys/proc.h (revision 367744) @@ -1,1237 +1,1238 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #ifdef _KERNEL #include #endif #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #include #endif #include #include #include #include #include /* Machine-dependent proc substruct. */ #ifdef _KERNEL #include #endif /* * One structure allocated per session. * * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. 
* * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ int pg_jobc; /* (m) Job control process count. */ struct mtx pg_mtx; /* Mutex to protect members */ }; /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. 
* * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * kx- only accessed by curthread and by debugger * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * s - see sleepq_switch(), sleeping_on_old_rtc(), and sleep(9) * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * z - zombie threads lock * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. */ struct cpuset; struct filecaps; struct filemon; struct kaioinfo; struct kaudit_record; struct kcov_info; struct kdtrace_proc; struct kdtrace_thread; struct mqueue_notifier; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct socket; struct syscall_args; struct td_sched; struct thread; struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; struct epoch_tracker; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. 
The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ union { TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ struct thread *td_zombie; /* Zombie list linkage */ }; TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. */ struct domainset_ref td_domain; /* (a) NUMA policy */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. 
*/ /* Cleared during fork1() */ #define td_startzero td_flags int td_flags; /* (t) TDF_* flags. */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_pflags2; /* (k) Private thread (TDP2_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ const void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ short td_locks; /* (k) Debug: count of non-spin locks */ short td_rw_rlocks; /* (k) Count of rwlock read locks. */ short td_sx_slocks; /* (k) Count of sx shared locks. */ short td_lk_slocks; /* (k) Count of lockmgr shared locks. */ short td_stopsched; /* (k) Scheduler stopped. */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_realucred; /* (k) Reference to credentials. */ struct ucred *td_ucred; /* (k) Used credentials, temporarily switchable. */ struct plimit *td_limit; /* (k) Resource limits. */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. 
*/ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. */ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. */ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ siginfo_t td_si; /* (c) For debugger or core file */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ struct vnode *td_vp_reserved;/* (k) Prealloated vnode. */ u_int td_no_sleeping; /* (k) Sleeping disabled count. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ int td_rtcgen; /* (s) rtc_generation of abs. sleep */ int td_errno; /* (k) Error from last syscall. */ size_t td_vslock_sz; /* (k) amount of vslock-ed space */ struct kcov_info *td_kcov_info; /* (*) Kernel code coverage data */ u_int td_ucredref; /* (k) references on td_realucred */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. 
*/ u_char td_base_user_pri; /* (t) Base user pri */ u_char td_pre_epoch_prio; /* (k) User pri on entry to epoch */ uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ struct syscall_args td_sa; /* (kx) Syscall parameters. Copied on fork for child tracing. */ void *td_sigblock_ptr; /* (k) uptr for fast sigblock. */ uint32_t td_sigblock_val; /* (k) fast sigblock value read at td_sigblock_ptr on kern entry */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1() or create_thread() * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum td_states { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ union { register_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval u_int td_cowgen; /* (k) Generation of COW pointers. */ /* LP64 hole */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ int td_kstack_pages; /* (a) Size of the kstack. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. 
*/ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ /* LP64 hole */ void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ int td_pmcpend; #ifdef EPOCH_TRACE SLIST_HEAD(, epoch_tracker) td_epochs; #endif }; struct thread0_storage { struct thread t0st_thread; uint64_t t0st_sched[10]; }; struct mtx *thread_lock_block(struct thread *); void thread_lock_block_wait(struct thread *); void thread_lock_set(struct thread *, struct mtx *); void thread_lock_unblock(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ mtx_assert((td)->td_lock, (type)) #define THREAD_LOCK_BLOCKED_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) \ do { \ struct mtx *__m; \ __m = (td)->td_lock; \ KASSERT(__m == (lock) || __m == &blocked_lock, \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) do { \ KASSERT(SCHEDULER_STOPPED_TD(td) || (td)->td_locks > 0, \ ("thread %p owns no locks", (td))); \ (td)->td_locks--; \ } while (0) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #define THREAD_LOCKPTR_BLOCKED_ASSERT(td, lock) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. 
*/ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_UNUSED80 0x00000080 /* unused. */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_UNUSED12 0x00001000 /* --available-- */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */ #define TDF_UNUSED23 0x00800000 /* --available-- */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_ALRMPEND 0x10000000 /* Pending SIGVTALRM needs to be posted. */ #define TDF_PROFPEND 0x20000000 /* Pending SIGPROF needs to be posted. 
*/ #define TDF_MACPEND 0x40000000 /* AST-based MAC event pending. */ /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ #define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */ #define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */ #define TDB_VFORK 0x00000800 /* vfork indicator for ptrace() */ #define TDB_FSTP 0x00001000 /* The thread is PT_ATTACH leader */ #define TDB_STEP 0x00002000 /* (x86) PSL_T set for PT_STEP */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_SIGFASTBLOCK 0x00000100 /* Fast sigblock active */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. 
*/ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_FORKING 0x20000000 /* Thread is being created through fork() */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ #define TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */ #define TDP2_SBPAGES 0x00000001 /* Owns sbusy on some pages */ +#define TDP2_COMPAT32RB 0x00000002 /* compat32 robust lists */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_SWAPPED 0x0004 /* Stack not in mem. Bad juju if run. */ #define TDI_LOCK 0x0008 /* Stopped on a lock. 
*/ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #define TD_IS_RUNNING(td) ((td)->td_state == TDS_RUNNING) #define TD_ON_RUNQ(td) ((td)->td_state == TDS_RUNQ) #define TD_CAN_RUN(td) ((td)->td_state == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) ((td)->td_state == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define TD_CAN_ABORT(td) (TD_ON_SLEEPQ((td)) && \ ((td)->td_flags & TDF_SINTR) != 0) #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ (td)->td_state = TDS_CAN_RUN; \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #define TD_SET_RUNNING(td) (td)->td_state = TDS_RUNNING #define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN #define TD_SBDRY_INTR(td) \ (((td)->td_flags & (TDF_SEINTR | TDF_SERESTART)) != 0) #define TD_SBDRY_ERRNO(td) \ (((td)->td_flags & TDF_SEINTR) != 0 ? EINTR : ERESTART) /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. 
*/ enum p_states { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals pid_t p_oppid; /* (c + e) Real parent pid. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_vmspace struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ u_int p_cowgen; /* (c) Generation of COW pointers. */ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. */ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. */ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct vnode *p_tracevp; /* (c + o) Trace to vnode. */ struct ucred *p_tracecred; /* (o) Credentials to trace with. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ u_int p_lock; /* (c) Proclock (prevent swap) count. 
*/ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_int p_ptevents; /* (c + e) ptrace() event mask. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ int p_pdeathsig; /* (c) Signal from parent on exit. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ uint32_t p_fctl0; /* (x) ABI feature control, ELF note */ char p_comm[MAXCOMLEN + 1]; /* (x) Process name. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ uint16_t p_elf_machine; /* (x) ELF machine type */ uint64_t p_elf_flags; /* (x) ELF flags */ /* End area that is copied on creation. */ #define p_endcopy p_xexit u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. */ struct pgrp *p_pgrp; /* (c + e) Pointer to process group. 
*/ struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. */ int p_throttled; /* (c) Flag for racct pcpu throttling */ /* * An orphan is the child that has been re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. 
*/
#define NOCPU_OLD (255)
#define MAXCPU_OLD (254)

/* Spin-lock, spin-unlock and assert the state of the mutex (p)->p_slock. */
#define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock)
#define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock)
#define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type))

/* The same three operations on (p)->p_statmtx. */
#define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx)
#define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx)
#define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type))

/* The same three operations on (p)->p_itimmtx. */
#define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx)
#define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx)
#define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type))

/* The same three operations on (p)->p_profmtx. */
#define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx)
#define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx)
#define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type))

/* These flags are kept in p_flag. */
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
#define P_CONTROLT 0x00002 /* Has a controlling terminal. */
#define P_KPROC 0x00004 /* Kernel process. */
#define P_UNUSED3 0x00008 /* --available-- */
#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */
#define P_PROFIL 0x00020 /* Has started profiling. */
#define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */
#define P_HADTHREADS 0x00080 /* Has had threads (no cleanup shortcuts) */
#define P_SUGID 0x00100 /* Had set id privileges since last exec. */
#define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */
#define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait. */
#define P_TRACED 0x00800 /* Debugged process being traced. */
#define P_WAITED 0x01000 /* Someone is waiting for us. */
#define P_WEXIT 0x02000 /* Working on exiting. */
#define P_EXEC 0x04000 /* Process called exec. */
#define P_WKILLED 0x08000 /* Killed, go to kernel/user boundary ASAP. */
#define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */
#define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP.
*/
#define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */
#define P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */
#define P_PROTECTED 0x100000 /* Do not kill on memory overcommit. */
#define P_SIGEVENT 0x200000 /* Process pending signals changed. */
#define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */
#define P_HWPMC 0x800000 /* Process is using HWPMCs */
#define P_JAILED 0x1000000 /* Process is in jail. */
#define P_TOTAL_STOP 0x2000000 /* Stopped in stop_all_proc. */
#define P_INEXEC 0x4000000 /* Process is in execve(). */
#define P_STATCHILD 0x8000000 /* Child process stopped or exited. */
#define P_INMEM 0x10000000 /* Loaded into memory. */
#define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */
#define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */
#define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */

/* Mask of the three "stopped" reasons defined above. */
#define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE)
/* Non-zero when any P_STOPPED bit is set in (p)->p_flag. */
#define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED)
/* Non-zero when P_WKILLED is set in (p)->p_flag. */
#define P_KILLED(p) ((p)->p_flag & P_WKILLED)

/* These flags are kept in p_flag2. */
#define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */
#define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */
#define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOTRACE on exec(2). */
#define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */
#define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
#define P2_TRAPCAP 0x00000020 /* SIGTRAP on ENOTCAPABLE */
#define P2_ASLR_ENABLE 0x00000040 /* Force enable ASLR. */
#define P2_ASLR_DISABLE 0x00000080 /* Force disable ASLR. */
#define P2_ASLR_IGNSTART 0x00000100 /* Enable ASLR to consume sbrk area. */
#define P2_PROTMAX_ENABLE 0x00000200 /* Force enable implied PROT_MAX. */
#define P2_PROTMAX_DISABLE 0x00000400 /* Force disable implied PROT_MAX.
*/
#define P2_STKGAP_DISABLE 0x00000800 /* Disable stack gap for MAP_STACK */
#define P2_STKGAP_DISABLE_EXEC 0x00001000 /* Stack gap disabled after exec */

/* Flags protected by proctree_lock, kept in p_treeflag. */
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
#define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */
#define P_TREE_REAPER 0x00000004 /* Reaper of subtree */
#define P_TREE_GRPEXITED 0x00000008 /* exit1() done with job ctl */

/*
 * These were process status values (p_stat), now they are only used in
 * legacy conversion code.
 */
#define SIDL 1 /* Process being created by fork. */
#define SRUN 2 /* Currently runnable. */
#define SSLEEP 3 /* Sleeping on an address. */
#define SSTOP 4 /* Process debugging or suspension. */
#define SZOMB 5 /* Awaiting collection by parent. */
#define SWAIT 6 /* Waiting for interrupt. */
#define SLOCK 7 /* Blocked on a lock. */

/* NOTE(review): presumably the value expected in p_magic -- confirm. */
#define P_MAGIC 0xbeefface

#ifdef _KERNEL

/* Types and flags for mi_switch(). */
#define SW_TYPE_MASK 0xff /* First 8 bits are switch type */
#define SWT_NONE 0 /* Unspecified switch. */
#define SWT_PREEMPT 1 /* Switching due to preemption. */
#define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */
#define SWT_TURNSTILE 3 /* Turnstile contention. */
#define SWT_SLEEPQ 4 /* Sleepq wait. */
#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
#define SWT_RELINQUISH 6 /* yield call. */
#define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */
#define SWT_IDLE 8 /* Switching from the idle thread. */
#define SWT_IWAIT 9 /* Waiting for interrupts. */
#define SWT_SUSPEND 10 /* Thread suspended. */
#define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */
#define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */
#define SWT_COUNT 13 /* Number of switch types. */
/* Flags (bits above SW_TYPE_MASK). */
#define SW_VOL 0x0100 /* Voluntary switch. */
#define SW_INVOL 0x0200 /* Involuntary switch.
*/
#define SW_PREEMPT 0x0400 /* The invol switch is a preemption */

/* Values for the "how" argument of thread_single(). */
#define SINGLE_NO_EXIT 0
#define SINGLE_EXIT 1
#define SINGLE_BOUNDARY 2
#define SINGLE_ALLPROC 3

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PARGS);
MALLOC_DECLARE(M_PGRP);
MALLOC_DECLARE(M_SESSION);
MALLOC_DECLARE(M_SUBPROC);
#endif

/* Iterate over every process linked on allproc through p_list. */
#define FOREACH_PROC_IN_SYSTEM(p) \
	LIST_FOREACH((p), &allproc, p_list)
/* Iterate over every thread linked on (p)->p_threads through td_plist. */
#define FOREACH_THREAD_IN_PROC(p, td) \
	TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
/* First entry of (p)->p_threads. */
#define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads)

/*
 * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit
 * in a pid_t, as it is used to represent "no process group".
 */
#define PID_MAX 99999
#define NO_PID 100000
#define THREAD0_TID NO_PID
extern pid_t pid_max;

/* True when p is the s_leader of its own session. */
#define SESS_LEADER(p) ((p)->p_session->s_leader == (p))

/* Lock and unlock a process. */
#define PROC_LOCK(p) mtx_lock(&(p)->p_mtx)
#define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx)
#define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx)
#define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx)
#define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type))

/* Lock and unlock a process group. */
#define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx)
#define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx)
#define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx)
#define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type))

/* As PGRP_LOCK()/PGRP_UNLOCK(), except that a NULL pg is a no-op. */
#define PGRP_LOCK_PGSIGNAL(pg) do { \
	if ((pg) != NULL) \
		PGRP_LOCK(pg); \
} while (0)

#define PGRP_UNLOCK_PGSIGNAL(pg) do { \
	if ((pg) != NULL) \
		PGRP_UNLOCK(pg); \
} while (0)

/* Lock and unlock a session. */
#define SESS_LOCK(s) mtx_lock(&(s)->s_mtx)
#define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx)
#define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx)
#define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type))

/*
 * A non-zero p_lock ensures that:
 * - exit1() is not performed until p_lock reaches zero;
 * - the process' thread stacks are not swapped out if they are currently
 *   not (P_INMEM).
 *
 * PHOLD() asserts that the process (except the current process) is
 * not exiting, increments p_lock and swaps the thread stacks into memory,
 * if needed.
 * _PHOLD() is the same as PHOLD(), but expects the process to be locked
 * by the caller.
 * _PHOLD_LITE() also expects the process to be locked but, compared with
 * _PHOLD(), it only guarantees that exit1() is not executed;
 * faultin() is not called.
 */
#define PHOLD(p) do { \
	PROC_LOCK(p); \
	_PHOLD(p); \
	PROC_UNLOCK(p); \
} while (0)
#define _PHOLD(p) do { \
	PROC_LOCK_ASSERT((p), MA_OWNED); \
	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \
	    ("PHOLD of exiting process %p", p)); \
	(p)->p_lock++; \
	if (((p)->p_flag & P_INMEM) == 0) \
		faultin((p)); \
} while (0)
#define _PHOLD_LITE(p) do { \
	PROC_LOCK_ASSERT((p), MA_OWNED); \
	KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \
	    ("PHOLD of exiting process %p", p)); \
	(p)->p_lock++; \
} while (0)
/* Assert that the hold count p_lock is positive. */
#define PROC_ASSERT_HELD(p) do { \
	KASSERT((p)->p_lock > 0, ("process %p not held", p)); \
} while (0)

/* Drop one hold; takes and releases the process lock around _PRELE(). */
#define PRELE(p) do { \
	PROC_LOCK((p)); \
	_PRELE((p)); \
	PROC_UNLOCK((p)); \
} while (0)
/*
 * Locked variant of PRELE(): decrements p_lock and, when a process with
 * P_WEXIT set reaches a zero count, calls wakeup() on &p_lock.
 */
#define _PRELE(p) do { \
	PROC_LOCK_ASSERT((p), MA_OWNED); \
	PROC_ASSERT_HELD(p); \
	(--(p)->p_lock); \
	if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \
		wakeup(&(p)->p_lock); \
} while (0)
/* Assert that the hold count p_lock is zero. */
#define PROC_ASSERT_NOT_HELD(p) do { \
	KASSERT((p)->p_lock == 0, ("process %p held", p)); \
} while (0)

/* Increment p_cowgen; the process lock must be owned by the caller. */
#define PROC_UPDATE_COW(p) do { \
	PROC_LOCK_ASSERT((p), MA_OWNED); \
	(p)->p_cowgen++; \
} while (0)

/* Check whether a thread is safe to be swapped out. */
#define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP)

/* Control whether or not it is safe for curthread to sleep.
*/
/*
 * THREAD_NO_SLEEPING() increments curthread's td_no_sleeping counter,
 * THREAD_SLEEPING_OK() decrements it, and THREAD_CAN_SLEEP() is true
 * only while the counter is zero.
 */
#define THREAD_NO_SLEEPING() do { \
	curthread->td_no_sleeping++; \
	MPASS(curthread->td_no_sleeping > 0); \
} while (0)
#define THREAD_SLEEPING_OK() do { \
	MPASS(curthread->td_no_sleeping > 0); \
	curthread->td_no_sleeping--; \
} while (0)
#define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0)

/* Entry of pidhashtbl / pidhashtbl_lock selected by masking the pid. */
#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
#define PIDHASHLOCK(pid) (&pidhashtbl_lock[((pid) & pidhashlock)])
extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
extern struct sx *pidhashtbl_lock;
extern u_long pidhash;
extern u_long pidhashlock;

/* Entry of pgrphashtbl selected by masking the process group id. */
#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash])
extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
extern u_long pgrphash;

extern struct sx allproc_lock;
extern int allproc_gen;
extern struct sx proctree_lock;
extern struct mtx ppeers_lock;
extern struct mtx procid_lock;
extern struct proc proc0; /* Process slot for swapper. */
extern struct thread0_storage thread0_st; /* Primary thread in proc0. */
#define thread0 (thread0_st.t0st_thread)
extern struct vmspace vmspace0; /* VM space for proc0. */
extern int hogticks; /* Limit on kernel cpu hogs. */
extern int lastpid;
extern int nprocs, maxproc; /* Current and max number of procs. */
extern int maxprocperuid; /* Max procs per uid. */
extern u_long ps_arg_cache_limit;

LIST_HEAD(proclist, proc);
TAILQ_HEAD(procqueue, proc);
TAILQ_HEAD(threadqueue, thread);
extern struct proclist allproc; /* List of all processes. */
extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */

extern struct uma_zone *proc_zone;

struct proc *pfind(pid_t); /* Find process by id. */
struct proc *pfind_any(pid_t); /* Find (zombie) process by id. */
struct proc *pfind_any_locked(pid_t pid); /* Find process by id, locked. */
struct pgrp *pgfind(pid_t); /* Find process group by id. */
void pidhash_slockall(void); /* Shared lock all pid hash lists. */
void pidhash_sunlockall(void); /* Shared unlock all pid hash lists.
*/

/* Request block passed by pointer to fork1(). */
struct fork_req {
	int fr_flags;
	int fr_pages;
	int *fr_pidp;
	struct proc **fr_procp;
	int *fr_pd_fd;
	int fr_pd_flags;
	struct filecaps *fr_pd_fcaps;
	int fr_flags2;
	/* NOTE(review): presumably a bit for fr_flags2 -- confirm. */
#define FR2_DROPSIG_CAUGHT 0x00001 /* Drop caught non-DFL signals */
};

/*
 * pget() flags.
 */
#define PGET_HOLD 0x00001 /* Hold the process. */
#define PGET_CANSEE 0x00002 /* Check against p_cansee(). */
#define PGET_CANDEBUG 0x00004 /* Check against p_candebug(). */
#define PGET_ISCURRENT 0x00008 /* Check that the found process is current. */
#define PGET_NOTWEXIT 0x00010 /* Check that the process is not in P_WEXIT. */
#define PGET_NOTINEXEC 0x00020 /* Check that the process is not in P_INEXEC. */
#define PGET_NOTID 0x00040 /* Do not assume tid if pid > PID_MAX. */

/* Shorthand: hold the process, check p_candebug(), reject P_WEXIT. */
#define PGET_WANTREAD (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT)

int pget(pid_t pid, int flags, struct proc **pp);

void ast(struct trapframe *framep);
struct thread *choosethread(void);
int cr_cansee(struct ucred *u1, struct ucred *u2);
int cr_canseesocket(struct ucred *cred, struct socket *so);
int cr_canseeothergids(struct ucred *u1, struct ucred *u2);
int cr_canseeotheruids(struct ucred *u1, struct ucred *u2);
int cr_canseejailproc(struct ucred *u1, struct ucred *u2);
int cr_cansignal(struct ucred *cred, struct proc *proc, int signum);
int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp,
    struct session *sess);
int enterthispgrp(struct proc *p, struct pgrp *pgrp);
void faultin(struct proc *p);
int fork1(struct thread *, struct fork_req *);
void fork_rfppwait(struct thread *);
void fork_exit(void (*)(void *, struct trapframe *), void *,
    struct trapframe *);
void fork_return(struct thread *, struct trapframe *);
int inferior(struct proc *p);
void kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry,
    int *resident_count, bool *super);
void kern_yield(int);
void kick_proc0(void);
void killjobc(void);
int leavepgrp(struct proc *p);
int maybe_preempt(struct thread *td);
void maybe_yield(void);
/* The flags argument uses the SWT_ and SW_ values for mi_switch(). */
void mi_switch(int flags);
int p_candebug(struct thread *td, struct proc *p);
int p_cansee(struct thread *td, struct proc *p);
int p_cansched(struct thread *td, struct proc *p);
int p_cansignal(struct thread *td, struct proc *p, int signum);
int p_canwait(struct thread *td, struct proc *p);
struct pargs *pargs_alloc(int len);
void pargs_drop(struct pargs *pa);
void pargs_hold(struct pargs *pa);
int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb);
int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb);
int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
void procinit(void);
int proc_iterate(int (*cb)(struct proc *, void *), void *cbarg);
void proc_linkup0(struct proc *p, struct thread *td);
void proc_linkup(struct proc *p, struct thread *td);
struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent, bool set_oppid);
void proc_add_orphan(struct proc *child, struct proc *parent);
void proc_set_traced(struct proc *p, bool stop);
void proc_wkilled(struct proc *p);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
void proc_clear_orphan(struct proc *p);
void reaper_abandon_children(struct proc *p, bool exiting);
int securelevel_ge(struct ucred *cr, int level);
int securelevel_gt(struct ucred *cr, int level);
void sess_hold(struct session *);
void sess_release(struct session *);
int setrunnable(struct thread *, int);
void setsugid(struct proc *p);
int should_yield(void);
int sigonstack(size_t sp);
void stopevent(struct proc *, u_int, u_int);
struct thread *tdfind(lwpid_t, pid_t);
void threadinit(void);
void tidhash_add(struct thread *);
void tidhash_remove(struct thread *);
void cpu_idle(int);
int cpu_idle_wakeup(int);
extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler.
*/
/*
 * NOTE(review): __dead2 presumably marks functions that never return;
 * confirm against <sys/cdefs.h>.
 */
void cpu_switch(struct thread *, struct thread *, struct mtx *);
void cpu_throw(struct thread *, struct thread *) __dead2;
void unsleep(struct thread *);
void userret(struct thread *, struct trapframe *);

void cpu_exit(struct thread *);
void exit1(struct thread *, int, int) __dead2;
void cpu_copy_thread(struct thread *td, struct thread *td0);
bool cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
int cpu_fetch_syscall_args(struct thread *td);
void cpu_fork(struct thread *, struct proc *, struct thread *, int);
void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *);
int cpu_procctl(struct thread *td, int idtype, id_t id, int com,
    void *data);
void cpu_set_syscall_retval(struct thread *, int);
void cpu_set_upcall(struct thread *, void (*)(void *), void *,
    stack_t *);
int cpu_set_user_tls(struct thread *, void *tls_base);
void cpu_thread_alloc(struct thread *);
void cpu_thread_clean(struct thread *);
void cpu_thread_exit(struct thread *);
void cpu_thread_free(struct thread *);
void cpu_thread_swapin(struct thread *);
void cpu_thread_swapout(struct thread *);
struct thread *thread_alloc(int pages);
int thread_alloc_stack(struct thread *, int pages);
int thread_check_susp(struct thread *td, bool sleep);
void thread_cow_get_proc(struct thread *newtd, struct proc *p);
void thread_cow_get(struct thread *newtd, struct thread *td);
void thread_cow_free(struct thread *td);
void thread_cow_update(struct thread *td);
int thread_create(struct thread *td, struct rtprio *rtp,
    int (*initialize_thread)(struct thread *, void *), void *thunk);
void thread_exit(void) __dead2;
void thread_free(struct thread *td);
void thread_link(struct thread *td, struct proc *p);
void thread_reap(void);
/* The "how" argument of thread_single() takes one of the SINGLE_ values. */
int thread_single(struct proc *p, int how);
void thread_single_end(struct proc *p, int how);
void thread_stash(struct thread *td);
void thread_stopped(struct proc *p);
void childproc_stopped(struct proc *child, int reason);
void childproc_continued(struct proc *child);
void childproc_exited(struct proc *child); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); void thread_suspend_one(struct thread *td); void thread_unlink(struct thread *td); void thread_unsuspend(struct proc *p); void thread_wait(struct proc *p); void stop_all_proc(void); void resume_all_proc(void); static __inline int curthread_pflags_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags & flags); td->td_pflags |= flags; return (save); } static __inline void curthread_pflags_restore(int save) { curthread->td_pflags &= save; } static __inline int curthread_pflags2_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags2 & flags); td->td_pflags2 |= flags; return (save); } static __inline void curthread_pflags2_restore(int save) { curthread->td_pflags2 &= save; } static __inline __pure2 struct td_sched * td_get_sched(struct thread *td) { return ((struct td_sched *)&td[1]); } extern void (*softdep_ast_cleanup)(struct thread *); static __inline void td_softdep_cleanup(struct thread *td) { if (td->td_su != NULL && softdep_ast_cleanup != NULL) softdep_ast_cleanup(td); } #define PROC_ID_PID 0 #define PROC_ID_GROUP 1 #define PROC_ID_SESSION 2 #define PROC_ID_REAP 3 void proc_id_set(int type, pid_t id); void proc_id_set_cond(int type, pid_t id); void proc_id_clear(int type, pid_t id); EVENTHANDLER_LIST_DECLARE(process_ctor); EVENTHANDLER_LIST_DECLARE(process_dtor); EVENTHANDLER_LIST_DECLARE(process_init); EVENTHANDLER_LIST_DECLARE(process_fini); EVENTHANDLER_LIST_DECLARE(process_exit); EVENTHANDLER_LIST_DECLARE(process_fork); EVENTHANDLER_LIST_DECLARE(process_exec); EVENTHANDLER_LIST_DECLARE(thread_ctor); EVENTHANDLER_LIST_DECLARE(thread_dtor); EVENTHANDLER_LIST_DECLARE(thread_init); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: head/sys/sys/syscallsubr.h 
=================================================================== --- head/sys/sys/syscallsubr.h (revision 367743) +++ head/sys/sys/syscallsubr.h (revision 367744) @@ -1,346 +1,349 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Ian Dowse. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _SYS_SYSCALLSUBR_H_ #define _SYS_SYSCALLSUBR_H_ #include #include #include #include #include #include #include struct __wrusage; struct file; struct filecaps; enum idtype; struct itimerval; struct image_args; struct jail; struct kevent; struct kevent_copyops; struct kld_file_stat; struct ksiginfo; struct mbuf; struct msghdr; struct msqid_ds; struct pollfd; struct ogetdirentries_args; struct rlimit; struct rusage; struct sched_param; union semun; struct sockaddr; struct stat; struct thr_param; struct uio; +struct umtx_copyops; struct vm_map; struct vmspace; typedef int (*mmap_check_fp_fn)(struct file *, int, int, int); struct mmap_req { vm_offset_t mr_hint; vm_size_t mr_len; int mr_prot; int mr_flags; int mr_fd; off_t mr_pos; mmap_check_fp_fn mr_check_fp_fn; }; int kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, size_t buflen, size_t path_max); +int kern__umtx_op(struct thread *td, void *obj, int op, unsigned long val, + void *uaddr1, void *uaddr2, const struct umtx_copyops *ops); int kern_accept(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, struct file **fp); int kern_accept4(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, int flags, struct file **fp); int kern_accessat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode); int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta); int kern_alternate_path(struct thread *td, const char *prefix, const char *path, enum uio_seg pathseg, char **pathbuf, int create, int dirfd); int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_break(struct thread *td, uintptr_t *addr); int kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds, size_t ncmds); int kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights); int kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg); int kern_clock_getcpuclockid2(struct thread 
*td, id_t id, int which, clockid_t *clk_id); int kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts); int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats); int kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec *rqtp, struct timespec *rmtp); int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats); void kern_thread_cputime(struct thread *targettd, struct timespec *ats); void kern_process_cputime(struct proc *targetp, struct timespec *ats); int kern_close_range(struct thread *td, u_int lowfd, u_int highfd); int kern_close(struct thread *td, int fd); int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa); int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, off_t *outoffp, size_t len, unsigned int flags); int kern_cpuset_getaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp); int kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t cpusetsize, const cpuset_t *maskp); int kern_cpuset_getdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, domainset_t *maskp, int *policyp); int kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, size_t domainsetsize, const domainset_t *maskp, int policy); int kern_cpuset_getid(struct thread *td, cpulevel_t level, cpuwhich_t which, id_t id, cpusetid_t *setid); int kern_cpuset_setid(struct thread *td, cpuwhich_t which, id_t id, cpusetid_t setid); int kern_dup(struct thread *td, u_int mode, int flags, int old, int new); int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p, struct vmspace *oldvmspace); int kern_fchmodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, mode_t mode, int flag); int kern_fchownat(struct thread *td, int fd, const char *path, 
enum uio_seg pathseg, int uid, int gid, int flag); int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg); int kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg); int kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf); int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf); int kern_fpathconf(struct thread *td, int fd, int name, long *valuep); int kern_fstat(struct thread *td, int fd, struct stat *sbp); int kern_fstatfs(struct thread *td, int fd, struct statfs *buf); int kern_fsync(struct thread *td, int fd, bool fullsync); int kern_ftruncate(struct thread *td, int fd, off_t length); int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg); int kern_futimens(struct thread *td, int fd, struct timespec *tptr, enum uio_seg tptrseg); int kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, off_t *basep, ssize_t *residp, enum uio_seg bufseg); int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode); int kern_getitimer(struct thread *, u_int, struct itimerval *); int kern_getppid(struct thread *); int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getpriority(struct thread *td, int which, int who); int kern_getrusage(struct thread *td, int who, struct rusage *rup); int kern_getsid(struct thread *td, pid_t pid); int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen); int kern_getsockopt(struct thread *td, int s, int level, int name, void *optval, enum uio_seg valseg, socklen_t *valsize); int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data); int kern_jail(struct thread *td, struct jail *j); int kern_jail_get(struct thread *td, struct uio *options, int flags); int kern_jail_set(struct thread *td, struct uio *options, int flags); int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops 
*k_ops, const struct timespec *timeout); int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops); int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kill(struct thread *td, pid_t pid, int signum); int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); int kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, const char *path2, enum uio_seg segflg, int follow); int kern_listen(struct thread *td, int s, int backlog); int kern_lseek(struct thread *td, int fd, off_t offset, int whence); int kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav); int kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec); int kern_minherit(struct thread *td, uintptr_t addr, size_t len, int inherit); int kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode); int kern_mkfifoat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode); int kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode, dev_t dev); int kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr, size_t len); int kern_mmap(struct thread *td, uintptr_t addr, size_t len, int prot, int flags, int fd, off_t pos); int kern_mmap_racct_check(struct thread *td, struct vm_map *map, vm_size_t size); int kern_mmap_maxprot(struct proc *p, int prot); int kern_mmap_req(struct thread *td, const struct mmap_req *mrp); int kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot); int kern_msgctl(struct thread *, 
int, int, struct msqid_ds *); /* Every prototype in the remainder of this header returns int and, except for kern_thr_alloc() (which takes a struct proc *), takes a struct thread * as its first parameter. */ int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *); int kern_msgsnd(struct thread *, int, const void *, size_t, int, long); int kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags); int kern_munlock(struct thread *td, uintptr_t addr, size_t size); int kern_munmap(struct thread *td, uintptr_t addr, size_t size); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff); int kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode); int kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, int name, u_long flags, long *valuep); int kern_pipe(struct thread *td, int fildes[2], int flags, struct filecaps *fcaps1, struct filecaps *fcaps2); int kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, struct timespec *tsp, sigset_t *uset); int kern_posix_error(struct thread *td, int error); int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice); int kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len); int kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com, void *data); int kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset); int kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits); int kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data); int kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, off_t offset); int kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset); int kern_readlinkat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count); int kern_readv(struct thread *td, 
int fd, struct uio *auio); int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp); int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg); int kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag); /* The kern_sched_*() helpers below take a second struct thread *targettd in addition to td, except kern_sched_rr_get_interval(), which takes a pid_t instead. */ int kern_sched_getparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_getscheduler(struct thread *td, struct thread *targettd, int *policy); int kern_sched_setparam(struct thread *td, struct thread *targettd, struct sched_param *param); int kern_sched_setscheduler(struct thread *td, struct thread *targettd, int policy, struct sched_param *param); int kern_sched_rr_get_interval(struct thread *td, pid_t pid, struct timespec *ts); int kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd, struct timespec *ts); int kern_semctl(struct thread *td, int semid, int semnum, int cmd, union semun *arg, register_t *rval); int kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits); int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg); int kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups); int kern_setitimer(struct thread *, u_int, struct itimerval *, struct itimerval *); int kern_setpriority(struct thread *td, int which, int who, int prio); int kern_setrlimit(struct thread *, u_int, struct rlimit *); int kern_setsockopt(struct thread *td, int s, int level, int name, const void *optval, enum uio_seg valseg, socklen_t valsize); int kern_settimeofday(struct thread *td, struct timeval *tv, struct timezone *tzp); int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shm_open2(struct thread *td, const char *path, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, 
const char *name); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz); int kern_shutdown(struct thread *td, int s, int how); int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags); int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss); int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags); int kern_sigsuspend(struct thread *td, sigset_t mask); int kern_sigtimedwait(struct thread *td, sigset_t waitset, struct ksiginfo *ksi, struct timespec *timeout); int kern_sigqueue(struct thread *td, pid_t pid, int signum, union sigval *value); int kern_socket(struct thread *td, int domain, int type, int protocol); int kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)); int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, enum uio_seg segflg); int kern_sync(struct thread *td); int kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, int *timerid, int preset_id); int kern_ktimer_delete(struct thread *, int); int kern_ktimer_settime(struct thread *td, int timer_id, int flags, struct itimerspec *val, struct itimerspec *oval); int kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val); int kern_ktimer_getoverrun(struct thread *td, int timer_id); int kern_thr_alloc(struct proc *, int pages, struct thread **); int kern_thr_exit(struct thread *td); int kern_thr_new(struct thread *td, struct thr_param *param); int kern_thr_suspend(struct thread *td, struct timespec *tsp); int kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, off_t length); int kern_funlinkat(struct thread *td, 
int dfd, const char *path, int fd, enum uio_seg pathseg, int flag, ino_t oldinum); int kern_utimesat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_utimensat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, int follow); int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rup); int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status, int options, struct __wrusage *wrup, siginfo_t *sip); int kern_writev(struct thread *td, int fd, struct uio *auio); int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv); int kern_unmount(struct thread *td, const char *path, int flags); /* flags for kern_sigaction */ #define KSA_OSIGSET 0x0001 /* uses osigact_t */ #define KSA_FREEBSD4 0x0002 /* uses ucontext4 */ /* Forward declaration: the prototype below only needs a pointer to struct freebsd11_dirent. */ struct freebsd11_dirent; int freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, long *basep, void (*func)(struct freebsd11_dirent *)); #endif /* !_SYS_SYSCALLSUBR_H_ */ Index: head/sys/sys/umtx.h =================================================================== --- head/sys/sys/umtx.h (revision 367743) +++ head/sys/sys/umtx.h (revision 367744) @@ -1,202 +1,219 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002, Jeffrey Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef _SYS_UMTX_H_ #define _SYS_UMTX_H_ #include /* NOTE(review): the operand of this #include is missing in this text; upstream presumably includes <sys/_umtx.h> here -- confirm before applying. */ /* Common lock flags */ #define USYNC_PROCESS_SHARED 0x0001 /* Process shared sync objs */ /* umutex flags */ #define UMUTEX_PRIO_INHERIT 0x0004 /* Priority inherited mutex */ #define UMUTEX_PRIO_PROTECT 0x0008 /* Priority protect mutex */ #define UMUTEX_ROBUST 0x0010 /* Robust mutex */ #define UMUTEX_NONCONSISTENT 0x0020 /* Robust locked but not consistent */ /* * The umutex lock word values and bits. m_owner is the word that * serves as the lock. Its high bit is the contention indicator and * the remaining bits record the owner TID. TID values start at PID_MAX * + 2 and end at INT32_MAX. The low range [1..PID_MAX] is guaranteed * to be usable for special markers. 
*/ #define UMUTEX_UNOWNED 0x0 #define UMUTEX_CONTESTED 0x80000000U /* Both robust markers below include the UMUTEX_CONTESTED bit. */ #define UMUTEX_RB_OWNERDEAD (UMUTEX_CONTESTED | 0x10) #define UMUTEX_RB_NOTRECOV (UMUTEX_CONTESTED | 0x11) /* urwlock flags */ #define URWLOCK_PREFER_READER 0x0002 /* The next three values are single high-bit flags; URWLOCK_READER_COUNT() masks a value down to the low 29 bits (URWLOCK_MAX_READERS). */ #define URWLOCK_WRITE_OWNER 0x80000000U #define URWLOCK_WRITE_WAITERS 0x40000000U #define URWLOCK_READ_WAITERS 0x20000000U #define URWLOCK_MAX_READERS 0x1fffffffU #define URWLOCK_READER_COUNT(c) ((c) & URWLOCK_MAX_READERS) /* _usem flags */ #define SEM_NAMED 0x0002 /* _usem2 count field: the high bit flags waiters, USEM_COUNT() masks out the low 31 bits */ #define USEM_HAS_WAITERS 0x80000000U #define USEM_MAX_COUNT 0x7fffffffU #define USEM_COUNT(c) ((c) & USEM_MAX_COUNT) /* Operation codes for _umtx_op() */ #define UMTX_OP_RESERVED0 0 #define UMTX_OP_RESERVED1 1 #define UMTX_OP_WAIT 2 #define UMTX_OP_WAKE 3 #define UMTX_OP_MUTEX_TRYLOCK 4 #define UMTX_OP_MUTEX_LOCK 5 #define UMTX_OP_MUTEX_UNLOCK 6 #define UMTX_OP_SET_CEILING 7 #define UMTX_OP_CV_WAIT 8 #define UMTX_OP_CV_SIGNAL 9 #define UMTX_OP_CV_BROADCAST 10 #define UMTX_OP_WAIT_UINT 11 #define UMTX_OP_RW_RDLOCK 12 #define UMTX_OP_RW_WRLOCK 13 #define UMTX_OP_RW_UNLOCK 14 #define UMTX_OP_WAIT_UINT_PRIVATE 15 #define UMTX_OP_WAKE_PRIVATE 16 #define UMTX_OP_MUTEX_WAIT 17 #define UMTX_OP_MUTEX_WAKE 18 /* deprecated */ #define UMTX_OP_SEM_WAIT 19 /* deprecated */ #define UMTX_OP_SEM_WAKE 20 /* deprecated */ #define UMTX_OP_NWAKE_PRIVATE 21 #define UMTX_OP_MUTEX_WAKE2 22 #define UMTX_OP_SEM2_WAIT 23 #define UMTX_OP_SEM2_WAKE 24 #define UMTX_OP_SHM 25 #define UMTX_OP_ROBUST_LISTS 26 /* Flags for UMTX_OP_CV_WAIT */ #define CVWAIT_CHECK_UNPARKING 0x01 #define CVWAIT_ABSTIME 0x02 #define CVWAIT_CLOCKID 0x04 #define UMTX_ABSTIME 0x01 #define UMTX_CHECK_UNPARKING CVWAIT_CHECK_UNPARKING /* Flags for UMTX_OP_SHM */ #define UMTX_SHM_CREAT 0x0001 #define UMTX_SHM_LOOKUP 0x0002 #define UMTX_SHM_DESTROY 0x0004 #define UMTX_SHM_ALIVE 0x0008 /* Three pointer-sized offsets. NOTE(review): presumably the argument block for UMTX_OP_ROBUST_LISTS -- confirm against the implementation. */ struct umtx_robust_lists_params { uintptr_t robust_list_offset; uintptr_t robust_priv_list_offset; uintptr_t robust_inact_offset; }; #ifndef 
_KERNEL /* Non-kernel builds see only the _umtx_op() prototype from this section. */ __BEGIN_DECLS int _umtx_op(void *obj, int op, u_long val, void *uaddr, void *uaddr2); __END_DECLS #else /* * The umtx_key structure is used by both the Linux futex code and the * umtx implementation to map userland addresses to unique keys. */ /* NOTE(review): presumably the values stored in umtx_key.type -- confirm against the implementation. */ enum { TYPE_SIMPLE_WAIT, TYPE_CV, TYPE_SEM, TYPE_SIMPLE_LOCK, TYPE_NORMAL_UMUTEX, TYPE_PI_UMUTEX, TYPE_PP_UMUTEX, TYPE_RWLOCK, TYPE_FUTEX, TYPE_SHM, TYPE_PI_ROBUST_UMUTEX, TYPE_PP_ROBUST_UMUTEX, }; /* Key to represent a unique userland synchronization object */ struct umtx_key { int hash; int type; int shared; /* The 'both' member overlays the 'shared' and 'private' variants (each a pointer followed by a uintptr_t), so umtx_key_match() can compare either form through it. */ union { struct { struct vm_object *object; uintptr_t offset; } shared; struct { struct vmspace *vs; uintptr_t addr; } private; struct { void *a; uintptr_t b; } both; } info; }; /* NOTE(review): presumably the sharing-mode values passed to umtx_key_get() -- confirm against the implementation. */ #define THREAD_SHARE 0 #define PROCESS_SHARE 1 #define AUTO_SHARE 2 struct thread; /* Returns nonzero when the two keys have the same type and the same pair of info words; the hash and shared fields are not compared. */ static inline int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2) { return (k1->type == k2->type && k1->info.both.a == k2->info.both.a && k1->info.both.b == k2->info.both.b); } int umtx_copyin_timeout(const void *, struct timespec *); int umtx_key_get(const void *, int, int, struct umtx_key *); void umtx_key_release(struct umtx_key *); struct umtx_q *umtxq_alloc(void); void umtxq_free(struct umtx_q *); int kern_umtx_wake(struct thread *, void *, int, int); void umtx_pi_adjust(struct thread *, u_char); void umtx_thread_init(struct thread *); void umtx_thread_fini(struct thread *); void umtx_thread_alloc(struct thread *); void umtx_thread_exit(struct thread *); + /* Bundle of copyin/copyout function pointers together with two size fields (timespec_sz, umtx_time_sz) and a compat32 flag. NOTE(review): presumably lets one code path handle both native and 32-bit-compat layouts of the user structures -- confirm against kern_umtx.c. */ +struct umtx_copyops { + int (*copyin_timeout)(const void *uaddr, struct timespec *tsp); + int (*copyin_umtx_time)(const void *uaddr, size_t size, + struct _umtx_time *tp); + int (*copyin_robust_lists)(const void *uaddr, size_t size, + struct umtx_robust_lists_params *rbp); + int (*copyout_timeout)(void *uaddr, size_t size, + struct timespec *tsp); + const size_t timespec_sz; + const size_t umtx_time_sz; + const bool compat32; +}; + /* The ops table below is declared only when COMPAT_FREEBSD32 is defined. */ +#ifdef COMPAT_FREEBSD32 +extern const struct 
umtx_copyops umtx_native_ops32; +#endif #endif /* !_KERNEL */ #endif /* !_SYS_UMTX_H_ */