Index: head/contrib/netbsd-tests/kernel/kqueue/t_proc1.c =================================================================== --- head/contrib/netbsd-tests/kernel/kqueue/t_proc1.c (revision 320042) +++ head/contrib/netbsd-tests/kernel/kqueue/t_proc1.c (revision 320043) @@ -1,159 +1,155 @@ /* $NetBSD: t_proc1.c,v 1.3 2017/01/13 21:30:41 christos Exp $ */ /*- * Copyright (c) 2002, 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Luke Mewburn and Jaromir Dolecek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __COPYRIGHT("@(#) Copyright (c) 2008\ The NetBSD Foundation, inc. All rights reserved."); __RCSID("$NetBSD: t_proc1.c,v 1.3 2017/01/13 21:30:41 christos Exp $"); /* * this also used to trigger problem fixed in * rev. 
1.1.1.1.2.13 of sys/kern/kern_event.c */ #include #include #include #include #include #include #include #include #include #include "h_macros.h" static int child(void) { pid_t ch; int status; char *argv[] = { NULL, NULL }; char *envp[] = { NULL, NULL }; if ((argv[0] = strdup("true")) == NULL) err(EXIT_FAILURE, "strdup(\"true\")"); if ((envp[0] = strdup("FOO=BAZ")) == NULL) err(EXIT_FAILURE, "strdup(\"FOO=BAZ\")"); /* Ensure parent is ready */ (void)sleep(2); /* Do fork */ switch (ch = fork()) { case -1: return EXIT_FAILURE; /* NOTREACHED */ case 0: return EXIT_SUCCESS; /* NOTREACHED */ default: wait(&status); break; } /* Exec */ execve("/usr/bin/true", argv, envp); /* NOTREACHED */ return EXIT_FAILURE; } ATF_TC(proc1); ATF_TC_HEAD(proc1, tc) { atf_tc_set_md_var(tc, "descr", "Checks EVFILT_PROC"); } ATF_TC_BODY(proc1, tc) { struct kevent event[1]; pid_t pid; int kq, status; u_int want; RL(kq = kqueue()); /* fork a child for doing the events */ RL(pid = fork()); if (pid == 0) { _exit(child()); /* NOTREACHED */ } (void)sleep(1); /* give child some time to come up */ event[0].ident = (uintptr_t)pid; event[0].filter = EVFILT_PROC; event[0].flags = EV_ADD | EV_ENABLE; event[0].fflags = NOTE_EXIT | NOTE_FORK | NOTE_EXEC; /* | NOTE_TRACK;*/ want = NOTE_EXIT | NOTE_FORK | NOTE_EXEC; RL(kevent(kq, event, 1, NULL, 0, NULL)); /* wait until we get all events we want */ while (want) { RL(kevent(kq, NULL, 0, event, 1, NULL)); printf("%ld:", (long)event[0].ident); if (event[0].fflags & NOTE_EXIT) { want &= ~NOTE_EXIT; printf(" NOTE_EXIT"); } if (event[0].fflags & NOTE_EXEC) { want &= ~NOTE_EXEC; printf(" NOTE_EXEC"); } if (event[0].fflags & NOTE_FORK) { want &= ~NOTE_FORK; printf(" NOTE_FORK"); } if (event[0].fflags & NOTE_CHILD) -#ifdef __FreeBSD__ - printf(" NOTE_CHILD, parent = %" PRIdPTR, event[0].data); -#else printf(" NOTE_CHILD, parent = %" PRId64, event[0].data); -#endif printf("\n"); } (void)waitpid(pid, &status, 0); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, proc1); return atf_no_error(); } Index: head/contrib/netbsd-tests/kernel/kqueue/t_sig.c =================================================================== --- head/contrib/netbsd-tests/kernel/kqueue/t_sig.c (revision 320042) +++ head/contrib/netbsd-tests/kernel/kqueue/t_sig.c (revision 320043) @@ -1,147 +1,143 @@ /* $NetBSD: t_sig.c,v 1.3 2017/01/13 21:30:41 christos Exp $ */ /*- * Copyright (c) 2002, 2008 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Luke Mewburn and Jaromir Dolecek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __COPYRIGHT("@(#) Copyright (c) 2008\ The NetBSD Foundation, inc. All rights reserved."); __RCSID("$NetBSD: t_sig.c,v 1.3 2017/01/13 21:30:41 christos Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include "h_macros.h" #define NSIGNALS 5 ATF_TC(sig); ATF_TC_HEAD(sig, tc) { atf_tc_set_md_var(tc, "descr", "Checks EVFILT_SIGNAL"); } ATF_TC_BODY(sig, tc) { struct timespec timeout; #ifdef __NetBSD__ struct kfilter_mapping km; #endif struct kevent event[1]; #ifdef __NetBSD__ char namebuf[32]; #endif pid_t pid, child; int kq, n, num, status; pid = getpid(); (void)printf("my pid: %d\n", pid); /* fork a child to send signals */ RL(child = fork()); if (child == 0) { int i; (void)sleep(2); for(i = 0; i < NSIGNALS; ++i) { (void)kill(pid, SIGUSR1); (void)sleep(2); } _exit(0); /* NOTREACHED */ } RL(kq = kqueue()); #ifdef __NetBSD__ (void)strlcpy(namebuf, "EVFILT_SIGNAL", sizeof(namebuf)); km.name = namebuf; RL(ioctl(kq, KFILTER_BYNAME, &km)); (void)printf("got %d as filter number for `%s'.\n", km.filter, km.name); #endif /* ignore the signal to avoid taking it for real */ REQUIRE_LIBC(signal(SIGUSR1, SIG_IGN), SIG_ERR); event[0].ident = SIGUSR1; #ifdef __NetBSD__ event[0].filter = km.filter; #else event[0].filter = EVFILT_SIGNAL; #endif event[0].flags = EV_ADD | EV_ENABLE; RL(kevent(kq, event, 1, NULL, 0, NULL)); (void)sleep(1); timeout.tv_sec = 1; timeout.tv_nsec = 0; for (num = 0; num < NSIGNALS; num += n) { struct timeval then, now, diff; RL(gettimeofday(&then, NULL)); RL(n = kevent(kq, NULL, 0, event, 1, &timeout)); RL(gettimeofday(&now, NULL)); timersub(&now, &then, &diff); (void)printf("sig: kevent returned %d in %lld.%06ld\n", n, (long long)diff.tv_sec, (long)diff.tv_usec); if (n == 0) continue; -#ifdef __FreeBSD__ - (void)printf("sig: kevent flags: 0x%x, data: %" PRIdPTR " (# " -#else (void)printf("sig: kevent flags: 0x%x, data: %" PRId64 " (# " -#endif "times signal posted)\n", event[0].flags, event[0].data); } (void)waitpid(child, &status, 0); (void)printf("sig: finished successfully\n"); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, sig); return atf_no_error(); } Index: head/lib/libc/include/compat.h =================================================================== --- head/lib/libc/include/compat.h (revision 320042) +++ head/lib/libc/include/compat.h (revision 320043) @@ -1,78 +1,80 @@ /*- * Copyright (c) 2009 Hudson River Trading LLC * Written by: John H. Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file defines compatibility symbol versions for old system calls. It * is included in all generated system call files. */ #ifndef __LIBC_COMPAT_H__ #define __LIBC_COMPAT_H__ #define __sym_compat(sym,impl,verid) \ .symver impl, sym@verid #ifndef NO_COMPAT7 __sym_compat(__semctl, freebsd7___semctl, FBSD_1.0); __sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0); __sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0); #endif __sym_compat(nfstat, freebsd11_nfstat, FBSD_1.0); __sym_compat(nlstat, freebsd11_nlstat, FBSD_1.0); __sym_compat(nstat, freebsd11_nstat, FBSD_1.0); __sym_compat(fhstat, freebsd11_fhstat, FBSD_1.0); __sym_compat(fstat, freebsd11_fstat, FBSD_1.0); __sym_compat(fstatat, freebsd11_fstatat, FBSD_1.1); __sym_compat(lstat, freebsd11_lstat, FBSD_1.0); __sym_compat(stat, freebsd11_stat, FBSD_1.0); __sym_compat(getdents, freebsd11_getdents, FBSD_1.0); __sym_compat(getdirentries, freebsd11_getdirentries, FBSD_1.0); __sym_compat(getfsstat, freebsd11_getfsstat, FBSD_1.0); __sym_compat(fhstatfs, freebsd11_fhstatfs, FBSD_1.0); __sym_compat(fstatfs, freebsd11_fstatfs, FBSD_1.0); __sym_compat(statfs, freebsd11_statfs, FBSD_1.0); __sym_compat(mknod, freebsd11_mknod, FBSD_1.0); __sym_compat(mknodat, freebsd11_mknodat, FBSD_1.1); +__sym_compat(kevent, freebsd11_kevent, FBSD_1.0); + #undef __sym_compat #define __weak_reference(sym,alias) \ .weak alias;.equ alias,sym __weak_reference(__sys_fcntl,__fcntl_compat) #undef __weak_reference #endif /* __LIBC_COMPAT_H__ */ Index: head/lib/libc/sys/Symbol.map =================================================================== --- head/lib/libc/sys/Symbol.map (revision 320042) +++ head/lib/libc/sys/Symbol.map (revision 320043) @@ -1,1034 +1,1034 @@ /* * $FreeBSD$ */ /* * It'd be nice to automatically generate the syscall symbols, but we * don't know to what version they will eventually belong to, so for now * it has to be manual. 
*/ FBSD_1.0 { __acl_aclcheck_fd; __acl_aclcheck_file; __acl_aclcheck_link; __acl_delete_fd; __acl_delete_file; __acl_delete_link; __acl_get_fd; __acl_get_file; __acl_get_link; __acl_set_fd; __acl_set_file; __acl_set_link; __getcwd; __mac_execve; __mac_get_fd; __mac_get_file; __mac_get_link; __mac_get_pid; __mac_get_proc; __mac_set_fd; __mac_set_file; __mac_set_link; __mac_set_proc; __setugid; __syscall; __sysctl; _umtx_op; abort2; accept; access; acct; adjtime; aio_cancel; aio_error; aio_fsync; aio_read; aio_return; aio_suspend; aio_waitcomplete; aio_write; audit; auditctl; auditon; bind; chdir; chflags; chmod; chown; chroot; clock_getres; clock_gettime; clock_settime; close; connect; dup; dup2; eaccess; execve; extattr_delete_fd; extattr_delete_file; extattr_delete_link; extattr_get_fd; extattr_get_file; extattr_get_link; extattr_list_fd; extattr_list_file; extattr_list_link; extattr_set_fd; extattr_set_file; extattr_set_link; extattrctl; fchdir; fchflags; fchmod; fchown; fcntl; fhopen; flock; fork; fpathconf; fsync; futimes; getaudit; getaudit_addr; getauid; getcontext; getdtablesize; getegid; geteuid; getfh; getgid; getgroups; getitimer; getpeername; getpgid; getpgrp; getpid; getppid; getpriority; getresgid; getresuid; getrlimit; getrusage; getsid; getsockname; getsockopt; gettimeofday; getuid; ioctl; issetugid; jail; jail_attach; kenv; - kevent; kill; kldfind; kldfirstmod; kldload; kldnext; kldstat; kldsym; kldunload; kldunloadf; kqueue; kmq_notify; /* Do we want these to be public interfaces? */ kmq_open; /* librt uses them to provide mq_xxx. */ kmq_setattr; kmq_timedreceive; kmq_timedsend; kmq_unlink; ksem_close; ksem_destroy; ksem_getvalue; ksem_init; ksem_open; ksem_post; ksem_timedwait; ksem_trywait; ksem_unlink; ksem_wait; ktrace; lchflags; lchmod; lchown; lgetfh; link; lio_listio; listen; lutimes; mac_syscall; madvise; mincore; minherit; mkdir; mkfifo; mlock; mlockall; modfind; modfnext; modnext; modstat; mount; mprotect; msgget; msgrcv; msgsnd; msgsys; msync; munlock; munlockall; munmap; nanosleep; netbsd_lchown; netbsd_msync; nfssvc; nmount; ntp_adjtime; ntp_gettime; open; pathconf; pipe; poll; posix_openpt; preadv; profil; pselect; ptrace; pwritev; quotactl; read; readlink; readv; reboot; recvfrom; recvmsg; rename; revoke; rfork; rmdir; rtprio; rtprio_thread; sched_get_priority_max; sched_get_priority_min; sched_getparam; sched_getscheduler; sched_rr_get_interval; sched_setparam; sched_setscheduler; sched_yield; select; semget; semop; semsys; sendfile; sendmsg; sendto; setaudit; setaudit_addr; setauid; setegid; seteuid; setgid; setgroups; setitimer; setlogin; setpgid; setpriority; setregid; setresgid; setresuid; setreuid; setrlimit; setsid; setsockopt; settimeofday; setuid; shm_open; shm_unlink; shmat; shmdt; shmget; shmsys; shutdown; sigaction; sigaltstack; sigpending; sigprocmask; sigqueue; sigreturn; sigsuspend; sigtimedwait; sigwait; sigwaitinfo; socket; socketpair; swapoff; swapon; symlink; sync; sysarch; syscall; thr_create; thr_exit; thr_kill; thr_kill2; thr_new; thr_self; thr_set_name; thr_suspend; thr_wake; ktimer_create; /* Do we want these to be public interfaces? */ ktimer_delete; /* librt uses them to provide timer_xxx. 
*/ ktimer_getoverrun; ktimer_gettime; ktimer_settime; umask; undelete; unlink; unmount; utimes; utrace; uuidgen; vadvise; wait4; write; writev; __error; ftruncate; lseek; mmap; pread; pwrite; truncate; }; FBSD_1.1 { __semctl; closefrom; cpuset; cpuset_getid; cpuset_setid; cpuset_getaffinity; cpuset_setaffinity; faccessat; fchmodat; fchownat; fexecve; futimesat; jail_get; jail_set; jail_remove; linkat; lpathconf; mkdirat; mkfifoat; msgctl; readlinkat; renameat; setfib; shmctl; symlinkat; unlinkat; }; FBSD_1.2 { cap_enter; cap_getmode; getloginclass; pdfork; pdgetpid; pdkill; posix_fallocate; rctl_get_racct; rctl_get_rules; rctl_get_limits; rctl_add_rule; rctl_remove_rule; setloginclass; }; FBSD_1.3 { accept4; aio_mlock; bindat; cap_fcntls_get; cap_fcntls_limit; cap_ioctls_get; cap_ioctls_limit; __cap_rights_get; cap_rights_limit; cap_sandboxed; chflagsat; clock_getcpuclockid2; connectat; ffclock_getcounter; ffclock_getestimate; ffclock_setestimate; pipe2; posix_fadvise; procctl; wait6; }; FBSD_1.4 { futimens; ppoll; utimensat; numa_setaffinity; numa_getaffinity; sendmmsg; recvmmsg; }; FBSD_1.5 { clock_nanosleep; fdatasync; fhstat; fhstatfs; fstat; fstatat; fstatfs; getdents; getdirentries; getfsstat; + kevent; lstat; mknod; mknodat; stat; statfs; }; FBSDprivate_1.0 { ___acl_aclcheck_fd; __sys___acl_aclcheck_fd; ___acl_aclcheck_file; __sys___acl_aclcheck_file; ___acl_aclcheck_link; __sys___acl_aclcheck_link; ___acl_delete_fd; __sys___acl_delete_fd; ___acl_delete_file; __sys___acl_delete_file; ___acl_delete_link; __sys___acl_delete_link; ___acl_get_fd; __sys___acl_get_fd; ___acl_get_file; __sys___acl_get_file; ___acl_get_link; __sys___acl_get_link; ___acl_set_fd; __sys___acl_set_fd; ___acl_set_file; __sys___acl_set_file; ___acl_set_link; __sys___acl_set_link; ___getcwd; __sys___getcwd; ___mac_execve; __sys___mac_execve; ___mac_get_fd; __sys___mac_get_fd; ___mac_get_file; __sys___mac_get_file; ___mac_get_link; __sys___mac_get_link; ___mac_get_pid; __sys___mac_get_pid; ___mac_get_proc; __sys___mac_get_proc; ___mac_set_fd; __sys___mac_set_fd; ___mac_set_file; __sys___mac_set_file; ___mac_set_link; __sys___mac_set_link; ___mac_set_proc; __sys___mac_set_proc; ___semctl; __sys___semctl; ___setugid; __sys___setugid; ___syscall; __sys___syscall; ___sysctl; __sys___sysctl; __umtx_op; __sys__umtx_op; _abort2; __sys_abort2; _accept; __sys_accept; _accept4; __sys_accept4; _access; __sys_access; _acct; __sys_acct; _adjtime; __sys_adjtime; _aio_cancel; __sys_aio_cancel; _aio_error; __sys_aio_error; _aio_fsync; __sys_aio_fsync; _aio_read; __sys_aio_read; _aio_return; __sys_aio_return; _aio_suspend; __sys_aio_suspend; _aio_waitcomplete; __sys_aio_waitcomplete; _aio_write; __sys_aio_write; _audit; __sys_audit; _auditctl; __sys_auditctl; _auditon; __sys_auditon; _bind; __sys_bind; _chdir; __sys_chdir; _chflags; __sys_chflags; _chmod; __sys_chmod; _chown; __sys_chown; _chroot; __sys_chroot; _clock_getcpuclockid2; __sys_clock_getcpuclockid2; _clock_getres; __sys_clock_getres; _clock_gettime; __sys_clock_gettime; __sys_clock_nanosleep; _clock_settime; __sys_clock_settime; _close; __sys_close; _closefrom; __sys_closefrom; _connect; __sys_connect; _cpuset; __sys_cpuset; _cpuset_getid; __sys_cpuset_getid; _cpuset_setid; __sys_cpuset_setid; _cpuset_getaffinity; __sys_cpuset_getaffinity; _cpuset_setaffinity; __sys_cpuset_setaffinity; _dup; __sys_dup; _dup2; __sys_dup2; _eaccess; __sys_eaccess; _execve; __sys_execve; _extattr_delete_fd; __sys_extattr_delete_fd; _extattr_delete_file; __sys_extattr_delete_file; 
_extattr_delete_link; __sys_extattr_delete_link; _extattr_get_fd; __sys_extattr_get_fd; _extattr_get_file; __sys_extattr_get_file; _extattr_get_link; __sys_extattr_get_link; _extattr_list_fd; __sys_extattr_list_fd; _extattr_list_file; __sys_extattr_list_file; _extattr_list_link; __sys_extattr_list_link; _extattr_set_fd; __sys_extattr_set_fd; _extattr_set_file; __sys_extattr_set_file; _extattr_set_link; __sys_extattr_set_link; _extattrctl; __sys_extattrctl; _fchdir; __sys_fchdir; _fchflags; __sys_fchflags; _fchmod; __sys_fchmod; _fchown; __sys_fchown; _fcntl; __sys_fcntl; __fcntl_compat; _fhopen; __sys_fhopen; _fhstat; __sys_fhstat; _fhstatfs; __sys_fhstatfs; _flock; __sys_flock; _fork; __sys_fork; _fpathconf; __sys_fpathconf; _fstat; __sys_fstat; _fstatfs; __sys_fstatfs; _fsync; __sys_fsync; _fdatasync; __sys_fdatasync; _futimes; __sys_futimes; _getaudit; __sys_getaudit; _getaudit_addr; __sys_getaudit_addr; _getauid; __sys_getauid; _getcontext; __sys_getcontext; _getdirentries; __sys_getdirentries; _getdtablesize; __sys_getdtablesize; _getegid; __sys_getegid; _geteuid; __sys_geteuid; _getfh; __sys_getfh; _getfsstat; __sys_getfsstat; _getgid; __sys_getgid; _getgroups; __sys_getgroups; _getitimer; __sys_getitimer; _getpeername; __sys_getpeername; _getpgid; __sys_getpgid; _getpgrp; __sys_getpgrp; _getpid; __sys_getpid; _getppid; __sys_getppid; _getpriority; __sys_getpriority; _getresgid; __sys_getresgid; _getresuid; __sys_getresuid; _getrlimit; __sys_getrlimit; _getrusage; __sys_getrusage; _getsid; __sys_getsid; _getsockname; __sys_getsockname; _getsockopt; __sys_getsockopt; _gettimeofday; __sys_gettimeofday; _getuid; __sys_getuid; _ioctl; __sys_ioctl; _issetugid; __sys_issetugid; _jail; __sys_jail; _jail_attach; __sys_jail_attach; _kenv; __sys_kenv; _kevent; __sys_kevent; _kill; __sys_kill; _kldfind; __sys_kldfind; _kldfirstmod; __sys_kldfirstmod; _kldload; __sys_kldload; _kldnext; __sys_kldnext; _kldstat; __sys_kldstat; _kldsym; __sys_kldsym; _kldunload; __sys_kldunload; _kldunloadf; __sys_kldunloadf; _kmq_notify; __sys_kmq_notify; _kmq_open; __sys_kmq_open; _kmq_setattr; __sys_kmq_setattr; _kmq_timedreceive; __sys_kmq_timedreceive; _kmq_timedsend; __sys_kmq_timedsend; _kmq_unlink; __sys_kmq_unlink; _kqueue; __sys_kqueue; _ksem_close; __sys_ksem_close; _ksem_destroy; __sys_ksem_destroy; _ksem_getvalue; __sys_ksem_getvalue; _ksem_init; __sys_ksem_init; _ksem_open; __sys_ksem_open; _ksem_post; __sys_ksem_post; _ksem_timedwait; __sys_ksem_timedwait; _ksem_trywait; __sys_ksem_trywait; _ksem_unlink; __sys_ksem_unlink; _ksem_wait; __sys_ksem_wait; _ktrace; __sys_ktrace; _lchflags; __sys_lchflags; _lchmod; __sys_lchmod; _lchown; __sys_lchown; _lgetfh; __sys_lgetfh; _link; __sys_link; _lio_listio; __sys_lio_listio; _listen; __sys_listen; _lutimes; __sys_lutimes; _mac_syscall; __sys_mac_syscall; _madvise; __sys_madvise; _mincore; __sys_mincore; _minherit; __sys_minherit; _mkdir; __sys_mkdir; _mkfifo; __sys_mkfifo; _mknod; __sys_mknod; _mlock; __sys_mlock; _mlockall; __sys_mlockall; _modfind; __sys_modfind; _modfnext; __sys_modfnext; _modnext; __sys_modnext; _modstat; __sys_modstat; _mount; __sys_mount; _mprotect; __sys_mprotect; _msgctl; __sys_msgctl; _msgget; __sys_msgget; _msgrcv; __sys_msgrcv; _msgsnd; __sys_msgsnd; _msgsys; __sys_msgsys; _msync; __sys_msync; _munlock; __sys_munlock; _munlockall; __sys_munlockall; _munmap; __sys_munmap; _nanosleep; __sys_nanosleep; _netbsd_lchown; __sys_netbsd_lchown; _netbsd_msync; __sys_netbsd_msync; _nfssvc; __sys_nfssvc; _nmount; __sys_nmount; _ntp_adjtime; 
__sys_ntp_adjtime; _ntp_gettime; __sys_ntp_gettime; _open; __sys_open; _openat; __sys_openat; _pathconf; __sys_pathconf; _pipe; __sys_pipe; _poll; __sys_poll; _ppoll; __sys_ppoll; _preadv; __sys_preadv; _procctl; __sys_procctl; _profil; __sys_profil; _pselect; __sys_pselect; _ptrace; __sys_ptrace; _pwritev; __sys_pwritev; _quotactl; __sys_quotactl; _read; __sys_read; _readlink; __sys_readlink; _readv; __sys_readv; _reboot; __sys_reboot; _recvfrom; __sys_recvfrom; _recvmsg; __sys_recvmsg; _rename; __sys_rename; _revoke; __sys_revoke; _rfork; __sys_rfork; _rmdir; __sys_rmdir; _rtprio; __sys_rtprio; _rtprio_thread; __sys_rtprio_thread; _sched_get_priority_max; __sys_sched_get_priority_max; _sched_get_priority_min; __sys_sched_get_priority_min; _sched_getparam; __sys_sched_getparam; _sched_getscheduler; __sys_sched_getscheduler; _sched_rr_get_interval; __sys_sched_rr_get_interval; _sched_setparam; __sys_sched_setparam; _sched_setscheduler; __sys_sched_setscheduler; _sched_yield; __sys_sched_yield; _select; __sys_select; _semget; __sys_semget; _semop; __sys_semop; _semsys; __sys_semsys; _sendfile; __sys_sendfile; _sendmsg; __sys_sendmsg; _sendto; __sys_sendto; _setaudit; __sys_setaudit; _setaudit_addr; __sys_setaudit_addr; _setauid; __sys_setauid; _setcontext; __sys_setcontext; _setegid; __sys_setegid; _seteuid; __sys_seteuid; _setgid; __sys_setgid; _setgroups; __sys_setgroups; _setitimer; __sys_setitimer; _setlogin; __sys_setlogin; _setpgid; __sys_setpgid; _setpriority; __sys_setpriority; _setregid; __sys_setregid; _setresgid; __sys_setresgid; _setresuid; __sys_setresuid; _setreuid; __sys_setreuid; _setrlimit; __sys_setrlimit; _setsid; __sys_setsid; _setsockopt; __sys_setsockopt; _settimeofday; __sys_settimeofday; _setuid; __sys_setuid; _shm_open; __sys_shm_open; _shm_unlink; __sys_shm_unlink; _shmat; __sys_shmat; _shmctl; __sys_shmctl; _shmdt; __sys_shmdt; _shmget; __sys_shmget; _shmsys; __sys_shmsys; _shutdown; __sys_shutdown; _sigaction; __sys_sigaction; _sigaltstack; __sys_sigaltstack; _sigpending; __sys_sigpending; _sigprocmask; __sys_sigprocmask; _sigqueue; __sys_sigqueue; _sigreturn; __sys_sigreturn; _sigsuspend; __sys_sigsuspend; _sigtimedwait; __sys_sigtimedwait; _sigwait; __sigwait; __sys_sigwait; _sigwaitinfo; __sys_sigwaitinfo; _socket; __sys_socket; _socketpair; __sys_socketpair; _statfs; __sys_statfs; _swapcontext; __sys_swapcontext; _swapoff; __sys_swapoff; _swapon; __sys_swapon; _symlink; __sys_symlink; _sync; __sys_sync; _sysarch; __sys_sysarch; _syscall; __sys_syscall; _thr_create; __sys_thr_create; _thr_exit; __sys_thr_exit; _thr_kill; __sys_thr_kill; _thr_kill2; __sys_thr_kill2; _thr_new; __sys_thr_new; _thr_self; __sys_thr_self; _thr_set_name; __sys_thr_set_name; _thr_suspend; __sys_thr_suspend; _thr_wake; __sys_thr_wake; _ktimer_create; __sys_ktimer_create; _ktimer_delete; __sys_ktimer_delete; _ktimer_getoverrun; __sys_ktimer_getoverrun; _ktimer_gettime; __sys_ktimer_gettime; _ktimer_settime; __sys_ktimer_settime; _umask; __sys_umask; _undelete; __sys_undelete; _unlink; __sys_unlink; _unmount; __sys_unmount; _utimes; __sys_utimes; _utrace; __sys_utrace; _uuidgen; __sys_uuidgen; _vadvise; __sys_vadvise; _wait4; __sys_wait4; _wait6; __sys_wait6; _write; __sys_write; _writev; __sys_writev; __set_error_selector; nlm_syscall; gssd_syscall; __libc_interposing_slot; __libc_sigwait; }; Index: head/lib/libc/sys/kqueue.2 =================================================================== --- head/lib/libc/sys/kqueue.2 (revision 320042) +++ head/lib/libc/sys/kqueue.2 (revision 
320043) @@ -1,777 +1,804 @@ .\" Copyright (c) 2000 Jonathan Lemon .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd April 18, 2017 +.Dd June 17, 2017 .Dt KQUEUE 2 .Os .Sh NAME .Nm kqueue , .Nm kevent .Nd kernel event notification mechanism .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/event.h .Ft int .Fn kqueue "void" .Ft int .Fn kevent "int kq" "const struct kevent *changelist" "int nchanges" "struct kevent *eventlist" "int nevents" "const struct timespec *timeout" .Fn EV_SET "kev" ident filter flags fflags data udata .Sh DESCRIPTION The .Fn kqueue system call provides a generic method of notifying the user when an event happens or a condition holds, based on the results of small pieces of kernel code termed filters. A kevent is identified by the (ident, filter) pair; there may only be one unique kevent per kqueue. .Pp The filter is executed upon the initial registration of a kevent in order to detect whether a preexisting condition is present, and is also executed whenever an event is passed to the filter for evaluation. If the filter determines that the condition should be reported, then the kevent is placed on the kqueue for the user to retrieve. .Pp The filter is also run when the user attempts to retrieve the kevent from the kqueue. If the filter indicates that the condition that triggered the event no longer holds, the kevent is removed from the kqueue and is not returned. .Pp Multiple events which trigger the filter do not result in multiple kevents being placed on the kqueue; instead, the filter will aggregate the events into a single struct kevent. Calling .Fn close on a file descriptor will remove any kevents that reference the descriptor. .Pp The .Fn kqueue system call creates a new kernel event queue and returns a descriptor. The queue is not inherited by a child created with .Xr fork 2 . However, if .Xr rfork 2 is called without the .Dv RFFDG flag, then the descriptor table is shared, which will allow sharing of the kqueue between two processes. .Pp The .Fn kevent system call is used to register events with the queue, and return any pending events to the user. The .Fa changelist argument is a pointer to an array of .Va kevent structures, as defined in .In sys/event.h . 
All changes contained in the .Fa changelist are applied before any pending events are read from the queue. The .Fa nchanges argument gives the size of .Fa changelist . The .Fa eventlist argument is a pointer to an array of kevent structures. The .Fa nevents argument determines the size of .Fa eventlist . When .Fa nevents is zero, .Fn kevent will return immediately even if there is a .Fa timeout specified unlike .Xr select 2 . If .Fa timeout is a non-NULL pointer, it specifies a maximum interval to wait for an event, which will be interpreted as a struct timespec. If .Fa timeout is a NULL pointer, .Fn kevent waits indefinitely. To effect a poll, the .Fa timeout argument should be non-NULL, pointing to a zero-valued .Va timespec structure. The same array may be used for the .Fa changelist and .Fa eventlist . .Pp The .Fn EV_SET macro is provided for ease of initializing a kevent structure. .Pp The .Va kevent structure is defined as: .Bd -literal struct kevent { - uintptr_t ident; /* identifier for this event */ + uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; /* action flags for kqueue */ u_int fflags; /* filter flag value */ - intptr_t data; /* filter data value */ + int64_t data; /* filter data value */ void *udata; /* opaque user data identifier */ + uint64_t ext[4]; /* extensions */ }; .Ed .Pp The fields of .Fa struct kevent are: .Bl -tag -width "Fa filter" .It Fa ident Value used to identify this event. The exact interpretation is determined by the attached filter, but often is a file descriptor. .It Fa filter Identifies the kernel filter used to process this event. The pre-defined system filters are described below. .It Fa flags Actions to perform on the event. .It Fa fflags Filter-specific flags. .It Fa data Filter-specific data value. .It Fa udata Opaque user-defined value passed through the kernel unchanged. +.It Fa ext +Extended data passed to and from the kernel. +The use of the +.Fa ext[0] +and +.Fa ext[1] +members is defined by the filter. +If the filter does not use them, the members are copied unchanged. +The +.Fa ext[2] +and +.Fa ext[3] +members are always passed through the kernel as-is, +making additional context available to the application. .El .Pp The .Va flags field can contain the following values: .Bl -tag -width EV_DISPATCH .It Dv EV_ADD Adds the event to the kqueue. Re-adding an existing event will modify the parameters of the original event, and not result in a duplicate entry. Adding an event automatically enables it, unless overridden by the EV_DISABLE flag. .It Dv EV_ENABLE Permit .Fn kevent to return the event if it is triggered. .It Dv EV_DISABLE Disable the event so .Fn kevent will not return it. The filter itself is not disabled. .It Dv EV_DISPATCH Disable the event source immediately after delivery of an event. See .Dv EV_DISABLE above. .It Dv EV_DELETE Removes the event from the kqueue. Events which are attached to file descriptors are automatically deleted on the last close of the descriptor. .It Dv EV_RECEIPT This flag is useful for making bulk changes to a kqueue without draining any pending events. When passed as input, it forces .Dv EV_ERROR to always be returned. When a filter is successfully added the .Va data field will be zero. .It Dv EV_ONESHOT Causes the event to return only the first occurrence of the filter being triggered. After the user retrieves the event from the kqueue, it is deleted. .It Dv EV_CLEAR After the event is retrieved by the user, its state is reset.
This is useful for filters which report state transitions instead of the current state. Note that some filters may automatically set this flag internally. .It Dv EV_EOF Filters may set this flag to indicate filter-specific EOF condition. .It Dv EV_ERROR See .Sx RETURN VALUES below. .El .Pp The predefined system filters are listed below. Arguments may be passed to and from the filter via the .Va fflags and .Va data fields in the kevent structure. .Bl -tag -width "Dv EVFILT_PROCDESC" .It Dv EVFILT_READ Takes a descriptor as the identifier, and returns whenever there is data available to read. The behavior of the filter is slightly different depending on the descriptor type. .Bl -tag -width 2n .It Sockets Sockets which have previously been passed to .Fn listen return when there is an incoming connection pending. .Va data contains the size of the listen backlog. .Pp Other socket descriptors return when there is data to be read, subject to the .Dv SO_RCVLOWAT value of the socket buffer. This may be overridden with a per-filter low water mark at the time the filter is added by setting the .Dv NOTE_LOWAT flag in .Va fflags , and specifying the new low water mark in .Va data . On return, .Va data contains the number of bytes of protocol data available to read. .Pp If the read direction of the socket has shutdown, then the filter also sets .Dv EV_EOF in .Va flags , and returns the socket error (if any) in .Va fflags . It is possible for EOF to be returned (indicating the connection is gone) while there is still data pending in the socket buffer. .It Vnodes Returns when the file pointer is not at the end of file. .Va data contains the offset from current position to end of file, and may be negative. .Pp This behavior is different from .Xr poll 2 , where read events are triggered for regular files unconditionally. This event can be triggered unconditionally by setting the .Dv NOTE_FILE_POLL flag in .Va fflags . .It "Fifos, Pipes" Returns when the there is data to read; .Va data contains the number of bytes available. .Pp When the last writer disconnects, the filter will set .Dv EV_EOF in .Va flags . This may be cleared by passing in .Dv EV_CLEAR , at which point the filter will resume waiting for data to become available before returning. .It "BPF devices" Returns when the BPF buffer is full, the BPF timeout has expired, or when the BPF has .Dq immediate mode enabled and there is any data to read; .Va data contains the number of bytes available. .El .It Dv EVFILT_WRITE Takes a descriptor as the identifier, and returns whenever it is possible to write to the descriptor. For sockets, pipes and fifos, .Va data will contain the amount of space remaining in the write buffer. The filter will set EV_EOF when the reader disconnects, and for the fifo case, this may be cleared by use of .Dv EV_CLEAR . Note that this filter is not supported for vnodes or BPF devices. .Pp For sockets, the low water mark and socket error handling is identical to the .Dv EVFILT_READ case. .It Dv EVFILT_EMPTY Takes a descriptor as the identifier, and returns whenever there is no remaining data in the write buffer. .It Dv EVFILT_AIO The sigevent portion of the AIO request is filled in, with .Va sigev_notify_kqueue containing the descriptor of the kqueue that the event should be attached to, .Va sigev_notify_kevent_flags containing the kevent flags which should be .Dv EV_ONESHOT , .Dv EV_CLEAR or .Dv EV_DISPATCH , .Va sigev_value containing the udata value, and .Va sigev_notify set to .Dv SIGEV_KEVENT . 
When the .Fn aio_* system call is made, the event will be registered with the specified kqueue, and the .Va ident argument set to the .Fa struct aiocb returned by the .Fn aio_* system call. The filter returns under the same conditions as .Fn aio_error . .It Dv EVFILT_VNODE Takes a file descriptor as the identifier and the events to watch for in .Va fflags , and returns when one or more of the requested events occurs on the descriptor. The events to monitor are: .Bl -tag -width "Dv NOTE_CLOSE_WRITE" .It Dv NOTE_ATTRIB The file referenced by the descriptor had its attributes changed. .It Dv NOTE_CLOSE A file descriptor referencing the monitored file, was closed. The closed file descriptor did not have write access. .It Dv NOTE_CLOSE_WRITE A file descriptor referencing the monitored file, was closed. The closed file descriptor had write access. .Pp This note, as well as .Dv NOTE_CLOSE , are not activated when files are closed forcibly by .Xr unmount 2 or .Xr revoke 2 . Instead, .Dv NOTE_REVOKE is sent for such events. .It Dv NOTE_DELETE The .Fn unlink system call was called on the file referenced by the descriptor. .It Dv NOTE_EXTEND For regular file, the file referenced by the descriptor was extended. .Pp For directory, reports that a directory entry was added or removed, as the result of rename operation. The .Dv NOTE_EXTEND event is not reported when a name is changed inside the directory. .It Dv NOTE_LINK The link count on the file changed. In particular, the .Dv NOTE_LINK event is reported if a subdirectory was created or deleted inside the directory referenced by the descriptor. .It Dv NOTE_OPEN The file referenced by the descriptor was opened. .It Dv NOTE_READ A read occurred on the file referenced by the descriptor. .It Dv NOTE_RENAME The file referenced by the descriptor was renamed. .It Dv NOTE_REVOKE Access to the file was revoked via .Xr revoke 2 or the underlying file system was unmounted. .It Dv NOTE_WRITE A write occurred on the file referenced by the descriptor. .El .Pp On return, .Va fflags contains the events which triggered the filter. .It Dv EVFILT_PROC Takes the process ID to monitor as the identifier and the events to watch for in .Va fflags , and returns when the process performs one or more of the requested events. If a process can normally see another process, it can attach an event to it. The events to monitor are: .Bl -tag -width "Dv NOTE_TRACKERR" .It Dv NOTE_EXIT The process has exited. The exit status will be stored in .Va data . .It Dv NOTE_FORK The process has called .Fn fork . .It Dv NOTE_EXEC The process has executed a new process via .Xr execve 2 or a similar call. .It Dv NOTE_TRACK Follow a process across .Fn fork calls. The parent process registers a new kevent to monitor the child process using the same .Va fflags as the original event. The child process will signal an event with .Dv NOTE_CHILD set in .Va fflags and the parent PID in .Va data . .Pp If the parent process fails to register a new kevent .Pq usually due to resource limitations , it will signal an event with .Dv NOTE_TRACKERR set in .Va fflags , and the child process will not signal a .Dv NOTE_CHILD event. .El .Pp On return, .Va fflags contains the events which triggered the filter. .It Dv EVFILT_PROCDESC Takes the process descriptor created by .Xr pdfork 2 to monitor as the identifier and the events to watch for in .Va fflags , and returns when the associated process performs one or more of the requested events. 
The events to monitor are: .Bl -tag -width "Dv NOTE_EXIT" .It Dv NOTE_EXIT The process has exited. The exit status will be stored in .Va data . .El .Pp On return, .Va fflags contains the events which triggered the filter. .It Dv EVFILT_SIGNAL Takes the signal number to monitor as the identifier and returns when the given signal is delivered to the process. This coexists with the .Fn signal and .Fn sigaction facilities, and has a lower precedence. The filter will record all attempts to deliver a signal to a process, even if the signal has been marked as .Dv SIG_IGN , except for the .Dv SIGCHLD signal, which, if ignored, won't be recorded by the filter. Event notification happens after normal signal delivery processing. .Va data returns the number of times the signal has occurred since the last call to .Fn kevent . This filter automatically sets the .Dv EV_CLEAR flag internally. .It Dv EVFILT_TIMER Establishes an arbitrary timer identified by .Va ident . When adding a timer, .Va data -specifies the timeout period. +specifies the moment to fire the timer (for +.Dv NOTE_ABSTIME ) +or the timeout period. The timer will be periodic unless .Dv EV_ONESHOT +or +.Dv NOTE_ABSTIME is specified. On return, .Va data contains the number of times the timeout has expired since the last call to .Fn kevent . -This filter automatically sets the EV_CLEAR flag internally. -.Bl -tag -width "Dv NOTE_USECONDS" +For non-monotonic timers, this filter automatically sets the +.Dv EV_CLEAR +flag internally. +.Pp +The filter accepts the following flags in the +.Va fflags +argument: +.Bl -tag -width "Dv NOTE_MSECONDS" .It Dv NOTE_SECONDS .Va data is in seconds. .It Dv NOTE_MSECONDS .Va data is in milliseconds. .It Dv NOTE_USECONDS .Va data is in microseconds. .It Dv NOTE_NSECONDS .Va data is in nanoseconds. +.It Dv NOTE_ABSTIME +The specified expiration time is absolute. .El .Pp If .Va fflags is not set, the default is milliseconds. On return, .Va fflags contains the events which triggered the filter. .Pp There is a system wide limit on the number of timers which is controlled by the .Va kern.kq_calloutmax sysctl. .It Dv EVFILT_USER Establishes a user event identified by .Va ident which is not associated with any kernel mechanism but is triggered by user level code. The lower 24 bits of the .Va fflags may be used for user defined flags and manipulated using the following: .Bl -tag -width "Dv NOTE_FFLAGSMASK" .It Dv NOTE_FFNOP Ignore the input .Va fflags . .It Dv NOTE_FFAND Bitwise AND .Va fflags . .It Dv NOTE_FFOR Bitwise OR .Va fflags . .It Dv NOTE_FFCOPY Copy .Va fflags . .It Dv NOTE_FFCTRLMASK Control mask for .Va fflags . .It Dv NOTE_FFLAGSMASK User defined flag mask for .Va fflags . .El .Pp A user event is triggered for output with the following: .Bl -tag -width "Dv NOTE_FFLAGSMASK" .It Dv NOTE_TRIGGER Cause the event to be triggered. .El .Pp On return, .Va fflags contains the users defined flags in the lower 24 bits. .El .Sh CANCELLATION BEHAVIOUR If .Fa nevents is non-zero, i.e. the function is potentially blocking, the call is a cancellation point. Otherwise, i.e. if .Fa nevents is zero, the call is not cancellable. Cancellation can only occur before any changes are made to the kqueue, or when the call was blocked and no changes to the queue were requested. .Sh RETURN VALUES The .Fn kqueue system call creates a new kernel event queue and returns a file descriptor. If there was an error creating the kernel event queue, a value of -1 is returned and errno set. 
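A minimal sketch of the NOTE_ABSTIME behaviour documented above, assuming the expiration is expressed in seconds via NOTE_SECONDS (the ident value 1 and the five-second offset are arbitrary; this program is illustrative and not part of the tree):

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int
main(void)
{
	struct kevent kev;
	int kq;

	if ((kq = kqueue()) == -1)
		err(EXIT_FAILURE, "kqueue");

	/*
	 * Arm a timer that fires at an absolute wall-clock moment five
	 * seconds from now.  NOTE_ABSTIME makes the data value an
	 * absolute expiration time instead of a period, and
	 * NOTE_SECONDS selects the unit; an absolute timer fires once.
	 */
	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE,
	    NOTE_ABSTIME | NOTE_SECONDS, (int64_t)time(NULL) + 5, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(EXIT_FAILURE, "kevent register");

	/* Block until the absolute expiration time is reached. */
	if (kevent(kq, NULL, 0, &kev, 1, NULL) == -1)
		err(EXIT_FAILURE, "kevent wait");
	printf("timer %lu fired\n", (unsigned long)kev.ident);
	return (EXIT_SUCCESS);
}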
.Pp The .Fn kevent system call returns the number of events placed in the .Fa eventlist , up to the value given by .Fa nevents . If an error occurs while processing an element of the .Fa changelist and there is enough room in the .Fa eventlist , then the event will be placed in the .Fa eventlist with .Dv EV_ERROR set in .Va flags and the system error in .Va data . Otherwise, .Dv -1 will be returned, and .Dv errno will be set to indicate the error condition. If the time limit expires, then .Fn kevent returns 0. .Sh EXAMPLES .Bd -literal -compact #include #include #include #include #include #include int main(int argc, char **argv) { struct kevent event; /* Event we want to monitor */ struct kevent tevent; /* Event triggered */ int kq, fd, ret; if (argc != 2) err(EXIT_FAILURE, "Usage: %s path\en", argv[0]); fd = open(argv[1], O_RDONLY); if (fd == -1) err(EXIT_FAILURE, "Failed to open '%s'", argv[1]); /* Create kqueue. */ kq = kqueue(); if (kq == -1) err(EXIT_FAILURE, "kqueue() failed"); /* Initialize kevent structure. */ EV_SET(&event, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE, 0, NULL); /* Attach event to the kqueue. */ ret = kevent(kq, &event, 1, NULL, 0, NULL); if (ret == -1) err(EXIT_FAILURE, "kevent register"); if (event.flags & EV_ERROR) errx(EXIT_FAILURE, "Event error: %s", strerror(event.data)); for (;;) { /* Sleep until something happens. */ ret = kevent(kq, NULL, 0, &tevent, 1, NULL); if (ret == -1) { err(EXIT_FAILURE, "kevent wait"); } else if (ret > 0) { printf("Something was written in '%s'\en", argv[1]); } } } .Ed .Sh ERRORS The .Fn kqueue system call fails if: .Bl -tag -width Er .It Bq Er ENOMEM The kernel failed to allocate enough memory for the kernel queue. .It Bq Er ENOMEM The .Dv RLIMIT_KQUEUES rlimit (see .Xr getrlimit 2 ) for the current user would be exceeded. .It Bq Er EMFILE The per-process descriptor table is full. .It Bq Er ENFILE The system file table is full. .El .Pp The .Fn kevent system call fails if: .Bl -tag -width Er .It Bq Er EACCES The process does not have permission to register a filter. .It Bq Er EFAULT There was an error reading or writing the .Va kevent structure. .It Bq Er EBADF The specified descriptor is invalid. .It Bq Er EINTR A signal was delivered before the timeout expired and before any events were placed on the kqueue for return. .It Bq Er EINTR A cancellation request was delivered to the thread, but not yet handled. .It Bq Er EINVAL The specified time limit or filter is invalid. .It Bq Er ENOENT The event could not be found to be modified or deleted. .It Bq Er ENOMEM No memory was available to register the event or, in the special case of a timer, the maximum number of timers has been exceeded. This maximum is configurable via the .Va kern.kq_calloutmax sysctl. .It Bq Er ESRCH The specified process to attach to does not exist. .El .Pp When .Fn kevent call fails with .Er EINTR error, all changes in the .Fa changelist have been applied. .Sh SEE ALSO .Xr aio_error 2 , .Xr aio_read 2 , .Xr aio_return 2 , .Xr poll 2 , .Xr read 2 , .Xr select 2 , .Xr sigaction 2 , .Xr write 2 , .Xr pthread_setcancelstate 3 , .Xr signal 3 .Sh HISTORY The .Fn kqueue and .Fn kevent system calls first appeared in .Fx 4.1 . .Sh AUTHORS The .Fn kqueue system and this manual page were written by .An Jonathan Lemon Aq Mt jlemon@FreeBSD.org . .Sh BUGS The .Fa timeout value is limited to 24 hours; longer timeouts will be silently reinterpreted as 24 hours. .Pp Previous versions of .In sys/event.h fail to parse without including .In sys/types.h manually. 
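The ext[2] and ext[3] pass-through described in the updated struct kevent documentation can be pictured with a sketch along the following lines; the EV_SET macro takes no ext arguments, so those members are assigned by hand after it, and the stored values here are arbitrary placeholders:

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	struct kevent change, result;
	int fd, kq;

	if (argc != 2)
		errx(EXIT_FAILURE, "usage: %s path", argv[0]);
	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "open '%s'", argv[1]);
	if ((kq = kqueue()) == -1)
		err(EXIT_FAILURE, "kqueue");

	/* Watch the file for writes, as in the manual page example. */
	EV_SET(&change, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
	    NOTE_WRITE, 0, NULL);
	/*
	 * ext[2] and ext[3] are returned with the event unchanged, so
	 * they can carry extra per-event context alongside udata.
	 */
	change.ext[2] = 0x1234;
	change.ext[3] = 0x5678;
	if (kevent(kq, &change, 1, NULL, 0, NULL) == -1)
		err(EXIT_FAILURE, "kevent register");

	/* Wait for a write and read the context back. */
	if (kevent(kq, NULL, 0, &result, 1, NULL) == -1)
		err(EXIT_FAILURE, "kevent wait");
	printf("write seen, ext[2]=%#" PRIx64 " ext[3]=%#" PRIx64 "\n",
	    result.ext[2], result.ext[3]);
	return (EXIT_SUCCESS);
}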
Index: head/sys/compat/freebsd32/freebsd32.h =================================================================== --- head/sys/compat/freebsd32/freebsd32.h (revision 320042) +++ head/sys/compat/freebsd32/freebsd32.h (revision 320043) @@ -1,413 +1,414 @@ /*- * Copyright (c) 2001 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _COMPAT_FREEBSD32_FREEBSD32_H_ #define _COMPAT_FREEBSD32_FREEBSD32_H_ #include #include #include #define PTRIN(v) (void *)(uintptr_t) (v) #define PTROUT(v) (u_int32_t)(uintptr_t) (v) #define CP(src,dst,fld) do { (dst).fld = (src).fld; } while (0) #define PTRIN_CP(src,dst,fld) \ do { (dst).fld = PTRIN((src).fld); } while (0) #define PTROUT_CP(src,dst,fld) \ do { (dst).fld = PTROUT((src).fld); } while (0) /* * Being a newer port, 32-bit FreeBSD/MIPS uses 64-bit time_t. 
*/ #ifdef __mips__ typedef int64_t time32_t; #else typedef int32_t time32_t; #endif struct timeval32 { time32_t tv_sec; int32_t tv_usec; }; #define TV_CP(src,dst,fld) do { \ CP((src).fld,(dst).fld,tv_sec); \ CP((src).fld,(dst).fld,tv_usec); \ } while (0) struct timespec32 { time32_t tv_sec; int32_t tv_nsec; }; #define TS_CP(src,dst,fld) do { \ CP((src).fld,(dst).fld,tv_sec); \ CP((src).fld,(dst).fld,tv_nsec); \ } while (0) struct itimerspec32 { struct timespec32 it_interval; struct timespec32 it_value; }; #define ITS_CP(src, dst) do { \ TS_CP((src), (dst), it_interval); \ TS_CP((src), (dst), it_value); \ } while (0) struct rusage32 { struct timeval32 ru_utime; struct timeval32 ru_stime; int32_t ru_maxrss; int32_t ru_ixrss; int32_t ru_idrss; int32_t ru_isrss; int32_t ru_minflt; int32_t ru_majflt; int32_t ru_nswap; int32_t ru_inblock; int32_t ru_oublock; int32_t ru_msgsnd; int32_t ru_msgrcv; int32_t ru_nsignals; int32_t ru_nvcsw; int32_t ru_nivcsw; }; struct wrusage32 { struct rusage32 wru_self; struct rusage32 wru_children; }; struct itimerval32 { struct timeval32 it_interval; struct timeval32 it_value; }; #define FREEBSD4_MFSNAMELEN 16 #define FREEBSD4_MNAMELEN (88 - 2 * sizeof(int32_t)) /* 4.x version */ struct statfs32 { int32_t f_spare2; int32_t f_bsize; int32_t f_iosize; int32_t f_blocks; int32_t f_bfree; int32_t f_bavail; int32_t f_files; int32_t f_ffree; fsid_t f_fsid; uid_t f_owner; int32_t f_type; int32_t f_flags; int32_t f_syncwrites; int32_t f_asyncwrites; char f_fstypename[FREEBSD4_MFSNAMELEN]; char f_mntonname[FREEBSD4_MNAMELEN]; int32_t f_syncreads; int32_t f_asyncreads; int16_t f_spares1; char f_mntfromname[FREEBSD4_MNAMELEN]; int16_t f_spares2 __packed; int32_t f_spare[2]; }; struct kevent32 { - u_int32_t ident; /* identifier for this event */ + uint32_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; u_int fflags; - int32_t data; - u_int32_t udata; /* opaque user data identifier */ + int32_t data1, data2; + uint32_t udata; /* opaque user data identifier */ + uint32_t ext64[8]; }; struct iovec32 { u_int32_t iov_base; int iov_len; }; struct msghdr32 { u_int32_t msg_name; socklen_t msg_namelen; u_int32_t msg_iov; int msg_iovlen; u_int32_t msg_control; socklen_t msg_controllen; int msg_flags; }; #if defined(__amd64__) #define __STAT32_TIME_T_EXT 1 #endif struct stat32 { dev_t st_dev; ino_t st_ino; nlink_t st_nlink; mode_t st_mode; u_int16_t st_padding0; uid_t st_uid; gid_t st_gid; u_int32_t st_padding1; dev_t st_rdev; #ifdef __STAT32_TIME_T_EXT __int32_t st_atim_ext; #endif struct timespec32 st_atim; #ifdef __STAT32_TIME_T_EXT __int32_t st_mtim_ext; #endif struct timespec32 st_mtim; #ifdef __STAT32_TIME_T_EXT __int32_t st_ctim_ext; #endif struct timespec32 st_ctim; #ifdef __STAT32_TIME_T_EXT __int32_t st_btim_ext; #endif struct timespec32 st_birthtim; off_t st_size; int64_t st_blocks; u_int32_t st_blksize; u_int32_t st_flags; u_int64_t st_gen; u_int64_t st_spare[10]; }; struct freebsd11_stat32 { u_int32_t st_dev; u_int32_t st_ino; mode_t st_mode; u_int16_t st_nlink; uid_t st_uid; gid_t st_gid; u_int32_t st_rdev; struct timespec32 st_atim; struct timespec32 st_mtim; struct timespec32 st_ctim; off_t st_size; int64_t st_blocks; u_int32_t st_blksize; u_int32_t st_flags; u_int32_t st_gen; int32_t st_lspare; struct timespec32 st_birthtim; unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32)); unsigned int :(8 / 2) * (16 - (int)sizeof(struct timespec32)); }; struct ostat32 { __uint16_t st_dev; __uint32_t st_ino; mode_t st_mode; 
__uint16_t st_nlink; __uint16_t st_uid; __uint16_t st_gid; __uint16_t st_rdev; __int32_t st_size; struct timespec32 st_atim; struct timespec32 st_mtim; struct timespec32 st_ctim; __int32_t st_blksize; __int32_t st_blocks; u_int32_t st_flags; __uint32_t st_gen; }; struct jail32_v0 { u_int32_t version; uint32_t path; uint32_t hostname; u_int32_t ip_number; }; struct jail32 { uint32_t version; uint32_t path; uint32_t hostname; uint32_t jailname; uint32_t ip4s; uint32_t ip6s; uint32_t ip4; uint32_t ip6; }; struct sigaction32 { u_int32_t sa_u; int sa_flags; sigset_t sa_mask; }; struct thr_param32 { uint32_t start_func; uint32_t arg; uint32_t stack_base; uint32_t stack_size; uint32_t tls_base; uint32_t tls_size; uint32_t child_tid; uint32_t parent_tid; int32_t flags; uint32_t rtp; uint32_t spare[3]; }; struct i386_ldt_args32 { uint32_t start; uint32_t descs; uint32_t num; }; struct mq_attr32 { int mq_flags; int mq_maxmsg; int mq_msgsize; int mq_curmsgs; int __reserved[4]; }; struct kinfo_proc32 { int ki_structsize; int ki_layout; uint32_t ki_args; uint32_t ki_paddr; uint32_t ki_addr; uint32_t ki_tracep; uint32_t ki_textvp; uint32_t ki_fd; uint32_t ki_vmspace; uint32_t ki_wchan; pid_t ki_pid; pid_t ki_ppid; pid_t ki_pgid; pid_t ki_tpgid; pid_t ki_sid; pid_t ki_tsid; short ki_jobc; short ki_spare_short1; uint32_t ki_tdev_freebsd11; sigset_t ki_siglist; sigset_t ki_sigmask; sigset_t ki_sigignore; sigset_t ki_sigcatch; uid_t ki_uid; uid_t ki_ruid; uid_t ki_svuid; gid_t ki_rgid; gid_t ki_svgid; short ki_ngroups; short ki_spare_short2; gid_t ki_groups[KI_NGROUPS]; uint32_t ki_size; int32_t ki_rssize; int32_t ki_swrss; int32_t ki_tsize; int32_t ki_dsize; int32_t ki_ssize; u_short ki_xstat; u_short ki_acflag; fixpt_t ki_pctcpu; u_int ki_estcpu; u_int ki_slptime; u_int ki_swtime; u_int ki_cow; u_int64_t ki_runtime; struct timeval32 ki_start; struct timeval32 ki_childtime; int ki_flag; int ki_kiflag; int ki_traceflag; char ki_stat; signed char ki_nice; char ki_lock; char ki_rqindex; u_char ki_oncpu_old; u_char ki_lastcpu_old; char ki_tdname[TDNAMLEN+1]; char ki_wmesg[WMESGLEN+1]; char ki_login[LOGNAMELEN+1]; char ki_lockname[LOCKNAMELEN+1]; char ki_comm[COMMLEN+1]; char ki_emul[KI_EMULNAMELEN+1]; char ki_loginclass[LOGINCLASSLEN+1]; char ki_moretdname[MAXCOMLEN-TDNAMLEN+1]; char ki_sparestrings[46]; int ki_spareints[KI_NSPARE_INT]; uint64_t ki_tdev; int ki_oncpu; int ki_lastcpu; int ki_tracer; int ki_flag2; int ki_fibnum; u_int ki_cr_flags; int ki_jid; int ki_numthreads; lwpid_t ki_tid; struct priority ki_pri; struct rusage32 ki_rusage; struct rusage32 ki_rusage_ch; uint32_t ki_pcb; uint32_t ki_kstack; uint32_t ki_udata; uint32_t ki_tdaddr; uint32_t ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ int ki_sparelongs[KI_NSPARE_LONG]; int ki_sflag; int ki_tdflags; }; struct kinfo_sigtramp32 { uint32_t ksigtramp_start; uint32_t ksigtramp_end; uint32_t ksigtramp_spare[4]; }; struct kld32_file_stat_1 { int version; /* set to sizeof(struct kld_file_stat_1) */ char name[MAXPATHLEN]; int refs; int id; uint32_t address; /* load address */ uint32_t size; /* size in bytes */ }; struct kld32_file_stat { int version; /* set to sizeof(struct kld_file_stat) */ char name[MAXPATHLEN]; int refs; int id; uint32_t address; /* load address */ uint32_t size; /* size in bytes */ char pathname[MAXPATHLEN]; }; struct procctl_reaper_pids32 { u_int rp_count; u_int rp_pad0[15]; uint32_t rp_pids; }; #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */ Index: head/sys/compat/freebsd32/freebsd32_misc.c 
=================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 320042) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 320043) @@ -1,3353 +1,3476 @@ /*- * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); #ifndef __mips__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifndef __mips__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); -CTASSERT(sizeof(struct kevent32) == 20); +CTASSERT(sizeof(struct kevent32) == 56); CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifdef __amd64__ CTASSERT(sizeof(struct stat32) == 208); #endif #ifndef __mips__ CTASSERT(sizeof(struct freebsd11_stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, 
*s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD10 int freebsd10_freebsd32_pipe(struct thread *td, struct freebsd10_freebsd32_pipe_args *uap) { return (freebsd10_pipe(td, (struct freebsd10_pipe_args*)uap)); } #endif int freebsd32_sigaltstack(struct 
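/*
 * [Editor's note -- illustrative sketch, not part of this change.]
 * copy_statfs() above first rescales the block counters with
 * statfs_scale_blocks() and then clamps every remaining 64-bit counter so
 * that it fits the 32-bit fields of struct statfs32; MIN(x, INT32_MAX) is
 * that clamp.  The helper below is a hypothetical stand-alone version of
 * the same idiom, shown only to make the intent explicit.
 */
#if 0
static int32_t
clamp_to_int32(uint64_t v)
{

	/* Saturate instead of silently dropping the high bits. */
	return ((v > INT32_MAX) ? INT32_MAX : (int32_t)v);
}
/* e.g.: out->f_files = clamp_to_int32(in->f_files); */
#endif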
thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; size_t length; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = copyinstr(argp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = copyinstr(envp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); post_execve(td, error, oldvmspace); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } post_execve(td, error, oldvmspace); return (error); } #if defined(COMPAT_FREEBSD11) int freebsd11_freebsd32_mknod(struct thread *td, struct freebsd11_freebsd32_mknod_args *uap) { return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int freebsd11_freebsd32_mknodat(struct thread *td, struct freebsd11_freebsd32_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } #endif /* COMPAT_FREEBSD11 */ int freebsd32_mprotect(struct thread *td, 
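/*
 * [Editor's note -- illustrative sketch, not part of this change.]
 * Almost every wrapper in this file leans on a small set of conversion
 * macros from the freebsd32 headers.  Roughly, they behave like the
 * definitions below: PTRIN() widens a 32-bit user pointer into a kernel
 * void *, PTROUT() narrows a kernel pointer back into a 32-bit slot, and
 * CP()/PTRIN_CP()/PTROUT_CP() copy one named field between the native and
 * the 32-bit variant of a structure.  Treat this as a reader's aid, not
 * as the authoritative definitions.
 */
#if 0
#define	PTRIN(v)		(void *)(uintptr_t)(v)
#define	PTROUT(v)		(u_int32_t)(uintptr_t)(v)
#define	CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
#define	PTRIN_CP(src, dst, fld)	do { (dst).fld = PTRIN((src).fld); } while (0)
#define	PTROUT_CP(src, dst, fld) do { (dst).fld = PTROUT((src).fld); } while (0)
#endif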
struct freebsd32_mprotect_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ) != 0) prot |= PROT_EXEC; #endif return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len, prot)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, (uintptr_t)uap->addr, uap->len, prot, uap->flags, uap->fd, PAIR32TO64(off_t, uap->pos))); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. 
*/ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; - int i, error = 0; + uint64_t e; + int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); - CP(kevp[i], ks32[i], data); +#if BYTE_ORDER == LITTLE_ENDIAN + ks32[i].data1 = kevp[i].data; + ks32[i].data2 = kevp[i].data >> 32; +#else + ks32[i].data1 = kevp[i].data >> 32; + ks32[i].data2 = kevp[i].data; +#endif PTROUT_CP(kevp[i], ks32[i], udata); + for (j = 0; j < nitems(kevp->ext); j++) { + e = kevp[i].ext[j]; +#if BYTE_ORDER == LITTLE_ENDIAN + ks32[i].ext64[2 * j] = e; + ks32[i].ext64[2 * j + 1] = e >> 32; +#else + ks32[i].ext64[2 * j] = e >> 32; + ks32[i].ext64[2 * j + 1] = e; +#endif + } } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; - int i, error = 0; + uint64_t e; + int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); - CP(ks32[i], kevp[i], data); + kevp[i].data = PAIR32TO64(uint64_t, ks32[i].data); PTRIN_CP(ks32[i], kevp[i], udata); + for (j = 0; j < nitems(kevp->ext); j++) { +#if BYTE_ORDER == LITTLE_ENDIAN + e = ks32[i].ext64[2 * j + 1]; + e <<= 32; + e += ks32[i].ext64[2 * j]; +#else + e = ks32[i].ext64[2 * j]; + e <<= 32; + e += ks32[i].ext64[2 * j + 1]; +#endif + kevp[i].ext[j] = e; + } } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent_copyout, .k_copyin = freebsd32_kevent_copyin, }; int error; + if (uap->timeout) { + error = copyin(uap->timeout, &ts32, sizeof(ts32)); + if (error) + return (error); + CP(ts32, ts, tv_sec); + CP(ts32, ts, tv_nsec); + tsp = &ts; + } else + tsp = NULL; + error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, + &k_ops, tsp); + return (error); +} +#ifdef COMPAT_FREEBSD11 +struct kevent32_freebsd11 { + u_int32_t ident; /* identifier for this event */ + short filter; /* filter for event */ + u_short flags; + u_int fflags; + int32_t data; + u_int32_t udata; /* opaque user data identifier */ +}; + +static int +freebsd32_kevent11_copyout(void *arg, struct kevent *kevp, int count) +{ + struct freebsd11_freebsd32_kevent_args *uap; + struct kevent32_freebsd11 ks32[KQ_NEVENTS]; + int i, error; + + KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); + uap = (struct freebsd11_freebsd32_kevent_args *)arg; + + for (i = 0; i < count; i++) { + CP(kevp[i], ks32[i], ident); + CP(kevp[i], ks32[i], filter); + CP(kevp[i], ks32[i], flags); + CP(kevp[i], ks32[i], fflags); + CP(kevp[i], ks32[i], data); + PTROUT_CP(kevp[i], ks32[i], udata); + } + error = copyout(ks32, uap->eventlist, count * sizeof 
*ks32); + if (error == 0) + uap->eventlist += count; + return (error); +} + +/* + * Copy 'count' items from the list pointed to by uap->changelist. + */ +static int +freebsd32_kevent11_copyin(void *arg, struct kevent *kevp, int count) +{ + struct freebsd11_freebsd32_kevent_args *uap; + struct kevent32_freebsd11 ks32[KQ_NEVENTS]; + int i, j, error; + + KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); + uap = (struct freebsd11_freebsd32_kevent_args *)arg; + + error = copyin(uap->changelist, ks32, count * sizeof *ks32); + if (error) + goto done; + uap->changelist += count; + + for (i = 0; i < count; i++) { + CP(ks32[i], kevp[i], ident); + CP(ks32[i], kevp[i], filter); + CP(ks32[i], kevp[i], flags); + CP(ks32[i], kevp[i], fflags); + CP(ks32[i], kevp[i], data); + PTRIN_CP(ks32[i], kevp[i], udata); + for (j = 0; j < nitems(kevp->ext); j++) + kevp[i].ext[j] = 0; + } +done: + return (error); +} + +int +freebsd11_freebsd32_kevent(struct thread *td, + struct freebsd11_freebsd32_kevent_args *uap) +{ + struct timespec32 ts32; + struct timespec ts, *tsp; + struct kevent_copyops k_ops = { + .arg = uap, + .k_copyout = freebsd32_kevent11_copyout, + .k_copyin = freebsd32_kevent11_copyin, + }; + int error; + if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); return (error); } +#endif int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error) return (error); if (uap->rusage != NULL) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; 
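/*
 * [Editor's note -- illustrative sketch, not part of this change.]
 * The new struct kevent32 (now 56 bytes, per the CTASSERT update above)
 * carries the 64-bit kevent data and ext[] values as pairs of 32-bit
 * words.  freebsd32_kevent_copyout()/_copyin() above split and rejoin
 * those pairs, putting the low word first on little-endian targets and
 * the high word first on big-endian ones; the data field follows the same
 * word order via PAIR32TO64().  The freebsd11 shim just above keeps the
 * old 20-byte layout with a 32-bit data field and simply zeroes ext[].
 * The hypothetical helpers below show the pairing in isolation.
 */
#if 0
static void
split64(uint64_t v, uint32_t w[2])
{
#if BYTE_ORDER == LITTLE_ENDIAN
	w[0] = (uint32_t)v;		/* low word first */
	w[1] = (uint32_t)(v >> 32);
#else
	w[0] = (uint32_t)(v >> 32);	/* high word first */
	w[1] = (uint32_t)v;
#endif
}

static uint64_t
join64(const uint32_t w[2])
{
#if BYTE_ORDER == LITTLE_ENDIAN
	return ((uint64_t)w[1] << 32 | w[0]);
#else
	return ((uint64_t)w[0] << 32 | w[1]);
#endif
}
#endif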
error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; m = control; ctlbuf = msg->msg_control; while (m && len > 0) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; /* Adjust message length */ cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen; /* Copy cmsghdr */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(cm,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (len <= 
0) break; /* Copy data */ copylen = datalen; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(data,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m = m->m_next; } msg->msg_controllen = (len <= 0) ? maxlen : ctlbuf - (caddr_t)msg->msg_control; exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) m_freem(control); return (error); } /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct mbuf *m; void *md; u_int idx, len, msglen; int error; buflen = FREEBSD32_ALIGN(buflen); if (buflen > MCLBYTES) return (EINVAL); /* * Iterate over the buffer and get the length of each message * in there. This has 32-bit alignment and padding. Use it to * determine the length of these messages when using 64-bit * alignment and padding. */ idx = 0; len = 0; while (idx < buflen) { error = copyin(buf + idx, &msglen, sizeof(msglen)); if (error) return (error); if (msglen < sizeof(struct cmsghdr)) return (EINVAL); msglen = FREEBSD32_ALIGN(msglen); if (idx + msglen > buflen) return (EINVAL); idx += msglen; msglen += CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); len += CMSG_ALIGN(msglen); } if (len > MCLBYTES) return (EINVAL); m = m_get(M_WAITOK, MT_CONTROL); if (len > MLEN) MCLGET(m, M_WAITOK); m->m_len = len; md = mtod(m, void *); while (buflen > 0) { error = copyin(buf, md, sizeof(struct cmsghdr)); if (error) break; msglen = *(u_int *)md; msglen = FREEBSD32_ALIGN(msglen); /* Modify the message length to account for alignment. 
*/ *(u_int *)md = msglen + CMSG_ALIGN(sizeof(struct cmsghdr)) - FREEBSD32_ALIGN(sizeof(struct cmsghdr)); md = (char *)md + CMSG_ALIGN(sizeof(struct cmsghdr)); buf += FREEBSD32_ALIGN(sizeof(struct cmsghdr)); buflen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); msglen -= FREEBSD32_ALIGN(sizeof(struct cmsghdr)); if (msglen > 0) { error = copyin(buf, md, msglen); if (error) break; md = (char *)md + CMSG_ALIGN(msglen); buf += msglen; buflen -= msglen; } } if (error) m_free(m); else *mp = m; return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, 
uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = &delta; } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
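/*
 * [Editor's note -- illustrative sketch, not part of this change.]
 * The control-message converters earlier in this file
 * (freebsd32_copy_msg_out() and freebsd32_copyin_control()) exist because
 * the 32-bit ABI pads cmsghdrs to int alignment while the native 64-bit
 * kernel pads them to long alignment, so the same messages occupy
 * different amounts of space in each layout.  As a worked example --
 * assuming the usual 12-byte struct cmsghdr and 8-byte native alignment
 * on amd64 -- a control message carrying a 4-byte payload needs:
 */
#if 0
	/* 32-bit layout: FREEBSD32_ALIGN(12) + FREEBSD32_ALIGN(4) = 12 + 4 */
	size_t space32 = FREEBSD32_CMSG_SPACE(4);	/* 16 bytes */
	/* native layout: CMSG_ALIGN(12) + CMSG_ALIGN(4) = 16 + 8 */
	size_t space64 = CMSG_SPACE(4);			/* 24 bytes */
#endif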
error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif #if defined(COMPAT_FREEBSD11) int freebsd11_freebsd32_getdirentries(struct thread *td, struct freebsd11_freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } int freebsd11_freebsd32_getdents(struct thread *td, struct freebsd11_freebsd32_getdents_args *uap) { struct freebsd11_freebsd32_getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (freebsd11_freebsd32_getdirentries(td, &ap)); } #endif /* COMPAT_FREEBSD11 */ int freebsd32_getdirentries(struct thread *td, struct freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for 
eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_padding0 = 0; out->st_padding1 = 0; #ifdef __STAT32_TIME_T_EXT out->st_atim_ext = 0; out->st_mtim_ext = 0; out->st_ctim_ext = 0; out->st_btim_ext = 0; #endif bzero(out->st_spare, sizeof(out->st_spare)); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); CP(*in, *out, st_size); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, 
st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub, NULL); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap) { struct stat sb; struct stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #if defined(COMPAT_FREEBSD11) extern int ino64_trunc_error; static int freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out) { CP(*in, *out, st_ino); if (in->st_ino != out->st_ino) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_ino = UINT32_MAX; break; } } CP(*in, *out, st_nlink); if (in->st_nlink != out->st_nlink) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_nlink = UINT16_MAX; break; } } CP(*in, *out, st_dev); CP(*in, *out, st_mode); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_lspare = 0; bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim), sizeof(*out) - offsetof(struct freebsd11_stat32, st_birthtim) - sizeof(out->st_birthtim)); return (0); } int freebsd11_freebsd32_stat(struct thread *td, struct freebsd11_freebsd32_stat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int 
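/*
 * [Editor's note -- illustrative sketch, not part of this change.]
 * freebsd11_cvtstat32() above narrows the 64-bit st_ino and st_nlink
 * values into the old 32-/16-bit fields and then checks whether the
 * assignment lost bits.  The ino64_trunc_error knob selects the policy:
 * 0 truncates silently, 1 fails the call with EOVERFLOW, and 2 clamps the
 * field to its maximum value.  The detection idiom, in isolation:
 */
#if 0
	out->st_ino = in->st_ino;		/* narrowing assignment */
	if (in->st_ino != out->st_ino) {	/* high bits were lost */
		if (ino64_trunc_error == 1)
			return (EOVERFLOW);
		if (ino64_trunc_error == 2)
			out->st_ino = UINT32_MAX;
		/* otherwise keep the truncated value */
	}
#endif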
freebsd11_freebsd32_fstat(struct thread *td, struct freebsd11_freebsd32_fstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_fstat(td, uap->fd, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstatat(struct thread *td, struct freebsd11_freebsd32_fstatat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->buf, sizeof (sb32)); return (error); } int freebsd11_freebsd32_lstat(struct thread *td, struct freebsd11_freebsd32_lstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fhstat(struct thread *td, struct freebsd11_freebsd32_fhstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #endif int freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error && error != ENOMEM) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. 
*/ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 
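/*
 * [Editor's note -- illustrative sketch, not part of this change.]
 * The COMPAT_43 handlers above translate between the historical 32-bit
 * signal mask (osigset_t) and the kernel's full sigset_t with
 * OSIG2SIG()/SIG2OSIG().  Roughly -- this is a reader's aid, not the
 * authoritative definition -- the old mask corresponds to the first
 * 32-bit word of the sigset:
 */
#if 0
	/* OSIG2SIG(osig, sig): widen the old mask. */
	SIGEMPTYSET(sig);
	sig.__bits[0] = osig;
	/* SIG2OSIG(sig, osig): keep only signals 1..32. */
	osig = sig.__bits[0];
#endif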
vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { return (freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } int freebsd32_clock_nanosleep(struct thread *td, struct freebsd32_clock_nanosleep_args *uap) { int error; error = freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(ua_rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 && !useracc(ua_rmtp, sizeof(rmt32), VM_PROT_WRITE)) return (EFAULT); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, ua_rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, 
tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(&param, sizeof(struct thr_param)); bzero(&param32, sizeof(struct thr_param32)); error = copyin(uap->param, &param32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, &param)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct
timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } #ifndef _FREEBSD32_SYSPROTO_H_ struct freebsd32_sigqueue_args { pid_t pid; int signum; /* union sigval32 */ int value; }; #endif int freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap) { union sigval sv; /* * On 32-bit ABIs, sival_int and sival_ptr are the same. * On 64-bit little-endian ABIs, the low bits are the same. * In 64-bit big-endian ABIs, sival_int overlaps with * sival_ptr's HIGH bits. We choose to support sival_int * rather than sival_ptr in this case as it seems to be * more common. */ bzero(&sv, sizeof(sv)); sv.sival_int = uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { return (kern_cpuset_getaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { return (kern_cpuset_setaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int 
flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent, int flags) { if ((flags & ~SY_THR_STATIC) != 0) return (EINVAL); if (*offset == NO_SYSCALL) { int i; for (i = 1; i < SYS_MAXSYSCALL; ++i) if (freebsd32_sysent[i].sy_call == (sy_call_t *)lkmnosys) break; if (i == SYS_MAXSYSCALL) return (ENFILE); *offset = i; } else if (*offset < 0 || *offset >= SYS_MAXSYSCALL) return (EINVAL); else if (freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmnosys && freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmressys) return (EEXIST); *old_sysent = freebsd32_sysent[*offset]; freebsd32_sysent[*offset] = *new_sysent; atomic_store_rel_32(&freebsd32_sysent[*offset].sy_thrcnt, flags); return (0); } int syscall32_deregister(int *offset, struct sysent *old_sysent) { if (*offset == 0) return (0); freebsd32_sysent[*offset] = *old_sysent; return (0); } int syscall32_module_handler(struct module *mod, int what, void *arg) { struct syscall_module_data *data = (struct syscall_module_data*)arg; modspecific_t ms; int error; switch (what) { case MOD_LOAD: error = syscall32_register(data->offset, data->new_sysent, &data->old_sysent, SY_THR_STATIC_KLD); if (error) { /* Leave a mark so we know to safely unload below. */ data->offset = NULL; return error; } ms.intval = *data->offset; MOD_XLOCK; module_setspecific(mod, &ms); MOD_XUNLOCK; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); case MOD_UNLOAD: /* * MOD_LOAD failed, so just return without calling the * chained handler since we didn't pass along the MOD_LOAD * event. 
*/ if (data->offset == NULL) return (0); if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return (error); } error = syscall32_deregister(data->offset, &data->old_sysent); return (error); default: error = EOPNOTSUPP; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); } } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { struct syscall_helper_data *sd1; int error; for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) { error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent, &sd1->old_sysent, flags); if (error != 0) { syscall32_helper_unregister(sd); return (error); } sd1->registered = 1; } return (0); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; for (sd1 = sd; sd1->registered != 0; sd1++) { syscall32_deregister(&sd1->syscall_no, &sd1->old_sysent); sd1->registered = 0; } return (0); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp; uintptr_t destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); copyout(imgp->proc->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = destp; copyout(pagesizes32, (void *)destp, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. 
*/ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) * sizeof(u_int32_t)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); } /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat stat; struct kld32_file_stat stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); error = kern_kldstat(td, uap->fileid, &stat); if (error != 0) return (error); bcopy(&stat.name[0], &stat32.name[0], sizeof(stat.name)); CP(stat, stat32, refs); CP(stat, stat32, id); PTROUT_CP(stat, stat32, address); CP(stat, stat32, size); bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags; switch (uap->com) { case PROC_SPROTECT: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: error = copyin(PTRIN(uap->data), 
&flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: data = &flags; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } Index: head/sys/compat/freebsd32/syscalls.master =================================================================== --- head/sys/compat/freebsd32/syscalls.master (revision 320042) +++ head/sys/compat/freebsd32/syscalls.master (revision 320043) @@ -1,1113 +1,1121 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; from: src/sys/kern/syscalls.master 1.107 ; ; System call name/number master file. ; Processed to created init_sysent.c, syscalls.c and syscall.h. ; Columns: number audit type name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; audit the audit event associated with the system call ; A value of AUE_NULL means no auditing, but it also means that ; there is no audit event for the call at this time. For the ; case where the event exists, but we don't want auditing, the ; event should be #defined to AUE_NULL in audit_kevents.h. ; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6, ; COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD ; The COMPAT* options may be combined with one or more NO* ; options separated by '|' with no spaces (e.g. 
COMPAT|NOARGS) ; name psuedo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT4 #ifdef (FreeBSD 4 compat) ; COMPAT6 included on COMPAT6 #ifdef (FreeBSD 6 compat) ; COMPAT7 included on COMPAT7 #ifdef (FreeBSD 7 compat) ; COMPAT10 included on COMPAT10 #ifdef (FreeBSD 10 compat) ; COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat) ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in; sysent entry will be filled with lkmressys ; so the SYSCALL_MODULE macro works ; NOARGS same as STD except do not create structure in sys/sysproto.h ; NODEF same as STD except only have the entry in the syscall table ; added. Meaning - do not create structure or function ; prototype in sys/sysproto.h ; NOPROTO same as STD except do not create structure or ; function prototype in sys/sysproto.h. Does add a ; definition to syscall.h besides adding a sysent. ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include #include #include #include #include #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__)) #define PAD64_REQUIRED #endif ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 
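; An illustrative, hypothetical entry (comment only; the real table begins
; at syscall 0 below) showing the column layout described above.  A 32-bit
; compat wrapper that needs translated argument structures is declared STD
; with a freebsd32_ prefix and 32-bit structure types, e.g.:
;
;	999	AUE_NULL	STD	{ int freebsd32_example( \
;					    const struct timespec32 *tp); }
;
; When the layout of an ABI-visible structure changes, the old entry is
; retyped to a COMPAT* keyword so it is only built under the matching compat
; option, and a replacement STD entry is appended at the end of the table;
; that is what this revision does for kevent (see entries 363 and 560).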
0 AUE_NULL NOPROTO { int nosys(void); } syscall nosys_args int 1 AUE_EXIT NOPROTO { void sys_exit(int rval); } exit \ sys_exit_args void 2 AUE_FORK NOPROTO { int fork(void); } 3 AUE_READ NOPROTO { ssize_t read(int fd, void *buf, \ size_t nbyte); } 4 AUE_WRITE NOPROTO { ssize_t write(int fd, const void *buf, \ size_t nbyte); } 5 AUE_OPEN_RWTC NOPROTO { int open(char *path, int flags, \ int mode); } 6 AUE_CLOSE NOPROTO { int close(int fd); } 7 AUE_WAIT4 STD { int freebsd32_wait4(int pid, int *status, \ int options, struct rusage32 *rusage); } 8 AUE_CREAT OBSOL old creat 9 AUE_LINK NOPROTO { int link(char *path, char *link); } 10 AUE_UNLINK NOPROTO { int unlink(char *path); } 11 AUE_NULL OBSOL execv 12 AUE_CHDIR NOPROTO { int chdir(char *path); } 13 AUE_FCHDIR NOPROTO { int fchdir(int fd); } 14 AUE_MKNOD COMPAT11 { int freebsd32_mknod(char *path, \ int mode, int dev); } 15 AUE_CHMOD NOPROTO { int chmod(char *path, int mode); } 16 AUE_CHOWN NOPROTO { int chown(char *path, int uid, int gid); } 17 AUE_NULL NOPROTO { int obreak(char *nsize); } break \ obreak_args int 18 AUE_GETFSSTAT COMPAT4 { int freebsd32_getfsstat( \ struct statfs32 *buf, long bufsize, \ int mode); } 19 AUE_LSEEK COMPAT { int freebsd32_lseek(int fd, int offset, \ int whence); } 20 AUE_GETPID NOPROTO { pid_t getpid(void); } 21 AUE_MOUNT NOPROTO { int mount(char *type, char *path, \ int flags, caddr_t data); } 22 AUE_UMOUNT NOPROTO { int unmount(char *path, int flags); } 23 AUE_SETUID NOPROTO { int setuid(uid_t uid); } 24 AUE_GETUID NOPROTO { uid_t getuid(void); } 25 AUE_GETEUID NOPROTO { uid_t geteuid(void); } 26 AUE_PTRACE NOPROTO { int ptrace(int req, pid_t pid, \ caddr_t addr, int data); } 27 AUE_RECVMSG STD { int freebsd32_recvmsg(int s, struct msghdr32 *msg, \ int flags); } 28 AUE_SENDMSG STD { int freebsd32_sendmsg(int s, struct msghdr32 *msg, \ int flags); } 29 AUE_RECVFROM STD { int freebsd32_recvfrom(int s, uint32_t buf, \ uint32_t len, int flags, uint32_t from, \ uint32_t fromlenaddr); } 30 AUE_ACCEPT NOPROTO { int accept(int s, caddr_t name, \ int *anamelen); } 31 AUE_GETPEERNAME NOPROTO { int getpeername(int fdes, caddr_t asa, \ int *alen); } 32 AUE_GETSOCKNAME NOPROTO { int getsockname(int fdes, caddr_t asa, \ int *alen); } 33 AUE_ACCESS NOPROTO { int access(char *path, int amode); } 34 AUE_CHFLAGS NOPROTO { int chflags(const char *path, u_long flags); } 35 AUE_FCHFLAGS NOPROTO { int fchflags(int fd, u_long flags); } 36 AUE_SYNC NOPROTO { int sync(void); } 37 AUE_KILL NOPROTO { int kill(int pid, int signum); } 38 AUE_STAT COMPAT { int freebsd32_stat(char *path, \ struct ostat32 *ub); } 39 AUE_GETPPID NOPROTO { pid_t getppid(void); } 40 AUE_LSTAT COMPAT { int freebsd32_lstat(char *path, \ struct ostat *ub); } 41 AUE_DUP NOPROTO { int dup(u_int fd); } 42 AUE_PIPE COMPAT10 { int freebsd32_pipe(void); } 43 AUE_GETEGID NOPROTO { gid_t getegid(void); } 44 AUE_PROFILE NOPROTO { int profil(caddr_t samples, size_t size, \ size_t offset, u_int scale); } 45 AUE_KTRACE NOPROTO { int ktrace(const char *fname, int ops, \ int facs, int pid); } 46 AUE_SIGACTION COMPAT { int freebsd32_sigaction( int signum, \ struct osigaction32 *nsa, \ struct osigaction32 *osa); } 47 AUE_GETGID NOPROTO { gid_t getgid(void); } 48 AUE_SIGPROCMASK COMPAT { int freebsd32_sigprocmask(int how, \ osigset_t mask); } 49 AUE_GETLOGIN NOPROTO { int getlogin(char *namebuf, \ u_int namelen); } 50 AUE_SETLOGIN NOPROTO { int setlogin(char *namebuf); } 51 AUE_ACCT NOPROTO { int acct(char *path); } 52 AUE_SIGPENDING COMPAT { int freebsd32_sigpending(void); } 53 
AUE_SIGALTSTACK STD { int freebsd32_sigaltstack( \ struct sigaltstack32 *ss, \ struct sigaltstack32 *oss); } 54 AUE_IOCTL STD { int freebsd32_ioctl(int fd, uint32_t com, \ struct md_ioctl32 *data); } 55 AUE_REBOOT NOPROTO { int reboot(int opt); } 56 AUE_REVOKE NOPROTO { int revoke(char *path); } 57 AUE_SYMLINK NOPROTO { int symlink(char *path, char *link); } 58 AUE_READLINK NOPROTO { ssize_t readlink(char *path, char *buf, \ size_t count); } 59 AUE_EXECVE STD { int freebsd32_execve(char *fname, \ uint32_t *argv, uint32_t *envv); } 60 AUE_UMASK NOPROTO { int umask(int newmask); } umask \ umask_args int 61 AUE_CHROOT NOPROTO { int chroot(char *path); } 62 AUE_FSTAT COMPAT { int freebsd32_fstat(int fd, \ struct ostat32 *ub); } 63 AUE_NULL OBSOL ogetkerninfo 64 AUE_NULL COMPAT { int freebsd32_getpagesize( \ int32_t dummy); } 65 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } 66 AUE_VFORK NOPROTO { int vfork(void); } 67 AUE_NULL OBSOL vread 68 AUE_NULL OBSOL vwrite 69 AUE_SBRK NOPROTO { int sbrk(int incr); } 70 AUE_SSTK NOPROTO { int sstk(int incr); } 71 AUE_MMAP COMPAT|NOPROTO { int mmap(void *addr, int len, \ int prot, int flags, int fd, int pos); } 72 AUE_O_VADVISE NOPROTO { int ovadvise(int anom); } vadvise \ ovadvise_args int 73 AUE_MUNMAP NOPROTO { int munmap(void *addr, size_t len); } 74 AUE_MPROTECT STD { int freebsd32_mprotect(void *addr, \ size_t len, int prot); } 75 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \ int behav); } 76 AUE_NULL OBSOL vhangup 77 AUE_NULL OBSOL vlimit 78 AUE_MINCORE NOPROTO { int mincore(const void *addr, size_t len, \ char *vec); } 79 AUE_GETGROUPS NOPROTO { int getgroups(u_int gidsetsize, \ gid_t *gidset); } 80 AUE_SETGROUPS NOPROTO { int setgroups(u_int gidsetsize, \ gid_t *gidset); } 81 AUE_GETPGRP NOPROTO { int getpgrp(void); } 82 AUE_SETPGRP NOPROTO { int setpgid(int pid, int pgid); } 83 AUE_SETITIMER STD { int freebsd32_setitimer(u_int which, \ struct itimerval32 *itv, \ struct itimerval32 *oitv); } 84 AUE_NULL OBSOL owait ; XXX implement 85 AUE_SWAPON NOPROTO { int swapon(char *name); } 86 AUE_GETITIMER STD { int freebsd32_getitimer(u_int which, \ struct itimerval32 *itv); } 87 AUE_O_GETHOSTNAME OBSOL ogethostname 88 AUE_O_SETHOSTNAME OBSOL osethostname 89 AUE_GETDTABLESIZE NOPROTO { int getdtablesize(void); } 90 AUE_DUP2 NOPROTO { int dup2(u_int from, u_int to); } 91 AUE_NULL UNIMPL getdopt 92 AUE_FCNTL STD { int freebsd32_fcntl(int fd, int cmd, \ int arg); } 93 AUE_SELECT STD { int freebsd32_select(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ struct timeval32 *tv); } 94 AUE_NULL UNIMPL setdopt 95 AUE_FSYNC NOPROTO { int fsync(int fd); } 96 AUE_SETPRIORITY NOPROTO { int setpriority(int which, int who, \ int prio); } 97 AUE_SOCKET NOPROTO { int socket(int domain, int type, \ int protocol); } 98 AUE_CONNECT NOPROTO { int connect(int s, caddr_t name, \ int namelen); } 99 AUE_NULL OBSOL oaccept 100 AUE_GETPRIORITY NOPROTO { int getpriority(int which, int who); } 101 AUE_NULL OBSOL osend 102 AUE_NULL OBSOL orecv 103 AUE_SIGRETURN COMPAT { int freebsd32_sigreturn( \ struct ia32_sigcontext3 *sigcntxp); } 104 AUE_BIND NOPROTO { int bind(int s, caddr_t name, \ int namelen); } 105 AUE_SETSOCKOPT NOPROTO { int setsockopt(int s, int level, \ int name, caddr_t val, int valsize); } 106 AUE_LISTEN NOPROTO { int listen(int s, int backlog); } 107 AUE_NULL OBSOL vtimes 108 AUE_O_SIGVEC COMPAT { int freebsd32_sigvec(int signum, \ struct sigvec32 *nsv, \ struct sigvec32 *osv); } 109 AUE_O_SIGBLOCK COMPAT { int freebsd32_sigblock(int 
mask); } 110 AUE_O_SIGSETMASK COMPAT { int freebsd32_sigsetmask( int mask); } 111 AUE_SIGSUSPEND COMPAT { int freebsd32_sigsuspend( int mask); } 112 AUE_O_SIGSTACK COMPAT { int freebsd32_sigstack( \ struct sigstack32 *nss, \ struct sigstack32 *oss); } 113 AUE_NULL OBSOL orecvmsg 114 AUE_NULL OBSOL osendmsg 115 AUE_NULL OBSOL vtrace 116 AUE_GETTIMEOFDAY STD { int freebsd32_gettimeofday( \ struct timeval32 *tp, \ struct timezone *tzp); } 117 AUE_GETRUSAGE STD { int freebsd32_getrusage(int who, \ struct rusage32 *rusage); } 118 AUE_GETSOCKOPT NOPROTO { int getsockopt(int s, int level, \ int name, caddr_t val, int *avalsize); } 119 AUE_NULL UNIMPL resuba (BSD/OS 2.x) 120 AUE_READV STD { int freebsd32_readv(int fd, \ struct iovec32 *iovp, u_int iovcnt); } 121 AUE_WRITEV STD { int freebsd32_writev(int fd, \ struct iovec32 *iovp, u_int iovcnt); } 122 AUE_SETTIMEOFDAY STD { int freebsd32_settimeofday( \ struct timeval32 *tv, \ struct timezone *tzp); } 123 AUE_FCHOWN NOPROTO { int fchown(int fd, int uid, int gid); } 124 AUE_FCHMOD NOPROTO { int fchmod(int fd, int mode); } 125 AUE_RECVFROM OBSOL orecvfrom 126 AUE_SETREUID NOPROTO { int setreuid(int ruid, int euid); } 127 AUE_SETREGID NOPROTO { int setregid(int rgid, int egid); } 128 AUE_RENAME NOPROTO { int rename(char *from, char *to); } 129 AUE_TRUNCATE COMPAT|NOPROTO { int truncate(char *path, \ int length); } 130 AUE_FTRUNCATE COMPAT|NOPROTO { int ftruncate(int fd, int length); } 131 AUE_FLOCK NOPROTO { int flock(int fd, int how); } 132 AUE_MKFIFO NOPROTO { int mkfifo(char *path, int mode); } 133 AUE_SENDTO NOPROTO { int sendto(int s, caddr_t buf, \ size_t len, int flags, caddr_t to, \ int tolen); } 134 AUE_SHUTDOWN NOPROTO { int shutdown(int s, int how); } 135 AUE_SOCKETPAIR NOPROTO { int socketpair(int domain, int type, \ int protocol, int *rsv); } 136 AUE_MKDIR NOPROTO { int mkdir(char *path, int mode); } 137 AUE_RMDIR NOPROTO { int rmdir(char *path); } 138 AUE_UTIMES STD { int freebsd32_utimes(char *path, \ struct timeval32 *tptr); } 139 AUE_NULL OBSOL 4.2 sigreturn 140 AUE_ADJTIME STD { int freebsd32_adjtime( \ struct timeval32 *delta, \ struct timeval32 *olddelta); } 141 AUE_GETPEERNAME OBSOL ogetpeername 142 AUE_SYSCTL OBSOL ogethostid 143 AUE_SYSCTL OBSOL sethostid 144 AUE_GETRLIMIT OBSOL getrlimit 145 AUE_SETRLIMIT OBSOL setrlimit 146 AUE_KILLPG OBSOL killpg 147 AUE_SETSID NOPROTO { int setsid(void); } 148 AUE_QUOTACTL NOPROTO { int quotactl(char *path, int cmd, int uid, \ caddr_t arg); } 149 AUE_O_QUOTA OBSOL oquota 150 AUE_GETSOCKNAME OBSOL ogetsockname ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x) 152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x) 153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x) ; 154 is initialised by the NLM code, if present. 154 AUE_NULL UNIMPL nlm_syscall ; 155 is initialized by the NFS code, if present. ; XXX this is a problem!!! 
155 AUE_NFS_SVC UNIMPL nfssvc 156 AUE_GETDIRENTRIES COMPAT { int freebsd32_getdirentries(int fd, \ char *buf, u_int count, uint32_t *basep); } 157 AUE_STATFS COMPAT4 { int freebsd32_statfs(char *path, \ struct statfs32 *buf); } 158 AUE_FSTATFS COMPAT4 { int freebsd32_fstatfs(int fd, \ struct statfs32 *buf); } 159 AUE_NULL UNIMPL nosys 160 AUE_LGETFH UNIMPL lgetfh 161 AUE_NFS_GETFH NOPROTO { int getfh(char *fname, \ struct fhandle *fhp); } 162 AUE_SYSCTL OBSOL getdomainname 163 AUE_SYSCTL OBSOL setdomainname 164 AUE_NULL OBSOL uname 165 AUE_SYSARCH STD { int freebsd32_sysarch(int op, char *parms); } 166 AUE_RTPRIO NOPROTO { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys 168 AUE_NULL UNIMPL nosys 169 AUE_SEMSYS NOSTD { int freebsd32_semsys(int which, int a2, \ int a3, int a4, int a5); } 170 AUE_MSGSYS NOSTD { int freebsd32_msgsys(int which, int a2, \ int a3, int a4, int a5, int a6); } 171 AUE_SHMSYS NOSTD { int freebsd32_shmsys(uint32_t which, uint32_t a2, \ uint32_t a3, uint32_t a4); } 172 AUE_NULL UNIMPL nosys 173 AUE_PREAD COMPAT6 { ssize_t freebsd32_pread(int fd, void *buf, \ size_t nbyte, int pad, \ uint32_t offset1, uint32_t offset2); } 174 AUE_PWRITE COMPAT6 { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, int pad, \ uint32_t offset1, uint32_t offset2); } 175 AUE_NULL UNIMPL nosys 176 AUE_NTP_ADJTIME NOPROTO { int ntp_adjtime(struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) 179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x) 180 AUE_NULL UNIMPL nosys ; Syscalls 181-199 are used by/reserved for BSD 181 AUE_SETGID NOPROTO { int setgid(gid_t gid); } 182 AUE_SETEGID NOPROTO { int setegid(gid_t egid); } 183 AUE_SETEUID NOPROTO { int seteuid(uid_t euid); } 184 AUE_NULL UNIMPL lfs_bmapv 185 AUE_NULL UNIMPL lfs_markv 186 AUE_NULL UNIMPL lfs_segclean 187 AUE_NULL UNIMPL lfs_segwait 188 AUE_STAT COMPAT11 { int freebsd32_stat(char *path, \ struct freebsd11_stat32 *ub); } 189 AUE_FSTAT COMPAT11 { int freebsd32_fstat(int fd, \ struct freebsd11_stat32 *ub); } 190 AUE_LSTAT COMPAT11 { int freebsd32_lstat(char *path, \ struct freebsd11_stat32 *ub); } 191 AUE_PATHCONF NOPROTO { int pathconf(char *path, int name); } 192 AUE_FPATHCONF NOPROTO { int fpathconf(int fd, int name); } 193 AUE_NULL UNIMPL nosys 194 AUE_GETRLIMIT NOPROTO { int getrlimit(u_int which, \ struct rlimit *rlp); } getrlimit \ __getrlimit_args int 195 AUE_SETRLIMIT NOPROTO { int setrlimit(u_int which, \ struct rlimit *rlp); } setrlimit \ __setrlimit_args int 196 AUE_GETDIRENTRIES COMPAT11 { int freebsd32_getdirentries(int fd, \ char *buf, u_int count, int32_t *basep); } 197 AUE_MMAP COMPAT6 { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, uint32_t pos1, uint32_t pos2); } 198 AUE_NULL NOPROTO { int nosys(void); } __syscall \ __syscall_args int 199 AUE_LSEEK COMPAT6 { off_t freebsd32_lseek(int fd, int pad, \ uint32_t offset1, uint32_t offset2, \ int whence); } 200 AUE_TRUNCATE COMPAT6 { int freebsd32_truncate(char *path, \ int pad, uint32_t length1, \ uint32_t length2); } 201 AUE_FTRUNCATE COMPAT6 { int freebsd32_ftruncate(int fd, int pad, \ uint32_t length1, uint32_t length2); } 202 AUE_SYSCTL STD { int freebsd32_sysctl(int *name, \ u_int namelen, void *old, \ uint32_t *oldlenp, void *new, \ uint32_t newlen); } 203 AUE_MLOCK NOPROTO { int mlock(const void *addr, \ size_t len); } 204 AUE_MUNLOCK NOPROTO { int munlock(const void *addr, \ size_t len); } 205 AUE_UNDELETE NOPROTO { int 
undelete(char *path); } 206 AUE_FUTIMES STD { int freebsd32_futimes(int fd, \ struct timeval32 *tptr); } 207 AUE_GETPGID NOPROTO { int getpgid(pid_t pid); } 208 AUE_NULL UNIMPL newreboot (NetBSD) 209 AUE_POLL NOPROTO { int poll(struct pollfd *fds, u_int nfds, \ int timeout); } ; ; The following are reserved for loadable syscalls ; 210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int ; ; The following were introduced with NetBSD/4.4Lite-2 ; They are initialized by their respective modules/sysinits ; XXX PROBLEM!! 220 AUE_SEMCTL COMPAT7|NOSTD { int freebsd32_semctl( \ int semid, int semnum, \ int cmd, union semun32 *arg); } 221 AUE_SEMGET NOSTD|NOPROTO { int semget(key_t key, int nsems, \ int semflg); } 222 AUE_SEMOP NOSTD|NOPROTO { int semop(int semid, \ struct sembuf *sops, u_int nsops); } 223 AUE_NULL UNIMPL semconfig 224 AUE_MSGCTL COMPAT7|NOSTD { int freebsd32_msgctl( \ int msqid, int cmd, \ struct msqid_ds32_old *buf); } 225 AUE_MSGGET NOSTD|NOPROTO { int msgget(key_t key, int msgflg); } 226 AUE_MSGSND NOSTD { int freebsd32_msgsnd(int msqid, void *msgp, \ size_t msgsz, int msgflg); } 227 AUE_MSGRCV NOSTD { int freebsd32_msgrcv(int msqid, void *msgp, \ size_t msgsz, long msgtyp, int msgflg); } 228 AUE_SHMAT NOSTD|NOPROTO { int shmat(int shmid, void *shmaddr, \ int shmflg); } 229 AUE_SHMCTL COMPAT7|NOSTD { int freebsd32_shmctl( \ int shmid, int cmd, \ struct shmid_ds32_old *buf); } 230 AUE_SHMDT NOSTD|NOPROTO { int shmdt(void *shmaddr); } 231 AUE_SHMGET NOSTD|NOPROTO { int shmget(key_t key, int size, \ int shmflg); } ; 232 AUE_NULL STD { int freebsd32_clock_gettime(clockid_t clock_id, \ struct timespec32 *tp); } 233 AUE_CLOCK_SETTIME STD { int freebsd32_clock_settime(clockid_t clock_id, \ const struct timespec32 *tp); } 234 AUE_NULL STD { int freebsd32_clock_getres(clockid_t clock_id, \ struct timespec32 *tp); } 235 AUE_NULL STD { int freebsd32_ktimer_create(\ clockid_t clock_id, \ struct sigevent32 *evp, int *timerid); } 236 AUE_NULL NOPROTO { int ktimer_delete(int timerid); } 237 AUE_NULL STD { int freebsd32_ktimer_settime(int timerid,\ int flags, \ const struct itimerspec32 *value, \ struct itimerspec32 *ovalue); } 238 AUE_NULL STD { int freebsd32_ktimer_gettime(int timerid,\ struct itimerspec32 *value); } 239 AUE_NULL NOPROTO { int ktimer_getoverrun(int timerid); } 240 AUE_NULL STD { int freebsd32_nanosleep( \ const struct timespec32 *rqtp, \ struct timespec32 *rmtp); } 241 AUE_NULL NOPROTO { int ffclock_getcounter(ffcounter *ffcount); } 242 AUE_NULL NOPROTO { int ffclock_setestimate( \ struct ffclock_estimate *cest); } 243 AUE_NULL NOPROTO { int ffclock_getestimate( \ struct ffclock_estimate *cest); } 244 AUE_NULL STD { int freebsd32_clock_nanosleep( \ clockid_t clock_id, int flags, \ const struct timespec32 *rqtp, \ struct timespec32 *rmtp); } 245 AUE_NULL UNIMPL nosys 246 AUE_NULL UNIMPL nosys 247 AUE_NULL STD { int freebsd32_clock_getcpuclockid2(\ uint32_t id1, uint32_t id2,\ int which, clockid_t *clock_id); } 248 AUE_NULL 
UNIMPL ntp_gettime 249 AUE_NULL UNIMPL nosys ; syscall numbers initially used in OpenBSD 250 AUE_MINHERIT NOPROTO { int minherit(void *addr, size_t len, \ int inherit); } 251 AUE_RFORK NOPROTO { int rfork(int flags); } 252 AUE_POLL OBSOL openbsd_poll 253 AUE_ISSETUGID NOPROTO { int issetugid(void); } 254 AUE_LCHOWN NOPROTO { int lchown(char *path, int uid, int gid); } 255 AUE_AIO_READ STD { int freebsd32_aio_read( \ struct aiocb32 *aiocbp); } 256 AUE_AIO_WRITE STD { int freebsd32_aio_write( \ struct aiocb32 *aiocbp); } 257 AUE_LIO_LISTIO STD { int freebsd32_lio_listio(int mode, \ struct aiocb32 * const *acb_list, \ int nent, struct sigevent32 *sig); } 258 AUE_NULL UNIMPL nosys 259 AUE_NULL UNIMPL nosys 260 AUE_NULL UNIMPL nosys 261 AUE_NULL UNIMPL nosys 262 AUE_NULL UNIMPL nosys 263 AUE_NULL UNIMPL nosys 264 AUE_NULL UNIMPL nosys 265 AUE_NULL UNIMPL nosys 266 AUE_NULL UNIMPL nosys 267 AUE_NULL UNIMPL nosys 268 AUE_NULL UNIMPL nosys 269 AUE_NULL UNIMPL nosys 270 AUE_NULL UNIMPL nosys 271 AUE_NULL UNIMPL nosys 272 AUE_O_GETDENTS COMPAT11 { int freebsd32_getdents(int fd, char *buf, \ int count); } 273 AUE_NULL UNIMPL nosys 274 AUE_LCHMOD NOPROTO { int lchmod(char *path, mode_t mode); } 275 AUE_LCHOWN NOPROTO { int lchown(char *path, uid_t uid, \ gid_t gid); } netbsd_lchown \ lchown_args int 276 AUE_LUTIMES STD { int freebsd32_lutimes(char *path, \ struct timeval32 *tptr); } 277 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } netbsd_msync msync_args int 278 AUE_STAT COMPAT11|NOPROTO { int nstat(char *path, struct nstat *ub); } 279 AUE_FSTAT COMPAT11|NOPROTO { int nfstat(int fd, struct nstat *sb); } 280 AUE_LSTAT COMPAT11|NOPROTO { int nlstat(char *path, struct nstat *ub); } 281 AUE_NULL UNIMPL nosys 282 AUE_NULL UNIMPL nosys 283 AUE_NULL UNIMPL nosys 284 AUE_NULL UNIMPL nosys 285 AUE_NULL UNIMPL nosys 286 AUE_NULL UNIMPL nosys 287 AUE_NULL UNIMPL nosys 288 AUE_NULL UNIMPL nosys ; 289 and 290 from NetBSD (OpenBSD: 267 and 268) 289 AUE_PREADV STD { ssize_t freebsd32_preadv(int fd, \ struct iovec32 *iovp, \ u_int iovcnt, \ uint32_t offset1, uint32_t offset2); } 290 AUE_PWRITEV STD { ssize_t freebsd32_pwritev(int fd, \ struct iovec32 *iovp, \ u_int iovcnt, \ uint32_t offset1, uint32_t offset2); } 291 AUE_NULL UNIMPL nosys 292 AUE_NULL UNIMPL nosys 293 AUE_NULL UNIMPL nosys 294 AUE_NULL UNIMPL nosys 295 AUE_NULL UNIMPL nosys 296 AUE_NULL UNIMPL nosys ; XXX 297 is 300 in NetBSD 297 AUE_FHSTATFS COMPAT4 { int freebsd32_fhstatfs( \ const struct fhandle *u_fhp, \ struct statfs32 *buf); } 298 AUE_FHOPEN NOPROTO { int fhopen(const struct fhandle *u_fhp, \ int flags); } 299 AUE_FHSTAT COMPAT11 { int freebsd32_fhstat( \ const struct fhandle *u_fhp, \ struct freebsd11_stat32 *sb); } ; syscall numbers for FreeBSD 300 AUE_NULL NOPROTO { int modnext(int modid); } 301 AUE_NULL STD { int freebsd32_modstat(int modid, \ struct module_stat32* stat); } 302 AUE_NULL NOPROTO { int modfnext(int modid); } 303 AUE_NULL NOPROTO { int modfind(const char *name); } 304 AUE_MODLOAD NOPROTO { int kldload(const char *file); } 305 AUE_MODUNLOAD NOPROTO { int kldunload(int fileid); } 306 AUE_NULL NOPROTO { int kldfind(const char *file); } 307 AUE_NULL NOPROTO { int kldnext(int fileid); } 308 AUE_NULL STD { int freebsd32_kldstat(int fileid, \ struct kld32_file_stat* stat); } 309 AUE_NULL NOPROTO { int kldfirstmod(int fileid); } 310 AUE_GETSID NOPROTO { int getsid(pid_t pid); } 311 AUE_SETRESUID NOPROTO { int setresuid(uid_t ruid, uid_t euid, \ uid_t suid); } 312 AUE_SETRESGID NOPROTO { int setresgid(gid_t 
rgid, gid_t egid, \ gid_t sgid); } 313 AUE_NULL OBSOL signanosleep 314 AUE_AIO_RETURN STD { int freebsd32_aio_return( \ struct aiocb32 *aiocbp); } 315 AUE_AIO_SUSPEND STD { int freebsd32_aio_suspend( \ struct aiocb32 * const * aiocbp, int nent, \ const struct timespec32 *timeout); } 316 AUE_AIO_CANCEL NOPROTO { int aio_cancel(int fd, \ struct aiocb *aiocbp); } 317 AUE_AIO_ERROR STD { int freebsd32_aio_error( \ struct aiocb32 *aiocbp); } 318 AUE_AIO_READ COMPAT6 { int freebsd32_aio_read( \ struct oaiocb32 *aiocbp); } 319 AUE_AIO_WRITE COMPAT6 { int freebsd32_aio_write( \ struct oaiocb32 *aiocbp); } 320 AUE_LIO_LISTIO COMPAT6 { int freebsd32_lio_listio(int mode, \ struct oaiocb32 * const *acb_list, \ int nent, struct osigevent32 *sig); } 321 AUE_NULL NOPROTO { int yield(void); } 322 AUE_NULL OBSOL thr_sleep 323 AUE_NULL OBSOL thr_wakeup 324 AUE_MLOCKALL NOPROTO { int mlockall(int how); } 325 AUE_MUNLOCKALL NOPROTO { int munlockall(void); } 326 AUE_GETCWD NOPROTO { int __getcwd(char *buf, size_t buflen); } 327 AUE_NULL NOPROTO { int sched_setparam (pid_t pid, \ const struct sched_param *param); } 328 AUE_NULL NOPROTO { int sched_getparam (pid_t pid, \ struct sched_param *param); } 329 AUE_NULL NOPROTO { int sched_setscheduler (pid_t pid, \ int policy, \ const struct sched_param *param); } 330 AUE_NULL NOPROTO { int sched_getscheduler (pid_t pid); } 331 AUE_NULL NOPROTO { int sched_yield (void); } 332 AUE_NULL NOPROTO { int sched_get_priority_max (int policy); } 333 AUE_NULL NOPROTO { int sched_get_priority_min (int policy); } 334 AUE_NULL NOPROTO { int sched_rr_get_interval (pid_t pid, \ struct timespec *interval); } 335 AUE_NULL NOPROTO { int utrace(const void *addr, size_t len); } 336 AUE_SENDFILE COMPAT4 { int freebsd32_sendfile(int fd, int s, \ uint32_t offset1, uint32_t offset2, \ size_t nbytes, struct sf_hdtr32 *hdtr, \ off_t *sbytes, int flags); } 337 AUE_NULL NOPROTO { int kldsym(int fileid, int cmd, \ void *data); } 338 AUE_JAIL STD { int freebsd32_jail(struct jail32 *jail); } 339 AUE_NULL UNIMPL pioctl 340 AUE_SIGPROCMASK NOPROTO { int sigprocmask(int how, \ const sigset_t *set, sigset_t *oset); } 341 AUE_SIGSUSPEND NOPROTO { int sigsuspend(const sigset_t *sigmask); } 342 AUE_SIGACTION COMPAT4 { int freebsd32_sigaction(int sig, \ struct sigaction32 *act, \ struct sigaction32 *oact); } 343 AUE_SIGPENDING NOPROTO { int sigpending(sigset_t *set); } 344 AUE_SIGRETURN COMPAT4 { int freebsd32_sigreturn( \ const struct freebsd4_freebsd32_ucontext *sigcntxp); } 345 AUE_SIGWAIT STD { int freebsd32_sigtimedwait(const sigset_t *set, \ siginfo_t *info, \ const struct timespec *timeout); } 346 AUE_NULL STD { int freebsd32_sigwaitinfo(const sigset_t *set, \ siginfo_t *info); } 347 AUE_ACL_GET_FILE NOPROTO { int __acl_get_file(const char *path, \ acl_type_t type, struct acl *aclp); } 348 AUE_ACL_SET_FILE NOPROTO { int __acl_set_file(const char *path, \ acl_type_t type, struct acl *aclp); } 349 AUE_ACL_GET_FD NOPROTO { int __acl_get_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 350 AUE_ACL_SET_FD NOPROTO { int __acl_set_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 351 AUE_ACL_DELETE_FILE NOPROTO { int __acl_delete_file(const char *path, \ acl_type_t type); } 352 AUE_ACL_DELETE_FD NOPROTO { int __acl_delete_fd(int filedes, \ acl_type_t type); } 353 AUE_ACL_CHECK_FILE NOPROTO { int __acl_aclcheck_file(const char *path, \ acl_type_t type, struct acl *aclp); } 354 AUE_ACL_CHECK_FD NOPROTO { int __acl_aclcheck_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 355 
AUE_EXTATTRCTL NOPROTO { int extattrctl(const char *path, int cmd, \ const char *filename, int attrnamespace, \ const char *attrname); } 356 AUE_EXTATTR_SET_FILE NOPROTO { ssize_t extattr_set_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 357 AUE_EXTATTR_GET_FILE NOPROTO { ssize_t extattr_get_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 358 AUE_EXTATTR_DELETE_FILE NOPROTO { int extattr_delete_file( \ const char *path, int attrnamespace, \ const char *attrname); } 359 AUE_AIO_WAITCOMPLETE STD { int freebsd32_aio_waitcomplete( \ struct aiocb32 **aiocbp, \ struct timespec32 *timeout); } 360 AUE_GETRESUID NOPROTO { int getresuid(uid_t *ruid, uid_t *euid, \ uid_t *suid); } 361 AUE_GETRESGID NOPROTO { int getresgid(gid_t *rgid, gid_t *egid, \ gid_t *sgid); } 362 AUE_KQUEUE NOPROTO { int kqueue(void); } -363 AUE_KEVENT STD { int freebsd32_kevent(int fd, \ - const struct kevent32 *changelist, \ +363 AUE_KEVENT COMPAT11 { int freebsd32_kevent(int fd, \ + const struct kevent32_freebsd11 * \ + changelist, \ int nchanges, \ - struct kevent32 *eventlist, int nevents, \ + struct kevent32_freebsd11 *eventlist, \ + int nevents, \ const struct timespec32 *timeout); } 364 AUE_NULL UNIMPL __cap_get_proc 365 AUE_NULL UNIMPL __cap_set_proc 366 AUE_NULL UNIMPL __cap_get_fd 367 AUE_NULL UNIMPL __cap_get_file 368 AUE_NULL UNIMPL __cap_set_fd 369 AUE_NULL UNIMPL __cap_set_file 370 AUE_NULL UNIMPL nosys 371 AUE_EXTATTR_SET_FD NOPROTO { ssize_t extattr_set_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 372 AUE_EXTATTR_GET_FD NOPROTO { ssize_t extattr_get_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 373 AUE_EXTATTR_DELETE_FD NOPROTO { int extattr_delete_fd(int fd, \ int attrnamespace, \ const char *attrname); } 374 AUE_SETUGID NOPROTO { int __setugid(int flag); } 375 AUE_NULL UNIMPL nfsclnt 376 AUE_EACCESS NOPROTO { int eaccess(char *path, int amode); } 377 AUE_NULL UNIMPL afs_syscall 378 AUE_NMOUNT STD { int freebsd32_nmount(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 379 AUE_NULL UNIMPL kse_exit 380 AUE_NULL UNIMPL kse_wakeup 381 AUE_NULL UNIMPL kse_create 382 AUE_NULL UNIMPL kse_thr_interrupt 383 AUE_NULL UNIMPL kse_release 384 AUE_NULL UNIMPL __mac_get_proc 385 AUE_NULL UNIMPL __mac_set_proc 386 AUE_NULL UNIMPL __mac_get_fd 387 AUE_NULL UNIMPL __mac_get_file 388 AUE_NULL UNIMPL __mac_set_fd 389 AUE_NULL UNIMPL __mac_set_file 390 AUE_NULL NOPROTO { int kenv(int what, const char *name, \ char *value, int len); } 391 AUE_LCHFLAGS NOPROTO { int lchflags(const char *path, \ u_long flags); } 392 AUE_NULL NOPROTO { int uuidgen(struct uuid *store, \ int count); } 393 AUE_SENDFILE STD { int freebsd32_sendfile(int fd, int s, \ uint32_t offset1, uint32_t offset2, \ size_t nbytes, struct sf_hdtr32 *hdtr, \ off_t *sbytes, int flags); } 394 AUE_NULL UNIMPL mac_syscall 395 AUE_GETFSSTAT COMPAT11|NOPROTO { int getfsstat( \ struct freebsd11_statfs *buf, \ long bufsize, int mode); } 396 AUE_STATFS COMPAT11|NOPROTO { int statfs(char *path, \ struct statfs *buf); } 397 AUE_FSTATFS COMPAT11|NOPROTO { int fstatfs(int fd, \ struct freebsd11_statfs *buf); } 398 AUE_FHSTATFS COMPAT11|NOPROTO { int fhstatfs( \ const struct fhandle *u_fhp, \ struct freebsd11_statfs *buf); } 399 AUE_NULL UNIMPL nosys 400 AUE_SEMCLOSE NOSTD|NOPROTO { int ksem_close(semid_t id); } 401 AUE_SEMPOST NOSTD|NOPROTO { int ksem_post(semid_t id); } 402 AUE_SEMWAIT 
NOSTD|NOPROTO { int ksem_wait(semid_t id); } 403 AUE_SEMTRYWAIT NOSTD|NOPROTO { int ksem_trywait(semid_t id); } 404 AUE_SEMINIT NOSTD { int freebsd32_ksem_init(semid_t *idp, \ unsigned int value); } 405 AUE_SEMOPEN NOSTD { int freebsd32_ksem_open(semid_t *idp, \ const char *name, int oflag, \ mode_t mode, unsigned int value); } 406 AUE_SEMUNLINK NOSTD|NOPROTO { int ksem_unlink(const char *name); } 407 AUE_SEMGETVALUE NOSTD|NOPROTO { int ksem_getvalue(semid_t id, \ int *val); } 408 AUE_SEMDESTROY NOSTD|NOPROTO { int ksem_destroy(semid_t id); } 409 AUE_NULL UNIMPL __mac_get_pid 410 AUE_NULL UNIMPL __mac_get_link 411 AUE_NULL UNIMPL __mac_set_link 412 AUE_EXTATTR_SET_LINK NOPROTO { ssize_t extattr_set_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 413 AUE_EXTATTR_GET_LINK NOPROTO { ssize_t extattr_get_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 414 AUE_EXTATTR_DELETE_LINK NOPROTO { int extattr_delete_link( \ const char *path, int attrnamespace, \ const char *attrname); } 415 AUE_NULL UNIMPL __mac_execve 416 AUE_SIGACTION STD { int freebsd32_sigaction(int sig, \ struct sigaction32 *act, \ struct sigaction32 *oact); } 417 AUE_SIGRETURN STD { int freebsd32_sigreturn( \ const struct freebsd32_ucontext *sigcntxp); } 418 AUE_NULL UNIMPL __xstat 419 AUE_NULL UNIMPL __xfstat 420 AUE_NULL UNIMPL __xlstat 421 AUE_NULL STD { int freebsd32_getcontext( \ struct freebsd32_ucontext *ucp); } 422 AUE_NULL STD { int freebsd32_setcontext( \ const struct freebsd32_ucontext *ucp); } 423 AUE_NULL STD { int freebsd32_swapcontext( \ struct freebsd32_ucontext *oucp, \ const struct freebsd32_ucontext *ucp); } 424 AUE_SWAPOFF UNIMPL swapoff 425 AUE_ACL_GET_LINK NOPROTO { int __acl_get_link(const char *path, \ acl_type_t type, struct acl *aclp); } 426 AUE_ACL_SET_LINK NOPROTO { int __acl_set_link(const char *path, \ acl_type_t type, struct acl *aclp); } 427 AUE_ACL_DELETE_LINK NOPROTO { int __acl_delete_link(const char *path, \ acl_type_t type); } 428 AUE_ACL_CHECK_LINK NOPROTO { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } 429 AUE_SIGWAIT NOPROTO { int sigwait(const sigset_t *set, \ int *sig); } 430 AUE_THR_CREATE UNIMPL thr_create; 431 AUE_THR_EXIT NOPROTO { void thr_exit(long *state); } 432 AUE_NULL NOPROTO { int thr_self(long *id); } 433 AUE_THR_KILL NOPROTO { int thr_kill(long id, int sig); } 434 AUE_NULL UNIMPL nosys 435 AUE_NULL UNIMPL nosys 436 AUE_JAIL_ATTACH NOPROTO { int jail_attach(int jid); } 437 AUE_EXTATTR_LIST_FD NOPROTO { ssize_t extattr_list_fd(int fd, \ int attrnamespace, void *data, \ size_t nbytes); } 438 AUE_EXTATTR_LIST_FILE NOPROTO { ssize_t extattr_list_file( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 439 AUE_EXTATTR_LIST_LINK NOPROTO { ssize_t extattr_list_link( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 440 AUE_NULL UNIMPL kse_switchin 441 AUE_SEMWAIT NOSTD { int freebsd32_ksem_timedwait(semid_t id, \ const struct timespec32 *abstime); } 442 AUE_NULL STD { int freebsd32_thr_suspend( \ const struct timespec32 *timeout); } 443 AUE_NULL NOPROTO { int thr_wake(long id); } 444 AUE_MODUNLOAD NOPROTO { int kldunloadf(int fileid, int flags); } 445 AUE_AUDIT NOPROTO { int audit(const void *record, \ u_int length); } 446 AUE_AUDITON NOPROTO { int auditon(int cmd, void *data, \ u_int length); } 447 AUE_GETAUID NOPROTO { int getauid(uid_t *auid); } 448 AUE_SETAUID NOPROTO { int setauid(uid_t *auid); } 449 
AUE_GETAUDIT NOPROTO { int getaudit(struct auditinfo *auditinfo); } 450 AUE_SETAUDIT NOPROTO { int setaudit(struct auditinfo *auditinfo); } 451 AUE_GETAUDIT_ADDR NOPROTO { int getaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 452 AUE_SETAUDIT_ADDR NOPROTO { int setaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 453 AUE_AUDITCTL NOPROTO { int auditctl(char *path); } 454 AUE_NULL STD { int freebsd32_umtx_op(void *obj, int op,\ u_long val, void *uaddr, \ void *uaddr2); } 455 AUE_THR_NEW STD { int freebsd32_thr_new( \ struct thr_param32 *param, \ int param_size); } 456 AUE_NULL STD { int freebsd32_sigqueue(pid_t pid, \ int signum, int value); } 457 AUE_MQ_OPEN NOSTD { int freebsd32_kmq_open( \ const char *path, int flags, mode_t mode, \ const struct mq_attr32 *attr); } 458 AUE_MQ_SETATTR NOSTD { int freebsd32_kmq_setattr(int mqd, \ const struct mq_attr32 *attr, \ struct mq_attr32 *oattr); } 459 AUE_MQ_TIMEDRECEIVE NOSTD { int freebsd32_kmq_timedreceive(int mqd, \ char *msg_ptr, size_t msg_len, \ unsigned *msg_prio, \ const struct timespec32 *abs_timeout); } 460 AUE_MQ_TIMEDSEND NOSTD { int freebsd32_kmq_timedsend(int mqd, \ const char *msg_ptr, size_t msg_len,\ unsigned msg_prio, \ const struct timespec32 *abs_timeout);} 461 AUE_MQ_NOTIFY NOSTD { int freebsd32_kmq_notify(int mqd, \ const struct sigevent32 *sigev); } 462 AUE_MQ_UNLINK NOPROTO|NOSTD { int kmq_unlink(const char *path); } 463 AUE_NULL NOPROTO { int abort2(const char *why, int nargs, void **args); } 464 AUE_NULL NOPROTO { int thr_set_name(long id, const char *name); } 465 AUE_AIO_FSYNC STD { int freebsd32_aio_fsync(int op, \ struct aiocb32 *aiocbp); } 466 AUE_RTPRIO NOPROTO { int rtprio_thread(int function, \ lwpid_t lwpid, struct rtprio *rtp); } 467 AUE_NULL UNIMPL nosys 468 AUE_NULL UNIMPL nosys 469 AUE_NULL UNIMPL __getpath_fromfd 470 AUE_NULL UNIMPL __getpath_fromaddr 471 AUE_SCTP_PEELOFF NOPROTO|NOSTD { int sctp_peeloff(int sd, uint32_t name); } 472 AUE_SCTP_GENERIC_SENDMSG NOPROTO|NOSTD { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 473 AUE_SCTP_GENERIC_SENDMSG_IOV NOPROTO|NOSTD { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 474 AUE_SCTP_GENERIC_RECVMSG NOPROTO|NOSTD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } #ifdef PAD64_REQUIRED 475 AUE_PREAD STD { ssize_t freebsd32_pread(int fd, \ void *buf,size_t nbyte, \ int pad, \ uint32_t offset1, uint32_t offset2); } 476 AUE_PWRITE STD { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, \ int pad, \ uint32_t offset1, uint32_t offset2); } 477 AUE_MMAP STD { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, \ uint32_t pos1, uint32_t pos2); } 478 AUE_LSEEK STD { off_t freebsd32_lseek(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2, \ int whence); } 479 AUE_TRUNCATE STD { int freebsd32_truncate(char *path, \ int pad, \ uint32_t length1, uint32_t length2); } 480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \ int pad, \ uint32_t length1, uint32_t length2); } #else 475 AUE_PREAD STD { ssize_t freebsd32_pread(int fd, \ void *buf,size_t nbyte, \ uint32_t offset1, uint32_t offset2); } 476 AUE_PWRITE STD { ssize_t freebsd32_pwrite(int fd, \ const void *buf, 
size_t nbyte, \ uint32_t offset1, uint32_t offset2); } 477 AUE_MMAP STD { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ uint32_t pos1, uint32_t pos2); } 478 AUE_LSEEK STD { off_t freebsd32_lseek(int fd, \ uint32_t offset1, uint32_t offset2, \ int whence); } 479 AUE_TRUNCATE STD { int freebsd32_truncate(char *path, \ uint32_t length1, uint32_t length2); } 480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \ uint32_t length1, uint32_t length2); } #endif 481 AUE_THR_KILL2 NOPROTO { int thr_kill2(pid_t pid, long id, int sig); } 482 AUE_SHMOPEN NOPROTO { int shm_open(const char *path, int flags, \ mode_t mode); } 483 AUE_SHMUNLINK NOPROTO { int shm_unlink(const char *path); } 484 AUE_NULL NOPROTO { int cpuset(cpusetid_t *setid); } #ifdef PAD64_REQUIRED 485 AUE_NULL STD { int freebsd32_cpuset_setid(cpuwhich_t which, \ int pad, \ uint32_t id1, uint32_t id2, \ cpusetid_t setid); } #else 485 AUE_NULL STD { int freebsd32_cpuset_setid(cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ cpusetid_t setid); } #endif 486 AUE_NULL STD { int freebsd32_cpuset_getid(cpulevel_t level, \ cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ cpusetid_t *setid); } 487 AUE_NULL STD { int freebsd32_cpuset_getaffinity( \ cpulevel_t level, cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ size_t cpusetsize, \ cpuset_t *mask); } 488 AUE_NULL STD { int freebsd32_cpuset_setaffinity( \ cpulevel_t level, cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ size_t cpusetsize, \ const cpuset_t *mask); } 489 AUE_FACCESSAT NOPROTO { int faccessat(int fd, char *path, int amode, \ int flag); } 490 AUE_FCHMODAT NOPROTO { int fchmodat(int fd, const char *path, \ mode_t mode, int flag); } 491 AUE_FCHOWNAT NOPROTO { int fchownat(int fd, char *path, uid_t uid, \ gid_t gid, int flag); } 492 AUE_FEXECVE STD { int freebsd32_fexecve(int fd, \ uint32_t *argv, uint32_t *envv); } 493 AUE_FSTATAT COMPAT11 { int freebsd32_fstatat(int fd, \ char *path, struct freebsd11_stat32 *buf, \ int flag); } 494 AUE_FUTIMESAT STD { int freebsd32_futimesat(int fd, char *path, \ struct timeval *times); } 495 AUE_LINKAT NOPROTO { int linkat(int fd1, char *path1, int fd2, \ char *path2, int flag); } 496 AUE_MKDIRAT NOPROTO { int mkdirat(int fd, char *path, \ mode_t mode); } 497 AUE_MKFIFOAT NOPROTO { int mkfifoat(int fd, char *path, \ mode_t mode); } 498 AUE_MKNODAT COMPAT11 { int freebsd32_mknodat(int fd, char *path, \ mode_t mode, uint32_t dev); } 499 AUE_OPENAT_RWTC NOPROTO { int openat(int fd, char *path, int flag, \ mode_t mode); } 500 AUE_READLINKAT NOPROTO { int readlinkat(int fd, char *path, char *buf, \ size_t bufsize); } 501 AUE_RENAMEAT NOPROTO { int renameat(int oldfd, char *old, int newfd, \ const char *new); } 502 AUE_SYMLINKAT NOPROTO { int symlinkat(char *path1, int fd, \ char *path2); } 503 AUE_UNLINKAT NOPROTO { int unlinkat(int fd, char *path, \ int flag); } 504 AUE_POSIX_OPENPT NOPROTO { int posix_openpt(int flags); } ; 505 is initialised by the kgssapi code, if present. 
505 AUE_NULL UNIMPL gssd_syscall 506 AUE_JAIL_GET STD { int freebsd32_jail_get(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 507 AUE_JAIL_SET STD { int freebsd32_jail_set(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 508 AUE_JAIL_REMOVE NOPROTO { int jail_remove(int jid); } 509 AUE_CLOSEFROM NOPROTO { int closefrom(int lowfd); } 510 AUE_SEMCTL NOSTD { int freebsd32_semctl(int semid, int semnum, \ int cmd, union semun32 *arg); } 511 AUE_MSGCTL NOSTD { int freebsd32_msgctl(int msqid, int cmd, \ struct msqid_ds32 *buf); } 512 AUE_SHMCTL NOSTD { int freebsd32_shmctl(int shmid, int cmd, \ struct shmid_ds32 *buf); } 513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); } 514 AUE_NULL OBSOL cap_new 515 AUE_CAP_RIGHTS_GET NOPROTO { int __cap_rights_get(int version, \ int fd, cap_rights_t *rightsp); } 516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); } 517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); } 518 AUE_PDFORK NOPROTO { int pdfork(int *fdp, int flags); } 519 AUE_PDKILL NOPROTO { int pdkill(int fd, int signum); } 520 AUE_PDGETPID NOPROTO { int pdgetpid(int fd, pid_t *pidp); } 521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int freebsd32_pselect(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ const struct timespec32 *ts, \ const sigset_t *sm); } 523 AUE_GETLOGINCLASS NOPROTO { int getloginclass(char *namebuf, \ size_t namelen); } 524 AUE_SETLOGINCLASS NOPROTO { int setloginclass(const char *namebuf); } 525 AUE_NULL NOPROTO { int rctl_get_racct(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 526 AUE_NULL NOPROTO { int rctl_get_rules(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 527 AUE_NULL NOPROTO { int rctl_get_limits(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 528 AUE_NULL NOPROTO { int rctl_add_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } #ifdef PAD64_REQUIRED 530 AUE_POSIX_FALLOCATE STD { int freebsd32_posix_fallocate(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2); } 531 AUE_POSIX_FADVISE STD { int freebsd32_posix_fadvise(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2, \ int advice); } 532 AUE_WAIT6 STD { int freebsd32_wait6(int idtype, int pad, \ uint32_t id1, uint32_t id2, \ int *status, int options, \ struct wrusage32 *wrusage, \ siginfo_t *info); } #else 530 AUE_POSIX_FALLOCATE STD { int freebsd32_posix_fallocate(int fd,\ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2); } 531 AUE_POSIX_FADVISE STD { int freebsd32_posix_fadvise(int fd, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2, \ int advice); } 532 AUE_WAIT6 STD { int freebsd32_wait6(int idtype, \ uint32_t id1, uint32_t id2, \ int *status, int options, \ struct wrusage32 *wrusage, \ siginfo_t *info); } #endif 533 AUE_CAP_RIGHTS_LIMIT NOPROTO { \ int cap_rights_limit(int fd, \ cap_rights_t *rightsp); } 534 AUE_CAP_IOCTLS_LIMIT STD { \ int freebsd32_cap_ioctls_limit(int fd, \ const uint32_t *cmds, size_t ncmds); } 535 AUE_CAP_IOCTLS_GET STD { \ ssize_t freebsd32_cap_ioctls_get(int fd, \ uint32_t *cmds, size_t maxcmds); } 536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \ uint32_t fcntlrights); } 537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \ uint32_t *fcntlrightsp); } 538 AUE_BINDAT 
NOPROTO { int bindat(int fd, int s, caddr_t name, \ int namelen); } 539 AUE_CONNECTAT NOPROTO { int connectat(int fd, int s, caddr_t name, \ int namelen); } 540 AUE_CHFLAGSAT NOPROTO { int chflagsat(int fd, const char *path, \ u_long flags, int atflag); } 541 AUE_ACCEPT NOPROTO { int accept4(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE NOPROTO { int pipe2(int *fildes, int flags); } 543 AUE_AIO_MLOCK STD { int freebsd32_aio_mlock( \ struct aiocb32 *aiocbp); } #ifdef PAD64_REQUIRED 544 AUE_PROCCTL STD { int freebsd32_procctl(int idtype, int pad, \ uint32_t id1, uint32_t id2, int com, \ void *data); } #else 544 AUE_PROCCTL STD { int freebsd32_procctl(int idtype, \ uint32_t id1, uint32_t id2, int com, \ void *data); } #endif 545 AUE_POLL STD { int freebsd32_ppoll(struct pollfd *fds, \ u_int nfds, const struct timespec32 *ts, \ const sigset_t *set); } 546 AUE_FUTIMES STD { int freebsd32_futimens(int fd, \ struct timespec *times); } 547 AUE_FUTIMESAT STD { int freebsd32_utimensat(int fd, \ char *path, \ struct timespec *times, int flag); } 548 AUE_NULL NOPROTO { int numa_getaffinity(cpuwhich_t which, \ id_t id, \ struct vm_domain_policy *policy); } 549 AUE_NULL NOPROTO { int numa_setaffinity(cpuwhich_t which, \ id_t id, \ const struct vm_domain_policy *policy); } 550 AUE_FSYNC NOPROTO { int fdatasync(int fd); } 551 AUE_FSTAT STD { int freebsd32_fstat(int fd, \ struct stat32 *ub); } 552 AUE_FSTATAT STD { int freebsd32_fstatat(int fd, \ char *path, struct stat32 *buf, \ int flag); } 553 AUE_FHSTAT STD { int freebsd32_fhstat( \ const struct fhandle *u_fhp, \ struct stat32 *sb); } 554 AUE_GETDIRENTRIES STD { ssize_t freebsd32_getdirentries( \ int fd, char *buf, size_t count, \ int32_t *basep); } 555 AUE_STATFS NOPROTO { int statfs(char *path, \ struct statfs32 *buf); } 556 AUE_FSTATFS NOPROTO { int fstatfs(int fd, struct statfs32 *buf); } 557 AUE_GETFSSTAT NOPROTO { int getfsstat(struct statfs32 *buf, \ long bufsize, int mode); } 558 AUE_FHSTATFS NOPROTO { int fhstatfs(const struct fhandle *u_fhp, \ struct statfs32 *buf); } 559 AUE_MKNODAT NOPROTO { int mknodat(int fd, char *path, mode_t mode, \ dev_t dev); } +560 AUE_KEVENT STD { int freebsd32_kevent(int fd, \ + const struct kevent32 *changelist, \ + int nchanges, \ + struct kevent32 *eventlist, \ + int nevents, \ + const struct timespec32 *timeout); } Index: head/sys/kern/kern_event.c =================================================================== --- head/sys/kern/kern_event.c (revision 320042) +++ head/sys/kern/kern_event.c (revision 320043) @@ -1,2551 +1,2661 @@ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon * Copyright 2004 John-Mark Gurney * Copyright (c) 2009 Apple, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" #include "opt_ktrace.h" #include "opt_kqueue.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); /* * This lock is used if multiple kq locks are required. This possibly * should be made into a per proc lock. */ static struct mtx kq_global; MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); #define KQ_GLOBAL_LOCK(lck, haslck) do { \ if (!haslck) \ mtx_lock(lck); \ haslck = 1; \ } while (0) #define KQ_GLOBAL_UNLOCK(lck, haslck) do { \ if (haslck) \ mtx_unlock(lck); \ haslck = 0; \ } while (0) TASKQUEUE_DEFINE_THREAD(kqueue_ctx); static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count); static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok); static int kqueue_acquire(struct file *fp, struct kqueue **kqp); static void kqueue_release(struct kqueue *kq, int locked); static void kqueue_destroy(struct kqueue *kq); static void kqueue_drain(struct kqueue *kq, struct thread *td); static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int waitok); static void kqueue_task(void *arg, int pending); static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *timeout, struct kevent *keva, struct thread *td); static void kqueue_wakeup(struct kqueue *kq); static struct filterops *kqueue_fo_find(int filt); static void kqueue_fo_release(int filt); +struct g_kevent_args; +static int kern_kevent_generic(struct thread *td, + struct g_kevent_args *uap, + struct kevent_copyops *k_ops); static fo_ioctl_t kqueue_ioctl; static fo_poll_t kqueue_poll; static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; static fo_fill_kinfo_t kqueue_fill_kinfo; static struct fileops kqueueops = { .fo_read = invfo_rdwr, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, .fo_ioctl = kqueue_ioctl, .fo_poll = kqueue_poll, .fo_kqfilter = kqueue_kqfilter, .fo_stat = kqueue_stat, .fo_close = kqueue_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_fill_kinfo = kqueue_fill_kinfo, }; static int knote_attach(struct knote *kn, struct kqueue *kq); static void knote_drop(struct knote *kn, struct thread *td); static void knote_drop_detached(struct knote *kn, struct thread *td); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); static void knote_init(void); static struct knote *knote_alloc(int waitok); static void knote_free(struct knote *kn); static void filt_kqdetach(struct knote 
*kn); static int filt_kqueue(struct knote *kn, long hint); static int filt_procattach(struct knote *kn); static void filt_procdetach(struct knote *kn); static int filt_proc(struct knote *kn, long hint); static int filt_fileattach(struct knote *kn); static void filt_timerexpire(void *knx); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); static void filt_userdetach(struct knote *kn); static int filt_user(struct knote *kn, long hint); static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type); static struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, }; static struct filterops kqread_filtops = { .f_isfd = 1, .f_detach = filt_kqdetach, .f_event = filt_kqueue, }; /* XXX - move to kern_proc.c? */ static struct filterops proc_filtops = { .f_isfd = 0, .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, }; static struct filterops timer_filtops = { .f_isfd = 0, .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = filt_timer, }; static struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, .f_event = filt_user, .f_touch = filt_usertouch, }; static uma_zone_t knote_zone; static unsigned int kq_ncallouts = 0; static unsigned int kq_calloutmax = 4 * 1024; SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); /* XXX - ensure not influx ? */ #define KNOTE_ACTIVATE(kn, islock) do { \ if ((islock)) \ mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ else \ KQ_LOCK((kn)->kn_kq); \ (kn)->kn_status |= KN_ACTIVE; \ if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ knote_enqueue((kn)); \ if (!(islock)) \ KQ_UNLOCK((kn)->kn_kq); \ } while(0) #define KQ_LOCK(kq) do { \ mtx_lock(&(kq)->kq_lock); \ } while (0) #define KQ_FLUX_WAKEUP(kq) do { \ if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ (kq)->kq_state &= ~KQ_FLUXWAIT; \ wakeup((kq)); \ } \ } while (0) #define KQ_UNLOCK_FLUX(kq) do { \ KQ_FLUX_WAKEUP(kq); \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_UNLOCK(kq) do { \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_OWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_OWNED); \ } while (0) #define KQ_NOTOWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ } while (0) static struct knlist * kn_list_lock(struct knote *kn) { struct knlist *knl; knl = kn->kn_knlist; if (knl != NULL) knl->kl_lock(knl->kl_lockarg); return (knl); } static void kn_list_unlock(struct knlist *knl) { bool do_free; if (knl == NULL) return; do_free = knl->kl_autodestroy && knlist_empty(knl); knl->kl_unlock(knl->kl_lockarg); if (do_free) { knlist_destroy(knl); free(knl, M_KQUEUE); } } static bool kn_in_flux(struct knote *kn) { return (kn->kn_influx > 0); } static void kn_enter_flux(struct knote *kn) { KQ_OWNED(kn->kn_kq); MPASS(kn->kn_influx < INT_MAX); kn->kn_influx++; } static bool kn_leave_flux(struct knote *kn) { KQ_OWNED(kn->kn_kq); MPASS(kn->kn_influx > 0); kn->kn_influx--; return (kn->kn_influx == 0); } #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ else \ KNL_ASSERT_UNLOCKED(knl); \ } while (0) #ifdef INVARIANTS #define KNL_ASSERT_LOCKED(knl) do { \ knl->kl_assert_locked((knl)->kl_lockarg); \ } while (0) #define KNL_ASSERT_UNLOCKED(knl) do { \ knl->kl_assert_unlocked((knl)->kl_lockarg); \ } while (0) #else /* !INVARIANTS */ #define 
KNL_ASSERT_LOCKED(knl) do {} while(0) #define KNL_ASSERT_UNLOCKED(knl) do {} while (0) #endif /* INVARIANTS */ #ifndef KN_HASHSIZE #define KN_HASHSIZE 64 /* XXX should be tunable */ #endif #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) static int filt_nullattach(struct knote *kn) { return (ENXIO); }; struct filterops null_filtops = { .f_isfd = 0, .f_attach = filt_nullattach, }; /* XXX - make SYSINIT to add these, and move into respective modules. */ extern struct filterops sig_filtops; extern struct filterops fs_filtops; /* * Table for for all system-defined filters. */ static struct mtx filterops_lock; MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", MTX_DEF); static struct { struct filterops *for_fop; int for_nolock; int for_refcnt; } sysfilt_ops[EVFILT_SYSCOUNT] = { { &file_filtops, 1 }, /* EVFILT_READ */ { &file_filtops, 1 }, /* EVFILT_WRITE */ { &null_filtops }, /* EVFILT_AIO */ { &file_filtops, 1 }, /* EVFILT_VNODE */ { &proc_filtops, 1 }, /* EVFILT_PROC */ { &sig_filtops, 1 }, /* EVFILT_SIGNAL */ { &timer_filtops, 1 }, /* EVFILT_TIMER */ { &file_filtops, 1 }, /* EVFILT_PROCDESC */ { &fs_filtops, 1 }, /* EVFILT_FS */ { &null_filtops }, /* EVFILT_LIO */ { &user_filtops, 1 }, /* EVFILT_USER */ { &null_filtops }, /* EVFILT_SENDFILE */ { &file_filtops, 1 }, /* EVFILT_EMPTY */ }; /* * Simple redirection for all cdevsw style objects to call their fo_kqfilter * method. */ static int filt_fileattach(struct knote *kn) { return (fo_kqfilter(kn->kn_fp, kn)); } /*ARGSUSED*/ static int kqueue_kqfilter(struct file *fp, struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; if (kn->kn_filter != EVFILT_READ) return (EINVAL); kn->kn_status |= KN_KQUEUE; kn->kn_fop = &kqread_filtops; knlist_add(&kq->kq_sel.si_note, kn, 0); return (0); } static void filt_kqdetach(struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; knlist_remove(&kq->kq_sel.si_note, kn, 0); } /*ARGSUSED*/ static int filt_kqueue(struct knote *kn, long hint) { struct kqueue *kq = kn->kn_fp->f_data; kn->kn_data = kq->kq_count; return (kn->kn_data > 0); } /* XXX - move to kern_proc.c? */ static int filt_procattach(struct knote *kn) { struct proc *p; int error; bool exiting, immediate; exiting = immediate = false; p = pfind(kn->kn_id); if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { p = zpfind(kn->kn_id); exiting = true; } else if (p != NULL && (p->p_flag & P_WEXIT)) { exiting = true; } if (p == NULL) return (ESRCH); if ((error = p_cansee(curthread, p))) { PROC_UNLOCK(p); return (error); } kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ /* * Internal flag indicating registration done by kernel for the * purposes of getting a NOTE_CHILD notification. */ if (kn->kn_flags & EV_FLAG2) { kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); immediate = true; /* Force immediate activation of child note. */ } /* * Internal flag indicating registration done by kernel (for other than * NOTE_CHILD). */ if (kn->kn_flags & EV_FLAG1) { kn->kn_flags &= ~EV_FLAG1; } knlist_add(p->p_klist, kn, 1); /* * Immediately activate any child notes or, in the case of a zombie * target process, exit notes. The latter is necessary to handle the * case where the target process, e.g. a child, dies before the kevent * is registered. 
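The filt_procattach() path above deliberately covers the race where the target has already exited: zpfind() locates the zombie when NOTE_EXIT was requested and the exit note is activated immediately, so a late registration still sees the event. A minimal userland sketch of that pattern, assuming only the documented kevent(2)/EVFILT_PROC interface; the fork/sleep timing is illustrative:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct kevent ch, ev;
        pid_t pid;
        int kq;

        if ((kq = kqueue()) == -1)
                err(1, "kqueue");
        if ((pid = fork()) == -1)
                err(1, "fork");
        if (pid == 0)
                _exit(7);               /* child exits immediately */

        sleep(1);                       /* child is most likely a zombie now */

        /* Attaching to a zombie still delivers NOTE_EXIT (filt_procattach). */
        EV_SET(&ch, pid, EVFILT_PROC, EV_ADD | EV_ONESHOT, NOTE_EXIT, 0, NULL);
        if (kevent(kq, &ch, 1, &ev, 1, NULL) == -1)
                err(1, "kevent");

        if (ev.fflags & NOTE_EXIT)
                printf("pid %ld exited, status 0x%lx\n",
                    (long)ev.ident, (long)ev.data);

        waitpid(pid, NULL, 0);
        return (0);
}

The same code also works when the child has not exited yet; the kevent() call simply blocks until the exit note fires.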
*/ if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) KNOTE_ACTIVATE(kn, 0); PROC_UNLOCK(p); return (0); } /* * The knote may be attached to a different process, which may exit, * leaving nothing for the knote to be attached to. So when the process * exits, the knote is marked as DETACHED and also flagged as ONESHOT so * it will be deleted when read out. However, as part of the knote deletion, * this routine is called, so a check is needed to avoid actually performing * a detach, because the original process does not exist any more. */ /* XXX - move to kern_proc.c? */ static void filt_procdetach(struct knote *kn) { knlist_remove(kn->kn_knlist, kn, 0); kn->kn_ptr.p_proc = NULL; } /* XXX - move to kern_proc.c? */ static int filt_proc(struct knote *kn, long hint) { struct proc *p; u_int event; p = kn->kn_ptr.p_proc; if (p == NULL) /* already activated, from attach filter */ return (0); /* Mask off extra data. */ event = (u_int)hint & NOTE_PCTRLMASK; /* If the user is interested in this event, record it. */ if (kn->kn_sfflags & event) kn->kn_fflags |= event; /* Process is gone, so flag the event as finished. */ if (event == NOTE_EXIT) { kn->kn_flags |= EV_EOF | EV_ONESHOT; kn->kn_ptr.p_proc = NULL; if (kn->kn_fflags & NOTE_EXIT) kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig); if (kn->kn_fflags == 0) kn->kn_flags |= EV_DROP; return (1); } return (kn->kn_fflags != 0); } /* * Called when the process forked. It mostly does the same as the * knote(), activating all knotes registered to be activated when the * process forked. Additionally, for each knote attached to the * parent, check whether user wants to track the new process. If so * attach a new knote to it, and immediately report an event with the * child's pid. */ void knote_fork(struct knlist *list, int pid) { struct kqueue *kq; struct knote *kn; struct kevent kev; int error; if (list == NULL) return; list->kl_lock(list->kl_lockarg); SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { KQ_UNLOCK(kq); continue; } /* * The same as knote(), activate the event. */ if ((kn->kn_sfflags & NOTE_TRACK) == 0) { kn->kn_status |= KN_HASKQLOCK; if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 1); kn->kn_status &= ~KN_HASKQLOCK; KQ_UNLOCK(kq); continue; } /* * The NOTE_TRACK case. In addition to the activation * of the event, we need to register new events to * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ kn_enter_flux(kn); KQ_UNLOCK(kq); list->kl_unlock(list->kl_lockarg); /* * Activate existing knote and register tracking knotes with * new process. * * First register a knote to get just the child notice. This * must be a separate note from a potential NOTE_EXIT * notification since both NOTE_CHILD and NOTE_EXIT are defined * to use the data field (in conflicting ways). */ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, 0); if (error) kn->kn_fflags |= NOTE_TRACKERR; /* * Then register another knote to track other potential events * from the new process. 
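knote_fork() above implements NOTE_TRACK by attaching two knotes to each new child: a one-shot EV_FLAG2 note that reports NOTE_CHILD with the parent's pid in data, and an EV_FLAG1 note that carries the original fflags so later fork/exec/exit events keep flowing; NOTE_TRACKERR is set on the parent's note if either registration fails. A userland sketch of observing this, assuming the standard NOTE_TRACK semantics; the sleep() only orders registration before the fork:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct kevent ch, ev;
        pid_t child;
        int exits, kq;

        if ((kq = kqueue()) == -1)
                err(1, "kqueue");
        if ((child = fork()) == -1)
                err(1, "fork");
        if (child == 0) {
                sleep(1);               /* let the parent register first */
                if (fork() == 0)
                        _exit(0);       /* grandchild */
                wait(NULL);
                _exit(0);
        }

        EV_SET(&ch, child, EVFILT_PROC, EV_ADD,
            NOTE_FORK | NOTE_TRACK | NOTE_EXIT, 0, NULL);
        if (kevent(kq, &ch, 1, NULL, 0, NULL) == -1)
                err(1, "kevent register");

        for (exits = 0; exits < 2; ) {  /* expect child + grandchild exits */
                if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
                        err(1, "kevent wait");
                if (ev.fflags & NOTE_FORK)
                        printf("%ld forked\n", (long)ev.ident);
                if (ev.fflags & NOTE_CHILD)     /* knote added by knote_fork() */
                        printf("%ld is a child of %ld\n",
                            (long)ev.ident, (long)ev.data);
                if (ev.fflags & NOTE_TRACKERR)
                        warnx("could not track a child of %ld", (long)ev.ident);
                if (ev.fflags & NOTE_EXIT) {
                        printf("%ld exited\n", (long)ev.ident);
                        exits++;
                }
        }
        waitpid(child, NULL, 0);
        return (0);
}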
*/ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, 0); if (error) kn->kn_fflags |= NOTE_TRACKERR; if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 0); KQ_LOCK(kq); kn_leave_flux(kn); KQ_UNLOCK_FLUX(kq); list->kl_lock(list->kl_lockarg); } list->kl_unlock(list->kl_lockarg); } /* * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. */ #define NOTE_TIMER_PRECMASK \ (NOTE_SECONDS | NOTE_MSECONDS | NOTE_USECONDS | NOTE_NSECONDS) static sbintime_t timer2sbintime(intptr_t data, int flags) { int64_t secs; /* * Macros for converting to the fractional second portion of an * sbintime_t using 64bit multiplication to improve precision. */ #define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) #define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) #define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) switch (flags & NOTE_TIMER_PRECMASK) { case NOTE_SECONDS: #ifdef __LP64__ if (data > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return ((sbintime_t)data << 32); case NOTE_MSECONDS: /* FALLTHROUGH */ case 0: if (data >= 1000) { secs = data / 1000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | MS_TO_SBT(data % 1000)); } return (MS_TO_SBT(data)); case NOTE_USECONDS: if (data >= 1000000) { secs = data / 1000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | US_TO_SBT(data % 1000000)); } return (US_TO_SBT(data)); case NOTE_NSECONDS: if (data >= 1000000000) { secs = data / 1000000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | US_TO_SBT(data % 1000000000)); } return (NS_TO_SBT(data)); default: break; } return (-1); } struct kq_timer_cb_data { struct callout c; sbintime_t next; /* next timer event fires at */ - sbintime_t to; /* precalculated timer period */ + sbintime_t to; /* precalculated timer period, 0 for abs */ }; static void filt_timerexpire(void *knx) { struct knote *kn; struct kq_timer_cb_data *kc; kn = knx; kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ if ((kn->kn_flags & EV_ONESHOT) != 0) return; - kc = kn->kn_ptr.p_v; + if (kc->to == 0) + return; kc->next += kc->to; callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); } /* * data contains amount of time to sleep */ static int filt_timerattach(struct knote *kn) { struct kq_timer_cb_data *kc; - sbintime_t to; + struct bintime bt; + sbintime_t to, sbt; unsigned int ncallouts; if (kn->kn_sdata < 0) return (EINVAL); if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; /* Only precision unit are supported in flags so far */ - if ((kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) != 0) + if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0) return (EINVAL); to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); + if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { + getboottimebin(&bt); + sbt = bttosbt(bt); + to -= sbt; + } if (to < 0) return (EINVAL); do { ncallouts = kq_ncallouts; if (ncallouts >= kq_calloutmax) return (ENOMEM); } while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1)); - kn->kn_flags |= EV_CLEAR; /* automatically set */ + if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) + kn->kn_flags |= EV_CLEAR; /* 
automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); callout_init(&kc->c, 1); - kc->next = to + sbinuptime(); - kc->to = to; + if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { + kc->next = to; + kc->to = 0; + } else { + kc->next = to + sbinuptime(); + kc->to = to; + } callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); return (0); } static void filt_timerdetach(struct knote *kn) { struct kq_timer_cb_data *kc; unsigned int old; kc = kn->kn_ptr.p_v; callout_drain(&kc->c); free(kc, M_KQUEUE); old = atomic_fetchadd_int(&kq_ncallouts, -1); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } static int filt_timer(struct knote *kn, long hint) { return (kn->kn_data != 0); } static int filt_userattach(struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ kn->kn_hook = NULL; if (kn->kn_fflags & NOTE_TRIGGER) kn->kn_hookid = 1; else kn->kn_hookid = 0; return (0); } static void filt_userdetach(__unused struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ } static int filt_user(struct knote *kn, __unused long hint) { return (kn->kn_hookid); } static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) { u_int ffctrl; switch (type) { case EVENT_REGISTER: if (kev->fflags & NOTE_TRIGGER) kn->kn_hookid = 1; ffctrl = kev->fflags & NOTE_FFCTRLMASK; kev->fflags &= NOTE_FFLAGSMASK; switch (ffctrl) { case NOTE_FFNOP: break; case NOTE_FFAND: kn->kn_sfflags &= kev->fflags; break; case NOTE_FFOR: kn->kn_sfflags |= kev->fflags; break; case NOTE_FFCOPY: kn->kn_sfflags = kev->fflags; break; default: /* XXX Return error? */ break; } kn->kn_sdata = kev->data; if (kev->flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; case EVENT_PROCESS: *kev = kn->kn_kevent; kev->fflags = kn->kn_sfflags; kev->data = kn->kn_sdata; if (kn->kn_flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; default: panic("filt_usertouch() - invalid type (%ld)", type); break; } } int sys_kqueue(struct thread *td, struct kqueue_args *uap) { return (kern_kqueue(td, 0, NULL)); } static void kqueue_init(struct kqueue *kq) { mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); TAILQ_INIT(&kq->kq_head); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); } int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) { struct filedesc *fdp; struct kqueue *kq; struct file *fp; struct ucred *cred; int fd, error; fdp = td->td_proc->p_fd; cred = td->td_ucred; if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); error = falloc_caps(td, &fp, &fd, flags, fcaps); if (error != 0) { chgkqcnt(cred->cr_ruidinfo, -1, 0); return (error); } /* An extra reference on `fp' has been held for us by falloc(). 
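With the NOTE_ABSTIME handling above, kc->to == 0 marks an absolute timer: the caller's wall-clock deadline is converted to an uptime-based sbintime by subtracting the boot time, and filt_timerexpire() never re-arms it. A userland sketch, assuming NOTE_ABSTIME interprets data as an absolute CLOCK_REALTIME deadline in the unit selected by the precision flags:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
        struct kevent ch, ev;
        int kq;

        if ((kq = kqueue()) == -1)
                err(1, "kqueue");

        /* Fire once, five seconds from now, given as an absolute time. */
        EV_SET(&ch, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT,
            NOTE_ABSTIME | NOTE_SECONDS, time(NULL) + 5, NULL);
        if (kevent(kq, &ch, 1, NULL, 0, NULL) == -1)
                err(1, "kevent register");

        if (kevent(kq, NULL, 0, &ev, 1, NULL) == -1)
                err(1, "kevent wait");
        printf("timer %lu fired\n", (unsigned long)ev.ident);
        return (0);
}

Note that, per the attach code above, EV_CLEAR is not forced for absolute timers and a deadline already in the past is rejected with EINVAL.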
*/ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); kqueue_init(kq); kq->kq_fdp = fdp; kq->kq_cred = crhold(cred); FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); FILEDESC_XUNLOCK(fdp); finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; return (0); } #ifdef KTRACE static size_t -kev_iovlen(int n, u_int kgio) +kev_iovlen(int n, u_int kgio, size_t kevent_size) { - if (n < 0 || n >= kgio / sizeof(struct kevent)) + if (n < 0 || n >= kgio / kevent_size) return (kgio); - return (n * sizeof(struct kevent)); + return (n * kevent_size); } #endif -#ifndef _SYS_SYSPROTO_H_ -struct kevent_args { +struct g_kevent_args { int fd; - const struct kevent *changelist; + void *changelist; int nchanges; - struct kevent *eventlist; + void *eventlist; int nevents; const struct timespec *timeout; }; -#endif + int sys_kevent(struct thread *td, struct kevent_args *uap) { - struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = kevent_copyout, .k_copyin = kevent_copyin, + .kevent_size = sizeof(struct kevent), }; + + return (kern_kevent_generic(td, (struct g_kevent_args *)uap, &k_ops)); +} + +static int +kern_kevent_generic(struct thread *td, struct g_kevent_args *uap, + struct kevent_copyops *k_ops) +{ + struct timespec ts, *tsp; int error; #ifdef KTRACE struct uio ktruio; struct iovec ktriov; struct uio *ktruioin = NULL; struct uio *ktruioout = NULL; u_int kgio; #endif if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) { kgio = ktr_geniosize; ktriov.iov_base = uap->changelist; - ktriov.iov_len = kev_iovlen(uap->nchanges, kgio); + ktriov.iov_len = kev_iovlen(uap->nchanges, kgio, + k_ops->kevent_size); ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1, .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ, .uio_td = td }; ktruioin = cloneuio(&ktruio); ktriov.iov_base = uap->eventlist; - ktriov.iov_len = kev_iovlen(uap->nevents, kgio); - ktriov.iov_len = uap->nevents * sizeof(struct kevent); + ktriov.iov_len = kev_iovlen(uap->nevents, kgio, + k_ops->kevent_size); + ktriov.iov_len = uap->nevents * k_ops->kevent_size; ktruioout = cloneuio(&ktruio); } #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, - &k_ops, tsp); + k_ops, tsp); #ifdef KTRACE if (ktruioin != NULL) { - ktruioin->uio_resid = kev_iovlen(uap->nchanges, kgio); + ktruioin->uio_resid = kev_iovlen(uap->nchanges, kgio, + k_ops->kevent_size); ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0); - ktruioout->uio_resid = kev_iovlen(td->td_retval[0], kgio); + ktruioout->uio_resid = kev_iovlen(td->td_retval[0], kgio, + k_ops->kevent_size); ktrgenio(uap->fd, UIO_READ, ktruioout, error); } #endif return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int kevent_copyout(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyout(kevp, uap->eventlist, count * sizeof *kevp); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. 
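sys_kevent() is now only a thin wrapper: it fills in a struct kevent_copyops, including the new kevent_size member, and hands its arguments as a struct g_kevent_args to kern_kevent_generic(), so each ABI contributes nothing but its own copyin/copyout translators. A hypothetical sketch of adding another layout the same way; "my_kevent" and related names are invented for illustration, and because kern_kevent_generic() is file-static such a handler would live in this file, as freebsd11_kevent() below does:

/* Hypothetical foreign kevent layout and its user/kernel translators. */
struct my_kevent {
        uint32_t        ident;
        int16_t         filter;
        uint16_t        flags;
        uint32_t        fflags;
        int32_t         data;           /* narrower than the native field */
        uint32_t        udata;
};

struct my_kevent_args {
        int                      fd;
        const struct my_kevent  *changelist;
        int                      nchanges;
        struct my_kevent        *eventlist;
        int                      nevents;
        const struct timespec   *timeout;
};

static int
my_kevent_copyin(void *arg, struct kevent *kevp, int count)
{
        struct my_kevent_args *uap = arg;
        struct my_kevent mk;
        int error, i;

        for (i = 0, error = 0; i < count; i++, kevp++, uap->changelist++) {
                if ((error = copyin(uap->changelist, &mk, sizeof(mk))) != 0)
                        break;
                kevp->ident = mk.ident;
                kevp->filter = mk.filter;
                kevp->flags = mk.flags;
                kevp->fflags = mk.fflags;
                kevp->data = mk.data;
                kevp->udata = (void *)(uintptr_t)mk.udata;
                bzero(&kevp->ext, sizeof(kevp->ext));
        }
        return (error);
}

static int
my_kevent_copyout(void *arg, struct kevent *kevp, int count)
{
        struct my_kevent_args *uap = arg;
        struct my_kevent mk;
        int error, i;

        for (i = 0, error = 0; i < count; i++, kevp++, uap->eventlist++) {
                mk.ident = kevp->ident;
                mk.filter = kevp->filter;
                mk.flags = kevp->flags;
                mk.fflags = kevp->fflags;
                mk.data = kevp->data;
                mk.udata = (uint32_t)(uintptr_t)kevp->udata;
                if ((error = copyout(&mk, uap->eventlist, sizeof(mk))) != 0)
                        break;
        }
        return (error);
}

static int
my_kevent(struct thread *td, struct my_kevent_args *uap)
{
        struct kevent_copyops k_ops = {
                .arg            = uap,
                .k_copyout      = my_kevent_copyout,
                .k_copyin       = my_kevent_copyin,
                .kevent_size    = sizeof(struct my_kevent),     /* new member */
        };

        return (kern_kevent_generic(td, (struct g_kevent_args *)uap, &k_ops));
}

The cast to struct g_kevent_args works only because the leading members (fd, changelist, nchanges, eventlist, nevents, timeout) have the same layout, which is exactly the arrangement the freebsd11 shim relies on.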
*/ static int kevent_copyin(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyin(uap->changelist, kevp, count * sizeof *kevp); if (error == 0) uap->changelist += count; return (error); } + +#ifdef COMPAT_FREEBSD11 +struct kevent_freebsd11 { + __uintptr_t ident; /* identifier for this event */ + short filter; /* filter for event */ + unsigned short flags; + unsigned int fflags; + __intptr_t data; + void *udata; /* opaque user data identifier */ +}; + +static int +kevent11_copyout(void *arg, struct kevent *kevp, int count) +{ + struct freebsd11_kevent_args *uap; + struct kevent_freebsd11 kev11; + int error, i; + + KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); + uap = (struct freebsd11_kevent_args *)arg; + + for (i = 0; i < count; i++) { + kev11.ident = kevp->ident; + kev11.filter = kevp->filter; + kev11.flags = kevp->flags; + kev11.fflags = kevp->fflags; + kev11.data = kevp->data; + kev11.udata = kevp->udata; + error = copyout(&kev11, uap->eventlist, sizeof(kev11)); + if (error != 0) + break; + uap->eventlist++; + kevp++; + } + return (error); +} + +/* + * Copy 'count' items from the list pointed to by uap->changelist. + */ +static int +kevent11_copyin(void *arg, struct kevent *kevp, int count) +{ + struct freebsd11_kevent_args *uap; + struct kevent_freebsd11 kev11; + int error, i; + + KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); + uap = (struct freebsd11_kevent_args *)arg; + + for (i = 0; i < count; i++) { + error = copyin(uap->changelist, &kev11, sizeof(kev11)); + if (error != 0) + break; + kevp->ident = kev11.ident; + kevp->filter = kev11.filter; + kevp->flags = kev11.flags; + kevp->fflags = kev11.fflags; + kevp->data = (uintptr_t)kev11.data; + kevp->udata = kev11.udata; + bzero(&kevp->ext, sizeof(kevp->ext)); + uap->changelist++; + kevp++; + } + return (error); +} + +int +freebsd11_kevent(struct thread *td, struct freebsd11_kevent_args *uap) +{ + struct kevent_copyops k_ops = { + .arg = uap, + .k_copyout = kevent11_copyout, + .k_copyin = kevent11_copyin, + .kevent_size = sizeof(struct kevent_freebsd11), + }; + + return (kern_kevent_generic(td, (struct g_kevent_args *)uap, &k_ops)); +} +#endif int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { cap_rights_t rights; struct file *fp; int error; cap_rights_init(&rights); if (nchanges > 0) cap_rights_set(&rights, CAP_KQUEUE_CHANGE); if (nevents > 0) cap_rights_set(&rights, CAP_KQUEUE_EVENT); error = fget(td, fd, &rights, &fp); if (error != 0) return (error); error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout); fdrop(fp, td); return (error); } static int kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kevent keva[KQ_NEVENTS]; struct kevent *kevp, *changes; int i, n, nerrors, error; nerrors = 0; while (nchanges > 0) { n = nchanges > KQ_NEVENTS ? 
KQ_NEVENTS : nchanges; error = k_ops->k_copyin(k_ops->arg, keva, n); if (error) return (error); changes = keva; for (i = 0; i < n; i++) { kevp = &changes[i]; if (!kevp->filter) continue; kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp, td, 1); if (error || (kevp->flags & EV_RECEIPT)) { if (nevents == 0) return (error); kevp->flags = EV_ERROR; kevp->data = error; (void)k_ops->k_copyout(k_ops->arg, kevp, 1); nevents--; nerrors++; } } nchanges -= n; } if (nerrors) { td->td_retval[0] = nerrors; return (0); } return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td)); } int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kqueue *kq; int error; error = kqueue_acquire(fp, &kq); if (error != 0) return (error); error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout); kqueue_release(kq, 0); return (error); } /* * Performs a kevent() call on a temporarily created kqueue. This can be * used to perform one-shot polling, similar to poll() and select(). */ int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops) { struct kqueue kq = {}; int error; kqueue_init(&kq); kq.kq_refcnt = 1; error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); kqueue_drain(&kq, td); kqueue_destroy(&kq); return (error); } int kqueue_add_filteropts(int filt, struct filterops *filtops) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { printf( "trying to add a filterop that is out of range: %d is beyond %d\n", ~filt, EVFILT_SYSCOUNT); return EINVAL; } mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop != &null_filtops && sysfilt_ops[~filt].for_fop != NULL) error = EEXIST; else { sysfilt_ops[~filt].for_fop = filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return (error); } int kqueue_del_filteropts(int filt) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return EINVAL; mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop == &null_filtops || sysfilt_ops[~filt].for_fop == NULL) error = EINVAL; else if (sysfilt_ops[~filt].for_refcnt != 0) error = EBUSY; else { sysfilt_ops[~filt].for_fop = &null_filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return error; } static struct filterops * kqueue_fo_find(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return NULL; if (sysfilt_ops[~filt].for_nolock) return sysfilt_ops[~filt].for_fop; mtx_lock(&filterops_lock); sysfilt_ops[~filt].for_refcnt++; if (sysfilt_ops[~filt].for_fop == NULL) sysfilt_ops[~filt].for_fop = &null_filtops; mtx_unlock(&filterops_lock); return sysfilt_ops[~filt].for_fop; } static void kqueue_fo_release(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return; if (sysfilt_ops[~filt].for_nolock) return; mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, ("filter object refcount not valid on release")); sysfilt_ops[~filt].for_refcnt--; mtx_unlock(&filterops_lock); } /* * A ref to kq (obtained via kqueue_acquire) must be held. waitok will * influence if memory allocation should wait. Make sure it is 0 if you * hold any mutexes. 
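In kqueue_kevent() above, a change that fails to register, or that carries EV_RECEIPT, is reported inline: the slot is rewritten with EV_ERROR and the errno in data, copied to the event list, and the call returns the receipts without scanning for pending events. A userland sketch of using EV_RECEIPT to validate a batch of registrations; the bad descriptor number is arbitrary:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        struct kevent ch[2], res[2];
        int i, kq, n;

        if ((kq = kqueue()) == -1)
                err(1, "kqueue");

        /* One change that should succeed and one that must fail (bad fd). */
        EV_SET(&ch[0], STDIN_FILENO, EVFILT_READ, EV_ADD | EV_RECEIPT, 0, 0, NULL);
        EV_SET(&ch[1], 1234, EVFILT_READ, EV_ADD | EV_RECEIPT, 0, 0, NULL);

        if ((n = kevent(kq, ch, 2, res, 2, NULL)) == -1)
                err(1, "kevent");

        for (i = 0; i < n; i++) {
                /* Every receipt comes back flagged EV_ERROR; data is 0 on
                   success or an errno value on failure. */
                printf("ident %lu: %s\n", (unsigned long)res[i].ident,
                    res[i].data == 0 ? "ok" : strerror((int)res[i].data));
        }
        return (0);
}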
*/ static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok) { struct filterops *fops; struct file *fp; struct knote *kn, *tkn; struct knlist *knl; cap_rights_t rights; int error, filt, event; int haskqglobal, filedesc_unlock; if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) return (EINVAL); fp = NULL; kn = NULL; knl = NULL; error = 0; haskqglobal = 0; filedesc_unlock = 0; filt = kev->filter; fops = kqueue_fo_find(filt); if (fops == NULL) return EINVAL; if (kev->flags & EV_ADD) { /* * Prevent waiting with locks. Non-sleepable * allocation failures are handled in the loop, only * if the spare knote appears to be actually required. */ tkn = knote_alloc(waitok); } else { tkn = NULL; } findkn: if (fops->f_isfd) { KASSERT(td != NULL, ("td is NULL")); if (kev->ident > INT_MAX) error = EBADF; else error = fget(td, kev->ident, cap_rights_init(&rights, CAP_EVENT), &fp); if (error) goto done; if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, kev->ident, 0) != 0) { /* try again */ fdrop(fp, td); fp = NULL; error = kqueue_expand(kq, fops, kev->ident, waitok); if (error) goto done; goto findkn; } if (fp->f_type == DTYPE_KQUEUE) { /* * If we add some intelligence about what we are doing, * we should be able to support events on ourselves. * We need to know when we are doing this to prevent * getting both the knlist lock and the kq lock since * they are the same thing. */ if (fp->f_data == kq) { error = EINVAL; goto done; } /* * Pre-lock the filedesc before the global * lock mutex, see the comment in * kqueue_close(). */ FILEDESC_XLOCK(td->td_proc->p_fd); filedesc_unlock = 1; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); } KQ_LOCK(kq); if (kev->ident < kq->kq_knlistsize) { SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) if (kev->filter == kn->kn_filter) break; } } else { if ((kev->flags & EV_ADD) == EV_ADD) kqueue_expand(kq, fops, kev->ident, waitok); KQ_LOCK(kq); /* * If possible, find an existing knote to use for this kevent. */ if (kev->filter == EVFILT_PROC && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { /* This is an internal creation of a process tracking * note. Don't attempt to coalesce this with an * existing note. */ ; } else if (kq->kq_knhashmask != 0) { struct klist *list; list = &kq->kq_knhash[ KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; SLIST_FOREACH(kn, list, kn_link) if (kev->ident == kn->kn_id && kev->filter == kn->kn_filter) break; } } /* knote is in the process of changing, wait for it to stabilize. */ if (kn != NULL && kn_in_flux(kn)) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { FILEDESC_XUNLOCK(td->td_proc->p_fd); filedesc_unlock = 0; } kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); if (fp != NULL) { fdrop(fp, td); fp = NULL; } goto findkn; } /* * kn now contains the matching knote, or NULL if no match */ if (kn == NULL) { if (kev->flags & EV_ADD) { kn = tkn; tkn = NULL; if (kn == NULL) { KQ_UNLOCK(kq); error = ENOMEM; goto done; } kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; /* * apply reference counts to knote structure, and * do not release it at the end of this routine. 
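kqueue_register() treats EV_ENABLE and EV_DISABLE as state changes on an existing knote (requesting both at once is rejected up front), and a disabled knote stays attached but is never enqueued until it is enabled again. A small userland sketch of that behavior, assuming ordinary pipe semantics:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct kevent ch, ev;
        struct timespec zero = { 0, 0 };
        int fds[2], kq, n;

        if (pipe(fds) == -1 || (kq = kqueue()) == -1)
                err(1, "setup");
        (void)write(fds[1], "x", 1);

        /* Register the read end, but leave the knote disabled for now. */
        EV_SET(&ch, fds[0], EVFILT_READ, EV_ADD | EV_DISABLE, 0, 0, NULL);
        if (kevent(kq, &ch, 1, NULL, 0, NULL) == -1)
                err(1, "register");

        n = kevent(kq, NULL, 0, &ev, 1, &zero);
        printf("while disabled: %d event(s)\n", n);     /* expect 0 */

        /* Enable it: the pending data is now reported. */
        EV_SET(&ch, fds[0], EVFILT_READ, EV_ENABLE, 0, 0, NULL);
        if (kevent(kq, &ch, 1, &ev, 1, NULL) == -1)
                err(1, "enable");
        printf("after enable: %ld byte(s) readable\n", (long)ev.data);
        return (0);
}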
*/ fops = NULL; fp = NULL; kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; kev->data = 0; kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); kn->kn_status = KN_DETACHED; kn_enter_flux(kn); error = knote_attach(kn, kq); KQ_UNLOCK(kq); if (error != 0) { tkn = kn; goto done; } if ((error = kn->kn_fop->f_attach(kn)) != 0) { knote_drop_detached(kn, td); goto done; } knl = kn_list_lock(kn); goto done_ev_add; } else { /* No matching knote and the EV_ADD flag is not set. */ KQ_UNLOCK(kq); error = ENOENT; goto done; } } if (kev->flags & EV_DELETE) { kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); goto done; } if (kev->flags & EV_FORCEONESHOT) { kn->kn_flags |= EV_ONESHOT; KNOTE_ACTIVATE(kn, 1); } /* * The user may change some filter values after the initial EV_ADD, * but doing so will not reset any filter which has already been * triggered. */ kn->kn_status |= KN_SCAN; kn_enter_flux(kn); KQ_UNLOCK(kq); knl = kn_list_lock(kn); kn->kn_kevent.udata = kev->udata; if (!fops->f_isfd && fops->f_touch != NULL) { fops->f_touch(kn, kev, EVENT_REGISTER); } else { kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; } /* * We can get here with kn->kn_knlist == NULL. This can happen when * the initial attach event decides that the event is "completed" * already. i.e. filt_procattach is called on a zombie process. It * will call filt_proc which will remove it from the list, and NULL * kn_knlist. */ done_ev_add: if ((kev->flags & EV_ENABLE) != 0) kn->kn_status &= ~KN_DISABLED; else if ((kev->flags & EV_DISABLE) != 0) kn->kn_status |= KN_DISABLED; if ((kn->kn_status & KN_DISABLED) == 0) event = kn->kn_fop->f_event(kn, 0); else event = 0; KQ_LOCK(kq); if (event) kn->kn_status |= KN_ACTIVE; if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == KN_ACTIVE) knote_enqueue(kn); kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); done: KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) FILEDESC_XUNLOCK(td->td_proc->p_fd); if (fp != NULL) fdrop(fp, td); knote_free(tkn); if (fops != NULL) kqueue_fo_release(filt); return (error); } static int kqueue_acquire(struct file *fp, struct kqueue **kqp) { int error; struct kqueue *kq; error = 0; kq = fp->f_data; if (fp->f_type != DTYPE_KQUEUE || kq == NULL) return (EBADF); *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); return (EBADF); } kq->kq_refcnt++; KQ_UNLOCK(kq); return error; } static void kqueue_release(struct kqueue *kq, int locked) { if (locked) KQ_OWNED(kq); else KQ_LOCK(kq); kq->kq_refcnt--; if (kq->kq_refcnt == 1) wakeup(&kq->kq_refcnt); if (!locked) KQ_UNLOCK(kq); } static void kqueue_schedtask(struct kqueue *kq) { KQ_OWNED(kq); KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), ("scheduling kqueue task while draining")); if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task); kq->kq_state |= KQ_TASKSCHED; } } /* * Expand the kq to make sure we have storage for fops/ident pair. * * Return 0 on success (or no work necessary), return errno on failure. * * Not calling hashinit w/ waitok (proper malloc flag) should be safe. * If kqueue_register is called from a non-fd context, there usually/should * be no locks held. */ static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int waitok) { struct klist *list, *tmp_knhash, *to_free; u_long tmp_knhashmask; int size; int fd; int mflag = waitok ? 
M_WAITOK : M_NOWAIT; KQ_NOTOWNED(kq); to_free = NULL; if (fops->f_isfd) { fd = ident; if (kq->kq_knlistsize <= fd) { size = kq->kq_knlistsize; while (size <= fd) size += KQEXTENT; list = malloc(size * sizeof(*list), M_KQUEUE, mflag); if (list == NULL) return ENOMEM; KQ_LOCK(kq); if (kq->kq_knlistsize > fd) { to_free = list; list = NULL; } else { if (kq->kq_knlist != NULL) { bcopy(kq->kq_knlist, list, kq->kq_knlistsize * sizeof(*list)); to_free = kq->kq_knlist; kq->kq_knlist = NULL; } bzero((caddr_t)list + kq->kq_knlistsize * sizeof(*list), (size - kq->kq_knlistsize) * sizeof(*list)); kq->kq_knlistsize = size; kq->kq_knlist = list; } KQ_UNLOCK(kq); } } else { if (kq->kq_knhashmask == 0) { tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE, &tmp_knhashmask); if (tmp_knhash == NULL) return ENOMEM; KQ_LOCK(kq); if (kq->kq_knhashmask == 0) { kq->kq_knhash = tmp_knhash; kq->kq_knhashmask = tmp_knhashmask; } else { to_free = tmp_knhash; } KQ_UNLOCK(kq); } } free(to_free, M_KQUEUE); KQ_NOTOWNED(kq); return 0; } static void kqueue_task(void *arg, int pending) { struct kqueue *kq; int haskqglobal; haskqglobal = 0; kq = arg; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); KQ_LOCK(kq); KNOTE_LOCKED(&kq->kq_sel.si_note, 0); kq->kq_state &= ~KQ_TASKSCHED; if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { wakeup(&kq->kq_state); } KQ_UNLOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); } /* * Scan, update kn_data (if not ONESHOT), and copyout triggered events. * We treat KN_MARKER knotes as if they are in flux. */ static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *tsp, struct kevent *keva, struct thread *td) { struct kevent *kevp; struct knote *kn, *marker; struct knlist *knl; sbintime_t asbt, rsbt; int count, error, haskqglobal, influx, nkev, touch; count = maxevents; nkev = 0; error = 0; haskqglobal = 0; if (maxevents == 0) goto done_nl; rsbt = 0; if (tsp != NULL) { if (tsp->tv_sec < 0 || tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000) { error = EINVAL; goto done_nl; } if (timespecisset(tsp)) { if (tsp->tv_sec <= INT32_MAX) { rsbt = tstosbt(*tsp); if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = 0; rsbt >>= tc_precexp; } else asbt = 0; } else asbt = -1; } else asbt = 0; marker = knote_alloc(1); marker->kn_status = KN_MARKER; KQ_LOCK(kq); retry: kevp = keva; if (kq->kq_count == 0) { if (asbt == -1) { error = EWOULDBLOCK; } else { kq->kq_state |= KQ_SLEEP; error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH, "kqread", asbt, rsbt, C_ABSOLUTE); } if (error == 0) goto retry; /* don't restart after signals... 
*/ if (error == ERESTART) error = EINTR; else if (error == EWOULDBLOCK) error = 0; goto done; } TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); influx = 0; while (count) { KQ_OWNED(kq); kn = TAILQ_FIRST(&kq->kq_head); if ((kn->kn_status == KN_MARKER && kn != marker) || kn_in_flux(kn)) { if (influx) { influx = 0; KQ_FLUX_WAKEUP(kq); } kq->kq_state |= KQ_FLUXWAIT; error = msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); continue; } TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { kn->kn_status &= ~KN_QUEUED; kq->kq_count--; continue; } if (kn == marker) { KQ_FLUX_WAKEUP(kq); if (count == maxevents) goto retry; goto done; } KASSERT(!kn_in_flux(kn), ("knote %p is unexpectedly in flux", kn)); if ((kn->kn_flags & EV_DROP) == EV_DROP) { kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've * marked it as in flux. */ knote_drop(kn, td); KQ_LOCK(kq); continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've * marked the knote as being in flux. */ *kevp = kn->kn_kevent; knote_drop(kn, td); KQ_LOCK(kq); kn = NULL; } else { kn->kn_status |= KN_SCAN; kn_enter_flux(kn); KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); knl = kn_list_lock(kn); if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN); kn_leave_flux(kn); kq->kq_count--; kn_list_unlock(knl); influx = 1; continue; } touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); if (touch) kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); else *kevp = kn->kn_kevent; KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { /* * Manually clear knotes who weren't * 'touch'ed. */ if (touch == 0 && kn->kn_flags & EV_CLEAR) { kn->kn_data = 0; kn->kn_fflags = 0; } if (kn->kn_flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); kq->kq_count--; } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl); influx = 1; } /* we are returning a copy to the user */ kevp++; nkev++; count--; if (nkev == KQ_NEVENTS) { influx = 0; KQ_UNLOCK_FLUX(kq); error = k_ops->k_copyout(k_ops->arg, keva, nkev); nkev = 0; kevp = keva; KQ_LOCK(kq); if (error) break; } } TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe); done: KQ_OWNED(kq); KQ_UNLOCK_FLUX(kq); knote_free(marker); done_nl: KQ_NOTOWNED(kq); if (nkev != 0) error = k_ops->k_copyout(k_ops->arg, keva, nkev); td->td_retval[0] = maxevents - count; return (error); } /*ARGSUSED*/ static int kqueue_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { /* * Enabling sigio causes two major problems: * 1) infinite recursion: * Synopsys: kevent is being used to track signals and have FIOASYNC * set. On receipt of a signal this will cause a kqueue to recurse * into itself over and over. Sending the sigio causes the kqueue * to become ready, which in turn posts sigio again, forever. * Solution: this can be solved by setting a flag in the kqueue that * we have a SIGIO in progress. * 2) locking problems: * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts * us above the proc and pgrp locks. 
* Solution: Post a signal using an async mechanism, being sure to * record a generation count in the delivery so that we do not deliver * a signal to the wrong process. * * Note, these two mechanisms are somewhat mutually exclusive! */ #if 0 struct kqueue *kq; kq = fp->f_data; switch (cmd) { case FIOASYNC: if (*(int *)data) { kq->kq_state |= KQ_ASYNC; } else { kq->kq_state &= ~KQ_ASYNC; } return (0); case FIOSETOWN: return (fsetown(*(int *)data, &kq->kq_sigio)); case FIOGETOWN: *(int *)data = fgetown(&kq->kq_sigio); return (0); } #endif return (ENOTTY); } /*ARGSUSED*/ static int kqueue_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct kqueue *kq; int revents = 0; int error; if ((error = kqueue_acquire(fp, &kq))) return POLLERR; KQ_LOCK(kq); if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); } else { selrecord(td, &kq->kq_sel); if (SEL_WAITING(&kq->kq_sel)) kq->kq_state |= KQ_SEL; } } kqueue_release(kq, 1); KQ_UNLOCK(kq); return (revents); } /*ARGSUSED*/ static int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, struct thread *td) { bzero((void *)st, sizeof *st); /* * We no longer return kq_count because the unlocked value is useless. * If you spent all this time getting the count, why not spend your * syscall better by calling kevent? * * XXX - This is needed for libc_r. */ st->st_mode = S_IFIFO; return (0); } static void kqueue_drain(struct kqueue *kq, struct thread *td) { struct knote *kn; int i; KQ_LOCK(kq); KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, ("kqueue already closing")); kq->kq_state |= KQ_CLOSING; if (kq->kq_refcnt > 1) msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); KASSERT(knlist_empty(&kq->kq_sel.si_note), ("kqueue's knlist not empty")); for (i = 0; i < kq->kq_knlistsize; i++) { while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); continue; } kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); KQ_LOCK(kq); } } if (kq->kq_knhashmask != 0) { for (i = 0; i <= kq->kq_knhashmask; i++) { while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo2", 0); continue; } kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); KQ_LOCK(kq); } } } if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) { kq->kq_state |= KQ_TASKDRAIN; msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } KQ_UNLOCK(kq); } static void kqueue_destroy(struct kqueue *kq) { KASSERT(kq->kq_fdp == NULL, ("kqueue still attached to a file descriptor")); seldrain(&kq->kq_sel); knlist_destroy(&kq->kq_sel.si_note); mtx_destroy(&kq->kq_lock); if (kq->kq_knhash != NULL) free(kq->kq_knhash, M_KQUEUE); if (kq->kq_knlist != NULL) free(kq->kq_knlist, M_KQUEUE); funsetown(&kq->kq_sigio); } /*ARGSUSED*/ static int kqueue_close(struct file *fp, struct thread *td) { struct kqueue *kq = fp->f_data; struct filedesc *fdp; int error; int filedesc_unlock; if ((error = kqueue_acquire(fp, &kq))) return error; kqueue_drain(kq, td); /* * We could be called due to the knote_drop() doing fdrop(), * called from kqueue_register(). 
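kqueue_poll() above makes a kqueue descriptor usable with poll()/select(), and kqueue_kqfilter()/filt_kqueue() earlier in the file let one kqueue monitor another via EVFILT_READ, with data reporting how many events are pending on the inner queue. A sketch of that nesting:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct kevent ch, ev;
        int fds[2], inner, outer;

        if (pipe(fds) == -1)
                err(1, "pipe");
        if ((inner = kqueue()) == -1 || (outer = kqueue()) == -1)
                err(1, "kqueue");

        /* inner watches the pipe; outer watches the inner kqueue. */
        EV_SET(&ch, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
        if (kevent(inner, &ch, 1, NULL, 0, NULL) == -1)
                err(1, "register pipe");
        EV_SET(&ch, inner, EVFILT_READ, EV_ADD, 0, 0, NULL);
        if (kevent(outer, &ch, 1, NULL, 0, NULL) == -1)
                err(1, "register inner kq");

        (void)write(fds[1], "x", 1);

        /* The outer kqueue reports readiness of the inner one; data is the
           inner queue's pending event count (see filt_kqueue()). */
        if (kevent(outer, NULL, 0, &ev, 1, NULL) == -1)
                err(1, "kevent");
        printf("inner kqueue has %ld pending event(s)\n", (long)ev.data);
        return (0);
}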
In this case the global * lock is owned, and filedesc sx is locked before, to not * take the sleepable lock after non-sleepable. */ fdp = kq->kq_fdp; kq->kq_fdp = NULL; if (!sx_xlocked(FILEDESC_LOCK(fdp))) { FILEDESC_XLOCK(fdp); filedesc_unlock = 1; } else filedesc_unlock = 0; TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list); if (filedesc_unlock) FILEDESC_XUNLOCK(fdp); kqueue_destroy(kq); chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0); crfree(kq->kq_cred); free(kq, M_KQUEUE); fp->f_data = NULL; return (0); } static int kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { kif->kf_type = KF_TYPE_KQUEUE; return (0); } static void kqueue_wakeup(struct kqueue *kq) { KQ_OWNED(kq); if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) { kq->kq_state &= ~KQ_SLEEP; wakeup(kq); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } if (!knlist_empty(&kq->kq_sel.si_note)) kqueue_schedtask(kq); if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) { pgsigio(&kq->kq_sigio, SIGIO, 0); } } /* * Walk down a list of knotes, activating them if their event has triggered. * * There is a possibility to optimize in the case of one kq watching another. * Instead of scheduling a task to wake it up, you could pass enough state * down the chain to make up the parent kqueue. Make this code functional * first. */ void knote(struct knlist *list, long hint, int lockflags) { struct kqueue *kq; struct knote *kn, *tkn; int error; if (list == NULL) return; KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED); if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_lock(list->kl_lockarg); /* * If we unlock the list lock (and enter influx), we can * eliminate the kqueue scheduling, but this will introduce * four lock/unlock's for each knote to test. Also, marker * would be needed to keep iteration position, since filters * or other threads could remove events. */ SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { /* * Do not process the influx notes, except for * the influx coming from the kq unlock in the * kqueue_scan(). In the later case, we do * not interfere with the scan, since the code * fragment in kqueue_scan() locks the knlist, * and cannot proceed until we finished. 
*/ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { kn_enter_flux(kn); KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); kn_leave_flux(kn); if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); } else { kn->kn_status |= KN_HASKQLOCK; if (kn->kn_fop->f_event(kn, hint)) KNOTE_ACTIVATE(kn, 1); kn->kn_status &= ~KN_HASKQLOCK; KQ_UNLOCK(kq); } } if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_unlock(list->kl_lockarg); } /* * add a knote to a knlist */ void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); KASSERT((kn->kn_status & KN_DETACHED) != 0, ("knote %p was not detached", kn)); if (!islocked) knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); if (!islocked) knl->kl_unlock(knl->kl_lockarg); KQ_LOCK(kn->kn_kq); kn->kn_knlist = knl; kn->kn_status &= ~KN_DETACHED; KQ_UNLOCK(kn->kn_kq); } static void knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) { KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED); KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn)); KASSERT((kn->kn_status & KN_DETACHED) == 0, ("knote %p was already detached", kn)); if (!knlislocked) knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) kn_list_unlock(knl); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; if (!kqislocked) KQ_UNLOCK(kn->kn_kq); } /* * remove knote from the specified knlist */ void knlist_remove(struct knlist *knl, struct knote *kn, int islocked) { knlist_remove_kq(knl, kn, islocked, 0); } int knlist_empty(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); return (SLIST_EMPTY(&knl->kl_list)); } static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", MTX_DEF); static void knlist_mtx_lock(void *arg); static void knlist_mtx_unlock(void *arg); static void knlist_mtx_lock(void *arg) { mtx_lock((struct mtx *)arg); } static void knlist_mtx_unlock(void *arg) { mtx_unlock((struct mtx *)arg); } static void knlist_mtx_assert_locked(void *arg) { mtx_assert((struct mtx *)arg, MA_OWNED); } static void knlist_mtx_assert_unlocked(void *arg) { mtx_assert((struct mtx *)arg, MA_NOTOWNED); } static void knlist_rw_rlock(void *arg) { rw_rlock((struct rwlock *)arg); } static void knlist_rw_runlock(void *arg) { rw_runlock((struct rwlock *)arg); } static void knlist_rw_assert_locked(void *arg) { rw_assert((struct rwlock *)arg, RA_LOCKED); } static void knlist_rw_assert_unlocked(void *arg) { rw_assert((struct rwlock *)arg, RA_UNLOCKED); } void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *)) { if (lock == NULL) knl->kl_lockarg = &knlist_lock; else knl->kl_lockarg = lock; if (kl_lock == NULL) knl->kl_lock = knlist_mtx_lock; else knl->kl_lock = kl_lock; if (kl_unlock == NULL) knl->kl_unlock = knlist_mtx_unlock; else knl->kl_unlock = kl_unlock; if (kl_assert_locked == NULL) knl->kl_assert_locked = knlist_mtx_assert_locked; else knl->kl_assert_locked = kl_assert_locked; if (kl_assert_unlocked == NULL) knl->kl_assert_unlocked = knlist_mtx_assert_unlocked; else knl->kl_assert_unlocked = kl_assert_unlocked; knl->kl_autodestroy = 0; 
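knote() and the knlist helpers above are the delivery side of the interface: a subsystem keeps a knlist (usually the si_note embedded in its struct selinfo), attaches incoming knotes from its kqfilter method, and calls KNOTE_LOCKED()/KNOTE_UNLOCKED() when its state changes. A condensed driver-style sketch; the mydev names, softc layout, and cdevsw wiring are hypothetical, and mydev_kqfilter would be installed as .d_kqfilter:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/selinfo.h>

struct mydev_softc {
        struct mtx      sc_mtx;
        struct selinfo  sc_rsel;        /* si_note holds the knlist */
        size_t          sc_avail;       /* bytes ready for reading */
};

static void
mydev_softc_init(struct mydev_softc *sc)
{
        mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
        /* Tie the knlist lock to the softc mutex. */
        knlist_init_mtx(&sc->sc_rsel.si_note, &sc->sc_mtx);
}

static void
filt_mydevrdetach(struct knote *kn)
{
        struct mydev_softc *sc = kn->kn_hook;

        knlist_remove(&sc->sc_rsel.si_note, kn, 0);
}

static int
filt_mydevread(struct knote *kn, long hint)
{
        struct mydev_softc *sc = kn->kn_hook;

        /* May be called from knote() with the list lock held or from
           kqueue_scan() without it; a real driver must mind locking. */
        kn->kn_data = sc->sc_avail;
        return (kn->kn_data > 0);
}

static struct filterops mydev_rfiltops = {
        .f_isfd = 1,
        .f_detach = filt_mydevrdetach,
        .f_event = filt_mydevread,
};

static int
mydev_kqfilter(struct cdev *dev, struct knote *kn)
{
        struct mydev_softc *sc = dev->si_drv1;

        if (kn->kn_filter != EVFILT_READ)
                return (EINVAL);
        kn->kn_fop = &mydev_rfiltops;
        kn->kn_hook = sc;
        knlist_add(&sc->sc_rsel.si_note, kn, 0);
        return (0);
}

static void
mydev_data_arrived(struct mydev_softc *sc, size_t n)
{
        mtx_lock(&sc->sc_mtx);
        sc->sc_avail += n;
        /* The list lock is sc_mtx, so the locked variant is correct here. */
        KNOTE_LOCKED(&sc->sc_rsel.si_note, 0);
        mtx_unlock(&sc->sc_mtx);
}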
SLIST_INIT(&knl->kl_list); } void knlist_init_mtx(struct knlist *knl, struct mtx *lock) { knlist_init(knl, lock, NULL, NULL, NULL, NULL); } struct knlist * knlist_alloc(struct mtx *lock) { struct knlist *knl; knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); knlist_init_mtx(knl, lock); return (knl); } void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock) { knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock, knlist_rw_assert_locked, knlist_rw_assert_unlocked); } void knlist_destroy(struct knlist *knl) { KASSERT(KNLIST_EMPTY(knl), ("destroying knlist %p with knotes on it", knl)); } void knlist_detach(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); knl->kl_autodestroy = 1; if (knlist_empty(knl)) { knlist_destroy(knl); free(knl, M_KQUEUE); } } /* * Even if we are locked, we may need to drop the lock to allow any influx * knotes time to "settle". */ void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) { struct knote *kn, *kn2; struct kqueue *kq; KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); if (islocked) KNL_ASSERT_LOCKED(knl); else { KNL_ASSERT_UNLOCKED(knl); again: /* need to reacquire lock since we have dropped it */ knl->kl_lock(knl->kl_lockarg); } SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn)) { KQ_UNLOCK(kq); continue; } knlist_remove_kq(knl, kn, 1, 1); if (killkn) { kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop_detached(kn, td); } else { /* Make sure cleared knotes disappear soon */ kn->kn_flags |= EV_EOF | EV_ONESHOT; KQ_UNLOCK(kq); } kq = NULL; } if (!SLIST_EMPTY(&knl->kl_list)) { /* there are still in flux knotes remaining */ kn = SLIST_FIRST(&knl->kl_list); kq = kn->kn_kq; KQ_LOCK(kq); KASSERT(kn_in_flux(kn), ("knote removed w/o list lock")); knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); kq = NULL; goto again; } if (islocked) KNL_ASSERT_LOCKED(knl); else { knl->kl_unlock(knl->kl_lockarg); KNL_ASSERT_UNLOCKED(knl); } } /* * Remove all knotes referencing a specified fd must be called with FILEDESC * lock. This prevents a race where a new fd comes along and occupies the * entry and we attach a knote to the fd. */ void knote_fdclose(struct thread *td, int fd) { struct filedesc *fdp = td->td_proc->p_fd; struct kqueue *kq; struct knote *kn; int influx; FILEDESC_XLOCK_ASSERT(fdp); /* * We shouldn't have to worry about new kevents appearing on fd * since filedesc is locked. 
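knote_fdclose() above is why closing a descriptor silently removes any knotes attached to it; no error or EV_EOF is delivered for the registration itself, the events simply stop. A small demonstration, assuming ordinary pipe semantics:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        struct kevent ch, ev;
        struct timespec zero = { 0, 0 };
        int fds[2], kq, n;

        if (pipe(fds) == -1 || (kq = kqueue()) == -1)
                err(1, "setup");

        EV_SET(&ch, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
        if (kevent(kq, &ch, 1, NULL, 0, NULL) == -1)
                err(1, "register");
        (void)write(fds[1], "x", 1);

        /* Closing the monitored fd drops the knote (knote_fdclose()), so the
           readable byte written above is never reported. */
        close(fds[0]);

        n = kevent(kq, NULL, 0, &ev, 1, &zero);
        printf("%d event(s) after close\n", n);         /* expect 0 */
        return (0);
}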
*/ TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { KQ_LOCK(kq); again: influx = 0; while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { if (kn_in_flux(kn)) { /* someone else might be waiting on our knote */ if (influx) wakeup(kq); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); goto again; } kn_enter_flux(kn); KQ_UNLOCK(kq); influx = 1; knote_drop(kn, td); KQ_LOCK(kq); } KQ_UNLOCK_FLUX(kq); } } static int knote_attach(struct knote *kn, struct kqueue *kq) { struct klist *list; KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn)); KQ_OWNED(kq); if (kn->kn_fop->f_isfd) { if (kn->kn_id >= kq->kq_knlistsize) return (ENOMEM); list = &kq->kq_knlist[kn->kn_id]; } else { if (kq->kq_knhash == NULL) return (ENOMEM); list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } SLIST_INSERT_HEAD(list, kn, kn_link); return (0); } static void knote_drop(struct knote *kn, struct thread *td) { if ((kn->kn_status & KN_DETACHED) == 0) kn->kn_fop->f_detach(kn); knote_drop_detached(kn, td); } static void knote_drop_detached(struct knote *kn, struct thread *td) { struct kqueue *kq; struct klist *list; kq = kn->kn_kq; KASSERT((kn->kn_status & KN_DETACHED) != 0, ("knote %p still attached", kn)); KQ_NOTOWNED(kq); KQ_LOCK(kq); KASSERT(kn->kn_influx == 1, ("knote_drop called on %p with influx %d", kn, kn->kn_influx)); if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; if (!SLIST_EMPTY(list)) SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); KQ_UNLOCK_FLUX(kq); if (kn->kn_fop->f_isfd) { fdrop(kn->kn_fp, td); kn->kn_fp = NULL; } kqueue_fo_release(kn->kn_kevent.filter); kn->kn_fop = NULL; knote_free(kn); } static void knote_enqueue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; kqueue_wakeup(kq); } static void knote_dequeue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; } static void knote_init(void) { knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); static struct knote * knote_alloc(int waitok) { return (uma_zalloc(knote_zone, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO)); } static void knote_free(struct knote *kn) { uma_zfree(knote_zone, kn); } /* * Register the kev w/ the kq specified by fd. */ int kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) { struct kqueue *kq; struct file *fp; cap_rights_t rights; int error; error = fget(td, fd, cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &fp); if (error != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto noacquire; error = kqueue_register(kq, kev, td, waitok); kqueue_release(kq, 0); noacquire: fdrop(fp, td); return (error); } Index: head/sys/kern/syscalls.master =================================================================== --- head/sys/kern/syscalls.master (revision 320042) +++ head/sys/kern/syscalls.master (revision 320043) @@ -1,1022 +1,1028 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; ; System call name/number master file. 
; Processed to created init_sysent.c, syscalls.c and syscall.h. ; Columns: number audit type name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; audit the audit event associated with the system call ; A value of AUE_NULL means no auditing, but it also means that ; there is no audit event for the call at this time. For the ; case where the event exists, but we don't want auditing, the ; event should be #defined to AUE_NULL in audit_kevents.h. ; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6, ; COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD ; The COMPAT* options may be combined with one or more NO* ; options separated by '|' with no spaces (e.g. COMPAT|NOARGS) ; name psuedo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT_FREEBSD4 #ifdef (FreeBSD 4 compat) ; COMPAT6 included on COMPAT_FREEBSD6 #ifdef (FreeBSD 6 compat) ; COMPAT7 included on COMPAT_FREEBSD7 #ifdef (FreeBSD 7 compat) ; COMPAT10 included on COMPAT_FREEBSD10 #ifdef (FreeBSD 10 compat) ; COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat) ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in; sysent entry will be filled with lkmressys ; so the SYSCALL_MODULE macro works ; NOARGS same as STD except do not create structure in sys/sysproto.h ; NODEF same as STD except only have the entry in the syscall table ; added. Meaning - do not create structure or function ; prototype in sys/sysproto.h ; NOPROTO same as STD except do not create structure or ; function prototype in sys/sysproto.h. Does add a ; definition to syscall.h besides adding a sysent. ; NOTSTATIC syscall is loadable ; ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 0 AUE_NULL STD { int nosys(void); } syscall nosys_args int 1 AUE_EXIT STD { void sys_exit(int rval); } exit \ sys_exit_args void 2 AUE_FORK STD { int fork(void); } 3 AUE_READ STD { ssize_t read(int fd, void *buf, \ size_t nbyte); } 4 AUE_WRITE STD { ssize_t write(int fd, const void *buf, \ size_t nbyte); } 5 AUE_OPEN_RWTC STD { int open(char *path, int flags, int mode); } ; XXX should be { int open(const char *path, int flags, ...); } ; but we're not ready for `const' or varargs. ; XXX man page says `mode_t mode'. 
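As a quick orientation for the table that follows: each STD line is machine-processed into an argument structure, a sys_*() handler prototype, and a sysent slot, exactly as the header above says. The sketch below is a simplified, hand-written approximation of that output for the read(2) entry (number 3); the real generator additionally pads every member to register width and emits audit and capability metadata, so treat this only as an illustration of the shape, not the literal contents, of sys/sysproto.h and init_sysent.c.

/*
 * Simplified illustration only -- not the literal generated code.
 * sys/sysproto.h, roughly:
 */
struct read_args {
	int	fd;	/* padded to register width in the real header */
	void	*buf;
	size_t	nbyte;
};
int	sys_read(struct thread *td, struct read_args *uap);

/* init_sysent.c then gets a matching slot, roughly: */
/*	{ AS(read_args), (sy_call_t *)sys_read, AUE_READ, ... },	3 = read */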
6 AUE_CLOSE STD { int close(int fd); } 7 AUE_WAIT4 STD { int wait4(int pid, int *status, \ int options, struct rusage *rusage); } 8 AUE_CREAT COMPAT { int creat(char *path, int mode); } 9 AUE_LINK STD { int link(char *path, char *link); } 10 AUE_UNLINK STD { int unlink(char *path); } 11 AUE_NULL OBSOL execv 12 AUE_CHDIR STD { int chdir(char *path); } 13 AUE_FCHDIR STD { int fchdir(int fd); } 14 AUE_MKNOD COMPAT11 { int mknod(char *path, int mode, int dev); } 15 AUE_CHMOD STD { int chmod(char *path, int mode); } 16 AUE_CHOWN STD { int chown(char *path, int uid, int gid); } 17 AUE_NULL STD { int obreak(char *nsize); } break \ obreak_args int 18 AUE_GETFSSTAT COMPAT4 { int getfsstat(struct ostatfs *buf, \ long bufsize, int mode); } 19 AUE_LSEEK COMPAT { long lseek(int fd, long offset, \ int whence); } 20 AUE_GETPID STD { pid_t getpid(void); } 21 AUE_MOUNT STD { int mount(char *type, char *path, \ int flags, caddr_t data); } ; XXX `path' should have type `const char *' but we're not ready for that. 22 AUE_UMOUNT STD { int unmount(char *path, int flags); } 23 AUE_SETUID STD { int setuid(uid_t uid); } 24 AUE_GETUID STD { uid_t getuid(void); } 25 AUE_GETEUID STD { uid_t geteuid(void); } 26 AUE_PTRACE STD { int ptrace(int req, pid_t pid, \ caddr_t addr, int data); } 27 AUE_RECVMSG STD { int recvmsg(int s, struct msghdr *msg, \ int flags); } 28 AUE_SENDMSG STD { int sendmsg(int s, struct msghdr *msg, \ int flags); } 29 AUE_RECVFROM STD { int recvfrom(int s, caddr_t buf, \ size_t len, int flags, \ struct sockaddr * __restrict from, \ __socklen_t * __restrict fromlenaddr); } 30 AUE_ACCEPT STD { int accept(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen); } 31 AUE_GETPEERNAME STD { int getpeername(int fdes, \ struct sockaddr * __restrict asa, \ __socklen_t * __restrict alen); } 32 AUE_GETSOCKNAME STD { int getsockname(int fdes, \ struct sockaddr * __restrict asa, \ __socklen_t * __restrict alen); } 33 AUE_ACCESS STD { int access(char *path, int amode); } 34 AUE_CHFLAGS STD { int chflags(const char *path, u_long flags); } 35 AUE_FCHFLAGS STD { int fchflags(int fd, u_long flags); } 36 AUE_SYNC STD { int sync(void); } 37 AUE_KILL STD { int kill(int pid, int signum); } 38 AUE_STAT COMPAT { int stat(char *path, struct ostat *ub); } 39 AUE_GETPPID STD { pid_t getppid(void); } 40 AUE_LSTAT COMPAT { int lstat(char *path, struct ostat *ub); } 41 AUE_DUP STD { int dup(u_int fd); } 42 AUE_PIPE COMPAT10 { int pipe(void); } 43 AUE_GETEGID STD { gid_t getegid(void); } 44 AUE_PROFILE STD { int profil(caddr_t samples, size_t size, \ size_t offset, u_int scale); } 45 AUE_KTRACE STD { int ktrace(const char *fname, int ops, \ int facs, int pid); } 46 AUE_SIGACTION COMPAT { int sigaction(int signum, \ struct osigaction *nsa, \ struct osigaction *osa); } 47 AUE_GETGID STD { gid_t getgid(void); } 48 AUE_SIGPROCMASK COMPAT { int sigprocmask(int how, osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it, and we return the old mask as the ; (int) return value. 
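To make the sigprocmask() note above concrete: under the COMPAT entry (number 48) the old libc stub passed the osigset_t mask by value and received the previous mask back as the int return value, while the modern entry (number 340, further down) takes both masks by pointer. The snippet below is an ordinary userland example of the modern interface only, included for contrast; the old convention is recalled in a comment since it is not reachable from current libc.

#include <signal.h>
#include <stdio.h>

int
main(void)
{
	sigset_t set, oset;

	/* Modern interface (entry 340): both masks passed by pointer. */
	sigemptyset(&set);
	sigaddset(&set, SIGUSR1);
	if (sigprocmask(SIG_BLOCK, &set, &oset) == -1)
		perror("sigprocmask");

	/*
	 * The COMPAT entry instead behaved roughly like
	 *	old = osigprocmask(SIG_BLOCK, mask);
	 * with the mask passed by value and the previous mask returned
	 * as the int result -- the "bogus" convention noted above.
	 */
	printf("SIGUSR1 was %sblocked before\n",
	    sigismember(&oset, SIGUSR1) ? "" : "not ");
	return (0);
}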
49 AUE_GETLOGIN STD { int getlogin(char *namebuf, u_int \ namelen); } 50 AUE_SETLOGIN STD { int setlogin(char *namebuf); } 51 AUE_ACCT STD { int acct(char *path); } 52 AUE_SIGPENDING COMPAT { int sigpending(void); } 53 AUE_SIGALTSTACK STD { int sigaltstack(stack_t *ss, \ stack_t *oss); } 54 AUE_IOCTL STD { int ioctl(int fd, u_long com, \ caddr_t data); } 55 AUE_REBOOT STD { int reboot(int opt); } 56 AUE_REVOKE STD { int revoke(char *path); } 57 AUE_SYMLINK STD { int symlink(char *path, char *link); } 58 AUE_READLINK STD { ssize_t readlink(char *path, char *buf, \ size_t count); } 59 AUE_EXECVE STD { int execve(char *fname, char **argv, \ char **envv); } 60 AUE_UMASK STD { int umask(int newmask); } umask umask_args \ int 61 AUE_CHROOT STD { int chroot(char *path); } 62 AUE_FSTAT COMPAT { int fstat(int fd, struct ostat *sb); } 63 AUE_NULL COMPAT { int getkerninfo(int op, char *where, \ size_t *size, int arg); } getkerninfo \ getkerninfo_args int 64 AUE_NULL COMPAT { int getpagesize(void); } getpagesize \ getpagesize_args int 65 AUE_MSYNC STD { int msync(void *addr, size_t len, \ int flags); } 66 AUE_VFORK STD { int vfork(void); } 67 AUE_NULL OBSOL vread 68 AUE_NULL OBSOL vwrite 69 AUE_SBRK STD { int sbrk(int incr); } 70 AUE_SSTK STD { int sstk(int incr); } 71 AUE_MMAP COMPAT { int mmap(void *addr, int len, int prot, \ int flags, int fd, long pos); } 72 AUE_O_VADVISE STD { int ovadvise(int anom); } vadvise \ ovadvise_args int 73 AUE_MUNMAP STD { int munmap(void *addr, size_t len); } 74 AUE_MPROTECT STD { int mprotect(void *addr, size_t len, \ int prot); } 75 AUE_MADVISE STD { int madvise(void *addr, size_t len, \ int behav); } 76 AUE_NULL OBSOL vhangup 77 AUE_NULL OBSOL vlimit 78 AUE_MINCORE STD { int mincore(const void *addr, size_t len, \ char *vec); } 79 AUE_GETGROUPS STD { int getgroups(u_int gidsetsize, \ gid_t *gidset); } 80 AUE_SETGROUPS STD { int setgroups(u_int gidsetsize, \ gid_t *gidset); } 81 AUE_GETPGRP STD { int getpgrp(void); } 82 AUE_SETPGRP STD { int setpgid(int pid, int pgid); } 83 AUE_SETITIMER STD { int setitimer(u_int which, struct \ itimerval *itv, struct itimerval *oitv); } 84 AUE_WAIT4 COMPAT { int wait(void); } 85 AUE_SWAPON STD { int swapon(char *name); } 86 AUE_GETITIMER STD { int getitimer(u_int which, \ struct itimerval *itv); } 87 AUE_SYSCTL COMPAT { int gethostname(char *hostname, \ u_int len); } gethostname \ gethostname_args int 88 AUE_SYSCTL COMPAT { int sethostname(char *hostname, \ u_int len); } sethostname \ sethostname_args int 89 AUE_GETDTABLESIZE STD { int getdtablesize(void); } 90 AUE_DUP2 STD { int dup2(u_int from, u_int to); } 91 AUE_NULL UNIMPL getdopt 92 AUE_FCNTL STD { int fcntl(int fd, int cmd, long arg); } ; XXX should be { int fcntl(int fd, int cmd, ...); } ; but we're not ready for varargs. 
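Similarly, the fcntl() note above concerns the kernel-side declaration only: the userland prototype is variadic, and whatever optional argument is supplied travels through the single `long arg` slot of entry 92. A minimal conventional use, included purely as an illustration:

#include <fcntl.h>
#include <unistd.h>
#include <err.h>

int
main(void)
{
	int flags;

	/* Third argument omitted: F_GETFL takes none. */
	if ((flags = fcntl(STDIN_FILENO, F_GETFL)) == -1)
		err(1, "F_GETFL");

	/* Third argument present: it is carried in the `long arg` slot. */
	if (fcntl(STDIN_FILENO, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "F_SETFL");
	return (0);
}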
93 AUE_SELECT STD { int select(int nd, fd_set *in, fd_set *ou, \ fd_set *ex, struct timeval *tv); } 94 AUE_NULL UNIMPL setdopt 95 AUE_FSYNC STD { int fsync(int fd); } 96 AUE_SETPRIORITY STD { int setpriority(int which, int who, \ int prio); } 97 AUE_SOCKET STD { int socket(int domain, int type, \ int protocol); } 98 AUE_CONNECT STD { int connect(int s, caddr_t name, \ int namelen); } 99 AUE_ACCEPT COMPAT|NOARGS { int accept(int s, caddr_t name, \ int *anamelen); } accept accept_args int 100 AUE_GETPRIORITY STD { int getpriority(int which, int who); } 101 AUE_SEND COMPAT { int send(int s, caddr_t buf, int len, \ int flags); } 102 AUE_RECV COMPAT { int recv(int s, caddr_t buf, int len, \ int flags); } 103 AUE_SIGRETURN COMPAT { int sigreturn( \ struct osigcontext *sigcntxp); } 104 AUE_BIND STD { int bind(int s, caddr_t name, \ int namelen); } 105 AUE_SETSOCKOPT STD { int setsockopt(int s, int level, int name, \ caddr_t val, int valsize); } 106 AUE_LISTEN STD { int listen(int s, int backlog); } 107 AUE_NULL OBSOL vtimes 108 AUE_NULL COMPAT { int sigvec(int signum, struct sigvec *nsv, \ struct sigvec *osv); } 109 AUE_NULL COMPAT { int sigblock(int mask); } 110 AUE_NULL COMPAT { int sigsetmask(int mask); } 111 AUE_NULL COMPAT { int sigsuspend(osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it. 112 AUE_NULL COMPAT { int sigstack(struct sigstack *nss, \ struct sigstack *oss); } 113 AUE_RECVMSG COMPAT { int recvmsg(int s, struct omsghdr *msg, \ int flags); } 114 AUE_SENDMSG COMPAT { int sendmsg(int s, caddr_t msg, \ int flags); } 115 AUE_NULL OBSOL vtrace 116 AUE_GETTIMEOFDAY STD { int gettimeofday(struct timeval *tp, \ struct timezone *tzp); } 117 AUE_GETRUSAGE STD { int getrusage(int who, \ struct rusage *rusage); } 118 AUE_GETSOCKOPT STD { int getsockopt(int s, int level, int name, \ caddr_t val, int *avalsize); } 119 AUE_NULL UNIMPL resuba (BSD/OS 2.x) 120 AUE_READV STD { int readv(int fd, struct iovec *iovp, \ u_int iovcnt); } 121 AUE_WRITEV STD { int writev(int fd, struct iovec *iovp, \ u_int iovcnt); } 122 AUE_SETTIMEOFDAY STD { int settimeofday(struct timeval *tv, \ struct timezone *tzp); } 123 AUE_FCHOWN STD { int fchown(int fd, int uid, int gid); } 124 AUE_FCHMOD STD { int fchmod(int fd, int mode); } 125 AUE_RECVFROM COMPAT|NOARGS { int recvfrom(int s, caddr_t buf, \ size_t len, int flags, caddr_t from, int \ *fromlenaddr); } recvfrom recvfrom_args \ int 126 AUE_SETREUID STD { int setreuid(int ruid, int euid); } 127 AUE_SETREGID STD { int setregid(int rgid, int egid); } 128 AUE_RENAME STD { int rename(char *from, char *to); } 129 AUE_TRUNCATE COMPAT { int truncate(char *path, long length); } 130 AUE_FTRUNCATE COMPAT { int ftruncate(int fd, long length); } 131 AUE_FLOCK STD { int flock(int fd, int how); } 132 AUE_MKFIFO STD { int mkfifo(char *path, int mode); } 133 AUE_SENDTO STD { int sendto(int s, caddr_t buf, size_t len, \ int flags, caddr_t to, int tolen); } 134 AUE_SHUTDOWN STD { int shutdown(int s, int how); } 135 AUE_SOCKETPAIR STD { int socketpair(int domain, int type, \ int protocol, int *rsv); } 136 AUE_MKDIR STD { int mkdir(char *path, int mode); } 137 AUE_RMDIR STD { int rmdir(char *path); } 138 AUE_UTIMES STD { int utimes(char *path, \ struct timeval *tptr); } 139 AUE_NULL OBSOL 4.2 sigreturn 140 AUE_ADJTIME STD { int adjtime(struct timeval *delta, \ struct timeval *olddelta); } 141 AUE_GETPEERNAME COMPAT { int getpeername(int fdes, caddr_t asa, \ int *alen); } 142 AUE_SYSCTL COMPAT { long 
gethostid(void); } 143 AUE_SYSCTL COMPAT { int sethostid(long hostid); } 144 AUE_GETRLIMIT COMPAT { int getrlimit(u_int which, struct \ orlimit *rlp); } 145 AUE_SETRLIMIT COMPAT { int setrlimit(u_int which, \ struct orlimit *rlp); } 146 AUE_KILLPG COMPAT { int killpg(int pgid, int signum); } 147 AUE_SETSID STD { int setsid(void); } 148 AUE_QUOTACTL STD { int quotactl(char *path, int cmd, int uid, \ caddr_t arg); } 149 AUE_O_QUOTA COMPAT { int quota(void); } 150 AUE_GETSOCKNAME COMPAT|NOARGS { int getsockname(int fdec, \ caddr_t asa, int *alen); } getsockname \ getsockname_args int ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x) 152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x) 153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x) ; 154 is initialised by the NLM code, if present. 154 AUE_NULL NOSTD { int nlm_syscall(int debug_level, int grace_period, int addr_count, char **addrs); } ; 155 is initialized by the NFS code, if present. 155 AUE_NFS_SVC NOSTD { int nfssvc(int flag, caddr_t argp); } 156 AUE_GETDIRENTRIES COMPAT { int getdirentries(int fd, char *buf, \ u_int count, long *basep); } 157 AUE_STATFS COMPAT4 { int statfs(char *path, \ struct ostatfs *buf); } 158 AUE_FSTATFS COMPAT4 { int fstatfs(int fd, \ struct ostatfs *buf); } 159 AUE_NULL UNIMPL nosys 160 AUE_LGETFH STD { int lgetfh(char *fname, \ struct fhandle *fhp); } 161 AUE_NFS_GETFH STD { int getfh(char *fname, \ struct fhandle *fhp); } 162 AUE_SYSCTL COMPAT4 { int getdomainname(char *domainname, \ int len); } 163 AUE_SYSCTL COMPAT4 { int setdomainname(char *domainname, \ int len); } 164 AUE_NULL COMPAT4 { int uname(struct utsname *name); } 165 AUE_SYSARCH STD { int sysarch(int op, char *parms); } 166 AUE_RTPRIO STD { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys 168 AUE_NULL UNIMPL nosys 169 AUE_SEMSYS NOSTD { int semsys(int which, int a2, int a3, \ int a4, int a5); } ; XXX should be { int semsys(int which, ...); } 170 AUE_MSGSYS NOSTD { int msgsys(int which, int a2, int a3, \ int a4, int a5, int a6); } ; XXX should be { int msgsys(int which, ...); } 171 AUE_SHMSYS NOSTD { int shmsys(int which, int a2, int a3, \ int a4); } ; XXX should be { int shmsys(int which, ...); } 172 AUE_NULL UNIMPL nosys 173 AUE_PREAD COMPAT6 { ssize_t pread(int fd, void *buf, \ size_t nbyte, int pad, off_t offset); } 174 AUE_PWRITE COMPAT6 { ssize_t pwrite(int fd, \ const void *buf, \ size_t nbyte, int pad, off_t offset); } 175 AUE_SETFIB STD { int setfib(int fibnum); } 176 AUE_NTP_ADJTIME STD { int ntp_adjtime(struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) 179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x) 180 AUE_NULL UNIMPL nosys ; Syscalls 181-199 are used by/reserved for BSD 181 AUE_SETGID STD { int setgid(gid_t gid); } 182 AUE_SETEGID STD { int setegid(gid_t egid); } 183 AUE_SETEUID STD { int seteuid(uid_t euid); } 184 AUE_NULL UNIMPL lfs_bmapv 185 AUE_NULL UNIMPL lfs_markv 186 AUE_NULL UNIMPL lfs_segclean 187 AUE_NULL UNIMPL lfs_segwait 188 AUE_STAT COMPAT11 { int stat(char *path, \ struct freebsd11_stat *ub); } 189 AUE_FSTAT COMPAT11 { int fstat(int fd, \ struct freebsd11_stat *sb); } 190 AUE_LSTAT COMPAT11 { int lstat(char *path, \ struct freebsd11_stat *ub); } 191 AUE_PATHCONF STD { int pathconf(char *path, int name); } 192 AUE_FPATHCONF STD { int fpathconf(int 
fd, int name); } 193 AUE_NULL UNIMPL nosys 194 AUE_GETRLIMIT STD { int getrlimit(u_int which, \ struct rlimit *rlp); } getrlimit \ __getrlimit_args int 195 AUE_SETRLIMIT STD { int setrlimit(u_int which, \ struct rlimit *rlp); } setrlimit \ __setrlimit_args int 196 AUE_GETDIRENTRIES COMPAT11 { int getdirentries(int fd, char *buf, \ u_int count, long *basep); } 197 AUE_MMAP COMPAT6 { caddr_t mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, off_t pos); } 198 AUE_NULL NOPROTO { int nosys(void); } __syscall \ __syscall_args int 199 AUE_LSEEK COMPAT6 { off_t lseek(int fd, int pad, \ off_t offset, int whence); } 200 AUE_TRUNCATE COMPAT6 { int truncate(char *path, int pad, \ off_t length); } 201 AUE_FTRUNCATE COMPAT6 { int ftruncate(int fd, int pad, \ off_t length); } 202 AUE_SYSCTL STD { int __sysctl(int *name, u_int namelen, \ void *old, size_t *oldlenp, void *new, \ size_t newlen); } __sysctl sysctl_args int 203 AUE_MLOCK STD { int mlock(const void *addr, size_t len); } 204 AUE_MUNLOCK STD { int munlock(const void *addr, size_t len); } 205 AUE_UNDELETE STD { int undelete(char *path); } 206 AUE_FUTIMES STD { int futimes(int fd, struct timeval *tptr); } 207 AUE_GETPGID STD { int getpgid(pid_t pid); } 208 AUE_NULL UNIMPL newreboot (NetBSD) 209 AUE_POLL STD { int poll(struct pollfd *fds, u_int nfds, \ int timeout); } ; ; The following are reserved for loadable syscalls ; 210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int ; ; The following were introduced with NetBSD/4.4Lite-2 220 AUE_SEMCTL COMPAT7|NOSTD { int __semctl(int semid, int semnum, \ int cmd, union semun_old *arg); } 221 AUE_SEMGET NOSTD { int semget(key_t key, int nsems, \ int semflg); } 222 AUE_SEMOP NOSTD { int semop(int semid, struct sembuf *sops, \ size_t nsops); } 223 AUE_NULL UNIMPL semconfig 224 AUE_MSGCTL COMPAT7|NOSTD { int msgctl(int msqid, int cmd, \ struct msqid_ds_old *buf); } 225 AUE_MSGGET NOSTD { int msgget(key_t key, int msgflg); } 226 AUE_MSGSND NOSTD { int msgsnd(int msqid, const void *msgp, \ size_t msgsz, int msgflg); } 227 AUE_MSGRCV NOSTD { ssize_t msgrcv(int msqid, void *msgp, \ size_t msgsz, long msgtyp, int msgflg); } 228 AUE_SHMAT NOSTD { int shmat(int shmid, const void *shmaddr, \ int shmflg); } 229 AUE_SHMCTL COMPAT7|NOSTD { int shmctl(int shmid, int cmd, \ struct shmid_ds_old *buf); } 230 AUE_SHMDT NOSTD { int shmdt(const void *shmaddr); } 231 AUE_SHMGET NOSTD { int shmget(key_t key, size_t size, \ int shmflg); } ; 232 AUE_NULL STD { int clock_gettime(clockid_t clock_id, \ struct timespec *tp); } 233 AUE_CLOCK_SETTIME STD { int clock_settime( \ clockid_t clock_id, \ const struct timespec *tp); } 234 AUE_NULL STD { int clock_getres(clockid_t clock_id, \ struct timespec *tp); } 235 AUE_NULL STD { int ktimer_create(clockid_t clock_id, \ struct sigevent *evp, int *timerid); } 236 AUE_NULL STD { int ktimer_delete(int timerid); } 237 AUE_NULL STD { int ktimer_settime(int timerid, int flags, \ const struct itimerspec *value, \ struct 
itimerspec *ovalue); } 238 AUE_NULL STD { int ktimer_gettime(int timerid, struct \ itimerspec *value); } 239 AUE_NULL STD { int ktimer_getoverrun(int timerid); } 240 AUE_NULL STD { int nanosleep(const struct timespec *rqtp, \ struct timespec *rmtp); } 241 AUE_NULL STD { int ffclock_getcounter(ffcounter *ffcount); } 242 AUE_NULL STD { int ffclock_setestimate( \ struct ffclock_estimate *cest); } 243 AUE_NULL STD { int ffclock_getestimate( \ struct ffclock_estimate *cest); } 244 AUE_NULL STD { int clock_nanosleep(clockid_t clock_id, \ int flags, const struct timespec *rqtp, \ struct timespec *rmtp); } 245 AUE_NULL UNIMPL nosys 246 AUE_NULL UNIMPL nosys 247 AUE_NULL STD { int clock_getcpuclockid2(id_t id,\ int which, clockid_t *clock_id); } 248 AUE_NULL STD { int ntp_gettime(struct ntptimeval *ntvp); } 249 AUE_NULL UNIMPL nosys ; syscall numbers initially used in OpenBSD 250 AUE_MINHERIT STD { int minherit(void *addr, size_t len, \ int inherit); } 251 AUE_RFORK STD { int rfork(int flags); } 252 AUE_POLL OBSOL openbsd_poll 253 AUE_ISSETUGID STD { int issetugid(void); } 254 AUE_LCHOWN STD { int lchown(char *path, int uid, int gid); } 255 AUE_AIO_READ STD { int aio_read(struct aiocb *aiocbp); } 256 AUE_AIO_WRITE STD { int aio_write(struct aiocb *aiocbp); } 257 AUE_LIO_LISTIO STD { int lio_listio(int mode, \ struct aiocb * const *acb_list, \ int nent, struct sigevent *sig); } 258 AUE_NULL UNIMPL nosys 259 AUE_NULL UNIMPL nosys 260 AUE_NULL UNIMPL nosys 261 AUE_NULL UNIMPL nosys 262 AUE_NULL UNIMPL nosys 263 AUE_NULL UNIMPL nosys 264 AUE_NULL UNIMPL nosys 265 AUE_NULL UNIMPL nosys 266 AUE_NULL UNIMPL nosys 267 AUE_NULL UNIMPL nosys 268 AUE_NULL UNIMPL nosys 269 AUE_NULL UNIMPL nosys 270 AUE_NULL UNIMPL nosys 271 AUE_NULL UNIMPL nosys 272 AUE_O_GETDENTS COMPAT11 { int getdents(int fd, char *buf, \ size_t count); } 273 AUE_NULL UNIMPL nosys 274 AUE_LCHMOD STD { int lchmod(char *path, mode_t mode); } 275 AUE_LCHOWN NOPROTO { int lchown(char *path, uid_t uid, \ gid_t gid); } netbsd_lchown lchown_args \ int 276 AUE_LUTIMES STD { int lutimes(char *path, \ struct timeval *tptr); } 277 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } netbsd_msync msync_args int 278 AUE_STAT COMPAT11 { int nstat(char *path, struct nstat *ub); } 279 AUE_FSTAT COMPAT11 { int nfstat(int fd, struct nstat *sb); } 280 AUE_LSTAT COMPAT11 { int nlstat(char *path, struct nstat *ub); } 281 AUE_NULL UNIMPL nosys 282 AUE_NULL UNIMPL nosys 283 AUE_NULL UNIMPL nosys 284 AUE_NULL UNIMPL nosys 285 AUE_NULL UNIMPL nosys 286 AUE_NULL UNIMPL nosys 287 AUE_NULL UNIMPL nosys 288 AUE_NULL UNIMPL nosys ; 289 and 290 from NetBSD (OpenBSD: 267 and 268) 289 AUE_PREADV STD { ssize_t preadv(int fd, struct iovec *iovp, \ u_int iovcnt, off_t offset); } 290 AUE_PWRITEV STD { ssize_t pwritev(int fd, struct iovec *iovp, \ u_int iovcnt, off_t offset); } 291 AUE_NULL UNIMPL nosys 292 AUE_NULL UNIMPL nosys 293 AUE_NULL UNIMPL nosys 294 AUE_NULL UNIMPL nosys 295 AUE_NULL UNIMPL nosys 296 AUE_NULL UNIMPL nosys ; XXX 297 is 300 in NetBSD 297 AUE_FHSTATFS COMPAT4 { int fhstatfs( \ const struct fhandle *u_fhp, \ struct ostatfs *buf); } 298 AUE_FHOPEN STD { int fhopen(const struct fhandle *u_fhp, \ int flags); } 299 AUE_FHSTAT COMPAT11 { int fhstat(const struct fhandle *u_fhp, \ struct freebsd11_stat *sb); } ; syscall numbers for FreeBSD 300 AUE_NULL STD { int modnext(int modid); } 301 AUE_NULL STD { int modstat(int modid, \ struct module_stat *stat); } 302 AUE_NULL STD { int modfnext(int modid); } 303 AUE_NULL STD { int modfind(const char 
*name); } 304 AUE_MODLOAD STD { int kldload(const char *file); } 305 AUE_MODUNLOAD STD { int kldunload(int fileid); } 306 AUE_NULL STD { int kldfind(const char *file); } 307 AUE_NULL STD { int kldnext(int fileid); } 308 AUE_NULL STD { int kldstat(int fileid, struct \ kld_file_stat* stat); } 309 AUE_NULL STD { int kldfirstmod(int fileid); } 310 AUE_GETSID STD { int getsid(pid_t pid); } 311 AUE_SETRESUID STD { int setresuid(uid_t ruid, uid_t euid, \ uid_t suid); } 312 AUE_SETRESGID STD { int setresgid(gid_t rgid, gid_t egid, \ gid_t sgid); } 313 AUE_NULL OBSOL signanosleep 314 AUE_AIO_RETURN STD { ssize_t aio_return(struct aiocb *aiocbp); } 315 AUE_AIO_SUSPEND STD { int aio_suspend( \ struct aiocb * const * aiocbp, int nent, \ const struct timespec *timeout); } 316 AUE_AIO_CANCEL STD { int aio_cancel(int fd, \ struct aiocb *aiocbp); } 317 AUE_AIO_ERROR STD { int aio_error(struct aiocb *aiocbp); } 318 AUE_AIO_READ COMPAT6 { int aio_read(struct oaiocb *aiocbp); } 319 AUE_AIO_WRITE COMPAT6 { int aio_write(struct oaiocb *aiocbp); } 320 AUE_LIO_LISTIO COMPAT6 { int lio_listio(int mode, \ struct oaiocb * const *acb_list, \ int nent, struct osigevent *sig); } 321 AUE_NULL STD { int yield(void); } 322 AUE_NULL OBSOL thr_sleep 323 AUE_NULL OBSOL thr_wakeup 324 AUE_MLOCKALL STD { int mlockall(int how); } 325 AUE_MUNLOCKALL STD { int munlockall(void); } 326 AUE_GETCWD STD { int __getcwd(char *buf, size_t buflen); } 327 AUE_NULL STD { int sched_setparam (pid_t pid, \ const struct sched_param *param); } 328 AUE_NULL STD { int sched_getparam (pid_t pid, struct \ sched_param *param); } 329 AUE_NULL STD { int sched_setscheduler (pid_t pid, int \ policy, const struct sched_param \ *param); } 330 AUE_NULL STD { int sched_getscheduler (pid_t pid); } 331 AUE_NULL STD { int sched_yield (void); } 332 AUE_NULL STD { int sched_get_priority_max (int policy); } 333 AUE_NULL STD { int sched_get_priority_min (int policy); } 334 AUE_NULL STD { int sched_rr_get_interval (pid_t pid, \ struct timespec *interval); } 335 AUE_NULL STD { int utrace(const void *addr, size_t len); } 336 AUE_SENDFILE COMPAT4 { int sendfile(int fd, int s, \ off_t offset, size_t nbytes, \ struct sf_hdtr *hdtr, off_t *sbytes, \ int flags); } 337 AUE_NULL STD { int kldsym(int fileid, int cmd, \ void *data); } 338 AUE_JAIL STD { int jail(struct jail *jail); } 339 AUE_NULL NOSTD|NOTSTATIC { int nnpfs_syscall(int operation, \ char *a_pathP, int a_opcode, \ void *a_paramsP, int a_followSymlinks); } 340 AUE_SIGPROCMASK STD { int sigprocmask(int how, \ const sigset_t *set, sigset_t *oset); } 341 AUE_SIGSUSPEND STD { int sigsuspend(const sigset_t *sigmask); } 342 AUE_SIGACTION COMPAT4 { int sigaction(int sig, const \ struct sigaction *act, \ struct sigaction *oact); } 343 AUE_SIGPENDING STD { int sigpending(sigset_t *set); } 344 AUE_SIGRETURN COMPAT4 { int sigreturn( \ const struct ucontext4 *sigcntxp); } 345 AUE_SIGWAIT STD { int sigtimedwait(const sigset_t *set, \ siginfo_t *info, \ const struct timespec *timeout); } 346 AUE_NULL STD { int sigwaitinfo(const sigset_t *set, \ siginfo_t *info); } 347 AUE_ACL_GET_FILE STD { int __acl_get_file(const char *path, \ acl_type_t type, struct acl *aclp); } 348 AUE_ACL_SET_FILE STD { int __acl_set_file(const char *path, \ acl_type_t type, struct acl *aclp); } 349 AUE_ACL_GET_FD STD { int __acl_get_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 350 AUE_ACL_SET_FD STD { int __acl_set_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 351 AUE_ACL_DELETE_FILE STD { int __acl_delete_file(const char *path, \ 
acl_type_t type); } 352 AUE_ACL_DELETE_FD STD { int __acl_delete_fd(int filedes, \ acl_type_t type); } 353 AUE_ACL_CHECK_FILE STD { int __acl_aclcheck_file(const char *path, \ acl_type_t type, struct acl *aclp); } 354 AUE_ACL_CHECK_FD STD { int __acl_aclcheck_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 355 AUE_EXTATTRCTL STD { int extattrctl(const char *path, int cmd, \ const char *filename, int attrnamespace, \ const char *attrname); } 356 AUE_EXTATTR_SET_FILE STD { ssize_t extattr_set_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 357 AUE_EXTATTR_GET_FILE STD { ssize_t extattr_get_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 358 AUE_EXTATTR_DELETE_FILE STD { int extattr_delete_file(const char *path, \ int attrnamespace, \ const char *attrname); } 359 AUE_AIO_WAITCOMPLETE STD { ssize_t aio_waitcomplete( \ struct aiocb **aiocbp, \ struct timespec *timeout); } 360 AUE_GETRESUID STD { int getresuid(uid_t *ruid, uid_t *euid, \ uid_t *suid); } 361 AUE_GETRESGID STD { int getresgid(gid_t *rgid, gid_t *egid, \ gid_t *sgid); } 362 AUE_KQUEUE STD { int kqueue(void); } -363 AUE_KEVENT STD { int kevent(int fd, \ - struct kevent *changelist, int nchanges, \ - struct kevent *eventlist, int nevents, \ +363 AUE_KEVENT COMPAT11 { int kevent(int fd, \ + struct kevent_freebsd11 *changelist, \ + int nchanges, \ + struct kevent_freebsd11 *eventlist, \ + int nevents, \ const struct timespec *timeout); } 364 AUE_NULL UNIMPL __cap_get_proc 365 AUE_NULL UNIMPL __cap_set_proc 366 AUE_NULL UNIMPL __cap_get_fd 367 AUE_NULL UNIMPL __cap_get_file 368 AUE_NULL UNIMPL __cap_set_fd 369 AUE_NULL UNIMPL __cap_set_file 370 AUE_NULL UNIMPL nosys 371 AUE_EXTATTR_SET_FD STD { ssize_t extattr_set_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 372 AUE_EXTATTR_GET_FD STD { ssize_t extattr_get_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 373 AUE_EXTATTR_DELETE_FD STD { int extattr_delete_fd(int fd, \ int attrnamespace, \ const char *attrname); } 374 AUE_SETUGID STD { int __setugid(int flag); } 375 AUE_NULL UNIMPL nfsclnt 376 AUE_EACCESS STD { int eaccess(char *path, int amode); } 377 AUE_NULL NOSTD|NOTSTATIC { int afs3_syscall(long syscall, \ long parm1, long parm2, long parm3, \ long parm4, long parm5, long parm6); } 378 AUE_NMOUNT STD { int nmount(struct iovec *iovp, \ unsigned int iovcnt, int flags); } 379 AUE_NULL UNIMPL kse_exit 380 AUE_NULL UNIMPL kse_wakeup 381 AUE_NULL UNIMPL kse_create 382 AUE_NULL UNIMPL kse_thr_interrupt 383 AUE_NULL UNIMPL kse_release 384 AUE_NULL STD { int __mac_get_proc(struct mac *mac_p); } 385 AUE_NULL STD { int __mac_set_proc(struct mac *mac_p); } 386 AUE_NULL STD { int __mac_get_fd(int fd, \ struct mac *mac_p); } 387 AUE_NULL STD { int __mac_get_file(const char *path_p, \ struct mac *mac_p); } 388 AUE_NULL STD { int __mac_set_fd(int fd, \ struct mac *mac_p); } 389 AUE_NULL STD { int __mac_set_file(const char *path_p, \ struct mac *mac_p); } 390 AUE_NULL STD { int kenv(int what, const char *name, \ char *value, int len); } 391 AUE_LCHFLAGS STD { int lchflags(const char *path, \ u_long flags); } 392 AUE_NULL STD { int uuidgen(struct uuid *store, \ int count); } 393 AUE_SENDFILE STD { int sendfile(int fd, int s, off_t offset, \ size_t nbytes, struct sf_hdtr *hdtr, \ off_t *sbytes, int flags); } 394 AUE_NULL STD { int mac_syscall(const char *policy, \ int call, void *arg); } 395 AUE_GETFSSTAT COMPAT11 
{ int getfsstat(struct freebsd11_statfs *buf, \ long bufsize, int mode); } 396 AUE_STATFS COMPAT11 { int statfs(char *path, \ struct freebsd11_statfs *buf); } 397 AUE_FSTATFS COMPAT11 { int fstatfs(int fd, \ struct freebsd11_statfs *buf); } 398 AUE_FHSTATFS COMPAT11 { int fhstatfs(const struct fhandle *u_fhp, \ struct freebsd11_statfs *buf); } 399 AUE_NULL UNIMPL nosys 400 AUE_SEMCLOSE NOSTD { int ksem_close(semid_t id); } 401 AUE_SEMPOST NOSTD { int ksem_post(semid_t id); } 402 AUE_SEMWAIT NOSTD { int ksem_wait(semid_t id); } 403 AUE_SEMTRYWAIT NOSTD { int ksem_trywait(semid_t id); } 404 AUE_SEMINIT NOSTD { int ksem_init(semid_t *idp, \ unsigned int value); } 405 AUE_SEMOPEN NOSTD { int ksem_open(semid_t *idp, \ const char *name, int oflag, \ mode_t mode, unsigned int value); } 406 AUE_SEMUNLINK NOSTD { int ksem_unlink(const char *name); } 407 AUE_SEMGETVALUE NOSTD { int ksem_getvalue(semid_t id, int *val); } 408 AUE_SEMDESTROY NOSTD { int ksem_destroy(semid_t id); } 409 AUE_NULL STD { int __mac_get_pid(pid_t pid, \ struct mac *mac_p); } 410 AUE_NULL STD { int __mac_get_link(const char *path_p, \ struct mac *mac_p); } 411 AUE_NULL STD { int __mac_set_link(const char *path_p, \ struct mac *mac_p); } 412 AUE_EXTATTR_SET_LINK STD { ssize_t extattr_set_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 413 AUE_EXTATTR_GET_LINK STD { ssize_t extattr_get_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 414 AUE_EXTATTR_DELETE_LINK STD { int extattr_delete_link( \ const char *path, int attrnamespace, \ const char *attrname); } 415 AUE_NULL STD { int __mac_execve(char *fname, char **argv, \ char **envv, struct mac *mac_p); } 416 AUE_SIGACTION STD { int sigaction(int sig, \ const struct sigaction *act, \ struct sigaction *oact); } 417 AUE_SIGRETURN STD { int sigreturn( \ const struct __ucontext *sigcntxp); } 418 AUE_NULL UNIMPL __xstat 419 AUE_NULL UNIMPL __xfstat 420 AUE_NULL UNIMPL __xlstat 421 AUE_NULL STD { int getcontext(struct __ucontext *ucp); } 422 AUE_NULL STD { int setcontext( \ const struct __ucontext *ucp); } 423 AUE_NULL STD { int swapcontext(struct __ucontext *oucp, \ const struct __ucontext *ucp); } 424 AUE_SWAPOFF STD { int swapoff(const char *name); } 425 AUE_ACL_GET_LINK STD { int __acl_get_link(const char *path, \ acl_type_t type, struct acl *aclp); } 426 AUE_ACL_SET_LINK STD { int __acl_set_link(const char *path, \ acl_type_t type, struct acl *aclp); } 427 AUE_ACL_DELETE_LINK STD { int __acl_delete_link(const char *path, \ acl_type_t type); } 428 AUE_ACL_CHECK_LINK STD { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } 429 AUE_SIGWAIT STD { int sigwait(const sigset_t *set, \ int *sig); } 430 AUE_THR_CREATE STD { int thr_create(ucontext_t *ctx, long *id, \ int flags); } 431 AUE_THR_EXIT STD { void thr_exit(long *state); } 432 AUE_NULL STD { int thr_self(long *id); } 433 AUE_THR_KILL STD { int thr_kill(long id, int sig); } 434 AUE_NULL UNIMPL nosys 435 AUE_NULL UNIMPL nosys 436 AUE_JAIL_ATTACH STD { int jail_attach(int jid); } 437 AUE_EXTATTR_LIST_FD STD { ssize_t extattr_list_fd(int fd, \ int attrnamespace, void *data, \ size_t nbytes); } 438 AUE_EXTATTR_LIST_FILE STD { ssize_t extattr_list_file( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 439 AUE_EXTATTR_LIST_LINK STD { ssize_t extattr_list_link( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 440 AUE_NULL UNIMPL kse_switchin 441 AUE_SEMWAIT NOSTD 
{ int ksem_timedwait(semid_t id, \ const struct timespec *abstime); } 442 AUE_NULL STD { int thr_suspend( \ const struct timespec *timeout); } 443 AUE_NULL STD { int thr_wake(long id); } 444 AUE_MODUNLOAD STD { int kldunloadf(int fileid, int flags); } 445 AUE_AUDIT STD { int audit(const void *record, \ u_int length); } 446 AUE_AUDITON STD { int auditon(int cmd, void *data, \ u_int length); } 447 AUE_GETAUID STD { int getauid(uid_t *auid); } 448 AUE_SETAUID STD { int setauid(uid_t *auid); } 449 AUE_GETAUDIT STD { int getaudit(struct auditinfo *auditinfo); } 450 AUE_SETAUDIT STD { int setaudit(struct auditinfo *auditinfo); } 451 AUE_GETAUDIT_ADDR STD { int getaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 452 AUE_SETAUDIT_ADDR STD { int setaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 453 AUE_AUDITCTL STD { int auditctl(char *path); } 454 AUE_NULL STD { int _umtx_op(void *obj, int op, \ u_long val, void *uaddr1, void *uaddr2); } 455 AUE_THR_NEW STD { int thr_new(struct thr_param *param, \ int param_size); } 456 AUE_NULL STD { int sigqueue(pid_t pid, int signum, void *value); } 457 AUE_MQ_OPEN NOSTD { int kmq_open(const char *path, int flags, \ mode_t mode, const struct mq_attr *attr); } 458 AUE_MQ_SETATTR NOSTD { int kmq_setattr(int mqd, \ const struct mq_attr *attr, \ struct mq_attr *oattr); } 459 AUE_MQ_TIMEDRECEIVE NOSTD { int kmq_timedreceive(int mqd, \ char *msg_ptr, size_t msg_len, \ unsigned *msg_prio, \ const struct timespec *abs_timeout); } 460 AUE_MQ_TIMEDSEND NOSTD { int kmq_timedsend(int mqd, \ const char *msg_ptr, size_t msg_len,\ unsigned msg_prio, \ const struct timespec *abs_timeout);} 461 AUE_MQ_NOTIFY NOSTD { int kmq_notify(int mqd, \ const struct sigevent *sigev); } 462 AUE_MQ_UNLINK NOSTD { int kmq_unlink(const char *path); } 463 AUE_NULL STD { int abort2(const char *why, int nargs, void **args); } 464 AUE_NULL STD { int thr_set_name(long id, const char *name); } 465 AUE_AIO_FSYNC STD { int aio_fsync(int op, struct aiocb *aiocbp); } 466 AUE_RTPRIO STD { int rtprio_thread(int function, \ lwpid_t lwpid, struct rtprio *rtp); } 467 AUE_NULL UNIMPL nosys 468 AUE_NULL UNIMPL nosys 469 AUE_NULL UNIMPL __getpath_fromfd 470 AUE_NULL UNIMPL __getpath_fromaddr 471 AUE_SCTP_PEELOFF NOSTD { int sctp_peeloff(int sd, uint32_t name); } 472 AUE_SCTP_GENERIC_SENDMSG NOSTD { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 473 AUE_SCTP_GENERIC_SENDMSG_IOV NOSTD { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 474 AUE_SCTP_GENERIC_RECVMSG NOSTD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } 475 AUE_PREAD STD { ssize_t pread(int fd, void *buf, \ size_t nbyte, off_t offset); } 476 AUE_PWRITE STD { ssize_t pwrite(int fd, const void *buf, \ size_t nbyte, off_t offset); } 477 AUE_MMAP STD { caddr_t mmap(caddr_t addr, size_t len, \ int prot, int flags, int fd, off_t pos); } 478 AUE_LSEEK STD { off_t lseek(int fd, off_t offset, \ int whence); } 479 AUE_TRUNCATE STD { int truncate(char *path, off_t length); } 480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); } 481 AUE_THR_KILL2 STD { int thr_kill2(pid_t pid, long id, int sig); } 482 AUE_SHMOPEN STD { int shm_open(const char *path, int flags, \ mode_t mode); } 483 AUE_SHMUNLINK STD { 
int shm_unlink(const char *path); } 484 AUE_NULL STD { int cpuset(cpusetid_t *setid); } 485 AUE_NULL STD { int cpuset_setid(cpuwhich_t which, id_t id, \ cpusetid_t setid); } 486 AUE_NULL STD { int cpuset_getid(cpulevel_t level, \ cpuwhich_t which, id_t id, \ cpusetid_t *setid); } 487 AUE_NULL STD { int cpuset_getaffinity(cpulevel_t level, \ cpuwhich_t which, id_t id, size_t cpusetsize, \ cpuset_t *mask); } 488 AUE_NULL STD { int cpuset_setaffinity(cpulevel_t level, \ cpuwhich_t which, id_t id, size_t cpusetsize, \ const cpuset_t *mask); } 489 AUE_FACCESSAT STD { int faccessat(int fd, char *path, int amode, \ int flag); } 490 AUE_FCHMODAT STD { int fchmodat(int fd, char *path, mode_t mode, \ int flag); } 491 AUE_FCHOWNAT STD { int fchownat(int fd, char *path, uid_t uid, \ gid_t gid, int flag); } 492 AUE_FEXECVE STD { int fexecve(int fd, char **argv, \ char **envv); } 493 AUE_FSTATAT COMPAT11 { int fstatat(int fd, char *path, \ struct freebsd11_stat *buf, int flag); } 494 AUE_FUTIMESAT STD { int futimesat(int fd, char *path, \ struct timeval *times); } 495 AUE_LINKAT STD { int linkat(int fd1, char *path1, int fd2, \ char *path2, int flag); } 496 AUE_MKDIRAT STD { int mkdirat(int fd, char *path, mode_t mode); } 497 AUE_MKFIFOAT STD { int mkfifoat(int fd, char *path, mode_t mode); } 498 AUE_MKNODAT COMPAT11 { int mknodat(int fd, char *path, mode_t mode, \ uint32_t dev); } ; XXX: see the comment for open 499 AUE_OPENAT_RWTC STD { int openat(int fd, char *path, int flag, \ mode_t mode); } 500 AUE_READLINKAT STD { int readlinkat(int fd, char *path, char *buf, \ size_t bufsize); } 501 AUE_RENAMEAT STD { int renameat(int oldfd, char *old, int newfd, \ char *new); } 502 AUE_SYMLINKAT STD { int symlinkat(char *path1, int fd, \ char *path2); } 503 AUE_UNLINKAT STD { int unlinkat(int fd, char *path, int flag); } 504 AUE_POSIX_OPENPT STD { int posix_openpt(int flags); } ; 505 is initialised by the kgssapi code, if present. 
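The switch of entry 363 to COMPAT11 above, paired with the new kevent slot (560) added near the end of this table, keeps old binaries calling the FreeBSD 11 layout of struct kevent while freshly built programs pick up the revised structure (with a widened data field and the new ext[] members defined in <sys/event.h>) simply by recompiling. Source code is unaffected, since EV_SET() and the libc wrapper hide the layout; the short, self-contained consumer below is included only to underline that point and is not part of this change.

#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct kevent change, ev;
	int kq, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* One-shot 500 ms timer; EV_SET hides the struct layout entirely. */
	EV_SET(&change, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 500, NULL);

	if ((n = kevent(kq, &change, 1, &ev, 1, NULL)) == -1)
		err(1, "kevent");
	if (n > 0 && ev.filter == EVFILT_TIMER)
		printf("timer %lu fired\n", (unsigned long)ev.ident);

	close(kq);
	return (0);
}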
505 AUE_NULL NOSTD { int gssd_syscall(char *path); } 506 AUE_JAIL_GET STD { int jail_get(struct iovec *iovp, \ unsigned int iovcnt, int flags); } 507 AUE_JAIL_SET STD { int jail_set(struct iovec *iovp, \ unsigned int iovcnt, int flags); } 508 AUE_JAIL_REMOVE STD { int jail_remove(int jid); } 509 AUE_CLOSEFROM STD { int closefrom(int lowfd); } 510 AUE_SEMCTL NOSTD { int __semctl(int semid, int semnum, \ int cmd, union semun *arg); } 511 AUE_MSGCTL NOSTD { int msgctl(int msqid, int cmd, \ struct msqid_ds *buf); } 512 AUE_SHMCTL NOSTD { int shmctl(int shmid, int cmd, \ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); } 514 AUE_NULL OBSOL cap_new 515 AUE_CAP_RIGHTS_GET STD { int __cap_rights_get(int version, \ int fd, cap_rights_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); } 518 AUE_PDFORK STD { int pdfork(int *fdp, int flags); } 519 AUE_PDKILL STD { int pdkill(int fd, int signum); } 520 AUE_PDGETPID STD { int pdgetpid(int fd, pid_t *pidp); } 521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int pselect(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ const struct timespec *ts, \ const sigset_t *sm); } 523 AUE_GETLOGINCLASS STD { int getloginclass(char *namebuf, \ size_t namelen); } 524 AUE_SETLOGINCLASS STD { int setloginclass(const char *namebuf); } 525 AUE_NULL STD { int rctl_get_racct(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 526 AUE_NULL STD { int rctl_get_rules(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 527 AUE_NULL STD { int rctl_get_limits(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 528 AUE_NULL STD { int rctl_add_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 530 AUE_POSIX_FALLOCATE STD { int posix_fallocate(int fd, \ off_t offset, off_t len); } 531 AUE_POSIX_FADVISE STD { int posix_fadvise(int fd, off_t offset, \ off_t len, int advice); } 532 AUE_WAIT6 STD { int wait6(idtype_t idtype, id_t id, \ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } 533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ cap_rights_t *rightsp); } 534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ const u_long *cmds, size_t ncmds); } 535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ u_long *cmds, size_t maxcmds); } 536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ uint32_t fcntlrights); } 537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ uint32_t *fcntlrightsp); } 538 AUE_BINDAT STD { int bindat(int fd, int s, caddr_t name, \ int namelen); } 539 AUE_CONNECTAT STD { int connectat(int fd, int s, caddr_t name, \ int namelen); } 540 AUE_CHFLAGSAT STD { int chflagsat(int fd, const char *path, \ u_long flags, int atflag); } 541 AUE_ACCEPT STD { int accept4(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE STD { int pipe2(int *fildes, int flags); } 543 AUE_AIO_MLOCK STD { int aio_mlock(struct aiocb *aiocbp); } 544 AUE_PROCCTL STD { int procctl(idtype_t idtype, id_t id, \ int com, void *data); } 545 AUE_POLL STD { int ppoll(struct pollfd *fds, u_int nfds, \ const struct timespec *ts, \ const sigset_t *set); } 546 AUE_FUTIMES STD { int futimens(int fd, \ struct timespec *times); } 547 AUE_FUTIMESAT STD { int utimensat(int 
fd, \ char *path, \ struct timespec *times, int flag); } 548 AUE_NULL STD { int numa_getaffinity(cpuwhich_t which, \ id_t id, \ struct vm_domain_policy_entry *policy); } 549 AUE_NULL STD { int numa_setaffinity(cpuwhich_t which, \ id_t id, const struct \ vm_domain_policy_entry *policy); } 550 AUE_FSYNC STD { int fdatasync(int fd); } 551 AUE_FSTAT STD { int fstat(int fd, struct stat *sb); } 552 AUE_FSTATAT STD { int fstatat(int fd, char *path, \ struct stat *buf, int flag); } 553 AUE_FHSTAT STD { int fhstat(const struct fhandle *u_fhp, \ struct stat *sb); } 554 AUE_GETDIRENTRIES STD { ssize_t getdirentries(int fd, char *buf, \ size_t count, off_t *basep); } 555 AUE_STATFS STD { int statfs(char *path, struct statfs *buf); } 556 AUE_FSTATFS STD { int fstatfs(int fd, struct statfs *buf); } 557 AUE_GETFSSTAT STD { int getfsstat(struct statfs *buf, \ long bufsize, int mode); } 558 AUE_FHSTATFS STD { int fhstatfs(const struct fhandle *u_fhp, \ struct statfs *buf); } 559 AUE_MKNODAT STD { int mknodat(int fd, char *path, mode_t mode, \ dev_t dev); } +560 AUE_KEVENT STD { int kevent(int fd, \ + struct kevent *changelist, int nchanges, \ + struct kevent *eventlist, int nevents, \ + const struct timespec *timeout); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Index: head/sys/kern/vfs_aio.c =================================================================== --- head/sys/kern/vfs_aio.c (revision 320042) +++ head/sys/kern/vfs_aio.c (revision 320043) @@ -1,2990 +1,2994 @@ /*- * Copyright (c) 1997 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. John S. Dyson's name may not be used to endorse or promote products * derived from this software without specific prior written permission. * * DISCLAIMER: This code isn't warranted to do anything useful. Anything * bad that happens because of using this software isn't the responsibility * of the author. This software is distributed AS-IS. */ /* * This file contains support for the POSIX 1003.1B AIO/LIO facility. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Counter for allocating reference ids to new jobs. Wrapped to 1 on * overflow. (XXX will be removed soon.) */ static u_long jobrefid; /* * Counter for aio_fsync. 
*/ static uint64_t jobseqno; #ifndef MAX_AIO_PER_PROC #define MAX_AIO_PER_PROC 32 #endif #ifndef MAX_AIO_QUEUE_PER_PROC #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef MAX_AIO_QUEUE #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef MAX_BUF_AIO #define MAX_BUF_AIO 16 #endif FEATURE(aio, "Asynchronous I/O"); static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list"); static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "Async IO management"); static int enable_aio_unsafe = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, enable_unsafe, CTLFLAG_RW, &enable_aio_unsafe, 0, "Permit asynchronous IO on all file types, not just known-safe types"); static unsigned int unsafe_warningcnt = 1; SYSCTL_UINT(_vfs_aio, OID_AUTO, unsafe_warningcnt, CTLFLAG_RW, &unsafe_warningcnt, 0, "Warnings that will be triggered upon failed IO requests on unsafe files"); static int max_aio_procs = MAX_AIO_PROCS; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0, "Maximum number of kernel processes to use for handling async IO "); static int num_aio_procs = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0, "Number of presently active kernel processes for async IO"); /* * The code will adjust the actual number of AIO processes towards this * number when it gets a chance. */ static int target_aio_procs = TARGET_AIO_PROCS; SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs, 0, "Preferred number of ready kernel processes for async IO"); static int max_queue_count = MAX_AIO_QUEUE; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0, "Maximum number of aio requests to queue, globally"); static int num_queue_count = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0, "Number of queued aio requests"); static int num_buf_aio = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0, "Number of aio requests presently handled by the buf subsystem"); /* Number of async I/O processes in the process of being started */ /* XXX This should be local to aio_aqueue() */ static int num_aio_resv_start = 0; static int aiod_lifetime; SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, "Maximum lifetime for idle aiod"); static int max_aio_per_proc = MAX_AIO_PER_PROC; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc, 0, "Maximum active aio requests per process (stored in the process)"); static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW, &max_aio_queue_per_proc, 0, "Maximum queued aio requests per process (stored in the process)"); static int max_buf_aio = MAX_BUF_AIO; SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0, "Maximum buf aio requests per process (stored in the process)"); #ifdef COMPAT_FREEBSD6 typedef struct oaiocb { int aio_fildes; /* File descriptor */ off_t aio_offset; /* File offset for I/O */ volatile void *aio_buf; /* I/O buffer in process space */ size_t aio_nbytes; /* Number of bytes for I/O */ struct osigevent aio_sigevent; /* Signal to deliver */ int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private _aiocb_private; } oaiocb_t; #endif /* * Below is a key of locks used to protect each member of struct kaiocb * aioliojob and kaioinfo and any backends. 
* * * - need not protected * a - locked by kaioinfo lock * b - locked by backend lock, the backend lock can be null in some cases, * for example, BIO belongs to this type, in this case, proc lock is * reused. * c - locked by aio_job_mtx, the lock for the generic file I/O backend. */ /* * If the routine that services an AIO request blocks while running in an * AIO kernel process it can starve other I/O requests. BIO requests * queued via aio_qphysio() complete in GEOM and do not use AIO kernel * processes at all. Socket I/O requests use a separate pool of * kprocs and also force non-blocking I/O. Other file I/O requests * use the generic fo_read/fo_write operations which can block. The * fsync and mlock operations can also block while executing. Ideally * none of these requests would block while executing. * * Note that the service routines cannot toggle O_NONBLOCK in the file * structure directly while handling a request due to races with * userland threads. */ /* jobflags */ #define KAIOCB_QUEUEING 0x01 #define KAIOCB_CANCELLED 0x02 #define KAIOCB_CANCELLING 0x04 #define KAIOCB_CHECKSYNC 0x08 #define KAIOCB_CLEARED 0x10 #define KAIOCB_FINISHED 0x20 /* * AIO process info */ #define AIOP_FREE 0x1 /* proc on free queue */ struct aioproc { int aioprocflags; /* (c) AIO proc flags */ TAILQ_ENTRY(aioproc) list; /* (c) list of processes */ struct proc *aioproc; /* (*) the AIO proc */ }; /* * data-structure for lio signal management */ struct aioliojob { int lioj_flags; /* (a) listio flags */ int lioj_count; /* (a) listio flags */ int lioj_finished_count; /* (a) listio flags */ struct sigevent lioj_signal; /* (a) signal on all I/O done */ TAILQ_ENTRY(aioliojob) lioj_list; /* (a) lio list */ struct knlist klist; /* (a) list of knotes */ ksiginfo_t lioj_ksi; /* (a) Realtime signal info */ }; #define LIOJ_SIGNAL 0x1 /* signal on all done (lio) */ #define LIOJ_SIGNAL_POSTED 0x2 /* signal has been posted */ #define LIOJ_KEVENT_POSTED 0x4 /* kevent triggered */ /* * per process aio data structure */ struct kaioinfo { struct mtx kaio_mtx; /* the lock to protect this struct */ int kaio_flags; /* (a) per process kaio flags */ int kaio_maxactive_count; /* (*) maximum number of AIOs */ int kaio_active_count; /* (c) number of currently used AIOs */ int kaio_qallowed_count; /* (*) maxiumu size of AIO queue */ int kaio_count; /* (a) size of AIO queue */ int kaio_ballowed_count; /* (*) maximum number of buffers */ int kaio_buffer_count; /* (a) number of physio buffers */ TAILQ_HEAD(,kaiocb) kaio_all; /* (a) all AIOs in a process */ TAILQ_HEAD(,kaiocb) kaio_done; /* (a) done queue for process */ TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */ TAILQ_HEAD(,kaiocb) kaio_jobqueue; /* (a) job queue for process */ TAILQ_HEAD(,kaiocb) kaio_syncqueue; /* (a) queue for aio_fsync */ TAILQ_HEAD(,kaiocb) kaio_syncready; /* (a) second q for aio_fsync */ struct task kaio_task; /* (*) task to kick aio processes */ struct task kaio_sync_task; /* (*) task to schedule fsync jobs */ }; #define AIO_LOCK(ki) mtx_lock(&(ki)->kaio_mtx) #define AIO_UNLOCK(ki) mtx_unlock(&(ki)->kaio_mtx) #define AIO_LOCK_ASSERT(ki, f) mtx_assert(&(ki)->kaio_mtx, (f)) #define AIO_MTX(ki) (&(ki)->kaio_mtx) #define KAIO_RUNDOWN 0x1 /* process is being run down */ #define KAIO_WAKEUP 0x2 /* wakeup process when AIO completes */ /* * Operations used to interact with userland aio control blocks. * Different ABIs provide their own operations. 
*/ struct aiocb_ops { int (*copyin)(struct aiocb *ujob, struct aiocb *kjob); long (*fetch_status)(struct aiocb *ujob); long (*fetch_error)(struct aiocb *ujob); int (*store_status)(struct aiocb *ujob, long status); int (*store_error)(struct aiocb *ujob, long error); int (*store_kernelinfo)(struct aiocb *ujob, long jobref); int (*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob); }; static TAILQ_HEAD(,aioproc) aio_freeproc; /* (c) Idle daemons */ static struct sema aio_newproc_sem; static struct mtx aio_job_mtx; static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */ static struct unrhdr *aiod_unr; void aio_init_aioinfo(struct proc *p); static int aio_onceonly(void); static int aio_free_entry(struct kaiocb *job); static void aio_process_rw(struct kaiocb *job); static void aio_process_sync(struct kaiocb *job); static void aio_process_mlock(struct kaiocb *job); static void aio_schedule_fsync(void *context, int pending); static int aio_newproc(int *); int aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lio, int type, struct aiocb_ops *ops); static int aio_queue_file(struct file *fp, struct kaiocb *job); static void aio_physwakeup(struct bio *bp); static void aio_proc_rundown(void *arg, struct proc *p); static void aio_proc_rundown_exec(void *arg, struct proc *p, struct image_params *imgp); static int aio_qphysio(struct proc *p, struct kaiocb *job); static void aio_daemon(void *param); static void aio_bio_done_notify(struct proc *userp, struct kaiocb *job); static bool aio_clear_cancel_function_locked(struct kaiocb *job); static int aio_kick(struct proc *userp); static void aio_kick_nowait(struct proc *userp); static void aio_kick_helper(void *context, int pending); static int filt_aioattach(struct knote *kn); static void filt_aiodetach(struct knote *kn); static int filt_aio(struct knote *kn, long hint); static int filt_lioattach(struct knote *kn); static void filt_liodetach(struct knote *kn); static int filt_lio(struct knote *kn, long hint); /* * Zones for: * kaio Per process async io info * aiop async io process data * aiocb async io jobs * aiol list io job pointer - internal to aio_suspend XXX * aiolio list io jobs */ static uma_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone, aiolio_zone; /* kqueue filters for aio */ static struct filterops aio_filtops = { .f_isfd = 0, .f_attach = filt_aioattach, .f_detach = filt_aiodetach, .f_event = filt_aio, }; static struct filterops lio_filtops = { .f_isfd = 0, .f_attach = filt_lioattach, .f_detach = filt_liodetach, .f_event = filt_lio }; static eventhandler_tag exit_tag, exec_tag; TASKQUEUE_DEFINE_THREAD(aiod_kick); /* * Main operations function for use as a kernel module. 
*/ static int aio_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: aio_onceonly(); break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t aio_mod = { "aio", &aio_modload, NULL }; DECLARE_MODULE(aio, aio_mod, SI_SUB_VFS, SI_ORDER_ANY); MODULE_VERSION(aio, 1); /* * Startup initialization */ static int aio_onceonly(void) { exit_tag = EVENTHANDLER_REGISTER(process_exit, aio_proc_rundown, NULL, EVENTHANDLER_PRI_ANY); exec_tag = EVENTHANDLER_REGISTER(process_exec, aio_proc_rundown_exec, NULL, EVENTHANDLER_PRI_ANY); kqueue_add_filteropts(EVFILT_AIO, &aio_filtops); kqueue_add_filteropts(EVFILT_LIO, &lio_filtops); TAILQ_INIT(&aio_freeproc); sema_init(&aio_newproc_sem, 0, "aio_new_proc"); mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF); TAILQ_INIT(&aio_jobs); aiod_unr = new_unrhdr(1, INT_MAX, NULL); kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiop_zone = uma_zcreate("AIOP", sizeof(struct aioproc), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiocb_zone = uma_zcreate("AIOCB", sizeof(struct kaiocb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiol_zone = uma_zcreate("AIOL", AIO_LISTIO_MAX*sizeof(intptr_t) , NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiod_lifetime = AIOD_LIFETIME_DEFAULT; jobrefid = 1; p31b_setcfg(CTL_P1003_1B_ASYNCHRONOUS_IO, _POSIX_ASYNCHRONOUS_IO); p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, AIO_LISTIO_MAX); p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE); p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0); return (0); } /* * Init the per-process aioinfo structure. The aioinfo limits are set * per-process for user limit (resource) management. */ void aio_init_aioinfo(struct proc *p) { struct kaioinfo *ki; ki = uma_zalloc(kaio_zone, M_WAITOK); mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW); ki->kaio_flags = 0; ki->kaio_maxactive_count = max_aio_per_proc; ki->kaio_active_count = 0; ki->kaio_qallowed_count = max_aio_queue_per_proc; ki->kaio_count = 0; ki->kaio_ballowed_count = max_buf_aio; ki->kaio_buffer_count = 0; TAILQ_INIT(&ki->kaio_all); TAILQ_INIT(&ki->kaio_done); TAILQ_INIT(&ki->kaio_jobqueue); TAILQ_INIT(&ki->kaio_liojoblist); TAILQ_INIT(&ki->kaio_syncqueue); TAILQ_INIT(&ki->kaio_syncready); TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p); TASK_INIT(&ki->kaio_sync_task, 0, aio_schedule_fsync, ki); PROC_LOCK(p); if (p->p_aioinfo == NULL) { p->p_aioinfo = ki; PROC_UNLOCK(p); } else { PROC_UNLOCK(p); mtx_destroy(&ki->kaio_mtx); uma_zfree(kaio_zone, ki); } while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) aio_newproc(NULL); } static int aio_sendsig(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi) { struct thread *td; int error; error = sigev_findtd(p, sigev, &td); if (error) return (error); if (!KSI_ONQ(ksi)) { ksiginfo_set_sigev(ksi, sigev); ksi->ksi_code = SI_ASYNCIO; ksi->ksi_flags |= KSI_EXT | KSI_INS; tdsendsignal(p, td, ksi->ksi_signo, ksi); } PROC_UNLOCK(p); return (error); } /* * Free a job entry. Wait for completion if it is currently active, but don't * delay forever. If we delay, we return a flag that says that we have to * restart the queue scan. 
*/ static int aio_free_entry(struct kaiocb *job) { struct kaioinfo *ki; struct aioliojob *lj; struct proc *p; p = job->userproc; MPASS(curproc == p); ki = p->p_aioinfo; MPASS(ki != NULL); AIO_LOCK_ASSERT(ki, MA_OWNED); MPASS(job->jobflags & KAIOCB_FINISHED); atomic_subtract_int(&num_queue_count, 1); ki->kaio_count--; MPASS(ki->kaio_count >= 0); TAILQ_REMOVE(&ki->kaio_done, job, plist); TAILQ_REMOVE(&ki->kaio_all, job, allist); lj = job->lio; if (lj) { lj->lioj_count--; lj->lioj_finished_count--; if (lj->lioj_count == 0) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); /* lio is going away, we need to destroy any knotes */ knlist_delete(&lj->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&lj->lioj_ksi); PROC_UNLOCK(p); uma_zfree(aiolio_zone, lj); } } /* job is going away, we need to destroy any knotes */ knlist_delete(&job->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&job->ksi); PROC_UNLOCK(p); AIO_UNLOCK(ki); /* * The thread argument here is used to find the owning process * and is also passed to fo_close() which may pass it to various * places such as devsw close() routines. Because of that, we * need a thread pointer from the process owning the job that is * persistent and won't disappear out from under us or move to * another process. * * Currently, all the callers of this function call it to remove * a kaiocb from the current process' job list either via a * syscall or due to the current process calling exit() or * execve(). Thus, we know that p == curproc. We also know that * curthread can't exit since we are curthread. * * Therefore, we use curthread as the thread to pass to * knlist_delete(). This does mean that it is possible for the * thread pointer at close time to differ from the thread pointer * at open time, but this is already true of file descriptors in * a multithreaded process. */ if (job->fd_file) fdrop(job->fd_file, curthread); crfree(job->cred); uma_zfree(aiocb_zone, job); AIO_LOCK(ki); return (0); } static void aio_proc_rundown_exec(void *arg, struct proc *p, struct image_params *imgp __unused) { aio_proc_rundown(arg, p); } static int aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job) { aio_cancel_fn_t *func; int cancelled; AIO_LOCK_ASSERT(ki, MA_OWNED); if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED)) return (0); MPASS((job->jobflags & KAIOCB_CANCELLING) == 0); job->jobflags |= KAIOCB_CANCELLED; func = job->cancel_fn; /* * If there is no cancel routine, just leave the job marked as * cancelled. The job should be in active use by a caller who * should complete it normally or when it fails to install a * cancel routine. */ if (func == NULL) return (0); /* * Set the CANCELLING flag so that aio_complete() will defer * completions of this job. This prevents the job from being * freed out from under the cancel callback. After the * callback any deferred completion (whether from the callback * or any other source) will be completed. */ job->jobflags |= KAIOCB_CANCELLING; AIO_UNLOCK(ki); func(job); AIO_LOCK(ki); job->jobflags &= ~KAIOCB_CANCELLING; if (job->jobflags & KAIOCB_FINISHED) { cancelled = job->uaiocb._aiocb_private.error == ECANCELED; TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist); aio_bio_done_notify(p, job); } else { /* * The cancel callback might have scheduled an * operation to cancel this request, but it is * only counted as cancelled if the request is * cancelled when the callback returns. */ cancelled = 0; } return (cancelled); } /* * Rundown the jobs for a given process. 
*/ static void aio_proc_rundown(void *arg, struct proc *p) { struct kaioinfo *ki; struct aioliojob *lj; struct kaiocb *job, *jobn; KASSERT(curthread->td_proc == p, ("%s: called on non-curproc", __func__)); ki = p->p_aioinfo; if (ki == NULL) return; AIO_LOCK(ki); ki->kaio_flags |= KAIO_RUNDOWN; restart: /* * Try to cancel all pending requests. This code simulates * aio_cancel on all pending I/O requests. */ TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) { aio_cancel_job(p, ki, job); } /* Wait for all running I/O to be finished */ if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) { ki->kaio_flags |= KAIO_WAKEUP; msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz); goto restart; } /* Free all completed I/O requests. */ while ((job = TAILQ_FIRST(&ki->kaio_done)) != NULL) aio_free_entry(job); while ((lj = TAILQ_FIRST(&ki->kaio_liojoblist)) != NULL) { if (lj->lioj_count == 0) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); knlist_delete(&lj->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&lj->lioj_ksi); PROC_UNLOCK(p); uma_zfree(aiolio_zone, lj); } else { panic("LIO job not cleaned up: C:%d, FC:%d\n", lj->lioj_count, lj->lioj_finished_count); } } AIO_UNLOCK(ki); taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_task); taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_sync_task); mtx_destroy(&ki->kaio_mtx); uma_zfree(kaio_zone, ki); p->p_aioinfo = NULL; } /* * Select a job to run (called by an AIO daemon). */ static struct kaiocb * aio_selectjob(struct aioproc *aiop) { struct kaiocb *job; struct kaioinfo *ki; struct proc *userp; mtx_assert(&aio_job_mtx, MA_OWNED); restart: TAILQ_FOREACH(job, &aio_jobs, list) { userp = job->userproc; ki = userp->p_aioinfo; if (ki->kaio_active_count < ki->kaio_maxactive_count) { TAILQ_REMOVE(&aio_jobs, job, list); if (!aio_clear_cancel_function(job)) goto restart; /* Account for currently active jobs. */ ki->kaio_active_count++; break; } } return (job); } /* * Move all data to a permanent storage device. This code * simulates the fsync syscall. */ static int aio_fsync_vnode(struct thread *td, struct vnode *vp) { struct mount *mp; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto drop; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); drop: return (error); } /* * The AIO processing activity for LIO_READ/LIO_WRITE. This is the code that * does the I/O request for the non-physio version of the operations. The * normal vn operations are used, and this code should work in all instances * for every type of file, including pipes, sockets, fifos, and regular files. * * XXX I don't think it works well for socket, pipe, and fifo. 
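 *
 * From the submitting process's point of view the transfer behaves like a
 * one-shot pread(2)/pwrite(2) at aio_offset, roughly (illustrative only):
 *
 *	ssize_t n = pread(cb->aio_fildes, (void *)cb->aio_buf,
 *	    cb->aio_nbytes, cb->aio_offset);
 *
 * except that it runs asynchronously in an aio daemon thread and the result
 * is collected later with aio_return(2).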
*/ static void aio_process_rw(struct kaiocb *job) { struct ucred *td_savedcred; struct thread *td; struct aiocb *cb; struct file *fp; struct uio auio; struct iovec aiov; ssize_t cnt; long msgsnd_st, msgsnd_end; long msgrcv_st, msgrcv_end; long oublock_st, oublock_end; long inblock_st, inblock_end; int error; KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ || job->uaiocb.aio_lio_opcode == LIO_WRITE, ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); aio_switch_vmspace(job); td = curthread; td_savedcred = td->td_ucred; td->td_ucred = job->cred; cb = &job->uaiocb; fp = job->fd_file; aiov.iov_base = (void *)(uintptr_t)cb->aio_buf; aiov.iov_len = cb->aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = cb->aio_offset; auio.uio_resid = cb->aio_nbytes; cnt = cb->aio_nbytes; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; msgrcv_st = td->td_ru.ru_msgrcv; msgsnd_st = td->td_ru.ru_msgsnd; inblock_st = td->td_ru.ru_inblock; oublock_st = td->td_ru.ru_oublock; /* * aio_aqueue() acquires a reference to the file that is * released in aio_free_entry(). */ if (cb->aio_lio_opcode == LIO_READ) { auio.uio_rw = UIO_READ; if (auio.uio_resid == 0) error = 0; else error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td); } else { if (fp->f_type == DTYPE_VNODE) bwillwrite(); auio.uio_rw = UIO_WRITE; error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td); } msgrcv_end = td->td_ru.ru_msgrcv; msgsnd_end = td->td_ru.ru_msgsnd; inblock_end = td->td_ru.ru_inblock; oublock_end = td->td_ru.ru_oublock; job->msgrcv = msgrcv_end - msgrcv_st; job->msgsnd = msgsnd_end - msgsnd_st; job->inblock = inblock_end - inblock_st; job->outblock = oublock_end - oublock_st; if ((error) && (auio.uio_resid != cnt)) { if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) error = 0; if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) { PROC_LOCK(job->userproc); kern_psignal(job->userproc, SIGPIPE); PROC_UNLOCK(job->userproc); } } cnt -= auio.uio_resid; td->td_ucred = td_savedcred; if (error) aio_complete(job, -1, error); else aio_complete(job, cnt, 0); } static void aio_process_sync(struct kaiocb *job) { struct thread *td = curthread; struct ucred *td_savedcred = td->td_ucred; struct file *fp = job->fd_file; int error = 0; KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC, ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); td->td_ucred = job->cred; if (fp->f_vnode != NULL) error = aio_fsync_vnode(td, fp->f_vnode); td->td_ucred = td_savedcred; if (error) aio_complete(job, -1, error); else aio_complete(job, 0, 0); } static void aio_process_mlock(struct kaiocb *job) { struct aiocb *cb = &job->uaiocb; int error; KASSERT(job->uaiocb.aio_lio_opcode == LIO_MLOCK, ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode)); aio_switch_vmspace(job); error = kern_mlock(job->userproc, job->cred, __DEVOLATILE(uintptr_t, cb->aio_buf), cb->aio_nbytes); aio_complete(job, error != 0 ? 
-1 : 0, error); } static void aio_bio_done_notify(struct proc *userp, struct kaiocb *job) { struct aioliojob *lj; struct kaioinfo *ki; struct kaiocb *sjob, *sjobn; int lj_done; bool schedule_fsync; ki = userp->p_aioinfo; AIO_LOCK_ASSERT(ki, MA_OWNED); lj = job->lio; lj_done = 0; if (lj) { lj->lioj_finished_count++; if (lj->lioj_count == lj->lioj_finished_count) lj_done = 1; } TAILQ_INSERT_TAIL(&ki->kaio_done, job, plist); MPASS(job->jobflags & KAIOCB_FINISHED); if (ki->kaio_flags & KAIO_RUNDOWN) goto notification_done; if (job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) aio_sendsig(userp, &job->uaiocb.aio_sigevent, &job->ksi); KNOTE_LOCKED(&job->klist, 1); if (lj_done) { if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { lj->lioj_flags |= LIOJ_KEVENT_POSTED; KNOTE_LOCKED(&lj->klist, 1); } if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) { aio_sendsig(userp, &lj->lioj_signal, &lj->lioj_ksi); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } notification_done: if (job->jobflags & KAIOCB_CHECKSYNC) { schedule_fsync = false; TAILQ_FOREACH_SAFE(sjob, &ki->kaio_syncqueue, list, sjobn) { if (job->fd_file != sjob->fd_file || job->seqno >= sjob->seqno) continue; if (--sjob->pending > 0) continue; TAILQ_REMOVE(&ki->kaio_syncqueue, sjob, list); if (!aio_clear_cancel_function_locked(sjob)) continue; TAILQ_INSERT_TAIL(&ki->kaio_syncready, sjob, list); schedule_fsync = true; } if (schedule_fsync) taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_sync_task); } if (ki->kaio_flags & KAIO_WAKEUP) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(&userp->p_aioinfo); } } static void aio_schedule_fsync(void *context, int pending) { struct kaioinfo *ki; struct kaiocb *job; ki = context; AIO_LOCK(ki); while (!TAILQ_EMPTY(&ki->kaio_syncready)) { job = TAILQ_FIRST(&ki->kaio_syncready); TAILQ_REMOVE(&ki->kaio_syncready, job, list); AIO_UNLOCK(ki); aio_schedule(job, aio_process_sync); AIO_LOCK(ki); } AIO_UNLOCK(ki); } bool aio_cancel_cleared(struct kaiocb *job) { struct kaioinfo *ki; /* * The caller should hold the same queue lock held when * aio_clear_cancel_function() was called and set this flag * ensuring this check sees an up-to-date value. However, * there is no way to assert that. 
*/ ki = job->userproc->p_aioinfo; return ((job->jobflags & KAIOCB_CLEARED) != 0); } static bool aio_clear_cancel_function_locked(struct kaiocb *job) { AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED); MPASS(job->cancel_fn != NULL); if (job->jobflags & KAIOCB_CANCELLING) { job->jobflags |= KAIOCB_CLEARED; return (false); } job->cancel_fn = NULL; return (true); } bool aio_clear_cancel_function(struct kaiocb *job) { struct kaioinfo *ki; bool ret; ki = job->userproc->p_aioinfo; AIO_LOCK(ki); ret = aio_clear_cancel_function_locked(job); AIO_UNLOCK(ki); return (ret); } static bool aio_set_cancel_function_locked(struct kaiocb *job, aio_cancel_fn_t *func) { AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED); if (job->jobflags & KAIOCB_CANCELLED) return (false); job->cancel_fn = func; return (true); } bool aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func) { struct kaioinfo *ki; bool ret; ki = job->userproc->p_aioinfo; AIO_LOCK(ki); ret = aio_set_cancel_function_locked(job, func); AIO_UNLOCK(ki); return (ret); } void aio_complete(struct kaiocb *job, long status, int error) { struct kaioinfo *ki; struct proc *userp; job->uaiocb._aiocb_private.error = error; job->uaiocb._aiocb_private.status = status; userp = job->userproc; ki = userp->p_aioinfo; AIO_LOCK(ki); KASSERT(!(job->jobflags & KAIOCB_FINISHED), ("duplicate aio_complete")); job->jobflags |= KAIOCB_FINISHED; if ((job->jobflags & (KAIOCB_QUEUEING | KAIOCB_CANCELLING)) == 0) { TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist); aio_bio_done_notify(userp, job); } AIO_UNLOCK(ki); } void aio_cancel(struct kaiocb *job) { aio_complete(job, -1, ECANCELED); } void aio_switch_vmspace(struct kaiocb *job) { vmspace_switch_aio(job->userproc->p_vmspace); } /* * The AIO daemon, most of the actual work is done in aio_process_*, * but the setup (and address space mgmt) is done in this routine. */ static void aio_daemon(void *_id) { struct kaiocb *job; struct aioproc *aiop; struct kaioinfo *ki; struct proc *p; struct vmspace *myvm; struct thread *td = curthread; int id = (intptr_t)_id; /* * Grab an extra reference on the daemon's vmspace so that it * doesn't get freed by jobs that switch to a different * vmspace. */ p = td->td_proc; myvm = vmspace_acquire_ref(p); KASSERT(p->p_textvp == NULL, ("kthread has a textvp")); /* * Allocate and ready the aio control info. There is one aiop structure * per daemon. */ aiop = uma_zalloc(aiop_zone, M_WAITOK); aiop->aioproc = p; aiop->aioprocflags = 0; /* * Wakeup parent process. (Parent sleeps to keep from blasting away * and creating too many daemons.) */ sema_post(&aio_newproc_sem); mtx_lock(&aio_job_mtx); for (;;) { /* * Take daemon off of free queue */ if (aiop->aioprocflags & AIOP_FREE) { TAILQ_REMOVE(&aio_freeproc, aiop, list); aiop->aioprocflags &= ~AIOP_FREE; } /* * Check for jobs. */ while ((job = aio_selectjob(aiop)) != NULL) { mtx_unlock(&aio_job_mtx); ki = job->userproc->p_aioinfo; job->handle_fn(job); mtx_lock(&aio_job_mtx); /* Decrement the active job count. */ ki->kaio_active_count--; } /* * Disconnect from user address space. */ if (p->p_vmspace != myvm) { mtx_unlock(&aio_job_mtx); vmspace_switch_aio(myvm); mtx_lock(&aio_job_mtx); /* * We have to restart to avoid race, we only sleep if * no job can be selected. */ continue; } mtx_assert(&aio_job_mtx, MA_OWNED); TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); aiop->aioprocflags |= AIOP_FREE; /* * If daemon is inactive for a long time, allow it to exit, * thereby freeing resources. 
*/ if (msleep(p, &aio_job_mtx, PRIBIO, "aiordy", aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) && (aiop->aioprocflags & AIOP_FREE) && num_aio_procs > target_aio_procs) break; } TAILQ_REMOVE(&aio_freeproc, aiop, list); num_aio_procs--; mtx_unlock(&aio_job_mtx); uma_zfree(aiop_zone, aiop); free_unr(aiod_unr, id); vmspace_free(myvm); KASSERT(p->p_vmspace == myvm, ("AIOD: bad vmspace for exiting daemon")); KASSERT(myvm->vm_refcnt > 1, ("AIOD: bad vm refcnt for exiting daemon: %d", myvm->vm_refcnt)); kproc_exit(0); } /* * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The * AIO daemon modifies its environment itself. */ static int aio_newproc(int *start) { int error; struct proc *p; int id; id = alloc_unr(aiod_unr); error = kproc_create(aio_daemon, (void *)(intptr_t)id, &p, RFNOWAIT, 0, "aiod%d", id); if (error == 0) { /* * Wait until daemon is started. */ sema_wait(&aio_newproc_sem); mtx_lock(&aio_job_mtx); num_aio_procs++; if (start != NULL) (*start)--; mtx_unlock(&aio_job_mtx); } else { free_unr(aiod_unr, id); } return (error); } /* * Try the high-performance, low-overhead physio method for eligible * VCHR devices. This method doesn't use an aio helper thread, and * thus has very low overhead. * * Assumes that the caller, aio_aqueue(), has incremented the file * structure's reference count, preventing its deallocation for the * duration of this call. */ static int aio_qphysio(struct proc *p, struct kaiocb *job) { struct aiocb *cb; struct file *fp; struct bio *bp; struct buf *pbuf; struct vnode *vp; struct cdevsw *csw; struct cdev *dev; struct kaioinfo *ki; int error, ref, poff; vm_prot_t prot; cb = &job->uaiocb; fp = job->fd_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) return (-1); vp = fp->f_vnode; if (vp->v_type != VCHR) return (-1); if (vp->v_bufobj.bo_bsize == 0) return (-1); if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) return (-1); ref = 0; csw = devvn_refthread(vp, &dev, &ref); if (csw == NULL) return (ENXIO); if ((csw->d_flags & D_DISK) == 0) { error = -1; goto unref; } if (cb->aio_nbytes > dev->si_iosize_max) { error = -1; goto unref; } ki = p->p_aioinfo; poff = (vm_offset_t)cb->aio_buf & PAGE_MASK; if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { if (cb->aio_nbytes > MAXPHYS) { error = -1; goto unref; } pbuf = NULL; } else { if (cb->aio_nbytes > MAXPHYS - poff) { error = -1; goto unref; } if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) { error = -1; goto unref; } job->pbuf = pbuf = (struct buf *)getpbuf(NULL); BUF_KERNPROC(pbuf); AIO_LOCK(ki); ki->kaio_buffer_count++; AIO_UNLOCK(ki); } job->bp = bp = g_alloc_bio(); bp->bio_length = cb->aio_nbytes; bp->bio_bcount = cb->aio_nbytes; bp->bio_done = aio_physwakeup; bp->bio_data = (void *)(uintptr_t)cb->aio_buf; bp->bio_offset = cb->aio_offset; bp->bio_cmd = cb->aio_lio_opcode == LIO_WRITE ? 
BIO_WRITE : BIO_READ; bp->bio_dev = dev; bp->bio_caller1 = (void *)job; prot = VM_PROT_READ; if (cb->aio_lio_opcode == LIO_READ) prot |= VM_PROT_WRITE; /* Less backwards than it looks */ job->npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, (vm_offset_t)bp->bio_data, bp->bio_length, prot, job->pages, nitems(job->pages)); if (job->npages < 0) { error = EFAULT; goto doerror; } if (pbuf != NULL) { pmap_qenter((vm_offset_t)pbuf->b_data, job->pages, job->npages); bp->bio_data = pbuf->b_data + poff; atomic_add_int(&num_buf_aio, 1); } else { bp->bio_ma = job->pages; bp->bio_ma_n = job->npages; bp->bio_ma_offset = poff; bp->bio_data = unmapped_buf; bp->bio_flags |= BIO_UNMAPPED; } /* Perform transfer. */ csw->d_strategy(bp); dev_relthread(dev, ref); return (0); doerror: if (pbuf != NULL) { AIO_LOCK(ki); ki->kaio_buffer_count--; AIO_UNLOCK(ki); relpbuf(pbuf, NULL); job->pbuf = NULL; } g_destroy_bio(bp); job->bp = NULL; unref: dev_relthread(dev, ref); return (error); } #ifdef COMPAT_FREEBSD6 static int convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) { /* * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are * supported by AIO with the old sigevent structure. */ nsig->sigev_notify = osig->sigev_notify; switch (nsig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_SIGNAL: nsig->sigev_signo = osig->__sigev_u.__sigev_signo; break; case SIGEV_KEVENT: nsig->sigev_notify_kqueue = osig->__sigev_u.__sigev_notify_kqueue; nsig->sigev_value.sival_ptr = osig->sigev_value.sival_ptr; break; default: return (EINVAL); } return (0); } static int aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) { struct oaiocb *ojob; int error; bzero(kjob, sizeof(struct aiocb)); error = copyin(ujob, kjob, sizeof(struct oaiocb)); if (error) return (error); ojob = (struct oaiocb *)kjob; return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent)); } #endif static int aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob) { return (copyin(ujob, kjob, sizeof(struct aiocb))); } static long aiocb_fetch_status(struct aiocb *ujob) { return (fuword(&ujob->_aiocb_private.status)); } static long aiocb_fetch_error(struct aiocb *ujob) { return (fuword(&ujob->_aiocb_private.error)); } static int aiocb_store_status(struct aiocb *ujob, long status) { return (suword(&ujob->_aiocb_private.status, status)); } static int aiocb_store_error(struct aiocb *ujob, long error) { return (suword(&ujob->_aiocb_private.error, error)); } static int aiocb_store_kernelinfo(struct aiocb *ujob, long jobref) { return (suword(&ujob->_aiocb_private.kernelinfo, jobref)); } static int aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob) { return (suword(ujobp, (long)ujob)); } static struct aiocb_ops aiocb_ops = { .copyin = aiocb_copyin, .fetch_status = aiocb_fetch_status, .fetch_error = aiocb_fetch_error, .store_status = aiocb_store_status, .store_error = aiocb_store_error, .store_kernelinfo = aiocb_store_kernelinfo, .store_aiocb = aiocb_store_aiocb, }; #ifdef COMPAT_FREEBSD6 static struct aiocb_ops aiocb_ops_osigevent = { .copyin = aiocb_copyin_old_sigevent, .fetch_status = aiocb_fetch_status, .fetch_error = aiocb_fetch_error, .store_status = aiocb_store_status, .store_error = aiocb_store_error, .store_kernelinfo = aiocb_store_kernelinfo, .store_aiocb = aiocb_store_aiocb, }; #endif /* * Queue a new AIO request. Choosing either the threaded or direct physio VCHR * technique is done in this code. 
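 *
 * For context, the user-visible side of what gets queued here is the usual
 * POSIX pattern (minimal illustrative sketch, error checking omitted):
 *
 *	#include <aio.h>
 *	#include <string.h>
 *
 *	struct aiocb cb;
 *	char buf[512];
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.aio_fildes = fd;		// an open descriptor
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *	aio_read(&cb);			// reaches aio_aqueue() as LIO_READ
 *	while (aio_error(&cb) == EINPROGRESS)
 *		;			// or block in aio_suspend()/aio_waitcomplete()
 *	ssize_t n = aio_return(&cb);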
*/ int aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, int type, struct aiocb_ops *ops) { struct proc *p = td->td_proc; cap_rights_t rights; struct file *fp; struct kaiocb *job; struct kaioinfo *ki; struct kevent kev; int opcode; int error; int fd, kqfd; int jid; u_short evflags; if (p->p_aioinfo == NULL) aio_init_aioinfo(p); ki = p->p_aioinfo; ops->store_status(ujob, -1); ops->store_error(ujob, 0); ops->store_kernelinfo(ujob, -1); if (num_queue_count >= max_queue_count || ki->kaio_count >= ki->kaio_qallowed_count) { ops->store_error(ujob, EAGAIN); return (EAGAIN); } job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO); knlist_init_mtx(&job->klist, AIO_MTX(ki)); error = ops->copyin(ujob, &job->uaiocb); if (error) { ops->store_error(ujob, error); uma_zfree(aiocb_zone, job); return (error); } if (job->uaiocb.aio_nbytes > IOSIZE_MAX) { uma_zfree(aiocb_zone, job); return (EINVAL); } if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT && job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL && job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID && job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) { ops->store_error(ujob, EINVAL); uma_zfree(aiocb_zone, job); return (EINVAL); } if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) { uma_zfree(aiocb_zone, job); return (EINVAL); } ksiginfo_init(&job->ksi); /* Save userspace address of the job info. */ job->ujob = ujob; /* Get the opcode. */ if (type != LIO_NOP) job->uaiocb.aio_lio_opcode = type; opcode = job->uaiocb.aio_lio_opcode; /* * Validate the opcode and fetch the file object for the specified * file descriptor. * * XXXRW: Moved the opcode validation up here so that we don't * retrieve a file descriptor without knowing what the capabiltity * should be. 
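 *
 * The opcode thus also determines which capability right is demanded of the
 * descriptor below: CAP_PREAD for LIO_READ, CAP_PWRITE for LIO_WRITE and
 * CAP_FSYNC for LIO_SYNC.  A sandboxed consumer would limit the descriptor
 * accordingly, e.g. (illustrative sketch):
 *
 *	#include <sys/capsicum.h>
 *
 *	cap_rights_t rights;
 *
 *	cap_rights_init(&rights, CAP_PREAD);
 *	cap_rights_limit(fd, &rights);	// aio_read() on fd keeps working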
*/ fd = job->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: error = fget_write(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp); break; case LIO_READ: error = fget_read(td, fd, cap_rights_init(&rights, CAP_PREAD), &fp); break; case LIO_SYNC: error = fget(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); break; case LIO_MLOCK: fp = NULL; break; case LIO_NOP: error = fget(td, fd, cap_rights_init(&rights), &fp); break; default: error = EINVAL; } if (error) { uma_zfree(aiocb_zone, job); ops->store_error(ujob, error); return (error); } if (opcode == LIO_SYNC && fp->f_vnode == NULL) { error = EINVAL; goto aqueue_fail; } if (opcode != LIO_SYNC && job->uaiocb.aio_offset == -1LL) { error = EINVAL; goto aqueue_fail; } job->fd_file = fp; mtx_lock(&aio_job_mtx); jid = jobrefid++; job->seqno = jobseqno++; mtx_unlock(&aio_job_mtx); error = ops->store_kernelinfo(ujob, jid); if (error) { error = EINVAL; goto aqueue_fail; } job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid; if (opcode == LIO_NOP) { fdrop(fp, td); uma_zfree(aiocb_zone, job); return (0); } if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT) goto no_kqueue; evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags; if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) { error = EINVAL; goto aqueue_fail; } kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue; kev.ident = (uintptr_t)job->ujob; kev.filter = EVFILT_AIO; kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | evflags; kev.data = (intptr_t)job; kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr; error = kqfd_register(kqfd, &kev, td, 1); if (error) goto aqueue_fail; no_kqueue: ops->store_error(ujob, EINPROGRESS); job->uaiocb._aiocb_private.error = EINPROGRESS; job->userproc = p; job->cred = crhold(td->td_ucred); job->jobflags = KAIOCB_QUEUEING; job->lio = lj; if (opcode == LIO_MLOCK) { aio_schedule(job, aio_process_mlock); error = 0; } else if (fp->f_ops->fo_aio_queue == NULL) error = aio_queue_file(fp, job); else error = fo_aio_queue(fp, job); if (error) goto aqueue_fail; AIO_LOCK(ki); job->jobflags &= ~KAIOCB_QUEUEING; TAILQ_INSERT_TAIL(&ki->kaio_all, job, allist); ki->kaio_count++; if (lj) lj->lioj_count++; atomic_add_int(&num_queue_count, 1); if (job->jobflags & KAIOCB_FINISHED) { /* * The queue callback completed the request synchronously. * The bulk of the completion is deferred in that case * until this point. 
*/ aio_bio_done_notify(p, job); } else TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist); AIO_UNLOCK(ki); return (0); aqueue_fail: knlist_delete(&job->klist, curthread, 0); if (fp) fdrop(fp, td); uma_zfree(aiocb_zone, job); ops->store_error(ujob, error); return (error); } static void aio_cancel_daemon_job(struct kaiocb *job) { mtx_lock(&aio_job_mtx); if (!aio_cancel_cleared(job)) TAILQ_REMOVE(&aio_jobs, job, list); mtx_unlock(&aio_job_mtx); aio_cancel(job); } void aio_schedule(struct kaiocb *job, aio_handle_fn_t *func) { mtx_lock(&aio_job_mtx); if (!aio_set_cancel_function(job, aio_cancel_daemon_job)) { mtx_unlock(&aio_job_mtx); aio_cancel(job); return; } job->handle_fn = func; TAILQ_INSERT_TAIL(&aio_jobs, job, list); aio_kick_nowait(job->userproc); mtx_unlock(&aio_job_mtx); } static void aio_cancel_sync(struct kaiocb *job) { struct kaioinfo *ki; ki = job->userproc->p_aioinfo; AIO_LOCK(ki); if (!aio_cancel_cleared(job)) TAILQ_REMOVE(&ki->kaio_syncqueue, job, list); AIO_UNLOCK(ki); aio_cancel(job); } int aio_queue_file(struct file *fp, struct kaiocb *job) { struct aioliojob *lj; struct kaioinfo *ki; struct kaiocb *job2; struct vnode *vp; struct mount *mp; int error, opcode; bool safe; lj = job->lio; ki = job->userproc->p_aioinfo; opcode = job->uaiocb.aio_lio_opcode; if (opcode == LIO_SYNC) goto queueit; if ((error = aio_qphysio(job->userproc, job)) == 0) goto done; #if 0 /* * XXX: This means qphysio() failed with EFAULT. The current * behavior is to retry the operation via fo_read/fo_write. * Wouldn't it be better to just complete the request with an * error here? */ if (error > 0) goto done; #endif queueit: safe = false; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VREG || vp->v_type == VDIR) { mp = fp->f_vnode->v_mount; if (mp == NULL || (mp->mnt_flag & MNT_LOCAL) != 0) safe = true; } } if (!(safe || enable_aio_unsafe)) { counted_warning(&unsafe_warningcnt, "is attempting to use unsafe AIO requests"); return (EOPNOTSUPP); } if (opcode == LIO_SYNC) { AIO_LOCK(ki); TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) { if (job2->fd_file == job->fd_file && job2->uaiocb.aio_lio_opcode != LIO_SYNC && job2->seqno < job->seqno) { job2->jobflags |= KAIOCB_CHECKSYNC; job->pending++; } } if (job->pending != 0) { if (!aio_set_cancel_function_locked(job, aio_cancel_sync)) { AIO_UNLOCK(ki); aio_cancel(job); return (0); } TAILQ_INSERT_TAIL(&ki->kaio_syncqueue, job, list); AIO_UNLOCK(ki); return (0); } AIO_UNLOCK(ki); } switch (opcode) { case LIO_READ: case LIO_WRITE: aio_schedule(job, aio_process_rw); error = 0; break; case LIO_SYNC: aio_schedule(job, aio_process_sync); error = 0; break; default: error = EINVAL; } done: return (error); } static void aio_kick_nowait(struct proc *userp) { struct kaioinfo *ki = userp->p_aioinfo; struct aioproc *aiop; mtx_assert(&aio_job_mtx, MA_OWNED); if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { TAILQ_REMOVE(&aio_freeproc, aiop, list); aiop->aioprocflags &= ~AIOP_FREE; wakeup(aiop->aioproc); } else if (num_aio_resv_start + num_aio_procs < max_aio_procs && ki->kaio_active_count + num_aio_resv_start < ki->kaio_maxactive_count) { taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_task); } } static int aio_kick(struct proc *userp) { struct kaioinfo *ki = userp->p_aioinfo; struct aioproc *aiop; int error, ret = 0; mtx_assert(&aio_job_mtx, MA_OWNED); retryproc: if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { TAILQ_REMOVE(&aio_freeproc, aiop, list); aiop->aioprocflags &= ~AIOP_FREE; wakeup(aiop->aioproc); } else if (num_aio_resv_start + num_aio_procs < 
max_aio_procs && ki->kaio_active_count + num_aio_resv_start < ki->kaio_maxactive_count) { num_aio_resv_start++; mtx_unlock(&aio_job_mtx); error = aio_newproc(&num_aio_resv_start); mtx_lock(&aio_job_mtx); if (error) { num_aio_resv_start--; goto retryproc; } } else { ret = -1; } return (ret); } static void aio_kick_helper(void *context, int pending) { struct proc *userp = context; mtx_lock(&aio_job_mtx); while (--pending >= 0) { if (aio_kick(userp)) break; } mtx_unlock(&aio_job_mtx); } /* * Support the aio_return system call, as a side-effect, kernel resources are * released. */ static int kern_aio_return(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct kaiocb *job; struct kaioinfo *ki; long status, error; ki = p->p_aioinfo; if (ki == NULL) return (EINVAL); AIO_LOCK(ki); TAILQ_FOREACH(job, &ki->kaio_done, plist) { if (job->ujob == ujob) break; } if (job != NULL) { MPASS(job->jobflags & KAIOCB_FINISHED); status = job->uaiocb._aiocb_private.status; error = job->uaiocb._aiocb_private.error; td->td_retval[0] = status; td->td_ru.ru_oublock += job->outblock; td->td_ru.ru_inblock += job->inblock; td->td_ru.ru_msgsnd += job->msgsnd; td->td_ru.ru_msgrcv += job->msgrcv; aio_free_entry(job); AIO_UNLOCK(ki); ops->store_error(ujob, error); ops->store_status(ujob, status); } else { error = EINVAL; AIO_UNLOCK(ki); } return (error); } int sys_aio_return(struct thread *td, struct aio_return_args *uap) { return (kern_aio_return(td, uap->aiocbp, &aiocb_ops)); } /* * Allow a process to wakeup when any of the I/O requests are completed. */ static int kern_aio_suspend(struct thread *td, int njoblist, struct aiocb **ujoblist, struct timespec *ts) { struct proc *p = td->td_proc; struct timeval atv; struct kaioinfo *ki; struct kaiocb *firstjob, *job; int error, i, timo; timo = 0; if (ts) { if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) return (EINVAL); TIMESPEC_TO_TIMEVAL(&atv, ts); if (itimerfix(&atv)) return (EINVAL); timo = tvtohz(&atv); } ki = p->p_aioinfo; if (ki == NULL) return (EAGAIN); if (njoblist == 0) return (0); AIO_LOCK(ki); for (;;) { firstjob = NULL; error = 0; TAILQ_FOREACH(job, &ki->kaio_all, allist) { for (i = 0; i < njoblist; i++) { if (job->ujob == ujoblist[i]) { if (firstjob == NULL) firstjob = job; if (job->jobflags & KAIOCB_FINISHED) goto RETURN; } } } /* All tasks were finished. */ if (firstjob == NULL) break; ki->kaio_flags |= KAIO_WAKEUP; error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, "aiospn", timo); if (error == ERESTART) error = EINTR; if (error) break; } RETURN: AIO_UNLOCK(ki); return (error); } int sys_aio_suspend(struct thread *td, struct aio_suspend_args *uap) { struct timespec ts, *tsp; struct aiocb **ujoblist; int error; if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->timeout) { /* Get timespec struct. */ if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0) return (error); tsp = &ts; } else tsp = NULL; ujoblist = uma_zalloc(aiol_zone, M_WAITOK); error = copyin(uap->aiocbp, ujoblist, uap->nent * sizeof(ujoblist[0])); if (error == 0) error = kern_aio_suspend(td, uap->nent, ujoblist, tsp); uma_zfree(aiol_zone, ujoblist); return (error); } /* * aio_cancel cancels any non-physio aio operations not currently in * progress. 
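 *
 * The userland entry point is aio_cancel(2); its return value distinguishes
 * the cases handled below (illustrative sketch):
 *
 *	switch (aio_cancel(fd, &cb)) {	// a NULL aiocb pointer cancels every
 *					// outstanding request on fd
 *	case AIO_CANCELED:		// request was cancelled
 *		break;
 *	case AIO_NOTCANCELED:		// still in progress; poll aio_error()
 *		break;
 *	case AIO_ALLDONE:		// already completed
 *		break;
 *	}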
*/ int sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) { struct proc *p = td->td_proc; struct kaioinfo *ki; struct kaiocb *job, *jobn; struct file *fp; cap_rights_t rights; int error; int cancelled = 0; int notcancelled = 0; struct vnode *vp; /* Lookup file object. */ error = fget(td, uap->fd, cap_rights_init(&rights), &fp); if (error) return (error); ki = p->p_aioinfo; if (ki == NULL) goto done; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vn_isdisk(vp, &error)) { fdrop(fp, td); td->td_retval[0] = AIO_NOTCANCELED; return (0); } } AIO_LOCK(ki); TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) { if ((uap->fd == job->uaiocb.aio_fildes) && ((uap->aiocbp == NULL) || (uap->aiocbp == job->ujob))) { if (aio_cancel_job(p, ki, job)) { cancelled++; } else { notcancelled++; } if (uap->aiocbp != NULL) break; } } AIO_UNLOCK(ki); done: fdrop(fp, td); if (uap->aiocbp != NULL) { if (cancelled) { td->td_retval[0] = AIO_CANCELED; return (0); } } if (notcancelled) { td->td_retval[0] = AIO_NOTCANCELED; return (0); } if (cancelled) { td->td_retval[0] = AIO_CANCELED; return (0); } td->td_retval[0] = AIO_ALLDONE; return (0); } /* * aio_error is implemented in the kernel level for compatibility purposes * only. For a user mode async implementation, it would be best to do it in * a userland subroutine. */ static int kern_aio_error(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct kaiocb *job; struct kaioinfo *ki; int status; ki = p->p_aioinfo; if (ki == NULL) { td->td_retval[0] = EINVAL; return (0); } AIO_LOCK(ki); TAILQ_FOREACH(job, &ki->kaio_all, allist) { if (job->ujob == ujob) { if (job->jobflags & KAIOCB_FINISHED) td->td_retval[0] = job->uaiocb._aiocb_private.error; else td->td_retval[0] = EINPROGRESS; AIO_UNLOCK(ki); return (0); } } AIO_UNLOCK(ki); /* * Hack for failure of aio_aqueue. 
*/ status = ops->fetch_status(ujob); if (status == -1) { td->td_retval[0] = ops->fetch_error(ujob); return (0); } td->td_retval[0] = EINVAL; return (0); } int sys_aio_error(struct thread *td, struct aio_error_args *uap) { return (kern_aio_error(td, uap->aiocbp, &aiocb_ops)); } /* syscall - asynchronous read from a file (REALTIME) */ #ifdef COMPAT_FREEBSD6 int freebsd6_aio_read(struct thread *td, struct freebsd6_aio_read_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, &aiocb_ops_osigevent)); } #endif int sys_aio_read(struct thread *td, struct aio_read_args *uap) { return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops)); } /* syscall - asynchronous write to a file (REALTIME) */ #ifdef COMPAT_FREEBSD6 int freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops_osigevent)); } #endif int sys_aio_write(struct thread *td, struct aio_write_args *uap) { return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops)); } int sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) { return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops)); } static int kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, struct aiocb **acb_list, int nent, struct sigevent *sig, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct aiocb *job; struct kaioinfo *ki; struct aioliojob *lj; struct kevent kev; int error; int nerror; int i; if ((mode != LIO_NOWAIT) && (mode != LIO_WAIT)) return (EINVAL); if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (p->p_aioinfo == NULL) aio_init_aioinfo(p); ki = p->p_aioinfo; lj = uma_zalloc(aiolio_zone, M_WAITOK); lj->lioj_flags = 0; lj->lioj_count = 0; lj->lioj_finished_count = 0; knlist_init_mtx(&lj->klist, AIO_MTX(ki)); ksiginfo_init(&lj->lioj_ksi); /* * Setup signal. */ if (sig && (mode == LIO_NOWAIT)) { bcopy(sig, &lj->lioj_signal, sizeof(lj->lioj_signal)); if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { /* Assume only new style KEVENT */ kev.filter = EVFILT_LIO; kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1; kev.ident = (uintptr_t)uacb_list; /* something unique */ kev.data = (intptr_t)lj; /* pass user defined sigval data */ kev.udata = lj->lioj_signal.sigev_value.sival_ptr; error = kqfd_register( lj->lioj_signal.sigev_notify_kqueue, &kev, td, 1); if (error) { uma_zfree(aiolio_zone, lj); return (error); } } else if (lj->lioj_signal.sigev_notify == SIGEV_NONE) { ; } else if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID) { if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) { uma_zfree(aiolio_zone, lj); return EINVAL; } lj->lioj_flags |= LIOJ_SIGNAL; } else { uma_zfree(aiolio_zone, lj); return EINVAL; } } AIO_LOCK(ki); TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list); /* * Add extra aiocb count to avoid the lio to be freed * by other threads doing aio_waitcomplete or aio_return, * and prevent event from being sent until we have queued * all tasks. */ lj->lioj_count = 1; AIO_UNLOCK(ki); /* * Get pointers to the list of I/O requests. 
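 *
 * acb_list[] is the array the caller handed to lio_listio(2); a minimal
 * illustrative invocation would be:
 *
 *	struct aiocb *list[2] = { &rd_cb, &wr_cb };	// aio_lio_opcode set to
 *							// LIO_READ / LIO_WRITE
 *	lio_listio(LIO_WAIT, list, 2, NULL);
 *
 * LIO_NOWAIT may instead be paired with a sigevent to get a single
 * completion notification for the whole batch.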
*/ nerror = 0; for (i = 0; i < nent; i++) { job = acb_list[i]; if (job != NULL) { error = aio_aqueue(td, job, lj, LIO_NOP, ops); if (error != 0) nerror++; } } error = 0; AIO_LOCK(ki); if (mode == LIO_WAIT) { while (lj->lioj_count - 1 != lj->lioj_finished_count) { ki->kaio_flags |= KAIO_WAKEUP; error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, "aiospn", 0); if (error == ERESTART) error = EINTR; if (error) break; } } else { if (lj->lioj_count - 1 == lj->lioj_finished_count) { if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { lj->lioj_flags |= LIOJ_KEVENT_POSTED; KNOTE_LOCKED(&lj->klist, 1); } if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) { aio_sendsig(p, &lj->lioj_signal, &lj->lioj_ksi); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } } lj->lioj_count--; if (lj->lioj_count == 0) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); knlist_delete(&lj->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&lj->lioj_ksi); PROC_UNLOCK(p); AIO_UNLOCK(ki); uma_zfree(aiolio_zone, lj); } else AIO_UNLOCK(ki); if (nerror) return (EIO); return (error); } /* syscall - list directed I/O (REALTIME) */ #ifdef COMPAT_FREEBSD6 int freebsd6_lio_listio(struct thread *td, struct freebsd6_lio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; struct osigevent osig; int error, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &osig, sizeof(osig)); if (error) return (error); error = convert_old_sigevent(&osig, &sig); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); if (error == 0) error = kern_lio_listio(td, uap->mode, (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, &aiocb_ops_osigevent); free(acb_list, M_LIO); return (error); } #endif /* syscall - list directed I/O (REALTIME) */ int sys_lio_listio(struct thread *td, struct lio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; int error, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &sig, sizeof(sig)); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); if (error == 0) error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list, nent, sigp, &aiocb_ops); free(acb_list, M_LIO); return (error); } static void aio_physwakeup(struct bio *bp) { struct kaiocb *job = (struct kaiocb *)bp->bio_caller1; struct proc *userp; struct kaioinfo *ki; size_t nbytes; int error, nblks; /* Release mapping into kernel space. 
*/ userp = job->userproc; ki = userp->p_aioinfo; if (job->pbuf) { pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages); relpbuf(job->pbuf, NULL); job->pbuf = NULL; atomic_subtract_int(&num_buf_aio, 1); AIO_LOCK(ki); ki->kaio_buffer_count--; AIO_UNLOCK(ki); } vm_page_unhold_pages(job->pages, job->npages); bp = job->bp; job->bp = NULL; nbytes = job->uaiocb.aio_nbytes - bp->bio_resid; error = 0; if (bp->bio_flags & BIO_ERROR) error = bp->bio_error; nblks = btodb(nbytes); if (job->uaiocb.aio_lio_opcode == LIO_WRITE) job->outblock += nblks; else job->inblock += nblks; if (error) aio_complete(job, -1, error); else aio_complete(job, nbytes, 0); g_destroy_bio(bp); } /* syscall - wait for the next completion of an aio request */ static int kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp, struct timespec *ts, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct timeval atv; struct kaioinfo *ki; struct kaiocb *job; struct aiocb *ujob; long error, status; int timo; ops->store_aiocb(ujobp, NULL); if (ts == NULL) { timo = 0; } else if (ts->tv_sec == 0 && ts->tv_nsec == 0) { timo = -1; } else { if ((ts->tv_nsec < 0) || (ts->tv_nsec >= 1000000000)) return (EINVAL); TIMESPEC_TO_TIMEVAL(&atv, ts); if (itimerfix(&atv)) return (EINVAL); timo = tvtohz(&atv); } if (p->p_aioinfo == NULL) aio_init_aioinfo(p); ki = p->p_aioinfo; error = 0; job = NULL; AIO_LOCK(ki); while ((job = TAILQ_FIRST(&ki->kaio_done)) == NULL) { if (timo == -1) { error = EWOULDBLOCK; break; } ki->kaio_flags |= KAIO_WAKEUP; error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, "aiowc", timo); if (timo && error == ERESTART) error = EINTR; if (error) break; } if (job != NULL) { MPASS(job->jobflags & KAIOCB_FINISHED); ujob = job->ujob; status = job->uaiocb._aiocb_private.status; error = job->uaiocb._aiocb_private.error; td->td_retval[0] = status; td->td_ru.ru_oublock += job->outblock; td->td_ru.ru_inblock += job->inblock; td->td_ru.ru_msgsnd += job->msgsnd; td->td_ru.ru_msgrcv += job->msgrcv; aio_free_entry(job); AIO_UNLOCK(ki); ops->store_aiocb(ujobp, ujob); ops->store_error(ujob, error); ops->store_status(ujob, status); } else AIO_UNLOCK(ki); return (error); } int sys_aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap) { struct timespec ts, *tsp; int error; if (uap->timeout) { /* Get timespec struct. */ error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); tsp = &ts; } else tsp = NULL; return (kern_aio_waitcomplete(td, uap->aiocbp, tsp, &aiocb_ops)); } static int kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob, struct aiocb_ops *ops) { if (op != O_SYNC) /* XXX lack of O_DSYNC */ return (EINVAL); return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops)); } int sys_aio_fsync(struct thread *td, struct aio_fsync_args *uap) { return (kern_aio_fsync(td, uap->op, uap->aiocbp, &aiocb_ops)); } /* kqueue attach function */ static int filt_aioattach(struct knote *kn) { - struct kaiocb *job = (struct kaiocb *)kn->kn_sdata; + struct kaiocb *job; + job = (struct kaiocb *)(uintptr_t)kn->kn_sdata; + /* * The job pointer must be validated before using it, so * registration is restricted to the kernel; the user cannot * set EV_FLAG1. 
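 *
 * Userland never adds EVFILT_AIO knotes itself; it requests them through the
 * aiocb's sigevent, and the registration above is done by the kernel on its
 * behalf.  An illustrative sketch of that usage:
 *
 *	cb.aio_sigevent.sigev_notify = SIGEV_KEVENT;
 *	cb.aio_sigevent.sigev_notify_kqueue = kq;	// from kqueue()
 *	cb.aio_sigevent.sigev_value.sival_ptr = &cb;
 *	aio_read(&cb);
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	// ev.ident == (uintptr_t)&cb,
 *						// ev.udata == &cb on completion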
*/ if ((kn->kn_flags & EV_FLAG1) == 0) return (EPERM); kn->kn_ptr.p_aio = job; kn->kn_flags &= ~EV_FLAG1; knlist_add(&job->klist, kn, 0); return (0); } /* kqueue detach function */ static void filt_aiodetach(struct knote *kn) { struct knlist *knl; knl = &kn->kn_ptr.p_aio->klist; knl->kl_lock(knl->kl_lockarg); if (!knlist_empty(knl)) knlist_remove(knl, kn, 1); knl->kl_unlock(knl->kl_lockarg); } /* kqueue filter function */ /*ARGSUSED*/ static int filt_aio(struct knote *kn, long hint) { struct kaiocb *job = kn->kn_ptr.p_aio; kn->kn_data = job->uaiocb._aiocb_private.error; if (!(job->jobflags & KAIOCB_FINISHED)) return (0); kn->kn_flags |= EV_EOF; return (1); } /* kqueue attach function */ static int filt_lioattach(struct knote *kn) { - struct aioliojob * lj = (struct aioliojob *)kn->kn_sdata; + struct aioliojob *lj; + + lj = (struct aioliojob *)(uintptr_t)kn->kn_sdata; /* * The aioliojob pointer must be validated before using it, so * registration is restricted to the kernel; the user cannot * set EV_FLAG1. */ if ((kn->kn_flags & EV_FLAG1) == 0) return (EPERM); kn->kn_ptr.p_lio = lj; kn->kn_flags &= ~EV_FLAG1; knlist_add(&lj->klist, kn, 0); return (0); } /* kqueue detach function */ static void filt_liodetach(struct knote *kn) { struct knlist *knl; knl = &kn->kn_ptr.p_lio->klist; knl->kl_lock(knl->kl_lockarg); if (!knlist_empty(knl)) knlist_remove(knl, kn, 1); knl->kl_unlock(knl->kl_lockarg); } /* kqueue filter function */ /*ARGSUSED*/ static int filt_lio(struct knote *kn, long hint) { struct aioliojob * lj = kn->kn_ptr.p_lio; return (lj->lioj_flags & LIOJ_KEVENT_POSTED); } #ifdef COMPAT_FREEBSD32 #include #include #include #include #include #include #include struct __aiocb_private32 { int32_t status; int32_t error; uint32_t kernelinfo; }; #ifdef COMPAT_FREEBSD6 typedef struct oaiocb32 { int aio_fildes; /* File descriptor */ uint64_t aio_offset __packed; /* File offset for I/O */ uint32_t aio_buf; /* I/O buffer in process space */ uint32_t aio_nbytes; /* Number of bytes for I/O */ struct osigevent32 aio_sigevent; /* Signal to deliver */ int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private32 _aiocb_private; } oaiocb32_t; #endif typedef struct aiocb32 { int32_t aio_fildes; /* File descriptor */ uint64_t aio_offset __packed; /* File offset for I/O */ uint32_t aio_buf; /* I/O buffer in process space */ uint32_t aio_nbytes; /* Number of bytes for I/O */ int __spare__[2]; uint32_t __spare2__; int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private32 _aiocb_private; struct sigevent32 aio_sigevent; /* Signal to deliver */ } aiocb32_t; #ifdef COMPAT_FREEBSD6 static int convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig) { /* * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are * supported by AIO with the old sigevent structure. 
*/ CP(*osig, *nsig, sigev_notify); switch (nsig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_SIGNAL: nsig->sigev_signo = osig->__sigev_u.__sigev_signo; break; case SIGEV_KEVENT: nsig->sigev_notify_kqueue = osig->__sigev_u.__sigev_notify_kqueue; PTRIN_CP(*osig, *nsig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } static int aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) { struct oaiocb32 job32; int error; bzero(kjob, sizeof(struct aiocb)); error = copyin(ujob, &job32, sizeof(job32)); if (error) return (error); CP(job32, *kjob, aio_fildes); CP(job32, *kjob, aio_offset); PTRIN_CP(job32, *kjob, aio_buf); CP(job32, *kjob, aio_nbytes); CP(job32, *kjob, aio_lio_opcode); CP(job32, *kjob, aio_reqprio); CP(job32, *kjob, _aiocb_private.status); CP(job32, *kjob, _aiocb_private.error); PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); return (convert_old_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent)); } #endif static int aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob) { struct aiocb32 job32; int error; error = copyin(ujob, &job32, sizeof(job32)); if (error) return (error); CP(job32, *kjob, aio_fildes); CP(job32, *kjob, aio_offset); PTRIN_CP(job32, *kjob, aio_buf); CP(job32, *kjob, aio_nbytes); CP(job32, *kjob, aio_lio_opcode); CP(job32, *kjob, aio_reqprio); CP(job32, *kjob, _aiocb_private.status); CP(job32, *kjob, _aiocb_private.error); PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent)); } static long aiocb32_fetch_status(struct aiocb *ujob) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (fuword32(&ujob32->_aiocb_private.status)); } static long aiocb32_fetch_error(struct aiocb *ujob) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (fuword32(&ujob32->_aiocb_private.error)); } static int aiocb32_store_status(struct aiocb *ujob, long status) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (suword32(&ujob32->_aiocb_private.status, status)); } static int aiocb32_store_error(struct aiocb *ujob, long error) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (suword32(&ujob32->_aiocb_private.error, error)); } static int aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref)); } static int aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob) { return (suword32(ujobp, (long)ujob)); } static struct aiocb_ops aiocb32_ops = { .copyin = aiocb32_copyin, .fetch_status = aiocb32_fetch_status, .fetch_error = aiocb32_fetch_error, .store_status = aiocb32_store_status, .store_error = aiocb32_store_error, .store_kernelinfo = aiocb32_store_kernelinfo, .store_aiocb = aiocb32_store_aiocb, }; #ifdef COMPAT_FREEBSD6 static struct aiocb_ops aiocb32_ops_osigevent = { .copyin = aiocb32_copyin_old_sigevent, .fetch_status = aiocb32_fetch_status, .fetch_error = aiocb32_fetch_error, .store_status = aiocb32_store_status, .store_error = aiocb32_store_error, .store_kernelinfo = aiocb32_store_kernelinfo, .store_aiocb = aiocb32_store_aiocb, }; #endif int freebsd32_aio_return(struct thread *td, struct freebsd32_aio_return_args *uap) { return (kern_aio_return(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); } int freebsd32_aio_suspend(struct thread *td, struct freebsd32_aio_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct aiocb **ujoblist; uint32_t *ujoblist32; int error, i; if 
(uap->nent < 0 || uap->nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->timeout) { /* Get timespec struct. */ if ((error = copyin(uap->timeout, &ts32, sizeof(ts32))) != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; ujoblist = uma_zalloc(aiol_zone, M_WAITOK); ujoblist32 = (uint32_t *)ujoblist; error = copyin(uap->aiocbp, ujoblist32, uap->nent * sizeof(ujoblist32[0])); if (error == 0) { for (i = uap->nent; i > 0; i--) ujoblist[i] = PTRIN(ujoblist32[i]); error = kern_aio_suspend(td, uap->nent, ujoblist, tsp); } uma_zfree(aiol_zone, ujoblist); return (error); } int freebsd32_aio_error(struct thread *td, struct freebsd32_aio_error_args *uap) { return (kern_aio_error(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_aio_read(struct thread *td, struct freebsd6_freebsd32_aio_read_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, &aiocb32_ops_osigevent)); } #endif int freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, &aiocb32_ops)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_aio_write(struct thread *td, struct freebsd6_freebsd32_aio_write_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, &aiocb32_ops_osigevent)); } #endif int freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, &aiocb32_ops)); } int freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK, &aiocb32_ops)); } int freebsd32_aio_waitcomplete(struct thread *td, struct freebsd32_aio_waitcomplete_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; if (uap->timeout) { /* Get timespec struct. 
*/ error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; return (kern_aio_waitcomplete(td, (struct aiocb **)uap->aiocbp, tsp, &aiocb32_ops)); } int freebsd32_aio_fsync(struct thread *td, struct freebsd32_aio_fsync_args *uap) { return (kern_aio_fsync(td, uap->op, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_lio_listio(struct thread *td, struct freebsd6_freebsd32_lio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; struct osigevent32 osig; uint32_t *acb_list32; int error, i, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &osig, sizeof(osig)); if (error) return (error); error = convert_old_sigevent32(&osig, &sig); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t)); if (error) { free(acb_list32, M_LIO); return (error); } acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); for (i = 0; i < nent; i++) acb_list[i] = PTRIN(acb_list32[i]); free(acb_list32, M_LIO); error = kern_lio_listio(td, uap->mode, (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, &aiocb32_ops_osigevent); free(acb_list, M_LIO); return (error); } #endif int freebsd32_lio_listio(struct thread *td, struct freebsd32_lio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; struct sigevent32 sig32; uint32_t *acb_list32; int error, i, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &sig32, sizeof(sig32)); if (error) return (error); error = convert_sigevent32(&sig32, &sig); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t)); if (error) { free(acb_list32, M_LIO); return (error); } acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); for (i = 0; i < nent; i++) acb_list[i] = PTRIN(acb_list32[i]); free(acb_list32, M_LIO); error = kern_lio_listio(td, uap->mode, (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, &aiocb32_ops); free(acb_list, M_LIO); return (error); } #endif Index: head/sys/sys/event.h =================================================================== --- head/sys/sys/event.h (revision 320042) +++ head/sys/sys/event.h (revision 320043) @@ -1,303 +1,310 @@ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_EVENT_H_ #define _SYS_EVENT_H_ #include #include #define EVFILT_READ (-1) #define EVFILT_WRITE (-2) #define EVFILT_AIO (-3) /* attached to aio requests */ #define EVFILT_VNODE (-4) /* attached to vnodes */ #define EVFILT_PROC (-5) /* attached to struct proc */ #define EVFILT_SIGNAL (-6) /* attached to struct proc */ #define EVFILT_TIMER (-7) /* timers */ #define EVFILT_PROCDESC (-8) /* attached to process descriptors */ #define EVFILT_FS (-9) /* filesystem events */ #define EVFILT_LIO (-10) /* attached to lio requests */ #define EVFILT_USER (-11) /* User events */ #define EVFILT_SENDFILE (-12) /* attached to sendfile requests */ #define EVFILT_EMPTY (-13) /* empty send socket buf */ #define EVFILT_SYSCOUNT 13 #define EV_SET(kevp_, a, b, c, d, e, f) do { \ struct kevent *kevp = (kevp_); \ (kevp)->ident = (a); \ (kevp)->filter = (b); \ (kevp)->flags = (c); \ (kevp)->fflags = (d); \ (kevp)->data = (e); \ (kevp)->udata = (f); \ + (kevp)->ext[0] = 0; \ + (kevp)->ext[1] = 0; \ + (kevp)->ext[2] = 0; \ + (kevp)->ext[3] = 0; \ } while(0) struct kevent { __uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ unsigned short flags; unsigned int fflags; - __intptr_t data; + __int64_t data; void *udata; /* opaque user data identifier */ + __uint64_t ext[4]; }; /* actions */ #define EV_ADD 0x0001 /* add event to kq (implies enable) */ #define EV_DELETE 0x0002 /* delete event from kq */ #define EV_ENABLE 0x0004 /* enable event */ #define EV_DISABLE 0x0008 /* disable event (not reported) */ #define EV_FORCEONESHOT 0x0100 /* enable _ONESHOT and force trigger */ /* flags */ #define EV_ONESHOT 0x0010 /* only report one occurrence */ #define EV_CLEAR 0x0020 /* clear event state after reporting */ #define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data=0 */ #define EV_DISPATCH 0x0080 /* disable event after reporting */ #define EV_SYSFLAGS 0xF000 /* reserved by system */ #define EV_DROP 0x1000 /* note should be dropped */ #define EV_FLAG1 0x2000 /* filter-specific flag */ #define EV_FLAG2 0x4000 /* filter-specific flag */ /* returned values */ #define EV_EOF 0x8000 /* EOF detected */ #define EV_ERROR 0x4000 /* error, data contains errno */ /* * data/hint flags/masks for EVFILT_USER, shared with userspace * * On input, the top two bits of fflags specifies how the lower twenty four * bits should be applied to the stored value of fflags. * * On output, the top two bits will always be set to NOTE_FFNOP and the * remaining twenty four bits will contain the stored fflags value. 
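 *
 * A typical EVFILT_USER round trip looks like this (illustrative sketch):
 *
 *	EV_SET(&kev, 1, EVFILT_USER, EV_ADD | EV_CLEAR, NOTE_FFNOP, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	// register
 *	...
 *	EV_SET(&kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	// trigger, e.g. from another thread
 *	kevent(kq, NULL, 0, &kev, 1, NULL);	// waiter wakes with the event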
*/ #define NOTE_FFNOP 0x00000000 /* ignore input fflags */ #define NOTE_FFAND 0x40000000 /* AND fflags */ #define NOTE_FFOR 0x80000000 /* OR fflags */ #define NOTE_FFCOPY 0xc0000000 /* copy fflags */ #define NOTE_FFCTRLMASK 0xc0000000 /* masks for operations */ #define NOTE_FFLAGSMASK 0x00ffffff #define NOTE_TRIGGER 0x01000000 /* Cause the event to be triggered for output. */ /* * data/hint flags for EVFILT_{READ|WRITE}, shared with userspace */ #define NOTE_LOWAT 0x0001 /* low water mark */ #define NOTE_FILE_POLL 0x0002 /* behave like poll() */ /* * data/hint flags for EVFILT_VNODE, shared with userspace */ #define NOTE_DELETE 0x0001 /* vnode was removed */ #define NOTE_WRITE 0x0002 /* data contents changed */ #define NOTE_EXTEND 0x0004 /* size increased */ #define NOTE_ATTRIB 0x0008 /* attributes changed */ #define NOTE_LINK 0x0010 /* link count changed */ #define NOTE_RENAME 0x0020 /* vnode was renamed */ #define NOTE_REVOKE 0x0040 /* vnode access was revoked */ #define NOTE_OPEN 0x0080 /* vnode was opened */ #define NOTE_CLOSE 0x0100 /* file closed, fd did not allowed write */ #define NOTE_CLOSE_WRITE 0x0200 /* file closed, fd did allowed write */ #define NOTE_READ 0x0400 /* file was read */ /* * data/hint flags for EVFILT_PROC and EVFILT_PROCDESC, shared with userspace */ #define NOTE_EXIT 0x80000000 /* process exited */ #define NOTE_FORK 0x40000000 /* process forked */ #define NOTE_EXEC 0x20000000 /* process exec'd */ #define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */ #define NOTE_PDATAMASK 0x000fffff /* mask for pid */ /* additional flags for EVFILT_PROC */ #define NOTE_TRACK 0x00000001 /* follow across forks */ #define NOTE_TRACKERR 0x00000002 /* could not track child */ #define NOTE_CHILD 0x00000004 /* am a child process */ /* additional flags for EVFILT_TIMER */ #define NOTE_SECONDS 0x00000001 /* data is seconds */ #define NOTE_MSECONDS 0x00000002 /* data is milliseconds */ #define NOTE_USECONDS 0x00000004 /* data is microseconds */ #define NOTE_NSECONDS 0x00000008 /* data is nanoseconds */ +#define NOTE_ABSTIME 0x00000010 /* timeout is absolute */ struct knote; SLIST_HEAD(klist, knote); struct kqueue; TAILQ_HEAD(kqlist, kqueue); struct knlist { struct klist kl_list; void (*kl_lock)(void *); /* lock function */ void (*kl_unlock)(void *); void (*kl_assert_locked)(void *); void (*kl_assert_unlocked)(void *); void *kl_lockarg; /* argument passed to lock functions */ int kl_autodestroy; }; #ifdef _KERNEL /* * Flags for knote call */ #define KNF_LISTLOCKED 0x0001 /* knlist is locked */ #define KNF_NOKQLOCK 0x0002 /* do not keep KQ_LOCK */ #define KNOTE(list, hint, flags) knote(list, hint, flags) #define KNOTE_LOCKED(list, hint) knote(list, hint, KNF_LISTLOCKED) #define KNOTE_UNLOCKED(list, hint) knote(list, hint, 0) #define KNLIST_EMPTY(list) SLIST_EMPTY(&(list)->kl_list) /* * Flag indicating hint is a signal. Used by EVFILT_SIGNAL, and also * shared by EVFILT_PROC (all knotes attached to p->p_klist) */ #define NOTE_SIGNAL 0x08000000 /* * Hint values for the optional f_touch event filter. If f_touch is not set * to NULL and f_isfd is zero the f_touch filter will be called with the type * argument set to EVENT_REGISTER during a kevent() system call. It is also * called under the same conditions with the type argument set to EVENT_PROCESS * when the event has been triggered. 
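NOTE_ABSTIME, added above for EVFILT_TIMER, lets the data field carry an absolute deadline rather than a relative interval, in the unit selected by NOTE_SECONDS and friends. Below is a hedged sketch of arming a one-shot timer against a wall-clock deadline, mirroring the test_abstime() case added in tests/sys/kqueue/libkqueue/timer.c later in this commit; the helper name and the 3-second offset are only examples.

/*
 * Illustrative only: arm a one-shot EVFILT_TIMER that fires at an
 * absolute time (now + 3 seconds), as exercised by test_abstime().
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <time.h>

static void
arm_absolute_timer(int kq)
{
	struct kevent kev;
	time_t deadline = time(NULL) + 3;

	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ONESHOT,
	    NOTE_ABSTIME | NOTE_SECONDS, (int64_t)deadline, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent(EVFILT_TIMER, NOTE_ABSTIME)");
}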
*/ #define EVENT_REGISTER 1 #define EVENT_PROCESS 2 struct filterops { int f_isfd; /* true if ident == filedescriptor */ int (*f_attach)(struct knote *kn); void (*f_detach)(struct knote *kn); int (*f_event)(struct knote *kn, long hint); void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type); }; /* * An in-flux knote cannot be dropped from its kq while the kq is * unlocked. If the KN_SCAN flag is not set, a thread can only set * kn_influx when it is exclusive owner of the knote state, and can * modify kn_status as if it had the KQ lock. KN_SCAN must not be set * on a knote which is already in flux. * * kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock. */ struct knote { SLIST_ENTRY(knote) kn_link; /* for kq */ SLIST_ENTRY(knote) kn_selnext; /* for struct selinfo */ struct knlist *kn_knlist; /* f_attach populated */ TAILQ_ENTRY(knote) kn_tqe; struct kqueue *kn_kq; /* which queue we are on */ struct kevent kn_kevent; void *kn_hook; int kn_hookid; int kn_status; /* protected by kq lock */ #define KN_ACTIVE 0x01 /* event has been triggered */ #define KN_QUEUED 0x02 /* event is on queue */ #define KN_DISABLED 0x04 /* event is disabled */ #define KN_DETACHED 0x08 /* knote is detached */ #define KN_MARKER 0x20 /* ignore this knote */ #define KN_KQUEUE 0x40 /* this knote belongs to a kq */ #define KN_HASKQLOCK 0x80 /* for _inevent */ #define KN_SCAN 0x100 /* flux set in kqueue_scan() */ int kn_influx; int kn_sfflags; /* saved filter flags */ - intptr_t kn_sdata; /* saved data field */ + int64_t kn_sdata; /* saved data field */ union { struct file *p_fp; /* file data pointer */ struct proc *p_proc; /* proc pointer */ struct kaiocb *p_aio; /* AIO job pointer */ struct aioliojob *p_lio; /* LIO job pointer */ void *p_v; /* generic other pointer */ } kn_ptr; struct filterops *kn_fop; #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter #define kn_flags kn_kevent.flags #define kn_fflags kn_kevent.fflags #define kn_data kn_kevent.data #define kn_fp kn_ptr.p_fp }; struct kevent_copyops { void *arg; int (*k_copyout)(void *arg, struct kevent *kevp, int count); int (*k_copyin)(void *arg, struct kevent *kevp, int count); + size_t kevent_size; }; struct thread; struct proc; struct knlist; struct mtx; struct rwlock; void knote(struct knlist *list, long hint, int lockflags); void knote_fork(struct knlist *list, int pid); struct knlist *knlist_alloc(struct mtx *lock); void knlist_detach(struct knlist *knl); void knlist_add(struct knlist *knl, struct knote *kn, int islocked); void knlist_remove(struct knlist *knl, struct knote *kn, int islocked); int knlist_empty(struct knlist *knl); void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *)); void knlist_init_mtx(struct knlist *knl, struct mtx *lock); void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock); void knlist_destroy(struct knlist *knl); void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn); #define knlist_clear(knl, islocked) \ knlist_cleardel((knl), NULL, (islocked), 0) #define knlist_delete(knl, td, islocked) \ knlist_cleardel((knl), (td), (islocked), 1) void knote_fdclose(struct thread *p, int fd); int kqfd_register(int fd, struct kevent *kev, struct thread *p, int waitok); int kqueue_add_filteropts(int filt, struct filterops *filtops); int kqueue_del_filteropts(int filt); #else /* !_KERNEL */ #include struct timespec; __BEGIN_DECLS int kqueue(void); int 
kevent(int kq, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_EVENT_H_ */ Index: head/tests/sys/kqueue/libkqueue/main.c =================================================================== --- head/tests/sys/kqueue/libkqueue/main.c (revision 320042) +++ head/tests/sys/kqueue/libkqueue/main.c (revision 320043) @@ -1,329 +1,338 @@ /* * Copyright (c) 2009 Mark Heily * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $FreeBSD$ */ #include #include "config.h" #include "common.h" int testnum = 1; char *cur_test_id = NULL; int kqfd; extern void test_evfilt_read(); extern void test_evfilt_signal(); extern void test_evfilt_vnode(); extern void test_evfilt_timer(); extern void test_evfilt_proc(); #if HAVE_EVFILT_USER extern void test_evfilt_user(); #endif /* Checks if any events are pending, which is an error. */ void test_no_kevents(void) { int nfds; struct timespec timeo; struct kevent kev; puts("confirming that there are no events pending"); memset(&timeo, 0, sizeof(timeo)); nfds = kevent(kqfd, NULL, 0, &kev, 1, &timeo); if (nfds != 0) { puts("\nUnexpected event:"); puts(kevent_to_str(&kev)); errx(1, "%d event(s) pending, but none expected:", nfds); } } /* Retrieve a single kevent */ struct kevent * kevent_get(int kqfd) { int nfds; struct kevent *kev; if ((kev = calloc(1, sizeof(*kev))) == NULL) err(1, "out of memory"); nfds = kevent(kqfd, NULL, 0, kev, 1, NULL); if (nfds < 1) err(1, "kevent(2)"); return (kev); } /* Retrieve a single kevent, specifying a maximum time to wait for it. 
*/ struct kevent * kevent_get_timeout(int kqfd, int seconds) { int nfds; struct kevent *kev; struct timespec timeout = {seconds, 0}; if ((kev = calloc(1, sizeof(*kev))) == NULL) err(1, "out of memory"); nfds = kevent(kqfd, NULL, 0, kev, 1, &timeout); if (nfds < 0) { err(1, "kevent(2)"); } else if (nfds == 0) { free(kev); kev = NULL; } return (kev); } char * kevent_fflags_dump(struct kevent *kev) { char *buf; #define KEVFFL_DUMP(attrib) \ if (kev->fflags & attrib) \ strncat(buf, #attrib" ", 64); if ((buf = calloc(1, 1024)) == NULL) abort(); /* Not every filter has meaningful fflags */ if (kev->filter == EVFILT_PROC) { snprintf(buf, 1024, "fflags = %x (", kev->fflags); KEVFFL_DUMP(NOTE_EXIT); KEVFFL_DUMP(NOTE_FORK); KEVFFL_DUMP(NOTE_EXEC); KEVFFL_DUMP(NOTE_CHILD); KEVFFL_DUMP(NOTE_TRACKERR); KEVFFL_DUMP(NOTE_TRACK); buf[strlen(buf) - 1] = ')'; } else if (kev->filter == EVFILT_PROCDESC) { snprintf(buf, 1024, "fflags = %x (", kev->fflags); KEVFFL_DUMP(NOTE_EXIT); KEVFFL_DUMP(NOTE_FORK); KEVFFL_DUMP(NOTE_EXEC); buf[strlen(buf) - 1] = ')'; } else if (kev->filter == EVFILT_VNODE) { snprintf(buf, 1024, "fflags = %x (", kev->fflags); KEVFFL_DUMP(NOTE_DELETE); KEVFFL_DUMP(NOTE_WRITE); KEVFFL_DUMP(NOTE_EXTEND); #if HAVE_NOTE_TRUNCATE KEVFFL_DUMP(NOTE_TRUNCATE); #endif KEVFFL_DUMP(NOTE_ATTRIB); KEVFFL_DUMP(NOTE_LINK); KEVFFL_DUMP(NOTE_RENAME); #if HAVE_NOTE_REVOKE KEVFFL_DUMP(NOTE_REVOKE); #endif buf[strlen(buf) - 1] = ')'; } else { snprintf(buf, 1024, "fflags = %x", kev->fflags); } return (buf); } char * kevent_flags_dump(struct kevent *kev) { char *buf; #define KEVFL_DUMP(attrib) \ if (kev->flags & attrib) \ strncat(buf, #attrib" ", 64); if ((buf = calloc(1, 1024)) == NULL) abort(); snprintf(buf, 1024, "flags = %d (", kev->flags); KEVFL_DUMP(EV_ADD); KEVFL_DUMP(EV_ENABLE); KEVFL_DUMP(EV_DISABLE); KEVFL_DUMP(EV_DELETE); KEVFL_DUMP(EV_ONESHOT); KEVFL_DUMP(EV_CLEAR); KEVFL_DUMP(EV_EOF); KEVFL_DUMP(EV_ERROR); #if HAVE_EV_DISPATCH KEVFL_DUMP(EV_DISPATCH); #endif #if HAVE_EV_RECEIPT KEVFL_DUMP(EV_RECEIPT); #endif buf[strlen(buf) - 1] = ')'; return (buf); } /* Copied from ../kevent.c kevent_dump() and improved */ const char * kevent_to_str(struct kevent *kev) { char buf[512]; snprintf(&buf[0], sizeof(buf), - "[ident=%d, filter=%d, %s, %s, data=%d, udata=%p]", - (u_int) kev->ident, + "[ident=%ju, filter=%d, %s, %s, data=%jd, udata=%p, " + "ext=[%jx %jx %jx %jx]", + (uintmax_t) kev->ident, kev->filter, kevent_flags_dump(kev), kevent_fflags_dump(kev), - (int) kev->data, - kev->udata); + (uintmax_t)kev->data, + kev->udata, + (uintmax_t)kev->ext[0], + (uintmax_t)kev->ext[1], + (uintmax_t)kev->ext[2], + (uintmax_t)kev->ext[3]); return (strdup(buf)); } void kevent_add(int kqfd, struct kevent *kev, uintptr_t ident, short filter, u_short flags, u_int fflags, intptr_t data, void *udata) { EV_SET(kev, ident, filter, flags, fflags, data, NULL); if (kevent(kqfd, kev, 1, NULL, 0, NULL) < 0) { printf("Unable to add the following kevent:\n%s\n", kevent_to_str(kev)); err(1, "kevent(): %s", strerror(errno)); } } void kevent_cmp(struct kevent *k1, struct kevent *k2) { /* XXX- Workaround for inconsistent implementation of kevent(2) */ #ifdef __FreeBSD__ if (k1->flags & EV_ADD) k2->flags |= EV_ADD; #endif - if (memcmp(k1, k2, sizeof(*k1)) != 0) { + if (k1->ident != k2->ident || k1->filter != k2->filter || + k1->flags != k2->flags || k1->fflags != k2->fflags || + k1->data != k2->data || k1->udata != k2->udata || + k1->ext[0] != k2->ext[0] || k1->ext[1] != k2->ext[1] || + k1->ext[0] != k2->ext[2] || k1->ext[0] != k2->ext[3]) { 
printf("kevent_cmp: mismatch:\n %s !=\n %s\n", kevent_to_str(k1), kevent_to_str(k2)); abort(); } } void test_begin(const char *func) { if (cur_test_id) free(cur_test_id); cur_test_id = strdup(func); if (!cur_test_id) err(1, "strdup failed"); printf("\n\nTest %d: %s\n", testnum++, func); } void success(void) { printf("%-70s %s\n", cur_test_id, "passed"); free(cur_test_id); cur_test_id = NULL; } void test_kqueue(void) { test_begin("kqueue()"); if ((kqfd = kqueue()) < 0) err(1, "kqueue()"); test_no_kevents(); success(); } void test_kqueue_close(void) { test_begin("close(kq)"); if (close(kqfd) < 0) err(1, "close()"); success(); } int main(int argc, char **argv) { int test_proc = 1; int test_socket = 1; int test_signal = 1; int test_vnode = 1; int test_timer = 1; #ifdef __FreeBSD__ int test_user = 1; #else /* XXX-FIXME temporary */ int test_user = 0; #endif while (argc) { if (strcmp(argv[0], "--no-proc") == 0) test_proc = 0; if (strcmp(argv[0], "--no-socket") == 0) test_socket = 0; if (strcmp(argv[0], "--no-timer") == 0) test_timer = 0; if (strcmp(argv[0], "--no-signal") == 0) test_signal = 0; if (strcmp(argv[0], "--no-vnode") == 0) test_vnode = 0; if (strcmp(argv[0], "--no-user") == 0) test_user = 0; argv++; argc--; } /* * Some tests fork. If output is fully buffered, * the children inherit some buffered data and flush * it when they exit, causing some data to be printed twice. * Use line buffering to avoid this problem. */ setlinebuf(stdout); setlinebuf(stderr); test_kqueue(); test_kqueue_close(); if (test_socket) test_evfilt_read(); if (test_signal) test_evfilt_signal(); if (test_vnode) test_evfilt_vnode(); #if HAVE_EVFILT_USER if (test_user) test_evfilt_user(); #endif if (test_timer) test_evfilt_timer(); if (test_proc) test_evfilt_proc(); printf("\n---\n" "+OK All %d tests completed.\n", testnum - 1); return (0); } Index: head/tests/sys/kqueue/libkqueue/timer.c =================================================================== --- head/tests/sys/kqueue/libkqueue/timer.c (revision 320042) +++ head/tests/sys/kqueue/libkqueue/timer.c (revision 320043) @@ -1,178 +1,213 @@ /* * Copyright (c) 2009 Mark Heily * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
* * $FreeBSD$ */ #include "common.h" +#include int kqfd; void test_kevent_timer_add(void) { const char *test_id = "kevent(EVFILT_TIMER, EV_ADD)"; struct kevent kev; test_begin(test_id); EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 1000, NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); success(); } void test_kevent_timer_del(void) { const char *test_id = "kevent(EVFILT_TIMER, EV_DELETE)"; struct kevent kev; test_begin(test_id); EV_SET(&kev, 1, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); test_no_kevents(); success(); } void test_kevent_timer_get(void) { const char *test_id = "kevent(EVFILT_TIMER, wait)"; struct kevent kev; test_begin(test_id); EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD, 0, 1000, NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); kev.flags |= EV_CLEAR; kev.data = 1; kevent_cmp(&kev, kevent_get(kqfd)); EV_SET(&kev, 1, EVFILT_TIMER, EV_DELETE, 0, 0, NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); success(); } static void test_oneshot(void) { const char *test_id = "kevent(EVFILT_TIMER, EV_ONESHOT)"; struct kevent kev; test_begin(test_id); test_no_kevents(); EV_SET(&kev, vnode_fd, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 500,NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); /* Retrieve the event */ kev.flags = EV_ADD | EV_CLEAR | EV_ONESHOT; kev.data = 1; kevent_cmp(&kev, kevent_get(kqfd)); /* Check if the event occurs again */ sleep(3); test_no_kevents(); success(); } static void test_periodic(void) { const char *test_id = "kevent(EVFILT_TIMER, periodic)"; struct kevent kev; test_begin(test_id); test_no_kevents(); EV_SET(&kev, vnode_fd, EVFILT_TIMER, EV_ADD, 0, 1000,NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); /* Retrieve the event */ kev.flags = EV_ADD | EV_CLEAR; kev.data = 1; kevent_cmp(&kev, kevent_get(kqfd)); /* Check if the event occurs again */ sleep(1); kevent_cmp(&kev, kevent_get(kqfd)); /* Delete the event */ kev.flags = EV_DELETE; if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); success(); } static void disable_and_enable(void) { const char *test_id = "kevent(EVFILT_TIMER, EV_DISABLE and EV_ENABLE)"; struct kevent kev; test_begin(test_id); test_no_kevents(); /* Add the watch and immediately disable it */ EV_SET(&kev, vnode_fd, EVFILT_TIMER, EV_ADD | EV_ONESHOT, 0, 2000,NULL); if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); kev.flags = EV_DISABLE; if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); test_no_kevents(); /* Re-enable and check again */ kev.flags = EV_ENABLE; if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) err(1, "%s", test_id); kev.flags = EV_ADD | EV_CLEAR | EV_ONESHOT; kev.data = 1; kevent_cmp(&kev, kevent_get(kqfd)); success(); } +static void +test_abstime(void) +{ + const char *test_id = "kevent(EVFILT_TIMER, EV_ONESHOT, NOTE_ABSTIME)"; + struct kevent kev; + time_t when; + const int timeout = 3; + + test_begin(test_id); + + test_no_kevents(); + + when = time(NULL); + EV_SET(&kev, vnode_fd, EVFILT_TIMER, EV_ADD | EV_ONESHOT, + NOTE_ABSTIME | NOTE_SECONDS, when + timeout, NULL); + if (kevent(kqfd, &kev, 1, NULL, 0, NULL) < 0) + err(1, "%s", test_id); + + /* Retrieve the event */ + kev.flags = EV_ADD | EV_ONESHOT; + kev.data = 1; + kev.fflags = 0; + kevent_cmp(&kev, kevent_get(kqfd)); + if (time(NULL) < when + timeout) + err(1, "too early %jd %jd", time(), when + timeout); + + /* Check if the event occurs again */ + 
sleep(3); + test_no_kevents(); + + success(); +} + void test_evfilt_timer() { kqfd = kqueue(); test_kevent_timer_add(); test_kevent_timer_del(); test_kevent_timer_get(); test_oneshot(); test_periodic(); + test_abstime(); disable_and_enable(); close(kqfd); } Index: head/usr.bin/truss/syscalls.c =================================================================== --- head/usr.bin/truss/syscalls.c (revision 320042) +++ head/usr.bin/truss/syscalls.c (revision 320043) @@ -1,2400 +1,2400 @@ /* * Copyright 1997 Sean Eric Fagan * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Sean Eric Fagan * 4. Neither the name of the author may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * This file has routines used to print out system calls and their * arguments. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "truss.h" #include "extern.h" #include "syscall.h" /* * This should probably be in its own file, sorted alphabetically. 
*/ static struct syscall decoded_syscalls[] = { /* Native ABI */ { .name = "__acl_aclcheck_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_aclcheck_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_delete_fd", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_file", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_delete_link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Acltype, 1 } } }, { .name = "__acl_get_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_get_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__acl_set_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Acltype, 1 }, { Ptr, 2 } } }, { .name = "__cap_rights_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CapRights | OUT, 2 } } }, { .name = "__getcwd", .ret_type = 1, .nargs = 2, .args = { { Name | OUT, 0 }, { Int, 1 } } }, { .name = "_umtx_op", .ret_type = 1, .nargs = 5, .args = { { Ptr, 0 }, { Umtxop, 1 }, { LongHex, 2 }, { Ptr, 3 }, { Ptr, 4 } } }, { .name = "accept", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "access", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "bindat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "break", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "cap_fcntls_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights | OUT, 1 } } }, { .name = "cap_fcntls_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapFcntlRights, 1 } } }, { .name = "cap_getmode", .ret_type = 1, .nargs = 1, .args = { { PUInt | OUT, 0 } } }, { .name = "cap_rights_limit", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CapRights, 1 } } }, { .name = "chdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "chflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "chflagsat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { FileFlags, 2 }, { Atflags, 3 } } }, { .name = "chmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "chown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "chroot", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "clock_gettime", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | IN, 1 }, { Socklent, 2 } } }, { .name = "connectat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Int, 1 }, { 
Sockaddr | IN, 2 }, { Int, 3 } } }, { .name = "dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "dup2", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "eaccess", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Accessmode, 1 } } }, { .name = "execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "exit", .ret_type = 0, .nargs = 1, .args = { { Hex, 0 } } }, { .name = "extattr_delete_fd", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_file", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_delete_link", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 } } }, { .name = "extattr_get_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_get_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | OUT, 3 }, { Sizet, 4 } } }, { .name = "extattr_list_fd", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_file", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_list_link", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { BinString | OUT, 2 }, { Sizet, 3 } } }, { .name = "extattr_set_fd", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_file", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattr_set_link", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Extattrnamespace, 1 }, { Name, 2 }, { BinString | IN, 3 }, { Sizet, 4 } } }, { .name = "extattrctl", .ret_type = 1, .nargs = 5, .args = { { Name, 0 }, { Hex, 1 }, { Name, 2 }, { Extattrnamespace, 3 }, { Name, 4 } } }, { .name = "faccessat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Accessmode, 2 }, { Atflags, 3 } } }, { .name = "fchflags", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { FileFlags, 1 } } }, { .name = "fchmod", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Octal, 1 } } }, { .name = "fchmodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Atflags, 3 } } }, { .name = "fchown", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "fchownat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Int, 2 }, { Int, 3 }, { Atflags, 4 } } }, { .name = "fcntl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Fcntl, 1 }, { Fcntlflag, 2 } } }, { .name = "flock", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Flockop, 1 } } }, { .name = "fstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Stat | OUT, 1 } } }, { .name = "fstatat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Stat | OUT, 2 }, { Atflags, 3 } } }, { .name = "fstatfs", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { StatFs | OUT, 1 } } }, { .name 
= "ftruncate", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { QuadHex | IN, 1 } } }, { .name = "futimens", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec2 | IN, 1 } } }, { .name = "futimes", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timeval2 | IN, 1 } } }, { .name = "futimesat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timeval2 | IN, 2 } } }, { .name = "getdirentries", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { PQuadHex | OUT, 3 } } }, { .name = "getfsstat", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Long, 1 }, { Getfsstatmode, 2 } } }, { .name = "getitimer", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Itimerval | OUT, 2 } } }, { .name = "getpeername", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getpgid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getpriority", .ret_type = 1, .nargs = 2, .args = { { Priowhich, 0 }, { Int, 1 } } }, { .name = "getrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | OUT, 1 } } }, { .name = "getrusage", .ret_type = 1, .nargs = 2, .args = { { RusageWho, 0 }, { Rusage | OUT, 1 } } }, { .name = "getsid", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "getsockname", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Sockaddr | OUT, 1 }, { Ptr | OUT, 2 } } }, { .name = "getsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { Kevent, 1 }, { Int, 2 }, { Kevent | OUT, 3 }, { Int, 4 }, { Timespec, 5 } } }, { .name = "kill", .ret_type = 1, .nargs = 2, .args = { { Int | IN, 0 }, { Signal | IN, 1 } } }, { .name = "kldfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldfirstmod", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldload", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "kldnext", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr, 1 } } }, { .name = "kldsym", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Kldsymcmd, 1 }, { Ptr, 2 } } }, { .name = "kldunload", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "kldunloadf", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Kldunloadflags, 1 } } }, { .name = "kse_release", .ret_type = 0, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "lchflags", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { FileFlags, 1 } } }, { .name = "lchmod", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "lchown", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "link", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "linkat", .ret_type = 1, .nargs = 5, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 }, { Atflags, 4 } } }, { .name = "listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { QuadHex, 1 }, { Whence, 2 } } }, { .name = "lstat", .ret_type = 1, .nargs = 2, .args = { { Name | 
IN, 0 }, { Stat | OUT, 1 } } }, { .name = "lutimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "madvise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Madvice, 2 } } }, { .name = "minherit", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Minherit, 2 } } }, { .name = "mkdir", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkdirat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mkfifo", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Octal, 1 } } }, { .name = "mkfifoat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 } } }, { .name = "mknod", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Octal, 1 }, { Int, 2 } } }, { .name = "mknodat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Octal, 2 }, { Int, 3 } } }, { .name = "mlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "mlockall", .ret_type = 1, .nargs = 1, .args = { { Mlockall, 0 } } }, { .name = "mmap", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 }, { Mmapflags, 3 }, { Int, 4 }, { QuadHex, 5 } } }, { .name = "modfind", .ret_type = 1, .nargs = 1, .args = { { Name | IN, 0 } } }, { .name = "mount", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Name, 1 }, { Mountflags, 2 }, { Ptr, 3 } } }, { .name = "mprotect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Mprot, 2 } } }, { .name = "msync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Sizet, 1 }, { Msync, 2 } } }, { .name = "munlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "munmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Sizet, 1 } } }, { .name = "nanosleep", .ret_type = 1, .nargs = 1, .args = { { Timespec, 0 } } }, { .name = "nmount", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { UInt, 1 }, { Mountflags, 2 } } }, { .name = "open", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { Open, 1 }, { Octal, 2 } } }, { .name = "openat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Open, 2 }, { Octal, 3 } } }, { .name = "pathconf", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Pathconf, 1 } } }, { .name = "pipe", .ret_type = 1, .nargs = 1, .args = { { PipeFds | OUT, 0 } } }, { .name = "pipe2", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Pipe2, 1 } } }, { .name = "poll", .ret_type = 1, .nargs = 3, .args = { { Pollfd, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "posix_fadvise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { QuadHex, 1 }, { QuadHex, 2 }, { Fadvice, 3 } } }, { .name = "posix_openpt", .ret_type = 1, .nargs = 1, .args = { { Open, 0 } } }, { .name = "pread", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "procctl", .ret_type = 1, .nargs = 4, .args = { { Idtype, 0 }, { Quad, 1 }, { Procctl, 2 }, { Ptr, 3 } } }, { .name = "ptrace", .ret_type = 1, .nargs = 4, .args = { { Ptraceop, 0 }, { Int, 1 }, { Ptr, 2 }, { Int, 3 } } }, { .name = "pwrite", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { QuadHex, 3 } } }, { .name = "quotactl", .ret_type = 1, .nargs = 4, .args = { { Name, 0 }, { Quotactlcmd, 1 }, { Int, 2 }, { Ptr, 3 } } }, { .name = "read", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlink", .ret_type = 1, 
.nargs = 3, .args = { { Name, 0 }, { Readlinkres | OUT, 1 }, { Sizet, 2 } } }, { .name = "readlinkat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Readlinkres | OUT, 2 }, { Sizet, 3 } } }, { .name = "reboot", .ret_type = 1, .nargs = 1, .args = { { Reboothowto, 0 } } }, { .name = "recvfrom", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | OUT, 4 }, { Ptr | OUT, 5 } } }, { .name = "recvmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ptr, 1 }, { Msgflags, 2 } } }, { .name = "rename", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "renameat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name, 1 }, { Atfd, 2 }, { Name, 3 } } }, { .name = "rfork", .ret_type = 1, .nargs = 1, .args = { { Rforkflags, 0 } } }, { .name = "rmdir", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "rtprio", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "rtprio_thread", .ret_type = 1, .nargs = 3, .args = { { Rtpriofunc, 0 }, { Int, 1 }, { Ptr, 2 } } }, { .name = "sched_get_priority_max", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_get_priority_min", .ret_type = 1, .nargs = 1, .args = { { Schedpolicy, 0 } } }, { .name = "sched_getparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam | OUT, 1 } } }, { .name = "sched_getscheduler", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "sched_rr_get_interval", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Timespec | OUT, 1 } } }, { .name = "sched_setparam", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Schedparam, 1 } } }, { .name = "sched_setscheduler", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Schedpolicy, 1 }, { Schedparam, 2 } } }, { .name = "sctp_generic_recvmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { Ptr | IN, 1 }, { Int, 2 }, { Sockaddr | OUT, 3 }, { Ptr | OUT, 4 }, { Ptr | OUT, 5 }, { Ptr | OUT, 6 } } }, { .name = "sctp_generic_sendmsg", .ret_type = 1, .nargs = 7, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { Sockaddr | IN, 3 }, { Socklent, 4 }, { Ptr | IN, 5 }, { Msgflags, 6 } } }, { .name = "select", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Fd_set, 1 }, { Fd_set, 2 }, { Fd_set, 3 }, { Timeval, 4 } } }, { .name = "sendmsg", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ptr, 1 }, { Msgflags, 2 } } }, { .name = "sendto", .ret_type = 1, .nargs = 6, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 }, { Msgflags, 3 }, { Sockaddr | IN, 4 }, { Socklent | IN, 5 } } }, { .name = "setitimer", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Itimerval, 1 }, { Itimerval | OUT, 2 } } }, { .name = "setpriority", .ret_type = 1, .nargs = 3, .args = { { Priowhich, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "setrlimit", .ret_type = 1, .nargs = 2, .args = { { Resource, 0 }, { Rlimit | IN, 1 } } }, { .name = "setsockopt", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { Sockoptlevel, 1 }, { Sockoptname, 2 }, { Ptr | IN, 3 }, { Socklent, 4 } } }, { .name = "shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Shutdown, 1 } } }, { .name = "sigaction", .ret_type = 1, .nargs = 3, .args = { { Signal, 0 }, { Sigaction | IN, 1 }, { Sigaction | OUT, 2 } } }, { .name = "sigpending", .ret_type = 1, .nargs = 1, .args = { { Sigset | OUT, 0 } } }, { .name = "sigprocmask", .ret_type = 1, .nargs = 3, .args = { { Sigprocmask, 0 }, { Sigset, 1 }, { Sigset | OUT, 2 } } }, 
{ .name = "sigqueue", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Signal, 1 }, { LongHex, 2 } } }, { .name = "sigreturn", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "sigsuspend", .ret_type = 1, .nargs = 1, .args = { { Sigset | IN, 0 } } }, { .name = "sigtimedwait", .ret_type = 1, .nargs = 3, .args = { { Sigset | IN, 0 }, { Ptr, 1 }, { Timespec | IN, 2 } } }, { .name = "sigwait", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { Ptr, 1 } } }, { .name = "sigwaitinfo", .ret_type = 1, .nargs = 2, .args = { { Sigset | IN, 0 }, { Ptr, 1 } } }, { .name = "socket", .ret_type = 1, .nargs = 3, .args = { { Sockdomain, 0 }, { Socktype, 1 }, { Sockprotocol, 2 } } }, { .name = "stat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Stat | OUT, 1 } } }, { .name = "statfs", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { StatFs | OUT, 1 } } }, { .name = "symlink", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Name, 1 } } }, { .name = "symlinkat", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Atfd, 1 }, { Name, 2 } } }, { .name = "sysarch", .ret_type = 1, .nargs = 2, .args = { { Sysarch, 0 }, { Ptr, 1 } } }, { .name = "thr_kill", .ret_type = 1, .nargs = 2, .args = { { Long, 0 }, { Signal, 1 } } }, { .name = "thr_self", .ret_type = 1, .nargs = 1, .args = { { Ptr, 0 } } }, { .name = "truncate", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { QuadHex | IN, 1 } } }, #if 0 /* Does not exist */ { .name = "umount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Int, 2 } } }, #endif { .name = "unlink", .ret_type = 1, .nargs = 1, .args = { { Name, 0 } } }, { .name = "unlinkat", .ret_type = 1, .nargs = 3, .args = { { Atfd, 0 }, { Name, 1 }, { Atflags, 2 } } }, { .name = "unmount", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Mountflags, 1 } } }, { .name = "utimensat", .ret_type = 1, .nargs = 4, .args = { { Atfd, 0 }, { Name | IN, 1 }, { Timespec2 | IN, 2 }, { Atflags, 3 } } }, { .name = "utimes", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Timeval2 | IN, 1 } } }, { .name = "utrace", .ret_type = 1, .nargs = 1, .args = { { Utrace, 0 } } }, { .name = "wait4", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { ExitStatus | OUT, 1 }, { Waitoptions, 2 }, { Rusage | OUT, 3 } } }, { .name = "wait6", .ret_type = 1, .nargs = 6, .args = { { Idtype, 0 }, { Quad, 1 }, { ExitStatus | OUT, 2 }, { Waitoptions, 3 }, { Rusage | OUT, 4 }, { Ptr, 5 } } }, { .name = "write", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { Sizet, 2 } } }, /* Linux ABI */ { .name = "linux_access", .ret_type = 1, .nargs = 2, .args = { { Name, 0 }, { Accessmode, 1 } } }, { .name = "linux_execve", .ret_type = 1, .nargs = 3, .args = { { Name | IN, 0 }, { ExecArgs | IN, 1 }, { ExecEnv | IN, 2 } } }, { .name = "linux_lseek", .ret_type = 2, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Whence, 2 } } }, { .name = "linux_mkdir", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Int, 1 } } }, { .name = "linux_newfstat", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_newstat", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, { .name = "linux_open", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Hex, 1 }, { Octal, 2 } } }, { .name = "linux_readlink", .ret_type = 1, .nargs = 3, .args = { { Name, 0 }, { Name | OUT, 1 }, { Sizet, 2 } } }, { .name = "linux_socketcall", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { LinuxSockArgs, 1 } } }, { .name = 
"linux_stat64", .ret_type = 1, .nargs = 2, .args = { { Name | IN, 0 }, { Ptr | OUT, 1 } } }, /* CloudABI system calls. */ { .name = "cloudabi_sys_clock_res_get", .ret_type = 1, .nargs = 1, .args = { { CloudABIClockID, 0 } } }, { .name = "cloudabi_sys_clock_time_get", .ret_type = 1, .nargs = 2, .args = { { CloudABIClockID, 0 }, { CloudABITimestamp, 1 } } }, { .name = "cloudabi_sys_condvar_signal", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 }, { UInt, 2 } } }, { .name = "cloudabi_sys_fd_close", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_create1", .ret_type = 1, .nargs = 1, .args = { { CloudABIFileType, 0 } } }, { .name = "cloudabi_sys_fd_create2", .ret_type = 1, .nargs = 2, .args = { { CloudABIFileType, 0 }, { PipeFds | OUT, 0 } } }, { .name = "cloudabi_sys_fd_datasync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_dup", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_fd_replace", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_fd_seek", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { CloudABIWhence, 2 } } }, { .name = "cloudabi_sys_fd_stat_get", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABIFDStat | OUT, 1 } } }, { .name = "cloudabi_sys_fd_stat_put", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABIFDStat | IN, 1 }, { ClouduABIFDSFlags, 2 } } }, { .name = "cloudabi_sys_fd_sync", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_file_advise", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 }, { CloudABIAdvice, 3 } } }, { .name = "cloudabi_sys_file_allocate", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { Int, 2 } } }, { .name = "cloudabi_sys_file_create", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIFileType, 3 } } }, { .name = "cloudabi_sys_file_link", .ret_type = 1, .nargs = 4, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { Int, 3 }, { BinString | IN, 4 } } }, { .name = "cloudabi_sys_file_open", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { CloudABIOFlags, 3 }, { CloudABIFDStat | IN, 4 } } }, { .name = "cloudabi_sys_file_readdir", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | OUT, 1 }, { Int, 2 }, { Int, 3 } } }, { .name = "cloudabi_sys_file_readlink", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { BinString | OUT, 3 }, { Int, 4 } } }, { .name = "cloudabi_sys_file_rename", .ret_type = 1, .nargs = 4, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 3 }, { BinString | IN, 4 } } }, { .name = "cloudabi_sys_file_stat_fget", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABIFileStat | OUT, 1 } } }, { .name = "cloudabi_sys_file_stat_fput", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABIFileStat | IN, 1 }, { CloudABIFSFlags, 2 } } }, { .name = "cloudabi_sys_file_stat_get", .ret_type = 1, .nargs = 3, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { CloudABIFileStat | OUT, 3 } } }, { .name = "cloudabi_sys_file_stat_put", .ret_type = 1, .nargs = 4, .args = { { CloudABILookup, 0 }, { BinString | IN, 1 }, { CloudABIFileStat | IN, 3 }, { CloudABIFSFlags, 4 } } }, { .name = "cloudabi_sys_file_symlink", .ret_type = 1, .nargs = 3, .args = { { BinString | IN, 0 }, { Int, 2 }, { BinString | IN, 3 } } }, { .name = "cloudabi_sys_file_unlink", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { 
BinString | IN, 1 }, { CloudABIULFlags, 3 } } }, { .name = "cloudabi_sys_lock_unlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 } } }, { .name = "cloudabi_sys_mem_advise", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIAdvice, 2 } } }, { .name = "cloudabi_sys_mem_lock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_mem_map", .ret_type = 1, .nargs = 6, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMProt, 2 }, { CloudABIMFlags, 3 }, { Int, 4 }, { Int, 5 } } }, { .name = "cloudabi_sys_mem_protect", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMProt, 2 } } }, { .name = "cloudabi_sys_mem_sync", .ret_type = 1, .nargs = 3, .args = { { Ptr, 0 }, { Int, 1 }, { CloudABIMSFlags, 2 } } }, { .name = "cloudabi_sys_mem_unlock", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_mem_unmap", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_proc_exec", .ret_type = 1, .nargs = 5, .args = { { Int, 0 }, { BinString | IN, 1 }, { Int, 2 }, { IntArray, 3 }, { Int, 4 } } }, { .name = "cloudabi_sys_proc_exit", .ret_type = 1, .nargs = 1, .args = { { Int, 0 } } }, { .name = "cloudabi_sys_proc_fork", .ret_type = 1, .nargs = 0 }, { .name = "cloudabi_sys_proc_raise", .ret_type = 1, .nargs = 1, .args = { { CloudABISignal, 0 } } }, { .name = "cloudabi_sys_random_get", .ret_type = 1, .nargs = 2, .args = { { BinString | OUT, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_sock_accept", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABISockStat | OUT, 1 } } }, { .name = "cloudabi_sys_sock_bind", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { BinString | IN, 2 } } }, { .name = "cloudabi_sys_sock_connect", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Int, 1 }, { BinString | IN, 2 } } }, { .name = "cloudabi_sys_sock_listen", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { Int, 1 } } }, { .name = "cloudabi_sys_sock_shutdown", .ret_type = 1, .nargs = 2, .args = { { Int, 0 }, { CloudABISDFlags, 1 } } }, { .name = "cloudabi_sys_sock_stat_get", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { CloudABISockStat | OUT, 1 }, { CloudABISSFlags, 2 } } }, { .name = "cloudabi_sys_thread_exit", .ret_type = 1, .nargs = 2, .args = { { Ptr, 0 }, { CloudABIMFlags, 1 } } }, { .name = "cloudabi_sys_thread_yield", .ret_type = 1, .nargs = 0 }, { .name = 0 }, }; static STAILQ_HEAD(, syscall) syscalls; /* Xlat idea taken from strace */ struct xlat { int val; const char *str; }; #define X(a) { a, #a }, #define XEND { 0, NULL } static struct xlat kevent_filters[] = { X(EVFILT_READ) X(EVFILT_WRITE) X(EVFILT_AIO) X(EVFILT_VNODE) X(EVFILT_PROC) X(EVFILT_SIGNAL) X(EVFILT_TIMER) X(EVFILT_PROCDESC) X(EVFILT_FS) X(EVFILT_LIO) X(EVFILT_USER) X(EVFILT_SENDFILE) XEND }; static struct xlat kevent_flags[] = { X(EV_ADD) X(EV_DELETE) X(EV_ENABLE) X(EV_DISABLE) X(EV_ONESHOT) X(EV_CLEAR) X(EV_RECEIPT) X(EV_DISPATCH) X(EV_FORCEONESHOT) X(EV_DROP) X(EV_FLAG1) X(EV_ERROR) X(EV_EOF) XEND }; static struct xlat kevent_user_ffctrl[] = { X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY) XEND }; static struct xlat kevent_rdwr_fflags[] = { X(NOTE_LOWAT) X(NOTE_FILE_POLL) XEND }; static struct xlat kevent_vnode_fflags[] = { X(NOTE_DELETE) X(NOTE_WRITE) X(NOTE_EXTEND) X(NOTE_ATTRIB) X(NOTE_LINK) X(NOTE_RENAME) X(NOTE_REVOKE) XEND }; static struct xlat kevent_proc_fflags[] = { X(NOTE_EXIT) X(NOTE_FORK) X(NOTE_EXEC) X(NOTE_TRACK) X(NOTE_TRACKERR) 
X(NOTE_CHILD) XEND }; static struct xlat kevent_timer_fflags[] = { X(NOTE_SECONDS) X(NOTE_MSECONDS) X(NOTE_USECONDS) X(NOTE_NSECONDS) XEND }; static struct xlat poll_flags[] = { X(POLLSTANDARD) X(POLLIN) X(POLLPRI) X(POLLOUT) X(POLLERR) X(POLLHUP) X(POLLNVAL) X(POLLRDNORM) X(POLLRDBAND) X(POLLWRBAND) X(POLLINIGNEOF) XEND }; static struct xlat sigaction_flags[] = { X(SA_ONSTACK) X(SA_RESTART) X(SA_RESETHAND) X(SA_NOCLDSTOP) X(SA_NODEFER) X(SA_NOCLDWAIT) X(SA_SIGINFO) XEND }; static struct xlat pathconf_arg[] = { X(_PC_LINK_MAX) X(_PC_MAX_CANON) X(_PC_MAX_INPUT) X(_PC_NAME_MAX) X(_PC_PATH_MAX) X(_PC_PIPE_BUF) X(_PC_CHOWN_RESTRICTED) X(_PC_NO_TRUNC) X(_PC_VDISABLE) X(_PC_ASYNC_IO) X(_PC_PRIO_IO) X(_PC_SYNC_IO) X(_PC_ALLOC_SIZE_MIN) X(_PC_FILESIZEBITS) X(_PC_REC_INCR_XFER_SIZE) X(_PC_REC_MAX_XFER_SIZE) X(_PC_REC_MIN_XFER_SIZE) X(_PC_REC_XFER_ALIGN) X(_PC_SYMLINK_MAX) X(_PC_ACL_EXTENDED) X(_PC_ACL_PATH_MAX) X(_PC_CAP_PRESENT) X(_PC_INF_PRESENT) X(_PC_MAC_PRESENT) X(_PC_ACL_NFS4) X(_PC_MIN_HOLE_SIZE) XEND }; static struct xlat at_flags[] = { X(AT_EACCESS) X(AT_SYMLINK_NOFOLLOW) X(AT_SYMLINK_FOLLOW) X(AT_REMOVEDIR) XEND }; static struct xlat sysarch_ops[] = { #if defined(__i386__) || defined(__amd64__) X(I386_GET_LDT) X(I386_SET_LDT) X(I386_GET_IOPERM) X(I386_SET_IOPERM) X(I386_VM86) X(I386_GET_FSBASE) X(I386_SET_FSBASE) X(I386_GET_GSBASE) X(I386_SET_GSBASE) X(I386_GET_XFPUSTATE) X(AMD64_GET_FSBASE) X(AMD64_SET_FSBASE) X(AMD64_GET_GSBASE) X(AMD64_SET_GSBASE) X(AMD64_GET_XFPUSTATE) #endif XEND }; static struct xlat linux_socketcall_ops[] = { X(LINUX_SOCKET) X(LINUX_BIND) X(LINUX_CONNECT) X(LINUX_LISTEN) X(LINUX_ACCEPT) X(LINUX_GETSOCKNAME) X(LINUX_GETPEERNAME) X(LINUX_SOCKETPAIR) X(LINUX_SEND) X(LINUX_RECV) X(LINUX_SENDTO) X(LINUX_RECVFROM) X(LINUX_SHUTDOWN) X(LINUX_SETSOCKOPT) X(LINUX_GETSOCKOPT) X(LINUX_SENDMSG) X(LINUX_RECVMSG) XEND }; #undef X #define X(a) { CLOUDABI_##a, #a }, static struct xlat cloudabi_advice[] = { X(ADVICE_DONTNEED) X(ADVICE_NOREUSE) X(ADVICE_NORMAL) X(ADVICE_RANDOM) X(ADVICE_SEQUENTIAL) X(ADVICE_WILLNEED) XEND }; static struct xlat cloudabi_clockid[] = { X(CLOCK_MONOTONIC) X(CLOCK_PROCESS_CPUTIME_ID) X(CLOCK_REALTIME) X(CLOCK_THREAD_CPUTIME_ID) XEND }; static struct xlat cloudabi_errno[] = { X(E2BIG) X(EACCES) X(EADDRINUSE) X(EADDRNOTAVAIL) X(EAFNOSUPPORT) X(EAGAIN) X(EALREADY) X(EBADF) X(EBADMSG) X(EBUSY) X(ECANCELED) X(ECHILD) X(ECONNABORTED) X(ECONNREFUSED) X(ECONNRESET) X(EDEADLK) X(EDESTADDRREQ) X(EDOM) X(EDQUOT) X(EEXIST) X(EFAULT) X(EFBIG) X(EHOSTUNREACH) X(EIDRM) X(EILSEQ) X(EINPROGRESS) X(EINTR) X(EINVAL) X(EIO) X(EISCONN) X(EISDIR) X(ELOOP) X(EMFILE) X(EMLINK) X(EMSGSIZE) X(EMULTIHOP) X(ENAMETOOLONG) X(ENETDOWN) X(ENETRESET) X(ENETUNREACH) X(ENFILE) X(ENOBUFS) X(ENODEV) X(ENOENT) X(ENOEXEC) X(ENOLCK) X(ENOLINK) X(ENOMEM) X(ENOMSG) X(ENOPROTOOPT) X(ENOSPC) X(ENOSYS) X(ENOTCONN) X(ENOTDIR) X(ENOTEMPTY) X(ENOTRECOVERABLE) X(ENOTSOCK) X(ENOTSUP) X(ENOTTY) X(ENXIO) X(EOVERFLOW) X(EOWNERDEAD) X(EPERM) X(EPIPE) X(EPROTO) X(EPROTONOSUPPORT) X(EPROTOTYPE) X(ERANGE) X(EROFS) X(ESPIPE) X(ESRCH) X(ESTALE) X(ETIMEDOUT) X(ETXTBSY) X(EXDEV) X(ENOTCAPABLE) XEND }; static struct xlat cloudabi_fdflags[] = { X(FDFLAG_APPEND) X(FDFLAG_DSYNC) X(FDFLAG_NONBLOCK) X(FDFLAG_RSYNC) X(FDFLAG_SYNC) XEND }; static struct xlat cloudabi_fdsflags[] = { X(FDSTAT_FLAGS) X(FDSTAT_RIGHTS) XEND }; static struct xlat cloudabi_filetype[] = { X(FILETYPE_UNKNOWN) X(FILETYPE_BLOCK_DEVICE) X(FILETYPE_CHARACTER_DEVICE) X(FILETYPE_DIRECTORY) X(FILETYPE_FIFO) X(FILETYPE_POLL) X(FILETYPE_PROCESS) 
X(FILETYPE_REGULAR_FILE) X(FILETYPE_SHARED_MEMORY) X(FILETYPE_SOCKET_DGRAM) X(FILETYPE_SOCKET_SEQPACKET) X(FILETYPE_SOCKET_STREAM) X(FILETYPE_SYMBOLIC_LINK) XEND }; static struct xlat cloudabi_fsflags[] = { X(FILESTAT_ATIM) X(FILESTAT_ATIM_NOW) X(FILESTAT_MTIM) X(FILESTAT_MTIM_NOW) X(FILESTAT_SIZE) XEND }; static struct xlat cloudabi_mflags[] = { X(MAP_ANON) X(MAP_FIXED) X(MAP_PRIVATE) X(MAP_SHARED) XEND }; static struct xlat cloudabi_mprot[] = { X(PROT_EXEC) X(PROT_WRITE) X(PROT_READ) XEND }; static struct xlat cloudabi_msflags[] = { X(MS_ASYNC) X(MS_INVALIDATE) X(MS_SYNC) XEND }; static struct xlat cloudabi_oflags[] = { X(O_CREAT) X(O_DIRECTORY) X(O_EXCL) X(O_TRUNC) XEND }; static struct xlat cloudabi_sa_family[] = { X(AF_UNSPEC) X(AF_INET) X(AF_INET6) X(AF_UNIX) XEND }; static struct xlat cloudabi_sdflags[] = { X(SHUT_RD) X(SHUT_WR) XEND }; static struct xlat cloudabi_signal[] = { X(SIGABRT) X(SIGALRM) X(SIGBUS) X(SIGCHLD) X(SIGCONT) X(SIGFPE) X(SIGHUP) X(SIGILL) X(SIGINT) X(SIGKILL) X(SIGPIPE) X(SIGQUIT) X(SIGSEGV) X(SIGSTOP) X(SIGSYS) X(SIGTERM) X(SIGTRAP) X(SIGTSTP) X(SIGTTIN) X(SIGTTOU) X(SIGURG) X(SIGUSR1) X(SIGUSR2) X(SIGVTALRM) X(SIGXCPU) X(SIGXFSZ) XEND }; static struct xlat cloudabi_ssflags[] = { X(SOCKSTAT_CLEAR_ERROR) XEND }; static struct xlat cloudabi_ssstate[] = { X(SOCKSTATE_ACCEPTCONN) XEND }; static struct xlat cloudabi_ulflags[] = { X(UNLINK_REMOVEDIR) XEND }; static struct xlat cloudabi_whence[] = { X(WHENCE_CUR) X(WHENCE_END) X(WHENCE_SET) XEND }; #undef X #undef XEND /* * Searches an xlat array for a value, and returns it if found. Otherwise * return a string representation. */ static const char * lookup(struct xlat *xlat, int val, int base) { static char tmp[16]; for (; xlat->str != NULL; xlat++) if (xlat->val == val) return (xlat->str); switch (base) { case 8: sprintf(tmp, "0%o", val); break; case 16: sprintf(tmp, "0x%x", val); break; case 10: sprintf(tmp, "%u", val); break; default: errx(1,"Unknown lookup base"); break; } return (tmp); } static const char * xlookup(struct xlat *xlat, int val) { return (lookup(xlat, val, 16)); } /* * Searches an xlat array containing bitfield values. Remaining bits * set after removing the known ones are printed at the end: * IN|0x400. */ static char * xlookup_bits(struct xlat *xlat, int val) { int len, rem; static char str[512]; len = 0; rem = val; for (; xlat->str != NULL; xlat++) { if ((xlat->val & rem) == xlat->val) { /* * Don't print the "all-bits-zero" string unless all * bits are really zero. */ if (xlat->val == 0 && val != 0) continue; len += sprintf(str + len, "%s|", xlat->str); rem &= ~(xlat->val); } } /* * If we have leftover bits or didn't match anything, print * the remainder. 
*/ if (rem || len == 0) len += sprintf(str + len, "0x%x", rem); if (len && str[len - 1] == '|') len--; str[len] = 0; return (str); } static void print_integer_arg(const char *(*decoder)(int), FILE *fp, int value) { const char *str; str = decoder(value); if (str != NULL) fputs(str, fp); else fprintf(fp, "%d", value); } static void print_mask_arg(bool (*decoder)(FILE *, int, int *), FILE *fp, int value) { int rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } static void print_mask_arg32(bool (*decoder)(FILE *, uint32_t, uint32_t *), FILE *fp, uint32_t value) { uint32_t rem; if (!decoder(fp, value, &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); } #ifndef __LP64__ /* * Add argument padding to subsequent system calls afater a Quad * syscall arguments as needed. This used to be done by hand in the * decoded_syscalls table which was ugly and error prone. It is * simpler to do the fixup of offsets at initalization time than when * decoding arguments. */ static void quad_fixup(struct syscall *sc) { int offset, prev; u_int i; offset = 0; prev = -1; for (i = 0; i < sc->nargs; i++) { /* This arg type is a dummy that doesn't use offset. */ if ((sc->args[i].type & ARG_MASK) == PipeFds) continue; assert(prev < sc->args[i].offset); prev = sc->args[i].offset; sc->args[i].offset += offset; switch (sc->args[i].type & ARG_MASK) { case Quad: case QuadHex: #ifdef __powerpc__ /* * 64-bit arguments on 32-bit powerpc must be * 64-bit aligned. If the current offset is * not aligned, the calling convention inserts * a 32-bit pad argument that should be skipped. */ if (sc->args[i].offset % 2 == 1) { sc->args[i].offset++; offset++; } #endif offset++; default: break; } } } #endif void init_syscalls(void) { struct syscall *sc; STAILQ_INIT(&syscalls); for (sc = decoded_syscalls; sc->name != NULL; sc++) { #ifndef __LP64__ quad_fixup(sc); #endif STAILQ_INSERT_HEAD(&syscalls, sc, entries); } } static struct syscall * find_syscall(struct procabi *abi, u_int number) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) return (abi->syscalls[number]); STAILQ_FOREACH(es, &abi->extra_syscalls, entries) { if (es->number == number) return (es->sc); } return (NULL); } static void add_syscall(struct procabi *abi, u_int number, struct syscall *sc) { struct extra_syscall *es; if (number < nitems(abi->syscalls)) { assert(abi->syscalls[number] == NULL); abi->syscalls[number] = sc; } else { es = malloc(sizeof(*es)); es->sc = sc; es->number = number; STAILQ_INSERT_TAIL(&abi->extra_syscalls, es, entries); } } /* * If/when the list gets big, it might be desirable to do it * as a hash table or binary search. */ struct syscall * get_syscall(struct threadinfo *t, u_int number, u_int nargs) { struct syscall *sc; const char *name; char *new_name; u_int i; sc = find_syscall(t->proc->abi, number); if (sc != NULL) return (sc); name = sysdecode_syscallname(t->proc->abi->abi, number); if (name == NULL) { asprintf(&new_name, "#%d", number); name = new_name; } else new_name = NULL; STAILQ_FOREACH(sc, &syscalls, entries) { if (strcmp(name, sc->name) == 0) { add_syscall(t->proc->abi, number, sc); free(new_name); return (sc); } } /* It is unknown. Add it into the list. 
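The lookup() and xlookup_bits() helpers above turn raw values and bit masks into symbolic names, appending any bits without a table entry in hex so nothing is silently dropped. As a hedged illustration (this snippet is not part of the source and would only compile if placed inside this file, where the static tables are visible), the 0x400 bit is chosen as an example of a flag with no entry in kevent_flags[]:

static void
xlat_example(FILE *fp)
{
	fputs(xlookup(kevent_filters, EVFILT_TIMER), fp);		/* "EVFILT_TIMER" */
	fputs(xlookup_bits(kevent_flags, EV_ADD | EV_CLEAR), fp);	/* "EV_ADD|EV_CLEAR" */
	fputs(xlookup_bits(kevent_flags, EV_ADD | 0x0400), fp);		/* "EV_ADD|0x400" */
}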
*/ #if DEBUG fprintf(stderr, "unknown syscall %s -- setting args to %d\n", name, nargs); #endif sc = calloc(1, sizeof(struct syscall)); sc->name = name; if (new_name != NULL) sc->unknown = true; sc->ret_type = 1; sc->nargs = nargs; for (i = 0; i < nargs; i++) { sc->args[i].offset = i; /* Treat all unknown arguments as LongHex. */ sc->args[i].type = LongHex; } STAILQ_INSERT_HEAD(&syscalls, sc, entries); add_syscall(t->proc->abi, number, sc); return (sc); } /* * Copy a fixed amount of bytes from the process. */ static int get_struct(pid_t pid, void *offset, void *buf, int len) { struct ptrace_io_desc iorequest; iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = offset; iorequest.piod_addr = buf; iorequest.piod_len = len; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) return (-1); return (0); } #define MAXSIZE 4096 /* * Copy a string from the process. Note that it is * expected to be a C string, but if max is set, it will * only get that much. */ static char * get_string(pid_t pid, void *addr, int max) { struct ptrace_io_desc iorequest; char *buf, *nbuf; size_t offset, size, totalsize; offset = 0; if (max) size = max + 1; else { /* Read up to the end of the current page. */ size = PAGE_SIZE - ((uintptr_t)addr % PAGE_SIZE); if (size > MAXSIZE) size = MAXSIZE; } totalsize = size; buf = malloc(totalsize); if (buf == NULL) return (NULL); for (;;) { iorequest.piod_op = PIOD_READ_D; iorequest.piod_offs = (char *)addr + offset; iorequest.piod_addr = buf + offset; iorequest.piod_len = size; if (ptrace(PT_IO, pid, (caddr_t)&iorequest, 0) < 0) { free(buf); return (NULL); } if (memchr(buf + offset, '\0', size) != NULL) return (buf); offset += size; if (totalsize < MAXSIZE && max == 0) { size = MAXSIZE - totalsize; if (size > PAGE_SIZE) size = PAGE_SIZE; nbuf = realloc(buf, totalsize + size); if (nbuf == NULL) { buf[totalsize - 1] = '\0'; return (buf); } buf = nbuf; totalsize += size; } else { buf[totalsize - 1] = '\0'; return (buf); } } } static const char * strsig2(int sig) { static char tmp[32]; const char *signame; signame = sysdecode_signal(sig); if (signame == NULL) { snprintf(tmp, sizeof(tmp), "%d", sig); signame = tmp; } return (signame); } static void print_kevent(FILE *fp, struct kevent *ke, int input) { switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: case EVFILT_VNODE: case EVFILT_PROC: case EVFILT_TIMER: case EVFILT_PROCDESC: fprintf(fp, "%ju", (uintmax_t)ke->ident); break; case EVFILT_SIGNAL: fputs(strsig2(ke->ident), fp); break; default: fprintf(fp, "%p", (void *)ke->ident); } fprintf(fp, ",%s,%s,", xlookup(kevent_filters, ke->filter), xlookup_bits(kevent_flags, ke->flags)); switch (ke->filter) { case EVFILT_READ: case EVFILT_WRITE: fputs(xlookup_bits(kevent_rdwr_fflags, ke->fflags), fp); break; case EVFILT_VNODE: fputs(xlookup_bits(kevent_vnode_fflags, ke->fflags), fp); break; case EVFILT_PROC: case EVFILT_PROCDESC: fputs(xlookup_bits(kevent_proc_fflags, ke->fflags), fp); break; case EVFILT_TIMER: fputs(xlookup_bits(kevent_timer_fflags, ke->fflags), fp); break; case EVFILT_USER: { int ctrl, data; ctrl = ke->fflags & NOTE_FFCTRLMASK; data = ke->fflags & NOTE_FFLAGSMASK; if (input) { fputs(xlookup(kevent_user_ffctrl, ctrl), fp); if (ke->fflags & NOTE_TRIGGER) fputs("|NOTE_TRIGGER", fp); if (data != 0) fprintf(fp, "|%#x", data); } else { fprintf(fp, "%#x", data); } break; } default: fprintf(fp, "%#x", ke->fflags); } - fprintf(fp, ",%p,%p", (void *)ke->data, (void *)ke->udata); + fprintf(fp, ",%#jx,%p", (uintmax_t)ke->data, ke->udata); } static void print_utrace(FILE *fp, 
void *utrace_addr, size_t len) { unsigned char *utrace_buffer; fprintf(fp, "{ "); if (sysdecode_utrace(fp, utrace_addr, len)) { fprintf(fp, " }"); return; } utrace_buffer = utrace_addr; fprintf(fp, "%zu:", len); while (len--) fprintf(fp, " %02x", *utrace_buffer++); fprintf(fp, " }"); } /* * Converts a syscall argument into a string. Said string is * allocated via malloc(), so needs to be free()'d. sc is * a pointer to the syscall description (see above); args is * an array of all of the system call arguments. */ char * print_arg(struct syscall_args *sc, unsigned long *args, long *retval, struct trussinfo *trussinfo) { FILE *fp; char *tmp; size_t tmplen; pid_t pid; fp = open_memstream(&tmp, &tmplen); pid = trussinfo->curthread->proc->pid; switch (sc->type & ARG_MASK) { case Hex: fprintf(fp, "0x%x", (int)args[sc->offset]); break; case Octal: fprintf(fp, "0%o", (int)args[sc->offset]); break; case Int: fprintf(fp, "%d", (int)args[sc->offset]); break; case UInt: fprintf(fp, "%u", (unsigned int)args[sc->offset]); break; case PUInt: { unsigned int val; if (get_struct(pid, (void *)args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ %u }", val); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case LongHex: fprintf(fp, "0x%lx", args[sc->offset]); break; case Long: fprintf(fp, "%ld", args[sc->offset]); break; case Sizet: fprintf(fp, "%zu", (size_t)args[sc->offset]); break; case Name: { /* NULL-terminated string. */ char *tmp2; tmp2 = get_string(pid, (void*)args[sc->offset], 0); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case BinString: { /* * Binary block of data that might have printable characters. * XXX If type|OUT, assume that the length is the syscall's * return value. Otherwise, assume that the length of the block * is in the next syscall argument. */ int max_string = trussinfo->strsize; char tmp2[max_string + 1], *tmp3; int len; int truncated = 0; if (sc->type & OUT) len = retval[0]; else len = args[sc->offset + 1]; /* * Don't print more than max_string characters, to avoid word * wrap. If we have to truncate put some ... after the string. */ if (len > max_string) { len = max_string; truncated = 1; } if (len && get_struct(pid, (void*)args[sc->offset], &tmp2, len) != -1) { tmp3 = malloc(len * 4 + 1); while (len) { if (strvisx(tmp3, tmp2, len, VIS_CSTYLE|VIS_TAB|VIS_NL) <= max_string) break; len--; truncated = 1; } fprintf(fp, "\"%s\"%s", tmp3, truncated ? "..." : ""); free(tmp3); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case ExecArgs: case ExecEnv: case StringArray: { uintptr_t addr; union { char *strarray[0]; char buf[PAGE_SIZE]; } u; char *string; size_t len; u_int first, i; /* * Only parse argv[] and environment arrays from exec calls * if requested. */ if (((sc->type & ARG_MASK) == ExecArgs && (trussinfo->flags & EXECVEARGS) == 0) || ((sc->type & ARG_MASK) == ExecEnv && (trussinfo->flags & EXECVEENVS) == 0)) { fprintf(fp, "0x%lx", args[sc->offset]); break; } /* * Read a page of pointers at a time. Punt if the top-level * pointer is not aligned. Note that the first read is of * a partial page. */ addr = args[sc->offset]; if (addr % sizeof(char *) != 0) { fprintf(fp, "0x%lx", args[sc->offset]); break; } len = PAGE_SIZE - (addr & PAGE_MASK); if (get_struct(pid, (void *)addr, u.buf, len) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputc('[', fp); first = 1; i = 0; while (u.strarray[i] != NULL) { string = get_string(pid, u.strarray[i], 0); fprintf(fp, "%s \"%s\"", first ? 
"" : ",", string); free(string); first = 0; i++; if (i == len / sizeof(char *)) { addr += len; len = PAGE_SIZE; if (get_struct(pid, (void *)addr, u.buf, len) == -1) { fprintf(fp, ", "); break; } i = 0; } } fputs(" ]", fp); break; } #ifdef __LP64__ case Quad: fprintf(fp, "%ld", args[sc->offset]); break; case QuadHex: fprintf(fp, "0x%lx", args[sc->offset]); break; #else case Quad: case QuadHex: { unsigned long long ll; #if _BYTE_ORDER == _LITTLE_ENDIAN ll = (unsigned long long)args[sc->offset + 1] << 32 | args[sc->offset]; #else ll = (unsigned long long)args[sc->offset] << 32 | args[sc->offset + 1]; #endif if ((sc->type & ARG_MASK) == Quad) fprintf(fp, "%lld", ll); else fprintf(fp, "0x%llx", ll); break; } #endif case PQuadHex: { uint64_t val; if (get_struct(pid, (void *)args[sc->offset], &val, sizeof(val)) == 0) fprintf(fp, "{ 0x%jx }", (uintmax_t)val); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Ptr: fprintf(fp, "0x%lx", args[sc->offset]); break; case Readlinkres: { char *tmp2; if (retval[0] == -1) break; tmp2 = get_string(pid, (void*)args[sc->offset], retval[0]); fprintf(fp, "\"%s\"", tmp2); free(tmp2); break; } case Ioctl: { const char *temp; unsigned long cmd; cmd = args[sc->offset]; temp = sysdecode_ioctlname(cmd); if (temp) fputs(temp, fp); else { fprintf(fp, "0x%lx { IO%s%s 0x%lx('%c'), %lu, %lu }", cmd, cmd & IOC_OUT ? "R" : "", cmd & IOC_IN ? "W" : "", IOCGROUP(cmd), isprint(IOCGROUP(cmd)) ? (char)IOCGROUP(cmd) : '?', cmd & 0xFF, IOCPARM_LEN(cmd)); } break; } case Timespec: { struct timespec ts; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) fprintf(fp, "{ %jd.%09ld }", (intmax_t)ts.tv_sec, ts.tv_nsec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timespec2: { struct timespec ts[2]; const char *sep; unsigned int i; if (get_struct(pid, (void *)args[sc->offset], &ts, sizeof(ts)) != -1) { fputs("{ ", fp); sep = ""; for (i = 0; i < nitems(ts); i++) { fputs(sep, fp); sep = ", "; switch (ts[i].tv_nsec) { case UTIME_NOW: fprintf(fp, "UTIME_NOW"); break; case UTIME_OMIT: fprintf(fp, "UTIME_OMIT"); break; default: fprintf(fp, "%jd.%09ld", (intmax_t)ts[i].tv_sec, ts[i].tv_nsec); break; } } fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timeval: { struct timeval tv; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld }", (intmax_t)tv.tv_sec, tv.tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Timeval2: { struct timeval tv[2]; if (get_struct(pid, (void *)args[sc->offset], &tv, sizeof(tv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)tv[0].tv_sec, tv[0].tv_usec, (intmax_t)tv[1].tv_sec, tv[1].tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Itimerval: { struct itimerval itv; if (get_struct(pid, (void *)args[sc->offset], &itv, sizeof(itv)) != -1) fprintf(fp, "{ %jd.%06ld, %jd.%06ld }", (intmax_t)itv.it_interval.tv_sec, itv.it_interval.tv_usec, (intmax_t)itv.it_value.tv_sec, itv.it_value.tv_usec); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case LinuxSockArgs: { struct linux_socketcall_args largs; if (get_struct(pid, (void *)args[sc->offset], (void *)&largs, sizeof(largs)) != -1) fprintf(fp, "{ %s, 0x%lx }", lookup(linux_socketcall_ops, largs.what, 10), (long unsigned int)largs.args); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Pollfd: { /* * XXX: A Pollfd argument expects the /next/ syscall argument * to be the number of fds in the array. This matches the poll * syscall. 
*/ struct pollfd *pfd; int numfds = args[sc->offset + 1]; size_t bytes = sizeof(struct pollfd) * numfds; int i; if ((pfd = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for pollfd array", bytes); if (get_struct(pid, (void *)args[sc->offset], pfd, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { fprintf(fp, " %d/%s", pfd[i].fd, xlookup_bits(poll_flags, pfd[i].events)); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(pfd); break; } case Fd_set: { /* * XXX: A Fd_set argument expects the /first/ syscall argument * to be the number of fds in the array. This matches the * select syscall. */ fd_set *fds; int numfds = args[0]; size_t bytes = _howmany(numfds, _NFDBITS) * _NFDBITS; int i; if ((fds = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for fd_set array", bytes); if (get_struct(pid, (void *)args[sc->offset], fds, bytes) != -1) { fputs("{", fp); for (i = 0; i < numfds; i++) { if (FD_ISSET(i, fds)) fprintf(fp, " %d", i); } fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); free(fds); break; } case Signal: fputs(strsig2(args[sc->offset]), fp); break; case Sigset: { long sig; sigset_t ss; int i, first; sig = args[sc->offset]; if (get_struct(pid, (void *)args[sc->offset], (void *)&ss, sizeof(ss)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } fputs("{ ", fp); first = 1; for (i = 1; i < sys_nsig; i++) { if (sigismember(&ss, i)) { fprintf(fp, "%s%s", !first ? "|" : "", strsig2(i)); first = 0; } } if (!first) fputc(' ', fp); fputc('}', fp); break; } case Sigprocmask: print_integer_arg(sysdecode_sigprocmask_how, fp, args[sc->offset]); break; case Fcntlflag: /* XXX: Output depends on the value of the previous argument. */ if (sysdecode_fcntl_arg_p(args[sc->offset - 1])) sysdecode_fcntl_arg(fp, args[sc->offset - 1], args[sc->offset], 16); break; case Open: print_mask_arg(sysdecode_open_flags, fp, args[sc->offset]); break; case Fcntl: print_integer_arg(sysdecode_fcntl_cmd, fp, args[sc->offset]); break; case Mprot: print_mask_arg(sysdecode_mmap_prot, fp, args[sc->offset]); break; case Mmapflags: print_mask_arg(sysdecode_mmap_flags, fp, args[sc->offset]); break; case Whence: print_integer_arg(sysdecode_whence, fp, args[sc->offset]); break; case Sockdomain: print_integer_arg(sysdecode_socketdomain, fp, args[sc->offset]); break; case Socktype: print_mask_arg(sysdecode_socket_type, fp, args[sc->offset]); break; case Shutdown: print_integer_arg(sysdecode_shutdown_how, fp, args[sc->offset]); break; case Resource: print_integer_arg(sysdecode_rlimit, fp, args[sc->offset]); break; case RusageWho: print_integer_arg(sysdecode_getrusage_who, fp, args[sc->offset]); break; case Pathconf: fputs(xlookup(pathconf_arg, args[sc->offset]), fp); break; case Rforkflags: print_mask_arg(sysdecode_rfork_flags, fp, args[sc->offset]); break; case Sockaddr: { char addr[64]; struct sockaddr_in *lsin; struct sockaddr_in6 *lsin6; struct sockaddr_un *sun; struct sockaddr *sa; socklen_t len; u_char *q; if (args[sc->offset] == 0) { fputs("NULL", fp); break; } /* * Extract the address length from the next argument. If * this is an output sockaddr (OUT is set), then the * next argument is a pointer to a socklen_t. Otherwise * the next argument contains a socklen_t by value. */ if (sc->type & OUT) { if (get_struct(pid, (void *)args[sc->offset + 1], &len, sizeof(len)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } } else len = args[sc->offset + 1]; /* If the length is too small, just bail. 
*/ if (len < sizeof(*sa)) { fprintf(fp, "0x%lx", args[sc->offset]); break; } sa = calloc(1, len); if (get_struct(pid, (void *)args[sc->offset], sa, len) == -1) { free(sa); fprintf(fp, "0x%lx", args[sc->offset]); break; } switch (sa->sa_family) { case AF_INET: if (len < sizeof(*lsin)) goto sockaddr_short; lsin = (struct sockaddr_in *)(void *)sa; inet_ntop(AF_INET, &lsin->sin_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET %s:%d }", addr, htons(lsin->sin_port)); break; case AF_INET6: if (len < sizeof(*lsin6)) goto sockaddr_short; lsin6 = (struct sockaddr_in6 *)(void *)sa; inet_ntop(AF_INET6, &lsin6->sin6_addr, addr, sizeof(addr)); fprintf(fp, "{ AF_INET6 [%s]:%d }", addr, htons(lsin6->sin6_port)); break; case AF_UNIX: sun = (struct sockaddr_un *)sa; fprintf(fp, "{ AF_UNIX \"%.*s\" }", (int)(len - offsetof(struct sockaddr_un, sun_path)), sun->sun_path); break; default: sockaddr_short: fprintf(fp, "{ sa_len = %d, sa_family = %d, sa_data = {", (int)sa->sa_len, (int)sa->sa_family); for (q = (u_char *)sa->sa_data; q < (u_char *)sa + len; q++) fprintf(fp, "%s 0x%02x", q == (u_char *)sa->sa_data ? "" : ",", *q); fputs(" } }", fp); } free(sa); break; } case Sigaction: { struct sigaction sa; if (get_struct(pid, (void *)args[sc->offset], &sa, sizeof(sa)) != -1) { fputs("{ ", fp); if (sa.sa_handler == SIG_DFL) fputs("SIG_DFL", fp); else if (sa.sa_handler == SIG_IGN) fputs("SIG_IGN", fp); else fprintf(fp, "%p", sa.sa_handler); fprintf(fp, " %s ss_t }", xlookup_bits(sigaction_flags, sa.sa_flags)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Kevent: { /* * XXX XXX: The size of the array is determined by either the * next syscall argument, or by the syscall return value, * depending on which argument number we are. This matches the * kevent syscall, but luckily that's the only syscall that uses * them. 
*/ struct kevent *ke; int numevents = -1; size_t bytes; int i; if (sc->offset == 1) numevents = args[sc->offset+1]; else if (sc->offset == 3 && retval[0] != -1) numevents = retval[0]; if (numevents >= 0) { bytes = sizeof(struct kevent) * numevents; if ((ke = malloc(bytes)) == NULL) err(1, "Cannot malloc %zu bytes for kevent array", bytes); } else ke = NULL; if (numevents >= 0 && get_struct(pid, (void *)args[sc->offset], ke, bytes) != -1) { fputc('{', fp); for (i = 0; i < numevents; i++) { fputc(' ', fp); print_kevent(fp, &ke[i], sc->offset == 1); } fputs(" }", fp); } else { fprintf(fp, "0x%lx", args[sc->offset]); } free(ke); break; } case Stat: { struct stat st; if (get_struct(pid, (void *)args[sc->offset], &st, sizeof(st)) != -1) { char mode[12]; strmode(st.st_mode, mode); fprintf(fp, "{ mode=%s,inode=%ju,size=%jd,blksize=%ld }", mode, (uintmax_t)st.st_ino, (intmax_t)st.st_size, (long)st.st_blksize); } else { fprintf(fp, "0x%lx", args[sc->offset]); } break; } case StatFs: { unsigned int i; struct statfs buf; if (get_struct(pid, (void *)args[sc->offset], &buf, sizeof(buf)) != -1) { char fsid[17]; bzero(fsid, sizeof(fsid)); if (buf.f_fsid.val[0] != 0 || buf.f_fsid.val[1] != 0) { for (i = 0; i < sizeof(buf.f_fsid); i++) snprintf(&fsid[i*2], sizeof(fsid) - (i*2), "%02x", ((u_char *)&buf.f_fsid)[i]); } fprintf(fp, "{ fstypename=%s,mntonname=%s,mntfromname=%s," "fsid=%s }", buf.f_fstypename, buf.f_mntonname, buf.f_mntfromname, fsid); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Rusage: { struct rusage ru; if (get_struct(pid, (void *)args[sc->offset], &ru, sizeof(ru)) != -1) { fprintf(fp, "{ u=%jd.%06ld,s=%jd.%06ld,in=%ld,out=%ld }", (intmax_t)ru.ru_utime.tv_sec, ru.ru_utime.tv_usec, (intmax_t)ru.ru_stime.tv_sec, ru.ru_stime.tv_usec, ru.ru_inblock, ru.ru_oublock); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Rlimit: { struct rlimit rl; if (get_struct(pid, (void *)args[sc->offset], &rl, sizeof(rl)) != -1) { fprintf(fp, "{ cur=%ju,max=%ju }", rl.rlim_cur, rl.rlim_max); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case ExitStatus: { int status; if (get_struct(pid, (void *)args[sc->offset], &status, sizeof(status)) != -1) { fputs("{ ", fp); if (WIFCONTINUED(status)) fputs("CONTINUED", fp); else if (WIFEXITED(status)) fprintf(fp, "EXITED,val=%d", WEXITSTATUS(status)); else if (WIFSIGNALED(status)) fprintf(fp, "SIGNALED,sig=%s%s", strsig2(WTERMSIG(status)), WCOREDUMP(status) ? ",cored" : ""); else fprintf(fp, "STOPPED,sig=%s", strsig2(WTERMSIG(status))); fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Waitoptions: print_mask_arg(sysdecode_wait6_options, fp, args[sc->offset]); break; case Idtype: print_integer_arg(sysdecode_idtype, fp, args[sc->offset]); break; case Procctl: print_integer_arg(sysdecode_procctl_cmd, fp, args[sc->offset]); break; case Umtxop: print_integer_arg(sysdecode_umtx_op, fp, args[sc->offset]); break; case Atfd: print_integer_arg(sysdecode_atfd, fp, args[sc->offset]); break; case Atflags: fputs(xlookup_bits(at_flags, args[sc->offset]), fp); break; case Accessmode: print_mask_arg(sysdecode_access_mode, fp, args[sc->offset]); break; case Sysarch: fputs(xlookup(sysarch_ops, args[sc->offset]), fp); break; case PipeFds: /* * The pipe() system call in the kernel returns its * two file descriptors via return values. However, * the interface exposed by libc is that pipe() * accepts a pointer to an array of descriptors. 
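 * (as in "int fds[2]; pipe(fds);", with the two descriptors arriving
 * here in retval[0] and retval[1]).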
* Format the output to match the libc API by printing * the returned file descriptors as a fake argument. * * Overwrite the first retval to signal a successful * return as well. */ fprintf(fp, "{ %ld, %ld }", retval[0], retval[1]); retval[0] = 0; break; case Utrace: { size_t len; void *utrace_addr; len = args[sc->offset + 1]; utrace_addr = calloc(1, len); if (get_struct(pid, (void *)args[sc->offset], (void *)utrace_addr, len) != -1) print_utrace(fp, utrace_addr, len); else fprintf(fp, "0x%lx", args[sc->offset]); free(utrace_addr); break; } case IntArray: { int descriptors[16]; unsigned long i, ndescriptors; bool truncated; ndescriptors = args[sc->offset + 1]; truncated = false; if (ndescriptors > nitems(descriptors)) { ndescriptors = nitems(descriptors); truncated = true; } if (get_struct(pid, (void *)args[sc->offset], descriptors, ndescriptors * sizeof(descriptors[0])) != -1) { fprintf(fp, "{"); for (i = 0; i < ndescriptors; i++) fprintf(fp, i == 0 ? " %d" : ", %d", descriptors[i]); fprintf(fp, truncated ? ", ... }" : " }"); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Pipe2: print_mask_arg(sysdecode_pipe2_flags, fp, args[sc->offset]); break; case CapFcntlRights: { uint32_t rights; if (sc->type & OUT) { if (get_struct(pid, (void *)args[sc->offset], &rights, sizeof(rights)) == -1) { fprintf(fp, "0x%lx", args[sc->offset]); break; } } else rights = args[sc->offset]; print_mask_arg32(sysdecode_cap_fcntlrights, fp, rights); break; } case Fadvice: print_integer_arg(sysdecode_fadvice, fp, args[sc->offset]); break; case FileFlags: { fflags_t rem; if (!sysdecode_fileflags(fp, args[sc->offset], &rem)) fprintf(fp, "0x%x", rem); else if (rem != 0) fprintf(fp, "|0x%x", rem); break; } case Flockop: print_mask_arg(sysdecode_flock_operation, fp, args[sc->offset]); break; case Getfsstatmode: print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; case Kldsymcmd: print_integer_arg(sysdecode_kldsym_cmd, fp, args[sc->offset]); break; case Kldunloadflags: print_integer_arg(sysdecode_kldunload_flags, fp, args[sc->offset]); break; case Madvice: print_integer_arg(sysdecode_madvice, fp, args[sc->offset]); break; case Socklent: fprintf(fp, "%u", (socklen_t)args[sc->offset]); break; case Sockprotocol: { const char *temp; int domain, protocol; domain = args[sc->offset - 2]; protocol = args[sc->offset]; if (protocol == 0) { fputs("0", fp); } else { temp = sysdecode_socket_protocol(domain, protocol); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", protocol); } } break; } case Sockoptlevel: print_integer_arg(sysdecode_sockopt_level, fp, args[sc->offset]); break; case Sockoptname: { const char *temp; int level, name; level = args[sc->offset - 1]; name = args[sc->offset]; temp = sysdecode_sockopt_name(level, name); if (temp) { fputs(temp, fp); } else { fprintf(fp, "%d", name); } break; } case Msgflags: print_mask_arg(sysdecode_msg_flags, fp, args[sc->offset]); break; case CapRights: { cap_rights_t rights; if (get_struct(pid, (void *)args[sc->offset], &rights, sizeof(rights)) != -1) { fputs("{ ", fp); sysdecode_cap_rights(fp, &rights); fputs(" }", fp); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case Acltype: print_integer_arg(sysdecode_acltype, fp, args[sc->offset]); break; case Extattrnamespace: print_integer_arg(sysdecode_extattrnamespace, fp, args[sc->offset]); break; case Minherit: print_integer_arg(sysdecode_minherit_inherit, fp, args[sc->offset]); break; case Mlockall: print_mask_arg(sysdecode_mlockall_flags, fp, args[sc->offset]); break; case Mountflags: 
print_mask_arg(sysdecode_mount_flags, fp, args[sc->offset]); break; case Msync: print_mask_arg(sysdecode_msync_flags, fp, args[sc->offset]); break; case Priowhich: print_integer_arg(sysdecode_prio_which, fp, args[sc->offset]); break; case Ptraceop: print_integer_arg(sysdecode_ptrace_request, fp, args[sc->offset]); break; case Quotactlcmd: if (!sysdecode_quotactl_cmd(fp, args[sc->offset])) fprintf(fp, "%#x", (int)args[sc->offset]); break; case Reboothowto: print_mask_arg(sysdecode_reboot_howto, fp, args[sc->offset]); break; case Rtpriofunc: print_integer_arg(sysdecode_rtprio_function, fp, args[sc->offset]); break; case Schedpolicy: print_integer_arg(sysdecode_scheduler_policy, fp, args[sc->offset]); break; case Schedparam: { struct sched_param sp; if (get_struct(pid, (void *)args[sc->offset], &sp, sizeof(sp)) != -1) fprintf(fp, "{ %d }", sp.sched_priority); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIAdvice: fputs(xlookup(cloudabi_advice, args[sc->offset]), fp); break; case CloudABIClockID: fputs(xlookup(cloudabi_clockid, args[sc->offset]), fp); break; case ClouduABIFDSFlags: fputs(xlookup_bits(cloudabi_fdsflags, args[sc->offset]), fp); break; case CloudABIFDStat: { cloudabi_fdstat_t fds; if (get_struct(pid, (void *)args[sc->offset], &fds, sizeof(fds)) != -1) { fprintf(fp, "{ %s, ", xlookup(cloudabi_filetype, fds.fs_filetype)); fprintf(fp, "%s, ... }", xlookup_bits(cloudabi_fdflags, fds.fs_flags)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIFileStat: { cloudabi_filestat_t fsb; if (get_struct(pid, (void *)args[sc->offset], &fsb, sizeof(fsb)) != -1) fprintf(fp, "{ %s, %ju }", xlookup(cloudabi_filetype, fsb.st_filetype), (uintmax_t)fsb.st_size); else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABIFileType: fputs(xlookup(cloudabi_filetype, args[sc->offset]), fp); break; case CloudABIFSFlags: fputs(xlookup_bits(cloudabi_fsflags, args[sc->offset]), fp); break; case CloudABILookup: if ((args[sc->offset] & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) != 0) fprintf(fp, "%d|LOOKUP_SYMLINK_FOLLOW", (int)args[sc->offset]); else fprintf(fp, "%d", (int)args[sc->offset]); break; case CloudABIMFlags: fputs(xlookup_bits(cloudabi_mflags, args[sc->offset]), fp); break; case CloudABIMProt: fputs(xlookup_bits(cloudabi_mprot, args[sc->offset]), fp); break; case CloudABIMSFlags: fputs(xlookup_bits(cloudabi_msflags, args[sc->offset]), fp); break; case CloudABIOFlags: fputs(xlookup_bits(cloudabi_oflags, args[sc->offset]), fp); break; case CloudABISDFlags: fputs(xlookup_bits(cloudabi_sdflags, args[sc->offset]), fp); break; case CloudABISignal: fputs(xlookup(cloudabi_signal, args[sc->offset]), fp); break; case CloudABISockStat: { cloudabi_sockstat_t ss; if (get_struct(pid, (void *)args[sc->offset], &ss, sizeof(ss)) != -1) { fprintf(fp, "{ %s, ", xlookup( cloudabi_sa_family, ss.ss_sockname.sa_family)); fprintf(fp, "%s, ", xlookup( cloudabi_sa_family, ss.ss_peername.sa_family)); fprintf(fp, "%s, ", xlookup( cloudabi_errno, ss.ss_error)); fprintf(fp, "%s }", xlookup_bits( cloudabi_ssstate, ss.ss_state)); } else fprintf(fp, "0x%lx", args[sc->offset]); break; } case CloudABISSFlags: fputs(xlookup_bits(cloudabi_ssflags, args[sc->offset]), fp); break; case CloudABITimestamp: fprintf(fp, "%lu.%09lus", args[sc->offset] / 1000000000, args[sc->offset] % 1000000000); break; case CloudABIULFlags: fputs(xlookup_bits(cloudabi_ulflags, args[sc->offset]), fp); break; case CloudABIWhence: fputs(xlookup(cloudabi_whence, args[sc->offset]), fp); break; default: errx(1, "Invalid argument 
type %d\n", sc->type & ARG_MASK); } fclose(fp); return (tmp); } /* * Print (to outfile) the system call and its arguments. */ void print_syscall(struct trussinfo *trussinfo) { struct threadinfo *t; const char *name; char **s_args; int i, len, nargs; t = trussinfo->curthread; name = t->cs.sc->name; nargs = t->cs.nargs; s_args = t->cs.s_args; len = print_line_prefix(trussinfo); len += fprintf(trussinfo->outfile, "%s(", name); for (i = 0; i < nargs; i++) { if (s_args[i] != NULL) len += fprintf(trussinfo->outfile, "%s", s_args[i]); else len += fprintf(trussinfo->outfile, ""); len += fprintf(trussinfo->outfile, "%s", i < (nargs - 1) ? "," : ""); } len += fprintf(trussinfo->outfile, ")"); for (i = 0; i < 6 - (len / 8); i++) fprintf(trussinfo->outfile, "\t"); } void print_syscall_ret(struct trussinfo *trussinfo, int errorp, long *retval) { struct timespec timediff; struct threadinfo *t; struct syscall *sc; int error; t = trussinfo->curthread; sc = t->cs.sc; if (trussinfo->flags & COUNTONLY) { timespecsubt(&t->after, &t->before, &timediff); timespecadd(&sc->time, &timediff, &sc->time); sc->ncalls++; if (errorp) sc->nerror++; return; } print_syscall(trussinfo); fflush(trussinfo->outfile); if (retval == NULL) { /* * This system call resulted in the current thread's exit, * so there is no return value or error to display. */ fprintf(trussinfo->outfile, "\n"); return; } if (errorp) { error = sysdecode_abi_to_freebsd_errno(t->proc->abi->abi, retval[0]); fprintf(trussinfo->outfile, " ERR#%ld '%s'\n", retval[0], error == INT_MAX ? "Unknown error" : strerror(error)); } #ifndef __LP64__ else if (sc->ret_type == 2) { off_t off; #if _BYTE_ORDER == _LITTLE_ENDIAN off = (off_t)retval[1] << 32 | retval[0]; #else off = (off_t)retval[0] << 32 | retval[1]; #endif fprintf(trussinfo->outfile, " = %jd (0x%jx)\n", (intmax_t)off, (intmax_t)off); } #endif else fprintf(trussinfo->outfile, " = %ld (0x%lx)\n", retval[0], retval[0]); } void print_summary(struct trussinfo *trussinfo) { struct timespec total = {0, 0}; struct syscall *sc; int ncall, nerror; fprintf(trussinfo->outfile, "%-20s%15s%8s%8s\n", "syscall", "seconds", "calls", "errors"); ncall = nerror = 0; STAILQ_FOREACH(sc, &syscalls, entries) if (sc->ncalls) { fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", sc->name, (intmax_t)sc->time.tv_sec, sc->time.tv_nsec, sc->ncalls, sc->nerror); timespecadd(&total, &sc->time, &total); ncall += sc->ncalls; nerror += sc->nerror; } fprintf(trussinfo->outfile, "%20s%15s%8s%8s\n", "", "-------------", "-------", "-------"); fprintf(trussinfo->outfile, "%-20s%5jd.%09ld%8d%8d\n", "", (intmax_t)total.tv_sec, total.tv_nsec, ncall, nerror); }