Index: lib/libc/stdio/Makefile.inc =================================================================== --- lib/libc/stdio/Makefile.inc +++ lib/libc/stdio/Makefile.inc @@ -4,7 +4,7 @@ # stdio sources .PATH: ${LIBC_SRCTOP}/stdio -SRCS+= _flock_stub.c asprintf.c clrerr.c dprintf.c \ +SRCS+= _flock_stub.c asprintf.c clrerr.c dprintf.c eventfd_rw.c \ fclose.c fcloseall.c fdopen.c \ feof.c ferror.c fflush.c fgetc.c fgetln.c fgetpos.c fgets.c fgetwc.c \ fgetwln.c fgetws.c \ Index: lib/libc/stdio/Symbol.map =================================================================== --- lib/libc/stdio/Symbol.map +++ lib/libc/stdio/Symbol.map @@ -178,6 +178,8 @@ fread_unlocked; fwrite_unlocked; mkostempsat; + eventfd_read; + eventfd_write; }; FBSDprivate_1.0 { Index: lib/libc/stdio/eventfd_rw.c =================================================================== --- /dev/null +++ lib/libc/stdio/eventfd_rw.c @@ -0,0 +1,41 @@ +/*- + * SPDX-License-Identifier: MIT + * + * Copyright (c) 2005-2020 Rich Felker, et al. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include +__FBSDID("$FreeBSD$"); + +#include "namespace.h" +#include +#include +#include "un-namespace.h" + +int eventfd_read(int fd, eventfd_t *value) /* reads sizeof(*value) bytes from fd into *value with _read(); returns 0 only when the full size was read, -1 otherwise */ +{ + return (sizeof(*value) == _read(fd, value, sizeof(*value))) ? 0 : -1; +} + +int eventfd_write(int fd, eventfd_t value) /* writes value to fd with _write(); returns 0 only when all sizeof(value) bytes were written, -1 otherwise */ +{ + return (sizeof(value) == _write(fd, &value, sizeof(value))) ? 0 : -1; +} Index: lib/libc/sys/Symbol.map =================================================================== --- lib/libc/sys/Symbol.map +++ lib/libc/sys/Symbol.map @@ -413,6 +413,7 @@ memfd_create; shm_create_largepage; shm_rename; + eventfd; }; FBSDprivate_1.0 { Index: sys/compat/freebsd32/freebsd32_syscall.h =================================================================== --- sys/compat/freebsd32/freebsd32_syscall.h +++ sys/compat/freebsd32/freebsd32_syscall.h @@ -503,4 +503,5 @@ #define FREEBSD32_SYS___realpathat 574 #define FREEBSD32_SYS_close_range 575 #define FREEBSD32_SYS_rpctls_syscall 576 -#define FREEBSD32_SYS_MAXSYSCALL 577 +#define FREEBSD32_SYS_eventfd 577 +#define FREEBSD32_SYS_MAXSYSCALL 578 Index: sys/compat/freebsd32/freebsd32_syscalls.c =================================================================== --- sys/compat/freebsd32/freebsd32_syscalls.c +++ sys/compat/freebsd32/freebsd32_syscalls.c @@ -613,4 +613,5 @@ "__realpathat", /* 574 = __realpathat */ "close_range", /* 575 = close_range */ "rpctls_syscall", /* 576 = rpctls_syscall */ + "eventfd", /* 577 = eventfd */ }; Index: sys/compat/freebsd32/freebsd32_sysent.c =================================================================== --- sys/compat/freebsd32/freebsd32_sysent.c +++ sys/compat/freebsd32/freebsd32_sysent.c @@ -666,4 +666,5 @@ { .sy_narg = AS(__realpathat_args), 
.sy_call = (sy_call_t *)sys___realpathat, .sy_auevent = AUE_REALPATHAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 574 = __realpathat */ { .sy_narg = AS(close_range_args), .sy_call = (sy_call_t *)sys_close_range, .sy_auevent = AUE_CLOSERANGE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 575 = close_range */ { .sy_narg = AS(rpctls_syscall_args), .sy_call = (sy_call_t *)lkmressys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT }, /* 576 = rpctls_syscall */ + { .sy_narg = AS(eventfd_args), .sy_call = (sy_call_t *)sys_eventfd, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 577 = eventfd */ }; Index: sys/compat/freebsd32/freebsd32_systrace_args.c =================================================================== --- sys/compat/freebsd32/freebsd32_systrace_args.c +++ sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3384,6 +3384,14 @@ *n_args = 2; break; } + /* eventfd */ + case 577: { + struct eventfd_args *p = params; + uarg[0] = p->initval; /* unsigned int */ + iarg[1] = p->flags; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9124,6 +9132,19 @@ break; }; break; + /* eventfd */ + case 577: + switch(ndx) { + case 0: + p = "unsigned int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11028,6 +11049,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* eventfd */ + case 577: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; Index: sys/compat/freebsd32/syscalls.master =================================================================== --- sys/compat/freebsd32/syscalls.master +++ sys/compat/freebsd32/syscalls.master @@ -1167,5 +1167,6 @@ ; 576 is initialised by the krpc code, if present. 
576 AUE_NULL NOSTD|NOPROTO { int rpctls_syscall(int op, \ const char *path); } +577 AUE_NULL NOPROTO { int eventfd(unsigned int initval, int flags); } ; vim: syntax=off Index: sys/compat/linux/linux_event.h =================================================================== --- sys/compat/linux/linux_event.h +++ sys/compat/linux/linux_event.h @@ -56,8 +56,6 @@ #define LINUX_EPOLL_CTL_DEL 2 #define LINUX_EPOLL_CTL_MOD 3 -#define LINUX_EFD_SEMAPHORE (1 << 0) - #define LINUX_TFD_TIMER_ABSTIME (1 << 0) #define LINUX_TFD_TIMER_CANCEL_ON_SET (1 << 1) #define LINUX_TFD_CLOEXEC LINUX_O_CLOEXEC Index: sys/compat/linux/linux_event.c =================================================================== --- sys/compat/linux/linux_event.c +++ sys/compat/linux/linux_event.c @@ -52,6 +52,7 @@ #include #include #include +#include #ifdef COMPAT_LINUX32 #include @@ -122,53 +123,11 @@ int error; }; -/* eventfd */ -typedef uint64_t eventfd_t; - -static fo_rdwr_t eventfd_read; -static fo_rdwr_t eventfd_write; -static fo_ioctl_t eventfd_ioctl; -static fo_poll_t eventfd_poll; -static fo_kqfilter_t eventfd_kqfilter; -static fo_stat_t eventfd_stat; -static fo_close_t eventfd_close; -static fo_fill_kinfo_t eventfd_fill_kinfo; - -static struct fileops eventfdops = { - .fo_read = eventfd_read, - .fo_write = eventfd_write, - .fo_truncate = invfo_truncate, - .fo_ioctl = eventfd_ioctl, - .fo_poll = eventfd_poll, - .fo_kqfilter = eventfd_kqfilter, - .fo_stat = eventfd_stat, - .fo_close = eventfd_close, - .fo_chmod = invfo_chmod, - .fo_chown = invfo_chown, - .fo_sendfile = invfo_sendfile, - .fo_fill_kinfo = eventfd_fill_kinfo, - .fo_flags = DFLAG_PASSABLE -}; - -static void filt_eventfddetach(struct knote *kn); -static int filt_eventfdread(struct knote *kn, long hint); -static int filt_eventfdwrite(struct knote *kn, long hint); - -static struct filterops eventfd_rfiltops = { - .f_isfd = 1, - .f_detach = filt_eventfddetach, - .f_event = filt_eventfdread -}; -static struct filterops eventfd_wfiltops = 
{ - .f_isfd = 1, - .f_detach = filt_eventfddetach, - .f_event = filt_eventfdwrite -}; - /* timerfd */ typedef uint64_t timerfd_t; static fo_rdwr_t timerfd_read; +static fo_ioctl_t timerfd_ioctl; static fo_poll_t timerfd_poll; static fo_kqfilter_t timerfd_kqfilter; static fo_stat_t timerfd_stat; @@ -179,7 +138,7 @@ .fo_read = timerfd_read, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, - .fo_ioctl = eventfd_ioctl, + .fo_ioctl = timerfd_ioctl, .fo_poll = timerfd_poll, .fo_kqfilter = timerfd_kqfilter, .fo_stat = timerfd_stat, @@ -200,13 +159,6 @@ .f_event = filt_timerfdread }; -struct eventfd { - eventfd_t efd_count; - uint32_t efd_flags; - struct selinfo efd_sel; - struct mtx efd_lock; -}; - struct timerfd { clockid_t tfd_clockid; struct itimerspec tfd_time; @@ -217,7 +169,6 @@ struct mtx tfd_lock; }; -static int eventfd_create(struct thread *td, uint32_t initval, int flags); static void linux_timerfd_expire(void *); static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); @@ -695,41 +646,6 @@ return (error1 == 0 ? 
0 : error2); } -static int -eventfd_create(struct thread *td, uint32_t initval, int flags) -{ - struct filedesc *fdp; - struct eventfd *efd; - struct file *fp; - int fflags, fd, error; - - fflags = 0; - if ((flags & LINUX_O_CLOEXEC) != 0) - fflags |= O_CLOEXEC; - - fdp = td->td_proc->p_fd; - error = falloc(td, &fp, &fd, fflags); - if (error != 0) - return (error); - - efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); - efd->efd_flags = flags; - efd->efd_count = initval; - mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); - - knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); - - fflags = FREAD | FWRITE; - if ((flags & LINUX_O_NONBLOCK) != 0) - fflags |= FNONBLOCK; - - finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); - fdrop(fp, td); - - td->td_retval[0] = fd; - return (error); -} - #ifdef LINUX_LEGACY_SYSCALLS int linux_eventfd(struct thread *td, struct linux_eventfd_args *args) @@ -743,254 +659,12 @@ linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) { - if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) + if ((args->flags & ~(EFD_CLOEXEC|EFD_NONBLOCK|EFD_SEMAPHORE)) != 0) return (EINVAL); return (eventfd_create(td, args->initval, args->flags)); } -static int -eventfd_close(struct file *fp, struct thread *td) -{ - struct eventfd *efd; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - seldrain(&efd->efd_sel); - knlist_destroy(&efd->efd_sel.si_note); - - fp->f_ops = &badfileops; - mtx_destroy(&efd->efd_lock); - free(efd, M_EPOLL); - - return (0); -} - -static int -eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - struct eventfd *efd; - eventfd_t count; - int error; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - if (uio->uio_resid < sizeof(eventfd_t)) - return (EINVAL); - - error = 0; - mtx_lock(&efd->efd_lock); -retry: - if (efd->efd_count == 0) { - 
if ((fp->f_flag & FNONBLOCK) != 0) { - mtx_unlock(&efd->efd_lock); - return (EAGAIN); - } - error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); - if (error == 0) - goto retry; - } - if (error == 0) { - if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { - count = 1; - --efd->efd_count; - } else { - count = efd->efd_count; - efd->efd_count = 0; - } - KNOTE_LOCKED(&efd->efd_sel.si_note, 0); - selwakeup(&efd->efd_sel); - wakeup(&efd->efd_count); - mtx_unlock(&efd->efd_lock); - error = uiomove(&count, sizeof(eventfd_t), uio); - } else - mtx_unlock(&efd->efd_lock); - - return (error); -} - -static int -eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - struct eventfd *efd; - eventfd_t count; - int error; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - if (uio->uio_resid < sizeof(eventfd_t)) - return (EINVAL); - - error = uiomove(&count, sizeof(eventfd_t), uio); - if (error != 0) - return (error); - if (count == UINT64_MAX) - return (EINVAL); - - mtx_lock(&efd->efd_lock); -retry: - if (UINT64_MAX - efd->efd_count <= count) { - if ((fp->f_flag & FNONBLOCK) != 0) { - mtx_unlock(&efd->efd_lock); - /* Do not not return the number of bytes written */ - uio->uio_resid += sizeof(eventfd_t); - return (EAGAIN); - } - error = mtx_sleep(&efd->efd_count, &efd->efd_lock, - PCATCH, "lefdwr", 0); - if (error == 0) - goto retry; - } - if (error == 0) { - efd->efd_count += count; - KNOTE_LOCKED(&efd->efd_sel.si_note, 0); - selwakeup(&efd->efd_sel); - wakeup(&efd->efd_count); - } - mtx_unlock(&efd->efd_lock); - - return (error); -} - -static int -eventfd_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ - struct eventfd *efd; - int revents = 0; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (POLLERR); - - mtx_lock(&efd->efd_lock); - if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) - 
revents |= events & (POLLIN|POLLRDNORM); - if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) - revents |= events & (POLLOUT|POLLWRNORM); - if (revents == 0) - selrecord(td, &efd->efd_sel); - mtx_unlock(&efd->efd_lock); - - return (revents); -} - -/*ARGSUSED*/ -static int -eventfd_kqfilter(struct file *fp, struct knote *kn) -{ - struct eventfd *efd; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - mtx_lock(&efd->efd_lock); - switch (kn->kn_filter) { - case EVFILT_READ: - kn->kn_fop = &eventfd_rfiltops; - break; - case EVFILT_WRITE: - kn->kn_fop = &eventfd_wfiltops; - break; - default: - mtx_unlock(&efd->efd_lock); - return (EINVAL); - } - - kn->kn_hook = efd; - knlist_add(&efd->efd_sel.si_note, kn, 1); - mtx_unlock(&efd->efd_lock); - - return (0); -} - -static void -filt_eventfddetach(struct knote *kn) -{ - struct eventfd *efd = kn->kn_hook; - - mtx_lock(&efd->efd_lock); - knlist_remove(&efd->efd_sel.si_note, kn, 1); - mtx_unlock(&efd->efd_lock); -} - -/*ARGSUSED*/ -static int -filt_eventfdread(struct knote *kn, long hint) -{ - struct eventfd *efd = kn->kn_hook; - int ret; - - mtx_assert(&efd->efd_lock, MA_OWNED); - ret = (efd->efd_count > 0); - - return (ret); -} - -/*ARGSUSED*/ -static int -filt_eventfdwrite(struct knote *kn, long hint) -{ - struct eventfd *efd = kn->kn_hook; - int ret; - - mtx_assert(&efd->efd_lock, MA_OWNED); - ret = (UINT64_MAX - 1 > efd->efd_count); - - return (ret); -} - -/*ARGSUSED*/ -static int -eventfd_ioctl(struct file *fp, u_long cmd, void *data, - struct ucred *active_cred, struct thread *td) -{ - - if (fp->f_data == NULL || (fp->f_type != DTYPE_LINUXEFD && - fp->f_type != DTYPE_LINUXTFD)) - return (EINVAL); - - switch (cmd) - { - case FIONBIO: - if ((*(int *)data)) - atomic_set_int(&fp->f_flag, FNONBLOCK); - else - atomic_clear_int(&fp->f_flag, FNONBLOCK); - case FIOASYNC: - return (0); - default: - return (ENXIO); - } -} - -/*ARGSUSED*/ -static int 
-eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, - struct thread *td) -{ - - return (ENXIO); -} - -/*ARGSUSED*/ -static int -eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) -{ - - kif->kf_type = KF_TYPE_UNKNOWN; - return (0); -} - int linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) { @@ -1166,6 +840,29 @@ return (tfd->tfd_count > 0); } +/*ARGSUSED*/ +static int +timerfd_ioctl(struct file *fp, u_long cmd, void *data, + struct ucred *active_cred, struct thread *td) /* fo_ioctl handler for timerfd files: FIONBIO sets or clears FNONBLOCK in fp->f_flag, FIOASYNC is accepted with no further action, any other command returns ENXIO */ +{ + + if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) + return (EINVAL); /* no state attached, or not a timerfd file */ + + switch (cmd) + { + case FIONBIO: + if ((*(int *)data)) + atomic_set_int(&fp->f_flag, FNONBLOCK); + else + atomic_clear_int(&fp->f_flag, FNONBLOCK); /* FALLTHROUGH */ + case FIOASYNC: + return (0); + default: + return (ENXIO); + } +} + /*ARGSUSED*/ static int timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -3906,6 +3906,7 @@ kern/subr_vmem.c standard kern/subr_witness.c optional witness kern/sys_capability.c standard +kern/sys_eventfd.c standard kern/sys_generic.c standard kern/sys_getrandom.c standard kern/sys_pipe.c standard Index: sys/kern/capabilities.conf =================================================================== --- sys/kern/capabilities.conf +++ sys/kern/capabilities.conf @@ -393,6 +393,11 @@ ktimer_gettime ktimer_settime +## +## Allow creating eventfds. +## +eventfd + ## ## We can't allow ktrace(2) because it relies on a global namespace, but we ## might want to introduce an fktrace(2) of some sort. 
Index: sys/kern/init_sysent.c
===================================================================
--- sys/kern/init_sysent.c
+++ sys/kern/init_sysent.c
@@ -632,4 +632,5 @@
 	{ .sy_narg = AS(__realpathat_args), .sy_call = (sy_call_t *)sys___realpathat, .sy_auevent = AUE_REALPATHAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC },	/* 574 = __realpathat */
 	{ .sy_narg = AS(close_range_args), .sy_call = (sy_call_t *)sys_close_range, .sy_auevent = AUE_CLOSERANGE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 575 = close_range */
 	{ .sy_narg = AS(rpctls_syscall_args), .sy_call = (sy_call_t *)lkmressys, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_ABSENT },	/* 576 = rpctls_syscall */
+	{ .sy_narg = AS(eventfd_args), .sy_call = (sy_call_t *)sys_eventfd, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC },	/* 577 = eventfd */
 };
Index: sys/kern/sys_eventfd.c
===================================================================
--- /dev/null
+++ sys/kern/sys_eventfd.c
@@ -0,0 +1,419 @@
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures");
+
+static fo_rdwr_t eventfd_read;
+static fo_rdwr_t eventfd_write;
+static fo_ioctl_t eventfd_ioctl;
+static fo_poll_t eventfd_poll;
+static fo_kqfilter_t eventfd_kqfilter;
+static fo_stat_t eventfd_stat;
+static fo_close_t eventfd_close;
+static fo_fill_kinfo_t eventfd_fill_kinfo;
+
+static struct fileops eventfdops = {
+	.fo_read = eventfd_read,
+	.fo_write = eventfd_write,
+	.fo_truncate = invfo_truncate,
+	.fo_ioctl = eventfd_ioctl,
+	.fo_poll = eventfd_poll,
+	.fo_kqfilter = eventfd_kqfilter,
+	.fo_stat = eventfd_stat,
+	.fo_close = eventfd_close,
+	.fo_chmod = invfo_chmod,
+	.fo_chown = invfo_chown,
+	.fo_sendfile = invfo_sendfile,
+	.fo_fill_kinfo = eventfd_fill_kinfo,
+	.fo_flags = DFLAG_PASSABLE
+};
+
+static void filt_eventfddetach(struct knote *kn);
+static int filt_eventfdread(struct knote *kn, long hint);
+static int filt_eventfdwrite(struct knote *kn, long hint);
+
+static struct filterops eventfd_rfiltops = {
+	.f_isfd = 1,
+	.f_detach = filt_eventfddetach,
+	.f_event = filt_eventfdread
+};
+static struct filterops eventfd_wfiltops = {
+	.f_isfd = 1,
+	.f_detach = filt_eventfddetach,
+	.f_event = filt_eventfdwrite
+};
+
+/*
+ * Per-descriptor state.  efd_count is read and modified with efd_lock
+ * held; the same mutex is the lock given to the knlist in efd_sel.
+ */
+struct eventfd {
+	eventfd_t efd_count;
+	uint32_t efd_flags;
+	struct selinfo efd_sel;
+	struct mtx efd_lock;
+};
+
+/*
+ * Create an eventfd whose counter starts at initval and install it in a
+ * new file descriptor.  EFD_CLOEXEC is handed to falloc() as O_CLOEXEC,
+ * EFD_NONBLOCK sets FNONBLOCK on the file, and the whole flags word is
+ * kept in efd_flags for eventfd_read()'s EFD_SEMAPHORE test.  flags is
+ * not validated here.  Returns 0 with the descriptor in td_retval[0], or
+ * the error from falloc().
+ */
+int
+eventfd_create(struct thread *td, uint32_t initval, int flags)
+{
+	struct eventfd *efd;
+	struct file *fp;
+	int fflags, fd, error;
+
+	fflags = 0;
+	if ((flags & EFD_CLOEXEC) != 0)
+		fflags |= O_CLOEXEC;
+
+	error = falloc(td, &fp, &fd, fflags);
+	if (error != 0)
+		return (error);
+
+	efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
+	efd->efd_flags = flags;
+	efd->efd_count = initval;
+	mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
+
+	knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
+
+	fflags = FREAD | FWRITE;
+	if ((flags & EFD_NONBLOCK) != 0)
+		fflags |= FNONBLOCK;
+
+	finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops);
+	fdrop(fp, td);
+
+	td->td_retval[0] = fd;
+	return (error);
+}
+
+/*
+ * fo_close handler: drain the selinfo and knlist, then destroy the lock
+ * and free the eventfd.
+ */
+static int
+eventfd_close(struct file *fp, struct thread *td)
+{
+	struct eventfd *efd;
+
+	efd = fp->f_data;
+	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+		return (EINVAL);
+
+	seldrain(&efd->efd_sel);
+	knlist_destroy(&efd->efd_sel.si_note);
+
+	fp->f_ops = &badfileops;
+	mtx_destroy(&efd->efd_lock);
+	free(efd, M_EVENTFD);
+
+	return (0);
+}
+
+/*
+ * eventfd(2) system call: fail with EINVAL if flags has bits other than
+ * EFD_CLOEXEC, EFD_NONBLOCK or EFD_SEMAPHORE, else create the descriptor.
+ */
+int
+sys_eventfd(struct thread *td, struct eventfd_args *args)
+{
+
+	if ((args->flags & ~(EFD_CLOEXEC|EFD_NONBLOCK|EFD_SEMAPHORE)) != 0)
+		return (EINVAL);
+
+	return (eventfd_create(td, args->initval, args->flags));
+}
+
+/*
+ * fo_read handler.  Requires room for at least sizeof(eventfd_t) bytes
+ * (EINVAL otherwise).  While the counter is zero the caller sleeps, or
+ * gets EAGAIN if the file is non-blocking.  With EFD_SEMAPHORE the counter
+ * is decremented and 1 is copied out; without it the whole counter is
+ * copied out and reset to zero.
+ */
+static int
+eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, struct thread *td)
+{
+	struct eventfd *efd;
+	eventfd_t count;
+	int error;
+
+	efd = fp->f_data;
+	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+		return (EINVAL);
+
+	if (uio->uio_resid < sizeof(eventfd_t))
+		return (EINVAL);
+
+	error = 0;
+	mtx_lock(&efd->efd_lock);
+retry:
+	if (efd->efd_count == 0) {
+		if ((fp->f_flag & FNONBLOCK) != 0) {
+			mtx_unlock(&efd->efd_lock);
+			return (EAGAIN);
+		}
+		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
+		    PCATCH, "lefdrd", 0);
+		if (error == 0)
+			goto retry;
+	}
+	if (error == 0) {
+		if ((efd->efd_flags & EFD_SEMAPHORE) != 0) {
+			count = 1;
+			--efd->efd_count;
+		} else {
+			count = efd->efd_count;
+			efd->efd_count = 0;
+		}
+		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+		selwakeup(&efd->efd_sel);
+		wakeup(&efd->efd_count);
+		mtx_unlock(&efd->efd_lock);
+		error = uiomove(&count, sizeof(eventfd_t), uio);
+	} else
+		mtx_unlock(&efd->efd_lock);
+
+	return (error);
+}
+
+/*
+ * fo_write handler.  Adds the eventfd_t supplied by the caller to the
+ * counter.  A value of UINT64_MAX is rejected with EINVAL.  While the
+ * addition would bring the counter to UINT64_MAX or past it the caller
+ * sleeps, or gets EAGAIN if the file is non-blocking.
+ */
+static int
+eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, struct thread *td)
+{
+	struct eventfd *efd;
+	eventfd_t count;
+	int error;
+
+	efd = fp->f_data;
+	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+		return (EINVAL);
+
+	if (uio->uio_resid < sizeof(eventfd_t))
+		return (EINVAL);
+
+	error = uiomove(&count, sizeof(eventfd_t), uio);
+	if (error != 0)
+		return (error);
+	if (count == UINT64_MAX)
+		return (EINVAL);
+
+	mtx_lock(&efd->efd_lock);
+retry:
+	if (UINT64_MAX - efd->efd_count <= count) {
+		if ((fp->f_flag & FNONBLOCK) != 0) {
+			mtx_unlock(&efd->efd_lock);
+			/* Do not return the number of bytes written */
+			uio->uio_resid += sizeof(eventfd_t);
+			return (EAGAIN);
+		}
+		error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
+		    PCATCH, "lefdwr", 0);
+		if (error == 0)
+			goto retry;
+	}
+	if (error == 0) {
+		efd->efd_count += count;
+		KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+		selwakeup(&efd->efd_sel);
+		wakeup(&efd->efd_count);
+	} else {
+		/*
+		 * mtx_sleep() failed and nothing was added to the counter.
+		 * As in the EAGAIN case above, give back the residual that
+		 * uiomove() consumed so no bytes are reported as written.
+		 */
+		uio->uio_resid += sizeof(eventfd_t);
+	}
+	mtx_unlock(&efd->efd_lock);
+
+	return (error);
+}
+
+/*
+ * fo_poll handler: readable while the counter is non-zero, writable while
+ * it is below UINT64_MAX - 1.  selrecord() is called only when none of the
+ * requested events is ready.
+ */
+static int
+eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
+    struct thread *td)
+{
+	struct eventfd *efd;
+	int revents = 0;
+
+	efd = fp->f_data;
+	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+		return (POLLERR);
+
+	mtx_lock(&efd->efd_lock);
+	if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0)
+		revents |= events & (POLLIN|POLLRDNORM);
+	if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count)
+		revents |= events & (POLLOUT|POLLWRNORM);
+	if (revents == 0)
+		selrecord(td, &efd->efd_sel);
+	mtx_unlock(&efd->efd_lock);
+
+	return (revents);
+}
+
+/*
+ * fo_kqfilter handler: attach an EVFILT_READ or EVFILT_WRITE knote to the
+ * eventfd's knlist; any other filter fails with EINVAL.
+ */
+/*ARGSUSED*/
+static int
+eventfd_kqfilter(struct file *fp, struct knote *kn)
+{
+	struct eventfd *efd;
+
+	efd = fp->f_data;
+	if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+		return (EINVAL);
+
+	mtx_lock(&efd->efd_lock);
+	switch (kn->kn_filter) {
+	case EVFILT_READ:
+		kn->kn_fop = &eventfd_rfiltops;
+		break;
+	case EVFILT_WRITE:
+		kn->kn_fop = &eventfd_wfiltops;
+		break;
+	default:
+		mtx_unlock(&efd->efd_lock);
+		return (EINVAL);
+	}
+
+	kn->kn_hook = efd;
+	knlist_add(&efd->efd_sel.si_note, kn, 1);
+	mtx_unlock(&efd->efd_lock);
+
+	return (0);
+}
+
+/* Detach: remove the knote from the eventfd's knlist. */
+static void
+filt_eventfddetach(struct knote *kn)
+{
+	struct eventfd *efd = kn->kn_hook;
+
+	mtx_lock(&efd->efd_lock);
+	knlist_remove(&efd->efd_sel.si_note, kn, 1);
+	mtx_unlock(&efd->efd_lock);
+}
+
+/* EVFILT_READ event test: true while the counter is non-zero. */
+/*ARGSUSED*/
+static int
+filt_eventfdread(struct knote *kn, long hint)
+{
+	struct eventfd *efd = kn->kn_hook;
+	int ret;
+
+	mtx_assert(&efd->efd_lock, MA_OWNED);
+	ret = (efd->efd_count > 0);
+
+	return (ret);
+}
+
+/* EVFILT_WRITE event test: true while the counter is below UINT64_MAX - 1. */
+/*ARGSUSED*/
+static int
+filt_eventfdwrite(struct knote *kn, long hint)
+{
+	struct eventfd *efd = kn->kn_hook;
+	int ret;
+
+	mtx_assert(&efd->efd_lock, MA_OWNED);
+	ret = (UINT64_MAX - 1 > efd->efd_count);
+
+	return (ret);
+}
+
+/*
+ * fo_ioctl handler: FIONBIO sets or clears FNONBLOCK on the file, FIOASYNC
+ * is accepted without further action, and any other command gets ENXIO.
+ */
+/*ARGSUSED*/
+static int
+eventfd_ioctl(struct file *fp, u_long cmd, void *data,
+    struct ucred *active_cred, struct thread *td)
+{
+
+	if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXEFD)
+		return (EINVAL);
+
+	switch (cmd) {
+	case FIONBIO:
+		if ((*(int *)data))
+			atomic_set_int(&fp->f_flag, FNONBLOCK);
+		else
+			atomic_clear_int(&fp->f_flag, FNONBLOCK);
+		/* FALLTHROUGH */
+	case FIOASYNC:
+		return (0);
+	default:
+		return (ENXIO);
+	}
+}
+
+/* fo_stat handler: stat is not supported on an eventfd; always ENXIO. */
+/*ARGSUSED*/
+static int
+eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
+    struct thread *td)
+{
+
+	return (ENXIO);
+}
+
+/* fo_fill_kinfo handler: reports the descriptor as KF_TYPE_UNKNOWN. */
+/*ARGSUSED*/
+static int
+eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+
+	/* TODO: add KF_TYPE */
+	kif->kf_type = KF_TYPE_UNKNOWN;
+	return (0);
+}
Index: sys/kern/syscalls.c
===================================================================
--- sys/kern/syscalls.c
+++ sys/kern/syscalls.c
@@ -583,4 +583,5 @@
 	"__realpathat",			/* 574 = __realpathat */
 	"close_range",			/* 575 = close_range */
 	"rpctls_syscall",			/* 576 = rpctls_syscall */
+	"eventfd",			/* 577 = eventfd */
 };
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master
+++ sys/kern/syscalls.master
@@ -3241,6 +3241,12 @@
 		    _In_z_ const char *path
 		);
 	}
+577	AUE_NULL	STD {
+		int eventfd(
+		    unsigned int initval,
+		    int flags
+		);
+	}
 
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
Index: sys/kern/systrace_args.c
===================================================================
--- sys/kern/systrace_args.c
+++ sys/kern/systrace_args.c
@@ -3376,6 +3376,14 @@
 		*n_args = 2;
 		break;
 	}
+	/* eventfd */
+	case 577: {
+		struct eventfd_args *p = params;
+		uarg[0] = p->initval; /* unsigned int */
+		iarg[1] = p->flags; /* int */
+		*n_args = 2;
+		break;
+	}
 	default:
 		*n_args = 0;
 		break;
@@ -9029,6 +9037,19 @@
 			break;
 		};
 		break;
+	/* eventfd */
+	case 577:
+		switch(ndx) {
+		case 0:
+			p = "unsigned int";
+			break;
+		case 1:
+			p = "int";
+			break;
+
default: + break; + }; + break; default: break; }; @@ -10960,6 +10981,11 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* eventfd */ + case 577: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; Index: sys/sys/eventfd.h =================================================================== --- /dev/null +++ sys/sys/eventfd.h @@ -0,0 +1,30 @@ +#ifndef _SYS_EVENTFD_H_ +#define _SYS_EVENTFD_H_ + +#include + +typedef uint64_t eventfd_t; /* counter value moved by read and write on an eventfd */ + +#define EFD_SEMAPHORE (1 << 0) /* a read returns 1 and decrements the counter instead of returning and clearing it */ +#define EFD_CLOEXEC 02000000 /* eventfd_create() allocates the descriptor with O_CLOEXEC; NOTE(review): value looks like the Linux encoding, confirm intended */ +#define EFD_NONBLOCK 00004000 /* eventfd_create() sets FNONBLOCK on the file; NOTE(review): value looks like the Linux encoding, confirm intended */ + +#ifdef _KERNEL + +struct thread; + +int eventfd_create(struct thread *td, uint32_t initval, int flags); /* in-kernel constructor called by sys_eventfd() and the Linux compat layer; new descriptor is left in td->td_retval[0] */ + +#else + +#include + +__BEGIN_DECLS +int eventfd(unsigned int initval, int flags); +int eventfd_read(int fd, eventfd_t *value); +int eventfd_write(int fd, eventfd_t value); +__END_DECLS + +#endif /* !_KERNEL */ + +#endif /* !_SYS_EVENTFD_H_ */ Index: sys/sys/syscall.h =================================================================== --- sys/sys/syscall.h +++ sys/sys/syscall.h @@ -512,4 +512,5 @@ #define SYS___realpathat 574 #define SYS_close_range 575 #define SYS_rpctls_syscall 576 -#define SYS_MAXSYSCALL 577 +#define SYS_eventfd 577 +#define SYS_MAXSYSCALL 578 Index: sys/sys/syscall.mk =================================================================== --- sys/sys/syscall.mk +++ sys/sys/syscall.mk @@ -417,4 +417,5 @@ sigfastblock.o \ __realpathat.o \ close_range.o \ - rpctls_syscall.o + rpctls_syscall.o \ + eventfd.o Index: sys/sys/sysproto.h =================================================================== --- sys/sys/sysproto.h +++ sys/sys/sysproto.h @@ -1836,6 +1836,10 @@ char op_l_[PADL_(int)]; int op; char op_r_[PADR_(int)]; char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; +struct eventfd_args { + char initval_l_[PADL_(unsigned int)]; unsigned int initval; char initval_r_[PADR_(unsigned int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; 
int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2227,6 +2231,7 @@ int sys___realpathat(struct thread *, struct __realpathat_args *); int sys_close_range(struct thread *, struct close_range_args *); int sys_rpctls_syscall(struct thread *, struct rpctls_syscall_args *); +int sys_eventfd(struct thread *, struct eventfd_args *); #ifdef COMPAT_43 @@ -3158,6 +3163,7 @@ #define SYS_AUE___realpathat AUE_REALPATHAT #define SYS_AUE_close_range AUE_CLOSERANGE #define SYS_AUE_rpctls_syscall AUE_NULL +#define SYS_AUE_eventfd AUE_NULL #undef PAD_ #undef PADL_