Index: sys/amd64/linux/linux_dummy.c =================================================================== --- sys/amd64/linux/linux_dummy.c +++ sys/amd64/linux/linux_dummy.c @@ -103,12 +103,10 @@ DUMMY(epoll_pwait); DUMMY(signalfd); DUMMY(timerfd); -DUMMY(eventfd); DUMMY(fallocate); DUMMY(timerfd_settime); DUMMY(timerfd_gettime); DUMMY(signalfd4); -DUMMY(eventfd2); DUMMY(inotify_init1); DUMMY(preadv); DUMMY(pwritev); Index: sys/amd64/linux/syscalls.master =================================================================== --- sys/amd64/linux/syscalls.master +++ sys/amd64/linux/syscalls.master @@ -472,14 +472,14 @@ l_int maxevents, l_int timeout, l_sigset_t *mask); } 282 AUE_NULL STD { int linux_signalfd(void); } 283 AUE_NULL STD { int linux_timerfd(void); } -284 AUE_NULL STD { int linux_eventfd(void); } +284 AUE_NULL STD { int linux_eventfd(l_uint initval); } 285 AUE_NULL STD { int linux_fallocate(void); } 286 AUE_NULL STD { int linux_timerfd_settime(void); } 287 AUE_NULL STD { int linux_timerfd_gettime(void); } 288 AUE_ACCEPT STD { int linux_accept4(l_int s, l_uintptr_t addr, \ l_uintptr_t namelen, int flags); } 289 AUE_NULL STD { int linux_signalfd4(void); } -290 AUE_NULL STD { int linux_eventfd2(void); } +290 AUE_NULL STD { int linux_eventfd2(l_uint initval, l_int flags); } 291 AUE_NULL STD { int linux_epoll_create1(l_int flags); } 292 AUE_NULL STD { int linux_dup3(l_int oldfd, \ l_int newfd, l_int flags); } Index: sys/amd64/linux32/linux32_dummy.c =================================================================== --- sys/amd64/linux32/linux32_dummy.c +++ sys/amd64/linux32/linux32_dummy.c @@ -108,7 +108,6 @@ DUMMY(utimensat); DUMMY(signalfd); DUMMY(timerfd_create); -DUMMY(eventfd); /* linux 2.6.23: */ DUMMY(fallocate); /* linux 2.6.25: */ @@ -116,7 +115,6 @@ DUMMY(timerfd_gettime); /* linux 2.6.27: */ DUMMY(signalfd4); -DUMMY(eventfd2); DUMMY(inotify_init1); /* linux 2.6.30: */ DUMMY(preadv); Index: sys/amd64/linux32/syscalls.master =================================================================== --- sys/amd64/linux32/syscalls.master +++ sys/amd64/linux32/syscalls.master @@ -534,7 +534,7 @@ 320 AUE_NULL STD { int linux_utimensat(void); } 321 AUE_NULL STD { int linux_signalfd(void); } 322 AUE_NULL STD { int linux_timerfd_create(void); } -323 AUE_NULL STD { int linux_eventfd(void); } +323 AUE_NULL STD { int linux_eventfd(l_uint initval); } ; linux 2.6.23: 324 AUE_NULL STD { int linux_fallocate(void); } ; linux 2.6.25: @@ -542,7 +542,7 @@ 326 AUE_NULL STD { int linux_timerfd_gettime(void); } ; linux 2.6.27: 327 AUE_NULL STD { int linux_signalfd4(void); } -328 AUE_NULL STD { int linux_eventfd2(void); } +328 AUE_NULL STD { int linux_eventfd2(l_uint initval, l_int flags); } 329 AUE_NULL STD { int linux_epoll_create1(l_int flags); } 330 AUE_NULL STD { int linux_dup3(l_int oldfd, \ l_int newfd, l_int flags); } Index: sys/compat/linux/linux_event.h =================================================================== --- sys/compat/linux/linux_event.h +++ sys/compat/linux/linux_event.h @@ -55,4 +55,6 @@ #define LINUX_EPOLL_CTL_DEL 2 #define LINUX_EPOLL_CTL_MOD 3 +#define LINUX_EFD_SEMAPHORE (1 << 0) + #endif /* !_LINUX_EVENT_H_ */ Index: sys/compat/linux/linux_event.c =================================================================== --- sys/compat/linux/linux_event.c +++ sys/compat/linux/linux_event.c @@ -43,7 +43,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -114,6 +116,57 @@ int error; }; +/* eventfd */ +typedef uint64_t eventfd_t; + +static fo_rdwr_t eventfd_read; +static fo_rdwr_t eventfd_write; +static fo_truncate_t eventfd_truncate; +static fo_ioctl_t eventfd_ioctl; +static fo_poll_t eventfd_poll; +static fo_kqfilter_t eventfd_kqfilter; +static fo_stat_t eventfd_stat; +static fo_close_t eventfd_close; + +static struct fileops eventfdops = { + .fo_read = eventfd_read, + .fo_write = eventfd_write, + .fo_truncate = eventfd_truncate, + .fo_ioctl = eventfd_ioctl, + .fo_poll = eventfd_poll, + .fo_kqfilter = eventfd_kqfilter, + .fo_stat = eventfd_stat, + .fo_close = eventfd_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_flags = DFLAG_PASSABLE +}; + +static void filt_eventfddetach(struct knote *kn); +static int filt_eventfdread(struct knote *kn, long hint); +static int filt_eventfdwrite(struct knote *kn, long hint); + +static struct filterops eventfd_rfiltops = { + .f_isfd = 1, + .f_detach = filt_eventfddetach, + .f_event = filt_eventfdread +}; +static struct filterops eventfd_wfiltops = { + .f_isfd = 1, + .f_detach = filt_eventfddetach, + .f_event = filt_eventfdwrite +}; + +struct eventfd { + eventfd_t efd_count; + uint32_t efd_flags; + struct selinfo efd_sel; + struct mtx efd_lock; +}; + +static int eventfd_create(struct thread *td, uint32_t initval, int flags); + static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) @@ -496,3 +549,280 @@ /* report any errors we got */ return (error1 == 0 ? error2 : error1); } + +static int +eventfd_create(struct thread *td, uint32_t initval, int flags) +{ + struct filedesc *fdp; + struct eventfd *efd; + struct file *fp; + int fflags, fd, error; + + fflags = 0; + if ((flags & LINUX_O_CLOEXEC) != 0) + fflags |= O_CLOEXEC; + + fdp = td->td_proc->p_fd; + error = falloc(td, &fp, &fd, fflags); + if (error) + return (error); + + efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); + efd->efd_flags = flags; + efd->efd_count = initval; + mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); + + knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); + + fflags = FREAD | FWRITE; + if ((flags & LINUX_O_NONBLOCK) != 0) + fflags |= FNONBLOCK; + + finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); + fdrop(fp, td); + + td->td_retval[0] = fd; + return (error); +} + +int +linux_eventfd(struct thread *td, struct linux_eventfd_args *args) +{ + + return (eventfd_create(td, args->initval, 0)); +} + +int +linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) +{ + + if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) + return (EINVAL); + + return (eventfd_create(td, args->initval, args->flags)); +} + +static int +eventfd_close(struct file *fp, struct thread *td) +{ + struct eventfd *efd; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EBADF); + + seldrain(&efd->efd_sel); + knlist_destroy(&efd->efd_sel.si_note); + + fp->f_ops = &badfileops; + mtx_destroy(&efd->efd_lock); + free(efd, M_EPOLL); + + return (0); +} + +static int +eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct eventfd *efd; + eventfd_t count; + int error; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EBADF); + + if (uio->uio_resid < sizeof(eventfd_t)) + return (EINVAL); + + error = 0; + mtx_lock(&efd->efd_lock); +retry: + if (efd->efd_count == 0) { + if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { + mtx_unlock(&efd->efd_lock); + return (EAGAIN); + } + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); + if (error == 0) + goto retry; + } + if (error == 0) { + if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { + count = 1; + --efd->efd_count; + } else { + count = efd->efd_count; + efd->efd_count = 0; + } + KNOTE_LOCKED(&efd->efd_sel.si_note, 0); + selwakeup(&efd->efd_sel); + wakeup(&efd->efd_count); + mtx_unlock(&efd->efd_lock); + error = uiomove(&count, sizeof(eventfd_t), uio); + } else + mtx_unlock(&efd->efd_lock); + + return (error); +} + +static int +eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct eventfd *efd; + eventfd_t count; + int error; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EBADF); + + if (uio->uio_resid < sizeof(eventfd_t)) + return (EINVAL); + + error = uiomove(&count, sizeof(eventfd_t), uio); + if (error) + return (error); + if (count == UINT64_MAX) + return (EINVAL); + + mtx_lock(&efd->efd_lock); +retry: + if (UINT64_MAX - efd->efd_count <= count) { + if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { + mtx_unlock(&efd->efd_lock); + return (EAGAIN); + } + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, + PCATCH, "lefdwr", 0); + if (error == 0) + goto retry; + } + if (error == 0) { + efd->efd_count += count; + KNOTE_LOCKED(&efd->efd_sel.si_note, 0); + selwakeup(&efd->efd_sel); + wakeup(&efd->efd_count); + } + mtx_unlock(&efd->efd_lock); + + return (error); +} + +static int +eventfd_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + struct eventfd *efd; + int revents = 0; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (POLLERR); + + mtx_lock(&efd->efd_lock); + if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) + revents |= events & (POLLIN|POLLRDNORM); + if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) + revents |= events & (POLLOUT|POLLWRNORM); + if (revents == 0) + selrecord(td, &efd->efd_sel); + mtx_unlock(&efd->efd_lock); + + return (revents); +} + +/*ARGSUSED*/ +static int +eventfd_kqfilter(struct file *fp, struct knote *kn) +{ + struct eventfd *efd; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EINVAL); + + mtx_lock(&efd->efd_lock); + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &eventfd_rfiltops; + break; + case EVFILT_WRITE: + kn->kn_fop = &eventfd_wfiltops; + break; + default: + mtx_unlock(&efd->efd_lock); + return (EINVAL); + } + + kn->kn_hook = efd; + knlist_add(&efd->efd_sel.si_note, kn, 1); + mtx_unlock(&efd->efd_lock); + + return (0); +} + +static void +filt_eventfddetach(struct knote *kn) +{ + struct eventfd *efd = kn->kn_hook; + + mtx_lock(&efd->efd_lock); + knlist_remove(&efd->efd_sel.si_note, kn, 1); + mtx_unlock(&efd->efd_lock); +} + +/*ARGSUSED*/ +static int +filt_eventfdread(struct knote *kn, long hint) +{ + struct eventfd *efd = kn->kn_hook; + int ret; + + mtx_assert(&efd->efd_lock, MA_OWNED); + ret = (efd->efd_count > 0); + + return (ret); +} + +/*ARGSUSED*/ +static int +filt_eventfdwrite(struct knote *kn, long hint) +{ + struct eventfd *efd = kn->kn_hook; + int ret; + + mtx_assert(&efd->efd_lock, MA_OWNED); + ret = (UINT64_MAX - 1 > efd->efd_count); + + return (ret); +} + +/*ARGSUSED*/ +static int +eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, + struct thread *td) +{ + + return (ENXIO); +} + +/*ARGSUSED*/ +static int +eventfd_ioctl(struct file *fp, u_long cmd, void *data, + struct ucred *active_cred, struct thread *td) +{ + + return (ENXIO); +} + +/*ARGSUSED*/ +static int +eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, + struct thread *td) +{ + + return (ENXIO); +} Index: sys/i386/linux/linux_dummy.c =================================================================== --- sys/i386/linux/linux_dummy.c +++ sys/i386/linux/linux_dummy.c @@ -104,7 +104,6 @@ DUMMY(utimensat); DUMMY(signalfd); DUMMY(timerfd_create); -DUMMY(eventfd); /* linux 2.6.23: */ DUMMY(fallocate); /* linux 2.6.25: */ @@ -112,7 +111,6 @@ DUMMY(timerfd_gettime); /* linux 2.6.27: */ DUMMY(signalfd4); -DUMMY(eventfd2); DUMMY(inotify_init1); /* linux 2.6.30: */ DUMMY(preadv); Index: sys/i386/linux/syscalls.master =================================================================== --- sys/i386/linux/syscalls.master +++ sys/i386/linux/syscalls.master @@ -542,7 +542,7 @@ 320 AUE_NULL STD { int linux_utimensat(void); } 321 AUE_NULL STD { int linux_signalfd(void); } 322 AUE_NULL STD { int linux_timerfd_create(void); } -323 AUE_NULL STD { int linux_eventfd(void); } +323 AUE_NULL STD { int linux_eventfd(l_uint initval); } ; linux 2.6.23: 324 AUE_NULL STD { int linux_fallocate(void); } ; linux 2.6.25: @@ -550,7 +550,7 @@ 326 AUE_NULL STD { int linux_timerfd_gettime(void); } ; linux 2.6.27: 327 AUE_NULL STD { int linux_signalfd4(void); } -328 AUE_NULL STD { int linux_eventfd2(void); } +328 AUE_NULL STD { int linux_eventfd2(l_uint initval, l_int flags); } 329 AUE_NULL STD { int linux_epoll_create1(l_int flags); } 330 AUE_NULL STD { int linux_dup3(l_int oldfd, \ l_int newfd, l_int flags); } Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -66,6 +66,7 @@ #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ #define DTYPE_PROCDESC 12 /* process descriptor */ +#define DTYPE_LINUXEFD 13 /* emulation eventfd type */ #ifdef _KERNEL