diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -659,6 +659,7 @@ #define AUE_SHMRENAME 43263 /* FreeBSD-specific. */ #define AUE_REALPATHAT 43264 /* FreeBSD-specific. */ #define AUE_CLOSERANGE 43265 /* FreeBSD-specific. */ +#define AUE_SPECIALFD 43266 /* FreeBSD-specific. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -1168,5 +1168,7 @@ ; 576 is initialised by the krpc code, if present. 576 AUE_NULL NOSTD|NOPROTO { int rpctls_syscall(int op, \ const char *path); } +577 AUE_SPECIALFD NOPROTO { int __specialfd(int type, const void *req, \ + size_t len); } ; vim: syntax=off diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c --- a/sys/compat/linux/linux_event.c +++ b/sys/compat/linux/linux_event.c @@ -51,9 +51,11 @@ #include #include #include +#include #include #include #include +#include #ifdef COMPAT_LINUX32 #include @@ -124,53 +126,11 @@ int error; }; -/* eventfd */ -typedef uint64_t eventfd_t; - -static fo_rdwr_t eventfd_read; -static fo_rdwr_t eventfd_write; -static fo_ioctl_t eventfd_ioctl; -static fo_poll_t eventfd_poll; -static fo_kqfilter_t eventfd_kqfilter; -static fo_stat_t eventfd_stat; -static fo_close_t eventfd_close; -static fo_fill_kinfo_t eventfd_fill_kinfo; - -static struct fileops eventfdops = { - .fo_read = eventfd_read, - .fo_write = eventfd_write, - .fo_truncate = invfo_truncate, - .fo_ioctl = eventfd_ioctl, - .fo_poll = eventfd_poll, - .fo_kqfilter = eventfd_kqfilter, - .fo_stat = eventfd_stat, - .fo_close = eventfd_close, - .fo_chmod = invfo_chmod, - .fo_chown = invfo_chown, - .fo_sendfile = invfo_sendfile, - .fo_fill_kinfo = eventfd_fill_kinfo, - .fo_flags = DFLAG_PASSABLE -}; - -static void filt_eventfddetach(struct knote *kn); 
-static int filt_eventfdread(struct knote *kn, long hint); -static int filt_eventfdwrite(struct knote *kn, long hint); - -static struct filterops eventfd_rfiltops = { - .f_isfd = 1, - .f_detach = filt_eventfddetach, - .f_event = filt_eventfdread -}; -static struct filterops eventfd_wfiltops = { - .f_isfd = 1, - .f_detach = filt_eventfddetach, - .f_event = filt_eventfdwrite -}; - /* timerfd */ typedef uint64_t timerfd_t; static fo_rdwr_t timerfd_read; +static fo_ioctl_t timerfd_ioctl; static fo_poll_t timerfd_poll; static fo_kqfilter_t timerfd_kqfilter; static fo_stat_t timerfd_stat; @@ -181,7 +141,7 @@ .fo_read = timerfd_read, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, - .fo_ioctl = eventfd_ioctl, + .fo_ioctl = timerfd_ioctl, .fo_poll = timerfd_poll, .fo_kqfilter = timerfd_kqfilter, .fo_stat = timerfd_stat, @@ -202,13 +162,6 @@ .f_event = filt_timerfdread }; -struct eventfd { - eventfd_t efd_count; - uint32_t efd_flags; - struct selinfo efd_sel; - struct mtx efd_lock; -}; - struct timerfd { clockid_t tfd_clockid; struct itimerspec tfd_time; @@ -219,7 +172,6 @@ struct mtx tfd_lock; }; -static int eventfd_create(struct thread *td, uint32_t initval, int flags); static void linux_timerfd_expire(void *); static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); @@ -691,294 +643,39 @@ return (error1 == 0 ? 
0 : error2); } -static int -eventfd_create(struct thread *td, uint32_t initval, int flags) -{ - struct filedesc *fdp; - struct eventfd *efd; - struct file *fp; - int fflags, fd, error; - - fflags = 0; - if ((flags & LINUX_O_CLOEXEC) != 0) - fflags |= O_CLOEXEC; - - fdp = td->td_proc->p_fd; - error = falloc(td, &fp, &fd, fflags); - if (error != 0) - return (error); - - efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); - efd->efd_flags = flags; - efd->efd_count = initval; - mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); - - knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); - - fflags = FREAD | FWRITE; - if ((flags & LINUX_O_NONBLOCK) != 0) - fflags |= FNONBLOCK; - - finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); - fdrop(fp, td); - - td->td_retval[0] = fd; - return (error); -} - #ifdef LINUX_LEGACY_SYSCALLS int linux_eventfd(struct thread *td, struct linux_eventfd_args *args) { + struct specialfd_eventfd ae; - return (eventfd_create(td, args->initval, 0)); + bzero(&ae, sizeof(ae)); + ae.initval = args->initval; + return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); } #endif int linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) { + struct specialfd_eventfd ae; + int flags; - if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) - return (EINVAL); - - return (eventfd_create(td, args->initval, args->flags)); -} - -static int -eventfd_close(struct file *fp, struct thread *td) -{ - struct eventfd *efd; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - seldrain(&efd->efd_sel); - knlist_destroy(&efd->efd_sel.si_note); - - fp->f_ops = &badfileops; - mtx_destroy(&efd->efd_lock); - free(efd, M_EPOLL); - - return (0); -} - -static int -eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - struct eventfd *efd; - eventfd_t count; - int error; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == 
NULL) - return (EINVAL); - - if (uio->uio_resid < sizeof(eventfd_t)) - return (EINVAL); - - error = 0; - mtx_lock(&efd->efd_lock); -retry: - if (efd->efd_count == 0) { - if ((fp->f_flag & FNONBLOCK) != 0) { - mtx_unlock(&efd->efd_lock); - return (EAGAIN); - } - error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); - if (error == 0) - goto retry; - } - if (error == 0) { - if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { - count = 1; - --efd->efd_count; - } else { - count = efd->efd_count; - efd->efd_count = 0; - } - KNOTE_LOCKED(&efd->efd_sel.si_note, 0); - selwakeup(&efd->efd_sel); - wakeup(&efd->efd_count); - mtx_unlock(&efd->efd_lock); - error = uiomove(&count, sizeof(eventfd_t), uio); - } else - mtx_unlock(&efd->efd_lock); - - return (error); -} - -static int -eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - struct eventfd *efd; - eventfd_t count; - int error; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - if (uio->uio_resid < sizeof(eventfd_t)) - return (EINVAL); - - error = uiomove(&count, sizeof(eventfd_t), uio); - if (error != 0) - return (error); - if (count == UINT64_MAX) - return (EINVAL); - - mtx_lock(&efd->efd_lock); -retry: - if (UINT64_MAX - efd->efd_count <= count) { - if ((fp->f_flag & FNONBLOCK) != 0) { - mtx_unlock(&efd->efd_lock); - /* Do not not return the number of bytes written */ - uio->uio_resid += sizeof(eventfd_t); - return (EAGAIN); - } - error = mtx_sleep(&efd->efd_count, &efd->efd_lock, - PCATCH, "lefdwr", 0); - if (error == 0) - goto retry; - } - if (error == 0) { - efd->efd_count += count; - KNOTE_LOCKED(&efd->efd_sel.si_note, 0); - selwakeup(&efd->efd_sel); - wakeup(&efd->efd_count); - } - mtx_unlock(&efd->efd_lock); - - return (error); -} - -static int -eventfd_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ - struct eventfd *efd; - int revents = 0; - - efd = 
fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (POLLERR); - - mtx_lock(&efd->efd_lock); - if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) - revents |= events & (POLLIN|POLLRDNORM); - if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) - revents |= events & (POLLOUT|POLLWRNORM); - if (revents == 0) - selrecord(td, &efd->efd_sel); - mtx_unlock(&efd->efd_lock); - - return (revents); -} - -static int -eventfd_kqfilter(struct file *fp, struct knote *kn) -{ - struct eventfd *efd; - - efd = fp->f_data; - if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) - return (EINVAL); - - mtx_lock(&efd->efd_lock); - switch (kn->kn_filter) { - case EVFILT_READ: - kn->kn_fop = &eventfd_rfiltops; - break; - case EVFILT_WRITE: - kn->kn_fop = &eventfd_wfiltops; - break; - default: - mtx_unlock(&efd->efd_lock); - return (EINVAL); - } - - kn->kn_hook = efd; - knlist_add(&efd->efd_sel.si_note, kn, 1); - mtx_unlock(&efd->efd_lock); - - return (0); -} - -static void -filt_eventfddetach(struct knote *kn) -{ - struct eventfd *efd = kn->kn_hook; - - mtx_lock(&efd->efd_lock); - knlist_remove(&efd->efd_sel.si_note, kn, 1); - mtx_unlock(&efd->efd_lock); -} - -static int -filt_eventfdread(struct knote *kn, long hint) -{ - struct eventfd *efd = kn->kn_hook; - int ret; - - mtx_assert(&efd->efd_lock, MA_OWNED); - ret = (efd->efd_count > 0); - - return (ret); -} - -static int -filt_eventfdwrite(struct knote *kn, long hint) -{ - struct eventfd *efd = kn->kn_hook; - int ret; - - mtx_assert(&efd->efd_lock, MA_OWNED); - ret = (UINT64_MAX - 1 > efd->efd_count); - - return (ret); -} - -static int -eventfd_ioctl(struct file *fp, u_long cmd, void *data, - struct ucred *active_cred, struct thread *td) -{ - - if (fp->f_data == NULL || (fp->f_type != DTYPE_LINUXEFD && - fp->f_type != DTYPE_LINUXTFD)) + if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | + LINUX_EFD_SEMAPHORE)) != 0) return (EINVAL); - - switch (cmd) - { - case FIONBIO: - if ((*(int *)data)) 
- atomic_set_int(&fp->f_flag, FNONBLOCK); - else - atomic_clear_int(&fp->f_flag, FNONBLOCK); - case FIOASYNC: - return (0); - default: - return (ENXIO); - } -} - -static int -eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, - struct thread *td) -{ - - return (ENXIO); -} - -static int -eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) -{ - - kif->kf_type = KF_TYPE_UNKNOWN; - return (0); + flags = 0; + if ((args->flags & LINUX_O_CLOEXEC) != 0) + flags |= EFD_CLOEXEC; + if ((args->flags & LINUX_O_NONBLOCK) != 0) + flags |= EFD_NONBLOCK; + if ((args->flags & LINUX_EFD_SEMAPHORE) != 0) + flags |= EFD_SEMAPHORE; + + bzero(&ae, sizeof(ae)); + ae.flags = flags; + ae.initval = args->initval; + return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); } int @@ -1154,6 +851,23 @@ return (tfd->tfd_count > 0); } +static int +timerfd_ioctl(struct file *fp, u_long cmd, void *data, + struct ucred *active_cred, struct thread *td) +{ + + if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) + return (EINVAL); + + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return (0); + } + + return (ENOTTY); +} + static int timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, struct thread *td) diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3926,6 +3926,7 @@ kern/subr_vmem.c standard kern/subr_witness.c optional witness kern/sys_capability.c standard +kern/sys_eventfd.c standard kern/sys_generic.c standard kern/sys_getrandom.c standard kern/sys_pipe.c standard diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -55,6 +55,11 @@ __mac_set_fd __mac_set_proc +## +## Allow creating special file descriptors like eventfd(2). 
+## +__specialfd + ## ## Allow sysctl(2) as we scope internal to the call; this is a global ## namespace, but there are several critical sysctls required for almost diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -4609,8 +4609,8 @@ return ("dev"); case DTYPE_PROCDESC: return ("proc"); - case DTYPE_LINUXEFD: - return ("levent"); + case DTYPE_EVENTFD: + return ("eventfd"); case DTYPE_LINUXTFD: return ("ltimer"); default: diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c new file mode 100644 --- /dev/null +++ b/sys/kern/sys_eventfd.c @@ -0,0 +1,349 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2007 Roman Divacky + * Copyright (c) 2014 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +_Static_assert(EFD_CLOEXEC == O_CLOEXEC, "Mismatched EFD_CLOEXEC"); +_Static_assert(EFD_NONBLOCK == O_NONBLOCK, "Mismatched EFD_NONBLOCK"); + +MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); + +static fo_rdwr_t eventfd_read; +static fo_rdwr_t eventfd_write; +static fo_ioctl_t eventfd_ioctl; +static fo_poll_t eventfd_poll; +static fo_kqfilter_t eventfd_kqfilter; +static fo_stat_t eventfd_stat; +static fo_close_t eventfd_close; +static fo_fill_kinfo_t eventfd_fill_kinfo; + +static struct fileops eventfdops = { + .fo_read = eventfd_read, + .fo_write = eventfd_write, + .fo_truncate = invfo_truncate, + .fo_ioctl = eventfd_ioctl, + .fo_poll = eventfd_poll, + .fo_kqfilter = eventfd_kqfilter, + .fo_stat = eventfd_stat, + .fo_close = eventfd_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = eventfd_fill_kinfo, + .fo_flags = DFLAG_PASSABLE +}; + +static void filt_eventfddetach(struct knote *kn); +static int filt_eventfdread(struct knote *kn, long hint); +static int filt_eventfdwrite(struct knote *kn, long hint); + +static struct filterops eventfd_rfiltops = { + .f_isfd = 1, + .f_detach = filt_eventfddetach, + 
.f_event = filt_eventfdread +}; + +static struct filterops eventfd_wfiltops = { + .f_isfd = 1, + .f_detach = filt_eventfddetach, + .f_event = filt_eventfdwrite +}; + +struct eventfd { + eventfd_t efd_count; + uint32_t efd_flags; + struct selinfo efd_sel; + struct mtx efd_lock; +}; + +int +eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, + int flags) +{ + struct eventfd *efd; + int fflags; + + AUDIT_ARG_FFLAGS(flags); + AUDIT_ARG_VALUE(initval); + + efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); + efd->efd_flags = flags; + efd->efd_count = initval; + mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); + knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); + + fflags = FREAD | FWRITE; + if ((flags & EFD_NONBLOCK) != 0) + fflags |= FNONBLOCK; + finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); + + return (0); +} + +static int +eventfd_close(struct file *fp, struct thread *td) +{ + struct eventfd *efd; + + efd = fp->f_data; + seldrain(&efd->efd_sel); + knlist_destroy(&efd->efd_sel.si_note); + mtx_destroy(&efd->efd_lock); + free(efd, M_EVENTFD); + return (0); +} + +static int +eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct eventfd *efd; + eventfd_t count; + int error; + + if (uio->uio_resid < sizeof(eventfd_t)) + return (EINVAL); + + error = 0; + efd = fp->f_data; + mtx_lock(&efd->efd_lock); + while (error == 0 && efd->efd_count == 0) { + if ((fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&efd->efd_lock); + return (EAGAIN); + } + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, + "efdrd", 0); + } + if (error == 0) { + MPASS(efd->efd_count > 0); + if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { + count = 1; + --efd->efd_count; + } else { + count = efd->efd_count; + efd->efd_count = 0; + } + KNOTE_LOCKED(&efd->efd_sel.si_note, 0); + selwakeup(&efd->efd_sel); + wakeup(&efd->efd_count); + mtx_unlock(&efd->efd_lock); + error = uiomove(&count, 
sizeof(eventfd_t), uio); + } else + mtx_unlock(&efd->efd_lock); + + return (error); +} + +static int +eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct eventfd *efd; + eventfd_t count; + int error; + + if (uio->uio_resid < sizeof(eventfd_t)) + return (EINVAL); + + error = uiomove(&count, sizeof(eventfd_t), uio); + if (error != 0) + return (error); + if (count == UINT64_MAX) + return (EINVAL); + + efd = fp->f_data; + mtx_lock(&efd->efd_lock); +retry: + if (UINT64_MAX - efd->efd_count <= count) { + if ((fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&efd->efd_lock); + /* Do not return the number of bytes written */ + uio->uio_resid += sizeof(eventfd_t); + return (EAGAIN); + } + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, + PCATCH, "efdwr", 0); + if (error == 0) + goto retry; + } + if (error == 0) { + MPASS(UINT64_MAX - efd->efd_count > count); + efd->efd_count += count; + KNOTE_LOCKED(&efd->efd_sel.si_note, 0); + selwakeup(&efd->efd_sel); + wakeup(&efd->efd_count); + } + mtx_unlock(&efd->efd_lock); + + return (error); +} + +static int +eventfd_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + struct eventfd *efd; + int revents; + + efd = fp->f_data; + revents = 0; + mtx_lock(&efd->efd_lock); + if ((events & (POLLIN | POLLRDNORM)) != 0 && efd->efd_count > 0) + revents |= events & (POLLIN | POLLRDNORM); + if ((events & (POLLOUT | POLLWRNORM)) != 0 && UINT64_MAX - 1 > + efd->efd_count) + revents |= events & (POLLOUT | POLLWRNORM); + if (revents == 0) + selrecord(td, &efd->efd_sel); + mtx_unlock(&efd->efd_lock); + + return (revents); +} + +static int +eventfd_kqfilter(struct file *fp, struct knote *kn) +{ + struct eventfd *efd = fp->f_data; + + mtx_lock(&efd->efd_lock); + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &eventfd_rfiltops; + break; + case EVFILT_WRITE: + kn->kn_fop = &eventfd_wfiltops; + break; + default: + 
mtx_unlock(&efd->efd_lock); + return (EINVAL); + } + + kn->kn_hook = efd; + knlist_add(&efd->efd_sel.si_note, kn, 1); + mtx_unlock(&efd->efd_lock); + + return (0); +} + +static void +filt_eventfddetach(struct knote *kn) +{ + struct eventfd *efd = kn->kn_hook; + + mtx_lock(&efd->efd_lock); + knlist_remove(&efd->efd_sel.si_note, kn, 1); + mtx_unlock(&efd->efd_lock); +} + +static int +filt_eventfdread(struct knote *kn, long hint) +{ + struct eventfd *efd = kn->kn_hook; + int ret; + + mtx_assert(&efd->efd_lock, MA_OWNED); + kn->kn_data = (int64_t)efd->efd_count; + ret = efd->efd_count > 0; + + return (ret); +} + +static int +filt_eventfdwrite(struct knote *kn, long hint) +{ + struct eventfd *efd = kn->kn_hook; + int ret; + + mtx_assert(&efd->efd_lock, MA_OWNED); + kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); + ret = UINT64_MAX - 1 > efd->efd_count; + + return (ret); +} + +static int +eventfd_ioctl(struct file *fp, u_long cmd, void *data, + struct ucred *active_cred, struct thread *td) +{ + switch (cmd) { + case FIONBIO: + case FIOASYNC: + return (0); + } + + return (ENOTTY); +} + +static int +eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, + struct thread *td) +{ + bzero((void *)st, sizeof *st); + st->st_mode = S_IFIFO; + return (0); +} + +static int +eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) +{ + struct eventfd *efd = fp->f_data; + + kif->kf_type = KF_TYPE_EVENTFD; + mtx_lock(&efd->efd_lock); + kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; + kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; + mtx_unlock(&efd->efd_lock); + return (0); +} diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -859,6 +861,67 @@ return (error); } +int 
+kern_specialfd(struct thread *td, int type, void *arg) +{ + struct file *fp; + struct specialfd_eventfd *ae; + int error, fd, fflags; + + fflags = 0; + error = falloc_noinstall(td, &fp); + if (error != 0) + return (error); + + switch (type) { + case SPECIALFD_EVENTFD: + ae = arg; + if ((ae->flags & EFD_CLOEXEC) != 0) + fflags |= O_CLOEXEC; + error = eventfd_create_file(td, fp, ae->initval, ae->flags); + break; + default: + error = EINVAL; + break; + } + + if (error == 0) + error = finstall(td, fp, &fd, fflags, NULL); + fdrop(fp, td); + if (error == 0) + td->td_retval[0] = fd; + return (error); +} + +int +sys___specialfd(struct thread *td, struct __specialfd_args *args) +{ + struct specialfd_eventfd ae; + int error; + + switch (args->type) { + case SPECIALFD_EVENTFD: + if (args->len != sizeof(struct specialfd_eventfd)) { + error = EINVAL; + break; + } + error = copyin(args->req, &ae, sizeof(ae)); + if (error != 0) + break; + if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | + EFD_SEMAPHORE)) != 0) { + error = EINVAL; + break; + } + error = kern_specialfd(td, args->type, &ae); + break; + default: + error = EINVAL; + break; + } + return (error); +} + int poll_no_poll(int events) { diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3241,6 +3241,13 @@ _In_z_ const char *path ); } +577 AUE_SPECIALFD STD { + int __specialfd( + int type, + _In_reads_bytes_(len) const void *req, + size_t len + ); + } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/sys/eventfd.h b/sys/sys/eventfd.h new file mode 100644 --- /dev/null +++ b/sys/sys/eventfd.h @@ -0,0 +1,54 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Greg V + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _SYS_EVENTFD_H_ +#define _SYS_EVENTFD_H_ + +#include + +typedef uint64_t eventfd_t; + +#define EFD_SEMAPHORE 0x00000001 +#define EFD_NONBLOCK 0x00000004 +#define EFD_CLOEXEC 0x00100000 + +#ifdef _KERNEL + +int eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval, + int flags); + +#else + +__BEGIN_DECLS +int eventfd(unsigned int initval, int flags); +int eventfd_read(int fd, eventfd_t *value); +int eventfd_write(int fd, eventfd_t value); +__END_DECLS + +#endif /* !_KERNEL */ + +#endif /* !_SYS_EVENTFD_H_ */ diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -69,7 +69,7 @@ #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ #define DTYPE_PROCDESC 12 /* process descriptor */ -#define DTYPE_LINUXEFD 13 /* emulation eventfd type */ +#define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_LINUXTFD 14 /* emulation timerfd type */ #ifdef _KERNEL diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h new file mode 100644 --- /dev/null +++ b/sys/sys/specialfd.h @@ -0,0 +1,42 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Greg V + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_SPECIALFD_H_ +#define _SYS_SPECIALFD_H_ + +enum specialfd_type { + SPECIALFD_EVENTFD = 1, +}; + +struct specialfd_eventfd { + unsigned int initval; + int flags; +}; + +#endif /* !_SYS_SPECIALFD_H_ */ diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -304,6 +304,7 @@ int kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)); +int kern_specialfd(struct thread *td, int type, void *arg); int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, const char *path1, int fd, diff --git a/sys/sys/user.h b/sys/sys/user.h --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -264,6 +264,7 @@ #define KF_TYPE_PTS 10 #define KF_TYPE_PROCDESC 11 #define KF_TYPE_DEV 12 +#define KF_TYPE_EVENTFD 13 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -436,6 +437,10 @@ uint64_t kf_spareint64[32]; pid_t kf_pid; } kf_proc; + struct { + uint64_t kf_eventfd_value; + uint32_t kf_eventfd_flags; + } kf_eventfd; } kf_un; }; uint16_t kf_status; /* Status flags. */