Changeset View
Standalone View
sys/kern/sys_eventfd.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
 * Review thread (done):
 *   kib: Missing copyright.
 *   val_packett.cool: Should be the linux_event.c copyright since the code
 *   is mostly from there?
 *   markj: That seems reasonable to me.
* | |||||
* Copyright (c) 2007 Roman Divacky | |||||
* Copyright (c) 2014 Dmitry Chagin | |||||
* All rights reserved. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/param.h> | |||||
#include <sys/systm.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/limits.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/mutex.h> | |||||
#include <sys/types.h> | |||||
#include <sys/user.h> | |||||
#include <sys/fcntl.h> | |||||
#include <sys/file.h> | |||||
#include <sys/filedesc.h> | |||||
#include <sys/filio.h> | |||||
#include <sys/stat.h> | |||||
#include <sys/errno.h> | |||||
#include <sys/event.h> | |||||
#include <sys/poll.h> | |||||
#include <sys/proc.h> | |||||
#include <sys/uio.h> | |||||
#include <sys/selinfo.h> | |||||
#include <sys/sysproto.h> | |||||
#include <sys/syscallsubr.h> | |||||
#include <sys/eventfd.h> | |||||
#include <security/audit/audit.h> | |||||
MALLOC_DEFINE(M_EVENTFD, "eventfd", "eventfd structures"); | |||||
static fo_rdwr_t eventfd_read; | |||||
static fo_rdwr_t eventfd_write; | |||||
static fo_ioctl_t eventfd_ioctl; | |||||
static fo_poll_t eventfd_poll; | |||||
static fo_kqfilter_t eventfd_kqfilter; | |||||
static fo_stat_t eventfd_stat; | |||||
static fo_close_t eventfd_close; | |||||
static fo_fill_kinfo_t eventfd_fill_kinfo; | |||||
static struct fileops eventfdops = { | |||||
.fo_read = eventfd_read, | |||||
.fo_write = eventfd_write, | |||||
.fo_truncate = invfo_truncate, | |||||
.fo_ioctl = eventfd_ioctl, | |||||
.fo_poll = eventfd_poll, | |||||
.fo_kqfilter = eventfd_kqfilter, | |||||
.fo_stat = eventfd_stat, | |||||
.fo_close = eventfd_close, | |||||
.fo_chmod = invfo_chmod, | |||||
.fo_chown = invfo_chown, | |||||
.fo_sendfile = invfo_sendfile, | |||||
.fo_fill_kinfo = eventfd_fill_kinfo, | |||||
.fo_flags = DFLAG_PASSABLE | |||||
}; | |||||
/* kqueue filter callbacks referenced by the filterops tables below. */
static void	filt_eventfddetach(struct knote *kn);
static int	filt_eventfdread(struct knote *kn, long hint);
static int	filt_eventfdwrite(struct knote *kn, long hint);
/*
 * Review thread:
 *   markj (not done): Should we check for unknown flags?
 *   val_packett.cool (done): That's happening in `sys_eventfd` and
 *   `linux_eventfd2`.
 */
static struct filterops eventfd_rfiltops = { | |||||
.f_isfd = 1, | |||||
.f_detach = filt_eventfddetach, | |||||
.f_event = filt_eventfdread | |||||
}; | |||||
static struct filterops eventfd_wfiltops = { | |||||
.f_isfd = 1, | |||||
.f_detach = filt_eventfddetach, | |||||
.f_event = filt_eventfdwrite | |||||
}; | |||||
struct eventfd { | |||||
eventfd_t efd_count; | |||||
uint32_t efd_flags; | |||||
struct selinfo efd_sel; | |||||
struct mtx efd_lock; | |||||
}; | |||||
int | |||||
eventfd_create(struct thread *td, uint32_t initval, int flags) | |||||
{ | |||||
struct eventfd *efd; | |||||
struct file *fp; | |||||
Done Inline ActionsThis should be return (0);. kib: This should be return (0);. | |||||
int fflags, fd, error; | |||||
AUDIT_ARG_FFLAGS(flags); | |||||
AUDIT_ARG_VALUE(initval); | |||||
fflags = 0; | |||||
if ((flags & EFD_CLOEXEC) != 0) | |||||
fflags |= O_CLOEXEC; | |||||
Done Inline ActionsThis should be a separate file type. kib: This should be a separate file type. | |||||
Done Inline ActionsI was thinking of just renaming LINUXEFD to EVENTFD, since in this patch the linux part just reuses the same type..? val_packett.cool: I was thinking of just renaming LINUXEFD to EVENTFD, since in this patch the linux part just… | |||||
Done Inline ActionsThat sounds reasonable to me. markj: That sounds reasonable to me. | |||||
error = falloc(td, &fp, &fd, fflags); | |||||
if (error != 0) | |||||
return (error); | |||||
efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO); | |||||
efd->efd_flags = flags; | |||||
efd->efd_count = initval; | |||||
mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); | |||||
knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); | |||||
fflags = FREAD | FWRITE; | |||||
if ((flags & EFD_NONBLOCK) != 0) | |||||
fflags |= FNONBLOCK; | |||||
finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops); | |||||
fdrop(fp, td); | |||||
td->td_retval[0] = fd; | |||||
return (0); | |||||
} | |||||
static int | |||||
eventfd_close(struct file *fp, struct thread *td) | |||||
{ | |||||
struct eventfd *efd = fp->f_data; | |||||
seldrain(&efd->efd_sel); | |||||
knlist_destroy(&efd->efd_sel.si_note); | |||||
Done Inline ActionsI think you can instead just assert that f_type == DTYPE_EVENTFD. markj: I think you can instead just assert that `f_type == DTYPE_EVENTFD`. | |||||
fp->f_ops = &badfileops; | |||||
mtx_destroy(&efd->efd_lock); | |||||
free(efd, M_EVENTFD); | |||||
return (0); | |||||
} | |||||
int | |||||
sys_eventfd(struct thread *td, struct eventfd_args *args) | |||||
{ | |||||
if ((args->flags & ~(EFD_CLOEXEC|EFD_NONBLOCK|EFD_SEMAPHORE)) != 0) | |||||
return (EINVAL); | |||||
return (eventfd_create(td, args->initval, args->flags)); | |||||
} | |||||
static int | |||||
eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, | |||||
int flags, struct thread *td) | |||||
{ | |||||
struct eventfd *efd = fp->f_data; | |||||
eventfd_t count; | |||||
int error; | |||||
if (uio->uio_resid < sizeof(eventfd_t)) | |||||
return (EINVAL); | |||||
error = 0; | |||||
mtx_lock(&efd->efd_lock); | |||||
while (error == 0 && efd->efd_count == 0) { | |||||
Done Inline ActionsSame here and below, I don't believe this needs to be checked. markj: Same here and below, I don't believe this needs to be checked. | |||||
Done Inline ActionsYeah looks like this isn't checked for e.g. pipes. I wonder why this check was added originally — possibly due to the mixing of eventfd and timerfd in one file. val_packett.cool: Yeah looks like this isn't checked for e.g. pipes. I wonder why this check was added originally… | |||||
if ((fp->f_flag & FNONBLOCK) != 0) { | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (EAGAIN); | |||||
} | |||||
error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "efdrd", 0); | |||||
} | |||||
if (error == 0) { | |||||
if ((efd->efd_flags & EFD_SEMAPHORE) != 0) { | |||||
Done Inline ActionsThe goto can be eliminated with a loop: while (error == 0 && efd->efd_count == 0) { ... markj: The goto can be eliminated with a loop:
```
while (error == 0 && efd->efd_count == 0) {
...
``` | |||||
count = 1; | |||||
--efd->efd_count; | |||||
} else { | |||||
count = efd->efd_count; | |||||
efd->efd_count = 0; | |||||
} | |||||
KNOTE_LOCKED(&efd->efd_sel.si_note, 0); | |||||
selwakeup(&efd->efd_sel); | |||||
wakeup(&efd->efd_count); | |||||
mtx_unlock(&efd->efd_lock); | |||||
error = uiomove(&count, sizeof(eventfd_t), uio); | |||||
} else | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (error); | |||||
} | |||||
static int | |||||
eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, | |||||
int flags, struct thread *td) | |||||
{ | |||||
struct eventfd *efd = fp->f_data; | |||||
eventfd_t count; | |||||
int error; | |||||
if (uio->uio_resid < sizeof(eventfd_t)) | |||||
return (EINVAL); | |||||
error = uiomove(&count, sizeof(eventfd_t), uio); | |||||
if (error != 0) | |||||
return (error); | |||||
if (count == UINT64_MAX) | |||||
return (EINVAL); | |||||
mtx_lock(&efd->efd_lock); | |||||
retry: | |||||
if (UINT64_MAX - efd->efd_count <= count) { | |||||
if ((fp->f_flag & FNONBLOCK) != 0) { | |||||
mtx_unlock(&efd->efd_lock); | |||||
/* Do not not return the number of bytes written */ | |||||
uio->uio_resid += sizeof(eventfd_t); | |||||
return (EAGAIN); | |||||
} | |||||
error = mtx_sleep(&efd->efd_count, &efd->efd_lock, | |||||
PCATCH, "efdwr", 0); | |||||
if (error == 0) | |||||
goto retry; | |||||
} | |||||
if (error == 0) { | |||||
efd->efd_count += count; | |||||
KNOTE_LOCKED(&efd->efd_sel.si_note, 0); | |||||
selwakeup(&efd->efd_sel); | |||||
wakeup(&efd->efd_count); | |||||
} | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (error); | |||||
} | |||||
static int | |||||
eventfd_poll(struct file *fp, int events, struct ucred *active_cred, | |||||
struct thread *td) | |||||
{ | |||||
struct eventfd *efd = fp->f_data; | |||||
int revents = 0; | |||||
mtx_lock(&efd->efd_lock); | |||||
if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) | |||||
revents |= events & (POLLIN|POLLRDNORM); | |||||
kibAuthorUnsubmitted Done Inline ActionsSpaces around |. kib: Spaces around `|`. | |||||
if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) | |||||
revents |= events & (POLLOUT|POLLWRNORM); | |||||
if (revents == 0) | |||||
selrecord(td, &efd->efd_sel); | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (revents); | |||||
} | |||||
static int | |||||
eventfd_kqfilter(struct file *fp, struct knote *kn) | |||||
{ | |||||
struct eventfd *efd = fp->f_data; | |||||
mtx_lock(&efd->efd_lock); | |||||
switch (kn->kn_filter) { | |||||
case EVFILT_READ: | |||||
kn->kn_fop = &eventfd_rfiltops; | |||||
break; | |||||
case EVFILT_WRITE: | |||||
kn->kn_fop = &eventfd_wfiltops; | |||||
break; | |||||
default: | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (EINVAL); | |||||
} | |||||
kn->kn_hook = efd; | |||||
knlist_add(&efd->efd_sel.si_note, kn, 1); | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (0); | |||||
} | |||||
static void | |||||
filt_eventfddetach(struct knote *kn) | |||||
{ | |||||
struct eventfd *efd = kn->kn_hook; | |||||
mtx_lock(&efd->efd_lock); | |||||
knlist_remove(&efd->efd_sel.si_note, kn, 1); | |||||
mtx_unlock(&efd->efd_lock); | |||||
} | |||||
static int | |||||
filt_eventfdread(struct knote *kn, long hint) | |||||
{ | |||||
struct eventfd *efd = kn->kn_hook; | |||||
Done Inline ActionsMaybe kn->kn_data could be set to the current value of the eventfd here? This might save user code a call to eventfd_read. User code must remember to cast the kn_data (an int64_t) back to uint64_t, though. jan.kokemueller_gmail.com: Maybe `kn->kn_data` could be set to the current value of the eventfd here? This might save user… | |||||
Done Inline ActionsOnly direct users of kqueue would be able to use kn_data, right? The cast is ehh.. I don't know about the cast. val_packett.cool: Only direct users of kqueue would be able to use `kn_data`, right? The cast is ehh.. I don't… | |||||
Done Inline Actions
Yes, only kqueue users would be able to do that. Right now, the kqueue integration is only used as an implementation detail for Linuxulator's epoll emulation, so setting kn_data doesn't matter at all. But if eventfd is native, the kqueue integration suddenly becomes part of the API surface. jan.kokemueller_gmail.com: > Only direct users of kqueue would be able to use `kn_data`, right? The cast is ehh.. I don't… | |||||
Done Inline ActionsWell, I mean that kqueue is often used indirectly through libevent/libuv/etc too. I guess the cast is okay if documented in kqueue(2). Interestingly the C cast operator does not guarantee that casting unsigned to signed would be a simple byte reinterpretation but on all our platforms it should be equivalent anyway..? val_packett.cool: Well, I mean that kqueue is often used indirectly through libevent/libuv/etc too.
I guess the… | |||||
int ret; | |||||
mtx_assert(&efd->efd_lock, MA_OWNED); | |||||
kn->kn_data = (int64_t)efd->efd_count; | |||||
ret = (efd->efd_count > 0); | |||||
return (ret); | |||||
} | |||||
static int | |||||
filt_eventfdwrite(struct knote *kn, long hint) | |||||
{ | |||||
struct eventfd *efd = kn->kn_hook; | |||||
Done Inline ActionsHere, kn_data could be the maximum number that can be written to the eventfd without blocking (UINT64_MAX - 1 - efd->efd_count), to mirror the current behavior for sockets, pipes etc. jan.kokemueller_gmail.com: Here, `kn_data` could be the maximum number that can be written to the eventfd without blocking… | |||||
int ret; | |||||
mtx_assert(&efd->efd_lock, MA_OWNED); | |||||
kn->kn_data = (int64_t)(UINT64_MAX - 1 - efd->efd_count); | |||||
ret = (UINT64_MAX - 1 > efd->efd_count); | |||||
return (ret); | |||||
} | |||||
static int | |||||
eventfd_ioctl(struct file *fp, u_long cmd, void *data, | |||||
struct ucred *active_cred, struct thread *td) | |||||
{ | |||||
switch (cmd) { | |||||
case FIONBIO: | |||||
Done Inline ActionsAgain what is the point of copying this ? kib: Again what is the point of copying this ? | |||||
case FIOASYNC: | |||||
return (0); | |||||
} | |||||
return (ENOTTY); | |||||
} | |||||
static int | |||||
eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, | |||||
struct thread *td) | |||||
{ | |||||
bzero((void *)st, sizeof *st); | |||||
st->st_mode = S_IFIFO; | |||||
return (0); | |||||
} | |||||
static int | |||||
Not Done Inline ActionsWhy ? kib: Why ? | |||||
Done Inline ActionsIt was like this.. Looking at other stat implementations like kqueue_stat, seems like they bzero and return 0, guess I should go with that? val_packett.cool: It was like this.. Looking at other `stat` implementations like `kqueue_stat`, seems like they… | |||||
eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) | |||||
{ | |||||
struct eventfd *efd = fp->f_data; | |||||
Done Inline ActionsThe opening brace should be on the previous line. markj: The opening brace should be on the previous line. | |||||
kif->kf_type = KF_TYPE_EVENTFD; | |||||
mtx_lock(&efd->efd_lock); | |||||
kif->kf_un.kf_eventfd.kf_eventfd_value = efd->efd_count; | |||||
kif->kf_un.kf_eventfd.kf_eventfd_flags = efd->efd_flags; | |||||
mtx_unlock(&efd->efd_lock); | |||||
return (0); | |||||
} |
Missing copyright.