diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -663,6 +663,7 @@
 #define	AUE_AIO_WRITEV		43267	/* FreeBSD-specific. */
 #define	AUE_AIO_READV		43268	/* FreeBSD-specific. */
 #define	AUE_FSPACECTL		43269	/* FreeBSD-specific. */
+#define	AUE_TIMERFD		43270	/* FreeBSD-specific. */
 
 /*
  * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h
--- a/sys/compat/linux/linux_event.h
+++ b/sys/compat/linux/linux_event.h
@@ -56,15 +56,4 @@
 
 #define	LINUX_EFD_SEMAPHORE	(1 << 0)
 
-#define	LINUX_TFD_TIMER_ABSTIME	(1 << 0)
-#define	LINUX_TFD_TIMER_CANCEL_ON_SET	(1 << 1)
-#define	LINUX_TFD_CLOEXEC	LINUX_O_CLOEXEC
-#define	LINUX_TFD_NONBLOCK	LINUX_O_NONBLOCK
-
-#define	LINUX_TFD_SHARED_FCNTL_FLAGS	(LINUX_TFD_CLOEXEC \
-		|LINUX_TFD_NONBLOCK)
-#define	LINUX_TFD_CREATE_FLAGS	LINUX_TFD_SHARED_FCNTL_FLAGS
-#define	LINUX_TFD_SETTIME_FLAGS	(LINUX_TFD_TIMER_ABSTIME \
-		|LINUX_TFD_TIMER_CANCEL_ON_SET)
-
 #endif /* !_LINUX_EVENT_H_ */
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -55,6 +55,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef COMPAT_LINUX32
 #include
@@ -108,55 +109,6 @@
 	int		error;
 };
 
-/* timerfd */
-typedef uint64_t	timerfd_t;
-
-static fo_rdwr_t	timerfd_read;
-static fo_ioctl_t	timerfd_ioctl;
-static fo_poll_t	timerfd_poll;
-static fo_kqfilter_t	timerfd_kqfilter;
-static fo_stat_t	timerfd_stat;
-static fo_close_t	timerfd_close;
-static fo_fill_kinfo_t	timerfd_fill_kinfo;
-
-static struct fileops timerfdops = {
-	.fo_read = timerfd_read,
-	.fo_write = invfo_rdwr,
-	.fo_truncate = invfo_truncate,
-	.fo_ioctl = timerfd_ioctl,
-	.fo_poll = timerfd_poll,
-	.fo_kqfilter = timerfd_kqfilter,
-	.fo_stat = timerfd_stat,
-	.fo_close = timerfd_close,
-	.fo_chmod = invfo_chmod,
-	.fo_chown = invfo_chown,
-	.fo_sendfile = invfo_sendfile,
-	.fo_fill_kinfo = timerfd_fill_kinfo,
-	.fo_flags = DFLAG_PASSABLE
-};
-
-static void	filt_timerfddetach(struct knote *kn);
-static int	filt_timerfdread(struct knote *kn, long hint);
-
-static struct filterops timerfd_rfiltops = {
-	.f_isfd = 1,
-	.f_detach = filt_timerfddetach,
-	.f_event = filt_timerfdread
-};
-
-struct timerfd {
-	clockid_t	tfd_clockid;
-	struct itimerspec tfd_time;
-	struct callout	tfd_callout;
-	timerfd_t	tfd_count;
-	bool		tfd_canceled;
-	struct selinfo	tfd_sel;
-	struct mtx	tfd_lock;
-};
-
-static void	linux_timerfd_expire(void *);
-static void	linux_timerfd_curval(struct timerfd *, struct itimerspec *);
-
 static int
 epoll_create_common(struct thread *td, int flags)
 {
@@ -637,7 +589,8 @@
 
 	bzero(&ae, sizeof(ae));
 	ae.initval = args->initval;
-	return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
+	/* ae is in kernel memory; kern_eventfd() copyin()s a user pointer. */
+	return (eventfd_create_file(td, ae.initval, ae.flags));
 }
 #endif
 
@@ -661,261 +614,32 @@
 	bzero(&ae, sizeof(ae));
 	ae.flags = flags;
 	ae.initval = args->initval;
-	return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
+	/* ae is in kernel memory; kern_eventfd() copyin()s a user pointer. */
+	return (eventfd_create_file(td, ae.initval, ae.flags));
 }
 
 int
 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
 {
-	struct timerfd *tfd;
-	struct file *fp;
 	clockid_t clockid;
-	int fflags, fd, error;
-
-	if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
-		return (EINVAL);
-
-	error = linux_to_native_clockid(&clockid, args->clockid);
-	if (error != 0)
-		return (error);
-	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
-		return (EINVAL);
-
-	fflags = 0;
-	if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
-		fflags |= O_CLOEXEC;
-
-	error = falloc(td, &fp, &fd, fflags);
-	if (error != 0)
-		return (error);
-
-	tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
-	tfd->tfd_clockid = clockid;
-	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
-
-	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
-	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
-
-	fflags = FREAD;
-	if ((args->flags & LINUX_O_NONBLOCK) != 0)
-		fflags |= FNONBLOCK;
-
-	finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
-	fdrop(fp, td);
-
-	td->td_retval[0] = fd;
-	return (error);
-}
-
-static int
-timerfd_close(struct file *fp, struct thread *td)
-{
-	struct timerfd *tfd;
-
-	tfd = fp->f_data;
-	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-		return (EINVAL);
-
-	timespecclear(&tfd->tfd_time.it_value);
-	timespecclear(&tfd->tfd_time.it_interval);
-
-	callout_drain(&tfd->tfd_callout);
-
-	seldrain(&tfd->tfd_sel);
-	knlist_destroy(&tfd->tfd_sel.si_note);
-
-	fp->f_ops = &badfileops;
-	mtx_destroy(&tfd->tfd_lock);
-	free(tfd, M_EPOLL);
-
-	return (0);
-}
-
-static int
-timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
-    int flags, struct thread *td)
-{
-	struct timerfd *tfd;
-	timerfd_t count;
-	int error;
+	int error, flags;
 
-	tfd = fp->f_data;
-	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-		return (EINVAL);
-
-	if (uio->uio_resid < sizeof(timerfd_t))
-		return (EINVAL);
-
-	error = 0;
-	mtx_lock(&tfd->tfd_lock);
-retry:
-	if (tfd->tfd_canceled) {
-		tfd->tfd_count = 0;
-		mtx_unlock(&tfd->tfd_lock);
-		return (ECANCELED);
-	}
-	if (tfd->tfd_count == 0) {
-		if ((fp->f_flag & FNONBLOCK) != 0) {
-			mtx_unlock(&tfd->tfd_lock);
-			return (EAGAIN);
-		}
-		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
-		if (error == 0)
-			goto retry;
-	}
-	if (error == 0) {
-		count = tfd->tfd_count;
-		tfd->tfd_count = 0;
-		mtx_unlock(&tfd->tfd_lock);
-		error = uiomove(&count, sizeof(timerfd_t), uio);
-	} else
-		mtx_unlock(&tfd->tfd_lock);
-
-	return (error);
-}
-
-static int
-timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
-    struct thread *td)
-{
-	struct timerfd *tfd;
-	int revents = 0;
-
-	tfd = fp->f_data;
-	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-		return (POLLERR);
-
-	mtx_lock(&tfd->tfd_lock);
-	if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
-		revents |= events & (POLLIN|POLLRDNORM);
-	if (revents == 0)
-		selrecord(td, &tfd->tfd_sel);
-	mtx_unlock(&tfd->tfd_lock);
-
-	return (revents);
-}
-
-static int
-timerfd_kqfilter(struct file *fp, struct knote *kn)
-{
-	struct timerfd *tfd;
-
-	tfd = fp->f_data;
-	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
-		return (EINVAL);
-
-	if (kn->kn_filter == EVFILT_READ)
-		kn->kn_fop = &timerfd_rfiltops;
-	else
-		return (EINVAL);
-
-	kn->kn_hook = tfd;
-	knlist_add(&tfd->tfd_sel.si_note, kn, 0);
-
-	return (0);
-}
-
-static void
-filt_timerfddetach(struct knote *kn)
-{
-	struct timerfd *tfd = kn->kn_hook;
-
-	mtx_lock(&tfd->tfd_lock);
-	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
-	mtx_unlock(&tfd->tfd_lock);
-}
-
-static int
-filt_timerfdread(struct knote *kn, long hint)
-{
-	struct timerfd *tfd = kn->kn_hook;
-
-	return (tfd->tfd_count > 0);
-}
-
-static int
-timerfd_ioctl(struct file *fp, u_long cmd, void *data,
-    struct ucred *active_cred, struct thread *td)
-{
-
-	if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
-		return (EINVAL);
-
-	switch (cmd) {
-	case FIONBIO:
-	case FIOASYNC:
-		return (0);
-	}
-
-	return (ENOTTY);
-}
-
-static int
-timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
-{
-
-	return (ENXIO);
-}
-
-static int
-timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
-{
-
-	kif->kf_type = KF_TYPE_UNKNOWN;
-	return (0);
-}
-
-static void
-linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
-{
-
-	if (tfd->tfd_clockid == CLOCK_REALTIME)
-		getnanotime(ts);
-	else	/* CLOCK_MONOTONIC */
-		getnanouptime(ts);
-}
-
-static void
-linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
-{
-	struct timespec cts;
-
-	linux_timerfd_clocktime(tfd, &cts);
-	*ots = tfd->tfd_time;
-	if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
-		timespecsub(&ots->it_value, &cts, &ots->it_value);
-		if (ots->it_value.tv_sec < 0 ||
-		    (ots->it_value.tv_sec == 0 &&
-		     ots->it_value.tv_nsec == 0)) {
-			ots->it_value.tv_sec  = 0;
-			ots->it_value.tv_nsec = 1;
-		}
-	}
-}
-
-static int
-linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
-{
-	struct timerfd *tfd;
-	struct file *fp;
-	int error;
-
-	error = fget(td, fd, &cap_read_rights, &fp);
+	if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK)) != 0)
+		return (EINVAL);
+	error = linux_to_native_clockid(&clockid, args->clockid);
 	if (error != 0)
 		return (error);
-	tfd = fp->f_data;
-	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
-		error = EINVAL;
-		goto out;
-	}
-
-	mtx_lock(&tfd->tfd_lock);
-	linux_timerfd_curval(tfd, ots);
-	mtx_unlock(&tfd->tfd_lock);
 
-out:
-	fdrop(fp, td);
-	return (error);
+	/*
+	 * Linux flag encodings are not guaranteed to match the native TFD_*
+	 * values, so translate them instead of passing args->flags through.
+	 */
+	flags = 0;
+	if ((args->flags & LINUX_O_CLOEXEC) != 0)
+		flags |= TFD_CLOEXEC;
+	if ((args->flags & LINUX_O_NONBLOCK) != 0)
+		flags |= TFD_NONBLOCK;
+	return (timerfd_create_file(td, clockid, flags));
 }
 
 int
@@ -925,7 +649,7 @@
 	struct itimerspec ots;
 	int error;
 
-	error = linux_timerfd_gettime_common(td, args->fd, &ots);
+	error = timerfd_gettime_common(td, args->fd, &ots);
 	if (error != 0)
 		return (error);
 	error = native_to_linux_itimerspec(&lots, &ots);
@@ -942,7 +666,7 @@
 	struct itimerspec ots;
 	int error;
 
-	error = linux_timerfd_gettime_common(td, args->fd, &ots);
+	error = timerfd_gettime_common(td, args->fd, &ots);
 	if (error != 0)
 		return (error);
 	error = native_to_linux_itimerspec64(&lots, &ots);
@@ -952,60 +676,6 @@
 }
 #endif
 
-static int
-linux_timerfd_settime_common(struct thread *td, int fd, int flags,
-    struct itimerspec *nts, struct itimerspec *oval)
-{
-	struct timespec cts, ts;
-	struct timerfd *tfd;
-	struct timeval tv;
-	struct file *fp;
-	int error;
-
-	if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
-		return (EINVAL);
-
-	error = fget(td, fd, &cap_write_rights, &fp);
-	if (error != 0)
-		return (error);
-	tfd = fp->f_data;
-	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
-		error = EINVAL;
-		goto out;
-	}
-
-	mtx_lock(&tfd->tfd_lock);
-	if (!timespecisset(&nts->it_value))
-		timespecclear(&nts->it_interval);
-	if (oval != NULL)
-		linux_timerfd_curval(tfd, oval);
-
-	bcopy(nts, &tfd->tfd_time, sizeof(*nts));
-	tfd->tfd_count = 0;
-	if (timespecisset(&nts->it_value)) {
-		linux_timerfd_clocktime(tfd, &cts);
-		ts = nts->it_value;
-		if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
-			timespecadd(&tfd->tfd_time.it_value, &cts,
-				&tfd->tfd_time.it_value);
-		} else {
-			timespecsub(&ts, &cts, &ts);
-		}
-		TIMESPEC_TO_TIMEVAL(&tv, &ts);
-		callout_reset(&tfd->tfd_callout, tvtohz(&tv),
-			linux_timerfd_expire, tfd);
-		tfd->tfd_canceled = false;
-	} else {
-		tfd->tfd_canceled = true;
-		callout_stop(&tfd->tfd_callout);
-	}
-	mtx_unlock(&tfd->tfd_lock);
-
-out:
-	fdrop(fp, td);
-	return (error);
-}
-
 int
 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
 {
@@ -1020,8 +690,7 @@
 	if (error != 0)
 		return (error);
 	pots = (args->old_value != NULL ? &ots : NULL);
-	error = linux_timerfd_settime_common(td, args->fd, args->flags,
-	    &nts, pots);
+	error = timerfd_settime_common(td, args->fd, args->flags, &nts, pots);
 	if (error == 0 && args->old_value != NULL) {
 		error = native_to_linux_itimerspec(&lots, &ots);
 		if (error == 0)
@@ -1045,8 +714,7 @@
 	if (error != 0)
 		return (error);
 	pots = (args->old_value != NULL ? &ots : NULL);
-	error = linux_timerfd_settime_common(td, args->fd, args->flags,
-	    &nts, pots);
+	error = timerfd_settime_common(td, args->fd, args->flags, &nts, pots);
 	if (error == 0 && args->old_value != NULL) {
 		error = native_to_linux_itimerspec64(&lots, &ots);
 		if (error == 0)
@@ -1055,39 +723,3 @@
 	return (error);
 }
 #endif
-
-static void
-linux_timerfd_expire(void *arg)
-{
-	struct timespec cts, ts;
-	struct timeval tv;
-	struct timerfd *tfd;
-
-	tfd = (struct timerfd *)arg;
-
-	linux_timerfd_clocktime(tfd, &cts);
-	if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
-		if (timespecisset(&tfd->tfd_time.it_interval))
-			timespecadd(&tfd->tfd_time.it_value,
-				    &tfd->tfd_time.it_interval,
-				    &tfd->tfd_time.it_value);
-		else
-			/* single shot timer */
-			timespecclear(&tfd->tfd_time.it_value);
-		if (timespecisset(&tfd->tfd_time.it_value)) {
-			timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
-			TIMESPEC_TO_TIMEVAL(&tv, &ts);
-			callout_reset(&tfd->tfd_callout, tvtohz(&tv),
-				linux_timerfd_expire, tfd);
-		}
-		tfd->tfd_count++;
-		KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
-		selwakeup(&tfd->tfd_sel);
-		wakeup(&tfd->tfd_count);
-	} else if (timespecisset(&tfd->tfd_time.it_value)) {
-		timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
-		TIMESPEC_TO_TIMEVAL(&tv, &ts);
-		callout_reset(&tfd->tfd_callout, tvtohz(&tv),
-		    linux_timerfd_expire, tfd);
-	}
-}
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3944,6 +3944,7 @@
 kern/subr_witness.c		optional witness
 kern/sys_capability.c		standard
 kern/sys_eventfd.c		standard
+kern/sys_timerfd.c		standard
 kern/sys_generic.c		standard
 kern/sys_getrandom.c		standard
 kern/sys_pipe.c			standard
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -4939,8 +4939,8 @@
 		return ("proc");
 	case DTYPE_EVENTFD:
 		return ("eventfd");
-	case DTYPE_LINUXTFD:
-		return ("ltimer");
+	case DTYPE_TIMERFD:
+		return ("timerfd");
 	default:
 		return ("unkn");
 	}
diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c
--- a/sys/kern/sys_eventfd.c
+++ b/sys/kern/sys_eventfd.c
@@ -106,15 +106,24 @@
 };
 
 int
-eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
-    int flags)
+eventfd_create_file(struct thread *td, uint32_t initval, int flags)
 {
 	struct eventfd *efd;
-	int fflags;
+	struct file *fp;
+	int error, fd, fflags, iflags;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_VALUE(initval);
 
+	if ((flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) != 0)
+		return (EINVAL);
+	/* Descriptor flags for finstall(); fflags only carries f_flag bits. */
+	iflags = (flags & EFD_CLOEXEC) != 0 ? O_CLOEXEC : 0;
+
+	error = falloc_noinstall(td, &fp);
+	if (error != 0)
+		return (error);
+
 	efd = malloc(sizeof(*efd), M_EVENTFD, M_WAITOK | M_ZERO);
 	efd->efd_flags = flags;
 	efd->efd_count = initval;
@@ -124,8 +133,13 @@
 	fflags = FREAD | FWRITE;
 	if ((flags & EFD_NONBLOCK) != 0)
 		fflags |= FNONBLOCK;
+
 	finit(fp, fflags, DTYPE_EVENTFD, efd, &eventfdops);
+	error = finstall(td, fp, &fd, iflags, NULL);
+	if (error == 0)
+		td->td_retval[0] = fd;
 
-	return (0);
+	fdrop(fp, td);
+	return (error);
 }
 
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -56,6 +56,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -936,64 +937,102 @@
 }
 
 int
-kern_specialfd(struct thread *td, int type, void *arg)
+kern_eventfd(struct thread *td, const void *args)
 {
-	struct file *fp;
-	struct specialfd_eventfd *ae;
-	int error, fd, fflags;
+	struct specialfd_eventfd efd;
+	int error;
 
-	fflags = 0;
-	error = falloc_noinstall(td, &fp);
+	error = copyin(args, &efd, sizeof(efd));
 	if (error != 0)
 		return (error);
 
-	switch (type) {
-	case SPECIALFD_EVENTFD:
-		ae = arg;
-		if ((ae->flags & EFD_CLOEXEC) != 0)
-			fflags |= O_CLOEXEC;
-		error = eventfd_create_file(td, fp, ae->initval, ae->flags);
-		break;
-	default:
-		error = EINVAL;
-		break;
-	}
+	return (eventfd_create_file(td, efd.initval, efd.flags));
+}
 
+int
+kern_timerfd_create(struct thread *td, const void *args)
+{
+	struct specialfd_timerfd_create tfd;
+	int error;
+
+	error = copyin(args, &tfd, sizeof(tfd));
+	if (error != 0)
+		return (error);
+
+	return (timerfd_create_file(td, tfd.clockid, tfd.flags));
+}
+
+int
+kern_timerfd_gettime(struct thread *td, const void *args)
+{
+	struct specialfd_timerfd_gettime tfd;
+	struct itimerspec cts;
+	int error;
+
+	error = copyin(args, &tfd, sizeof(tfd));
+	if (error != 0)
+		return (error);
+
+	error = timerfd_gettime_common(td, tfd.fd, &cts);
 	if (error == 0)
-		error = finstall(td, fp, &fd, fflags, NULL);
-	fdrop(fp, td);
-	if (error == 0)
-		td->td_retval[0] = fd;
+		error = copyout(&cts, tfd.curr_value, sizeof(cts));
+
 	return (error);
 }
 
 int
-sys___specialfd(struct thread *td, struct __specialfd_args *args)
+kern_timerfd_settime(struct thread *td, const void *args)
 {
-	struct specialfd_eventfd ae;
+	struct specialfd_timerfd_settime tfd;
+	struct itimerspec nts, ots, *pots;
 	int error;
 
+	error = copyin(args, &tfd, sizeof(tfd));
+	if (error != 0)
+		return (error);
+	error = copyin(tfd.new_value, &nts, sizeof(nts));
+	if (error != 0)
+		return (error);
+	pots = (tfd.old_value != NULL ? &ots : NULL);
+	error = timerfd_settime_common(td, tfd.fd, tfd.flags, &nts, pots);
+	if (error == 0 && tfd.old_value != NULL)
+		error = copyout(&ots, tfd.old_value, sizeof(ots));
+
+	return (error);
+}
+
+int
+sys___specialfd(struct thread *td, struct __specialfd_args *args)
+{
+	size_t arg_size;
+	int (*specialfd_func)(struct thread *, const void *);
+
 	switch (args->type) {
 	case SPECIALFD_EVENTFD:
-		if (args->len != sizeof(struct specialfd_eventfd)) {
-			error = EINVAL;
-			break;
-		}
-		error = copyin(args->req, &ae, sizeof(ae));
-		if (error != 0)
-			break;
-		if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK |
-		    EFD_SEMAPHORE)) != 0) {
-			error = EINVAL;
-			break;
-		}
-		error = kern_specialfd(td, args->type, &ae);
+		arg_size = sizeof(struct specialfd_eventfd);
+		specialfd_func = kern_eventfd;
 		break;
-	default:
-		error = EINVAL;
+	case SPECIALFD_TIMERFD_CREATE:
+		arg_size = sizeof(struct specialfd_timerfd_create);
+		specialfd_func = kern_timerfd_create;
+		break;
+	case SPECIALFD_TIMERFD_SETTIME:
+		arg_size = sizeof(struct specialfd_timerfd_settime);
+		specialfd_func = kern_timerfd_settime;
+		break;
+	case SPECIALFD_TIMERFD_GETTIME:
+		arg_size = sizeof(struct specialfd_timerfd_gettime);
+		specialfd_func = kern_timerfd_gettime;
 		break;
+	default:
+		/* Unknown type: arg_size/specialfd_func would be unset. */
+		return (EINVAL);
 	}
-	return (error);
+
+	if (args->len != arg_size)
+		return (EINVAL);
+
+	return (specialfd_func(td, args->req));
 }
 
 int
diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/sys_timerfd.c
@@ -0,0 +1,449 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * Copyright (c) 2023 Jake Freeland
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");
+
+static fo_rdwr_t	timerfd_read;
+static fo_ioctl_t	timerfd_ioctl;
+static fo_poll_t	timerfd_poll;
+static fo_kqfilter_t	timerfd_kqfilter;
+static fo_stat_t	timerfd_stat;
+static fo_close_t	timerfd_close;
+static fo_fill_kinfo_t	timerfd_fill_kinfo;
+
+static struct fileops timerfdops = {
+	.fo_read = timerfd_read,
+	.fo_write = invfo_rdwr,
+	.fo_truncate = invfo_truncate,
+	.fo_ioctl = timerfd_ioctl,
+	.fo_poll = timerfd_poll,
+	.fo_kqfilter = timerfd_kqfilter,
+	.fo_stat = timerfd_stat,
+	.fo_close = timerfd_close,
+	.fo_chmod = invfo_chmod,
+	.fo_chown = invfo_chown,
+	.fo_sendfile = invfo_sendfile,
+	.fo_fill_kinfo = timerfd_fill_kinfo,
+	.fo_flags = DFLAG_PASSABLE
+};
+
+static void	filt_timerfddetach(struct knote *kn);
+static int	filt_timerfdread(struct knote *kn, long hint);
+
+static struct filterops timerfd_rfiltops = {
+	.f_isfd = 1,
+	.f_detach = filt_timerfddetach,
+	.f_event = filt_timerfdread
+};
+
+struct timerfd {
+	clockid_t	tfd_clockid;
+	struct itimerspec tfd_time;
+	struct callout	tfd_callout;
+	timerfd_t	tfd_count;
+	bool		tfd_canceled;
+	struct selinfo	tfd_sel;
+	struct mtx	tfd_lock;
+	int		tfd_flags;
+};
+
+static void	timerfd_expire(void *);
+static void	timerfd_curval(struct timerfd *, struct itimerspec *);
+
+int
+timerfd_create_file(struct thread *td, int clockid, int flags)
+{
+	struct timerfd *tfd;
+	struct file *fp;
+	int error, fd, fflags, iflags;
+
+	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
+		return (EINVAL);
+	if ((flags & ~TFD_CREATE_FLAGS) != 0)
+		return (EINVAL);
+	/* Descriptor flags for finstall(); fflags only carries f_flag bits. */
+	iflags = (flags & TFD_CLOEXEC) != 0 ? O_CLOEXEC : 0;
+
+	error = falloc_noinstall(td, &fp);
+	if (error != 0)
+		return (error);
+
+	AUDIT_ARG_FFLAGS(flags);
+	AUDIT_ARG_VALUE(clockid);
+
+	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
+	tfd->tfd_clockid = (clockid_t) clockid;
+	tfd->tfd_flags = flags;
+	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
+	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
+	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
+
+	fflags = FREAD;
+	if ((flags & TFD_NONBLOCK) != 0)
+		fflags |= FNONBLOCK;
+
+	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);
+	error = finstall(td, fp, &fd, iflags, NULL);
+	if (error == 0)
+		td->td_retval[0] = fd;
+	fdrop(fp, td);
+	return (error);
+}
+
+static int
+timerfd_close(struct file *fp, struct thread *td)
+{
+	struct timerfd *tfd;
+
+	tfd = fp->f_data;
+	if (fp->f_type != DTYPE_TIMERFD || tfd == NULL)
+		return (EINVAL);
+
+	timespecclear(&tfd->tfd_time.it_value);
+	timespecclear(&tfd->tfd_time.it_interval);
+
+	callout_drain(&tfd->tfd_callout);
+
+	seldrain(&tfd->tfd_sel);
+	knlist_destroy(&tfd->tfd_sel.si_note);
+
+	fp->f_ops = &badfileops;
+	mtx_destroy(&tfd->tfd_lock);
+	free(tfd, M_TIMERFD);
+
+	return (0);
+}
+
+static int
+timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, struct thread *td)
+{
+	struct timerfd *tfd;
+	timerfd_t count;
+	int error;
+
+	tfd = fp->f_data;
+	if (fp->f_type != DTYPE_TIMERFD || tfd == NULL)
+		return (EINVAL);
+
+	if (uio->uio_resid < sizeof(timerfd_t))
+		return (EINVAL);
+
+	error = 0;
+	mtx_lock(&tfd->tfd_lock);
+retry:
+	if (tfd->tfd_canceled) {
+		tfd->tfd_count = 0;
+		mtx_unlock(&tfd->tfd_lock);
+		return (ECANCELED);
+	}
+	if (tfd->tfd_count == 0) {
+		if ((fp->f_flag & FNONBLOCK) != 0) {
+			mtx_unlock(&tfd->tfd_lock);
+			return (EAGAIN);
+		}
+		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
+		if (error == 0)
+			goto retry;
+	}
+	if (error == 0) {
+		count = tfd->tfd_count;
+		tfd->tfd_count = 0;
+		mtx_unlock(&tfd->tfd_lock);
+		error = uiomove(&count, sizeof(timerfd_t), uio);
+	} else
+		mtx_unlock(&tfd->tfd_lock);
+
+	return (error);
+}
+
+static int
+timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
+    struct thread *td)
+{
+	struct timerfd *tfd;
+	int revents = 0;
+
+	tfd = fp->f_data;
+	if (fp->f_type != DTYPE_TIMERFD || tfd == NULL)
+		return (POLLERR);
+
+	mtx_lock(&tfd->tfd_lock);
+	if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
+		revents |= events & (POLLIN|POLLRDNORM);
+	if (revents == 0)
+		selrecord(td, &tfd->tfd_sel);
+	mtx_unlock(&tfd->tfd_lock);
+
+	return (revents);
+}
+
+static int
+timerfd_kqfilter(struct file *fp, struct knote *kn)
+{
+	struct timerfd *tfd;
+
+	tfd = fp->f_data;
+	if (fp->f_type != DTYPE_TIMERFD || tfd == NULL)
+		return (EINVAL);
+
+	if (kn->kn_filter == EVFILT_READ)
+		kn->kn_fop = &timerfd_rfiltops;
+	else
+		return (EINVAL);
+
+	kn->kn_hook = tfd;
+	knlist_add(&tfd->tfd_sel.si_note, kn, 0);
+
+	return (0);
+}
+
+static void
+filt_timerfddetach(struct knote *kn)
+{
+	struct timerfd *tfd = kn->kn_hook;
+
+	mtx_lock(&tfd->tfd_lock);
+	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
+	mtx_unlock(&tfd->tfd_lock);
+}
+
+static int
+filt_timerfdread(struct knote *kn, long hint)
+{
+	struct timerfd *tfd = kn->kn_hook;
+
+	return (tfd->tfd_count > 0);
+}
+
+static int
+timerfd_ioctl(struct file *fp, u_long cmd, void *data,
+    struct ucred *active_cred, struct thread *td)
+{
+
+	if (fp->f_data == NULL || fp->f_type != DTYPE_TIMERFD)
+		return (EINVAL);
+
+	switch (cmd) {
+	case FIONBIO:
+	case FIOASYNC:
+		return (0);
+	}
+
+	return (ENOTTY);
+}
+
+static int
+timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
+{
+
+	return (ENXIO);
+}
+
+static int
+timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
+{
+
+	struct timerfd *tfd = fp->f_data;
+
+	kif->kf_type = KF_TYPE_TIMERFD;
+	mtx_lock(&tfd->tfd_lock);
+	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
+	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
+	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;
+	mtx_unlock(&tfd->tfd_lock);
+	return (0);
+}
+
+static void
+timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
+{
+
+	if (tfd->tfd_clockid == CLOCK_REALTIME)
+		getnanotime(ts);
+	else	/* CLOCK_MONOTONIC */
+		getnanouptime(ts);
+}
+
+static void
+timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
+{
+	struct timespec cts;
+
+	timerfd_clocktime(tfd, &cts);
+	*ots = tfd->tfd_time;
+	if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
+		timespecsub(&ots->it_value, &cts, &ots->it_value);
+		if (ots->it_value.tv_sec < 0 ||
+		    (ots->it_value.tv_sec == 0 &&
+		     ots->it_value.tv_nsec == 0)) {
+			ots->it_value.tv_sec  = 0;
+			ots->it_value.tv_nsec = 1;
+		}
+	}
+}
+
+int
+timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *cts)
+{
+	struct timerfd *tfd;
+	struct file *fp;
+	int error;
+
+	error = fget(td, fd, &cap_read_rights, &fp);
+	if (error != 0)
+		return (error);
+	tfd = fp->f_data;
+	if (fp->f_type != DTYPE_TIMERFD || tfd == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	mtx_lock(&tfd->tfd_lock);
+	timerfd_curval(tfd, cts);
+	mtx_unlock(&tfd->tfd_lock);
+
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
+int
+timerfd_settime_common(struct thread *td, int fd, int flags,
+    struct itimerspec *nts, struct itimerspec *ots)
+{
+	struct timespec cts, ts;
+	struct timerfd *tfd;
+	struct timeval tv;
+	struct file *fp;
+	int error;
+
+	if ((flags & ~TFD_SETTIME_FLAGS) != 0)
+		return (EINVAL);
+
+	error = fget(td, fd, &cap_write_rights, &fp);
+	if (error != 0)
+		return (error);
+	tfd = fp->f_data;
+	if (fp->f_type != DTYPE_TIMERFD || tfd == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	mtx_lock(&tfd->tfd_lock);
+	if (!timespecisset(&nts->it_value))
+		timespecclear(&nts->it_interval);
+	if (ots != NULL)
+		timerfd_curval(tfd, ots);
+
+	bcopy(nts, &tfd->tfd_time, sizeof(*nts));
+	tfd->tfd_count = 0;
+	if (timespecisset(&nts->it_value)) {
+		timerfd_clocktime(tfd, &cts);
+		ts = nts->it_value;
+		if ((flags & TFD_TIMER_ABSTIME) == 0) {
+			timespecadd(&tfd->tfd_time.it_value, &cts,
+				&tfd->tfd_time.it_value);
+		} else {
+			timespecsub(&ts, &cts, &ts);
+		}
+		TIMESPEC_TO_TIMEVAL(&tv, &ts);
+		callout_reset(&tfd->tfd_callout, tvtohz(&tv),
+			timerfd_expire, tfd);
+		tfd->tfd_canceled = false;
+	} else {
+		tfd->tfd_canceled = true;
+		callout_stop(&tfd->tfd_callout);
+	}
+	mtx_unlock(&tfd->tfd_lock);
+
+out:
+	fdrop(fp, td);
+	return (error);
+}
+
+static void
+timerfd_expire(void *arg)
+{
+	struct timespec cts, ts;
+	struct timeval tv;
+	struct timerfd *tfd;
+
+	tfd = (struct timerfd *)arg;
+
+	timerfd_clocktime(tfd, &cts);
+	if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
+		if (timespecisset(&tfd->tfd_time.it_interval))
+			timespecadd(&tfd->tfd_time.it_value,
+				    &tfd->tfd_time.it_interval,
+				    &tfd->tfd_time.it_value);
+		else
+			/* single shot timer */
+			timespecclear(&tfd->tfd_time.it_value);
+		if (timespecisset(&tfd->tfd_time.it_value)) {
+			timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
+			TIMESPEC_TO_TIMEVAL(&tv, &ts);
+			callout_reset(&tfd->tfd_callout, tvtohz(&tv),
+				timerfd_expire, tfd);
+		}
+		tfd->tfd_count++;
+		KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
+		selwakeup(&tfd->tfd_sel);
+		wakeup(&tfd->tfd_count);
+	} else if (timespecisset(&tfd->tfd_time.it_value)) {
+		timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
+		TIMESPEC_TO_TIMEVAL(&tv, &ts);
+		callout_reset(&tfd->tfd_callout, tvtohz(&tv),
+		    timerfd_expire, tfd);
+	}
+}
diff --git a/sys/sys/eventfd.h b/sys/sys/eventfd.h
--- a/sys/sys/eventfd.h
+++ b/sys/sys/eventfd.h
@@ -38,8 +38,7 @@
 
 #ifdef _KERNEL
 
-int eventfd_create_file(struct thread *td, struct file *fp, uint32_t initval,
-    int flags);
+int eventfd_create_file(struct thread *td, uint32_t initval, int flags);
 
 #else
 
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -70,7 +70,7 @@
 #define	DTYPE_DEV	11	/* Device specific fd type */
 #define	DTYPE_PROCDESC	12	/* process descriptor */
 #define	DTYPE_EVENTFD	13	/* eventfd */
-#define	DTYPE_LINUXTFD	14	/* emulation timerfd type */
+#define	DTYPE_TIMERFD	14	/* timerfd */
 
 #ifdef _KERNEL
 
diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h
--- a/sys/sys/specialfd.h
+++ b/sys/sys/specialfd.h
@@ -32,6 +32,9 @@
 
 enum specialfd_type {
 	SPECIALFD_EVENTFD	= 1,
+	SPECIALFD_TIMERFD_CREATE	= 2,
+	SPECIALFD_TIMERFD_SETTIME	= 3,
+	SPECIALFD_TIMERFD_GETTIME	= 4
 };
 
 struct specialfd_eventfd {
@@ -39,4 +42,21 @@
 	int		flags;
 };
 
+struct specialfd_timerfd_create {
+	int		clockid;
+	int		flags;
+};
+
+struct specialfd_timerfd_gettime {
+	int		fd;
+	struct itimerspec *curr_value;
+};
+
+struct specialfd_timerfd_settime {
+	int		fd;
+	int		flags;
+	const struct itimerspec *new_value;
+	struct itimerspec *old_value;
+};
+
 #endif	/* !_SYS_SPECIALFD_H_ */
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -321,7 +321,10 @@
 int	kern_statat(struct thread *td, int flag, int fd, const char *path,
 	    enum uio_seg pathseg, struct stat *sbp,
 	    void (*hook)(struct vnode *vp, struct stat *sbp));
-int	kern_specialfd(struct thread *td, int type, void *arg);
+int	kern_eventfd(struct thread *td, const void *args);
+int	kern_timerfd_create(struct thread *td, const void *args);
+int	kern_timerfd_settime(struct thread *td, const void *args);
+int	kern_timerfd_gettime(struct thread *td, const void *args);
 int	kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
 	    struct statfs *buf);
 int	kern_symlinkat(struct thread *td, const char *path1, int fd,
diff --git a/sys/sys/timerfd.h b/sys/sys/timerfd.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/timerfd.h
@@ -0,0 +1,68 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2023 Jake Freeland
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TIMERFD_H_
+#define _SYS_TIMERFD_H_
+
+#include
+#include
+#include
+#include
+
+typedef	uint64_t	timerfd_t;
+
+#define	TFD_NONBLOCK	O_NONBLOCK
+#define	TFD_CLOEXEC	O_CLOEXEC
+
+#define	TFD_TIMER_ABSTIME	(1 << 0)
+#define	TFD_TIMER_CANCEL_ON_SET	(1 << 1)
+
+#define	TFD_SHARED_FCNTL_FLAGS	(TFD_NONBLOCK | TFD_CLOEXEC)
+#define	TFD_CREATE_FLAGS	TFD_SHARED_FCNTL_FLAGS
+#define	TFD_SETTIME_FLAGS	(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
+
+#ifdef _KERNEL
+
+MALLOC_DECLARE(M_TIMERFD);
+
+int timerfd_create_file(struct thread *td, int clockid, int flags);
+int timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *cts);
+int timerfd_settime_common(struct thread *td, int fd, int flags,
+    struct itimerspec *nts, struct itimerspec *ots);
+
+#else
+
+__BEGIN_DECLS
+int timerfd_create(int clockid, int flags);
+int timerfd_settime(int fd, int flags, const struct itimerspec *new_value,
+    struct itimerspec *old_value);
+int timerfd_gettime(int fd, struct itimerspec *curr_value);
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif /* !_SYS_TIMERFD_H_ */
diff --git a/sys/sys/user.h b/sys/sys/user.h
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -265,6 +265,7 @@
 #define	KF_TYPE_PROCDESC	11
 #define	KF_TYPE_DEV	12
 #define	KF_TYPE_EVENTFD	13
+#define	KF_TYPE_TIMERFD	14
 #define	KF_TYPE_UNKNOWN	255
 
 #define	KF_VTYPE_VNON	0
@@ -446,6 +447,11 @@
 				uint32_t	kf_eventfd_spareint[3];
 				uint64_t	kf_eventfd_addr;
 			} kf_eventfd;
+			struct {
+				uint32_t	kf_timerfd_clockid;
+				uint32_t	kf_timerfd_flags;
+				uint64_t	kf_timerfd_addr;
+			} kf_timerfd;
 			struct {
 				uint64_t	kf_kqueue_addr;
 				int32_t		kf_kqueue_count;