diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h --- a/sys/compat/linux/linux_event.h +++ b/sys/compat/linux/linux_event.h @@ -56,15 +56,4 @@ #define LINUX_EFD_SEMAPHORE (1 << 0) -#define LINUX_TFD_TIMER_ABSTIME (1 << 0) -#define LINUX_TFD_TIMER_CANCEL_ON_SET (1 << 1) -#define LINUX_TFD_CLOEXEC LINUX_O_CLOEXEC -#define LINUX_TFD_NONBLOCK LINUX_O_NONBLOCK - -#define LINUX_TFD_SHARED_FCNTL_FLAGS (LINUX_TFD_CLOEXEC \ - |LINUX_TFD_NONBLOCK) -#define LINUX_TFD_CREATE_FLAGS LINUX_TFD_SHARED_FCNTL_FLAGS -#define LINUX_TFD_SETTIME_FLAGS (LINUX_TFD_TIMER_ABSTIME \ - |LINUX_TFD_TIMER_CANCEL_ON_SET) - #endif /* !_LINUX_EVENT_H_ */ diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c --- a/sys/compat/linux/linux_event.c +++ b/sys/compat/linux/linux_event.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -108,55 +109,6 @@ int error; }; -/* timerfd */ -typedef uint64_t timerfd_t; - -static fo_rdwr_t timerfd_read; -static fo_ioctl_t timerfd_ioctl; -static fo_poll_t timerfd_poll; -static fo_kqfilter_t timerfd_kqfilter; -static fo_stat_t timerfd_stat; -static fo_close_t timerfd_close; -static fo_fill_kinfo_t timerfd_fill_kinfo; - -static struct fileops timerfdops = { - .fo_read = timerfd_read, - .fo_write = invfo_rdwr, - .fo_truncate = invfo_truncate, - .fo_ioctl = timerfd_ioctl, - .fo_poll = timerfd_poll, - .fo_kqfilter = timerfd_kqfilter, - .fo_stat = timerfd_stat, - .fo_close = timerfd_close, - .fo_chmod = invfo_chmod, - .fo_chown = invfo_chown, - .fo_sendfile = invfo_sendfile, - .fo_fill_kinfo = timerfd_fill_kinfo, - .fo_flags = DFLAG_PASSABLE -}; - -static void filt_timerfddetach(struct knote *kn); -static int filt_timerfdread(struct knote *kn, long hint); - -static struct filterops timerfd_rfiltops = { - .f_isfd = 1, - .f_detach = filt_timerfddetach, - .f_event = filt_timerfdread -}; - -struct timerfd { - clockid_t tfd_clockid; - struct itimerspec tfd_time; - struct callout tfd_callout; - 
timerfd_t tfd_count; - bool tfd_canceled; - struct selinfo tfd_sel; - struct mtx tfd_lock; -}; - -static void linux_timerfd_expire(void *); -static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); - static int epoll_create_common(struct thread *td, int flags) { @@ -658,255 +610,14 @@ int linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) { - struct timerfd *tfd; - struct file *fp; clockid_t clockid; - int fflags, fd, error; - - if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) - return (EINVAL); - - error = linux_to_native_clockid(&clockid, args->clockid); - if (error != 0) - return (error); - if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) - return (EINVAL); - - fflags = 0; - if ((args->flags & LINUX_TFD_CLOEXEC) != 0) - fflags |= O_CLOEXEC; - - error = falloc(td, &fp, &fd, fflags); - if (error != 0) - return (error); - - tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); - tfd->tfd_clockid = clockid; - mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); - - callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); - knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); - - fflags = FREAD; - if ((args->flags & LINUX_O_NONBLOCK) != 0) - fflags |= FNONBLOCK; - - finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); - fdrop(fp, td); - - td->td_retval[0] = fd; - return (error); -} - -static int -timerfd_close(struct file *fp, struct thread *td) -{ - struct timerfd *tfd; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (EINVAL); - - timespecclear(&tfd->tfd_time.it_value); - timespecclear(&tfd->tfd_time.it_interval); - - callout_drain(&tfd->tfd_callout); - - seldrain(&tfd->tfd_sel); - knlist_destroy(&tfd->tfd_sel.si_note); - - fp->f_ops = &badfileops; - mtx_destroy(&tfd->tfd_lock); - free(tfd, M_EPOLL); - - return (0); -} - -static int -timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - struct timerfd *tfd; - 
timerfd_t count; - int error; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (EINVAL); - - if (uio->uio_resid < sizeof(timerfd_t)) - return (EINVAL); - - error = 0; - mtx_lock(&tfd->tfd_lock); -retry: - if (tfd->tfd_canceled) { - tfd->tfd_count = 0; - mtx_unlock(&tfd->tfd_lock); - return (ECANCELED); - } - if (tfd->tfd_count == 0) { - if ((fp->f_flag & FNONBLOCK) != 0) { - mtx_unlock(&tfd->tfd_lock); - return (EAGAIN); - } - error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); - if (error == 0) - goto retry; - } - if (error == 0) { - count = tfd->tfd_count; - tfd->tfd_count = 0; - mtx_unlock(&tfd->tfd_lock); - error = uiomove(&count, sizeof(timerfd_t), uio); - } else - mtx_unlock(&tfd->tfd_lock); - - return (error); -} - -static int -timerfd_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ - struct timerfd *tfd; - int revents = 0; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (POLLERR); - - mtx_lock(&tfd->tfd_lock); - if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) - revents |= events & (POLLIN|POLLRDNORM); - if (revents == 0) - selrecord(td, &tfd->tfd_sel); - mtx_unlock(&tfd->tfd_lock); - - return (revents); -} - -static int -timerfd_kqfilter(struct file *fp, struct knote *kn) -{ - struct timerfd *tfd; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (EINVAL); - - if (kn->kn_filter == EVFILT_READ) - kn->kn_fop = &timerfd_rfiltops; - else - return (EINVAL); - - kn->kn_hook = tfd; - knlist_add(&tfd->tfd_sel.si_note, kn, 0); - - return (0); -} - -static void -filt_timerfddetach(struct knote *kn) -{ - struct timerfd *tfd = kn->kn_hook; - - mtx_lock(&tfd->tfd_lock); - knlist_remove(&tfd->tfd_sel.si_note, kn, 1); - mtx_unlock(&tfd->tfd_lock); -} - -static int -filt_timerfdread(struct knote *kn, long hint) -{ - struct timerfd *tfd = kn->kn_hook; - - return (tfd->tfd_count > 0); -} - -static int 
-timerfd_ioctl(struct file *fp, u_long cmd, void *data, - struct ucred *active_cred, struct thread *td) -{ - - if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) - return (EINVAL); - - switch (cmd) { - case FIONBIO: - case FIOASYNC: - return (0); - } - - return (ENOTTY); -} - -static int -timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) -{ - - return (ENXIO); -} - -static int -timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) -{ - - kif->kf_type = KF_TYPE_UNKNOWN; - return (0); -} - -static void -linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) -{ - - if (tfd->tfd_clockid == CLOCK_REALTIME) - getnanotime(ts); - else /* CLOCK_MONOTONIC */ - getnanouptime(ts); -} - -static void -linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) -{ - struct timespec cts; - - linux_timerfd_clocktime(tfd, &cts); - *ots = tfd->tfd_time; - if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { - timespecsub(&ots->it_value, &cts, &ots->it_value); - if (ots->it_value.tv_sec < 0 || - (ots->it_value.tv_sec == 0 && - ots->it_value.tv_nsec == 0)) { - ots->it_value.tv_sec = 0; - ots->it_value.tv_nsec = 1; - } - } -} - -static int -linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots) -{ - struct timerfd *tfd; - struct file *fp; int error; - error = fget(td, fd, &cap_read_rights, &fp); + error = linux_to_native_clockid(&clockid, args->clockid); if (error != 0) return (error); - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { - error = EINVAL; - goto out; - } - - mtx_lock(&tfd->tfd_lock); - linux_timerfd_curval(tfd, ots); - mtx_unlock(&tfd->tfd_lock); -out: - fdrop(fp, td); - return (error); + return (timerfd_create_file(td, clockid, args->flags)); } int @@ -914,14 +625,20 @@ { struct l_itimerspec lots; struct itimerspec ots; + struct file *fp; int error; - error = linux_timerfd_gettime_common(td, args->fd, &ots); + error = fget(td, args->fd, 
&cap_read_rights, &fp); if (error != 0) return (error); - error = native_to_linux_itimerspec(&lots, &ots); - if (error == 0) - error = copyout(&lots, args->old_value, sizeof(lots)); + error = timerfd_gettime_common(fp, &ots); + if (error == 0) { + error = native_to_linux_itimerspec(&lots, &ots); + if (error == 0) + error = copyout(&lots, args->old_value, sizeof(lots)); + } + + fdrop(fp, td); return (error); } @@ -931,93 +648,50 @@ { struct l_itimerspec64 lots; struct itimerspec ots; - int error; - - error = linux_timerfd_gettime_common(td, args->fd, &ots); - if (error != 0) - return (error); - error = native_to_linux_itimerspec64(&lots, &ots); - if (error == 0) - error = copyout(&lots, args->old_value, sizeof(lots)); - return (error); -} -#endif - -static int -linux_timerfd_settime_common(struct thread *td, int fd, int flags, - struct itimerspec *nts, struct itimerspec *oval) -{ - struct timespec cts, ts; - struct timerfd *tfd; - struct timeval tv; struct file *fp; int error; - if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) - return (EINVAL); - - error = fget(td, fd, &cap_write_rights, &fp); + error = fget(td, args->fd, &cap_read_rights, &fp); if (error != 0) return (error); - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { - error = EINVAL; - goto out; - } - - mtx_lock(&tfd->tfd_lock); - if (!timespecisset(&nts->it_value)) - timespecclear(&nts->it_interval); - if (oval != NULL) - linux_timerfd_curval(tfd, oval); - - bcopy(nts, &tfd->tfd_time, sizeof(*nts)); - tfd->tfd_count = 0; - if (timespecisset(&nts->it_value)) { - linux_timerfd_clocktime(tfd, &cts); - ts = nts->it_value; - if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) { - timespecadd(&tfd->tfd_time.it_value, &cts, - &tfd->tfd_time.it_value); - } else { - timespecsub(&ts, &cts, &ts); - } - TIMESPEC_TO_TIMEVAL(&tv, &ts); - callout_reset(&tfd->tfd_callout, tvtohz(&tv), - linux_timerfd_expire, tfd); - tfd->tfd_canceled = false; - } else { - tfd->tfd_canceled = true; - 
callout_stop(&tfd->tfd_callout); + error = timerfd_gettime_common(fp, &ots); + if (error == 0) { + error = native_to_linux_itimerspec64(&lots, &ots); + if (error == 0) + error = copyout(&lots, args->old_value, sizeof(lots)); } - mtx_unlock(&tfd->tfd_lock); -out: fdrop(fp, td); return (error); } +#endif int linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) { struct l_itimerspec lots; struct itimerspec nts, ots, *pots; + struct file *fp; int error; error = copyin(args->new_value, &lots, sizeof(lots)); if (error != 0) return (error); error = linux_to_native_itimerspec(&nts, &lots); + if (error != 0) + return (error); + error = fget(td, args->fd, &cap_write_rights, &fp); if (error != 0) return (error); pots = (args->old_value != NULL ? &ots : NULL); - error = linux_timerfd_settime_common(td, args->fd, args->flags, - &nts, pots); + error = timerfd_settime_common(fp, args->flags, &nts, pots); if (error == 0 && args->old_value != NULL) { error = native_to_linux_itimerspec(&lots, &ots); if (error == 0) error = copyout(&lots, args->old_value, sizeof(lots)); } + + fdrop(fp, td); return (error); } @@ -1027,58 +701,27 @@ { struct l_itimerspec64 lots; struct itimerspec nts, ots, *pots; + struct file *fp; int error; error = copyin(args->new_value, &lots, sizeof(lots)); if (error != 0) return (error); error = linux_to_native_itimerspec64(&nts, &lots); + if (error != 0) + return (error); + error = fget(td, args->fd, &cap_write_rights, &fp); if (error != 0) return (error); pots = (args->old_value != NULL ? 
&ots : NULL); - error = linux_timerfd_settime_common(td, args->fd, args->flags, - &nts, pots); + error = timerfd_settime_common(fp, args->flags, &nts, pots); if (error == 0 && args->old_value != NULL) { error = native_to_linux_itimerspec64(&lots, &ots); if (error == 0) error = copyout(&lots, args->old_value, sizeof(lots)); } + + fdrop(fp, td); return (error); } #endif - -static void -linux_timerfd_expire(void *arg) -{ - struct timespec cts, ts; - struct timeval tv; - struct timerfd *tfd; - - tfd = (struct timerfd *)arg; - - linux_timerfd_clocktime(tfd, &cts); - if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { - if (timespecisset(&tfd->tfd_time.it_interval)) - timespecadd(&tfd->tfd_time.it_value, - &tfd->tfd_time.it_interval, - &tfd->tfd_time.it_value); - else - /* single shot timer */ - timespecclear(&tfd->tfd_time.it_value); - if (timespecisset(&tfd->tfd_time.it_value)) { - timespecsub(&tfd->tfd_time.it_value, &cts, &ts); - TIMESPEC_TO_TIMEVAL(&tv, &ts); - callout_reset(&tfd->tfd_callout, tvtohz(&tv), - linux_timerfd_expire, tfd); - } - tfd->tfd_count++; - KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); - selwakeup(&tfd->tfd_sel); - wakeup(&tfd->tfd_count); - } else if (timespecisset(&tfd->tfd_time.it_value)) { - timespecsub(&tfd->tfd_time.it_value, &cts, &ts); - TIMESPEC_TO_TIMEVAL(&tv, &ts); - callout_reset(&tfd->tfd_callout, tvtohz(&tv), - linux_timerfd_expire, tfd); - } -} diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3950,6 +3950,7 @@ kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard +kern/sys_timerfd.c standard kern/syscalls.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -4939,8 +4939,8 @@ return ("proc"); case DTYPE_EVENTFD: return ("eventfd"); - case DTYPE_LINUXTFD: - return ("ltimer"); + case DTYPE_TIMERFD: + return ("timerfd"); 
default: return ("unkn"); } diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -948,6 +949,19 @@ return (eventfd_create_file(td, efd.initval, efd.flags)); } +int +user_timerfd(struct thread *td, const void *arg) +{ + struct specialfd_timerfd tfd; + int error; + + error = copyin(arg, &tfd, sizeof(tfd)); + if (error != 0) + return (error); + + return (timerfd_create_file(td, tfd.clockid, tfd.flags)); +} + int sys___specialfd(struct thread *td, struct __specialfd_args *args) { @@ -959,6 +973,10 @@ arg_size = sizeof(struct specialfd_eventfd); specialfd_func = user_eventfd; break; + case SPECIALFD_TIMERFD: + arg_size = sizeof(struct specialfd_timerfd); + specialfd_func = user_timerfd; + break; default: return (EINVAL); } diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c new file mode 100644 --- /dev/null +++ b/sys/kern/sys_timerfd.c @@ -0,0 +1,527 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2014 Dmitry Chagin + * Copyright (c) 2023 Jake Freeland + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures"); + +static fo_rdwr_t timerfd_read; +static fo_ioctl_t timerfd_ioctl; +static fo_poll_t timerfd_poll; +static fo_kqfilter_t timerfd_kqfilter; +static fo_stat_t timerfd_stat; +static fo_close_t timerfd_close; +static fo_fill_kinfo_t timerfd_fill_kinfo; + +static struct fileops timerfdops = { + .fo_read = timerfd_read, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, + .fo_ioctl = timerfd_ioctl, + .fo_poll = timerfd_poll, + .fo_kqfilter = timerfd_kqfilter, + .fo_stat = timerfd_stat, + .fo_close = timerfd_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = timerfd_fill_kinfo, + .fo_flags = DFLAG_PASSABLE, +}; + +static void filt_timerfddetach(struct knote *kn); +static int filt_timerfdread(struct knote *kn, long hint); + +static struct filterops timerfd_rfiltops = { + .f_isfd = 1, + .f_detach = filt_timerfddetach, + .f_event = filt_timerfdread, +}; + +static struct unrhdr64 tfdino_unr; + +struct timerfd { + clockid_t tfd_clockid; + struct itimerspec tfd_time; + struct callout tfd_callout; + timerfd_t tfd_count; + bool tfd_canceled; + struct selinfo tfd_sel; + struct mtx tfd_lock; + int 
tfd_flags; + ino_t tfd_ino; +}; + +static void timerfd_expire(void *); +static void timerfd_curval(struct timerfd *, struct itimerspec *); + +static int timerfd_settime_user(struct file *fp, + struct timerfd_settime_args *args); +#ifdef COMPAT_FREEBSD32 +static int timerfd_settime_user32(struct file *fp, + struct timerfd_settime_args32 *args); +#endif + +static void +timerfd_init(void *data) +{ + new_unrhdr64(&tfdino_unr, 1); +} + +SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL); + +int +timerfd_create_file(struct thread *td, int clockid, int flags) +{ + struct file *fp; + struct timerfd *tfd; + int error, fd, fflags = 0; + + AUDIT_ARG_FFLAGS(flags); + AUDIT_ARG_VALUE(clockid); + + if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) + return (EINVAL); + if ((flags & ~TFD_CREATE_FLAGS) != 0) + return (EINVAL); + if ((flags & TFD_CLOEXEC) != 0) + fflags |= O_CLOEXEC; + + error = falloc(td, &fp, &fd, fflags); + if (error != 0) + return (error); + + tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO); + tfd->tfd_clockid = (clockid_t)clockid; + tfd->tfd_flags = flags; + tfd->tfd_ino = alloc_unr64(&tfdino_unr); + mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); + callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); + knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); + + fflags = FREAD; + if ((flags & TFD_NONBLOCK) != 0) + fflags |= FNONBLOCK; + + finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops); + fdrop(fp, td); + + td->td_retval[0] = fd; + return (error); +} + +static int +timerfd_close(struct file *fp, struct thread *td) +{ + struct timerfd *tfd; + + tfd = fp->f_data; + timespecclear(&tfd->tfd_time.it_value); + timespecclear(&tfd->tfd_time.it_interval); + + callout_drain(&tfd->tfd_callout); + + seldrain(&tfd->tfd_sel); + knlist_destroy(&tfd->tfd_sel.si_note); + + fp->f_ops = &badfileops; + mtx_destroy(&tfd->tfd_lock); + free(tfd, M_TIMERFD); + + return (0); +} + +static int +timerfd_read(struct file *fp, struct uio *uio, 
struct ucred *active_cred, + int flags, struct thread *td) +{ + struct timerfd *tfd; + timerfd_t count; + int error; + + tfd = fp->f_data; + if (uio->uio_resid < sizeof(timerfd_t)) + return (EINVAL); + + error = 0; + mtx_lock(&tfd->tfd_lock); +retry: + if (tfd->tfd_canceled) { + tfd->tfd_count = 0; + mtx_unlock(&tfd->tfd_lock); + return (ECANCELED); + } + if (tfd->tfd_count == 0) { + if ((fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&tfd->tfd_lock); + return (EAGAIN); + } + error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); + if (error == 0) + goto retry; + } + if (error == 0) { + count = tfd->tfd_count; + tfd->tfd_count = 0; + mtx_unlock(&tfd->tfd_lock); + error = uiomove(&count, sizeof(timerfd_t), uio); + } else { + mtx_unlock(&tfd->tfd_lock); + } + + return (error); +} + +static int +timerfd_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + struct timerfd *tfd; + int revents = 0; + + tfd = fp->f_data; + mtx_lock(&tfd->tfd_lock); + if ((events & (POLLIN | POLLRDNORM)) != 0 && tfd->tfd_count > 0) + revents |= events & (POLLIN | POLLRDNORM); + if (revents == 0) + selrecord(td, &tfd->tfd_sel); + mtx_unlock(&tfd->tfd_lock); + + return (revents); +} + +static int +timerfd_kqfilter(struct file *fp, struct knote *kn) +{ + struct timerfd *tfd; + + tfd = fp->f_data; + if (kn->kn_filter != EVFILT_READ) + return (EINVAL); + + kn->kn_fop = &timerfd_rfiltops; + kn->kn_hook = tfd; + knlist_add(&tfd->tfd_sel.si_note, kn, 0); + + return (0); +} + +static void +filt_timerfddetach(struct knote *kn) +{ + struct timerfd *tfd = kn->kn_hook; + + mtx_lock(&tfd->tfd_lock); + knlist_remove(&tfd->tfd_sel.si_note, kn, 1); + mtx_unlock(&tfd->tfd_lock); +} + +static int +filt_timerfdread(struct knote *kn, long hint) +{ + struct timerfd *tfd = kn->kn_hook; + + return (tfd->tfd_count > 0); +} + +static int +timerfd_ioctl(struct file *fp, u_long cmd, void *data, + struct ucred *active_cred, struct thread *td) +{ + struct timerfd *tfd = 
fp->f_data; +#ifdef COMPAT_FREEBSD32 + struct itimerspec its; + struct itimerspec32 *itsp32; + int error; +#endif + + switch (cmd) { + case TFD_GETTIME: +#ifdef COMPAT_FREEBSD32 +#ifdef TFD_GETTIME32 + case TFD_GETTIME32: +#else /* TFD_GETTIME32 */ + /* On non-i386 systems we can't depend on the cmd differing */ + _Static_assert(sizeof(struct itimerspec) == sizeof(struct itimerspec32)); +#endif /* !TFD_GETTIME32 */ + if (SV_CURPROC_FLAG(SV_ILP32)) { + error = timerfd_gettime_common(fp, &its); + if (error != 0) + return (error); + itsp32 = (struct itimerspec32 *)data; + CP(its, *itsp32, it_interval.tv_sec); + CP(its, *itsp32, it_interval.tv_nsec); + CP(its, *itsp32, it_value.tv_sec); + CP(its, *itsp32, it_value.tv_nsec); + return (error); + } +#endif /* COMPAT_FREEBSD32 */ + return (timerfd_gettime_common(fp, (struct itimerspec *)data)); /* native ABI; kept outside the #ifdef so TFD_GETTIME never falls through to TFD_SETTIME */ + case TFD_SETTIME: + return (timerfd_settime_user(fp, (struct timerfd_settime_args *)data)); +#ifdef COMPAT_FREEBSD32 + case TFD_SETTIME32: + return (timerfd_settime_user32(fp, (struct timerfd_settime_args32 *)data)); +#endif + case FIONBIO: + if (*(int *)data != 0) + atomic_set_int(&fp->f_flag, FNONBLOCK); + else + atomic_clear_int(&fp->f_flag, FNONBLOCK); + return (0); + case FIONREAD: + if (tfd->tfd_count > 0) + *(int *)data = sizeof(timerfd_t); /* a read delivers one expiration counter */ + else + *(int *)data = 0; + return (0); + } + + return (ENOTTY); +} + +static int +timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) +{ + struct timerfd *tfd; + + tfd = fp->f_data; + bzero(sb, sizeof(*sb)); + sb->st_ino = tfd->tfd_ino; + sb->st_nlink = fp->f_count - 1; + sb->st_uid = fp->f_cred->cr_uid; + sb->st_gid = fp->f_cred->cr_gid; + sb->st_blksize = PAGE_SIZE; + return (0); +} + +static int +timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) +{ + + struct timerfd *tfd; + + tfd = fp->f_data; + kif->kf_type = KF_TYPE_TIMERFD; + mtx_lock(&tfd->tfd_lock); + kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid; + 
kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags; + kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd; + mtx_unlock(&tfd->tfd_lock); + return (0); +} + +static void +timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) +{ + + if (tfd->tfd_clockid == CLOCK_REALTIME) + getnanotime(ts); + else /* CLOCK_MONOTONIC */ + getnanouptime(ts); +} + +static void +timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value) +{ + struct timespec curr_value; + + timerfd_clocktime(tfd, &curr_value); + *old_value = tfd->tfd_time; + if (old_value->it_value.tv_sec != 0 || old_value->it_value.tv_nsec != 0) { + timespecsub(&old_value->it_value, &curr_value, &old_value->it_value); + if (old_value->it_value.tv_sec < 0 || + (old_value->it_value.tv_sec == 0 && + old_value->it_value.tv_nsec == 0)) { + old_value->it_value.tv_sec = 0; + old_value->it_value.tv_nsec = 1; + } + } +} + +int +timerfd_gettime_common(struct file *fp, struct itimerspec *curr_value) +{ + struct timerfd *tfd; + + tfd = fp->f_data; + if (tfd == NULL || fp->f_type != DTYPE_TIMERFD) + return (EINVAL); + + mtx_lock(&tfd->tfd_lock); + timerfd_curval(tfd, curr_value); + mtx_unlock(&tfd->tfd_lock); + + return (0); +} + +int +timerfd_settime_common(struct file *fp, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + struct timerfd *tfd; + struct timespec curr_value, exp; + struct timeval tv; + + tfd = fp->f_data; + if (tfd == NULL || fp->f_type != DTYPE_TIMERFD || + (flags & ~TFD_SETTIME_FLAGS) != 0) + return (EINVAL); /* not a timerfd, or unknown TFD_TIMER_* bits */ + mtx_lock(&tfd->tfd_lock); + if (old_value != NULL) + timerfd_curval(tfd, old_value); + bcopy(new_value, &tfd->tfd_time, sizeof(*new_value)); + tfd->tfd_count = 0; + if (timespecisset(&new_value->it_value)) { + timerfd_clocktime(tfd, &curr_value); + exp = new_value->it_value; + if ((flags & TFD_TIMER_ABSTIME) == 0) { + timespecadd(&tfd->tfd_time.it_value, &curr_value, + &tfd->tfd_time.it_value); + } else { + timespecsub(&exp, &curr_value, &exp); + } + TIMESPEC_TO_TIMEVAL(&tv, &exp); + 
callout_reset_sbt(&tfd->tfd_callout, tvtosbt(tv), 0, + timerfd_expire, tfd, 0); + tfd->tfd_canceled = false; + } else { + tfd->tfd_canceled = true; + callout_stop(&tfd->tfd_callout); + } + mtx_unlock(&tfd->tfd_lock); + + return (0); +} + +static void +timerfd_expire(void *arg) +{ + struct timespec curr_value, ts; + struct timeval tv; + struct timerfd *tfd; + + tfd = (struct timerfd *)arg; + + timerfd_clocktime(tfd, &curr_value); + if (timespeccmp(&curr_value, &tfd->tfd_time.it_value, >=)) { + if (timespecisset(&tfd->tfd_time.it_interval)) { + timespecadd(&tfd->tfd_time.it_value, + &tfd->tfd_time.it_interval, + &tfd->tfd_time.it_value); + } else { + /* single shot timer */ + timespecclear(&tfd->tfd_time.it_value); + } + if (timespecisset(&tfd->tfd_time.it_value)) { + timespecsub(&tfd->tfd_time.it_value, &curr_value, &ts); + TIMESPEC_TO_TIMEVAL(&tv, &ts); + callout_reset_sbt(&tfd->tfd_callout, tvtosbt(tv), 0, + timerfd_expire, tfd, 0); + } + tfd->tfd_count++; + KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); + selwakeup(&tfd->tfd_sel); + wakeup(&tfd->tfd_count); + } else if (timespecisset(&tfd->tfd_time.it_value)) { + timespecsub(&tfd->tfd_time.it_value, &curr_value, &ts); + TIMESPEC_TO_TIMEVAL(&tv, &ts); + callout_reset_sbt(&tfd->tfd_callout, tvtosbt(tv), 0, + timerfd_expire, tfd, 0); + } +} + +static int +timerfd_settime_user(struct file *fp, struct timerfd_settime_args *args) +{ + struct itimerspec new_value, old_value, *pold_value; + int error; + + error = copyin(args->new_value, &new_value, sizeof(new_value)); + if (error != 0) + return (error); + pold_value = args->old_value == NULL ? 
NULL : &old_value; + error = timerfd_settime_common(fp, args->flags, &new_value, pold_value); + if (error == 0 && pold_value != NULL) + error = copyout(pold_value, args->old_value, sizeof(old_value)); + return (error); +} + +#ifdef COMPAT_FREEBSD32 +static int +timerfd_settime_user32(struct file *fp, struct timerfd_settime_args32 *args) +{ + struct itimerspec new_value, old_value, *pold_value; + struct itimerspec32 new_value32, old_value32; + int error; + + error = copyin((struct itimerspec32 *)(uintptr_t)args->new_value, + &new_value32, sizeof(new_value32)); + if (error != 0) + return (error); + memset(&old_value32, 0, sizeof(old_value32)); + CP(new_value32, new_value, it_interval.tv_sec); + CP(new_value32, new_value, it_interval.tv_nsec); + CP(new_value32, new_value, it_value.tv_sec); + CP(new_value32, new_value, it_value.tv_nsec); + pold_value = (struct itimerspec32 *)(uintptr_t)args->old_value == NULL ? + NULL : &old_value; + error = timerfd_settime_common(fp, args->flags, &new_value, pold_value); + if (error == 0 && pold_value != NULL) { + CP(old_value, old_value32, it_interval.tv_sec); + CP(old_value, old_value32, it_interval.tv_nsec); + CP(old_value, old_value32, it_value.tv_sec); + CP(old_value, old_value32, it_value.tv_nsec); + error = copyout(&old_value32, + (struct itimerspec32 *)(uintptr_t)args->old_value, + sizeof(old_value32)); + } + return (error); +} +#endif diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -70,7 +70,7 @@ #define DTYPE_DEV 11 /* Device specific fd type */ #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_EVENTFD 13 /* eventfd */ -#define DTYPE_LINUXTFD 14 /* emulation timerfd type */ +#define DTYPE_TIMERFD 14 /* timerfd */ #ifdef _KERNEL diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h --- a/sys/sys/specialfd.h +++ b/sys/sys/specialfd.h @@ -32,6 +32,7 @@ enum specialfd_type { SPECIALFD_EVENTFD = 1, + SPECIALFD_TIMERFD = 2, }; struct specialfd_eventfd { @@ -39,4 +40,9 @@ 
int flags; }; +struct specialfd_timerfd { + int clockid; + int flags; +}; + #endif /* !_SYS_SPECIALFD_H_ */ diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -322,6 +322,7 @@ enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)); int user_eventfd(struct thread *td, const void *arg); +int user_timerfd(struct thread *td, const void *arg); int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, struct statfs *buf); int kern_symlinkat(struct thread *td, const char *path1, int fd, diff --git a/sys/sys/timerfd.h b/sys/sys/timerfd.h new file mode 100644 --- /dev/null +++ b/sys/sys/timerfd.h @@ -0,0 +1,93 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Jake Freeland + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _SYS_TIMERFD_H_ +#define _SYS_TIMERFD_H_ + +#include +#include +#include +#include +#include +#include + +typedef uint64_t timerfd_t; + +#define TFD_NONBLOCK O_NONBLOCK +#define TFD_CLOEXEC O_CLOEXEC + +#define TFD_TIMER_ABSTIME (1 << 0) +#define TFD_TIMER_CANCEL_ON_SET (1 << 1) + +#define TFD_SHARED_FCNTL_FLAGS (TFD_NONBLOCK | TFD_CLOEXEC) +#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS +#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) + +#define TFD_GETTIME _IOR('T', 0x01, struct itimerspec) +#define TFD_SETTIME _IOW('T', 0x02, struct timerfd_settime_args) + +#ifdef _KERNEL + +#ifdef COMPAT_FREEBSD32 +#include +#ifdef __amd64__ +#define TFD_GETTIME32 _IOC_NEWTYPE(TFD_GETTIME, struct itimerspec32) +#endif +#define TFD_SETTIME32 _IOC_NEWTYPE(TFD_SETTIME, struct timerfd_settime_args32) +#endif /* COMPAT_FREEBSD32 */ + +int timerfd_create_file(struct thread *td, int clockid, int flags); +int timerfd_gettime_common(struct file *fp, struct itimerspec *curr_value); +int timerfd_settime_common(struct file *fp, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value); + +#else /* _KERNEL */ + +__BEGIN_DECLS +int timerfd_create(int clockid, int flags); +int timerfd_gettime(int fd, struct itimerspec *curr_value); +int timerfd_settime(int fd, int flags, const struct itimerspec *new_value, + struct itimerspec *old_value); +__END_DECLS + +#endif /* !_KERNEL */ + +struct timerfd_settime_args { 
+ int flags; + const struct itimerspec *new_value; + struct itimerspec *old_value; +}; + +#ifdef _KERNEL +struct timerfd_settime_args32 { + int flags; + uint32_t new_value; + uint32_t old_value; +}; +#endif /* _KERNEL */ + +#endif /* !_SYS_TIMERFD_H_ */ diff --git a/sys/sys/user.h b/sys/sys/user.h --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -265,6 +265,7 @@ #define KF_TYPE_PROCDESC 11 #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 +#define KF_TYPE_TIMERFD 14 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -446,6 +447,11 @@ uint32_t kf_eventfd_spareint[3]; uint64_t kf_eventfd_addr; } kf_eventfd; + struct { + uint32_t kf_timerfd_clockid; + uint32_t kf_timerfd_flags; + uint64_t kf_timerfd_addr; + } kf_timerfd; struct { uint64_t kf_kqueue_addr; int32_t kf_kqueue_count;