Page MenuHomeFreeBSD

D52045.id164355.diff
No OneTemporary

D52045.id164355.diff

diff --git a/lib/libsys/kqueue.2 b/lib/libsys/kqueue.2
--- a/lib/libsys/kqueue.2
+++ b/lib/libsys/kqueue.2
@@ -97,10 +97,37 @@
a
.Fa flags
argument, which is a bitwise-inclusive OR of the following flags:
-.Bl -tag -width "KQUEUE_CLOEXEC"
+.Bl -tag -width "KQUEUE_CPONFORK"
.It Dv KQUEUE_CLOEXEC
The returned file descriptor is automatically closed on
.Xr execve 2
+.It Dv KQUEUE_CPONFORK
+When this flag is set, the created kqueue is copied into
+the child process on
+.Xr fork 2
+calls.
+The kqueue descriptor index of the new kqueue will be inherited by the child,
+that is, the numeric value of the descriptor will remain the same.
+.Pp
+Copying is deep, that is, each registered event in the original kqueue is
+copied (and not shared) into the new kqueue.
+This is contrary to how other descriptor types are handled upon
+.Xr fork 2 ,
+where the copied file descriptor references the same file object
+as the source descriptor (shallow copy).
+.Pp
+By default, in other words, when the flag is not set, kqueues from
+the parent are not copied on fork to the child process.
+The corresponding file descriptor indeces are unused in the child.
+.Pp
+Registered events that reference file descriptors which are not
+duplicated on fork, are not copied into the new kqueue.
+For instance, if the event references a file descriptor opened with the
+.Dv O_CLOEXEC
+flag set, it is not copied.
+Similarly, if event references a kqueue opened without the
+.Dv KQUEUE_CPONFORK
+flag, the event is not copied.
.El
.Pp
The
diff --git a/sys/arm/ti/ti_pruss.c b/sys/arm/ti/ti_pruss.c
--- a/sys/arm/ti/ti_pruss.c
+++ b/sys/arm/ti/ti_pruss.c
@@ -793,6 +793,7 @@
.f_isfd = 1,
.f_detach = ti_pruss_irq_kqread_detach,
.f_event = ti_pruss_irq_kqevent,
+ .f_copy = knote_triv_copy,
};
static void
diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c
--- a/sys/cam/scsi/scsi_pass.c
+++ b/sys/cam/scsi/scsi_pass.c
@@ -206,7 +206,8 @@
static const struct filterops passread_filtops = {
.f_isfd = 1,
.f_detach = passreadfiltdetach,
- .f_event = passreadfilt
+ .f_event = passreadfilt,
+ .f_copy = knote_triv_copy,
};
static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers");
diff --git a/sys/cam/scsi/scsi_target.c b/sys/cam/scsi/scsi_target.c
--- a/sys/cam/scsi/scsi_target.c
+++ b/sys/cam/scsi/scsi_target.c
@@ -108,6 +108,7 @@
.f_isfd = 1,
.f_detach = targreadfiltdetach,
.f_event = targreadfilt,
+ .f_copy = knote_triv_copy,
};
static struct cdevsw targ_cdevsw = {
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -104,7 +104,7 @@
epoll_create_common(struct thread *td, int flags)
{
- return (kern_kqueue(td, flags, NULL));
+ return (kern_kqueue(td, flags, false, NULL));
}
#ifdef LINUX_LEGACY_SYSCALLS
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1171,12 +1171,14 @@
.f_isfd = 1,
.f_detach = linux_file_kqfilter_detach,
.f_event = linux_file_kqfilter_read_event,
+ .f_copy = knote_triv_copy,
};
static const struct filterops linux_dev_kqfiltops_write = {
.f_isfd = 1,
.f_detach = linux_file_kqfilter_detach,
.f_event = linux_file_kqfilter_write_event,
+ .f_copy = knote_triv_copy,
};
static void
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c
@@ -183,6 +183,7 @@
.f_isfd = 1,
.f_detach = zvol_filter_detach,
.f_event = zvol_filter_vnode,
+ .f_copy = knote_triv_copy,
};
extern uint_t zfs_geom_probe_vdev_key;
diff --git a/sys/dev/atkbdc/psm.c b/sys/dev/atkbdc/psm.c
--- a/sys/dev/atkbdc/psm.c
+++ b/sys/dev/atkbdc/psm.c
@@ -5287,6 +5287,7 @@
.f_isfd = 1,
.f_detach = psmfilter_detach,
.f_event = psmfilter,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/dev/cyapa/cyapa.c b/sys/dev/cyapa/cyapa.c
--- a/sys/dev/cyapa/cyapa.c
+++ b/sys/dev/cyapa/cyapa.c
@@ -1121,7 +1121,8 @@
static const struct filterops cyapa_filtops = {
.f_isfd = 1,
.f_detach = cyapafiltdetach,
- .f_event = cyapafilt
+ .f_event = cyapafilt,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/dev/evdev/cdev.c b/sys/dev/evdev/cdev.c
--- a/sys/dev/evdev/cdev.c
+++ b/sys/dev/evdev/cdev.c
@@ -96,6 +96,7 @@
.f_attach = NULL,
.f_detach = evdev_kqdetach,
.f_event = evdev_kqread,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/dev/evdev/uinput.c b/sys/dev/evdev/uinput.c
--- a/sys/dev/evdev/uinput.c
+++ b/sys/dev/evdev/uinput.c
@@ -104,6 +104,7 @@
.f_attach = NULL,
.f_detach = uinput_kqdetach,
.f_event = uinput_kqread,
+ .f_copy = knote_triv_copy,
};
struct uinput_cdev_state
diff --git a/sys/dev/gpio/gpioc.c b/sys/dev/gpio/gpioc.c
--- a/sys/dev/gpio/gpioc.c
+++ b/sys/dev/gpio/gpioc.c
@@ -158,7 +158,8 @@
.f_attach = NULL,
.f_detach = gpioc_kqdetach,
.f_event = gpioc_kqread,
- .f_touch = NULL
+ .f_touch = NULL,
+ .f_copy = knote_triv_copy,
};
static struct gpioc_pin_event *
diff --git a/sys/dev/hid/hidraw.c b/sys/dev/hid/hidraw.c
--- a/sys/dev/hid/hidraw.c
+++ b/sys/dev/hid/hidraw.c
@@ -182,6 +182,7 @@
.f_isfd = 1,
.f_detach = hidraw_kqdetach,
.f_event = hidraw_kqread,
+ .f_copy = knote_triv_copy,
};
static void
diff --git a/sys/dev/hid/u2f.c b/sys/dev/hid/u2f.c
--- a/sys/dev/hid/u2f.c
+++ b/sys/dev/hid/u2f.c
@@ -132,6 +132,7 @@
.f_isfd = 1,
.f_detach = u2f_kqdetach,
.f_event = u2f_kqread,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -1406,19 +1406,34 @@
return netmap_knrw(kn, hint, POLLOUT);
}
+static int
+netmap_kncopy(struct knote *kn, struct proc *p1)
+{
+ struct netmap_priv_d *priv;
+ struct nm_selinfo *si;
+
+ priv = kn->kn_hook;
+ si = priv->np_si[kn->kn_filter == EVFILT_WRITE ? NR_TX : NR_RX];
+ NMG_LOCK();
+ si->kqueue_users++;
+ NMG_UNLOCK();
+ return (0);
+}
+
static const struct filterops netmap_rfiltops = {
.f_isfd = 1,
.f_detach = netmap_knrdetach,
.f_event = netmap_knread,
+ .f_copy = netmap_kncopy,
};
static const struct filterops netmap_wfiltops = {
.f_isfd = 1,
.f_detach = netmap_knwdetach,
.f_event = netmap_knwrite,
+ .f_copy = netmap_kncopy,
};
-
/*
* This is called when a thread invokes kevent() to record
* a change in the configuration of the kqueue().
diff --git a/sys/dev/null/null.c b/sys/dev/null/null.c
--- a/sys/dev/null/null.c
+++ b/sys/dev/null/null.c
@@ -61,12 +61,14 @@
static const struct filterops one_fop = {
.f_isfd = 1,
- .f_event = one_ev
+ .f_event = one_ev,
+ .f_copy = knote_triv_copy,
};
static const struct filterops zero_fop = {
.f_isfd = 1,
- .f_event = zero_ev
+ .f_event = zero_ev,
+ .f_copy = knote_triv_copy,
};
static struct cdevsw full_cdevsw = {
diff --git a/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c b/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c
--- a/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c
+++ b/sys/dev/qat/qat_common/adf_freebsd_dev_processes.c
@@ -89,6 +89,7 @@
.f_attach = NULL,
.f_detach = adf_state_kqread_detach,
.f_event = adf_state_kqread_event,
+ .f_copy = knote_triv_copy,
};
static struct cdev *adf_processes_dev;
diff --git a/sys/dev/usb/usb_dev.c b/sys/dev/usb/usb_dev.c
--- a/sys/dev/usb/usb_dev.c
+++ b/sys/dev/usb/usb_dev.c
@@ -1231,12 +1231,14 @@
.f_isfd = 1,
.f_detach = usb_filter_detach,
.f_event = usb_filter_write,
+ .f_copy = knote_triv_copy,
};
static const struct filterops usb_filtops_read = {
.f_isfd = 1,
.f_detach = usb_filter_detach,
.f_event = usb_filter_read,
+ .f_copy = knote_triv_copy,
};
/* ARGSUSED */
diff --git a/sys/fs/cuse/cuse.c b/sys/fs/cuse/cuse.c
--- a/sys/fs/cuse/cuse.c
+++ b/sys/fs/cuse/cuse.c
@@ -195,12 +195,14 @@
.f_isfd = 1,
.f_detach = cuse_client_kqfilter_read_detach,
.f_event = cuse_client_kqfilter_read_event,
+ .f_copy = knote_triv_copy,
};
static const struct filterops cuse_client_kqfilter_write_ops = {
.f_isfd = 1,
.f_detach = cuse_client_kqfilter_write_detach,
.f_event = cuse_client_kqfilter_write_event,
+ .f_copy = knote_triv_copy,
};
static d_open_t cuse_client_open;
diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c
--- a/sys/fs/fuse/fuse_device.c
+++ b/sys/fs/fuse/fuse_device.c
@@ -126,11 +126,13 @@
.f_isfd = 1,
.f_detach = fuse_device_filt_detach,
.f_event = fuse_device_filt_read,
+ .f_copy = knote_triv_copy,
};
static const struct filterops fuse_device_wfiltops = {
.f_isfd = 1,
.f_event = fuse_device_filt_write,
+ .f_copy = knote_triv_copy,
};
/****************************
diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c
--- a/sys/geom/geom_dev.c
+++ b/sys/geom/geom_dev.c
@@ -82,6 +82,7 @@
.f_isfd = 1,
.f_detach = gdev_filter_detach,
.f_event = gdev_filter_vnode,
+ .f_copy = knote_triv_copy,
};
static struct cdevsw g_dev_cdevsw = {
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2486,7 +2486,7 @@
if (refcount_load(&p->p_fd->fd_refcnt) == 1)
return;
- tmp = fdcopy(p->p_fd);
+ tmp = fdcopy(p->p_fd, p);
fdescfree(td);
p->p_fd = tmp;
}
@@ -2515,14 +2515,17 @@
* this is to ease callers, not catch errors.
*/
struct filedesc *
-fdcopy(struct filedesc *fdp)
+fdcopy(struct filedesc *fdp, struct proc *p1)
{
struct filedesc *newfdp;
struct filedescent *nfde, *ofde;
+ struct file *fp;
int i, lastfile;
+ bool fork_pass;
MPASS(fdp != NULL);
+ fork_pass = false;
newfdp = fdinit();
FILEDESC_SLOCK(fdp);
for (;;) {
@@ -2533,10 +2536,35 @@
fdgrowtable(newfdp, lastfile + 1);
FILEDESC_SLOCK(fdp);
}
- /* copy all passable descriptors (i.e. not kqueue) */
+
+ /*
+ * Copy all passable descriptors (i.e. not kqueue), and
+ * prepare to handle copyable but not passable descriptors
+ * (kqueues).
+ *
+ * The pass to handle copying is performed after all passable
+ * files are installed into the new file descriptor's table,
+ * since kqueues need all referenced file descriptors already
+ * valid, including other kqueues. For the same reason the
+ * copying is done in two passes by itself, first installing
+ * not fully initialized ('empty') copyable files into the new
+ * fd table, and then giving the subsystems a second chance to
+ * really fill the copied file backing structure with the
+ * content.
+ */
newfdp->fd_freefile = fdp->fd_freefile;
FILEDESC_FOREACH_FDE(fdp, i, ofde) {
- if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 ||
+ const struct fileops *ops;
+
+ ops = ofde->fde_file->f_ops;
+ fp = NULL;
+ if ((ops->fo_flags & DFLAG_FORK) != 0 &&
+ (ofde->fde_flags & UF_FOCLOSE) == 0) {
+ if (ops->fo_fork(newfdp, ofde->fde_file, &fp, p1,
+ curthread) != 0)
+ continue;
+ fork_pass = true;
+ } else if ((ops->fo_flags & DFLAG_PASSABLE) == 0 ||
(ofde->fde_flags & UF_FOCLOSE) != 0 ||
!fhold(ofde->fde_file)) {
if (newfdp->fd_freefile == fdp->fd_freefile)
@@ -2545,11 +2573,30 @@
}
nfde = &newfdp->fd_ofiles[i];
*nfde = *ofde;
+ if (fp != NULL)
+ nfde->fde_file = fp;
filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
fdused_init(newfdp, i);
}
MPASS(newfdp->fd_freefile != -1);
FILEDESC_SUNLOCK(fdp);
+
+ /*
+ * Now handle copying kqueues, since all fds, including
+ * kqueues, are in place.
+ */
+ if (__predict_false(fork_pass)) {
+ FILEDESC_FOREACH_FDE(newfdp, i, nfde) {
+ const struct fileops *ops;
+
+ ops = nfde->fde_file->f_ops;
+ if ((ops->fo_flags & DFLAG_FORK) == 0 ||
+ nfde->fde_file == NULL)
+ continue;
+ ops->fo_fork(newfdp, NULL, &nfde->fde_file, p1,
+ curthread);
+ }
+ }
return (newfdp);
}
diff --git a/sys/kern/kern_devctl.c b/sys/kern/kern_devctl.c
--- a/sys/kern/kern_devctl.c
+++ b/sys/kern/kern_devctl.c
@@ -130,6 +130,7 @@
.f_isfd = 1,
.f_detach = filt_devctl_detach,
.f_event = filt_devctl_read,
+ .f_copy = knote_triv_copy,
};
static struct cdev *devctl_dev;
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -134,6 +134,7 @@
static fo_stat_t kqueue_stat;
static fo_close_t kqueue_close;
static fo_fill_kinfo_t kqueue_fill_kinfo;
+static fo_fork_t kqueue_fork;
static const struct fileops kqueueops = {
.fo_read = invfo_rdwr,
@@ -148,7 +149,9 @@
.fo_chown = invfo_chown,
.fo_sendfile = invfo_sendfile,
.fo_cmp = file_kcmp_generic,
+ .fo_fork = kqueue_fork,
.fo_fill_kinfo = kqueue_fill_kinfo,
+ .fo_flags = DFLAG_FORK,
};
static int knote_attach(struct knote *kn, struct kqueue *kq);
@@ -176,6 +179,7 @@
static void filt_timerstart(struct knote *kn, sbintime_t to);
static void filt_timertouch(struct knote *kn, struct kevent *kev,
u_long type);
+static int filt_timercopy(struct knote *kn, struct proc *p1);
static int filt_timervalidate(struct knote *kn, sbintime_t *to);
static int filt_timer(struct knote *kn, long hint);
static int filt_userattach(struct knote *kn);
@@ -187,11 +191,13 @@
static const struct filterops file_filtops = {
.f_isfd = 1,
.f_attach = filt_fileattach,
+ .f_copy = knote_triv_copy,
};
static const struct filterops kqread_filtops = {
.f_isfd = 1,
.f_detach = filt_kqdetach,
.f_event = filt_kqueue,
+ .f_copy = knote_triv_copy,
};
/* XXX - move to kern_proc.c? */
static const struct filterops proc_filtops = {
@@ -199,12 +205,14 @@
.f_attach = filt_procattach,
.f_detach = filt_procdetach,
.f_event = filt_proc,
+ .f_copy = knote_triv_copy,
};
static const struct filterops jail_filtops = {
.f_isfd = 0,
.f_attach = filt_jailattach,
.f_detach = filt_jaildetach,
.f_event = filt_jail,
+ .f_copy = knote_triv_copy,
};
static const struct filterops timer_filtops = {
.f_isfd = 0,
@@ -212,12 +220,14 @@
.f_detach = filt_timerdetach,
.f_event = filt_timer,
.f_touch = filt_timertouch,
+ .f_copy = filt_timercopy,
};
static const struct filterops user_filtops = {
.f_attach = filt_userattach,
.f_detach = filt_userdetach,
.f_event = filt_user,
.f_touch = filt_usertouch,
+ .f_copy = knote_triv_copy,
};
static uma_zone_t knote_zone;
@@ -347,6 +357,7 @@
static const struct filterops null_filtops = {
.f_isfd = 0,
.f_attach = filt_nullattach,
+ .f_copy = knote_triv_copy,
};
/* XXX - make SYSINIT to add these, and move into respective modules. */
@@ -940,6 +951,30 @@
return (0);
}
+static int
+filt_timercopy(struct knote *kn, struct proc *p)
+{
+ struct kq_timer_cb_data *kc_src, *kc;
+
+ if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) {
+ atomic_subtract_int(&kq_ncallouts, 1);
+ return (ENOMEM);
+ }
+
+ kn->kn_status &= ~KN_DETACHED;
+ kc_src = kn->kn_ptr.p_v;
+ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
+ kc->kn = kn;
+ kc->p = p;
+ kc->flags = kc_src->flags & ~KQ_TIMER_CB_ENQUEUED;
+ kc->next = kc_src->next;
+ kc->to = kc_src->to;
+ kc->cpuid = PCPU_GET(cpuid);
+ callout_init(&kc->c, 1);
+ kqtimer_sched_callout(kc);
+ return (0);
+}
+
static void
filt_timerstart(struct knote *kn, sbintime_t to)
{
@@ -1151,7 +1186,7 @@
sys_kqueue(struct thread *td, struct kqueue_args *uap)
{
- return (kern_kqueue(td, 0, NULL));
+ return (kern_kqueue(td, 0, false, NULL));
}
int
@@ -1159,55 +1194,76 @@
{
int flags;
- if ((uap->flags & ~(KQUEUE_CLOEXEC)) != 0)
+ if ((uap->flags & ~(KQUEUE_CLOEXEC | KQUEUE_CPONFORK)) != 0)
return (EINVAL);
flags = 0;
if ((uap->flags & KQUEUE_CLOEXEC) != 0)
flags |= O_CLOEXEC;
- return (kern_kqueue(td, flags, NULL));
+ return (kern_kqueue(td, flags, (uap->flags & KQUEUE_CPONFORK) != 0,
+ NULL));
}
static void
-kqueue_init(struct kqueue *kq)
+kqueue_init(struct kqueue *kq, bool cponfork)
{
mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK);
TAILQ_INIT(&kq->kq_head);
knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
+ if (cponfork)
+ kq->kq_state |= KQ_CPONFORK;
}
-int
-kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps)
+static int
+kern_kqueue_alloc(struct thread *td, struct filedesc *fdp, int *fdip,
+ struct file **fpp, int flags, struct filecaps *fcaps, bool cponfork,
+ struct kqueue **kqp)
{
- struct filedesc *fdp;
- struct kqueue *kq;
- struct file *fp;
struct ucred *cred;
- int fd, error;
+ struct kqueue *kq;
+ int error;
- fdp = td->td_proc->p_fd;
cred = td->td_ucred;
if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES)))
return (ENOMEM);
- error = falloc_caps(td, &fp, &fd, flags, fcaps);
+ error = fdip != NULL ? falloc_caps(td, fpp, fdip, flags, fcaps) :
+ _falloc_noinstall(td, fpp, 1);
if (error != 0) {
chgkqcnt(cred->cr_ruidinfo, -1, 0);
return (error);
}
/* An extra reference on `fp' has been held for us by falloc(). */
- kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
- kqueue_init(kq);
+ kq = malloc(sizeof(*kq), M_KQUEUE, M_WAITOK | M_ZERO);
+ kqueue_init(kq, cponfork);
kq->kq_fdp = fdp;
kq->kq_cred = crhold(cred);
- FILEDESC_XLOCK(fdp);
+ if (fdip != NULL)
+ FILEDESC_XLOCK(fdp);
TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
- FILEDESC_XUNLOCK(fdp);
+ if (fdip != NULL)
+ FILEDESC_XUNLOCK(fdp);
+
+ finit(*fpp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
+ *kqp = kq;
+ return (0);
+}
+
+int
+kern_kqueue(struct thread *td, int flags, bool cponfork, struct filecaps *fcaps)
+{
+ struct kqueue *kq;
+ struct file *fp;
+ int fd, error;
+
+ error = kern_kqueue_alloc(td, td->td_proc->p_fd, &fd, &fp, flags,
+ fcaps, cponfork, &kq);
+ if (error != 0)
+ return (error);
- finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
fdrop(fp, td);
td->td_retval[0] = fd;
@@ -1488,7 +1544,7 @@
struct kqueue kq = {};
int error;
- kqueue_init(&kq);
+ kqueue_init(&kq, false);
kq.kq_refcnt = 1;
error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL);
kqueue_drain(&kq, td);
@@ -1576,7 +1632,7 @@
mtx_lock(&filterops_lock);
KASSERT(sysfilt_ops[~filt].for_refcnt > 0,
- ("filter object refcount not valid on release"));
+ ("filter object %d refcount not valid on release", filt));
sysfilt_ops[~filt].for_refcnt--;
mtx_unlock(&filterops_lock);
}
@@ -1855,17 +1911,8 @@
}
static int
-kqueue_acquire(struct file *fp, struct kqueue **kqp)
+kqueue_acquire_ref(struct kqueue *kq)
{
- int error;
- struct kqueue *kq;
-
- error = 0;
-
- kq = fp->f_data;
- if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
- return (EINVAL);
- *kqp = kq;
KQ_LOCK(kq);
if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
KQ_UNLOCK(kq);
@@ -1873,8 +1920,22 @@
}
kq->kq_refcnt++;
KQ_UNLOCK(kq);
+ return (0);
+}
- return error;
+static int
+kqueue_acquire(struct file *fp, struct kqueue **kqp)
+{
+ struct kqueue *kq;
+ int error;
+
+ kq = fp->f_data;
+ if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
+ return (EINVAL);
+ error = kqueue_acquire_ref(kq);
+ if (error == 0)
+ *kqp = kq;
+ return (error);
}
static void
@@ -2937,6 +2998,152 @@
return (error);
}
+static int
+kqueue_fork_alloc(struct filedesc *fdp, struct file *fp, struct file **fp1,
+ struct thread *td)
+{
+ struct kqueue *kq, *kq1;
+ int error;
+
+ MPASS(fp->f_type == DTYPE_KQUEUE);
+ kq = fp->f_data;
+ if ((kq->kq_state & KQ_CPONFORK) == 0)
+ return (EOPNOTSUPP);
+ error = kqueue_acquire_ref(kq);
+ if (error != 0)
+ return (error);
+ error = kern_kqueue_alloc(td, fdp, NULL, fp1, 0, NULL, true, &kq1);
+ if (error == 0) {
+ kq1->kq_forksrc = kq;
+ (*fp1)->f_flag = fp->f_flag & (FREAD | FWRITE | FEXEC |
+ O_CLOEXEC | O_CLOFORK);
+ } else {
+ kqueue_release(kq, 0);
+ }
+ return (error);
+}
+
+static void
+kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1,
+ struct filedesc *fdp)
+{
+ struct knote *kn1;
+ const struct filterops *fop;
+ int error;
+
+ fop = kn->kn_fop;
+ if (fop->f_copy == NULL || (fop->f_isfd &&
+ fdp->fd_files->fdt_ofiles[kn->kn_kevent.ident].fde_file == NULL))
+ return;
+ error = kqueue_expand(kq1, fop, kn->kn_kevent.ident, M_WAITOK);
+ if (error != 0)
+ return;
+
+ kn1 = knote_alloc(M_WAITOK);
+ *kn1 = *kn;
+ kn1->kn_status |= KN_DETACHED;
+ kn1->kn_status &= ~KN_QUEUED;
+ kn1->kn_kq = kq1;
+ error = fop->f_copy(kn1, p1);
+ if (error != 0) {
+ knote_free(kn1);
+ return;
+ }
+ (void)kqueue_fo_find(kn->kn_kevent.filter);
+ if (fop->f_isfd && !fhold(kn1->kn_fp)) {
+ fop->f_detach(kn1);
+ kqueue_fo_release(kn->kn_kevent.filter);
+ knote_free(kn1);
+ return;
+ }
+ if (kn->kn_knlist != NULL)
+ knlist_add(kn->kn_knlist, kn1, 0);
+ KQ_LOCK(kq1);
+ knote_attach(kn1, kq1);
+ kn1->kn_influx = 0;
+ if ((kn->kn_status & KN_QUEUED) != 0)
+ knote_enqueue(kn1);
+ KQ_UNLOCK(kq1);
+}
+
+static void
+kqueue_fork_copy_list(struct klist *knlist, struct knote *marker,
+ struct kqueue *kq, struct kqueue *kq1, struct proc *p1,
+ struct filedesc *fdp)
+{
+ struct knote *kn;
+
+ KQ_OWNED(kq);
+ kn = SLIST_FIRST(knlist);
+ while (kn != NULL) {
+ if ((kn->kn_status & KN_DETACHED) != 0 ||
+ (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) {
+ kn = SLIST_NEXT(kn, kn_link);
+ continue;
+ }
+ kn_enter_flux(kn);
+ SLIST_INSERT_AFTER(kn, marker, kn_link);
+ KQ_UNLOCK(kq);
+ kqueue_fork_copy_knote(kq1, kn, p1, fdp);
+ KQ_LOCK(kq);
+ kn_leave_flux(kn);
+ kn = SLIST_NEXT(marker, kn_link);
+ /* XXXKIB switch kn_link to LIST? */
+ SLIST_REMOVE(knlist, marker, knote, kn_link);
+ }
+}
+
+static int
+kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1,
+ struct proc *p1, struct thread *td)
+{
+ struct kqueue *kq, *kq1;
+ struct knote *marker;
+ int error, i;
+
+ error = 0;
+ MPASS(fp == NULL);
+ MPASS(fp1->f_type == DTYPE_KQUEUE);
+
+ kq1 = fp1->f_data;
+ kq = kq1->kq_forksrc;
+ marker = knote_alloc(M_WAITOK);
+ marker->kn_status = KN_MARKER;
+
+ KQ_LOCK(kq);
+ for (i = 0; i < kq->kq_knlistsize; i++) {
+ kqueue_fork_copy_list(&kq->kq_knlist[i], marker, kq, kq1,
+ p1, fdp);
+ }
+ if (kq->kq_knhashmask != 0) {
+ for (i = 0; i <= kq->kq_knhashmask; i++) {
+ kqueue_fork_copy_list(&kq->kq_knhash[i], marker, kq,
+ kq1, p1, fdp);
+ }
+ }
+ kqueue_release(kq, 1);
+ kq1->kq_forksrc = NULL;
+ KQ_UNLOCK(kq);
+
+ knote_free(marker);
+ return (error);
+}
+
+static int
+kqueue_fork(struct filedesc *fdp, struct file *fp, struct file **fp1,
+ struct proc *p1, struct thread *td)
+{
+ if (*fp1 == NULL)
+ return (kqueue_fork_alloc(fdp, fp, fp1, td));
+ return (kqueue_fork_copy(fdp, fp, *fp1, p1, td));
+}
+
+int
+knote_triv_copy(struct knote *kn __unused, struct proc *p1 __unused)
+{
+ return (0);
+}
+
struct knote_status_export_bit {
int kn_status_bit;
int knt_status_bit;
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -423,7 +423,7 @@
pd = pdshare(p1->p_pd);
else
pd = pdcopy(p1->p_pd);
- fd = fdcopy(p1->p_fd);
+ fd = fdcopy(p1->p_fd, p2);
fdtol = NULL;
} else {
if (fr->fr_flags2 & FR2_SHARE_PATHS)
diff --git a/sys/kern/kern_jaildesc.c b/sys/kern/kern_jaildesc.c
--- a/sys/kern/kern_jaildesc.c
+++ b/sys/kern/kern_jaildesc.c
@@ -344,6 +344,7 @@
.f_isfd = 1,
.f_detach = jaildesc_kqops_detach,
.f_event = jaildesc_kqops_event,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -124,6 +124,7 @@
.f_attach = filt_sigattach,
.f_detach = filt_sigdetach,
.f_event = filt_signal,
+ .f_copy = knote_triv_copy,
};
static int kern_forcesigexit = 1;
diff --git a/sys/kern/subr_log.c b/sys/kern/subr_log.c
--- a/sys/kern/subr_log.c
+++ b/sys/kern/subr_log.c
@@ -79,6 +79,7 @@
.f_attach = NULL,
.f_detach = logkqdetach,
.f_event = logkqread,
+ .f_copy = knote_triv_copy,
};
static struct logsoftc {
diff --git a/sys/kern/sys_eventfd.c b/sys/kern/sys_eventfd.c
--- a/sys/kern/sys_eventfd.c
+++ b/sys/kern/sys_eventfd.c
@@ -85,13 +85,16 @@
static const struct filterops eventfd_rfiltops = {
.f_isfd = 1,
.f_detach = filt_eventfddetach,
- .f_event = filt_eventfdread
+ .f_event = filt_eventfdread,
+ .f_copy = knote_triv_copy,
};
+
static const struct filterops eventfd_wfiltops = {
.f_isfd = 1,
.f_detach = filt_eventfddetach,
- .f_event = filt_eventfdwrite
+ .f_event = filt_eventfdwrite,
+ .f_copy = knote_triv_copy,
};
struct eventfd {
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -181,20 +181,23 @@
static const struct filterops pipe_nfiltops = {
.f_isfd = 1,
.f_detach = filt_pipedetach_notsup,
- .f_event = filt_pipenotsup
+ .f_event = filt_pipenotsup,
/* no userdump */
+ .f_copy = knote_triv_copy,
};
static const struct filterops pipe_rfiltops = {
.f_isfd = 1,
.f_detach = filt_pipedetach,
.f_event = filt_piperead,
.f_userdump = filt_pipedump,
+ .f_copy = knote_triv_copy,
};
static const struct filterops pipe_wfiltops = {
.f_isfd = 1,
.f_detach = filt_pipedetach,
.f_event = filt_pipewrite,
.f_userdump = filt_pipedump,
+ .f_copy = knote_triv_copy,
};
/*
diff --git a/sys/kern/sys_procdesc.c b/sys/kern/sys_procdesc.c
--- a/sys/kern/sys_procdesc.c
+++ b/sys/kern/sys_procdesc.c
@@ -486,6 +486,7 @@
.f_isfd = 1,
.f_detach = procdesc_kqops_detach,
.f_event = procdesc_kqops_event,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -754,12 +754,14 @@
.f_isfd = 1,
.f_detach = tty_kqops_read_detach,
.f_event = tty_kqops_read_event,
+ .f_copy = knote_triv_copy,
};
static const struct filterops tty_kqops_write = {
.f_isfd = 1,
.f_detach = tty_kqops_write_detach,
.f_event = tty_kqops_write_event,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/kern/tty_pts.c b/sys/kern/tty_pts.c
--- a/sys/kern/tty_pts.c
+++ b/sys/kern/tty_pts.c
@@ -491,11 +491,13 @@
.f_isfd = 1,
.f_detach = pts_kqops_read_detach,
.f_event = pts_kqops_read_event,
+ .f_copy = knote_triv_copy,
};
static const struct filterops pts_kqops_write = {
.f_isfd = 1,
.f_detach = pts_kqops_write_detach,
.f_event = pts_kqops_write_event,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -281,11 +281,13 @@
.f_isfd = 1,
.f_detach = filt_mqdetach,
.f_event = filt_mqread,
+ .f_copy = knote_triv_copy,
};
static const struct filterops mq_wfiltops = {
.f_isfd = 1,
.f_detach = filt_mqdetach,
.f_event = filt_mqwrite,
+ .f_copy = knote_triv_copy,
};
/*
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -191,16 +191,19 @@
.f_isfd = 1,
.f_detach = filt_sordetach,
.f_event = filt_soread,
+ .f_copy = knote_triv_copy,
};
static const struct filterops sowrite_filtops = {
.f_isfd = 1,
.f_detach = filt_sowdetach,
.f_event = filt_sowrite,
+ .f_copy = knote_triv_copy,
};
static const struct filterops soempty_filtops = {
.f_isfd = 1,
.f_detach = filt_sowdetach,
.f_event = filt_soempty,
+ .f_copy = knote_triv_copy,
};
so_gen_t so_gencnt; /* generation count for sockets */
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1855,11 +1855,13 @@
.f_isfd = 1,
.f_detach = uipc_filt_sowdetach,
.f_event = uipc_filt_sowrite,
+ .f_copy = knote_triv_copy,
};
static const struct filterops uipc_empty_filtops = {
.f_isfd = 1,
.f_detach = uipc_filt_sowdetach,
.f_event = uipc_filt_soempty,
+ .f_copy = knote_triv_copy,
};
static int
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -345,12 +345,14 @@
.f_attach = filt_aioattach,
.f_detach = filt_aiodetach,
.f_event = filt_aio,
+ .f_copy = knote_triv_copy,
};
static const struct filterops lio_filtops = {
.f_isfd = 0,
.f_attach = filt_lioattach,
.f_detach = filt_liodetach,
- .f_event = filt_lio
+ .f_event = filt_lio,
+ .f_copy = knote_triv_copy,
};
static eventhandler_tag exit_tag, exec_tag;
diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c
--- a/sys/kern/vfs_inotify.c
+++ b/sys/kern/vfs_inotify.c
@@ -111,6 +111,7 @@
.f_isfd = 1,
.f_detach = filt_inotifydetach,
.f_event = filt_inotifyevent,
+ .f_copy = knote_triv_copy,
};
static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures");
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -6545,6 +6545,7 @@
.f_attach = filt_fsattach,
.f_detach = filt_fsdetach,
.f_event = filt_fsevent,
+ .f_copy = knote_triv_copy,
};
static int
@@ -6624,24 +6625,28 @@
static void filt_vfsdetach(struct knote *kn);
static int filt_vfsdump(struct proc *p, struct knote *kn,
struct kinfo_knote *kin);
+static int filt_vfscopy(struct knote *kn, struct proc *p1);
static const struct filterops vfsread_filtops = {
.f_isfd = 1,
.f_detach = filt_vfsdetach,
.f_event = filt_vfsread,
.f_userdump = filt_vfsdump,
+ .f_copy = filt_vfscopy,
};
static const struct filterops vfswrite_filtops = {
.f_isfd = 1,
.f_detach = filt_vfsdetach,
.f_event = filt_vfswrite,
.f_userdump = filt_vfsdump,
+ .f_copy = filt_vfscopy,
};
static const struct filterops vfsvnode_filtops = {
.f_isfd = 1,
.f_detach = filt_vfsdetach,
.f_event = filt_vfsvnode,
.f_userdump = filt_vfsdump,
+ .f_copy = filt_vfscopy,
};
static void
@@ -6825,6 +6830,16 @@
return (0);
}
+static int
+filt_vfscopy(struct knote *kn, struct proc *p1)
+{
+ struct vnode *vp;
+
+ vp = (struct vnode *)kn->kn_hook;
+ vhold(vp);
+ return (0);
+}
+
int
vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off)
{
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -253,12 +253,14 @@
.f_isfd = 1,
.f_detach = filt_bpfdetach,
.f_event = filt_bpfread,
+ .f_copy = knote_triv_copy,
};
static const struct filterops bpfwrite_filtops = {
.f_isfd = 1,
.f_detach = filt_bpfdetach,
.f_event = filt_bpfwrite,
+ .f_copy = knote_triv_copy,
};
/*
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -261,6 +261,7 @@
.f_attach = NULL,
.f_detach = tunkqdetach,
.f_event = tunkqread,
+ .f_copy = knote_triv_copy,
};
static const struct filterops tun_write_filterops = {
@@ -268,6 +269,7 @@
.f_attach = NULL,
.f_detach = tunkqdetach,
.f_event = tunkqwrite,
+ .f_copy = knote_triv_copy,
};
static struct tuntap_driver {
diff --git a/sys/security/audit/audit_pipe.c b/sys/security/audit/audit_pipe.c
--- a/sys/security/audit/audit_pipe.c
+++ b/sys/security/audit/audit_pipe.c
@@ -243,6 +243,7 @@
.f_attach = NULL,
.f_detach = audit_pipe_kqdetach,
.f_event = audit_pipe_kqread,
+ .f_copy = knote_triv_copy,
};
/*
diff --git a/sys/sys/event.h b/sys/sys/event.h
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -228,6 +228,7 @@
/* Flags for kqueuex(2) */
#define KQUEUE_CLOEXEC 0x00000001 /* close on exec */
+#define KQUEUE_CPONFORK 0x00000002 /* copy on fork */
struct knote;
SLIST_HEAD(klist, knote);
@@ -283,6 +284,7 @@
void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type);
int (*f_userdump)(struct proc *p, struct knote *kn,
struct kinfo_knote *kin);
+ int (*f_copy)(struct knote *kn, struct proc *p1);
};
/*
@@ -346,6 +348,7 @@
void knote(struct knlist *list, long hint, int lockflags);
void knote_fork(struct knlist *list, int pid);
+int knote_triv_copy(struct knote *kn, struct proc *p1);
struct knlist *knlist_alloc(struct mtx *lock);
void knlist_detach(struct knlist *knl);
void knlist_add(struct knlist *knl, struct knote *kn, int islocked);
diff --git a/sys/sys/eventvar.h b/sys/sys/eventvar.h
--- a/sys/sys/eventvar.h
+++ b/sys/sys/eventvar.h
@@ -55,12 +55,14 @@
#define KQ_CLOSING 0x10
#define KQ_TASKSCHED 0x20 /* task scheduled */
#define KQ_TASKDRAIN 0x40 /* waiting for task to drain */
+#define KQ_CPONFORK 0x80
int kq_knlistsize; /* size of knlist */
struct klist *kq_knlist; /* list of knotes */
u_long kq_knhashmask; /* size of knhash */
struct klist *kq_knhash; /* hash table for knotes */
struct task kq_task;
struct ucred *kq_cred;
+ struct kqueue *kq_forksrc;
};
#endif /* !_SYS_EVENTVAR_H_ */
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -139,6 +139,8 @@
off_t *offset, off_t *length, int flags,
struct ucred *active_cred, struct thread *td);
typedef int fo_cmp_t(struct file *fp, struct file *fp1, struct thread *td);
+typedef int fo_fork_t(struct filedesc *fdp, struct file *fp, struct file **fp1,
+ struct proc *p1, struct thread *td);
typedef int fo_spare_t(struct file *fp);
typedef int fo_flags_t;
@@ -163,12 +165,14 @@
fo_fallocate_t *fo_fallocate;
fo_fspacectl_t *fo_fspacectl;
fo_cmp_t *fo_cmp;
+ fo_fork_t *fo_fork;
fo_spare_t *fo_spares[8]; /* Spare slots */
fo_flags_t fo_flags; /* DFLAG_* below */
};
#define DFLAG_PASSABLE 0x01 /* may be passed via unix sockets. */
#define DFLAG_SEEKABLE 0x02 /* seekable / nonsequential */
+#define DFLAG_FORK 0x04 /* copy on fork */
#endif /* _KERNEL */
#if defined(_KERNEL) || defined(_WANT_FILE)
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -265,7 +265,7 @@
void fdclose(struct thread *td, struct file *fp, int idx);
void fdcloseexec(struct thread *td);
void fdsetugidsafety(struct thread *td);
-struct filedesc *fdcopy(struct filedesc *fdp);
+struct filedesc *fdcopy(struct filedesc *fdp, struct proc *p1);
void fdunshare(struct thread *td);
void fdescfree(struct thread *td);
int fdlastfile(struct filedesc *fdp);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -211,7 +211,8 @@
int nevents, struct kevent_copyops *k_ops,
const struct timespec *timeout);
int kern_kill(struct thread *td, pid_t pid, int signum);
-int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
+int kern_kqueue(struct thread *td, int flags, bool cponfork,
+ struct filecaps *fcaps);
int kern_kldload(struct thread *td, const char *file, int *fileid);
int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
int kern_kldunload(struct thread *td, int fileid, int flags);
diff --git a/sys/x86/acpica/acpi_apm.c b/sys/x86/acpica/acpi_apm.c
--- a/sys/x86/acpica/acpi_apm.c
+++ b/sys/x86/acpica/acpi_apm.c
@@ -64,6 +64,7 @@
.f_isfd = 1,
.f_detach = apmreadfiltdetach,
.f_event = apmreadfilt,
+ .f_copy = knote_triv_copy,
};
static struct cdevsw apm_cdevsw = {

File Metadata

Mime Type
text/plain
Expires
Sun, May 24, 12:07 AM (13 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33459588
Default Alt Text
D52045.id164355.diff (34 KB)

Event Timeline