Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -482,7 +482,7 @@ p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; p->p_flag2 = 0; p->p_state = PRS_NORMAL; - knlist_init_mtx(&p->p_klist, &p->p_mtx); + p->p_klist = knlist_alloc(&p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; /* pid_max cannot be greater than PID_MAX */ Index: sys/kern/kern_event.c =================================================================== --- sys/kern/kern_event.c +++ sys/kern/kern_event.c @@ -227,14 +227,33 @@ #define KQ_NOTOWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ } while (0) -#define KN_LIST_LOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg); \ -} while (0) -#define KN_LIST_UNLOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg); \ -} while (0) + +static struct knlist * +kn_list_lock(struct knote *kn) +{ + struct knlist *knl; + + knl = kn->kn_knlist; + if (knl != NULL) + knl->kl_lock(knl->kl_lockarg); + return (knl); +} + +static void +kn_list_unlock(struct knlist *knl) +{ + bool do_free; + + if (knl == NULL) + return; + do_free = knl->kl_autodestroy && knlist_empty(knl); + knl->kl_unlock(knl->kl_lockarg); + if (do_free) { + knlist_destroy(knl); + free(knl, M_KQUEUE); + } +} + #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ @@ -350,16 +369,16 @@ filt_procattach(struct knote *kn) { struct proc *p; - int immediate; int error; + bool exiting, immediate; - immediate = 0; + exiting = immediate = false; p = pfind(kn->kn_id); if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { p = zpfind(kn->kn_id); - immediate = 1; + exiting = true; } else if (p != NULL && (p->p_flag & P_WEXIT)) { - immediate = 1; + exiting = true; } if (p == NULL) @@ -380,8 +399,8 @@ kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; - kn->kn_sfflags &= ~NOTE_EXIT; - immediate = 1; /* Force immediate activation of child note. */ + kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); + immediate = true; /* Force immediate activation of child note. */ } /* * Internal flag indicating registration done by kernel (for other than @@ -391,8 +410,7 @@ kn->kn_flags &= ~EV_FLAG1; } - if (immediate == 0) - knlist_add(&p->p_klist, kn, 1); + knlist_add(p->p_klist, kn, 1); /* * Immediately activate any child notes or, in the case of a zombie @@ -400,7 +418,7 @@ * case where the target process, e.g. a child, dies before the kevent * is registered. */ - if (immediate && filt_proc(kn, NOTE_EXIT)) + if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) KNOTE_ACTIVATE(kn, 0); PROC_UNLOCK(p); @@ -420,10 +438,8 @@ static void filt_procdetach(struct knote *kn) { - struct proc *p; - p = kn->kn_ptr.p_proc; - knlist_remove(&p->p_klist, kn, 0); + knlist_remove(kn->kn_knlist, kn, 0); kn->kn_ptr.p_proc = NULL; } @@ -444,8 +460,6 @@ /* Process is gone, so flag the event as finished. */ if (event == NOTE_EXIT) { - if (!(kn->kn_status & KN_DETACHED)) - knlist_remove_inevent(&p->p_klist, kn); kn->kn_flags |= EV_EOF | EV_ONESHOT; kn->kn_ptr.p_proc = NULL; if (kn->kn_fflags & NOTE_EXIT) @@ -525,7 +539,8 @@ */ kev.ident = pid; kev.filter = kn->kn_filter; - kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2; + kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | + EV_FLAG2; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ @@ -1137,6 +1152,7 @@ struct filterops *fops; struct file *fp; struct knote *kn, *tkn; + struct knlist *knl; cap_rights_t rights; int error, filt, event; int haskqglobal, filedesc_unlock; @@ -1146,6 +1162,7 @@ fp = NULL; kn = NULL; + knl = NULL; error = 0; haskqglobal = 0; filedesc_unlock = 0; @@ -1300,7 +1317,7 @@ knote_drop(kn, td); goto done; } - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); goto done_ev_add; } else { /* No matching knote and the EV_ADD flag is not set. */ @@ -1331,7 +1348,7 @@ */ kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); kn->kn_kevent.udata = kev->udata; if (!fops->f_isfd && fops->f_touch != NULL) { fops->f_touch(kn, kev, EVENT_REGISTER); @@ -1365,7 +1382,7 @@ KN_ACTIVE) knote_enqueue(kn); kn->kn_status &= ~(KN_INFLUX | KN_SCAN); - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); done: @@ -1535,6 +1552,7 @@ { struct kevent *kevp; struct knote *kn, *marker; + struct knlist *knl; sbintime_t asbt, rsbt; int count, error, haskqglobal, influx, nkev, touch; @@ -1660,7 +1678,7 @@ KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); @@ -1668,7 +1686,7 @@ ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | KN_SCAN); kq->kq_count--; - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); influx = 1; continue; } @@ -1697,7 +1715,7 @@ TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~(KN_INFLUX | KN_SCAN); - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); influx = 1; } @@ -2062,7 +2080,8 @@ } static void -knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) +knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, + int kqislocked) { KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); @@ -2075,7 +2094,7 @@ SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) - knl->kl_unlock(knl->kl_lockarg); + kn_list_unlock(knl); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; @@ -2093,17 +2112,6 @@ knlist_remove_kq(knl, kn, islocked, 0); } -/* - * remove knote from the specified knlist while in f_event handler. - */ -void -knlist_remove_inevent(struct knlist *knl, struct knote *kn) -{ - - knlist_remove_kq(knl, kn, 1, - (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK); -} - int knlist_empty(struct knlist *knl) { @@ -2202,6 +2210,7 @@ else knl->kl_assert_unlocked = kl_assert_unlocked; + knl->kl_autodestroy = false; SLIST_INIT(&knl->kl_list); } @@ -2212,6 +2221,16 @@ knlist_init(knl, lock, NULL, NULL, NULL, NULL); } +struct knlist * +knlist_alloc(struct mtx *lock) +{ + struct knlist *knl; + + knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); + knlist_init_mtx(knl, lock); + return (knl); +} + void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock) { @@ -2237,6 +2256,18 @@ SLIST_INIT(&knl->kl_list); } +void +knlist_detach(struct knlist *knl) +{ + + KNL_ASSERT_LOCKED(knl); + knl->kl_autodestroy = true; + if (knlist_empty(knl)) { + knlist_destroy(knl); + free(knl, M_KQUEUE); + } +} + /* * Even if we are locked, we may need to drop the lock to allow any influx * knotes time to "settle". @@ -2247,6 +2278,7 @@ struct knote *kn, *kn2; struct kqueue *kq; + KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); if (islocked) KNL_ASSERT_LOCKED(knl); else { Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -832,7 +832,7 @@ * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ - KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); + KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ Index: sys/kern/kern_exit.c =================================================================== --- sys/kern/kern_exit.c +++ sys/kern/kern_exit.c @@ -512,7 +512,7 @@ /* * Notify interested parties of our demise. */ - KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); + KNOTE_LOCKED(p->p_klist, NOTE_EXIT); #ifdef KDTRACE_HOOKS int reason = CLD_EXITED; @@ -524,13 +524,6 @@ #endif /* - * Just delete all entries in the p_klist. At this point we won't - * report any more events, and there are nasty race conditions that - * can beat us if we don't. - */ - knlist_clear(&p->p_klist, 1); - - /* * If this is a process with a descriptor, we may not need to deliver * a signal to the parent. proctree_lock is held over * procdesc_exit() to serialize concurrent calls to close() and @@ -603,12 +596,6 @@ PROC_UNLOCK(p->p_pptr); /* - * Hopefully no one will try to deliver a signal to the process this - * late in the game. - */ - knlist_destroy(&p->p_klist); - - /* * Save our children's rusage information in our exit rusage. */ PROC_STATLOCK(p); @@ -853,6 +840,11 @@ procdesc_reap(p); sx_xunlock(&proctree_lock); + PROC_LOCK(p); + knlist_detach(p->p_klist); + p->p_klist = NULL; + PROC_UNLOCK(p); + /* * Removal from allproc list and process group list paired with * PROC_LOCK which was executed during that time should guarantee Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c +++ sys/kern/kern_fork.c @@ -748,7 +748,7 @@ /* * Tell any interested parties about the new process. */ - knote_fork(&p1->p_klist, p2->p_pid); + knote_fork(p1->p_klist, p2->p_pid); SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags); if (fr->fr_flags & RFPROCDESC) { @@ -950,7 +950,7 @@ #ifdef MAC mac_proc_init(newproc); #endif - knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx); + newproc->p_klist = knlist_alloc(&newproc->p_mtx); STAILQ_INIT(&newproc->p_ktr); /* We have to lock the process tree while we look for a pid. */ Index: sys/kern/kern_sig.c =================================================================== --- sys/kern/kern_sig.c +++ sys/kern/kern_sig.c @@ -2112,7 +2112,7 @@ } ps = p->p_sigacts; - KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig); + KNOTE_LOCKED(p->p_klist, NOTE_SIGNAL | sig); prop = sigprop(sig); if (td == NULL) { @@ -3502,7 +3502,7 @@ kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ - knlist_add(&p->p_klist, kn, 0); + knlist_add(p->p_klist, kn, 0); return (0); } @@ -3512,7 +3512,7 @@ { struct proc *p = kn->kn_ptr.p_proc; - knlist_remove(&p->p_klist, kn, 0); + knlist_remove(p->p_klist, kn, 0); } /* Index: sys/sys/event.h =================================================================== --- sys/sys/event.h +++ sys/sys/event.h @@ -158,7 +158,8 @@ void (*kl_unlock)(void *); void (*kl_assert_locked)(void *); void (*kl_assert_unlocked)(void *); - void *kl_lockarg; /* argument passed to kl_lockf() */ + void *kl_lockarg; /* argument passed to kl_lock() */ + bool kl_autodestroy; }; @@ -258,9 +259,10 @@ extern void knote(struct knlist *list, long hint, int lockflags); extern void knote_fork(struct knlist *list, int pid); +extern struct knlist *knlist_alloc(struct mtx *lock); +extern void knlist_detach(struct knlist *knl); extern void knlist_add(struct knlist *knl, struct knote *kn, int islocked); extern void knlist_remove(struct knlist *knl, struct knote *kn, int islocked); -extern void knlist_remove_inevent(struct knlist *knl, struct knote *kn); extern int knlist_empty(struct knlist *knl); extern void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h +++ sys/sys/proc.h @@ -611,7 +611,7 @@ /* End area that is copied on creation. */ #define p_endcopy p_xsig struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ - struct knlist p_klist; /* (c) Knotes attached to this proc. */ + struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */