Index: sys/kern/kern_event.c
===================================================================
--- sys/kern/kern_event.c
+++ sys/kern/kern_event.c
@@ -193,7 +193,7 @@
 SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
     &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue");
 
-/* XXX - ensure not KN_INFLUX?? */
+/* XXX - ensure not in flux? */
 #define KNOTE_ACTIVATE(kn, islock) do {					\
 	if ((islock))							\
 		mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED);		\
@@ -254,6 +254,32 @@
 	}
 }
 
+static bool
+kn_in_flux(struct knote *kn)
+{
+
+	return (kn->kn_influx > 0);
+}
+
+static void
+kn_enter_flux(struct knote *kn)
+{
+
+	KQ_OWNED(kn->kn_kq);
+	MPASS(kn->kn_influx < INT_MAX);
+	kn->kn_influx++;
+}
+
+static bool
+kn_leave_flux(struct knote *kn)
+{
+
+	KQ_OWNED(kn->kn_kq);
+	MPASS(kn->kn_influx > 0);
+	kn->kn_influx--;
+	return (kn->kn_influx == 0);
+}
+
 #define KNL_ASSERT_LOCK(knl, islocked) do {				\
 	if (islocked)							\
 		KNL_ASSERT_LOCKED(knl);					\
@@ -498,7 +524,7 @@
 	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
+		if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
 			KQ_UNLOCK(kq);
 			continue;
 		}
@@ -521,7 +547,7 @@
 		 * track the child. Drop the locks in preparation for
 		 * the call to kqueue_register().
 		 */
-		kn->kn_status |= KN_INFLUX;
+		kn_enter_flux(kn);
 		KQ_UNLOCK(kq);
 		list->kl_unlock(list->kl_lockarg);
 
@@ -561,7 +587,7 @@
 		if (kn->kn_fop->f_event(kn, NOTE_FORK))
 			KNOTE_ACTIVATE(kn, 0);
 		KQ_LOCK(kq);
-		kn->kn_status &= ~KN_INFLUX;
+		kn_leave_flux(kn);
 		KQ_UNLOCK_FLUX(kq);
 		list->kl_lock(list->kl_lockarg);
 	}
@@ -1258,7 +1284,7 @@
 	}
 
 	/* knote is in the process of changing, wait for it to stabilize. */
-	if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+	if (kn != NULL && kn_in_flux(kn)) {
 		KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
 		if (filedesc_unlock) {
 			FILEDESC_XUNLOCK(td->td_proc->p_fd);
@@ -1302,7 +1328,8 @@
 			kn->kn_kevent = *kev;
 			kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE |
 			    EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT);
-			kn->kn_status = KN_INFLUX|KN_DETACHED;
+			kn->kn_status = KN_DETACHED;
+			kn_enter_flux(kn);
 
 			error = knote_attach(kn, kq);
 			KQ_UNLOCK(kq);
@@ -1326,7 +1353,7 @@
 	}
 
 	if (kev->flags & EV_DELETE) {
-		kn->kn_status |= KN_INFLUX;
+		kn_enter_flux(kn);
 		KQ_UNLOCK(kq);
 		if (!(kn->kn_status & KN_DETACHED))
 			kn->kn_fop->f_detach(kn);
@@ -1344,7 +1371,8 @@
 	 * but doing so will not reset any filter which has already been
 	 * triggered.
 	 */
-	kn->kn_status |= KN_INFLUX | KN_SCAN;
+	kn->kn_status |= KN_SCAN;
+	kn_enter_flux(kn);
 	KQ_UNLOCK(kq);
 	knl = kn_list_lock(kn);
 	kn->kn_kevent.udata = kev->udata;
@@ -1379,7 +1407,8 @@
 	if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) ==
 	    KN_ACTIVE)
 		knote_enqueue(kn);
-	kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
+	kn->kn_status &= ~KN_SCAN;
+	kn_leave_flux(kn);
 	kn_list_unlock(knl);
 	KQ_UNLOCK_FLUX(kq);
 
@@ -1542,7 +1571,7 @@
 
 /*
  * Scan, update kn_data (if not ONESHOT), and copyout triggered events.
- * We treat KN_MARKER knotes as if they are INFLUX.
+ * We treat KN_MARKER knotes as if they are in flux.
  */
 static int
 kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops,
@@ -1616,7 +1645,7 @@
 
 		kn = TAILQ_FIRST(&kq->kq_head);
 		if ((kn->kn_status == KN_MARKER && kn != marker) ||
-		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+		    kn_in_flux(kn)) {
 			if (influx) {
 				influx = 0;
 				KQ_FLUX_WAKEUP(kq);
@@ -1639,17 +1668,17 @@
 				goto retry;
 			goto done;
 		}
 
-		KASSERT((kn->kn_status & KN_INFLUX) == 0,
-		    ("KN_INFLUX set when not suppose to be"));
+		KASSERT(!kn_in_flux(kn),
+		    ("knote %p is unexpectedly in flux", kn));
 		if ((kn->kn_flags & EV_DROP) == EV_DROP) {
 			kn->kn_status &= ~KN_QUEUED;
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			kq->kq_count--;
 			KQ_UNLOCK(kq);
 			/*
-			 * We don't need to lock the list since we've marked
-			 * it _INFLUX.
+			 * We don't need to lock the list since we've
+			 * marked the knote as in flux.
 			 */
 			if (!(kn->kn_status & KN_DETACHED))
 				kn->kn_fop->f_detach(kn);
@@ -1658,12 +1687,12 @@
 			continue;
 		} else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) {
 			kn->kn_status &= ~KN_QUEUED;
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			kq->kq_count--;
 			KQ_UNLOCK(kq);
 			/*
-			 * We don't need to lock the list since we've marked
-			 * it _INFLUX.
+			 * We don't need to lock the list since we've
+			 * marked the knote as in flux.
 			 */
 			*kevp = kn->kn_kevent;
 			if (!(kn->kn_status & KN_DETACHED))
@@ -1672,7 +1701,8 @@
 			KQ_LOCK(kq);
 			kn = NULL;
 		} else {
-			kn->kn_status |= KN_INFLUX | KN_SCAN;
+			kn->kn_status |= KN_SCAN;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE)
 				KQ_GLOBAL_LOCK(&kq_global, haskqglobal);
@@ -1680,9 +1710,9 @@
 			if (kn->kn_fop->f_event(kn, 0) == 0) {
 				KQ_LOCK(kq);
 				KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal);
-				kn->kn_status &=
-				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX |
+				kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE |
 				    KN_SCAN);
+				kn_leave_flux(kn);
 				kq->kq_count--;
 				kn_list_unlock(knl);
 				influx = 1;
@@ -1712,7 +1742,8 @@
 			} else
 				TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
 
-			kn->kn_status &= ~(KN_INFLUX | KN_SCAN);
+			kn->kn_status &= ~KN_SCAN;
+			kn_leave_flux(kn);
 			kn_list_unlock(knl);
 			influx = 1;
 		}
@@ -1860,12 +1891,12 @@
 
 	for (i = 0; i < kq->kq_knlistsize; i++) {
 		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
-			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+			if (kn_in_flux(kn)) {
 				kq->kq_state |= KQ_FLUXWAIT;
 				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
 				continue;
 			}
-			kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			if (!(kn->kn_status & KN_DETACHED))
 				kn->kn_fop->f_detach(kn);
@@ -1876,13 +1907,13 @@
 	if (kq->kq_knhashmask != 0) {
 		for (i = 0; i <= kq->kq_knhashmask; i++) {
 			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
-				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+				if (kn_in_flux(kn)) {
 					kq->kq_state |= KQ_FLUXWAIT;
 					msleep(kq, &kq->kq_lock, PSOCK,
 					    "kqclo2", 0);
 					continue;
 				}
-				kn->kn_status |= KN_INFLUX;
+				kn_enter_flux(kn);
 				KQ_UNLOCK(kq);
 				if (!(kn->kn_status & KN_DETACHED))
 					kn->kn_fop->f_detach(kn);
@@ -2006,7 +2037,6 @@
 	struct kqueue *kq;
 	struct knote *kn, *tkn;
 	int error;
-	bool own_influx;
 
 	if (list == NULL)
 		return;
@@ -2017,7 +2047,7 @@
 		list->kl_lock(list->kl_lockarg);
 
 	/*
-	 * If we unlock the list lock (and set KN_INFLUX), we can
+	 * If we unlock the list lock (and enter influx), we can
	 * eliminate the kqueue scheduling, but this will introduce
	 * four lock/unlock's for each knote to test.  Also, marker
	 * would be needed to keep iteration position, since filters
@@ -2026,7 +2056,7 @@
 	SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) {
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) {
+		if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) {
 			/*
 			 * Do not process the influx notes, except for
 			 * the influx coming from the kq unlock in the
@@ -2037,14 +2067,11 @@
 			 */
 			KQ_UNLOCK(kq);
 		} else if ((lockflags & KNF_NOKQLOCK) != 0) {
-			own_influx = (kn->kn_status & KN_INFLUX) == 0;
-			if (own_influx)
-				kn->kn_status |= KN_INFLUX;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			error = kn->kn_fop->f_event(kn, hint);
 			KQ_LOCK(kq);
-			if (own_influx)
-				kn->kn_status &= ~KN_INFLUX;
+			kn_leave_flux(kn);
 			if (error)
 				KNOTE_ACTIVATE(kn, 1);
 			KQ_UNLOCK_FLUX(kq);
@@ -2066,10 +2093,12 @@
 void
 knlist_add(struct knlist *knl, struct knote *kn, int islocked)
 {
+
 	KNL_ASSERT_LOCK(knl, islocked);
 	KQ_NOTOWNED(kn->kn_kq);
-	KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) ==
-	    (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED"));
+	KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn));
+	KASSERT((kn->kn_status & KN_DETACHED) != 0,
+	    ("knote %p was not detached", kn));
 	if (!islocked)
 		knl->kl_lock(knl->kl_lockarg);
 	SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
@@ -2085,12 +2114,13 @@
 knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked,
     int kqislocked)
 {
-	KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked"));
+
+	KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked"));
 	KNL_ASSERT_LOCK(knl, knlislocked);
 	mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED);
-	if (!kqislocked)
-		KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX,
-		    ("knlist_remove called w/o knote being KN_INFLUX or already removed"));
+	KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn));
+	KASSERT((kn->kn_status & KN_DETACHED) == 0,
+	    ("knote %p was already detached", kn));
 	if (!knlislocked)
 		knl->kl_lock(knl->kl_lockarg);
 	SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
@@ -2119,12 +2149,12 @@
 {
 
 	KNL_ASSERT_LOCKED(knl);
-	return SLIST_EMPTY(&knl->kl_list);
+	return (SLIST_EMPTY(&knl->kl_list));
 }
 
-static struct mtx	knlist_lock;
+static struct mtx knlist_lock;
 MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
-	MTX_DEF);
+    MTX_DEF);
 
 static void knlist_mtx_lock(void *arg);
 static void knlist_mtx_unlock(void *arg);
@@ -2245,17 +2275,8 @@
 knlist_destroy(struct knlist *knl)
 {
 
-#ifdef INVARIANTS
-	/*
-	 * if we run across this error, we need to find the offending
-	 * driver and have it call knlist_clear or knlist_delete.
-	 */
-	if (!SLIST_EMPTY(&knl->kl_list))
-		printf("WARNING: destroying knlist w/ knotes on it!\n");
-#endif
-
-	knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL;
-	SLIST_INIT(&knl->kl_list);
+	KASSERT(KNLIST_EMPTY(knl),
+	    ("destroying knlist %p with knotes on it", knl));
 }
 
 void
@@ -2292,30 +2313,30 @@
 	SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		if ((kn->kn_status & KN_INFLUX)) {
+		if (kn_in_flux(kn)) {
 			KQ_UNLOCK(kq);
 			continue;
 		}
 		knlist_remove_kq(knl, kn, 1, 1);
 		if (killkn) {
-			kn->kn_status |= KN_INFLUX | KN_DETACHED;
+			kn->kn_status |= KN_DETACHED;
+			kn_enter_flux(kn);
 			KQ_UNLOCK(kq);
 			knote_drop(kn, td);
 		} else {
 			/* Make sure cleared knotes disappear soon */
-			kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+			kn->kn_flags |= EV_EOF | EV_ONESHOT;
 			KQ_UNLOCK(kq);
 		}
 		kq = NULL;
 	}
 
 	if (!SLIST_EMPTY(&knl->kl_list)) {
-		/* there are still KN_INFLUX remaining */
+		/* there are still in-flux knotes remaining */
 		kn = SLIST_FIRST(&knl->kl_list);
 		kq = kn->kn_kq;
 		KQ_LOCK(kq);
-		KASSERT(kn->kn_status & KN_INFLUX,
-		    ("knote removed w/o list lock"));
+		KASSERT(kn_in_flux(kn), ("knote removed w/o list lock"));
 		knl->kl_unlock(knl->kl_lockarg);
 		kq->kq_state |= KQ_FLUXWAIT;
 		msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
@@ -2357,7 +2378,7 @@
 	influx = 0;
 	while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
-		if (kn->kn_status & KN_INFLUX) {
+		if (kn_in_flux(kn)) {
 			/* someone else might be waiting on our knote */
 			if (influx)
 				wakeup(kq);
 			kq->kq_state |= KQ_FLUXWAIT;
@@ -2365,7 +2386,7 @@
 			msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
 			goto again;
 		}
-		kn->kn_status |= KN_INFLUX;
+		kn_enter_flux(kn);
 		KQ_UNLOCK(kq);
 		if (!(kn->kn_status & KN_DETACHED))
 			kn->kn_fop->f_detach(kn);
@@ -2382,27 +2403,25 @@
 {
 	struct klist *list;
 
-	KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX"));
+	KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn));
 	KQ_OWNED(kq);
 
 	if (kn->kn_fop->f_isfd) {
 		if (kn->kn_id >= kq->kq_knlistsize)
-			return ENOMEM;
+			return (ENOMEM);
 		list = &kq->kq_knlist[kn->kn_id];
 	} else {
 		if (kq->kq_knhash == NULL)
-			return ENOMEM;
+			return (ENOMEM);
 		list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
 	}
-
 	SLIST_INSERT_HEAD(list, kn, kn_link);
-
-	return 0;
+	return (0);
 }
 
 /*
  * knote must already have been detached using the f_detach method.
- * no lock need to be held, it is assumed that the KN_INFLUX flag is set
+ * no lock needs to be held; it is assumed that the influx state is set
  * to prevent other removal.
  */
@@ -2414,10 +2433,10 @@
 	kq = kn->kn_kq;
 
 	KQ_NOTOWNED(kq);
-	KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX,
-	    ("knote_drop called without KN_INFLUX set in kn_status"));
-
 	KQ_LOCK(kq);
+	KASSERT(kn->kn_influx == 1,
+	    ("knote_drop called on %p with influx %d", kn, kn->kn_influx));
+
 	if (kn->kn_fop->f_isfd)
 		list = &kq->kq_knlist[kn->kn_id];
 	else
@@ -2507,11 +2526,9 @@
 		goto noacquire;
 
 	error = kqueue_register(kq, kev, td, waitok);
-
 	kqueue_release(kq, 0);
 
 noacquire:
 	fdrop(fp, td);
-
-	return error;
+	return (error);
 }
Index: sys/sys/event.h
===================================================================
--- sys/sys/event.h
+++ sys/sys/event.h
@@ -202,8 +202,12 @@
 };
 
 /*
- * Setting the KN_INFLUX flag enables you to unlock the kq that this knote
- * is on, and modify kn_status as if you had the KQ lock.
+ * Setting the influx state allows a thread to unlock the kq that this
+ * knote is on and still be assured that the knote is not dropped.  If
+ * the KN_SCAN flag is not set, a thread may only enter flux when it is
+ * the exclusive owner of the knote state, and may then modify kn_status
+ * as if it held the KQ lock.  KN_SCAN must not be set on a knote that
+ * is already in flux.
  *
  * kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock.
  */
@@ -219,11 +223,11 @@
 #define	KN_QUEUED	0x02			/* event is on queue */
 #define	KN_DISABLED	0x04			/* event is disabled */
 #define	KN_DETACHED	0x08			/* knote is detached */
-#define	KN_INFLUX	0x10			/* knote is in flux */
 #define	KN_MARKER	0x20			/* ignore this knote */
 #define	KN_KQUEUE	0x40			/* this knote belongs to a kq */
 #define	KN_HASKQLOCK	0x80			/* for _inevent */
 #define	KN_SCAN		0x100			/* flux set in kqueue_scan() */
+	int			kn_influx;
 	int			kn_sfflags;	/* saved filter flags */
 	intptr_t		kn_sdata;	/* saved data field */
 	union {
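
Note on the protocol: the counter generalizes the old single KN_INFLUX bit.
Because kn_influx can exceed one, knote() can now enter and leave flux
unconditionally under KNF_NOKQLOCK (which is why the own_influx bookkeeping
above could be deleted), and waiters sleeping on KQ_FLUXWAIT are only woken
once the last holder leaves.  The sketch below is a minimal userland
illustration of that protocol, not kernel code: flux_note, note_enter_flux,
note_leave_flux, and note_wait_stable are hypothetical names, a pthread
mutex stands in for the per-kq lock, and a condition variable stands in for
the KQ_FLUXWAIT state plus wakeup().

#include <assert.h>
#include <limits.h>
#include <pthread.h>
#include <stdbool.h>

struct flux_note {
	pthread_mutex_t	fn_lock;	/* stands in for the kq lock */
	pthread_cond_t	fn_fluxwait;	/* stands in for KQ_FLUXWAIT/wakeup() */
	int		fn_influx;	/* counter, like kn_influx */
};

static void
note_enter_flux(struct flux_note *fn)
{

	/* Caller holds fn_lock, mirroring the KQ_OWNED() assertion. */
	assert(fn->fn_influx < INT_MAX);
	fn->fn_influx++;
}

static bool
note_leave_flux(struct flux_note *fn)
{

	/* Caller holds fn_lock; true means we were the last holder. */
	assert(fn->fn_influx > 0);
	fn->fn_influx--;
	return (fn->fn_influx == 0);
}

/* Work on a note outside the lock while keeping it pinned. */
static void
note_work(struct flux_note *fn)
{

	pthread_mutex_lock(&fn->fn_lock);
	note_enter_flux(fn);		/* pin the note... */
	pthread_mutex_unlock(&fn->fn_lock);

	/* ...so detach/filter-style work can run without the lock here. */

	pthread_mutex_lock(&fn->fn_lock);
	if (note_leave_flux(fn))	/* roughly KQ_UNLOCK_FLUX() */
		pthread_cond_broadcast(&fn->fn_fluxwait);
	pthread_mutex_unlock(&fn->fn_lock);
}

/* Wait for a stable note, like the "kqclo1"/"kqflxwt" msleep() loops. */
static void
note_wait_stable(struct flux_note *fn)
{

	pthread_mutex_lock(&fn->fn_lock);
	while (fn->fn_influx > 0)
		pthread_cond_wait(&fn->fn_fluxwait, &fn->fn_lock);
	pthread_mutex_unlock(&fn->fn_lock);
}

In this analogy, kqueue_close() and knote_fdclose() above correspond to
note_wait_stable() callers, and every kn_enter_flux()/kn_leave_flux() pair
bracketing a dropped kq lock corresponds to note_work().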