Index: head/sys/kern/kern_event.c =================================================================== --- head/sys/kern/kern_event.c +++ head/sys/kern/kern_event.c @@ -193,7 +193,7 @@ SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); -/* XXX - ensure not KN_INFLUX?? */ +/* XXX - ensure not influx ? */ #define KNOTE_ACTIVATE(kn, islock) do { \ if ((islock)) \ mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ @@ -254,6 +254,32 @@ } } +static bool +kn_in_flux(struct knote *kn) +{ + + return (kn->kn_influx > 0); +} + +static void +kn_enter_flux(struct knote *kn) +{ + + KQ_OWNED(kn->kn_kq); + MPASS(kn->kn_influx < INT_MAX); + kn->kn_influx++; +} + +static bool +kn_leave_flux(struct knote *kn) +{ + + KQ_OWNED(kn->kn_kq); + MPASS(kn->kn_influx > 0); + kn->kn_influx--; + return (kn->kn_influx == 0); +} + #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ @@ -498,7 +524,7 @@ SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { + if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { KQ_UNLOCK(kq); continue; } @@ -521,7 +547,7 @@ * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); list->kl_unlock(list->kl_lockarg); @@ -561,7 +587,7 @@ if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 0); KQ_LOCK(kq); - kn->kn_status &= ~KN_INFLUX; + kn_leave_flux(kn); KQ_UNLOCK_FLUX(kq); list->kl_lock(list->kl_lockarg); } @@ -1262,7 +1288,7 @@ } /* knote is in the process of changing, wait for it to stabilize. */ - if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if (kn != NULL && kn_in_flux(kn)) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { FILEDESC_XUNLOCK(td->td_proc->p_fd); @@ -1306,7 +1332,8 @@ kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); - kn->kn_status = KN_INFLUX|KN_DETACHED; + kn->kn_status = KN_DETACHED; + kn_enter_flux(kn); error = knote_attach(kn, kq); KQ_UNLOCK(kq); @@ -1330,7 +1357,7 @@ } if (kev->flags & EV_DELETE) { - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); @@ -1348,7 +1375,8 @@ * but doing so will not reset any filter which has already been * triggered. */ - kn->kn_status |= KN_INFLUX | KN_SCAN; + kn->kn_status |= KN_SCAN; + kn_enter_flux(kn); KQ_UNLOCK(kq); knl = kn_list_lock(kn); kn->kn_kevent.udata = kev->udata; @@ -1383,7 +1411,8 @@ if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == KN_ACTIVE) knote_enqueue(kn); - kn->kn_status &= ~(KN_INFLUX | KN_SCAN); + kn->kn_status &= ~KN_SCAN; + kn_leave_flux(kn); kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); @@ -1546,7 +1575,7 @@ /* * Scan, update kn_data (if not ONESHOT), and copyout triggered events. - * We treat KN_MARKER knotes as if they are INFLUX. + * We treat KN_MARKER knotes as if they are in flux. */ static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, @@ -1620,7 +1649,7 @@ kn = TAILQ_FIRST(&kq->kq_head); if ((kn->kn_status == KN_MARKER && kn != marker) || - (kn->kn_status & KN_INFLUX) == KN_INFLUX) { + kn_in_flux(kn)) { if (influx) { influx = 0; KQ_FLUX_WAKEUP(kq); @@ -1643,17 +1672,17 @@ goto retry; goto done; } - KASSERT((kn->kn_status & KN_INFLUX) == 0, - ("KN_INFLUX set when not suppose to be")); + KASSERT(!kn_in_flux(kn), + ("knote %p is unexpectedly in flux", kn)); if ((kn->kn_flags & EV_DROP) == EV_DROP) { kn->kn_status &= ~KN_QUEUED; - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* - * We don't need to lock the list since we've marked - * it _INFLUX. + * We don't need to lock the list since we've + * marked it as in flux. */ if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); @@ -1662,12 +1691,12 @@ continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* - * We don't need to lock the list since we've marked - * it _INFLUX. + * We don't need to lock the list since we've + * marked the knote as being in flux. */ *kevp = kn->kn_kevent; if (!(kn->kn_status & KN_DETACHED)) @@ -1676,7 +1705,8 @@ KQ_LOCK(kq); kn = NULL; } else { - kn->kn_status |= KN_INFLUX | KN_SCAN; + kn->kn_status |= KN_SCAN; + kn_enter_flux(kn); KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); @@ -1684,9 +1714,9 @@ if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); - kn->kn_status &= - ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | + kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN); + kn_leave_flux(kn); kq->kq_count--; kn_list_unlock(knl); influx = 1; @@ -1716,7 +1746,8 @@ } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - kn->kn_status &= ~(KN_INFLUX | KN_SCAN); + kn->kn_status &= ~KN_SCAN; + kn_leave_flux(kn); kn_list_unlock(knl); influx = 1; } @@ -1864,12 +1895,12 @@ for (i = 0; i < kq->kq_knlistsize; i++) { while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); continue; } - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); @@ -1880,13 +1911,13 @@ if (kq->kq_knhashmask != 0) { for (i = 0; i <= kq->kq_knhashmask; i++) { while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo2", 0); continue; } - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); @@ -2010,7 +2041,6 @@ struct kqueue *kq; struct knote *kn, *tkn; int error; - bool own_influx; if (list == NULL) return; @@ -2021,7 +2051,7 @@ list->kl_lock(list->kl_lockarg); /* - * If we unlock the list lock (and set KN_INFLUX), we can + * If we unlock the list lock (and enter influx), we can * eliminate the kqueue scheduling, but this will introduce * four lock/unlock's for each knote to test. Also, marker * would be needed to keep iteration position, since filters @@ -2030,7 +2060,7 @@ SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { + if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { /* * Do not process the influx notes, except for * the influx coming from the kq unlock in the @@ -2041,14 +2071,11 @@ */ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { - own_influx = (kn->kn_status & KN_INFLUX) == 0; - if (own_influx) - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); - if (own_influx) - kn->kn_status &= ~KN_INFLUX; + kn_leave_flux(kn); if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); @@ -2070,10 +2097,12 @@ void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { + KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); - KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == - (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED")); + KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); + KASSERT((kn->kn_status & KN_DETACHED) != 0, + ("knote %p was not detached", kn)); if (!islocked) knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); @@ -2089,12 +2118,13 @@ knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) { - KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); + + KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED); - if (!kqislocked) - KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX, - ("knlist_remove called w/o knote being KN_INFLUX or already removed")); + KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn)); + KASSERT((kn->kn_status & KN_DETACHED) == 0, + ("knote %p was already detached", kn)); if (!knlislocked) knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); @@ -2287,30 +2317,30 @@ SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & KN_INFLUX)) { + if (kn_in_flux(kn)) { KQ_UNLOCK(kq); continue; } knlist_remove_kq(knl, kn, 1, 1); if (killkn) { - kn->kn_status |= KN_INFLUX | KN_DETACHED; + kn->kn_status |= KN_DETACHED; + kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); } else { /* Make sure cleared knotes disappear soon */ - kn->kn_flags |= (EV_EOF | EV_ONESHOT); + kn->kn_flags |= EV_EOF | EV_ONESHOT; KQ_UNLOCK(kq); } kq = NULL; } if (!SLIST_EMPTY(&knl->kl_list)) { - /* there are still KN_INFLUX remaining */ + /* there are still in flux knotes remaining */ kn = SLIST_FIRST(&knl->kl_list); kq = kn->kn_kq; KQ_LOCK(kq); - KASSERT(kn->kn_status & KN_INFLUX, - ("knote removed w/o list lock")); + KASSERT(kn_in_flux(kn), ("knote removed w/o list lock")); knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); @@ -2352,7 +2382,7 @@ influx = 0; while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { - if (kn->kn_status & KN_INFLUX) { + if (kn_in_flux(kn)) { /* someone else might be waiting on our knote */ if (influx) wakeup(kq); @@ -2360,7 +2390,7 @@ msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); goto again; } - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); @@ -2377,7 +2407,7 @@ { struct klist *list; - KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX")); + KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn)); KQ_OWNED(kq); if (kn->kn_fop->f_isfd) { @@ -2395,7 +2425,7 @@ /* * knote must already have been detached using the f_detach method. - * no lock need to be held, it is assumed that the KN_INFLUX flag is set + * no lock need to be held, it is assumed that the influx state is set * to prevent other removal. */ static void @@ -2407,10 +2437,10 @@ kq = kn->kn_kq; KQ_NOTOWNED(kq); - KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX, - ("knote_drop called without KN_INFLUX set in kn_status")); - KQ_LOCK(kq); + KASSERT(kn->kn_influx == 1, + ("knote_drop called on %p with influx %d", kn, kn->kn_influx)); + if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else Index: head/sys/sys/event.h =================================================================== --- head/sys/sys/event.h +++ head/sys/sys/event.h @@ -202,8 +202,11 @@ }; /* - * Setting the KN_INFLUX flag enables you to unlock the kq that this knote - * is on, and modify kn_status as if you had the KQ lock. + * An in-flux knote cannot be dropped from its kq while the kq is + * unlocked. If the KN_SCAN flag is not set, a thread can only set + * kn_influx when it is exclusive owner of the knote state, and can + * modify kn_status as if it had the KQ lock. KN_SCAN must not be set + * on a knote which is already in flux. * * kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock. */ @@ -214,16 +217,18 @@ TAILQ_ENTRY(knote) kn_tqe; struct kqueue *kn_kq; /* which queue we are on */ struct kevent kn_kevent; + void *kn_hook; + int kn_hookid; int kn_status; /* protected by kq lock */ #define KN_ACTIVE 0x01 /* event has been triggered */ #define KN_QUEUED 0x02 /* event is on queue */ #define KN_DISABLED 0x04 /* event is disabled */ #define KN_DETACHED 0x08 /* knote is detached */ -#define KN_INFLUX 0x10 /* knote is in flux */ #define KN_MARKER 0x20 /* ignore this knote */ #define KN_KQUEUE 0x40 /* this knote belongs to a kq */ #define KN_HASKQLOCK 0x80 /* for _inevent */ #define KN_SCAN 0x100 /* flux set in kqueue_scan() */ + int kn_influx; int kn_sfflags; /* saved filter flags */ intptr_t kn_sdata; /* saved data field */ union { @@ -234,8 +239,6 @@ void *p_v; /* generic other pointer */ } kn_ptr; struct filterops *kn_fop; - void *kn_hook; - int kn_hookid; #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter