Index: sys/kern/kern_event.c =================================================================== --- sys/kern/kern_event.c +++ sys/kern/kern_event.c @@ -373,11 +373,21 @@ kn->kn_flags |= EV_CLEAR; /* automatically set */ /* - * internal flag indicating registration done by kernel + * Internal flag indicating registration done by kernel for the + * purposes of getting a NOTE_CHILD notification. */ - if (kn->kn_flags & EV_FLAG1) { + if (kn->kn_flags & EV_FLAG2) { + kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; + kn->kn_sfflags &= ~NOTE_EXIT; + immediate = 1; /* Force immediate activation of child note. */ + } + /* + * Internal flag indicating registration done by kernel (for other than + * NOTE_CHILD). + */ + if (kn->kn_flags & EV_FLAG1) { kn->kn_flags &= ~EV_FLAG1; } @@ -385,9 +395,10 @@ knlist_add(&p->p_klist, kn, 1); /* - * Immediately activate any exit notes if the target process is a - * zombie. This is necessary to handle the case where the target - * process, e.g. a child, dies before the kevent is registered. + * Immediately activate any child notes or, in the case of a zombie + * target process, exit notes. The latter is necessary to handle the + * case where the target process, e.g. a child, dies before the kevent + * is registered. */ if (immediate && filt_proc(kn, NOTE_EXIT)) KNOTE_ACTIVATE(kn, 0); @@ -495,7 +506,7 @@ /* * The NOTE_TRACK case. In addition to the activation - * of the event, we need to register new event to + * of the event, we need to register new events to * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ @@ -504,8 +515,27 @@ list->kl_unlock(list->kl_lockarg); /* - * Activate existing knote and register a knote with + * Activate existing knote and register tracking knotes with * new process. + * + * First register a knote to get just the child notice. This + * must be a separate note from a potential NOTE_EXIT + * notification since both NOTE_CHILD and NOTE_EXIT are defined + * to use the data field (in conflicting ways). + */ + kev.ident = pid; + kev.filter = kn->kn_filter; + kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2; + kev.fflags = kn->kn_sfflags; + kev.data = kn->kn_id; /* parent */ + kev.udata = kn->kn_kevent.udata;/* preserve udata */ + error = kqueue_register(kq, &kev, NULL, 0); + if (error) + kn->kn_fflags |= NOTE_TRACKERR; + + /* + * Then register another knote to track other potential events + * from the new process. */ kev.ident = pid; kev.filter = kn->kn_filter; @@ -1129,7 +1159,7 @@ if (fp->f_type == DTYPE_KQUEUE) { /* - * if we add some inteligence about what we are doing, + * If we add some intelligence about what we are doing, * we should be able to support events on ourselves. * We need to know when we are doing this to prevent * getting both the knlist lock and the kq lock since @@ -1161,7 +1191,18 @@ kqueue_expand(kq, fops, kev->ident, waitok); KQ_LOCK(kq); - if (kq->kq_knhashmask != 0) { + + /* + * If possible, find an existing knote to use for this kevent. + */ + if (kev->filter == EVFILT_PROC && + (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { + /* This is an internal creation of a process tracking + * note. Don't attempt to coalesce this with an + * existing note. + */ + ; + } else if (kq->kq_knhashmask != 0) { struct klist *list; list = &kq->kq_knhash[ @@ -1173,7 +1214,7 @@ } } - /* knote is in the process of changing, wait for it to stablize. */ + /* knote is in the process of changing, wait for it to stabilize. */ if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { Index: sys/sys/event.h =================================================================== --- sys/sys/event.h +++ sys/sys/event.h @@ -80,6 +80,7 @@ #define EV_SYSFLAGS 0xF000 /* reserved by system */ #define EV_DROP 0x1000 /* note should be dropped */ #define EV_FLAG1 0x2000 /* filter-specific flag */ +#define EV_FLAG2 0x4000 /* filter-specific flag */ /* returned values */ #define EV_EOF 0x8000 /* EOF detected */ Index: tests/sys/kqueue/common.h =================================================================== --- tests/sys/kqueue/common.h +++ tests/sys/kqueue/common.h @@ -46,6 +46,7 @@ extern const char * kevent_to_str(struct kevent *); struct kevent * kevent_get(int); +struct kevent * kevent_get_timeout(int, int); void kevent_cmp(struct kevent *, struct kevent *); Index: tests/sys/kqueue/main.c =================================================================== --- tests/sys/kqueue/main.c +++ tests/sys/kqueue/main.c @@ -69,6 +69,28 @@ return (kev); } +/* Retrieve a single kevent, specifying a maximum time to wait for it. */ +struct kevent * +kevent_get_timeout(int kqfd, int seconds) +{ + int nfds; + struct kevent *kev; + struct timespec timeout = {seconds, 0}; + + if ((kev = calloc(1, sizeof(*kev))) == NULL) + err(1, "out of memory"); + + nfds = kevent(kqfd, NULL, 0, kev, 1, &timeout); + if (nfds < 0) { + err(1, "kevent(2)"); + } else if (nfds == 0) { + free(kev); + kev = NULL; + } + + return (kev); +} + char * kevent_fflags_dump(struct kevent *kev) { @@ -82,25 +104,39 @@ abort(); /* Not every filter has meaningful fflags */ - if (kev->filter != EVFILT_VNODE) { - snprintf(buf, 1024, "fflags = %d", kev->fflags); - return (buf); - } - - snprintf(buf, 1024, "fflags = %d (", kev->fflags); - KEVFFL_DUMP(NOTE_DELETE); - KEVFFL_DUMP(NOTE_WRITE); - KEVFFL_DUMP(NOTE_EXTEND); + if (kev->filter == EVFILT_PROC) { + snprintf(buf, 1024, "fflags = %x (", kev->fflags); + KEVFFL_DUMP(NOTE_EXIT); + KEVFFL_DUMP(NOTE_FORK); + KEVFFL_DUMP(NOTE_EXEC); + KEVFFL_DUMP(NOTE_CHILD); + KEVFFL_DUMP(NOTE_TRACKERR); + KEVFFL_DUMP(NOTE_TRACK); + buf[strlen(buf) - 1] = ')'; + } else if (kev->filter == EVFILT_PROCDESC) { + snprintf(buf, 1024, "fflags = %x (", kev->fflags); + KEVFFL_DUMP(NOTE_EXIT); + KEVFFL_DUMP(NOTE_FORK); + KEVFFL_DUMP(NOTE_EXEC); + buf[strlen(buf) - 1] = ')'; + } else if (kev->filter == EVFILT_VNODE) { + snprintf(buf, 1024, "fflags = %x (", kev->fflags); + KEVFFL_DUMP(NOTE_DELETE); + KEVFFL_DUMP(NOTE_WRITE); + KEVFFL_DUMP(NOTE_EXTEND); #if HAVE_NOTE_TRUNCATE - KEVFFL_DUMP(NOTE_TRUNCATE); + KEVFFL_DUMP(NOTE_TRUNCATE); #endif - KEVFFL_DUMP(NOTE_ATTRIB); - KEVFFL_DUMP(NOTE_LINK); - KEVFFL_DUMP(NOTE_RENAME); + KEVFFL_DUMP(NOTE_ATTRIB); + KEVFFL_DUMP(NOTE_LINK); + KEVFFL_DUMP(NOTE_RENAME); #if HAVE_NOTE_REVOKE - KEVFFL_DUMP(NOTE_REVOKE); + KEVFFL_DUMP(NOTE_REVOKE); #endif - buf[strlen(buf) - 1] = ')'; + buf[strlen(buf) - 1] = ')'; + } else { + snprintf(buf, 1024, "fflags = %x", kev->fflags); + } return (buf); } @@ -260,6 +296,15 @@ argc--; } + /* + * Some tests fork. If output is fully buffered, + * the children inherit some buffered data and flush + * it when they exit, causing some data to be printed twice. + * Use line buffering to avoid this problem. + */ + setlinebuf(stdout); + setlinebuf(stderr); + test_kqueue(); test_kqueue_close(); Index: tests/sys/kqueue/proc.c =================================================================== --- tests/sys/kqueue/proc.c +++ tests/sys/kqueue/proc.c @@ -74,6 +74,155 @@ } +static void +proc_track(int sleep_time) +{ + char test_id[64]; + struct kevent kev; + pid_t pid; + + snprintf(test_id, sizeof(test_id), + "kevent(EVFILT_PROC, NOTE_TRACK); sleep %d", sleep_time); + test_begin(test_id); + test_no_kevents(); + + /* Create a child to track. */ + pid = fork(); + if (pid == 0) { /* Child */ + pid_t grandchild = -1; + + /* + * Give the parent a chance to start tracking us. There are more + * sophisticated ways to do this, but this works fine for a test. + */ + sleep(2); + + /* + * Spawn a grandchild that will immediately exit. If the kernel has bug + * 180385, the parent will see a kevent with both NOTE_CHILD and + * NOTE_EXIT. If that bug is fixed, it will see two separate kevents + * for those notes. Note that this triggers the conditions for + * detecting the bug quite reliably on a 1 CPU system (or if the test + * process is restricted to a single CPU), but may not trigger it on a + * multi-CPU system. + */ + grandchild = fork(); + if (grandchild == 0) { /* Grandchild */ + if (sleep_time) sleep(sleep_time); + exit(1); + } else if (grandchild == -1) { /* Error */ + err(1, "fork (grandchild) failed! (%s() at %s:%d)", + __func__, __FILE__, __LINE__); + } else { /* Child (Grandchild Parent) */ + printf(" -- grandchild created (pid %d)\n", (int) grandchild); + } + if (sleep_time) sleep(sleep_time); + exit(0); + } else if (pid == -1) { /* Error */ + err(1, "fork (child) failed! (%s() at %s:%d)", + __func__, __FILE__, __LINE__); + } + + printf(" -- child created (pid %d)\n", (int) pid); + + kevent_add(kqfd, &kev, pid, EVFILT_PROC, EV_ADD | EV_ENABLE, + NOTE_TRACK | NOTE_EXEC | NOTE_EXIT | NOTE_FORK, + 0, NULL); + + printf(" -- tracking child (pid %d)\n", (int) pid); + + /* + * Several events should be received: + * - NOTE_FORK (from child) + * - NOTE_CHILD (from grandchild) + * - NOTE_EXIT (from grandchild) + * - NOTE_EXIT (from child) + * + * The NOTE_FORK and NOTE_EXIT from the child could be combined into a + * single event, but the NOTE_CHILD and NOTE_EXIT from the grandchild must + * not be combined. + * + * The loop continues until no events are received within a 5 second + * period, at which point it is assumed that no more will be coming. The + * loop is deliberately designed to attempt to get events even after all + * the expected ones are received in case some spurious events are + * generated as well as the expected ones. + */ + { + int child_exit = 0; + int child_fork = 0; + int gchild_exit = 0; + int gchild_note = 0; + pid_t gchild_pid = -1; + int done = 0; + + while (!done) + { + int handled = 0; + struct kevent *kevp; + + kevp = kevent_get_timeout(kqfd, 5); + if (kevp == NULL) { + done = 1; + } else { + printf(" -- Received kevent: %s\n", kevent_to_str(kevp)); + + if ((kevp->fflags & NOTE_CHILD) && (kevp->fflags & NOTE_EXIT)) { + errx(1, "NOTE_CHILD and NOTE_EXIT in same kevent: %s", kevent_to_str(kevp)); + } + + if (kevp->fflags & NOTE_CHILD) { + if (kevp->data == pid) { + if (!gchild_note) { + ++gchild_note; + gchild_pid = kevp->ident; + ++handled; + } else { + errx(1, "Spurious NOTE_CHILD: %s", kevent_to_str(kevp)); + } + } + } + + if (kevp->fflags & NOTE_EXIT) { + if ((kevp->ident == pid) && (!child_exit)) { + ++child_exit; + ++handled; + } else if ((kevp->ident == gchild_pid) && (!gchild_exit)) { + ++gchild_exit; + ++handled; + } else { + errx(1, "Spurious NOTE_CHILD: %s", kevent_to_str(kevp)); + } + } + + if (kevp->fflags & NOTE_FORK) { + if ((kevp->ident == pid) && (!child_fork)) { + ++child_fork; + ++handled; + } else { + errx(1, "Spurious NOTE_FORK: %s", kevent_to_str(kevp)); + } + } + + if (!handled) { + errx(1, "Spurious kevent: %s", kevent_to_str(kevp)); + } + + free(kevp); + } + } + + /* Make sure all expected events were received. */ + if (child_exit && child_fork && gchild_exit && gchild_note) { + printf(" -- Received all expected events.\n"); + } else { + errx(1, "Did not receive all expected events."); + } + } + + success(); +} + #ifdef TODO static void event_trigger(void) @@ -236,6 +385,8 @@ signal(SIGUSR1, sig_handler); add_and_delete(); + proc_track(0); /* Run without sleeping before children exit. */ + proc_track(1); /* Sleep a bit in the children before exiting. */ #if TODO event_trigger();