diff --git a/lib/libc/sys/kqueue.2 b/lib/libc/sys/kqueue.2 index ed737c626ef8..3ded4ae3d8f7 100644 --- a/lib/libc/sys/kqueue.2 +++ b/lib/libc/sys/kqueue.2 @@ -1,833 +1,848 @@ .\" Copyright (c) 2000 Jonathan Lemon .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd September 7, 2021 +.Dd September 23, 2021 .Dt KQUEUE 2 .Os .Sh NAME .Nm kqueue , .Nm kevent .Nd kernel event notification mechanism .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/event.h .Ft int .Fn kqueue "void" .Ft int .Fo kevent .Fa "int kq" .Fa "const struct kevent *changelist" .Fa "int nchanges" .Fa "struct kevent *eventlist" .Fa "int nevents" .Fa "const struct timespec *timeout" .Fc .Fn EV_SET "kev" ident filter flags fflags data udata .Sh DESCRIPTION The .Fn kqueue system call provides a generic method of notifying the user when an event happens or a condition holds, based on the results of small pieces of kernel code termed filters. A kevent is identified by the (ident, filter) pair; there may only be one unique kevent per kqueue. .Pp The filter is executed upon the initial registration of a kevent in order to detect whether a preexisting condition is present, and is also executed whenever an event is passed to the filter for evaluation. If the filter determines that the condition should be reported, then the kevent is placed on the kqueue for the user to retrieve. .Pp The filter is also run when the user attempts to retrieve the kevent from the kqueue. If the filter indicates that the condition that triggered the event no longer holds, the kevent is removed from the kqueue and is not returned. .Pp Multiple events which trigger the filter do not result in multiple kevents being placed on the kqueue; instead, the filter will aggregate the events into a single struct kevent. Calling .Fn close on a file descriptor will remove any kevents that reference the descriptor. .Pp The .Fn kqueue system call creates a new kernel event queue and returns a descriptor. The queue is not inherited by a child created with .Xr fork 2 . However, if .Xr rfork 2 is called without the .Dv RFFDG flag, then the descriptor table is shared, which will allow sharing of the kqueue between two processes. 
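.Pp
In outline, an application creates a queue with
.Fn kqueue ,
registers its interest with
.Fn kevent ,
and then waits; a minimal sketch of that call sequence (error handling
omitted;
.Va fd
is assumed to be a descriptor already opened by the caller) is:
.Bd -literal
struct kevent change, event;
int kq;

kq = kqueue();                            /* create the event queue */
EV_SET(&change, fd, EVFILT_READ,          /* watch fd for readable data */
    EV_ADD | EV_ENABLE, 0, 0, NULL);
(void)kevent(kq, &change, 1, NULL, 0, NULL);  /* apply the change only */
(void)kevent(kq, NULL, 0, &event, 1, NULL);   /* block for one event */
.Ed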
.Pp The .Fn kevent system call is used to register events with the queue, and return any pending events to the user. The .Fa changelist argument is a pointer to an array of .Va kevent structures, as defined in .In sys/event.h . All changes contained in the .Fa changelist are applied before any pending events are read from the queue. The .Fa nchanges argument gives the size of .Fa changelist . The .Fa eventlist argument is a pointer to an array of kevent structures. The .Fa nevents argument determines the size of .Fa eventlist . When .Fa nevents is zero, .Fn kevent will return immediately even if there is a .Fa timeout specified, unlike .Xr select 2 . If .Fa timeout is a non-NULL pointer, it specifies a maximum interval to wait for an event, which will be interpreted as a struct timespec. If .Fa timeout is a NULL pointer, .Fn kevent waits indefinitely. To effect a poll, the .Fa timeout argument should be non-NULL, pointing to a zero-valued .Va timespec structure. The same array may be used for the .Fa changelist and .Fa eventlist . .Pp The .Fn EV_SET macro is provided for ease of initializing a kevent structure. .Pp The .Va kevent structure is defined as: .Bd -literal
struct kevent {
    uintptr_t  ident;     /* identifier for this event */
    short      filter;    /* filter for event */
    u_short    flags;     /* action flags for kqueue */
    u_int      fflags;    /* filter flag value */
    int64_t    data;      /* filter data value */
    void       *udata;    /* opaque user data identifier */
    uint64_t   ext[4];    /* extensions */
};
.Ed
.Pp The fields of .Fa struct kevent are: .Bl -tag -width "Fa filter" .It Fa ident Value used to identify this event. The exact interpretation is determined by the attached filter, but often is a file descriptor. .It Fa filter Identifies the kernel filter used to process this event. The pre-defined system filters are described below. .It Fa flags Actions to perform on the event. .It Fa fflags Filter-specific flags. .It Fa data Filter-specific data value. .It Fa udata Opaque user-defined value passed through the kernel unchanged. .It Fa ext Extended data passed to and from the kernel. The use of the .Fa ext[0] and .Fa ext[1] members is defined by the filter. If the filter does not use them, the members are copied unchanged. The .Fa ext[2] and .Fa ext[3] members are always passed through the kernel as-is, making additional context available to the application. .El .Pp The .Va flags field can contain the following values: .Bl -tag -width EV_DISPATCH .It Dv EV_ADD Adds the event to the kqueue. Re-adding an existing event will modify the parameters of the original event, and not result in a duplicate entry. Adding an event automatically enables it, unless overridden by the EV_DISABLE flag. .It Dv EV_ENABLE Permit .Fn kevent to return the event if it is triggered. .It Dv EV_DISABLE Disable the event so .Fn kevent will not return it. The filter itself is not disabled. .It Dv EV_DISPATCH Disable the event source immediately after delivery of an event. See .Dv EV_DISABLE above. .It Dv EV_DELETE Removes the event from the kqueue. Events which are attached to file descriptors are automatically deleted on the last close of the descriptor. .It Dv EV_RECEIPT This flag is useful for making bulk changes to a kqueue without draining any pending events. When passed as input, it forces .Dv EV_ERROR to always be returned. When a filter is successfully added, the .Va data field will be zero. Note that if this flag is encountered and there is no remaining space in .Fa eventlist to hold the .Dv EV_ERROR event, then subsequent changes will not get processed.
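.Pp
For example, a sketch of such a bulk update (assuming
.Va kq
is the kqueue descriptor and
.Va n
changes, each with
.Dv EV_RECEIPT
set, have been prepared in the array
.Va chlist )
might collect the per-change status as follows:
.Bd -literal
struct kevent results[n];
int i, ret;

ret = kevent(kq, chlist, n, results, n, NULL);
for (i = 0; i < ret; i++) {
    /* Every change is reported; data holds the error, 0 on success. */
    if ((results[i].flags & EV_ERROR) != 0 && results[i].data != 0)
        fprintf(stderr, "change %d: %s\en", i,
            strerror((int)results[i].data));
}
.Ed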
.It Dv EV_ONESHOT Causes the event to return only the first occurrence of the filter being triggered. After the user retrieves the event from the kqueue, it is deleted. .It Dv EV_CLEAR After the event is retrieved by the user, its state is reset. This is useful for filters which report state transitions instead of the current state. Note that some filters may automatically set this flag internally. .It Dv EV_EOF Filters may set this flag to indicate a filter-specific EOF condition. .It Dv EV_ERROR See .Sx RETURN VALUES below. +.It Dv EV_KEEPUDATA +Causes +.Fn kevent +to leave unchanged any +.Fa udata +associated with an existing event. This allows other aspects of the +event to be modified without requiring the caller to know the +.Fa udata +value currently associated with the event. +This is especially useful with +.Dv NOTE_TRIGGER +or flags like +.Dv EV_ENABLE . +This flag may not be used with +.Dv EV_ADD . .El .Pp The predefined system filters are listed below. Arguments may be passed to and from the filter via the .Va fflags and .Va data fields in the kevent structure. .Bl -tag -width "Dv EVFILT_PROCDESC" .It Dv EVFILT_READ Takes a descriptor as the identifier, and returns whenever there is data available to read. The behavior of the filter is slightly different depending on the descriptor type. .Bl -tag -width 2n .It Sockets Sockets which have previously been passed to .Fn listen return when there is an incoming connection pending. .Va data contains the size of the listen backlog. .Pp Other socket descriptors return when there is data to be read, subject to the .Dv SO_RCVLOWAT value of the socket buffer. This may be overridden with a per-filter low water mark at the time the filter is added by setting the .Dv NOTE_LOWAT flag in .Va fflags , and specifying the new low water mark in .Va data . On return, .Va data contains the number of bytes of protocol data available to read. .Pp If the read direction of the socket has been shut down, then the filter also sets .Dv EV_EOF in .Va flags , and returns the socket error (if any) in .Va fflags . It is possible for EOF to be returned (indicating the connection is gone) while there is still data pending in the socket buffer. .It Vnodes Returns when the file pointer is not at the end of file. .Va data contains the offset from the current position to the end of file, and may be negative. .Pp This behavior is different from .Xr poll 2 , where read events are triggered for regular files unconditionally. This event can be triggered unconditionally by setting the .Dv NOTE_FILE_POLL flag in .Va fflags . .It "Fifos, Pipes" Returns when there is data to read; .Va data contains the number of bytes available. .Pp When the last writer disconnects, the filter will set .Dv EV_EOF in .Va flags . This will be cleared by the filter when a new writer connects, at which point the filter will resume waiting for data to become available before returning. .It "BPF devices" Returns when the BPF buffer is full, the BPF timeout has expired, or when the BPF has .Dq immediate mode enabled and there is any data to read; .Va data contains the number of bytes available. .It Eventfds Returns when the counter is greater than 0; .Va data contains the counter value, which must be cast to .Vt uint64_t . .It Kqueues Returns when pending events are present on the queue; .Va data contains the number of events available. .El .It Dv EVFILT_WRITE Takes a descriptor as the identifier, and returns whenever it is possible to write to the descriptor.
For sockets, pipes and fifos, .Va data will contain the amount of space remaining in the write buffer. The filter will set .Dv EV_EOF when the reader disconnects, and for the fifo case, this will be cleared when a new reader connects. Note that this filter is not supported for vnodes or BPF devices. .Pp For sockets, the low water mark and socket error handling are identical to the .Dv EVFILT_READ case. .Pp For eventfds, .Va data will contain the maximum value that can be added to the counter without blocking. .It Dv EVFILT_EMPTY Takes a descriptor as the identifier, and returns whenever there is no remaining data in the write buffer. .It Dv EVFILT_AIO Events for this filter are not registered with .Fn kevent directly but are registered via the .Va aio_sigevent member of an asynchronous I/O request when it is scheduled via an asynchronous I/O system call such as .Fn aio_read . The filter returns under the same conditions as .Fn aio_error . For more details on this filter see .Xr sigevent 3 and .Xr aio 4 . .It Dv EVFILT_VNODE Takes a file descriptor as the identifier and the events to watch for in .Va fflags , and returns when one or more of the requested events occurs on the descriptor. The events to monitor are: .Bl -tag -width "Dv NOTE_CLOSE_WRITE" .It Dv NOTE_ATTRIB The file referenced by the descriptor had its attributes changed. .It Dv NOTE_CLOSE A file descriptor referencing the monitored file was closed. The closed file descriptor did not have write access. .It Dv NOTE_CLOSE_WRITE A file descriptor referencing the monitored file was closed. The closed file descriptor had write access. .Pp This note, as well as .Dv NOTE_CLOSE , is not activated when files are closed forcibly by .Xr unmount 2 or .Xr revoke 2 . Instead, .Dv NOTE_REVOKE is sent for such events. .It Dv NOTE_DELETE The .Fn unlink system call was called on the file referenced by the descriptor. .It Dv NOTE_EXTEND For a regular file, the file referenced by the descriptor was extended. .Pp For a directory, this reports that a directory entry was added or removed as the result of a rename operation. The .Dv NOTE_EXTEND event is not reported when a name is changed inside the directory. .It Dv NOTE_LINK The link count on the file changed. In particular, the .Dv NOTE_LINK event is reported if a subdirectory was created or deleted inside the directory referenced by the descriptor. .It Dv NOTE_OPEN The file referenced by the descriptor was opened. .It Dv NOTE_READ A read occurred on the file referenced by the descriptor. .It Dv NOTE_RENAME The file referenced by the descriptor was renamed. .It Dv NOTE_REVOKE Access to the file was revoked via .Xr revoke 2 or the underlying file system was unmounted. .It Dv NOTE_WRITE A write occurred on the file referenced by the descriptor. .El .Pp On return, .Va fflags contains the events which triggered the filter. .It Dv EVFILT_PROC Takes the process ID to monitor as the identifier and the events to watch for in .Va fflags , and returns when the process performs one or more of the requested events. If a process can normally see another process, it can attach an event to it. The events to monitor are: .Bl -tag -width "Dv NOTE_TRACKERR" .It Dv NOTE_EXIT The process has exited. The exit status will be stored in .Va data . .It Dv NOTE_FORK The process has called .Fn fork . .It Dv NOTE_EXEC The process has executed a new process via .Xr execve 2 or a similar call. .It Dv NOTE_TRACK Follow a process across .Fn fork calls.
The parent process registers a new kevent to monitor the child process using the same .Va fflags as the original event. The child process will signal an event with .Dv NOTE_CHILD set in .Va fflags and the parent PID in .Va data . .Pp If the parent process fails to register a new kevent .Pq usually due to resource limitations , it will signal an event with .Dv NOTE_TRACKERR set in .Va fflags , and the child process will not signal a .Dv NOTE_CHILD event. .El .Pp On return, .Va fflags contains the events which triggered the filter. .It Dv EVFILT_PROCDESC Takes the process descriptor created by .Xr pdfork 2 to monitor as the identifier and the events to watch for in .Va fflags , and returns when the associated process performs one or more of the requested events. The events to monitor are: .Bl -tag -width "Dv NOTE_EXIT" .It Dv NOTE_EXIT The process has exited. The exit status will be stored in .Va data . .El .Pp On return, .Va fflags contains the events which triggered the filter. .It Dv EVFILT_SIGNAL Takes the signal number to monitor as the identifier and returns when the given signal is delivered to the process. This coexists with the .Fn signal and .Fn sigaction facilities, and has a lower precedence. The filter will record all attempts to deliver a signal to a process, even if the signal has been marked as .Dv SIG_IGN , except for the .Dv SIGCHLD signal, which, if ignored, will not be recorded by the filter. Event notification happens after normal signal delivery processing. .Va data returns the number of times the signal has occurred since the last call to .Fn kevent . This filter automatically sets the .Dv EV_CLEAR flag internally. .It Dv EVFILT_TIMER Establishes an arbitrary timer identified by .Va ident . When adding a timer, .Va data specifies the moment to fire the timer (for .Dv NOTE_ABSTIME ) or the timeout period. The timer will be periodic unless .Dv EV_ONESHOT or .Dv NOTE_ABSTIME is specified. On return, .Va data contains the number of times the timeout has expired since the last call to .Fn kevent . For non-monotonic timers, this filter automatically sets the .Dv EV_CLEAR flag internally. .Pp The filter accepts the following flags in the .Va fflags argument: .Bl -tag -width "Dv NOTE_MSECONDS" .It Dv NOTE_SECONDS .Va data is in seconds. .It Dv NOTE_MSECONDS .Va data is in milliseconds. .It Dv NOTE_USECONDS .Va data is in microseconds. .It Dv NOTE_NSECONDS .Va data is in nanoseconds. .It Dv NOTE_ABSTIME The specified expiration time is absolute. .El .Pp If .Va fflags is not set, the default is milliseconds. On return, .Va fflags contains the events which triggered the filter. .Pp If an existing timer is re-added, the existing timer will be effectively canceled (throwing away any undelivered record of previous timer expiration) and re-started using the new parameters contained in .Va data and .Va fflags . .Pp There is a system wide limit on the number of timers which is controlled by the .Va kern.kq_calloutmax sysctl. .It Dv EVFILT_USER Establishes a user event identified by .Va ident which is not associated with any kernel mechanism but is triggered by user level code. The lower 24 bits of the .Va fflags may be used for user defined flags and manipulated using the following: .Bl -tag -width "Dv NOTE_FFLAGSMASK" .It Dv NOTE_FFNOP Ignore the input .Va fflags . .It Dv NOTE_FFAND Bitwise AND .Va fflags . .It Dv NOTE_FFOR Bitwise OR .Va fflags . .It Dv NOTE_FFCOPY Copy .Va fflags . .It Dv NOTE_FFCTRLMASK Control mask for .Va fflags . 
.It Dv NOTE_FFLAGSMASK User defined flag mask for .Va fflags . .El .Pp A user event is triggered for output with the following: .Bl -tag -width "Dv NOTE_FFLAGSMASK" .It Dv NOTE_TRIGGER Cause the event to be triggered. .El .Pp On return, .Va fflags contains the user-defined flags in the lower 24 bits. .El .Sh CANCELLATION BEHAVIOUR If .Fa nevents is non-zero, i.e., the function is potentially blocking, the call is a cancellation point. Otherwise, i.e., if .Fa nevents is zero, the call is not cancellable. Cancellation can only occur before any changes are made to the kqueue, or when the call was blocked and no changes to the queue were requested. .Sh RETURN VALUES The .Fn kqueue system call creates a new kernel event queue and returns a file descriptor. If there was an error creating the kernel event queue, a value of -1 is returned and errno is set. .Pp The .Fn kevent system call returns the number of events placed in the .Fa eventlist , up to the value given by .Fa nevents . If an error occurs while processing an element of the .Fa changelist and there is enough room in the .Fa eventlist , then the event will be placed in the .Fa eventlist with .Dv EV_ERROR set in .Va flags and the system error in .Va data . Otherwise, .Dv -1 will be returned, and .Dv errno will be set to indicate the error condition. If the time limit expires, then .Fn kevent returns 0. .Sh EXAMPLES .Bd -literal -compact
#include <sys/event.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(int argc, char **argv)
{
    struct kevent event;    /* Event we want to monitor */
    struct kevent tevent;   /* Event triggered */
    int kq, fd, ret;

    if (argc != 2)
        err(EXIT_FAILURE, "Usage: %s path\en", argv[0]);
    fd = open(argv[1], O_RDONLY);
    if (fd == -1)
        err(EXIT_FAILURE, "Failed to open '%s'", argv[1]);

    /* Create kqueue. */
    kq = kqueue();
    if (kq == -1)
        err(EXIT_FAILURE, "kqueue() failed");

    /* Initialize kevent structure. */
    EV_SET(&event, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR, NOTE_WRITE,
        0, NULL);
    /* Attach event to the kqueue. */
    ret = kevent(kq, &event, 1, NULL, 0, NULL);
    if (ret == -1)
        err(EXIT_FAILURE, "kevent register");
    if (event.flags & EV_ERROR)
        errx(EXIT_FAILURE, "Event error: %s", strerror(event.data));

    for (;;) {
        /* Sleep until something happens. */
        ret = kevent(kq, NULL, 0, &tevent, 1, NULL);
        if (ret == -1) {
            err(EXIT_FAILURE, "kevent wait");
        } else if (ret > 0) {
            printf("Something was written in '%s'\en", argv[1]);
        }
    }
}
.Ed
.Sh ERRORS The .Fn kqueue system call fails if: .Bl -tag -width Er .It Bq Er ENOMEM The kernel failed to allocate enough memory for the kernel queue. .It Bq Er ENOMEM The .Dv RLIMIT_KQUEUES rlimit (see .Xr getrlimit 2 ) for the current user would be exceeded. .It Bq Er EMFILE The per-process descriptor table is full. .It Bq Er ENFILE The system file table is full. .El .Pp The .Fn kevent system call fails if: .Bl -tag -width Er .It Bq Er EACCES The process does not have permission to register a filter. .It Bq Er EFAULT There was an error reading or writing the .Va kevent structure. .It Bq Er EBADF The specified descriptor is invalid. .It Bq Er EINTR A signal was delivered before the timeout expired and before any events were placed on the kqueue for return. .It Bq Er EINTR A cancellation request was delivered to the thread, but not yet handled. .It Bq Er EINVAL The specified time limit or filter is invalid. .It Bq Er EINVAL The specified length of the event or change lists is negative. .It Bq Er ENOENT The event could not be found to be modified or deleted.
.It Bq Er ENOMEM No memory was available to register the event or, in the special case of a timer, the maximum number of timers has been exceeded. This maximum is configurable via the .Va kern.kq_calloutmax sysctl. .It Bq Er ESRCH The specified process to attach to does not exist. .El .Pp When .Fn kevent call fails with .Er EINTR error, all changes in the .Fa changelist have been applied. .Sh SEE ALSO .Xr aio_error 2 , .Xr aio_read 2 , .Xr aio_return 2 , .Xr poll 2 , .Xr read 2 , .Xr select 2 , .Xr sigaction 2 , .Xr write 2 , .Xr pthread_setcancelstate 3 , .Xr signal 3 .Rs .%A Jonathan Lemon .%T "Kqueue: A Generic and Scalable Event Notification Facility" .%I USENIX Association .%B Proceedings of the FREENIX Track: 2001 USENIX Annual Technical Conference .%D June 25-30, 2001 .\".http://www.usenix.org/event/usenix01/freenix01/full_papers/lemon/lemon.pdf .Re .Sh HISTORY The .Fn kqueue and .Fn kevent system calls first appeared in .Fx 4.1 . .Sh AUTHORS The .Fn kqueue system and this manual page were written by .An Jonathan Lemon Aq Mt jlemon@FreeBSD.org . .Sh BUGS The .Fa timeout value is limited to 24 hours; longer timeouts will be silently reinterpreted as 24 hours. .Pp In versions older than .Fx 12.0 , .In sys/event.h failed to parse without including .In sys/types.h manually. diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index db505b234268..5fa5bf9cad06 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,2841 +1,2849 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999,2000,2001 Jonathan Lemon * Copyright 2004 John-Mark Gurney * Copyright (c) 2009 Apple, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_kqueue.h" #ifdef COMPAT_FREEBSD11 #define _WANT_FREEBSD11_KEVENT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); /* * This lock is used if multiple kq locks are required. 
This possibly * should be made into a per proc lock. */ static struct mtx kq_global; MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); #define KQ_GLOBAL_LOCK(lck, haslck) do { \ if (!haslck) \ mtx_lock(lck); \ haslck = 1; \ } while (0) #define KQ_GLOBAL_UNLOCK(lck, haslck) do { \ if (haslck) \ mtx_unlock(lck); \ haslck = 0; \ } while (0) TASKQUEUE_DEFINE_THREAD(kqueue_ctx); static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count); static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int mflag); static int kqueue_acquire(struct file *fp, struct kqueue **kqp); static void kqueue_release(struct kqueue *kq, int locked); static void kqueue_destroy(struct kqueue *kq); static void kqueue_drain(struct kqueue *kq, struct thread *td); static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int mflag); static void kqueue_task(void *arg, int pending); static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *timeout, struct kevent *keva, struct thread *td); static void kqueue_wakeup(struct kqueue *kq); static struct filterops *kqueue_fo_find(int filt); static void kqueue_fo_release(int filt); struct g_kevent_args; static int kern_kevent_generic(struct thread *td, struct g_kevent_args *uap, struct kevent_copyops *k_ops, const char *struct_name); static fo_ioctl_t kqueue_ioctl; static fo_poll_t kqueue_poll; static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; static fo_fill_kinfo_t kqueue_fill_kinfo; static struct fileops kqueueops = { .fo_read = invfo_rdwr, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, .fo_ioctl = kqueue_ioctl, .fo_poll = kqueue_poll, .fo_kqfilter = kqueue_kqfilter, .fo_stat = kqueue_stat, .fo_close = kqueue_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_fill_kinfo = kqueue_fill_kinfo, }; static int knote_attach(struct knote *kn, struct kqueue *kq); static void knote_drop(struct knote *kn, struct thread *td); static void knote_drop_detached(struct knote *kn, struct thread *td); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); static void knote_init(void); static struct knote *knote_alloc(int mflag); static void knote_free(struct knote *kn); static void filt_kqdetach(struct knote *kn); static int filt_kqueue(struct knote *kn, long hint); static int filt_procattach(struct knote *kn); static void filt_procdetach(struct knote *kn); static int filt_proc(struct knote *kn, long hint); static int filt_fileattach(struct knote *kn); static void filt_timerexpire(void *knx); static void filt_timerexpire_l(struct knote *kn, bool proc_locked); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static void filt_timerstart(struct knote *kn, sbintime_t to); static void filt_timertouch(struct knote *kn, struct kevent *kev, u_long type); static int filt_timervalidate(struct knote *kn, sbintime_t *to); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); static void filt_userdetach(struct knote *kn); static int filt_user(struct knote *kn, long hint); static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type); static struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, }; static struct filterops kqread_filtops = { .f_isfd = 1, .f_detach 
= filt_kqdetach, .f_event = filt_kqueue, }; /* XXX - move to kern_proc.c? */ static struct filterops proc_filtops = { .f_isfd = 0, .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, }; static struct filterops timer_filtops = { .f_isfd = 0, .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = filt_timer, .f_touch = filt_timertouch, }; static struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, .f_event = filt_user, .f_touch = filt_usertouch, }; static uma_zone_t knote_zone; static unsigned int __exclusive_cache_line kq_ncallouts; static unsigned int kq_calloutmax = 4 * 1024; SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); /* XXX - ensure not influx ? */ #define KNOTE_ACTIVATE(kn, islock) do { \ if ((islock)) \ mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ else \ KQ_LOCK((kn)->kn_kq); \ (kn)->kn_status |= KN_ACTIVE; \ if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ knote_enqueue((kn)); \ if (!(islock)) \ KQ_UNLOCK((kn)->kn_kq); \ } while (0) #define KQ_LOCK(kq) do { \ mtx_lock(&(kq)->kq_lock); \ } while (0) #define KQ_FLUX_WAKEUP(kq) do { \ if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ (kq)->kq_state &= ~KQ_FLUXWAIT; \ wakeup((kq)); \ } \ } while (0) #define KQ_UNLOCK_FLUX(kq) do { \ KQ_FLUX_WAKEUP(kq); \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_UNLOCK(kq) do { \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_OWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_OWNED); \ } while (0) #define KQ_NOTOWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ } while (0) static struct knlist * kn_list_lock(struct knote *kn) { struct knlist *knl; knl = kn->kn_knlist; if (knl != NULL) knl->kl_lock(knl->kl_lockarg); return (knl); } static void kn_list_unlock(struct knlist *knl) { bool do_free; if (knl == NULL) return; do_free = knl->kl_autodestroy && knlist_empty(knl); knl->kl_unlock(knl->kl_lockarg); if (do_free) { knlist_destroy(knl); free(knl, M_KQUEUE); } } static bool kn_in_flux(struct knote *kn) { return (kn->kn_influx > 0); } static void kn_enter_flux(struct knote *kn) { KQ_OWNED(kn->kn_kq); MPASS(kn->kn_influx < INT_MAX); kn->kn_influx++; } static bool kn_leave_flux(struct knote *kn) { KQ_OWNED(kn->kn_kq); MPASS(kn->kn_influx > 0); kn->kn_influx--; return (kn->kn_influx == 0); } #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ else \ KNL_ASSERT_UNLOCKED(knl); \ } while (0) #ifdef INVARIANTS #define KNL_ASSERT_LOCKED(knl) do { \ knl->kl_assert_lock((knl)->kl_lockarg, LA_LOCKED); \ } while (0) #define KNL_ASSERT_UNLOCKED(knl) do { \ knl->kl_assert_lock((knl)->kl_lockarg, LA_UNLOCKED); \ } while (0) #else /* !INVARIANTS */ #define KNL_ASSERT_LOCKED(knl) do {} while (0) #define KNL_ASSERT_UNLOCKED(knl) do {} while (0) #endif /* INVARIANTS */ #ifndef KN_HASHSIZE #define KN_HASHSIZE 64 /* XXX should be tunable */ #endif #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) static int filt_nullattach(struct knote *kn) { return (ENXIO); }; struct filterops null_filtops = { .f_isfd = 0, .f_attach = filt_nullattach, }; /* XXX - make SYSINIT to add these, and move into respective modules. */ extern struct filterops sig_filtops; extern struct filterops fs_filtops; /* * Table for for all system-defined filters. 
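 * Filters are negative constants (EVFILT_READ is -1, EVFILT_WRITE is -2,
 * and so on), so the entry for filter f lives at index ~f, i.e. -f - 1.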
*/ static struct mtx filterops_lock; MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", MTX_DEF); static struct { struct filterops *for_fop; int for_nolock; int for_refcnt; } sysfilt_ops[EVFILT_SYSCOUNT] = { { &file_filtops, 1 }, /* EVFILT_READ */ { &file_filtops, 1 }, /* EVFILT_WRITE */ { &null_filtops }, /* EVFILT_AIO */ { &file_filtops, 1 }, /* EVFILT_VNODE */ { &proc_filtops, 1 }, /* EVFILT_PROC */ { &sig_filtops, 1 }, /* EVFILT_SIGNAL */ { &timer_filtops, 1 }, /* EVFILT_TIMER */ { &file_filtops, 1 }, /* EVFILT_PROCDESC */ { &fs_filtops, 1 }, /* EVFILT_FS */ { &null_filtops }, /* EVFILT_LIO */ { &user_filtops, 1 }, /* EVFILT_USER */ { &null_filtops }, /* EVFILT_SENDFILE */ { &file_filtops, 1 }, /* EVFILT_EMPTY */ }; /* * Simple redirection for all cdevsw style objects to call their fo_kqfilter * method. */ static int filt_fileattach(struct knote *kn) { return (fo_kqfilter(kn->kn_fp, kn)); } /*ARGSUSED*/ static int kqueue_kqfilter(struct file *fp, struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; if (kn->kn_filter != EVFILT_READ) return (EINVAL); kn->kn_status |= KN_KQUEUE; kn->kn_fop = &kqread_filtops; knlist_add(&kq->kq_sel.si_note, kn, 0); return (0); } static void filt_kqdetach(struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; knlist_remove(&kq->kq_sel.si_note, kn, 0); } /*ARGSUSED*/ static int filt_kqueue(struct knote *kn, long hint) { struct kqueue *kq = kn->kn_fp->f_data; kn->kn_data = kq->kq_count; return (kn->kn_data > 0); } /* XXX - move to kern_proc.c? */ static int filt_procattach(struct knote *kn) { struct proc *p; int error; bool exiting, immediate; exiting = immediate = false; if (kn->kn_sfflags & NOTE_EXIT) p = pfind_any(kn->kn_id); else p = pfind(kn->kn_id); if (p == NULL) return (ESRCH); if (p->p_flag & P_WEXIT) exiting = true; if ((error = p_cansee(curthread, p))) { PROC_UNLOCK(p); return (error); } kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ /* * Internal flag indicating registration done by kernel for the * purposes of getting a NOTE_CHILD notification. */ if (kn->kn_flags & EV_FLAG2) { kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); immediate = true; /* Force immediate activation of child note. */ } /* * Internal flag indicating registration done by kernel (for other than * NOTE_CHILD). */ if (kn->kn_flags & EV_FLAG1) { kn->kn_flags &= ~EV_FLAG1; } knlist_add(p->p_klist, kn, 1); /* * Immediately activate any child notes or, in the case of a zombie * target process, exit notes. The latter is necessary to handle the * case where the target process, e.g. a child, dies before the kevent * is registered. */ if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) KNOTE_ACTIVATE(kn, 0); PROC_UNLOCK(p); return (0); } /* * The knote may be attached to a different process, which may exit, * leaving nothing for the knote to be attached to. So when the process * exits, the knote is marked as DETACHED and also flagged as ONESHOT so * it will be deleted when read out. However, as part of the knote deletion, * this routine is called, so a check is needed to avoid actually performing * a detach, because the original process does not exist any more. */ /* XXX - move to kern_proc.c? */ static void filt_procdetach(struct knote *kn) { knlist_remove(kn->kn_knlist, kn, 0); kn->kn_ptr.p_proc = NULL; } /* XXX - move to kern_proc.c? 
*/ static int filt_proc(struct knote *kn, long hint) { struct proc *p; u_int event; p = kn->kn_ptr.p_proc; if (p == NULL) /* already activated, from attach filter */ return (0); /* Mask off extra data. */ event = (u_int)hint & NOTE_PCTRLMASK; /* If the user is interested in this event, record it. */ if (kn->kn_sfflags & event) kn->kn_fflags |= event; /* Process is gone, so flag the event as finished. */ if (event == NOTE_EXIT) { kn->kn_flags |= EV_EOF | EV_ONESHOT; kn->kn_ptr.p_proc = NULL; if (kn->kn_fflags & NOTE_EXIT) kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig); if (kn->kn_fflags == 0) kn->kn_flags |= EV_DROP; return (1); } return (kn->kn_fflags != 0); } /* * Called when the process forked. It mostly does the same as the * knote(), activating all knotes registered to be activated when the * process forked. Additionally, for each knote attached to the * parent, check whether user wants to track the new process. If so * attach a new knote to it, and immediately report an event with the * child's pid. */ void knote_fork(struct knlist *list, int pid) { struct kqueue *kq; struct knote *kn; struct kevent kev; int error; MPASS(list != NULL); KNL_ASSERT_LOCKED(list); if (SLIST_EMPTY(&list->kl_list)) return; memset(&kev, 0, sizeof(kev)); SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { KQ_UNLOCK(kq); continue; } /* * The same as knote(), activate the event. */ if ((kn->kn_sfflags & NOTE_TRACK) == 0) { if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK(kq); continue; } /* * The NOTE_TRACK case. In addition to the activation * of the event, we need to register new events to * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ kn_enter_flux(kn); KQ_UNLOCK(kq); list->kl_unlock(list->kl_lockarg); /* * Activate existing knote and register tracking knotes with * new process. * * First register a knote to get just the child notice. This * must be a separate note from a potential NOTE_EXIT * notification since both NOTE_CHILD and NOTE_EXIT are defined * to use the data field (in conflicting ways). */ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, M_NOWAIT); if (error) kn->kn_fflags |= NOTE_TRACKERR; /* * Then register another knote to track other potential events * from the new process. */ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, M_NOWAIT); if (error) kn->kn_fflags |= NOTE_TRACKERR; if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 0); list->kl_lock(list->kl_lockarg); KQ_LOCK(kq); kn_leave_flux(kn); KQ_UNLOCK_FLUX(kq); } } /* * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. */ #define NOTE_TIMER_PRECMASK \ (NOTE_SECONDS | NOTE_MSECONDS | NOTE_USECONDS | NOTE_NSECONDS) static sbintime_t timer2sbintime(int64_t data, int flags) { int64_t secs; /* * Macros for converting to the fractional second portion of an * sbintime_t using 64bit multiplication to improve precision. 
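 *
 * Each macro effectively scales its argument by 2^32 divided by the
 * number of units per second, computed via a 64-bit reciprocal to
 * preserve precision; e.g. MS_TO_SBT(500) is roughly 500 * 2^32 / 1000
 * == 2^31, i.e. half a second in the 32.32 fixed-point fraction.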
*/ #define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) #define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) #define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) switch (flags & NOTE_TIMER_PRECMASK) { case NOTE_SECONDS: #ifdef __LP64__ if (data > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return ((sbintime_t)data << 32); case NOTE_MSECONDS: /* FALLTHROUGH */ case 0: if (data >= 1000) { secs = data / 1000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | MS_TO_SBT(data % 1000)); } return (MS_TO_SBT(data)); case NOTE_USECONDS: if (data >= 1000000) { secs = data / 1000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | US_TO_SBT(data % 1000000)); } return (US_TO_SBT(data)); case NOTE_NSECONDS: if (data >= 1000000000) { secs = data / 1000000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return (SBT_MAX); #endif return (secs << 32 | NS_TO_SBT(data % 1000000000)); } return (NS_TO_SBT(data)); default: break; } return (-1); } struct kq_timer_cb_data { struct callout c; struct proc *p; struct knote *kn; int cpuid; int flags; TAILQ_ENTRY(kq_timer_cb_data) link; sbintime_t next; /* next timer event fires at */ sbintime_t to; /* precalculated timer period, 0 for abs */ }; #define KQ_TIMER_CB_ENQUEUED 0x01 static void kqtimer_sched_callout(struct kq_timer_cb_data *kc) { callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kc->kn, kc->cpuid, C_ABSOLUTE); } void kqtimer_proc_continue(struct proc *p) { struct kq_timer_cb_data *kc, *kc1; struct bintime bt; sbintime_t now; PROC_LOCK_ASSERT(p, MA_OWNED); getboottimebin(&bt); now = bttosbt(bt); TAILQ_FOREACH_SAFE(kc, &p->p_kqtim_stop, link, kc1) { TAILQ_REMOVE(&p->p_kqtim_stop, kc, link); kc->flags &= ~KQ_TIMER_CB_ENQUEUED; if (kc->next <= now) filt_timerexpire_l(kc->kn, true); else kqtimer_sched_callout(kc); } } static void filt_timerexpire_l(struct knote *kn, bool proc_locked) { struct kq_timer_cb_data *kc; struct proc *p; uint64_t delta; sbintime_t now; kc = kn->kn_ptr.p_v; if ((kn->kn_flags & EV_ONESHOT) != 0 || kc->to == 0) { kn->kn_data++; KNOTE_ACTIVATE(kn, 0); return; } now = sbinuptime(); if (now >= kc->next) { delta = (now - kc->next) / kc->to; if (delta == 0) delta = 1; kn->kn_data += delta; kc->next += (delta + 1) * kc->to; if (now >= kc->next) /* overflow */ kc->next = now + kc->to; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ } /* * Initial check for stopped kc->p is racy. It is fine to * miss the set of the stop flags, at worst we would schedule * one more callout. On the other hand, it is not fine to not * schedule when we we missed clearing of the flags, we * recheck them under the lock and observe consistent state. 
*/ p = kc->p; if (P_SHOULDSTOP(p) || P_KILLED(p)) { if (!proc_locked) PROC_LOCK(p); if (P_SHOULDSTOP(p) || P_KILLED(p)) { if ((kc->flags & KQ_TIMER_CB_ENQUEUED) == 0) { kc->flags |= KQ_TIMER_CB_ENQUEUED; TAILQ_INSERT_TAIL(&p->p_kqtim_stop, kc, link); } if (!proc_locked) PROC_UNLOCK(p); return; } if (!proc_locked) PROC_UNLOCK(p); } kqtimer_sched_callout(kc); } static void filt_timerexpire(void *knx) { filt_timerexpire_l(knx, false); } /* * data contains amount of time to sleep */ static int filt_timervalidate(struct knote *kn, sbintime_t *to) { struct bintime bt; sbintime_t sbt; if (kn->kn_sdata < 0) return (EINVAL); if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; /* * The only fflags values supported are the timer unit * (precision) and the absolute time indicator. */ if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0) return (EINVAL); *to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { getboottimebin(&bt); sbt = bttosbt(bt); *to -= sbt; } if (*to < 0) return (EINVAL); return (0); } static int filt_timerattach(struct knote *kn) { struct kq_timer_cb_data *kc; sbintime_t to; int error; error = filt_timervalidate(kn, &to); if (error != 0) return (error); if (atomic_fetchadd_int(&kq_ncallouts, 1) + 1 > kq_calloutmax) { atomic_subtract_int(&kq_ncallouts, 1); return (ENOMEM); } if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); kc->kn = kn; kc->p = curproc; kc->cpuid = PCPU_GET(cpuid); kc->flags = 0; callout_init(&kc->c, 1); filt_timerstart(kn, to); return (0); } static void filt_timerstart(struct knote *kn, sbintime_t to) { struct kq_timer_cb_data *kc; kc = kn->kn_ptr.p_v; if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { kc->next = to; kc->to = 0; } else { kc->next = to + sbinuptime(); kc->to = to; } kqtimer_sched_callout(kc); } static void filt_timerdetach(struct knote *kn) { struct kq_timer_cb_data *kc; unsigned int old __unused; bool pending; kc = kn->kn_ptr.p_v; do { callout_drain(&kc->c); /* * kqtimer_proc_continue() might have rescheduled this callout. * Double-check, using the process mutex as an interlock. */ PROC_LOCK(kc->p); if ((kc->flags & KQ_TIMER_CB_ENQUEUED) != 0) { kc->flags &= ~KQ_TIMER_CB_ENQUEUED; TAILQ_REMOVE(&kc->p->p_kqtim_stop, kc, link); } pending = callout_pending(&kc->c); PROC_UNLOCK(kc->p); } while (pending); free(kc, M_KQUEUE); old = atomic_fetchadd_int(&kq_ncallouts, -1); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } static void filt_timertouch(struct knote *kn, struct kevent *kev, u_long type) { struct kq_timer_cb_data *kc; struct kqueue *kq; sbintime_t to; int error; switch (type) { case EVENT_REGISTER: /* Handle re-added timers that update data/fflags */ if (kev->flags & EV_ADD) { kc = kn->kn_ptr.p_v; /* Drain any existing callout. */ callout_drain(&kc->c); /* Throw away any existing undelivered record * of the timer expiration. This is done under * the presumption that if a process is * re-adding this timer with new parameters, * it is no longer interested in what may have * happened under the old parameters. If it is * interested, it can wait for the expiration, * delete the old timer definition, and then * add the new one. 
* * This has to be done while the kq is locked: * - if enqueued, dequeue * - make it no longer active * - clear the count of expiration events */ kq = kn->kn_kq; KQ_LOCK(kq); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); kn->kn_status &= ~KN_ACTIVE; kn->kn_data = 0; KQ_UNLOCK(kq); /* Reschedule timer based on new data/fflags */ kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; error = filt_timervalidate(kn, &to); if (error != 0) { kn->kn_flags |= EV_ERROR; kn->kn_data = error; } else filt_timerstart(kn, to); } break; case EVENT_PROCESS: *kev = kn->kn_kevent; if (kn->kn_flags & EV_CLEAR) { kn->kn_data = 0; kn->kn_fflags = 0; } break; default: panic("filt_timertouch() - invalid type (%ld)", type); break; } } static int filt_timer(struct knote *kn, long hint) { return (kn->kn_data != 0); } static int filt_userattach(struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ kn->kn_hook = NULL; if (kn->kn_fflags & NOTE_TRIGGER) kn->kn_hookid = 1; else kn->kn_hookid = 0; return (0); } static void filt_userdetach(__unused struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ } static int filt_user(struct knote *kn, __unused long hint) { return (kn->kn_hookid); } static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) { u_int ffctrl; switch (type) { case EVENT_REGISTER: if (kev->fflags & NOTE_TRIGGER) kn->kn_hookid = 1; ffctrl = kev->fflags & NOTE_FFCTRLMASK; kev->fflags &= NOTE_FFLAGSMASK; switch (ffctrl) { case NOTE_FFNOP: break; case NOTE_FFAND: kn->kn_sfflags &= kev->fflags; break; case NOTE_FFOR: kn->kn_sfflags |= kev->fflags; break; case NOTE_FFCOPY: kn->kn_sfflags = kev->fflags; break; default: /* XXX Return error? */ break; } kn->kn_sdata = kev->data; if (kev->flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; case EVENT_PROCESS: *kev = kn->kn_kevent; kev->fflags = kn->kn_sfflags; kev->data = kn->kn_sdata; if (kn->kn_flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; default: panic("filt_usertouch() - invalid type (%ld)", type); break; } } int sys_kqueue(struct thread *td, struct kqueue_args *uap) { return (kern_kqueue(td, 0, NULL)); } static void kqueue_init(struct kqueue *kq) { mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); TAILQ_INIT(&kq->kq_head); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); } int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) { struct filedesc *fdp; struct kqueue *kq; struct file *fp; struct ucred *cred; int fd, error; fdp = td->td_proc->p_fd; cred = td->td_ucred; if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); error = falloc_caps(td, &fp, &fd, flags, fcaps); if (error != 0) { chgkqcnt(cred->cr_ruidinfo, -1, 0); return (error); } /* An extra reference on `fp' has been held for us by falloc(). 
*/ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); kqueue_init(kq); kq->kq_fdp = fdp; kq->kq_cred = crhold(cred); FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); FILEDESC_XUNLOCK(fdp); finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; return (0); } struct g_kevent_args { int fd; void *changelist; int nchanges; void *eventlist; int nevents; const struct timespec *timeout; }; int sys_kevent(struct thread *td, struct kevent_args *uap) { struct kevent_copyops k_ops = { .arg = uap, .k_copyout = kevent_copyout, .k_copyin = kevent_copyin, .kevent_size = sizeof(struct kevent), }; struct g_kevent_args gk_args = { .fd = uap->fd, .changelist = uap->changelist, .nchanges = uap->nchanges, .eventlist = uap->eventlist, .nevents = uap->nevents, .timeout = uap->timeout, }; return (kern_kevent_generic(td, &gk_args, &k_ops, "kevent")); } static int kern_kevent_generic(struct thread *td, struct g_kevent_args *uap, struct kevent_copyops *k_ops, const char *struct_name) { struct timespec ts, *tsp; #ifdef KTRACE struct kevent *eventlist = uap->eventlist; #endif int error; if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray(struct_name, UIO_USERSPACE, uap->changelist, uap->nchanges, k_ops->kevent_size); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray(struct_name, UIO_USERSPACE, eventlist, td->td_retval[0], k_ops->kevent_size); #endif return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int kevent_copyout(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyout(kevp, uap->eventlist, count * sizeof *kevp); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int kevent_copyin(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyin(uap->changelist, kevp, count * sizeof *kevp); if (error == 0) uap->changelist += count; return (error); } #ifdef COMPAT_FREEBSD11 static int kevent11_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd11_kevent_args *uap; struct kevent_freebsd11 kev11; int error, i; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_kevent_args *)arg; for (i = 0; i < count; i++) { kev11.ident = kevp->ident; kev11.filter = kevp->filter; kev11.flags = kevp->flags; kev11.fflags = kevp->fflags; kev11.data = kevp->data; kev11.udata = kevp->udata; error = copyout(&kev11, uap->eventlist, sizeof(kev11)); if (error != 0) break; uap->eventlist++; kevp++; } return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. 
*/ static int kevent11_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd11_kevent_args *uap; struct kevent_freebsd11 kev11; int error, i; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_kevent_args *)arg; for (i = 0; i < count; i++) { error = copyin(uap->changelist, &kev11, sizeof(kev11)); if (error != 0) break; kevp->ident = kev11.ident; kevp->filter = kev11.filter; kevp->flags = kev11.flags; kevp->fflags = kev11.fflags; kevp->data = (uintptr_t)kev11.data; kevp->udata = kev11.udata; bzero(&kevp->ext, sizeof(kevp->ext)); uap->changelist++; kevp++; } return (error); } int freebsd11_kevent(struct thread *td, struct freebsd11_kevent_args *uap) { struct kevent_copyops k_ops = { .arg = uap, .k_copyout = kevent11_copyout, .k_copyin = kevent11_copyin, .kevent_size = sizeof(struct kevent_freebsd11), }; struct g_kevent_args gk_args = { .fd = uap->fd, .changelist = uap->changelist, .nchanges = uap->nchanges, .eventlist = uap->eventlist, .nevents = uap->nevents, .timeout = uap->timeout, }; return (kern_kevent_generic(td, &gk_args, &k_ops, "kevent_freebsd11")); } #endif int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { cap_rights_t rights; struct file *fp; int error; cap_rights_init_zero(&rights); if (nchanges > 0) cap_rights_set_one(&rights, CAP_KQUEUE_CHANGE); if (nevents > 0) cap_rights_set_one(&rights, CAP_KQUEUE_EVENT); error = fget(td, fd, &rights, &fp); if (error != 0) return (error); error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout); fdrop(fp, td); return (error); } static int kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kevent keva[KQ_NEVENTS]; struct kevent *kevp, *changes; int i, n, nerrors, error; if (nchanges < 0) return (EINVAL); nerrors = 0; while (nchanges > 0) { n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges; error = k_ops->k_copyin(k_ops->arg, keva, n); if (error) return (error); changes = keva; for (i = 0; i < n; i++) { kevp = &changes[i]; if (!kevp->filter) continue; kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp, td, M_WAITOK); if (error || (kevp->flags & EV_RECEIPT)) { if (nevents == 0) return (error); kevp->flags = EV_ERROR; kevp->data = error; (void)k_ops->k_copyout(k_ops->arg, kevp, 1); nevents--; nerrors++; } } nchanges -= n; } if (nerrors) { td->td_retval[0] = nerrors; return (0); } return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td)); } int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kqueue *kq; int error; error = kqueue_acquire(fp, &kq); if (error != 0) return (error); error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout); kqueue_release(kq, 0); return (error); } /* * Performs a kevent() call on a temporarily created kqueue. This can be * used to perform one-shot polling, similar to poll() and select(). 
*/ int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops) { struct kqueue kq = {}; int error; kqueue_init(&kq); kq.kq_refcnt = 1; error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); kqueue_drain(&kq, td); kqueue_destroy(&kq); return (error); } int kqueue_add_filteropts(int filt, struct filterops *filtops) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { printf( "trying to add a filterop that is out of range: %d is beyond %d\n", ~filt, EVFILT_SYSCOUNT); return EINVAL; } mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop != &null_filtops && sysfilt_ops[~filt].for_fop != NULL) error = EEXIST; else { sysfilt_ops[~filt].for_fop = filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return (error); } int kqueue_del_filteropts(int filt) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return EINVAL; mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop == &null_filtops || sysfilt_ops[~filt].for_fop == NULL) error = EINVAL; else if (sysfilt_ops[~filt].for_refcnt != 0) error = EBUSY; else { sysfilt_ops[~filt].for_fop = &null_filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return error; } static struct filterops * kqueue_fo_find(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return NULL; if (sysfilt_ops[~filt].for_nolock) return sysfilt_ops[~filt].for_fop; mtx_lock(&filterops_lock); sysfilt_ops[~filt].for_refcnt++; if (sysfilt_ops[~filt].for_fop == NULL) sysfilt_ops[~filt].for_fop = &null_filtops; mtx_unlock(&filterops_lock); return sysfilt_ops[~filt].for_fop; } static void kqueue_fo_release(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return; if (sysfilt_ops[~filt].for_nolock) return; mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, ("filter object refcount not valid on release")); sysfilt_ops[~filt].for_refcnt--; mtx_unlock(&filterops_lock); } /* * A ref to kq (obtained via kqueue_acquire) must be held. */ static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int mflag) { struct filterops *fops; struct file *fp; struct knote *kn, *tkn; struct knlist *knl; int error, filt, event; int haskqglobal, filedesc_unlock; if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) return (EINVAL); fp = NULL; kn = NULL; knl = NULL; error = 0; haskqglobal = 0; filedesc_unlock = 0; filt = kev->filter; fops = kqueue_fo_find(filt); if (fops == NULL) return EINVAL; if (kev->flags & EV_ADD) { + /* Reject an invalid flag pair early */ + if (kev->flags & EV_KEEPUDATA) { + tkn = NULL; + error = EINVAL; + goto done; + } + /* * Prevent waiting with locks. Non-sleepable * allocation failures are handled in the loop, only * if the spare knote appears to be actually required. */ tkn = knote_alloc(mflag); } else { tkn = NULL; } findkn: if (fops->f_isfd) { KASSERT(td != NULL, ("td is NULL")); if (kev->ident > INT_MAX) error = EBADF; else error = fget(td, kev->ident, &cap_event_rights, &fp); if (error) goto done; if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, kev->ident, M_NOWAIT) != 0) { /* try again */ fdrop(fp, td); fp = NULL; error = kqueue_expand(kq, fops, kev->ident, mflag); if (error) goto done; goto findkn; } if (fp->f_type == DTYPE_KQUEUE) { /* * If we add some intelligence about what we are doing, * we should be able to support events on ourselves. 
* We need to know when we are doing this to prevent * getting both the knlist lock and the kq lock since * they are the same thing. */ if (fp->f_data == kq) { error = EINVAL; goto done; } /* * Pre-lock the filedesc before the global * lock mutex, see the comment in * kqueue_close(). */ FILEDESC_XLOCK(td->td_proc->p_fd); filedesc_unlock = 1; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); } KQ_LOCK(kq); if (kev->ident < kq->kq_knlistsize) { SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) if (kev->filter == kn->kn_filter) break; } } else { if ((kev->flags & EV_ADD) == EV_ADD) { error = kqueue_expand(kq, fops, kev->ident, mflag); if (error != 0) goto done; } KQ_LOCK(kq); /* * If possible, find an existing knote to use for this kevent. */ if (kev->filter == EVFILT_PROC && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { /* This is an internal creation of a process tracking * note. Don't attempt to coalesce this with an * existing note. */ ; } else if (kq->kq_knhashmask != 0) { struct klist *list; list = &kq->kq_knhash[ KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; SLIST_FOREACH(kn, list, kn_link) if (kev->ident == kn->kn_id && kev->filter == kn->kn_filter) break; } } /* knote is in the process of changing, wait for it to stabilize. */ if (kn != NULL && kn_in_flux(kn)) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { FILEDESC_XUNLOCK(td->td_proc->p_fd); filedesc_unlock = 0; } kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); if (fp != NULL) { fdrop(fp, td); fp = NULL; } goto findkn; } /* * kn now contains the matching knote, or NULL if no match */ if (kn == NULL) { if (kev->flags & EV_ADD) { kn = tkn; tkn = NULL; if (kn == NULL) { KQ_UNLOCK(kq); error = ENOMEM; goto done; } kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; /* * apply reference counts to knote structure, and * do not release it at the end of this routine. */ fops = NULL; fp = NULL; kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; kev->data = 0; kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); kn->kn_status = KN_DETACHED; if ((kev->flags & EV_DISABLE) != 0) kn->kn_status |= KN_DISABLED; kn_enter_flux(kn); error = knote_attach(kn, kq); KQ_UNLOCK(kq); if (error != 0) { tkn = kn; goto done; } if ((error = kn->kn_fop->f_attach(kn)) != 0) { knote_drop_detached(kn, td); goto done; } knl = kn_list_lock(kn); goto done_ev_add; } else { /* No matching knote and the EV_ADD flag is not set. */ KQ_UNLOCK(kq); error = ENOENT; goto done; } } if (kev->flags & EV_DELETE) { kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); goto done; } if (kev->flags & EV_FORCEONESHOT) { kn->kn_flags |= EV_ONESHOT; KNOTE_ACTIVATE(kn, 1); } if ((kev->flags & EV_ENABLE) != 0) kn->kn_status &= ~KN_DISABLED; else if ((kev->flags & EV_DISABLE) != 0) kn->kn_status |= KN_DISABLED; /* * The user may change some filter values after the initial EV_ADD, * but doing so will not reset any filter which has already been * triggered. */ kn->kn_status |= KN_SCAN; kn_enter_flux(kn); KQ_UNLOCK(kq); knl = kn_list_lock(kn); - kn->kn_kevent.udata = kev->udata; + if ((kev->flags & EV_KEEPUDATA) == 0) + kn->kn_kevent.udata = kev->udata; if (!fops->f_isfd && fops->f_touch != NULL) { fops->f_touch(kn, kev, EVENT_REGISTER); } else { kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; } done_ev_add: /* * We can get here with kn->kn_knlist == NULL. 
This can happen when * the initial attach event decides that the event is "completed" * already, e.g., filt_procattach() is called on a zombie process. It * will call filt_proc() which will remove it from the list, and NULL * kn_knlist. * * KN_DISABLED will be stable while the knote is in flux, so the * unlocked read will not race with an update. */ if ((kn->kn_status & KN_DISABLED) == 0) event = kn->kn_fop->f_event(kn, 0); else event = 0; KQ_LOCK(kq); if (event) kn->kn_status |= KN_ACTIVE; if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == KN_ACTIVE) knote_enqueue(kn); kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); done: KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) FILEDESC_XUNLOCK(td->td_proc->p_fd); if (fp != NULL) fdrop(fp, td); knote_free(tkn); if (fops != NULL) kqueue_fo_release(filt); return (error); } static int kqueue_acquire(struct file *fp, struct kqueue **kqp) { int error; struct kqueue *kq; error = 0; kq = fp->f_data; if (fp->f_type != DTYPE_KQUEUE || kq == NULL) return (EBADF); *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); return (EBADF); } kq->kq_refcnt++; KQ_UNLOCK(kq); return error; } static void kqueue_release(struct kqueue *kq, int locked) { if (locked) KQ_OWNED(kq); else KQ_LOCK(kq); kq->kq_refcnt--; if (kq->kq_refcnt == 1) wakeup(&kq->kq_refcnt); if (!locked) KQ_UNLOCK(kq); } void kqueue_drain_schedtask(void) { taskqueue_quiesce(taskqueue_kqueue_ctx); } static void kqueue_schedtask(struct kqueue *kq) { struct thread *td; KQ_OWNED(kq); KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), ("scheduling kqueue task while draining")); if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task); kq->kq_state |= KQ_TASKSCHED; td = curthread; thread_lock(td); td->td_flags |= TDF_ASTPENDING | TDF_KQTICKLED; thread_unlock(td); } } /* * Expand the kq to make sure we have storage for fops/ident pair. * * Return 0 on success (or no work necessary), return errno on failure. */ static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int mflag) { struct klist *list, *tmp_knhash, *to_free; u_long tmp_knhashmask; int error, fd, size; KQ_NOTOWNED(kq); error = 0; to_free = NULL; if (fops->f_isfd) { fd = ident; if (kq->kq_knlistsize <= fd) { size = kq->kq_knlistsize; while (size <= fd) size += KQEXTENT; list = malloc(size * sizeof(*list), M_KQUEUE, mflag); if (list == NULL) return ENOMEM; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) != 0) { to_free = list; error = EBADF; } else if (kq->kq_knlistsize > fd) { to_free = list; } else { if (kq->kq_knlist != NULL) { bcopy(kq->kq_knlist, list, kq->kq_knlistsize * sizeof(*list)); to_free = kq->kq_knlist; kq->kq_knlist = NULL; } bzero((caddr_t)list + kq->kq_knlistsize * sizeof(*list), (size - kq->kq_knlistsize) * sizeof(*list)); kq->kq_knlistsize = size; kq->kq_knlist = list; } KQ_UNLOCK(kq); } } else { if (kq->kq_knhashmask == 0) { tmp_knhash = hashinit_flags(KN_HASHSIZE, M_KQUEUE, &tmp_knhashmask, (mflag & M_WAITOK) != 0 ? 
HASH_WAITOK : HASH_NOWAIT); if (tmp_knhash == NULL) return (ENOMEM); KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) != 0) { to_free = tmp_knhash; error = EBADF; } else if (kq->kq_knhashmask == 0) { kq->kq_knhash = tmp_knhash; kq->kq_knhashmask = tmp_knhashmask; } else { to_free = tmp_knhash; } KQ_UNLOCK(kq); } } free(to_free, M_KQUEUE); KQ_NOTOWNED(kq); return (error); } static void kqueue_task(void *arg, int pending) { struct kqueue *kq; int haskqglobal; haskqglobal = 0; kq = arg; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); KQ_LOCK(kq); KNOTE_LOCKED(&kq->kq_sel.si_note, 0); kq->kq_state &= ~KQ_TASKSCHED; if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { wakeup(&kq->kq_state); } KQ_UNLOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); } /* * Scan, update kn_data (if not ONESHOT), and copyout triggered events. * We treat KN_MARKER knotes as if they are in flux. */ static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *tsp, struct kevent *keva, struct thread *td) { struct kevent *kevp; struct knote *kn, *marker; struct knlist *knl; sbintime_t asbt, rsbt; int count, error, haskqglobal, influx, nkev, touch; count = maxevents; nkev = 0; error = 0; haskqglobal = 0; if (maxevents == 0) goto done_nl; if (maxevents < 0) { error = EINVAL; goto done_nl; } rsbt = 0; if (tsp != NULL) { if (tsp->tv_sec < 0 || tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000) { error = EINVAL; goto done_nl; } if (timespecisset(tsp)) { if (tsp->tv_sec <= INT32_MAX) { rsbt = tstosbt(*tsp); if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = 0; rsbt >>= tc_precexp; } else asbt = 0; } else asbt = -1; } else asbt = 0; marker = knote_alloc(M_WAITOK); marker->kn_status = KN_MARKER; KQ_LOCK(kq); retry: kevp = keva; if (kq->kq_count == 0) { if (asbt == -1) { error = EWOULDBLOCK; } else { kq->kq_state |= KQ_SLEEP; error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH, "kqread", asbt, rsbt, C_ABSOLUTE); } if (error == 0) goto retry; /* don't restart after signals... */ if (error == ERESTART) error = EINTR; else if (error == EWOULDBLOCK) error = 0; goto done; } TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); influx = 0; while (count) { KQ_OWNED(kq); kn = TAILQ_FIRST(&kq->kq_head); if ((kn->kn_status == KN_MARKER && kn != marker) || kn_in_flux(kn)) { if (influx) { influx = 0; KQ_FLUX_WAKEUP(kq); } kq->kq_state |= KQ_FLUXWAIT; error = msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); continue; } TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { kn->kn_status &= ~KN_QUEUED; kq->kq_count--; continue; } if (kn == marker) { KQ_FLUX_WAKEUP(kq); if (count == maxevents) goto retry; goto done; } KASSERT(!kn_in_flux(kn), ("knote %p is unexpectedly in flux", kn)); if ((kn->kn_flags & EV_DROP) == EV_DROP) { kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've * marked it as in flux. */ knote_drop(kn, td); KQ_LOCK(kq); continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've * marked the knote as being in flux. 
*/ *kevp = kn->kn_kevent; knote_drop(kn, td); KQ_LOCK(kq); kn = NULL; } else { kn->kn_status |= KN_SCAN; kn_enter_flux(kn); KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); knl = kn_list_lock(kn); if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN); kn_leave_flux(kn); kq->kq_count--; kn_list_unlock(knl); influx = 1; continue; } touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); if (touch) kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); else *kevp = kn->kn_kevent; KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { /* * Manually clear knotes who weren't * 'touch'ed. */ if (touch == 0 && kn->kn_flags & EV_CLEAR) { kn->kn_data = 0; kn->kn_fflags = 0; } if (kn->kn_flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); kq->kq_count--; } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_SCAN; kn_leave_flux(kn); kn_list_unlock(knl); influx = 1; } /* we are returning a copy to the user */ kevp++; nkev++; count--; if (nkev == KQ_NEVENTS) { influx = 0; KQ_UNLOCK_FLUX(kq); error = k_ops->k_copyout(k_ops->arg, keva, nkev); nkev = 0; kevp = keva; KQ_LOCK(kq); if (error) break; } } TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe); done: KQ_OWNED(kq); KQ_UNLOCK_FLUX(kq); knote_free(marker); done_nl: KQ_NOTOWNED(kq); if (nkev != 0) error = k_ops->k_copyout(k_ops->arg, keva, nkev); td->td_retval[0] = maxevents - count; return (error); } /*ARGSUSED*/ static int kqueue_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { /* * Enabling sigio causes two major problems: * 1) infinite recursion: * Synopsys: kevent is being used to track signals and have FIOASYNC * set. On receipt of a signal this will cause a kqueue to recurse * into itself over and over. Sending the sigio causes the kqueue * to become ready, which in turn posts sigio again, forever. * Solution: this can be solved by setting a flag in the kqueue that * we have a SIGIO in progress. * 2) locking problems: * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts * us above the proc and pgrp locks. * Solution: Post a signal using an async mechanism, being sure to * record a generation count in the delivery so that we do not deliver * a signal to the wrong process. * * Note, these two mechanisms are somewhat mutually exclusive! */ #if 0 struct kqueue *kq; kq = fp->f_data; switch (cmd) { case FIOASYNC: if (*(int *)data) { kq->kq_state |= KQ_ASYNC; } else { kq->kq_state &= ~KQ_ASYNC; } return (0); case FIOSETOWN: return (fsetown(*(int *)data, &kq->kq_sigio)); case FIOGETOWN: *(int *)data = fgetown(&kq->kq_sigio); return (0); } #endif return (ENOTTY); } /*ARGSUSED*/ static int kqueue_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct kqueue *kq; int revents = 0; int error; if ((error = kqueue_acquire(fp, &kq))) return POLLERR; KQ_LOCK(kq); if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); } else { selrecord(td, &kq->kq_sel); if (SEL_WAITING(&kq->kq_sel)) kq->kq_state |= KQ_SEL; } } kqueue_release(kq, 1); KQ_UNLOCK(kq); return (revents); } /*ARGSUSED*/ static int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, struct thread *td) { bzero((void *)st, sizeof *st); /* * We no longer return kq_count because the unlocked value is useless. 
* If you spent all this time getting the count, why not spend your * syscall better by calling kevent? * * XXX - This is needed for libc_r. */ st->st_mode = S_IFIFO; return (0); } static void kqueue_drain(struct kqueue *kq, struct thread *td) { struct knote *kn; int i; KQ_LOCK(kq); KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, ("kqueue already closing")); kq->kq_state |= KQ_CLOSING; if (kq->kq_refcnt > 1) msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); KASSERT(knlist_empty(&kq->kq_sel.si_note), ("kqueue's knlist not empty")); for (i = 0; i < kq->kq_knlistsize; i++) { while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); continue; } kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); KQ_LOCK(kq); } } if (kq->kq_knhashmask != 0) { for (i = 0; i <= kq->kq_knhashmask; i++) { while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo2", 0); continue; } kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop(kn, td); KQ_LOCK(kq); } } } if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) { kq->kq_state |= KQ_TASKDRAIN; msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } KQ_UNLOCK(kq); } static void kqueue_destroy(struct kqueue *kq) { KASSERT(kq->kq_fdp == NULL, ("kqueue still attached to a file descriptor")); seldrain(&kq->kq_sel); knlist_destroy(&kq->kq_sel.si_note); mtx_destroy(&kq->kq_lock); if (kq->kq_knhash != NULL) free(kq->kq_knhash, M_KQUEUE); if (kq->kq_knlist != NULL) free(kq->kq_knlist, M_KQUEUE); funsetown(&kq->kq_sigio); } /*ARGSUSED*/ static int kqueue_close(struct file *fp, struct thread *td) { struct kqueue *kq = fp->f_data; struct filedesc *fdp; int error; int filedesc_unlock; if ((error = kqueue_acquire(fp, &kq))) return error; kqueue_drain(kq, td); /* * We could be called due to the knote_drop() doing fdrop(), * called from kqueue_register(). In this case the global * lock is owned, and filedesc sx is locked before, to not * take the sleepable lock after non-sleepable. */ fdp = kq->kq_fdp; kq->kq_fdp = NULL; if (!sx_xlocked(FILEDESC_LOCK(fdp))) { FILEDESC_XLOCK(fdp); filedesc_unlock = 1; } else filedesc_unlock = 0; TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list); if (filedesc_unlock) FILEDESC_XUNLOCK(fdp); kqueue_destroy(kq); chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0); crfree(kq->kq_cred); free(kq, M_KQUEUE); fp->f_data = NULL; return (0); } static int kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { kif->kf_type = KF_TYPE_KQUEUE; return (0); } static void kqueue_wakeup(struct kqueue *kq) { KQ_OWNED(kq); if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) { kq->kq_state &= ~KQ_SLEEP; wakeup(kq); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } if (!knlist_empty(&kq->kq_sel.si_note)) kqueue_schedtask(kq); if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) { pgsigio(&kq->kq_sigio, SIGIO, 0); } } /* * Walk down a list of knotes, activating them if their event has triggered. * * There is a possibility to optimize in the case of one kq watching another. * Instead of scheduling a task to wake it up, you could pass enough state * down the chain to make up the parent kqueue. 
Make this code functional * first. */ void knote(struct knlist *list, long hint, int lockflags) { struct kqueue *kq; struct knote *kn, *tkn; int error; if (list == NULL) return; KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED); if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_lock(list->kl_lockarg); /* * If we unlock the list lock (and enter influx), we can * eliminate the kqueue scheduling, but this will introduce * four lock/unlock's for each knote to test. Also, marker * would be needed to keep iteration position, since filters * or other threads could remove events. */ SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { /* * Do not process the influx notes, except for * the influx coming from the kq unlock in the * kqueue_scan(). In the later case, we do * not interfere with the scan, since the code * fragment in kqueue_scan() locks the knlist, * and cannot proceed until we finished. */ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { kn_enter_flux(kn); KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); kn_leave_flux(kn); if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); } else { if (kn->kn_fop->f_event(kn, hint)) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK(kq); } } if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_unlock(list->kl_lockarg); } /* * add a knote to a knlist */ void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); KASSERT((kn->kn_status & KN_DETACHED) != 0, ("knote %p was not detached", kn)); if (!islocked) knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); if (!islocked) knl->kl_unlock(knl->kl_lockarg); KQ_LOCK(kn->kn_kq); kn->kn_knlist = knl; kn->kn_status &= ~KN_DETACHED; KQ_UNLOCK(kn->kn_kq); } static void knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) { KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? 
MA_OWNED : MA_NOTOWNED); KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn)); KASSERT((kn->kn_status & KN_DETACHED) == 0, ("knote %p was already detached", kn)); if (!knlislocked) knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) kn_list_unlock(knl); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; if (!kqislocked) KQ_UNLOCK(kn->kn_kq); } /* * remove knote from the specified knlist */ void knlist_remove(struct knlist *knl, struct knote *kn, int islocked) { knlist_remove_kq(knl, kn, islocked, 0); } int knlist_empty(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); return (SLIST_EMPTY(&knl->kl_list)); } static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", MTX_DEF); static void knlist_mtx_lock(void *arg); static void knlist_mtx_unlock(void *arg); static void knlist_mtx_lock(void *arg) { mtx_lock((struct mtx *)arg); } static void knlist_mtx_unlock(void *arg) { mtx_unlock((struct mtx *)arg); } static void knlist_mtx_assert_lock(void *arg, int what) { if (what == LA_LOCKED) mtx_assert((struct mtx *)arg, MA_OWNED); else mtx_assert((struct mtx *)arg, MA_NOTOWNED); } static void knlist_rw_rlock(void *arg) { rw_rlock((struct rwlock *)arg); } static void knlist_rw_runlock(void *arg) { rw_runlock((struct rwlock *)arg); } static void knlist_rw_assert_lock(void *arg, int what) { if (what == LA_LOCKED) rw_assert((struct rwlock *)arg, RA_LOCKED); else rw_assert((struct rwlock *)arg, RA_UNLOCKED); } void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_lock)(void *, int)) { if (lock == NULL) knl->kl_lockarg = &knlist_lock; else knl->kl_lockarg = lock; if (kl_lock == NULL) knl->kl_lock = knlist_mtx_lock; else knl->kl_lock = kl_lock; if (kl_unlock == NULL) knl->kl_unlock = knlist_mtx_unlock; else knl->kl_unlock = kl_unlock; if (kl_assert_lock == NULL) knl->kl_assert_lock = knlist_mtx_assert_lock; else knl->kl_assert_lock = kl_assert_lock; knl->kl_autodestroy = 0; SLIST_INIT(&knl->kl_list); } void knlist_init_mtx(struct knlist *knl, struct mtx *lock) { knlist_init(knl, lock, NULL, NULL, NULL); } struct knlist * knlist_alloc(struct mtx *lock) { struct knlist *knl; knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); knlist_init_mtx(knl, lock); return (knl); } void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock) { knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock, knlist_rw_assert_lock); } void knlist_destroy(struct knlist *knl) { KASSERT(KNLIST_EMPTY(knl), ("destroying knlist %p with knotes on it", knl)); } void knlist_detach(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); knl->kl_autodestroy = 1; if (knlist_empty(knl)) { knlist_destroy(knl); free(knl, M_KQUEUE); } } /* * Even if we are locked, we may need to drop the lock to allow any influx * knotes time to "settle". 
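 * Concretely, the loop below marks the kqueue KQ_FLUXWAIT and sleeps
 * ("kqkclr") until the remaining in-flux knotes leave flux, then retries.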
*/ void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) { struct knote *kn, *kn2; struct kqueue *kq; KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); if (islocked) KNL_ASSERT_LOCKED(knl); else { KNL_ASSERT_UNLOCKED(knl); again: /* need to reacquire lock since we have dropped it */ knl->kl_lock(knl->kl_lockarg); } SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { kq = kn->kn_kq; KQ_LOCK(kq); if (kn_in_flux(kn)) { KQ_UNLOCK(kq); continue; } knlist_remove_kq(knl, kn, 1, 1); if (killkn) { kn_enter_flux(kn); KQ_UNLOCK(kq); knote_drop_detached(kn, td); } else { /* Make sure cleared knotes disappear soon */ kn->kn_flags |= EV_EOF | EV_ONESHOT; KQ_UNLOCK(kq); } kq = NULL; } if (!SLIST_EMPTY(&knl->kl_list)) { /* there are still in flux knotes remaining */ kn = SLIST_FIRST(&knl->kl_list); kq = kn->kn_kq; KQ_LOCK(kq); KASSERT(kn_in_flux(kn), ("knote removed w/o list lock")); knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); kq = NULL; goto again; } if (islocked) KNL_ASSERT_LOCKED(knl); else { knl->kl_unlock(knl->kl_lockarg); KNL_ASSERT_UNLOCKED(knl); } } /* * Remove all knotes referencing a specified fd must be called with FILEDESC * lock. This prevents a race where a new fd comes along and occupies the * entry and we attach a knote to the fd. */ void knote_fdclose(struct thread *td, int fd) { struct filedesc *fdp = td->td_proc->p_fd; struct kqueue *kq; struct knote *kn; int influx; FILEDESC_XLOCK_ASSERT(fdp); /* * We shouldn't have to worry about new kevents appearing on fd * since filedesc is locked. */ TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { KQ_LOCK(kq); again: influx = 0; while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { if (kn_in_flux(kn)) { /* someone else might be waiting on our knote */ if (influx) wakeup(kq); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); goto again; } kn_enter_flux(kn); KQ_UNLOCK(kq); influx = 1; knote_drop(kn, td); KQ_LOCK(kq); } KQ_UNLOCK_FLUX(kq); } } static int knote_attach(struct knote *kn, struct kqueue *kq) { struct klist *list; KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn)); KQ_OWNED(kq); if ((kq->kq_state & KQ_CLOSING) != 0) return (EBADF); if (kn->kn_fop->f_isfd) { if (kn->kn_id >= kq->kq_knlistsize) return (ENOMEM); list = &kq->kq_knlist[kn->kn_id]; } else { if (kq->kq_knhash == NULL) return (ENOMEM); list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } SLIST_INSERT_HEAD(list, kn, kn_link); return (0); } static void knote_drop(struct knote *kn, struct thread *td) { if ((kn->kn_status & KN_DETACHED) == 0) kn->kn_fop->f_detach(kn); knote_drop_detached(kn, td); } static void knote_drop_detached(struct knote *kn, struct thread *td) { struct kqueue *kq; struct klist *list; kq = kn->kn_kq; KASSERT((kn->kn_status & KN_DETACHED) != 0, ("knote %p still attached", kn)); KQ_NOTOWNED(kq); KQ_LOCK(kq); KASSERT(kn->kn_influx == 1, ("knote_drop called on %p with influx %d", kn, kn->kn_influx)); if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; if (!SLIST_EMPTY(list)) SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); KQ_UNLOCK_FLUX(kq); if (kn->kn_fop->f_isfd) { fdrop(kn->kn_fp, td); kn->kn_fp = NULL; } kqueue_fo_release(kn->kn_kevent.filter); kn->kn_fop = NULL; knote_free(kn); } static void knote_enqueue(struct knote *kn) { struct kqueue *kq = 
kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; kqueue_wakeup(kq); } static void knote_dequeue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; } static void knote_init(void) { knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); static struct knote * knote_alloc(int mflag) { return (uma_zalloc(knote_zone, mflag | M_ZERO)); } static void knote_free(struct knote *kn) { uma_zfree(knote_zone, kn); } /* * Register the kev w/ the kq specified by fd. */ int kqfd_register(int fd, struct kevent *kev, struct thread *td, int mflag) { struct kqueue *kq; struct file *fp; cap_rights_t rights; int error; error = fget(td, fd, cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &fp); if (error != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto noacquire; error = kqueue_register(kq, kev, td, mflag); kqueue_release(kq, 0); noacquire: fdrop(fp, td); return (error); } diff --git a/sys/sys/event.h b/sys/sys/event.h index 80ed1268c8a1..f1bdc7e2a80e 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -1,368 +1,369 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999,2000,2001 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _SYS_EVENT_H_ #define _SYS_EVENT_H_ #include #include #define EVFILT_READ (-1) #define EVFILT_WRITE (-2) #define EVFILT_AIO (-3) /* attached to aio requests */ #define EVFILT_VNODE (-4) /* attached to vnodes */ #define EVFILT_PROC (-5) /* attached to struct proc */ #define EVFILT_SIGNAL (-6) /* attached to struct proc */ #define EVFILT_TIMER (-7) /* timers */ #define EVFILT_PROCDESC (-8) /* attached to process descriptors */ #define EVFILT_FS (-9) /* filesystem events */ #define EVFILT_LIO (-10) /* attached to lio requests */ #define EVFILT_USER (-11) /* User events */ #define EVFILT_SENDFILE (-12) /* attached to sendfile requests */ #define EVFILT_EMPTY (-13) /* empty send socket buf */ #define EVFILT_SYSCOUNT 13 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define EV_SET(kevp_, a, b, c, d, e, f) do { \ *(kevp_) = (struct kevent){ \ .ident = (a), \ .filter = (b), \ .flags = (c), \ .fflags = (d), \ .data = (e), \ .udata = (f), \ .ext = {0}, \ }; \ } while (0) #else /* Pre-C99 or not STDC (e.g., C++) */ /* * The definition of the local variable kevp could possibly conflict * with a user-defined value passed in parameters a-f. */ #define EV_SET(kevp_, a, b, c, d, e, f) do { \ struct kevent *kevp = (kevp_); \ (kevp)->ident = (a); \ (kevp)->filter = (b); \ (kevp)->flags = (c); \ (kevp)->fflags = (d); \ (kevp)->data = (e); \ (kevp)->udata = (f); \ (kevp)->ext[0] = 0; \ (kevp)->ext[1] = 0; \ (kevp)->ext[2] = 0; \ (kevp)->ext[3] = 0; \ } while (0) #endif struct kevent { __uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ unsigned short flags; /* action flags for kqueue */ unsigned int fflags; /* filter flag value */ __int64_t data; /* filter data value */ void *udata; /* opaque user data identifier */ __uint64_t ext[4]; /* extensions */ }; #if defined(_WANT_FREEBSD11_KEVENT) /* Older structure used in FreeBSD 11.x and older. 
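 * The FreeBSD 11 compat shims in kern_event.c (e.g. kevent11_copyin())
 * translate this layout to the current struct kevent, widening the
 * __intptr_t data field to int64_t and zeroing the ext[] array that this
 * structure lacks.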
*/ struct kevent_freebsd11 { __uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ unsigned short flags; unsigned int fflags; __intptr_t data; void *udata; /* opaque user data identifier */ }; #endif #if defined(_WANT_KEVENT32) || (defined(_KERNEL) && defined(__LP64__)) struct kevent32 { uint32_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; u_int fflags; #ifndef __amd64__ uint32_t pad0; #endif uint32_t data1, data2; uint32_t udata; /* opaque user data identifier */ #ifndef __amd64__ uint32_t pad1; #endif uint32_t ext64[8]; }; #ifdef _WANT_FREEBSD11_KEVENT struct kevent32_freebsd11 { u_int32_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; u_int fflags; int32_t data; u_int32_t udata; /* opaque user data identifier */ }; #endif #endif /* actions */ #define EV_ADD 0x0001 /* add event to kq (implies enable) */ #define EV_DELETE 0x0002 /* delete event from kq */ #define EV_ENABLE 0x0004 /* enable event */ #define EV_DISABLE 0x0008 /* disable event (not reported) */ #define EV_FORCEONESHOT 0x0100 /* enable _ONESHOT and force trigger */ +#define EV_KEEPUDATA 0x0200 /* do not update the udata field */ /* flags */ #define EV_ONESHOT 0x0010 /* only report one occurrence */ #define EV_CLEAR 0x0020 /* clear event state after reporting */ #define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data=0 */ #define EV_DISPATCH 0x0080 /* disable event after reporting */ #define EV_SYSFLAGS 0xF000 /* reserved by system */ #define EV_DROP 0x1000 /* note should be dropped */ #define EV_FLAG1 0x2000 /* filter-specific flag */ #define EV_FLAG2 0x4000 /* filter-specific flag */ /* returned values */ #define EV_EOF 0x8000 /* EOF detected */ #define EV_ERROR 0x4000 /* error, data contains errno */ /* * data/hint flags/masks for EVFILT_USER, shared with userspace * * On input, the top two bits of fflags specifies how the lower twenty four * bits should be applied to the stored value of fflags. * * On output, the top two bits will always be set to NOTE_FFNOP and the * remaining twenty four bits will contain the stored fflags value. */ #define NOTE_FFNOP 0x00000000 /* ignore input fflags */ #define NOTE_FFAND 0x40000000 /* AND fflags */ #define NOTE_FFOR 0x80000000 /* OR fflags */ #define NOTE_FFCOPY 0xc0000000 /* copy fflags */ #define NOTE_FFCTRLMASK 0xc0000000 /* masks for operations */ #define NOTE_FFLAGSMASK 0x00ffffff #define NOTE_TRIGGER 0x01000000 /* Cause the event to be triggered for output. 
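 * For example (illustrative only), a registered EVFILT_USER event can be
 * fired without disturbing its stored udata via:
 *	EV_SET(&kev, ident, EVFILT_USER, EV_KEEPUDATA, NOTE_TRIGGER, 0, NULL);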
*/ /* * data/hint flags for EVFILT_{READ|WRITE}, shared with userspace */ #define NOTE_LOWAT 0x0001 /* low water mark */ #define NOTE_FILE_POLL 0x0002 /* behave like poll() */ /* * data/hint flags for EVFILT_VNODE, shared with userspace */ #define NOTE_DELETE 0x0001 /* vnode was removed */ #define NOTE_WRITE 0x0002 /* data contents changed */ #define NOTE_EXTEND 0x0004 /* size increased */ #define NOTE_ATTRIB 0x0008 /* attributes changed */ #define NOTE_LINK 0x0010 /* link count changed */ #define NOTE_RENAME 0x0020 /* vnode was renamed */ #define NOTE_REVOKE 0x0040 /* vnode access was revoked */ #define NOTE_OPEN 0x0080 /* vnode was opened */ #define NOTE_CLOSE 0x0100 /* file closed, fd did not allowed write */ #define NOTE_CLOSE_WRITE 0x0200 /* file closed, fd did allowed write */ #define NOTE_READ 0x0400 /* file was read */ /* * data/hint flags for EVFILT_PROC and EVFILT_PROCDESC, shared with userspace */ #define NOTE_EXIT 0x80000000 /* process exited */ #define NOTE_FORK 0x40000000 /* process forked */ #define NOTE_EXEC 0x20000000 /* process exec'd */ #define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */ #define NOTE_PDATAMASK 0x000fffff /* mask for pid */ /* additional flags for EVFILT_PROC */ #define NOTE_TRACK 0x00000001 /* follow across forks */ #define NOTE_TRACKERR 0x00000002 /* could not track child */ #define NOTE_CHILD 0x00000004 /* am a child process */ /* additional flags for EVFILT_TIMER */ #define NOTE_SECONDS 0x00000001 /* data is seconds */ #define NOTE_MSECONDS 0x00000002 /* data is milliseconds */ #define NOTE_USECONDS 0x00000004 /* data is microseconds */ #define NOTE_NSECONDS 0x00000008 /* data is nanoseconds */ #define NOTE_ABSTIME 0x00000010 /* timeout is absolute */ struct knote; SLIST_HEAD(klist, knote); struct kqueue; TAILQ_HEAD(kqlist, kqueue); struct knlist { struct klist kl_list; void (*kl_lock)(void *); /* lock function */ void (*kl_unlock)(void *); void (*kl_assert_lock)(void *, int); void *kl_lockarg; /* argument passed to lock functions */ int kl_autodestroy; }; #ifdef _KERNEL /* * Flags for knote call */ #define KNF_LISTLOCKED 0x0001 /* knlist is locked */ #define KNF_NOKQLOCK 0x0002 /* do not keep KQ_LOCK */ #define KNOTE(list, hint, flags) knote(list, hint, flags) #define KNOTE_LOCKED(list, hint) knote(list, hint, KNF_LISTLOCKED) #define KNOTE_UNLOCKED(list, hint) knote(list, hint, 0) #define KNLIST_EMPTY(list) SLIST_EMPTY(&(list)->kl_list) /* * Flag indicating hint is a signal. Used by EVFILT_SIGNAL, and also * shared by EVFILT_PROC (all knotes attached to p->p_klist) */ #define NOTE_SIGNAL 0x08000000 /* * Hint values for the optional f_touch event filter. If f_touch is not set * to NULL and f_isfd is zero the f_touch filter will be called with the type * argument set to EVENT_REGISTER during a kevent() system call. It is also * called under the same conditions with the type argument set to EVENT_PROCESS * when the event has been triggered. */ #define EVENT_REGISTER 1 #define EVENT_PROCESS 2 struct filterops { int f_isfd; /* true if ident == filedescriptor */ int (*f_attach)(struct knote *kn); void (*f_detach)(struct knote *kn); int (*f_event)(struct knote *kn, long hint); void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type); }; /* * An in-flux knote cannot be dropped from its kq while the kq is * unlocked. If the KN_SCAN flag is not set, a thread can only set * kn_influx when it is exclusive owner of the knote state, and can * modify kn_status as if it had the KQ lock. KN_SCAN must not be set * on a knote which is already in flux. 
* * kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock. */ struct knote { SLIST_ENTRY(knote) kn_link; /* for kq */ SLIST_ENTRY(knote) kn_selnext; /* for struct selinfo */ struct knlist *kn_knlist; /* f_attach populated */ TAILQ_ENTRY(knote) kn_tqe; struct kqueue *kn_kq; /* which queue we are on */ struct kevent kn_kevent; void *kn_hook; int kn_hookid; int kn_status; /* protected by kq lock */ #define KN_ACTIVE 0x01 /* event has been triggered */ #define KN_QUEUED 0x02 /* event is on queue */ #define KN_DISABLED 0x04 /* event is disabled */ #define KN_DETACHED 0x08 /* knote is detached */ #define KN_MARKER 0x20 /* ignore this knote */ #define KN_KQUEUE 0x40 /* this knote belongs to a kq */ #define KN_SCAN 0x100 /* flux set in kqueue_scan() */ int kn_influx; int kn_sfflags; /* saved filter flags */ int64_t kn_sdata; /* saved data field */ union { struct file *p_fp; /* file data pointer */ struct proc *p_proc; /* proc pointer */ struct kaiocb *p_aio; /* AIO job pointer */ struct aioliojob *p_lio; /* LIO job pointer */ void *p_v; /* generic other pointer */ } kn_ptr; struct filterops *kn_fop; #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter #define kn_flags kn_kevent.flags #define kn_fflags kn_kevent.fflags #define kn_data kn_kevent.data #define kn_fp kn_ptr.p_fp }; struct kevent_copyops { void *arg; int (*k_copyout)(void *arg, struct kevent *kevp, int count); int (*k_copyin)(void *arg, struct kevent *kevp, int count); size_t kevent_size; }; struct thread; struct proc; struct knlist; struct mtx; struct rwlock; void knote(struct knlist *list, long hint, int lockflags); void knote_fork(struct knlist *list, int pid); struct knlist *knlist_alloc(struct mtx *lock); void knlist_detach(struct knlist *knl); void knlist_add(struct knlist *knl, struct knote *kn, int islocked); void knlist_remove(struct knlist *knl, struct knote *kn, int islocked); int knlist_empty(struct knlist *knl); void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_lock)(void *, int)); void knlist_init_mtx(struct knlist *knl, struct mtx *lock); void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock); void knlist_destroy(struct knlist *knl); void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn); #define knlist_clear(knl, islocked) \ knlist_cleardel((knl), NULL, (islocked), 0) #define knlist_delete(knl, td, islocked) \ knlist_cleardel((knl), (td), (islocked), 1) void knote_fdclose(struct thread *p, int fd); int kqfd_register(int fd, struct kevent *kev, struct thread *p, int mflag); int kqueue_add_filteropts(int filt, struct filterops *filtops); int kqueue_del_filteropts(int filt); void kqueue_drain_schedtask(void); #else /* !_KERNEL */ #include struct timespec; __BEGIN_DECLS int kqueue(void); int kevent(int kq, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_EVENT_H_ */ diff --git a/tests/sys/kqueue/libkqueue/user.c b/tests/sys/kqueue/libkqueue/user.c index 3844251ff4ba..1f66234c4cda 100644 --- a/tests/sys/kqueue/libkqueue/user.c +++ b/tests/sys/kqueue/libkqueue/user.c @@ -1,128 +1,188 @@ /* * Copyright (c) 2009 Mark Heily * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. 
* * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * $FreeBSD$ */ #include "common.h" static void add_and_delete(void) { const char *test_id = "kevent(EVFILT_USER, EV_ADD and EV_DELETE)"; struct kevent kev; test_begin(test_id); kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ADD, 0, 0, NULL); test_no_kevents(); kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_DELETE, 0, 0, NULL); test_no_kevents(); success(); } static void event_wait(void) { const char *test_id = "kevent(EVFILT_USER, wait)"; struct kevent kev; test_begin(test_id); test_no_kevents(); /* Add the event, and then trigger it */ kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, NULL); kevent_add(kqfd, &kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); kev.fflags &= ~NOTE_FFCTRLMASK; kev.fflags &= ~NOTE_TRIGGER; kev.flags = EV_CLEAR; kevent_cmp(&kev, kevent_get(kqfd)); test_no_kevents(); success(); } +static void +event_wait_keepudata(void) +{ + const char *test_id = "kevent(EVFILT_USER, wait w/ EV_KEEPUDATA)"; + struct kevent kev; + + test_begin(test_id); + + test_no_kevents(); + + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, &kev); + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_KEEPUDATA, NOTE_TRIGGER, 0, + NULL); + + kev.fflags &= ~NOTE_FFCTRLMASK; + kev.fflags &= ~NOTE_TRIGGER; + kev.flags = EV_CLEAR; + kev.udata = &kev; + kevent_cmp(&kev, kevent_get(kqfd)); + + test_no_kevents(); + + success(); +} + + static void disable_and_enable(void) { const char *test_id = "kevent(EVFILT_USER, EV_DISABLE and EV_ENABLE)"; struct kevent kev; test_begin(test_id); test_no_kevents(); kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ADD, 0, 0, NULL); kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_DISABLE, 0, 0, NULL); /* Trigger the event, but since it is disabled, nothing will happen. */ kevent_add(kqfd, &kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); test_no_kevents(); kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ENABLE, 0, 0, NULL); kevent_add(kqfd, &kev, 1, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); kev.flags = EV_CLEAR; kev.fflags &= ~NOTE_FFCTRLMASK; kev.fflags &= ~NOTE_TRIGGER; kevent_cmp(&kev, kevent_get(kqfd)); success(); } +static void +disable_and_enable_keepudata(void) +{ + const char *test_id = + "kevent(EVFILT_USER, EV_DISABLE and EV_ENABLE w/ EV_KEEPUDATA)"; + struct kevent kev; + + test_begin(test_id); + + test_no_kevents(); + + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ADD, 0, 0, &kev); + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_DISABLE | EV_KEEPUDATA, 0, 0, + NULL); + + /* Trigger the event, but since it is disabled, nothing will happen. 
*/ + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_KEEPUDATA, NOTE_TRIGGER, 0, NULL); + test_no_kevents(); + + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_ENABLE | EV_KEEPUDATA, 0, 0, + NULL); + kevent_add(kqfd, &kev, 1, EVFILT_USER, EV_KEEPUDATA, NOTE_TRIGGER, 0, NULL); + + kev.flags = EV_CLEAR; + kev.fflags &= ~NOTE_FFCTRLMASK; + kev.fflags &= ~NOTE_TRIGGER; + kev.udata = &kev; + kevent_cmp(&kev, kevent_get(kqfd)); + + success(); +} + static void oneshot(void) { const char *test_id = "kevent(EVFILT_USER, EV_ONESHOT)"; struct kevent kev; test_begin(test_id); test_no_kevents(); kevent_add(kqfd, &kev, 2, EVFILT_USER, EV_ADD | EV_ONESHOT, 0, 0, NULL); puts(" -- event 1"); kevent_add(kqfd, &kev, 2, EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL); kev.flags = EV_ONESHOT; kev.fflags &= ~NOTE_FFCTRLMASK; kev.fflags &= ~NOTE_TRIGGER; kevent_cmp(&kev, kevent_get(kqfd)); test_no_kevents(); success(); } void test_evfilt_user(void) { kqfd = kqueue(); add_and_delete(); event_wait(); + event_wait_keepudata(); disable_and_enable(); + disable_and_enable_keepudata(); oneshot(); /* TODO: try different fflags operations */ close(kqfd); }
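/*
 * Standalone illustration (not part of the patch itself): a minimal
 * userspace sketch of EV_KEEPUDATA with EVFILT_USER, mirroring the
 * event_wait_keepudata() test above.  Assumes a system whose
 * <sys/event.h> already defines EV_KEEPUDATA (i.e. with this change applied).
 */
#include <sys/types.h>
#include <sys/event.h>

#include <err.h>
#include <unistd.h>

int
main(void)
{
	struct kevent kev;
	void *token = &kev;		/* arbitrary udata value to preserve */
	int kq, n;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");

	/* Register a user event and stash a udata pointer with it. */
	EV_SET(&kev, 1, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, token);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent(EV_ADD)");

	/* Trigger it without supplying udata; EV_KEEPUDATA keeps the old one. */
	EV_SET(&kev, 1, EVFILT_USER, EV_KEEPUDATA, NOTE_TRIGGER, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent(NOTE_TRIGGER)");

	/* The delivered event still carries the originally registered udata. */
	n = kevent(kq, NULL, 0, &kev, 1, NULL);
	if (n != 1 || kev.udata != token)
		errx(1, "udata was not preserved");

	close(kq);
	return (0);
}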