D50315: inotify: Initial revision
D50315.id155695.diff (63 KB)
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -89,6 +89,7 @@
glob.c \
glob-compat11.c \
initgroups.c \
+ inotify.c \
isatty.c \
isinf.c \
isnan.c \
diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map
--- a/lib/libc/gen/Symbol.map
+++ b/lib/libc/gen/Symbol.map
@@ -459,6 +459,10 @@
aio_write2;
execvpe;
fts_open_b;
+ inotify_add_watch;
+ inotify_init;
+ inotify_init1;
+ inotify_rm_watch;
psiginfo;
rtld_get_var;
rtld_set_var;
diff --git a/lib/libc/gen/inotify.c b/lib/libc/gen/inotify.c
new file mode 100644
--- /dev/null
+++ b/lib/libc/gen/inotify.c
@@ -0,0 +1,26 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#include "namespace.h"
+#include <sys/inotify.h>
+#include <sys/specialfd.h>
+#include "un-namespace.h"
+#include "libc_private.h"
+
+int
+inotify_init1(int flags)
+{
+ struct specialfd_inotify args;
+
+ args.flags = flags;
+ return (__sys___specialfd(SPECIALFD_INOTIFY, &args, sizeof(args)));
+}
+
+int
+inotify_init(void)
+{
+ return (inotify_init1(0));
+}
diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
--- a/lib/libsys/_libsys.h
+++ b/lib/libsys/_libsys.h
@@ -465,6 +465,8 @@
typedef int (__sys_getrlimitusage_t)(u_int, int, rlim_t *);
typedef int (__sys_fchroot_t)(int);
typedef int (__sys_setcred_t)(u_int, const struct setcred *, size_t);
+typedef int (__sys_inotify_add_watch_t)(int, const char *, uint32_t);
+typedef int (__sys_inotify_rm_watch_t)(int, int);
void __sys_exit(int rval);
int __sys_fork(void);
@@ -866,6 +868,8 @@
int __sys_getrlimitusage(u_int which, int flags, rlim_t * res);
int __sys_fchroot(int fd);
int __sys_setcred(u_int flags, const struct setcred * wcred, size_t size);
+int __sys_inotify_add_watch(int fd, const char * path, uint32_t mask);
+int __sys_inotify_rm_watch(int fd, int wd);
__END_DECLS
#endif /* __LIBSYS_H_ */
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
--- a/lib/libsys/syscalls.map
+++ b/lib/libsys/syscalls.map
@@ -807,4 +807,8 @@
__sys_fchroot;
_setcred;
__sys_setcred;
+ _inotify_add_watch;
+ __sys_inotify_add_watch;
+ _inotify_rm_watch;
+ __sys_inotify_rm_watch;
};
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -663,6 +663,7 @@
#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */
#define AUE_TIMERFD 43270 /* FreeBSD/Linux. */
#define AUE_SETCRED 43271 /* FreeBSD-specific. */
+#define AUE_INOTIFY 43272 /* FreeBSD/Linux. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3979,6 +3979,7 @@
kern/vfs_extattr.c standard
kern/vfs_hash.c standard
kern/vfs_init.c standard
+kern/vfs_inotify.c standard
kern/vfs_lookup.c standard
kern/vfs_mount.c standard
kern/vfs_mountroot.c standard
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -245,6 +245,10 @@
vp->v_object = lowervp->v_object;
vn_irflag_set(vp, VIRF_PGREAD);
}
+ if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0)
+ vn_irflag_set(vp, VIRF_INOTIFY);
+ if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0)
+ vn_irflag_set(vp, VIRF_INOTIFY_PARENT);
if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
vp->v_vflag |= VV_ROOT;
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -26,7 +26,6 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_capsicum.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
@@ -44,6 +43,7 @@
#include <sys/filedesc.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -1882,8 +1882,10 @@
* general case).
*/
error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
- if (error == 0)
+ if (error == 0) {
+ INOTIFY(vp, IN_OPEN);
imgp->opened = true;
+ }
return (error);
}
@@ -1892,6 +1894,7 @@
{
if (imgp->opened) {
VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
+ INOTIFY(imgp->vp, IN_CLOSE);
imgp->opened = false;
}
if (imgp->textset) {
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -1637,6 +1637,12 @@
if (uip->ui_pipecnt != 0)
printf("freeing uidinfo: uid = %d, pipecnt = %ld\n",
uip->ui_uid, uip->ui_pipecnt);
+ if (uip->ui_inotifycnt != 0)
+ printf("freeing uidinfo: uid = %d, inotifycnt = %ld\n",
+ uip->ui_uid, uip->ui_inotifycnt);
+ if (uip->ui_inotifywatchcnt != 0)
+ printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n",
+ uip->ui_uid, uip->ui_inotifywatchcnt);
free(uip, M_UIDINFO);
}
@@ -1742,6 +1748,21 @@
return (chglimit(uip, &uip->ui_pipecnt, diff, max, "pipecnt"));
}
+int
+chginotifycnt(struct uidinfo *uip, int diff, rlim_t max)
+{
+
+ return (chglimit(uip, &uip->ui_inotifycnt, diff, max, "inotifycnt"));
+}
+
+int
+chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max)
+{
+
+ return (chglimit(uip, &uip->ui_inotifywatchcnt, diff, max,
+ "inotifywatchcnt"));
+}
+
static int
sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS)
{
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -27,12 +27,12 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ktls.h>
@@ -1242,6 +1242,8 @@
*/
if (error == 0) {
td->td_retval[0] = 0;
+ if (sbytes > 0 && vp != NULL)
+ INOTIFY(vp, IN_ACCESS);
}
if (sent != NULL) {
(*sent) = sbytes;
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -34,7 +34,6 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_capsicum.h"
#include "opt_ktrace.h"
@@ -46,6 +45,7 @@
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
+#include <sys/inotify.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
@@ -938,7 +938,6 @@
kern_specialfd(struct thread *td, int type, void *arg)
{
struct file *fp;
- struct specialfd_eventfd *ae;
int error, fd, fflags;
fflags = 0;
@@ -947,12 +946,22 @@
return (error);
switch (type) {
- case SPECIALFD_EVENTFD:
+ case SPECIALFD_EVENTFD: {
+ struct specialfd_eventfd *ae;
+
ae = arg;
if ((ae->flags & EFD_CLOEXEC) != 0)
fflags |= O_CLOEXEC;
error = eventfd_create_file(td, fp, ae->initval, ae->flags);
break;
+ }
+ case SPECIALFD_INOTIFY: {
+ struct specialfd_inotify *si;
+
+ si = arg;
+ error = inotify_create_file(td, fp, si->flags, &fflags);
+ break;
+ }
default:
error = EINVAL;
break;
@@ -969,11 +978,12 @@
int
sys___specialfd(struct thread *td, struct __specialfd_args *args)
{
- struct specialfd_eventfd ae;
int error;
switch (args->type) {
- case SPECIALFD_EVENTFD:
+ case SPECIALFD_EVENTFD: {
+ struct specialfd_eventfd ae;
+
if (args->len != sizeof(struct specialfd_eventfd)) {
error = EINVAL;
break;
@@ -988,6 +998,20 @@
}
error = kern_specialfd(td, args->type, &ae);
break;
+ }
+ case SPECIALFD_INOTIFY: {
+ struct specialfd_inotify si;
+
+ if (args->len != sizeof(si)) {
+ error = EINVAL;
+ break;
+ }
+ error = copyin(args->req, &si, sizeof(si));
+ if (error != 0)
+ break;
+ error = kern_specialfd(td, args->type, &si);
+ break;
+ }
default:
error = EINVAL;
break;
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -597,4 +597,6 @@
"getrlimitusage", /* 589 = getrlimitusage */
"fchroot", /* 590 = fchroot */
"setcred", /* 591 = setcred */
+ "inotify_add_watch", /* 592 = inotify_add_watch */
+ "inotify_rm_watch", /* 593 = inotify_rm_watch */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3349,5 +3349,18 @@
size_t size
);
}
+592 AUE_INOTIFY STD {
+ int inotify_add_watch(
+ int fd,
+ _In_z_ const char *path,
+ uint32_t mask
+ );
+ }
+593 AUE_INOTIFY STD|CAPENABLED {
+ int inotify_rm_watch(
+ int fd,
+ int wd
+ );
+ }
; vim: syntax=off
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -67,6 +67,7 @@
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
+#include <sys/inotify.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -41,6 +41,7 @@
#include <sys/counter.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -2636,6 +2637,9 @@
atomic_thread_fence_rel();
atomic_store_ptr(&dvp->v_cache_dd, ncp);
} else if (vp != NULL) {
+ if (__predict_false((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0))
+ vn_irflag_set_cond(vp, VIRF_INOTIFY_PARENT);
+
/*
* For this case, the cache entry maps both the
* directory name in it and the name ".." for the
@@ -4061,6 +4065,46 @@
return (error);
}
+void
+cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie)
+{
+ struct mtx *vlp;
+ struct namecache *ncp;
+ int isdir;
+ bool logged, self;
+
+ isdir = vp->v_type == VDIR ? IN_ISDIR : 0;
+ self = (vn_irflag_read(vp) & VIRF_INOTIFY) != 0 &&
+ (vp->v_type != VDIR || (event & ~_IN_DIR_EVENTS) != 0);
+
+ vlp = VP2VNODELOCK(vp);
+ mtx_lock(vlp);
+ if (self)
+ inotify_log(vp, NULL, 0, event | isdir, cookie);
+ logged = false;
+ TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) {
+ if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
+ continue;
+ if ((vn_irflag_read(ncp->nc_dvp) & VIRF_INOTIFY) != 0) {
+ /*
+ * XXX-MJ if the vnode has two links in the same
+ * dir, we'll log the same event twice.
+ */
+ inotify_log(ncp->nc_dvp, ncp->nc_name, ncp->nc_nlen,
+ event | isdir, cookie);
+ logged = true;
+ }
+ }
+ if (!logged && (vn_irflag_read(vp) & VIRF_INOTIFY_PARENT) != 0) {
+ /*
+ * We didn't find a watched directory that contains this vnode,
+ * so stop calling VOP_INOTIFY for operations on the vnode.
+ */
+ vn_irflag_unset(vp, VIRF_INOTIFY_PARENT);
+ }
+ mtx_unlock(vlp);
+}
+
#ifdef DDB
static void
db_print_vpath(struct vnode *vp)
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -39,6 +39,7 @@
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/filio.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
@@ -119,6 +120,8 @@
.vop_getwritemount = vop_stdgetwritemount,
.vop_inactive = VOP_NULL,
.vop_need_inactive = vop_stdneed_inactive,
+ .vop_inotify = vop_stdinotify,
+ .vop_inotify_add_watch = vop_stdinotify_add_watch,
.vop_ioctl = vop_stdioctl,
.vop_kqfilter = vop_stdkqfilter,
.vop_islocked = vop_stdislocked,
@@ -1305,6 +1308,20 @@
return (1);
}
+int
+vop_stdinotify(struct vop_inotify_args *ap)
+{
+ vn_inotify(ap->a_vp, ap->a_dvp, ap->a_cnp, ap->a_event, ap->a_cookie);
+ return (0);
+}
+
+int
+vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *ap)
+{
+ return (vn_inotify_add_watch(ap->a_vp, ap->a_sc, ap->a_mask,
+ ap->a_wdp, ap->a_td));
+}
+
int
vop_stdioctl(struct vop_ioctl_args *ap)
{
diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/vfs_inotify.c
@@ -0,0 +1,803 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+/*
+ * XXX-MJ nullfs integration isn't really sorted out
+ * XXX-MJ inotify(2) man page
+ * XXX-MJ audit/capsicum/mac integration, new inotify capability rights?
+ * XXX-MJ limits
+ * XXX-MJ libsysdecode/ktrace handling
+ * XXX-MJ add an example program
+ */
+
+#include "opt_ktrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/caprights.h>
+#include <sys/dirent.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/inotify.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/ktrace.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/resourcevar.h>
+#include <sys/selinfo.h>
+#include <sys/stat.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/syslimits.h>
+#include <sys/sysproto.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+
+uint32_t inotify_rename_cookie;
+
+static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "inotify configuration");
+
+static int inotify_max_events = 512;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_events, CTLFLAG_RWTUN,
+ &inotify_max_events, 0,
+ "Maximum number of events to queue on an inotify descriptor");
+
+static int inotify_max_user_instances = 1024;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN,
+ &inotify_max_user_instances, 0,
+ "Maximum number of inotify descriptors per user");
+
+static int inotify_max_user_watches = 128 * 1024;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN,
+ &inotify_max_user_watches, 0,
+ "Maximum number of inotify watches per user");
+
+static fo_rdwr_t inotify_read;
+static fo_ioctl_t inotify_ioctl;
+static fo_poll_t inotify_poll;
+static fo_kqfilter_t inotify_kqfilter;
+static fo_stat_t inotify_stat;
+static fo_close_t inotify_close;
+static fo_fill_kinfo_t inotify_fill_kinfo;
+
+static const struct fileops inotifyfdops = {
+ .fo_read = inotify_read,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = inotify_ioctl,
+ .fo_poll = inotify_poll,
+ .fo_kqfilter = inotify_kqfilter,
+ .fo_stat = inotify_stat,
+ .fo_close = inotify_close,
+ .fo_fill_kinfo = inotify_fill_kinfo,
+ .fo_cmp = file_kcmp_generic,
+};
+
+static void filt_inotifydetach(struct knote *kn);
+static int filt_inotifyevent(struct knote *kn, long hint);
+
+static const struct filterops inotify_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_inotifydetach,
+ .f_event = filt_inotifyevent,
+};
+
+static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures");
+
+struct inotify_record {
+ STAILQ_ENTRY(inotify_record) link;
+ struct inotify_event ev;
+};
+
+struct inotify_watch {
+ struct inotify_softc *sc; /* back-pointer */
+ int wd; /* unique ID */
+ uint32_t mask; /* event mask */
+ struct vnode *vp; /* vnode being watched, refed */
+ TAILQ_ENTRY(inotify_watch) ilink; /* inotify linkage */
+ TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */
+};
+
+struct inotify_softc {
+ struct mtx lock;
+ STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */
+ struct inotify_record overflow; /* preallocated record */
+ int nextwatch; /* next watch ID to try */
+ int npending; /* number of pending events */
+ size_t nbpending; /* bytes available to read */
+ TAILQ_HEAD(, inotify_watch) watches; /* active watches */
+ struct selinfo sel; /* select/poll/kevent info */
+ struct ucred *cred; /* credential ref */
+};
+
+static int
+inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags,
+ struct thread *td)
+{
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ int error;
+ bool first;
+
+ sc = fp->f_data;
+ error = 0;
+
+ mtx_lock(&sc->lock);
+ while (STAILQ_EMPTY(&sc->pending)) {
+ if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) {
+ mtx_unlock(&sc->lock);
+ return (EWOULDBLOCK);
+ }
+ error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0);
+ if (error != 0) {
+ mtx_unlock(&sc->lock);
+ return (error);
+ }
+ }
+ for (first = true; (rec = STAILQ_FIRST(&sc->pending)) != NULL;
+ first = false) {
+ size_t len;
+
+ len = sizeof(rec->ev) + rec->ev.len;
+ if (uio->uio_resid < (ssize_t)len) {
+ if (first)
+ error = EINVAL;
+ break;
+ }
+ STAILQ_REMOVE_HEAD(&sc->pending, link);
+ sc->npending--;
+ sc->nbpending -= len;
+ mtx_unlock(&sc->lock);
+ error = uiomove(&rec->ev, len, uio);
+#ifdef KTRACE
+ if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+ ktrstruct("inotify", &rec->ev, len);
+#endif
+ mtx_lock(&sc->lock);
+ if (error != 0) {
+ sc->npending++;
+ sc->nbpending += len;
+ STAILQ_INSERT_HEAD(&sc->pending, rec, link);
+ mtx_unlock(&sc->lock);
+ return (error);
+ }
+ if (rec == &sc->overflow)
+ memset(rec, 0, sizeof(*rec));
+ else
+ free(rec, M_INOTIFY);
+ }
+ mtx_unlock(&sc->lock);
+ return (error);
+}
+
+static int
+inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred,
+ struct thread *td)
+{
+ struct inotify_softc *sc;
+
+ sc = fp->f_data;
+
+ switch (com) {
+ case FIONREAD:
+ *(int *)data = (int)sc->nbpending;
+ return (0);
+ default:
+ return (ENOTTY);
+ }
+
+ return (0);
+}
+
+static int
+inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
+{
+ struct inotify_softc *sc;
+ int revents;
+
+ sc = fp->f_data;
+ revents = 0;
+
+ mtx_lock(&sc->lock);
+ if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0)
+ revents |= events & (POLLIN | POLLRDNORM);
+ else
+ selrecord(td, &sc->sel);
+ mtx_unlock(&sc->lock);
+ return (revents);
+}
+
+static void
+filt_inotifydetach(struct knote *kn)
+{
+ struct inotify_softc *sc;
+
+ sc = kn->kn_hook;
+ knlist_remove(&sc->sel.si_note, kn, 0);
+}
+
+static int
+filt_inotifyevent(struct knote *kn, long hint)
+{
+ struct inotify_softc *sc;
+
+ sc = kn->kn_hook;
+ mtx_assert(&sc->lock, MA_OWNED);
+ kn->kn_data = sc->nbpending;
+ return (kn->kn_data > 0);
+}
+
+static int
+inotify_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct inotify_softc *sc;
+
+ if (kn->kn_filter != EVFILT_READ)
+ return (EINVAL);
+ sc = fp->f_data;
+ kn->kn_fop = &inotify_rfiltops;
+ kn->kn_hook = sc;
+ knlist_add(&sc->sel.si_note, kn, 0);
+ return (0);
+}
+
+static int
+inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred)
+{
+ /* XXX-MJ */
+ memset(sb, 0, sizeof(*sb));
+ return (0);
+}
+
+static void
+inotify_unlink_watch_locked(struct inotify_watch *watch)
+{
+ struct vnode *vp;
+
+ vp = watch->vp;
+ mtx_assert(VI_MTX(vp), MA_OWNED);
+
+ TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink);
+ if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify))
+ vn_irflag_unset_locked(vp, VIRF_INOTIFY);
+}
+
+/*
+ * Assumes that the watch has already been removed from its softc.
+ */
+static void
+inotify_remove_watch(struct inotify_watch *watch)
+{
+ struct inotify_softc *sc;
+ struct vnode *vp;
+
+ sc = watch->sc;
+ (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);
+
+ vp = watch->vp;
+ VI_LOCK(vp);
+ inotify_unlink_watch_locked(watch);
+ VI_UNLOCK(vp);
+
+ vrele(vp);
+ free(watch, M_INOTIFY);
+}
+
+static int
+inotify_close(struct file *fp, struct thread *td)
+{
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ struct inotify_watch *watch;
+
+ sc = fp->f_data;
+
+ (void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0);
+ while ((rec = STAILQ_FIRST(&sc->pending)) != NULL) {
+ STAILQ_REMOVE_HEAD(&sc->pending, link);
+ if (rec != &sc->overflow)
+ free(rec, M_INOTIFY);
+ }
+ while ((watch = TAILQ_FIRST(&sc->watches)) != NULL) {
+ TAILQ_REMOVE(&sc->watches, watch, ilink);
+ inotify_remove_watch(watch);
+ }
+ seldrain(&sc->sel);
+ knlist_destroy(&sc->sel.si_note);
+ mtx_destroy(&sc->lock);
+ crfree(sc->cred);
+ free(sc, M_INOTIFY);
+ return (0);
+}
+
+static int
+inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ struct inotify_softc *sc;
+
+ sc = fp->f_data;
+
+ kif->kf_type = KF_TYPE_INOTIFY;
+ kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending;
+ kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending;
+ return (0);
+}
+
+int
+inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp)
+{
+ struct inotify_softc *sc;
+ int fflags;
+
+ if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0)
+ return (EINVAL);
+
+ if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1,
+ inotify_max_user_instances))
+ return (EMFILE);
+
+ sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO);
+ STAILQ_INIT(&sc->pending);
+ TAILQ_INIT(&sc->watches);
+ mtx_init(&sc->lock, "inotify", NULL, MTX_DEF);
+ knlist_init_mtx(&sc->sel.si_note, &sc->lock);
+ sc->cred = crhold(td->td_ucred);
+
+ fflags = FREAD;
+ if ((flags & IN_NONBLOCK) != 0)
+ fflags |= FNONBLOCK;
+ if ((flags & IN_CLOEXEC) != 0)
+ *fflagsp |= O_CLOEXEC;
+ finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops);
+
+ return (0);
+}
+
+static struct inotify_record *
+inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event,
+ uint32_t cookie, int waitok)
+{
+ struct inotify_event *evp;
+ struct inotify_record *rec;
+
+ rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY,
+ waitok | M_ZERO);
+ if (rec == NULL)
+ return (NULL);
+ evp = &rec->ev;
+ evp->wd = wd;
+ evp->mask = event;
+ evp->cookie = cookie;
+ evp->len = _IN_NAMESIZE(namelen);
+ if (name != NULL)
+ memcpy(evp->name, name, namelen);
+ return (rec);
+}
+
+/*
+ * Put an event record on the queue for an inotify descriptor. Return false if
+ * the record was not enqueued for some reason, true otherwise.
+ */
+static bool
+inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec)
+{
+ struct inotify_record *prev;
+ struct inotify_event *evp;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ /* Try to coalesce duplicate events. */
+ evp = &rec->ev;
+ if (rec != &sc->overflow &&
+ (prev = STAILQ_LAST(&sc->pending, inotify_record, link)) != NULL &&
+ prev->ev.wd == evp->wd &&
+ prev->ev.mask == evp->mask &&
+ prev->ev.cookie == evp->cookie &&
+ prev->ev.len == evp->len &&
+ (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0))
+ return (false);
+ /* Would this one overflow the queue? */
+ if (sc->npending >= inotify_max_events || rec == &sc->overflow) {
+ /* Have we already recorded a drop? */
+ if (sc->overflow.ev.mask == IN_Q_OVERFLOW)
+ return (false);
+ evp->wd = -1;
+ evp->mask = IN_Q_OVERFLOW;
+ evp->cookie = 0;
+ evp->len = 0;
+ }
+ STAILQ_INSERT_TAIL(&sc->pending, rec, link);
+ sc->npending++;
+ sc->nbpending += sizeof(rec->ev) + evp->len;
+ selwakeup(&sc->sel);
+ KNOTE_LOCKED(&sc->sel.si_note, 0);
+ wakeup(&sc->pending);
+ return (true);
+}
+
+static int
+inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen,
+ int event, uint32_t cookie)
+{
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ int relecount;
+ bool allocfail;
+
+ relecount = 0;
+
+ sc = watch->sc;
+ rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie,
+ M_NOWAIT);
+ if (rec == NULL) {
+ rec = &sc->overflow;
+ allocfail = true;
+ } else {
+ allocfail = false;
+ }
+
+ mtx_lock(&sc->lock);
+ if (!inotify_queue_record(sc, rec) && !allocfail)
+ free(rec, M_INOTIFY);
+ if (!allocfail &&
+ ((watch->mask & IN_ONESHOT) != 0 || (event & IN_DELETE_SELF) != 0)) {
+ /*
+ * For compatibility with Linux, we're supposed to generate an
+ * IN_IGNORED record after a ONESHOT record.
+ */
+ rec = inotify_alloc_record(watch->wd, NULL, 0,
+ IN_IGNORED, 0, M_NOWAIT);
+ if (!inotify_queue_record(sc, rec))
+ free(rec, M_INOTIFY);
+
+ inotify_unlink_watch_locked(watch);
+ TAILQ_REMOVE(&sc->watches, watch, ilink);
+ free(watch, M_INOTIFY);
+
+ /*
+ * Defer vrele() until the interlock is
+ * dropped.
+ */
+ relecount++;
+ }
+ mtx_unlock(&sc->lock);
+ return (relecount);
+}
+
+void
+inotify_log(struct vnode *vp, const char *name, size_t namelen, int event,
+ uint32_t cookie)
+{
+ struct inotify_watch *watch, *tmp;
+ int relecount;
+
+ KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR)) == 0,
+ ("inotify_log: invalid event %#x", event));
+
+ relecount = 0;
+ VI_LOCK(vp);
+ TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) {
+ KASSERT(watch->vp == vp,
+ ("inotify_log: watch %p vp != vp", watch));
+ if ((watch->mask & event) != 0) {
+ relecount += inotify_log_one(watch, name, namelen, event,
+ cookie);
+ }
+ }
+ VI_UNLOCK(vp);
+
+ for (int i = 0; i < relecount; i++)
+ vrele(vp);
+}
+
+/*
+ * An inotify event occurred on a watched vnode.
+ */
+void
+vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp,
+ int event, uint32_t cookie)
+{
+ int isdir;
+
+ VNPASS(vp->v_holdcnt > 0, vp);
+
+ isdir = vp->v_type == VDIR ? IN_ISDIR : 0;
+
+ /*
+ * IN_DELETE and IN_DELETE_SELF are only generated when the last hard
+ * link of a file is removed.
+ */
+ if (event == IN_DELETE && vp->v_type != VDIR) {
+ struct vattr va;
+ int error;
+
+ error = VOP_GETATTR(vp, &va, cnp->cn_cred);
+ if (error == 0 && va.va_nlink != 0)
+ return;
+ }
+
+ if (dvp != NULL) {
+ VNPASS(dvp->v_holdcnt > 0, dvp);
+
+ /*
+ * Should we log an event for the vnode itself?
+ */
+ if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) {
+ int selfevent;
+
+ selfevent = event;
+ if (selfevent == IN_DELETE)
+ selfevent = IN_DELETE_SELF;
+ else if (selfevent == IN_MOVED_FROM) {
+ cookie = 0;
+ selfevent = IN_MOVE_SELF;
+ }
+ if ((selfevent & ~_IN_DIR_EVENTS) != 0) {
+ inotify_log(vp, NULL, 0, selfevent | isdir, cookie);
+ }
+ }
+
+ /*
+ * Something is watching the directory through which this vnode
+ * was referenced, so we may need to log the event.
+ */
+ if ((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) {
+ inotify_log(dvp, cnp->cn_nameptr,
+ cnp->cn_namelen, event | isdir, cookie);
+ }
+ } else {
+ /*
+ * We don't know which watched directory might contain the
+ * vnode, so we have to fall back to searching the name cache.
+ */
+ cache_vop_inotify(vp, event, cookie);
+ }
+}
+
+int
+vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask,
+ uint32_t *wdp, struct thread *td)
+{
+ struct inotify_watch *watch, *watch1;
+ uint32_t wd;
+ int error;
+
+ /*
+ * If this is a directory, make sure all of its entries are present in
+ * the name cache so that we're able to look them up if an event occurs.
+ * The reference on the directory prevents the outgoing name cache
+ * entries from being reclaimed.
+ */
+ if (vp->v_type == VDIR) {
+ struct dirent *dp;
+ char *buf;
+ off_t off;
+ size_t buflen, len;
+ int eof;
+
+ buflen = 128 * sizeof(struct dirent);
+ buf = malloc(buflen, M_TEMP, M_WAITOK);
+
+ len = off = eof = 0;
+ for (;;) {
+ struct nameidata nd;
+
+ error = vn_dir_next_dirent(vp, td, buf, buflen, &dp,
+ &len, &off, &eof);
+ if (error != 0) {
+ free(buf, M_TEMP);
+ return (error);
+ }
+ if (len == 0)
+ /* Finished reading. */
+ break;
+ if (strcmp(dp->d_name, ".") == 0 ||
+ strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ /*
+ * namei() consumes a reference on the starting
+ * directory if it's specified as a vnode.
+ */
+ vrefact(vp);
+ NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE,
+ dp->d_name, vp);
+ error = namei(&nd);
+ if (error != 0) {
+ free(buf, M_TEMP);
+ return (error);
+ }
+ vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT);
+ vrele(nd.ni_vp);
+ }
+ free(buf, M_TEMP);
+ }
+
+ watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO);
+ watch->sc = sc;
+ watch->vp = vp;
+ watch->mask = mask;
+
+ mtx_lock(&sc->lock);
+ /*
+ * Are we updating an existing watch?
+ */
+ TAILQ_FOREACH(watch1, &sc->watches, ilink) {
+ if (watch1->vp == vp) {
+ if ((mask & IN_MASK_CREATE) != 0) {
+ mtx_unlock(&sc->lock);
+ free(watch, M_INOTIFY);
+ return (EEXIST);
+ }
+ if ((mask & IN_MASK_ADD) != 0)
+ watch1->mask |= mask;
+ else
+ watch1->mask = mask;
+ mtx_unlock(&sc->lock);
+ free(watch, M_INOTIFY);
+ *wdp = watch1->wd;
+ return (0);
+ }
+ }
+ do {
+ /*
+ * Search for the next available watch descriptor. This is
+ * implemented so as to avoid reusing watch descriptors for as
+ * long as possible.
+ */
+ wd = sc->nextwatch++;
+ TAILQ_FOREACH(watch1, &sc->watches, ilink) {
+ if (wd == watch1->wd)
+ break;
+ }
+ } while (watch1 != NULL);
+ watch->wd = wd;
+ TAILQ_INSERT_TAIL(&sc->watches, watch, ilink);
+ mtx_unlock(&sc->lock);
+
+ v_addpollinfo(vp);
+ VI_LOCK(vp);
+ vn_irflag_set_cond_locked(vp, VIRF_INOTIFY);
+ TAILQ_INSERT_HEAD(&vp->v_pollinfo->vpi_inotify, watch, vlink);
+ VI_UNLOCK(vp);
+
+ *wdp = wd;
+
+ return (0);
+}
+
+/* XXX-MJ add a capsicum-friendly variant */
+int
+sys_inotify_add_watch(struct thread *td, struct inotify_add_watch_args *uap)
+{
+ struct nameidata nd;
+ struct file *fp;
+ struct inotify_softc *sc;
+ struct vnode *vp;
+ uint32_t mask, wd;
+ int error;
+
+ fp = NULL;
+ vp = NULL;
+
+ mask = uap->mask;
+ if ((mask & IN_ALL_EVENTS) == 0)
+ return (EINVAL);
+ if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) ==
+ (IN_MASK_ADD | IN_MASK_CREATE))
+ return (EINVAL);
+ if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS)) != 0)
+ return (EINVAL);
+
+ error = fget(td, uap->fd, &cap_read_rights /* XXX-MJ no */, &fp);
+ if (error != 0)
+ goto out;
+ if (fp->f_type != DTYPE_INOTIFY) {
+ error = EINVAL;
+ goto out;
+ }
+
+ sc = fp->f_data;
+ NDINIT(&nd, LOOKUP,
+ ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF |
+ LOCKSHARED, UIO_USERSPACE, uap->path);
+ error = namei(&nd);
+ if (error != 0)
+ goto out;
+ NDFREE_PNBUF(&nd);
+ vp = nd.ni_vp;
+
+ error = VOP_ACCESS(vp, VREAD, td->td_ucred, td);
+ if (error != 0)
+ goto out;
+
+ if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+
+ if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1,
+ inotify_max_user_watches)) {
+ error = ENOSPC;
+ goto out;
+ }
+ error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td);
+ if (error != 0)
+ goto outlim;
+ VOP_UNLOCK(vp);
+ fdrop(fp, td);
+
+ td->td_retval[0] = wd;
+ return (0);
+
+outlim:
+ (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);
+
+out:
+ if (vp != NULL)
+ vput(vp);
+ if (fp != NULL)
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap)
+{
+ struct file *fp;
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ struct inotify_watch *watch;
+ int error;
+
+ error = fget(td, uap->fd, &cap_no_rights, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_INOTIFY) {
+ error = EINVAL;
+ goto out;
+ }
+ sc = fp->f_data;
+
+ rec = inotify_alloc_record(uap->wd, NULL, 0, IN_IGNORED, 0, M_WAITOK);
+
+ /*
+ * For compatibility with Linux, we do not remove pending events
+ * associated with the watch. Watch descriptors are implemented so as
+ * to avoid being reused for as long as possible, so one hopes that any
+ * pending events from the removed watch descriptor will be removed
+ * before the watch descriptor is recycled.
+ */
+ mtx_lock(&sc->lock);
+ TAILQ_FOREACH(watch, &sc->watches, ilink) {
+ if (watch->wd == uap->wd) {
+ TAILQ_REMOVE(&sc->watches, watch, ilink);
+ break;
+ }
+ }
+ if (watch == NULL) {
+ free(rec, M_INOTIFY);
+ error = EINVAL;
+ } else if (!inotify_queue_record(sc, rec)) {
+ free(rec, M_INOTIFY);
+ error = 0;
+ }
+ mtx_unlock(&sc->lock);
+ if (watch != NULL)
+ inotify_remove_watch(watch);
+out:
+ fdrop(fp, td);
+ return (error);
+}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -38,7 +38,6 @@
* External virtual filesystem routines
*/
-#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_watchdog.h"
@@ -57,6 +56,7 @@
#include <sys/extattr.h>
#include <sys/file.h>
#include <sys/fcntl.h>
+#include <sys/inotify.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
@@ -5247,7 +5247,8 @@
static void
destroy_vpollinfo(struct vpollinfo *vi)
{
-
+ KASSERT(TAILQ_EMPTY(&vi->vpi_inotify),
+ ("%s: pollinfo %p has lingering watches", __func__, vi));
knlist_clear(&vi->vpi_selinfo.si_note, 1);
seldrain(&vi->vpi_selinfo);
destroy_vpollinfo_free(vi);
@@ -5261,7 +5262,7 @@
{
struct vpollinfo *vi;
- if (vp->v_pollinfo != NULL)
+ if (atomic_load_ptr(&vp->v_pollinfo) != NULL)
return;
vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO);
mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF);
@@ -6077,8 +6078,10 @@
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
void
@@ -6123,8 +6126,10 @@
a = ap;
vp = a->a_vp;
vn_seqc_write_end(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB);
+ INOTIFY(vp, IN_ATTRIB);
+ }
}
void
@@ -6154,6 +6159,7 @@
if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_LINK);
VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE);
+ INOTIFY_NAME(vp, tdvp, a->a_cnp, IN_CREATE);
}
}
@@ -6177,8 +6183,10 @@
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
#ifdef DEBUG_VFS_LOCKS
@@ -6213,8 +6221,10 @@
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
void
@@ -6258,6 +6268,8 @@
if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
VFS_KNOTE_LOCKED(vp, NOTE_DELETE);
+ INOTIFY_NAME(vp, dvp, a->a_cnp, IN_ATTRIB);
+ INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE);
}
}
@@ -6287,8 +6299,12 @@
}
VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME);
- if (a->a_tvp)
+ INOTIFY_MOVE(a->a_fvp, a->a_fdvp, a->a_fcnp, a->a_tdvp,
+ a->a_tcnp);
+ if (a->a_tvp) {
VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE);
+ INOTIFY_NAME(a->a_tvp, a->a_tdvp, a->a_tcnp, IN_DELETE);
+ }
}
if (a->a_tdvp != a->a_fdvp)
vdrop(a->a_fdvp);
@@ -6328,6 +6344,7 @@
vp->v_vflag |= VV_UNLINKED;
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
VFS_KNOTE_LOCKED(vp, NOTE_DELETE);
+ INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE);
}
}
@@ -6351,8 +6368,10 @@
a = ap;
vp = a->a_vp;
vn_seqc_write_end(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB);
+ INOTIFY(vp, IN_ATTRIB);
+ }
}
void
@@ -6397,8 +6416,10 @@
a = ap;
vp = a->a_vp;
vn_seqc_write_end(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB);
+ INOTIFY(vp, IN_ATTRIB);
+ }
}
void
@@ -6421,8 +6442,10 @@
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
void
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -50,6 +50,7 @@
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
+#include <sys/inotify.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#ifdef KTRACE
@@ -4312,6 +4313,7 @@
VOP_UNLOCK(vp);
goto fail;
}
+ INOTIFY(vp, IN_ACCESS);
if (count == auio.uio_resid &&
(vp->v_vflag & VV_ROOT) &&
(vp->v_mount->mnt_flag & MNT_UNION)) {
@@ -5129,6 +5131,10 @@
retlen = len;
error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen,
flags, infp->f_cred, outfp->f_cred, td);
+ if (error == 0 && retlen > 0) {
+ INOTIFY(invp, IN_ACCESS);
+ INOTIFY(outvp, IN_MODIFY);
+ }
out:
if (rl_rcookie != NULL)
vn_rangelock_unlock(invp, rl_rcookie);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -51,6 +51,7 @@
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
+#include <sys/inotify.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/limits.h>
@@ -296,7 +297,8 @@
NDREINIT(ndp);
goto restart;
}
- if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0)
+ if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0 ||
+ (vn_irflag_read(ndp->ni_dvp) & VIRF_INOTIFY) != 0)
ndp->ni_cnd.cn_flags |= MAKEENTRY;
#ifdef MAC
error = mac_vnode_check_create(cred, ndp->ni_dvp,
@@ -306,12 +308,14 @@
error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
&ndp->ni_cnd, vap);
vp = ndp->ni_vp;
- if (error == 0 && (fmode & O_EXCL) != 0 &&
- (fmode & (O_EXLOCK | O_SHLOCK)) != 0) {
- VI_LOCK(vp);
- vp->v_iflag |= VI_FOPENING;
- VI_UNLOCK(vp);
- first_open = true;
+ if (error == 0) {
+ if ((fmode & O_EXCL) != 0 &&
+ (fmode & (O_EXLOCK | O_SHLOCK)) != 0) {
+ VI_LOCK(vp);
+ vp->v_iflag |= VI_FOPENING;
+ VI_UNLOCK(vp);
+ first_open = true;
+ }
}
VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &vp : NULL,
false);
@@ -473,6 +477,7 @@
if (vp->v_type != VFIFO && vp->v_type != VSOCK &&
VOP_ACCESS(vp, VREAD, cred, td) == 0)
fp->f_flag |= FKQALLOWED;
+ INOTIFY(vp, IN_OPEN);
return (0);
}
@@ -481,6 +486,7 @@
error = VOP_OPEN(vp, fmode, cred, td, fp);
if (error != 0)
return (error);
+ INOTIFY(vp, IN_OPEN);
error = vn_open_vnode_advlock(vp, fmode, fp);
if (error == 0 && (fmode & FWRITE) != 0) {
@@ -572,6 +578,9 @@
__func__, vp, vp->v_writecount);
}
error = VOP_CLOSE(vp, flags, file_cred, td);
+ if (error == 0 && (flags & FOPENFAILED) == 0)
+ INOTIFY(vp,
+ (flags & FWRITE) != 0 ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE);
if (keep_ref)
VOP_UNLOCK(vp);
else
@@ -1147,6 +1156,7 @@
(vn_irflag_read(vp) & (VIRF_DOOMED | VIRF_PGREAD)) == VIRF_PGREAD) {
error = VOP_READ_PGCACHE(vp, uio, ioflag, fp->f_cred);
if (error == 0) {
+ INOTIFY(vp, IN_ACCESS);
fp->f_nextoff[UIO_READ] = uio->uio_offset;
return (0);
}
@@ -1174,8 +1184,10 @@
if (error == 0)
#endif
error = VOP_READ(vp, uio, ioflag, fp->f_cred);
- fp->f_nextoff[UIO_READ] = uio->uio_offset;
VOP_UNLOCK(vp);
+ if (error == 0)
+ INOTIFY(vp, IN_ACCESS);
+ fp->f_nextoff[UIO_READ] = uio->uio_offset;
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
orig_offset != uio->uio_offset)
/*
@@ -1244,8 +1256,10 @@
if (error == 0)
#endif
error = VOP_WRITE(vp, uio, ioflag, fp->f_cred);
- fp->f_nextoff[UIO_WRITE] = uio->uio_offset;
VOP_UNLOCK(vp);
+ if (error == 0)
+ INOTIFY(vp, IN_MODIFY);
+ fp->f_nextoff[UIO_WRITE] = uio->uio_offset;
if (need_finished_write)
vn_finished_write(mp);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
@@ -1735,6 +1749,8 @@
vattr.va_vaflags |= VA_SYNC;
error = VOP_SETATTR(vp, &vattr, cred);
VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
+ if (error == 0)
+ INOTIFY(vp, IN_MODIFY);
}
return (error);
}
@@ -3753,6 +3769,8 @@
td->td_ucred);
VOP_UNLOCK(vp);
vn_finished_write(mp);
+ if (error == 0)
+ INOTIFY(vp, IN_MODIFY);
if (olen + ooffset != offset + len) {
panic("offset + len changed from %jx/%jx to %jx/%jx",
@@ -3818,6 +3836,8 @@
if (error == 0)
error = VOP_DEALLOCATE(vp, &off, &len, flags, ioflag,
cred);
+ if (error == 0)
+ INOTIFY(vp, IN_MODIFY);
if ((ioflag & IO_NODELOCKED) == 0) {
VOP_UNLOCK(vp);
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -821,6 +821,27 @@
};
+%% inotify vp - - -
+
+vop_inotify {
+ IN struct vnode *vp;
+ IN struct vnode *dvp;
+ IN struct componentname *cnp;
+ IN int event;
+ IN uint32_t cookie;
+};
+
+
+%% inotify_add_watch vp L L L
+
+vop_inotify_add_watch {
+ IN struct vnode *vp;
+ IN struct inotify_softc *sc;
+ IN uint32_t mask;
+ OUT uint32_t *wdp;
+ IN struct thread *td;
+};
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
diff --git a/sys/sys/file.h b/sys/sys/file.h
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -71,6 +71,7 @@
#define DTYPE_PROCDESC 12 /* process descriptor */
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
+#define DTYPE_INOTIFY 15 /* inotify descriptor */
#ifdef _KERNEL
diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/inotify.h
@@ -0,0 +1,118 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#ifndef _INOTIFY_H_
+#define _INOTIFY_H_
+
+#include <sys/_types.h>
+
+/* Flags for inotify_init1(). */
+#define IN_NONBLOCK 0x00000001
+#define IN_CLOEXEC 0x00000002
+
+struct inotify_event {
+ int wd;
+ __uint32_t mask;
+ __uint32_t cookie;
+ __uint32_t len;
+ char name[0];
+};
+
+/* Events, set in the mask field. */
+#define IN_ACCESS 0x00000001
+#define IN_ATTRIB 0x00000002
+#define IN_CLOSE_WRITE 0x00000004
+#define IN_CLOSE_NOWRITE 0x00000008
+#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE)
+#define IN_CREATE 0x00000010
+#define IN_DELETE 0x00000020
+#define IN_DELETE_SELF 0x00000040
+#define IN_MODIFY 0x00000080
+#define IN_MOVE_SELF 0x00000100
+#define IN_MOVED_FROM 0x00000200
+#define IN_MOVED_TO 0x00000400
+#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO)
+#define IN_OPEN 0x00000800
+#define IN_ALL_EVENTS 0x00000fff
+
+/* Events reported only for entries in a watched dir, not the dir itself. */
+#define _IN_DIR_EVENTS (IN_CLOSE_WRITE | IN_DELETE | IN_MODIFY | \
+ IN_MOVED_FROM | IN_MOVED_TO)
+
+/* Flags, set in the mask field. */
+#define IN_DONT_FOLLOW 0x00010000
+#define IN_EXCL_UNLINK 0x00020000
+#define IN_MASK_ADD 0x00040000
+#define IN_ONESHOT 0x00080000
+#define IN_ONLYDIR 0x00100000
+#define IN_MASK_CREATE 0x00200000
+#define _IN_ALL_FLAGS 0x003f0000
+
+/* Flags returned by the kernel. */
+#define IN_IGNORED 0x01000000
+#define IN_ISDIR 0x02000000
+#define IN_Q_OVERFLOW 0x04000000
+#define IN_UNMOUNT 0x08000000
+#define _IN_ALL_RETFLAGS 0x0f000000
+
+#define _IN_ALIGN _Alignof(struct inotify_event)
+#define _IN_NAMESIZE(namelen) \
+ ((namelen) == 0 ? 0 : __align_up((namelen) + 1, _IN_ALIGN))
+
+#ifdef _KERNEL
+struct componentname;
+struct file;
+struct inotify_softc;
+struct thread;
+struct vnode;
+
+int inotify_create_file(struct thread *, struct file *, int, int *);
+void inotify_log(struct vnode *, const char *, size_t, int, __uint32_t);
+
+void vn_inotify(struct vnode *, struct vnode *, struct componentname *, int,
+ uint32_t);
+int vn_inotify_add_watch(struct vnode *, struct inotify_softc *,
+ __uint32_t, __uint32_t *, struct thread *);
+
+/* Log an inotify event. */
+#define INOTIFY(vp, ev) do { \
+ if (__predict_false((vn_irflag_read(vp) & (VIRF_INOTIFY | \
+ VIRF_INOTIFY_PARENT)) != 0)) \
+ VOP_INOTIFY((vp), NULL, NULL, (ev), 0); \
+} while (0)
+
+/* Log an inotify event using a specific name for the vnode. */
+#define INOTIFY_NAME(vp, dvp, cnp, ev) do { \
+ if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0 || \
+ (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) \
+ VOP_INOTIFY((vp), (dvp), (cnp), (ev), 0); \
+} while (0)
+
+extern __uint32_t inotify_rename_cookie;
+
+#define INOTIFY_MOVE(vp, fdvp, fcnp, tdvp, tcnp) do { \
+ if (__predict_false((vn_irflag_read(fdvp) & VIRF_INOTIFY) != 0 || \
+ (vn_irflag_read(tdvp) & VIRF_INOTIFY) != 0 || \
+ (vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) { \
+ __uint32_t cookie; \
+ \
+ cookie = atomic_fetchadd_32(&inotify_rename_cookie, 1); \
+ VOP_INOTIFY((vp), fdvp, fcnp, IN_MOVED_FROM, cookie); \
+ VOP_INOTIFY((vp), tdvp, tcnp, IN_MOVED_TO, cookie); \
+ } \
+} while (0)
+
+#else
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int inotify_init1(int flags);
+int inotify_add_watch(int fd, const char *pathname, __uint32_t mask);
+int inotify_rm_watch(int fd, int wd);
+__END_DECLS
+#endif /* !_KERNEL */
+
+#endif /* !_INOTIFY_H_ */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -122,6 +122,8 @@
long ui_kqcnt; /* (b) number of kqueues */
long ui_umtxcnt; /* (b) number of shared umtxs */
long ui_pipecnt; /* (b) consumption of pipe buffers */
+ long ui_inotifycnt; /* (b) number of inotify descriptors */
+ long ui_inotifywatchcnt; /* (b) number of inotify watches */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
#ifdef RACCT
@@ -144,6 +146,8 @@
int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval);
int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval);
int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max);
+int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval);
+int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval);
int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
struct rlimit *limp);
struct plimit
diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h
--- a/sys/sys/specialfd.h
+++ b/sys/sys/specialfd.h
@@ -30,6 +30,7 @@
enum specialfd_type {
SPECIALFD_EVENTFD = 1,
+ SPECIALFD_INOTIFY = 2,
};
struct specialfd_eventfd {
@@ -37,4 +38,8 @@
int flags;
};
+struct specialfd_inotify {
+ int flags;
+};
+
#endif /* !_SYS_SPECIALFD_H_ */
diff --git a/sys/sys/user.h b/sys/sys/user.h
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -264,6 +264,7 @@
#define KF_TYPE_DEV 12
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
+#define KF_TYPE_INOTIFY 15
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -455,6 +456,10 @@
int32_t kf_kqueue_count;
int32_t kf_kqueue_state;
} kf_kqueue;
+ struct {
+ uint64_t kf_inotify_npending;
+ uint64_t kf_inotify_nbpending;
+ } kf_inotify;
} kf_un;
};
uint16_t kf_status; /* Status flags. */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -86,10 +86,12 @@
* it from v_data. If non-null, this area is freed in getnewvnode().
*/
-struct namecache;
struct cache_fpl;
+struct inotify_watch;
+struct namecache;
struct vpollinfo {
+ TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */
struct mtx vpi_lock; /* lock to protect below */
struct selinfo vpi_selinfo; /* identity of poller(s) */
short vpi_events; /* what they are looking for */
@@ -248,6 +250,9 @@
#define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */
#define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */
#define VIRF_NAMEDATTR 0x0040 /* Named attribute */
+#define VIRF_INOTIFY 0x0080 /* This vnode is being watched */
+#define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being
+ watched */
#define VI_UNUSED0 0x0001 /* unused */
#define VI_MOUNT 0x0002 /* Mount in progress */
@@ -667,6 +672,7 @@
void cache_symlink_free(char *string, size_t size);
int cache_symlink_resolve(struct cache_fpl *fpl, const char *string,
size_t len);
+void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie);
void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp);
@@ -869,8 +875,10 @@
int vop_stdgetwritemount(struct vop_getwritemount_args *);
int vop_stdgetpages(struct vop_getpages_args *);
int vop_stdinactive(struct vop_inactive_args *);
-int vop_stdioctl(struct vop_ioctl_args *);
int vop_stdneed_inactive(struct vop_need_inactive_args *);
+int vop_stdinotify(struct vop_inotify_args *);
+int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *);
+int vop_stdioctl(struct vop_ioctl_args *);
int vop_stdkqfilter(struct vop_kqfilter_args *);
int vop_stdlock(struct vop_lock1_args *);
int vop_stdunlock(struct vop_unlock_args *);
diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk
--- a/sys/tools/vnode_if.awk
+++ b/sys/tools/vnode_if.awk
@@ -193,6 +193,7 @@
printc(common_head \
"#include <sys/param.h>\n" \
"#include <sys/event.h>\n" \
+ "#include <sys/inotify.h>\n" \
"#include <sys/kernel.h>\n" \
"#include <sys/mount.h>\n" \
"#include <sys/sdt.h>\n" \
diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile
--- a/tests/sys/kern/Makefile
+++ b/tests/sys/kern/Makefile
@@ -18,6 +18,7 @@
# One test modifies the maxfiles limit, which can cause spurious test failures.
TEST_METADATA.kern_descrip_test+= is_exclusive="true"
ATF_TESTS_C+= fdgrowtable_test
+ATF_TESTS_C+= inotify_test
ATF_TESTS_C+= kill_zombie
.if ${MK_OPENSSL} != "no"
ATF_TESTS_C+= ktls_test
diff --git a/tests/sys/kern/inotify_test.c b/tests/sys/kern/inotify_test.c
new file mode 100644
--- /dev/null
+++ b/tests/sys/kern/inotify_test.c
@@ -0,0 +1,608 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#include <sys/filio.h>
+#include <sys/inotify.h>
+#include <sys/ioccom.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/un.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+static void
+close_checked(int fd)
+{
+ ATF_REQUIRE(close(fd) == 0);
+}
+
+/*
+ * Make sure that no other events are pending, and close the inotify descriptor.
+ */
+static void
+close_inotify(int fd)
+{
+ int n;
+
+ ATF_REQUIRE(ioctl(fd, FIONREAD, &n) == 0);
+ ATF_REQUIRE(n == 0);
+ close_checked(fd);
+}
+
+static uint32_t
+consume_event_cookie(int ifd, int wd, int event, int flags, const char *name)
+{
+ struct inotify_event *ev;
+ size_t evsz, namelen;
+ ssize_t n;
+ uint32_t cookie;
+
+ /* Only read one record. */
+ namelen = name == NULL ? 0 : strlen(name);
+ evsz = sizeof(*ev) + _IN_NAMESIZE(namelen);
+ ev = malloc(evsz);
+ ATF_REQUIRE(ev != NULL);
+
+ n = read(ifd, ev, evsz);
+ ATF_REQUIRE(n >= 0);
+ ATF_REQUIRE((size_t)n >= sizeof(*ev));
+ ATF_REQUIRE((size_t)n == sizeof(*ev) + ev->len);
+ ATF_REQUIRE((size_t)n == evsz);
+
+ ATF_REQUIRE_MSG((ev->mask & IN_ALL_EVENTS) == event,
+ "expected event %#x, got %#x", event, ev->mask);
+ ATF_REQUIRE_MSG((ev->mask & _IN_ALL_RETFLAGS) == flags,
+ "expected flags %#x, got %#x", flags, ev->mask);
+ ATF_REQUIRE_MSG(ev->wd == wd,
+ "expected wd %d, got %d", wd, ev->wd);
+ ATF_REQUIRE_MSG(name == NULL || strcmp(name, ev->name) == 0,
+ "expected name '%s', got '%s'", name, ev->name);
+ cookie = ev->cookie;
+ if ((ev->mask & (IN_MOVED_FROM | IN_MOVED_TO)) == 0)
+ ATF_REQUIRE(cookie == 0);
+ free(ev);
+ return (cookie);
+}
+
+/*
+ * Read an event from the inotify file descriptor and check that it
+ * matches the expected values.
+ */
+static void
+consume_event(int ifd, int wd, int event, int flags, const char *name)
+{
+ (void)consume_event_cookie(ifd, wd, event, flags, name);
+}
+
+static int
+inotify(int flags)
+{
+ int ifd;
+
+ ifd = inotify_init1(flags);
+ ATF_REQUIRE(ifd != -1);
+ return (ifd);
+}
+
+static int
+watch_file(int ifd, int events, char *path)
+{
+ int fd, wd;
+
+ strncpy(path, "test.XXXXXX", PATH_MAX);
+ fd = mkstemp(path);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+
+ wd = inotify_add_watch(ifd, path, events);
+ ATF_REQUIRE(wd != -1);
+
+ return (wd);
+}
+
+static int
+watch_dir(int ifd, int events, char *path)
+{
+ char *p;
+ int wd;
+
+ strlcpy(path, "test.XXXXXX", PATH_MAX);
+ p = mkdtemp(path);
+ ATF_REQUIRE(p == path);
+
+ wd = inotify_add_watch(ifd, path, events);
+ ATF_REQUIRE(wd != -1);
+
+ return (wd);
+}
+
+/*
+ * Make sure that duplicate, back-to-back events are coalesced.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_coalesce);
+ATF_TC_BODY(inotify_coalesce, tc)
+{
+ char file[PATH_MAX], path[PATH_MAX];
+ int fd, fd1, ifd, n, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ /* Create a directory and watch it. */
+ wd = watch_dir(ifd, IN_OPEN, path);
+ /* Create a file in the directory and open it. */
+ snprintf(file, sizeof(file), "%s/file", path);
+ fd = open(file, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ fd = open(file, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ fd1 = open(file, O_RDONLY);
+ ATF_REQUIRE(fd1 != -1);
+ close_checked(fd1);
+ close_checked(fd);
+
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+ ATF_REQUIRE(ioctl(ifd, FIONREAD, &n) == 0);
+ ATF_REQUIRE(n == 0);
+
+ close_inotify(ifd);
+}
+
+/*
+ * Check handling of IN_MASK_CREATE.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_mask_create);
+ATF_TC_BODY(inotify_mask_create, tc)
+{
+ char path[PATH_MAX];
+ int ifd, wd, wd1;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ /* Create a directory and watch it. */
+ wd = watch_dir(ifd, IN_CREATE, path);
+ /* Updating the watch with IN_MASK_CREATE should result in an error. */
+ wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_CREATE);
+ ATF_REQUIRE_ERRNO(EEXIST, wd1 == -1);
+ /* It's an error to specify IN_MASK_ADD with IN_MASK_CREATE. */
+ wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_ADD |
+ IN_MASK_CREATE);
+ ATF_REQUIRE_ERRNO(EINVAL, wd1 == -1);
+ /* Updating the watch without IN_MASK_CREATE should work. */
+ wd1 = inotify_add_watch(ifd, path, IN_MODIFY);
+ ATF_REQUIRE(wd1 != -1);
+ ATF_REQUIRE_EQ(wd, wd1);
+
+ close_inotify(ifd);
+}
+
+/*
+ * Make sure that exceeding max_events pending events results in an overflow
+ * event.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_queue_overflow);
+ATF_TC_BODY(inotify_queue_overflow, tc)
+{
+ char path[PATH_MAX];
+ size_t size;
+ int error, dfd, ifd, max, wd;
+
+ size = sizeof(max);
+ error = sysctlbyname("vfs.inotify.max_events", &max, &size, NULL, 0);
+ ATF_REQUIRE(error == 0);
+
+ ifd = inotify(IN_NONBLOCK);
+
+ /* Create a directory and watch it for file creation events. */
+ wd = watch_dir(ifd, IN_CREATE, path);
+ dfd = open(path, O_DIRECTORY);
+ ATF_REQUIRE(dfd != -1);
+ /* Generate max+1 file creation events. */
+ for (int i = 0; i < max + 1; i++) {
+ char name[NAME_MAX];
+ int fd;
+
+ (void)snprintf(name, sizeof(name), "file%d", i);
+ fd = openat(dfd, name, O_CREAT | O_RDWR, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ }
+
+ /*
+ * Read our events. We should see files 0..max-1 and then an overflow
+ * event.
+ */
+ for (int i = 0; i < max; i++) {
+ char name[NAME_MAX];
+
+ (void)snprintf(name, sizeof(name), "file%d", i);
+ consume_event(ifd, wd, IN_CREATE, 0, name);
+ }
+
+ /* Look for an overflow event. */
+ consume_event(ifd, -1, 0, IN_Q_OVERFLOW, NULL);
+
+ close_checked(dfd);
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_access_file);
+ATF_TC_BODY(inotify_event_access_file, tc)
+{
+ char path[PATH_MAX], buf[16];
+ off_t nb;
+ ssize_t n;
+ int error, fd, fd1, ifd, s[2], wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_file(ifd, IN_ACCESS, path);
+
+ fd = open(path, O_RDWR);
+ n = write(fd, "test", 4);
+ ATF_REQUIRE(n == 4);
+
+ /* A simple read(2) should generate an access. */
+ ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0);
+ n = read(fd, buf, sizeof(buf));
+ ATF_REQUIRE(n == 4);
+ ATF_REQUIRE(memcmp(buf, "test", 4) == 0);
+ consume_event(ifd, wd, IN_ACCESS, 0, NULL);
+
+ /* copy_file_range(2) should as well. */
+ ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0);
+ fd1 = open("sink", O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd1 != -1);
+ n = copy_file_range(fd, NULL, fd1, NULL, 4, 0);
+ ATF_REQUIRE(n == 4);
+ close_checked(fd1);
+ consume_event(ifd, wd, IN_ACCESS, 0, NULL);
+
+ /* As should sendfile(2). */
+ error = socketpair(AF_UNIX, SOCK_STREAM, 0, s);
+ ATF_REQUIRE(error == 0);
+ error = sendfile(fd, s[0], 0, 4, NULL, &nb, 0);
+ ATF_REQUIRE(error == 0);
+ ATF_REQUIRE(nb == 4);
+ consume_event(ifd, wd, IN_ACCESS, 0, NULL);
+ close_checked(s[0]);
+ close_checked(s[1]);
+
+ close_checked(fd);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_access_dir);
+ATF_TC_BODY(inotify_event_access_dir, tc)
+{
+ char root[PATH_MAX], path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+ int error, ifd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_ACCESS, root);
+ snprintf(path, sizeof(path), "%s/dir", root);
+ error = mkdir(path, 0755);
+ ATF_REQUIRE(error == 0);
+
+ /* Read an entry and generate an access. */
+ dir = opendir(path);
+ ATF_REQUIRE(dir != NULL);
+ ent = readdir(dir);
+ ATF_REQUIRE(ent != NULL);
+ ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 ||
+ strcmp(ent->d_name, "..") == 0);
+ ATF_REQUIRE(closedir(dir) == 0);
+ consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, "dir");
+
+ /*
+ * Reading the watched directory should generate an access event.
+ * This is contrary to Linux's inotify man page, which states that
+ * IN_ACCESS is only generated for accesses to objects in a watched
+ * directory.
+ */
+ dir = opendir(root);
+ ATF_REQUIRE(dir != NULL);
+ ent = readdir(dir);
+ ATF_REQUIRE(ent != NULL);
+ ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 ||
+ strcmp(ent->d_name, "..") == 0);
+ ATF_REQUIRE(closedir(dir) == 0);
+ consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, NULL);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_attrib);
+ATF_TC_BODY(inotify_event_attrib, tc)
+{
+ char path[PATH_MAX];
+ int error, ifd, fd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_file(ifd, IN_ATTRIB, path);
+
+ fd = open(path, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ error = fchmod(fd, 0600);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_ATTRIB, 0, NULL);
+
+ error = fchown(fd, 0, 0);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_ATTRIB, 0, NULL);
+
+ close_checked(fd);
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_close_nowrite);
+ATF_TC_BODY(inotify_event_close_nowrite, tc)
+{
+ char file[PATH_MAX], file1[PATH_MAX], dir[PATH_MAX];
+ int ifd, fd, wd1, wd2;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd1 = watch_dir(ifd, IN_CLOSE_NOWRITE, dir);
+ wd2 = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, file);
+
+ fd = open(dir, O_DIRECTORY);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd1, IN_CLOSE_NOWRITE, IN_ISDIR, NULL);
+
+ fd = open(file, O_RDONLY);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd2, IN_CLOSE_NOWRITE, 0, NULL);
+
+ snprintf(file1, sizeof(file1), "%s/file", dir);
+ fd = open(file1, O_RDONLY | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd1, IN_CLOSE_NOWRITE, 0, "file");
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_close_write);
+ATF_TC_BODY(inotify_event_close_write, tc)
+{
+ char path[PATH_MAX];
+ int ifd, fd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, path);
+
+ fd = open(path, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_CLOSE_WRITE, 0, NULL);
+
+ close_inotify(ifd);
+}
+
+/* Verify that various operations in a directory generate IN_CREATE events. */
+ATF_TC_WITHOUT_HEAD(inotify_event_create);
+ATF_TC_BODY(inotify_event_create, tc)
+{
+ struct sockaddr_un sun;
+ char path[PATH_MAX], path1[PATH_MAX], root[PATH_MAX];
+ ssize_t n;
+ int error, ifd, ifd1, fd, s, wd, wd1;
+ char b;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_CREATE, root);
+
+ /* Regular file. */
+ snprintf(path, sizeof(path), "%s/file", root);
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ /*
+ * Make sure we get an event triggered by the fd used to create the
+ * file.
+ */
+ ifd1 = inotify(IN_NONBLOCK);
+ wd1 = inotify_add_watch(ifd1, root, IN_MODIFY);
+ b = 42;
+ n = write(fd, &b, sizeof(b));
+ ATF_REQUIRE(n == sizeof(b));
+ close_checked(fd);
+ consume_event(ifd, wd, IN_CREATE, 0, "file");
+ consume_event(ifd1, wd1, IN_MODIFY, 0, "file");
+ close_inotify(ifd1);
+
+ /* Hard link. */
+ snprintf(path1, sizeof(path1), "%s/link", root);
+ error = link(path, path1);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, 0, "link");
+
+ /* Directory. */
+ snprintf(path, sizeof(path), "%s/dir", root);
+ error = mkdir(path, 0755);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, IN_ISDIR, "dir");
+
+ /* Symbolic link. */
+ snprintf(path1, sizeof(path1), "%s/symlink", root);
+ error = symlink(path, path1);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, 0, "symlink");
+
+ /* FIFO. */
+ snprintf(path, sizeof(path), "%s/fifo", root);
+ error = mkfifo(path, 0644);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, 0, "fifo");
+
+ /* Binding a socket. */
+ s = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&sun, 0, sizeof(sun));
+ sun.sun_family = AF_UNIX;
+ sun.sun_len = sizeof(sun);
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "%s/socket", root);
+ error = bind(s, (struct sockaddr *)&sun, sizeof(sun));
+ ATF_REQUIRE(error == 0);
+ close_checked(s);
+ consume_event(ifd, wd, IN_CREATE, 0, "socket");
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_delete);
+ATF_TC_BODY(inotify_event_delete, tc)
+{
+ char root[PATH_MAX], path[PATH_MAX], file[PATH_MAX];
+ int error, fd, ifd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_DELETE | IN_DELETE_SELF, root);
+
+ snprintf(path, sizeof(path), "%s/file", root);
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ error = unlink(path);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE, 0, "file");
+ close_checked(fd);
+
+ /* Make sure that renaming over a file generates a delete event. */
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ snprintf(file, sizeof(file), "%s/file2", root);
+ fd = open(file, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ error = rename(file, path);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE, 0, "file");
+
+ error = unlink(path);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE, 0, "file");
+ error = rmdir(root);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL);
+ consume_event(ifd, wd, 0, IN_IGNORED, NULL);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_move);
+ATF_TC_BODY(inotify_event_move, tc)
+{
+ char dir1[PATH_MAX], dir2[PATH_MAX], path1[PATH_MAX], path2[PATH_MAX];
+ char path3[PATH_MAX];
+ int error, ifd, fd, wd1, wd2, wd3;
+ uint32_t cookie1, cookie2;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd1 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir1);
+ wd2 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir2);
+
+ snprintf(path1, sizeof(path1), "%s/file", dir1);
+ fd = open(path1, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ snprintf(path2, sizeof(path2), "%s/file2", dir2);
+ error = rename(path1, path2);
+ ATF_REQUIRE(error == 0);
+ cookie1 = consume_event_cookie(ifd, wd1, IN_MOVED_FROM, 0, "file");
+ cookie2 = consume_event_cookie(ifd, wd2, IN_MOVED_TO, 0, "file2");
+ ATF_REQUIRE_MSG(cookie1 == cookie2,
+ "expected cookie %u, got %u", cookie1, cookie2);
+
+ snprintf(path2, sizeof(path2), "%s/dir", dir2);
+ error = rename(dir1, path2);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd1, IN_MOVE_SELF, IN_ISDIR, NULL);
+ consume_event(ifd, wd2, IN_MOVED_TO, IN_ISDIR, "dir");
+
+ wd3 = watch_file(ifd, IN_MOVE_SELF, path3);
+ error = rename(path3, "foo");
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd3, IN_MOVE_SELF, 0, NULL);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_open);
+ATF_TC_BODY(inotify_event_open, tc)
+{
+ char root[PATH_MAX], path[PATH_MAX];
+ int error, ifd, fd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_OPEN, root);
+
+ snprintf(path, sizeof(path), "%s/file", root);
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+
+ fd = open(path, O_PATH);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+
+ fd = open(root, O_DIRECTORY);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL);
+
+ snprintf(path, sizeof(path), "%s/fifo", root);
+ error = mkfifo(path, 0644);
+ ATF_REQUIRE(error == 0);
+ fd = open(path, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, 0, "fifo");
+
+ close_inotify(ifd);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Tests for the inotify syscalls. */
+ ATF_TP_ADD_TC(tp, inotify_coalesce);
+ ATF_TP_ADD_TC(tp, inotify_mask_create);
+ ATF_TP_ADD_TC(tp, inotify_queue_overflow);
+ /* Tests for the various inotify event types. */
+ ATF_TP_ADD_TC(tp, inotify_event_access_file);
+ ATF_TP_ADD_TC(tp, inotify_event_access_dir);
+ ATF_TP_ADD_TC(tp, inotify_event_attrib);
+ ATF_TP_ADD_TC(tp, inotify_event_close_nowrite);
+ ATF_TP_ADD_TC(tp, inotify_event_close_write);
+ ATF_TP_ADD_TC(tp, inotify_event_create);
+ ATF_TP_ADD_TC(tp, inotify_event_delete);
+ ATF_TP_ADD_TC(tp, inotify_event_move);
+ ATF_TP_ADD_TC(tp, inotify_event_open);
+ return (atf_no_error());
+}
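
The to-do list at the top of vfs_inotify.c notes that an example program is still missing. As a rough sketch only (not part of the diff above), the following shows how the proposed interface might be consumed from userspace once the header and libc wrappers added here are installed; the watched path "/tmp" and the fixed buffer size are arbitrary placeholders, and error handling is reduced to the essentials.

/*
 * Sketch of a userspace consumer of the proposed inotify(2) API.
 * Assumes the userland declarations from sys/sys/inotify.h and the
 * libc wrappers added in this diff.
 */
#include <sys/inotify.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Keep the buffer aligned for struct inotify_event records. */
	char buf[4096] __attribute__((__aligned__(__alignof__(struct inotify_event))));
	ssize_t n;
	int ifd, wd;

	/* A blocking descriptor; IN_NONBLOCK and IN_CLOEXEC are also accepted. */
	ifd = inotify_init1(0);
	if (ifd == -1)
		err(1, "inotify_init1");

	/* Watch a directory for entry creation, deletion and modification. */
	wd = inotify_add_watch(ifd, "/tmp", IN_CREATE | IN_DELETE | IN_MODIFY);
	if (wd == -1)
		err(1, "inotify_add_watch");

	/* Each read(2) returns one or more variable-length event records. */
	while ((n = read(ifd, buf, sizeof(buf))) > 0) {
		char *p;

		for (p = buf; p < buf + n;) {
			struct inotify_event *ev;

			ev = (struct inotify_event *)(void *)p;
			printf("wd %d mask %#x name %s\n", ev->wd, ev->mask,
			    ev->len > 0 ? ev->name : "");
			/* ev->len already includes the name padding. */
			p += sizeof(*ev) + ev->len;
		}
	}

	(void)inotify_rm_watch(ifd, wd);
	close(ifd);
	return (0);
}

A nonblocking variant would pass IN_NONBLOCK to inotify_init1() and poll the descriptor, using FIONREAD to check for pending bytes, which is how the ATF tests above drain their queues.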