diff --git a/RELNOTES b/RELNOTES --- a/RELNOTES +++ b/RELNOTES @@ -10,6 +10,9 @@ Changes to this file should not be MFCed. +f1f230439fa4: + FreeBSD now implements the inotify(2) family of system calls. + 50e733f19b37, 171f66b0c2ca: These commits helped improve utilization of NFSv4.1/4.2 delegations. The changes are only used when the NFSv4 diff --git a/UPDATING b/UPDATING --- a/UPDATING +++ b/UPDATING @@ -27,6 +27,10 @@ world, or to merely disable the most expensive debugging functionality at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20250704: + LinuxKPI device.h and acpi changes effecting drivers and drm-kmod. + Bump __FreeBSD_version 1500050 to be able to detect these changes. + 20250630: Commits 171f66b0c2ca and 8e2a90ac8089 changed the internal api between nfscommon.ko and the other nfs modules. diff --git a/etc/mtree/BSD.usr.dist b/etc/mtree/BSD.usr.dist --- a/etc/mtree/BSD.usr.dist +++ b/etc/mtree/BSD.usr.dist @@ -304,6 +304,8 @@ .. indent .. + inotify + .. ipfilter .. ipfw diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -89,6 +89,7 @@ glob.c \ glob-compat11.c \ initgroups.c \ + inotify.c \ isatty.c \ isinf.c \ isnan.c \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -464,6 +464,9 @@ fdscandir_b; fts_open_b; glob_b; + inotify_add_watch; + inotify_init; + inotify_init1; psiginfo; rtld_get_var; rtld_set_var; diff --git a/lib/libc/gen/inotify.c b/lib/libc/gen/inotify.c new file mode 100644 --- /dev/null +++ b/lib/libc/gen/inotify.c @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#include "namespace.h" +#include +#include +#include +#include "un-namespace.h" +#include "libc_private.h" + +/* + * Provide compatibility with libinotify, which uses different values for these + * flags. 
+ */ +#define IN_NONBLOCK_OLD 0x80000 +#define IN_CLOEXEC_OLD 0x00800 + +int +inotify_add_watch(int fd, const char *pathname, uint32_t mask) +{ + return (inotify_add_watch_at(fd, AT_FDCWD, pathname, mask)); +} + +int +inotify_init1(int flags) +{ + struct specialfd_inotify args; + + if ((flags & IN_NONBLOCK_OLD) != 0) { + flags &= ~IN_NONBLOCK_OLD; + flags |= IN_NONBLOCK; + } + if ((flags & IN_CLOEXEC_OLD) != 0) { + flags &= ~IN_CLOEXEC_OLD; + flags |= IN_CLOEXEC; + } + args.flags = flags; + return (__sys___specialfd(SPECIALFD_INOTIFY, &args, sizeof(args))); +} + +int +inotify_init(void) +{ + return (inotify_init1(0)); +} diff --git a/lib/libifconfig/libifconfig.h b/lib/libifconfig/libifconfig.h --- a/lib/libifconfig/libifconfig.h +++ b/lib/libifconfig/libifconfig.h @@ -29,6 +29,7 @@ #include #include +#include /* for ifbvlan_set_t */ #include #include @@ -64,6 +65,7 @@ struct ifconfig_bridge_status { struct ifbropreq *params; /**< current operational parameters */ struct ifbreq *members; /**< list of bridge members */ + ifbvlan_set_t *member_vlans; /**< bridge member vlan sets */ size_t members_count; /**< how many member interfaces */ uint32_t cache_size; /**< size of address cache */ uint32_t cache_lifetime; /**< address cache entry lifetime */ diff --git a/lib/libifconfig/libifconfig_bridge.c b/lib/libifconfig/libifconfig_bridge.c --- a/lib/libifconfig/libifconfig_bridge.c +++ b/lib/libifconfig/libifconfig_bridge.c @@ -66,40 +66,37 @@ { struct ifbifconf members; struct ifbrparam cache_param; - struct _ifconfig_bridge_status *bridge; - char *buf; + struct _ifconfig_bridge_status *bridge = NULL; + char *buf = NULL; + members.ifbic_buf = NULL; *bridgep = NULL; bridge = calloc(1, sizeof(struct _ifconfig_bridge_status)); if (bridge == NULL) { h->error.errtype = OTHER; h->error.errcode = ENOMEM; - return (-1); + goto err; } bridge->inner.params = &bridge->params; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGCACHE, &cache_param, sizeof(cache_param), false) != 0) { - free(bridge); - return (-1); + goto err; } bridge->inner.cache_size = cache_param.ifbrp_csize; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGTO, &cache_param, sizeof(cache_param), false) != 0) { - free(bridge); - return (-1); + goto err; } bridge->inner.cache_lifetime = cache_param.ifbrp_ctime; if (ifconfig_bridge_ioctlwrap(h, name, BRDGPARAM, &bridge->params, sizeof(bridge->params), false) != 0) { - free(bridge); - return (-1); + goto err; } - members.ifbic_buf = NULL; for (size_t len = 8192; (buf = realloc(members.ifbic_buf, len)) != NULL; len *= 2) { @@ -107,27 +104,52 @@ members.ifbic_len = len; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGIFS, &members, sizeof(members), false) != 0) { - free(buf); - free(bridge); - return (-1); + goto err; } if ((members.ifbic_len + sizeof(*members.ifbic_req)) < len) break; } if (buf == NULL) { - free(members.ifbic_buf); - free(bridge); h->error.errtype = OTHER; h->error.errcode = ENOMEM; - return (-1); + goto err; } bridge->inner.members = members.ifbic_req; bridge->inner.members_count = members.ifbic_len / sizeof(*members.ifbic_req); + bridge->inner.member_vlans = calloc(bridge->inner.members_count, + sizeof(ifbvlan_set_t)); + if (bridge->inner.member_vlans == NULL) { + h->error.errtype = OTHER; + h->error.errcode = ENOMEM; + goto err; + } + for (size_t i = 0; i < bridge->inner.members_count; ++i) { + struct ifbif_vlan_req vreq; + memset(&vreq, 0, sizeof(vreq)); + strlcpy(vreq.bv_ifname, bridge->inner.members[i].ifbr_ifsname, + sizeof(vreq.bv_ifname)); + + if (ifconfig_bridge_ioctlwrap(h, name, 
BRDGGIFVLANSET, &vreq, + sizeof(vreq), false) != 0) { + goto err; + } + + __BIT_COPY(BRVLAN_SETSIZE, &vreq.bv_set, + &bridge->inner.member_vlans[i]); + } + *bridgep = &bridge->inner; return (0); + +err: + free(members.ifbic_buf); + if (bridge) + free(bridge->inner.member_vlans); + free(bridge); + return (-1); } void diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h --- a/lib/libprocstat/libprocstat.h +++ b/lib/libprocstat/libprocstat.h @@ -71,6 +71,7 @@ #define PS_FST_TYPE_PROCDESC 13 #define PS_FST_TYPE_DEV 14 #define PS_FST_TYPE_EVENTFD 15 +#define PS_FST_TYPE_INOTIFY 16 /* * Special descriptor numbers. diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -625,6 +625,10 @@ type = PS_FST_TYPE_EVENTFD; data = file.f_data; break; + case DTYPE_INOTIFY: + type = PS_FST_TYPE_INOTIFY; + data = file.f_data; + break; default: continue; } @@ -717,6 +721,7 @@ { KF_TYPE_SOCKET, PS_FST_TYPE_SOCKET }, { KF_TYPE_VNODE, PS_FST_TYPE_VNODE }, { KF_TYPE_EVENTFD, PS_FST_TYPE_EVENTFD }, + { KF_TYPE_INOTIFY, PS_FST_TYPE_INOTIFY }, { KF_TYPE_UNKNOWN, PS_FST_TYPE_UNKNOWN } }; #define NKFTYPES (sizeof(kftypes2fst) / sizeof(*kftypes2fst)) diff --git a/lib/libsys/Makefile.sys b/lib/libsys/Makefile.sys --- a/lib/libsys/Makefile.sys +++ b/lib/libsys/Makefile.sys @@ -224,6 +224,7 @@ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ + inotify.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ @@ -448,6 +449,11 @@ MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 +MLINKS+=inotify.2 inotify_init.2 \ + inotify.2 inotify_init1.2 \ + inotify.2 inotify_add_watch.2 \ + inotify.2 inotify_add_watch_at.2 \ + inotify.2 inotify_rm_watch.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map --- a/lib/libsys/Symbol.sys.map +++ b/lib/libsys/Symbol.sys.map @@ -381,6 +381,8 @@ exterrctl; fchroot; getrlimitusage; + inotify_add_watch_at; + inotify_rm_watch; kcmp; setcred; }; diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h --- a/lib/libsys/_libsys.h +++ b/lib/libsys/_libsys.h @@ -466,6 +466,8 @@ typedef int (__sys_fchroot_t)(int); typedef int (__sys_setcred_t)(u_int, const struct setcred *, size_t); typedef int (__sys_exterrctl_t)(u_int, u_int, void *); +typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t); +typedef int (__sys_inotify_rm_watch_t)(int, int); void __sys_exit(int rval); int __sys_fork(void); @@ -868,6 +870,8 @@ int __sys_fchroot(int fd); int __sys_setcred(u_int flags, const struct setcred * wcred, size_t size); int __sys_exterrctl(u_int op, u_int flags, void * ptr); +int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask); +int __sys_inotify_rm_watch(int fd, int wd); __END_DECLS #endif /* __LIBSYS_H_ */ diff --git a/lib/libsys/inotify.2 b/lib/libsys/inotify.2 new file mode 100644 --- /dev/null +++ b/lib/libsys/inotify.2 @@ -0,0 +1,379 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. 
+.\" +.Dd May 19, 2025 +.Dt INOTIFY 2 +.Os +.Sh NAME +.Nm inotify_init , +.Nm inotify_init1 , +.Nm inotify_add_watch , +.Nm inotify_add_watch_at , +.Nm inotify_rm_watch +.Nd monitor file system events +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/inotify.h +.Ft int +.Fo inotify_init +.Fc +.Ft int +.Fo inotify_init1 +.Fa "int flags" +.Fc +.Ft int +.Fo inotify_add_watch +.Fa "int fd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_add_watch_at +.Fa "int fd" +.Fa "int dfd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_rm_watch +.Fa "int fd" +.Fa "uint32_t wd" +.Fc +.Bd -literal +struct inotify_event { + int wd; /* Watch descriptor */ + uint32_t mask; /* Event and flags */ + uint32_t cookie; /* Unique ID which links rename events */ + uint32_t len; /* Name field size, including nul bytes */ + char name[0]; /* Filename (nul-terminated) */ +}; +.Ed +.Sh DESCRIPTION +The inotify system calls provide an interface to monitor file system events. +They aim to be compatible with the Linux inotify interface. +The provided functionality is similar to the +.Dv EVFILT_VNODE +filter of the +.Xr kevent 2 +system call, but further allows monitoring of a directory without needing to +open each object in that directory. +This avoids races and reduces the number of file descriptors needed to monitor +a large file hierarchy. +.Pp +inotify allows one or more file system objects, generally files or directories, +to be watched for events, such as file open or close. +Watched objects are associated with a file descriptor returned +by +.Fn inotify_init +or +.Fn inotify_init1 . +When an event occurs, a record describing the event becomes available for +reading from the inotify file descriptor. +Each inotify descriptor thus refers to a queue of events waiting to be read. +inotify descriptors are inherited across +.Xr fork 2 +calls and may be passed to other processes via +.Xr unix 4 +sockets. +.Pp +The +.Fn inotify_init1 +system call accepts two flags. +The +.Dv IN_NONBLOCK +flag causes the inotify descriptor to be opened in non-blocking mode, such that +.Xr read 2 +calls will not block if no records are available to consume, and will instead +return +.Er EWOULDBLOCK . +The +.Dv IN_CLOEXEC +flag causes the inotify descriptor to be closed automatically when +.Xr execve 2 +is called. +.Pp +To watch a file or directory, the +.Fn inotify_add_watch +or +.Fn inotify_add_watch_at +system calls must be used. +They take a path and a mask of events to watch for, and return a +.Dq watch descriptor , +a non-negative integer which uniquely identifies the watched object within the +inotify descriptor. +.Pp +The +.Fn inotify_rm_watch +system call removes a watch from an inotify descriptor. +.Pp +When watching a directory, objects within the directory are monitored for events +as well as the directory itself. +A record describing an inotify event consists of a +.Dq struct inotify_event +followed by the name of the object in the directory being watched. +If the watched object itself generates an event, no name is present. +Extra nul bytes may follow the file name in order to provide alignment for a +subsequent record. +.Pp +The following events are defined: +.Bl -tag -width IN_CLOSE_NOWRITE +.It Dv IN_ACCESS +A file's contents were accessed, e.g., by +.Xr read 2 +.Xr copy_file_range 2 , +.Xr sendfile 2 , +or +.Xr getdirentries 2 . +.It Dv IN_ATTRIB +A file's metadata was changed, e.g., by +.Xr chmod 2 +or +.Xr unlink 2 . 
+.It Dv IN_CLOSE_WRITE
+A file that was previously opened for writing was closed.
+.It Dv IN_CLOSE_NOWRITE
+A file that was previously opened read-only was closed.
+.It Dv IN_CREATE
+A file within a watched directory was created, e.g., by
+.Xr open 2 ,
+.Xr mkdir 2 ,
+.Xr symlink 2 ,
+.Xr mknod 2 ,
+or
+.Xr bind 2 .
+.It Dv IN_DELETE
+A file or directory within a watched directory was removed.
+.It Dv IN_DELETE_SELF
+The watched file or directory itself was deleted.
+This event is generated only when the link count of the file drops
+to zero.
+.It Dv IN_MODIFY
+A file's contents were modified, e.g., by
+.Xr write 2
+or
+.Xr copy_file_range 2 .
+.It Dv IN_MOVE_SELF
+The watched file or directory itself was renamed.
+.It Dv IN_MOVED_FROM
+A file or directory was moved from a watched directory.
+.It Dv IN_MOVED_TO
+A file or directory was moved into a watched directory.
+A
+.Xr rename 2
+call thus may generate two events, one for the old name and one for the new
+name.
+These are linked together by the
+.Ar cookie
+field in the inotify record, which can be compared to link the two records
+to the same event.
+.It Dv IN_OPEN
+A file was opened.
+.El
+.Pp
+Some additional flags may be set in inotify event records:
+.Bl -tag -width IN_Q_OVERFLOW
+.It Dv IN_IGNORED
+When a watch is removed from a file, for example because it was created with the
+.Dv IN_ONESHOT
+flag, the file was deleted, or the watch was explicitly removed with
+.Xr inotify_rm_watch 2 ,
+an event with this mask is generated to indicate that the watch will not
+generate any more events.
+Once this event is generated, the watch is automatically removed, and in
+particular should not be removed manually with
+.Xr inotify_rm_watch 2 .
+.It Dv IN_ISDIR
+When the subject of an event is a directory, this flag is set in the
+.Ar mask .
+.It Dv IN_Q_OVERFLOW
+One or more events were dropped, for example because of a kernel memory allocation
+failure or because the event queue size hit a limit.
+.It Dv IN_UNMOUNT
+The filesystem containing the watched object was unmounted.
+.El
+.Pp
+A number of flags may also be specified in the
+.Ar mask
+given to
+.Fn inotify_add_watch
+and
+.Fn inotify_add_watch_at :
+.Bl -tag -width IN_DONT_FOLLOW
+.It Dv IN_DONT_FOLLOW
+If
+.Ar pathname
+is a symbolic link, do not follow it.
+.It Dv IN_EXCL_UNLINK
+This currently has no effect; see the
+.Sx BUGS
+section.
+.It Dv IN_MASK_ADD
+When adding a watch to an object, and that object is already watched by the
+same inotify descriptor, by default the mask of the existing watch is
+overwritten.
+When
+.Dv IN_MASK_ADD
+is specified, the mask of the existing watch is instead logically ORed with
+the new mask.
+.It Dv IN_MASK_CREATE
+When
+.Fn inotify_add_watch
+is used to add a watch to an object,
+.Dv IN_MASK_CREATE
+is specified, and that object is already watched by the same inotify descriptor,
+return an error instead of updating the existing watch.
+.It Dv IN_ONESHOT
+Monitor the object for a single event, after which the watch is automatically
+removed.
+As part of removal, a
+.Dv IN_IGNORED
+event is generated.
+.It Dv IN_ONLYDIR
+When creating a watch, fail with
+.Er ENOTDIR
+if the path does not refer to a directory.
+.El
+.Sh SYSCTL VARIABLES
+The following variables are available as both
+.Xr sysctl 8
+variables and
+.Xr loader 8
+tunables:
+.Bl -tag -width 15
+.It Va vfs.inotify.max_events
+The maximum number of inotify records that can be queued for a single
+inotify descriptor.
+Records in excess of this limit are discarded, and a single event with +mask equal to +.Dv IN_Q_OVERFLOW +will be present in the queue. +.It Va vfs.inotify.max_user_instances +The maximum number of inotify descriptors that can be created by a single +user. +.It Va vfs.inotify.max_user_watches +The maximum number of inotify watches per user. +.El +.Sh EXAMPLES +See the example program in +.Pa /usr/share/examples/inotify/inotify.c . +.Sh ERRORS +The +.Fn inotify_init +and +.Fn inotify_init1 +functions will fail if: +.Bl -tag -width Er +.It Bq Er ENFILE +The system limit on the total number of open files has been reached. +.It Bq Er EMFILE +A per-process limit on the number of open files has been reached. +.It Bq Er EMFILE +The system limit on the number of inotify descriptors has been reached. +.It Bq Er EINVAL +An unrecognized flag was passed to +.Fn inotify_init1 . +.El +.Pp +The +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at +system calls will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar mask +parameter does not specify an event, or +the +.Dv IN_MASK_CREATE +and +.Dv IN_MASK_ADD +flags are both set, or an unrecognized flag was passed. +.It Bq Er ENOTDIR +The +.Ar pathname +parameter refers to a file that is not a directory, and the +.Dv IN_ONLYDIR +flag was specified. +.It Bq Er ENOSPC +The per-user limit on the total number of inotify watches has been reached. +.It Bq Er ECAPMODE +The process is in capability mode and +.Fn inotify_add_watch +was called, or +.Fn inotify_add_watch_at +was called with +.Dv AT_FDCWD +as the directory file descriptor +.Ar dfd . +.It Bq Er ENOTCAPABLE +The process is in capability mode and +.Ar pathname +contains a +.Dq .. +component leading to a directory outside the directory hierarchy specified +by +.Ar dfd . +.El +.Pp +The +.Fn inotify_rm_watch +system call will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar wd +parameter is not a valid watch descriptor. +.El +.Sh SEE ALSO +.Xr kevent 2 , +.Xr capsicum 4 +.Sh STANDARDS +The +.Nm +interface originates from Linux and is non-standard. +This implementation aims to be compatible with that of Linux and is based +on the documentation available at +.Pa https://man7.org/linux/man-pages/man7/inotify.7.html . +.Sh HISTORY +The inotify system calls first appeared in +.Fx 15.0 . +.Sh BUGS +If a file in a watched directory has multiple hard links, +an access via any hard link for that file will generate an event, even +if the accessed link belongs to an unwatched directory. +This is not the case for the Linux implementation, where only accesses +via the hard link in the watched directory will generate an event. +.Pp +If a watched directory contains multiple hard links of a file, an event +on one of the hard links will generate an inotify record for each link +in the directory. +.Pp +When a file is unlinked, no more events will be generated for that file, +even if it continues to be accessed. +By default, the Linux implementation will continue to generate events in +this case. +Thus, the +.Fx +implementation behaves as though +.Dv IN_EXCL_UNLINK +is always set. 
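A minimal sketch of typical use of the interface documented above, assuming a single watched directory and blocking reads; the event mask, buffer size, and program structure here are illustrative only, and the complete example installed by this change is share/examples/inotify/inotify.c below:

#include <sys/inotify.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	_Alignas(struct inotify_event) char buf[4096];
	ssize_t n;
	int fd, wd;

	if (argc != 2)
		errx(1, "usage: watchdir directory");

	/* A blocking inotify descriptor, closed on exec. */
	fd = inotify_init1(IN_CLOEXEC);
	if (fd < 0)
		err(1, "inotify_init1");

	/* Watch the named directory for file creation and deletion. */
	wd = inotify_add_watch(fd, argv[1], IN_CREATE | IN_DELETE);
	if (wd < 0)
		err(1, "inotify_add_watch");

	/* Each read() returns one or more variable-length records. */
	while ((n = read(fd, buf, sizeof(buf))) > 0) {
		for (char *p = buf; p < buf + n;) {
			struct inotify_event *ev =
			    (struct inotify_event *)(void *)p;

			printf("wd %d mask %#x name %s\n", ev->wd,
			    (unsigned int)ev->mask,
			    ev->len > 0 ? ev->name : "");
			p += sizeof(*ev) + ev->len;
		}
	}
	return (0);
}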
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map --- a/lib/libsys/syscalls.map +++ b/lib/libsys/syscalls.map @@ -809,4 +809,8 @@ __sys_setcred; _exterrctl; __sys_exterrctl; + _inotify_add_watch_at; + __sys_inotify_add_watch_at; + _inotify_rm_watch; + __sys_inotify_rm_watch; }; diff --git a/lib/libsys/write.2 b/lib/libsys/write.2 --- a/lib/libsys/write.2 +++ b/lib/libsys/write.2 @@ -195,6 +195,9 @@ if the sysctl .Va debug.iosize_max_clamp is non-zero). +.It Bq Er EINVAL +The file descriptor refers to a raw device, and the write +offset or size is not a multiple of the device's block size. .It Bq Er EINTEGRITY The backing store for .Fa fd diff --git a/lib/libsysdecode/Makefile b/lib/libsysdecode/Makefile --- a/lib/libsysdecode/Makefile +++ b/lib/libsysdecode/Makefile @@ -84,6 +84,7 @@ sysdecode_mask.3 sysdecode_fileflags.3 \ sysdecode_mask.3 sysdecode_filemode.3 \ sysdecode_mask.3 sysdecode_flock_operation.3 \ + sysdecode_mask.3 sysdecode_inotifyflags.3 \ sysdecode_mask.3 sysdecode_mlockall_flags.3 \ sysdecode_mask.3 sysdecode_mmap_flags.3 \ sysdecode_mask.3 sysdecode_mmap_prot.3 \ diff --git a/lib/libsysdecode/flags.c b/lib/libsysdecode/flags.c --- a/lib/libsysdecode/flags.c +++ b/lib/libsysdecode/flags.c @@ -23,7 +23,6 @@ * SUCH DAMAGE. */ -#include #define L2CAP_SOCKET_CHECKED #include @@ -31,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -351,6 +351,13 @@ return (lookup_value(rusage, who)); } +bool +sysdecode_inotifyflags(FILE *fp, int flag, int *rem) +{ + + return (print_mask_int(fp, inotifyflags, flag, rem)); +} + static struct name_table kevent_user_ffctrl[] = { X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY) XEND diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables --- a/lib/libsysdecode/mktables +++ b/lib/libsysdecode/mktables @@ -98,6 +98,7 @@ gen_table "fadvisebehav" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" gen_table "openflags" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" "O_RDONLY|O_RDWR|O_WRONLY" gen_table "flockops" "LOCK_[A-Z]+[[:space:]]+0x[0-9]+" "sys/fcntl.h" +gen_table "inotifyflags" "IN_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/inotify.h" "IN_CLOEXEC|IN_NONBLOCK" gen_table "kldsymcmd" "KLDSYM_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "kldunloadfflags" "LINKER_UNLOAD_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "lio_listiomodes" "LIO_(NO)?WAIT[[:space:]]+[0-9]+" "aio.h" diff --git a/lib/libsysdecode/sysdecode.h b/lib/libsysdecode/sysdecode.h --- a/lib/libsysdecode/sysdecode.h +++ b/lib/libsysdecode/sysdecode.h @@ -61,6 +61,7 @@ const char *sysdecode_getrusage_who(int _who); const char *sysdecode_idtype(int _idtype); const char *sysdecode_ioctlname(unsigned long _val); +bool sysdecode_inotifyflags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_ipproto(int _protocol); void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags, int _base); diff --git a/libexec/flua/modules/lposix.c b/libexec/flua/modules/lposix.c --- a/libexec/flua/modules/lposix.c +++ b/libexec/flua/modules/lposix.c @@ -88,18 +88,23 @@ lua_chown(lua_State *L) { const char *path; - uid_t owner = (uid_t) -1; - gid_t group = (gid_t) -1; + uid_t owner = (uid_t)-1; + gid_t group = (gid_t)-1; + int error; enforce_max_args(L, 3); path = luaL_checkstring(L, 1); if (lua_isinteger(L, 2)) - owner = (uid_t) lua_tointeger(L, 2); + owner = (uid_t)lua_tointeger(L, 2); else if (lua_isstring(L, 2)) { - struct passwd *p = getpwnam(lua_tostring(L, 2)); - if (p != NULL) - owner = p->pw_uid; + char buf[4096]; + struct 
passwd passwd, *pwd; + + error = getpwnam_r(lua_tostring(L, 2), &passwd, + buf, sizeof(buf), &pwd); + if (error == 0) + owner = pwd->pw_uid; else return (luaL_argerror(L, 2, lua_pushfstring(L, "unknown user %s", @@ -112,11 +117,15 @@ } if (lua_isinteger(L, 3)) - group = (gid_t) lua_tointeger(L, 3); + group = (gid_t)lua_tointeger(L, 3); else if (lua_isstring(L, 3)) { - struct group *g = getgrnam(lua_tostring(L, 3)); - if (g != NULL) - group = g->gr_gid; + char buf[4096]; + struct group gr, *grp; + + error = getgrnam_r(lua_tostring(L, 3), &gr, buf, sizeof(buf), + &grp); + if (error == 0) + group = grp->gr_gid; else return (luaL_argerror(L, 3, lua_pushfstring(L, "unknown group %s", diff --git a/libexec/nuageinit/nuage.lua b/libexec/nuageinit/nuage.lua --- a/libexec/nuageinit/nuage.lua +++ b/libexec/nuageinit/nuage.lua @@ -56,6 +56,21 @@ os.exit(1) end +local function chmod(path, mode) + local mode = tonumber(mode, 8) + local _, err, msg = sys_stat.chmod(path, mode) + if err then + errmsg("chmod(" .. path .. ", " .. mode .. ") failed: " .. msg) + end +end + +local function chown(path, owner, group) + local _, err, msg = unistd.chown(path, owner, group) + if err then + errmsg("chown(" .. path .. ", " .. owner .. ", " .. group .. ") failed: " .. msg) + end +end + local function dirname(oldpath) if not oldpath then return nil @@ -252,12 +267,12 @@ f:write(key .. "\n") f:close() if chownak then - sys_stat.chmod(ak_path, 384) - unistd.chown(ak_path, dirattrs.uid, dirattrs.gid) + chmod(ak_path, "0600") + chown(ak_path, dirattrs.uid, dirattrs.gid) end if chowndotssh then - sys_stat.chmod(dotssh_path, 448) - unistd.chown(dotssh_path, dirattrs.uid, dirattrs.gid) + chmod(dotssh_path, "0700") + chown(dotssh_path, dirattrs.uid, dirattrs.gid) end end @@ -296,10 +311,10 @@ end f:close() if chmodsudoers then - sys_stat.chmod(sudoers, 416) + chmod(sudoers, "0640") end if chmodsudoersd then - sys_stat.chmod(sudoers, 480) + chmod(sudoers, "0740") end end @@ -521,16 +536,14 @@ end f:close() if file.permissions then - -- convert from octal to decimal - local perm = tonumber(file.permissions, 8) - sys_stat.chmod(filepath, perm) + chmod(filepath, file.permissions) end if file.owner then local owner, group = string.match(file.owner, "([^:]+):([^:]+)") if not owner then owner = file.owner end - unistd.chown(filepath, owner, group) + chown(filepath, owner, group) end return true end @@ -538,6 +551,8 @@ local n = { warn = warnmsg, err = errmsg, + chmod = chmod, + chown = chown, dirname = dirname, mkdir_p = mkdir_p, sethostname = sethostname, diff --git a/libexec/nuageinit/nuageinit b/libexec/nuageinit/nuageinit --- a/libexec/nuageinit/nuageinit +++ b/libexec/nuageinit/nuageinit @@ -7,7 +7,6 @@ local nuage = require("nuage") local ucl = require("ucl") local yaml = require("lyaml") -local sys_stat = require("posix.sys.stat") if #arg ~= 2 then nuage.err("Usage: " .. arg[0] .. " ( | )", false) @@ -157,7 +156,7 @@ sshkey:close() end if keytype == "private" then - sys_stat.chmod(path, 384) + nuage.chmod(path, "0600") end end end @@ -281,7 +280,7 @@ end if f ~= nil then f:close() - sys_stat.chmod(root .. "/var/cache/nuageinit/runcmds", 493) + nuage.chmod(root .. "/var/cache/nuageinit/runcmds", "0755") end end @@ -503,5 +502,5 @@ end elseif line:sub(1, 2) == "#!" then -- delay for execution at rc.local time -- - sys_stat.chmod(root .. "/var/cache/nuageinit/user_data", 493) + nuage.chmod(root .. 
"/var/cache/nuageinit/user_data", "0755") end diff --git a/sbin/ifconfig/ifbridge.c b/sbin/ifconfig/ifbridge.c --- a/sbin/ifconfig/ifbridge.c +++ b/sbin/ifconfig/ifbridge.c @@ -146,6 +146,36 @@ free(inbuf); } +static void +print_vlans(ifbvlan_set_t *vlans) +{ + unsigned printed = 0; + + for (unsigned vlan = DOT1Q_VID_MIN; vlan <= DOT1Q_VID_MAX;) { + unsigned last; + + if (!BRVLAN_TEST(vlans, vlan)) { + ++vlan; + continue; + } + + last = vlan; + while (last < DOT1Q_VID_MAX && BRVLAN_TEST(vlans, last + 1)) + ++last; + + if (printed == 0) + printf(" tagged "); + else + printf(","); + + printf("%u", vlan); + if (last != vlan) + printf("-%u", last); + ++printed; + vlan = last + 1; + } +} + static void bridge_status(if_ctx *ctx) { @@ -211,6 +241,9 @@ else printf(" ", state); } + if (member->ifbr_untagged != 0) + printf(" untagged %u", (unsigned)member->ifbr_untagged); + print_vlans(&bridge->member_vlans[i]); printf("\n"); } @@ -576,6 +609,45 @@ err(1, "BRDGSIFCOST %s", cost); } +static void +setbridge_untagged(if_ctx *ctx, const char *ifn, const char *vlanid) +{ + struct ifbreq req; + u_long val; + + memset(&req, 0, sizeof(req)); + + if (get_val(vlanid, &val) < 0) + errx(1, "invalid VLAN identifier: %s", vlanid); + + /* + * Reject vlan 0, since it's not a valid vlan identifier and has a + * special meaning in the kernel interface. + */ + if (val == 0) + errx(1, "invalid VLAN identifier: %lu", val); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + req.ifbr_untagged = val; + + if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFUNTAGGED %s", vlanid); +} + +static void +unsetbridge_untagged(if_ctx *ctx, const char *ifn, int dummy __unused) +{ + struct ifbreq req; + + memset(&req, 0, sizeof(req)); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + req.ifbr_untagged = 0; + + if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFUNTAGGED"); +} + static void setbridge_ifmaxaddr(if_ctx *ctx, const char *ifn, const char *arg) { @@ -612,17 +684,118 @@ static void setbridge_private(if_ctx *ctx, const char *val, int dummy __unused) { - do_bridgeflag(ctx, val, IFBIF_PRIVATE, 1); } static void unsetbridge_private(if_ctx *ctx, const char *val, int dummy __unused) { - do_bridgeflag(ctx, val, IFBIF_PRIVATE, 0); } +static void +setbridge_vlanfilter(if_ctx *ctx, const char *val, int dummy __unused) +{ + do_bridgeflag(ctx, val, IFBIF_VLANFILTER, 1); +} + +static void +unsetbridge_vlanfilter(if_ctx *ctx, const char *val, int dummy __unused) +{ + do_bridgeflag(ctx, val, IFBIF_VLANFILTER, 0); +} + +static int +parse_vlans(ifbvlan_set_t *set, const char *str) +{ + char *s, *token; + + /* "none" means the empty vlan set */ + if (strcmp(str, "none") == 0) { + __BIT_ZERO(BRVLAN_SETSIZE, set); + return (0); + } + + /* "all" means all vlans, except for 0 and 4095 which are reserved */ + if (strcmp(str, "all") == 0) { + __BIT_FILL(BRVLAN_SETSIZE, set); + BRVLAN_CLR(set, DOT1Q_VID_NULL); + BRVLAN_CLR(set, DOT1Q_VID_RSVD_IMPL); + return (0); + } + + if ((s = strdup(str)) == NULL) + return (-1); + + while ((token = strsep(&s, ",")) != NULL) { + unsigned long first, last; + char *p, *lastp; + + if ((lastp = strchr(token, '-')) != NULL) + *lastp++ = '\0'; + + first = last = strtoul(token, &p, 10); + if (*p != '\0') + goto err; + if (first < DOT1Q_VID_MIN || first > DOT1Q_VID_MAX) + goto err; + + if (lastp) { + last = strtoul(lastp, &p, 10); + if (*p != '\0') + goto err; + if (last < DOT1Q_VID_MIN || last > DOT1Q_VID_MAX || + last < first) + goto err; + } 
+ + for (unsigned vlan = first; vlan <= last; ++vlan) + BRVLAN_SET(set, vlan); + } + + free(s); + return (0); + +err: + free(s); + return (-1); +} + +static void +set_bridge_vlanset(if_ctx *ctx, const char *ifn, const char *vlans, int op) +{ + struct ifbif_vlan_req req; + + memset(&req, 0, sizeof(req)); + + if (parse_vlans(&req.bv_set, vlans) != 0) + errx(1, "invalid vlan set: %s", vlans); + + strlcpy(req.bv_ifname, ifn, sizeof(req.bv_ifname)); + req.bv_op = op; + + if (do_cmd(ctx, BRDGSIFVLANSET, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFVLANSET %s", vlans); +} + +static void +setbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_SET); +} + +static void +addbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_ADD); +} + +static void +delbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_DEL); +} + static struct cmd bridge_cmds[] = { DEF_CMD_ARG("addm", setbridge_add), DEF_CMD_ARG("deletem", setbridge_delete), @@ -659,6 +832,13 @@ DEF_CMD_ARG2("ifpriority", setbridge_ifpriority), DEF_CMD_ARG2("ifpathcost", setbridge_ifpathcost), DEF_CMD_ARG2("ifmaxaddr", setbridge_ifmaxaddr), + DEF_CMD_ARG("vlanfilter", setbridge_vlanfilter), + DEF_CMD_ARG("-vlanfilter", unsetbridge_vlanfilter), + DEF_CMD_ARG2("untagged", setbridge_untagged), + DEF_CMD_ARG("-untagged", unsetbridge_untagged), + DEF_CMD_ARG2("tagged", setbridge_tagged), + DEF_CMD_ARG2("+tagged", addbridge_tagged), + DEF_CMD_ARG2("-tagged", delbridge_tagged), DEF_CMD_ARG("timeout", setbridge_timeout), DEF_CMD_ARG("private", setbridge_private), DEF_CMD_ARG("-private", unsetbridge_private), diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 24, 2025 +.Dd July 5, 2025 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2696,6 +2696,57 @@ removed. Set to 0 to disable. .El +.Ss Bridge VLAN Filtering Parameters +The behaviour of these options is described in the +.Dq VLAN SUPPORT +section of +.Xr bridge 4 . +.Bl -tag -width indent +.It Cm vlanfilter Ar interface +Enable VLAN filtering on an interface. +.It Cm -vlanfilter Ar interface +Disable VLAN filtering on an interface. +.It Cm untagged Ar interface Ar vlan-id +Set the untagged VLAN identifier for an interface. +.Pp +Setting +.Cm untagged +will automatically enable VLAN filtering on the interface. +.It Cm -untagged Ar interface Ar vlan-id +Clear the untagged VLAN identifier for an interface. +.It Cm tagged Ar interface Ar vlan-list +Set the interface's VLAN access list to the provided list of VLANs. +The list should be a comma-separated list of one or more VLAN IDs +or ranges formatted as +.Ar first-last , +the value +.Dq none +meaning the empty set, +or the value +.Dq all +meaning all VLANs (1-4094). +.Pp +Setting +.Cm tagged +will automatically enable VLAN filtering on the interface. +.It Cm +tagged Ar interface Ar vlan-list +Add the provided list of VLAN IDs to the interface's VLAN access list. +The list should be formatted as described for +.Cm tagged . +.Pp +Setting +.Cm +tagged +will automatically enable VLAN filtering on the interface. +.It Cm -tagged Ar interface Ar vlan-list +Remove the provided list of VLAN IDs from the interface's VLAN access +list. +The list should be formatted as described for +.Cm tagged . 
+.Pp +Setting +.Cm -tagged +will automatically enable VLAN filtering on the interface. +.El .Ss Link Aggregation and Link Failover Parameters The following parameters are specific to lagg interfaces: .Bl -tag -width indent diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -6854,7 +6854,8 @@ } else if (c == '\\') { if ((next = lgetc(quotec)) == EOF) return (0); - if (next == quotec || c == ' ' || c == '\t') + if (next == quotec || next == ' ' || + next == '\t') c = next; else if (next == '\n') { file->lineno++; @@ -7149,11 +7150,10 @@ if ((val = strrchr(s, '=')) == NULL) return (-1); - if ((sym = malloc(strlen(s) - strlen(val) + 1)) == NULL) + sym = strndup(s, val - s); + if (sym == NULL) err(1, "%s: malloc", __func__); - strlcpy(sym, s, strlen(s) - strlen(val) + 1); - ret = symset(sym, val + 1, 1); free(sym); diff --git a/sbin/pfctl/pfctl.h b/sbin/pfctl/pfctl.h --- a/sbin/pfctl/pfctl.h +++ b/sbin/pfctl/pfctl.h @@ -83,7 +83,7 @@ void pfctl_print_title(char *); void pfctl_do_clear_tables(const char *, int); void pfctl_show_tables(const char *, int); -int pfctl_command_tables(int, char *[], char *, const char *, char *, +int pfctl_table(int, char *[], char *, const char *, char *, const char *, int); int pfctl_show_altq(int, const char *, int, int); void warn_namespace_collision(const char *); diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8 --- a/sbin/pfctl/pfctl.8 +++ b/sbin/pfctl/pfctl.8 @@ -474,7 +474,7 @@ .It Fl s Cm osfp Show the list of operating system fingerprints. .It Fl s Cm Interfaces -Show the list of interfaces and interface drivers available to PF. +Show the list of interfaces and interface groups available to PF. When used together with .Fl v , it additionally lists which interfaces have skip rules activated. @@ -544,7 +544,7 @@ Flush all addresses of a table. .It Fl T Cm add Add one or more addresses in a table. -Automatically create a nonexisting table. +Automatically create a persistent table if it does not exist. .It Fl T Cm delete Delete one or more addresses from a table. .It Fl T Cm expire Ar number @@ -556,7 +556,7 @@ refers to the time they were added to the table. .It Fl T Cm replace Replace the addresses of the table. -Automatically create a nonexisting table. +Automatically create a persistent table if it does not exist. .It Fl T Cm show Show the content (addresses) of a table. 
.It Fl T Cm test diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -3175,6 +3175,9 @@ } } + if (tblcmdopt == NULL ^ tableopt == NULL) + usage(); + if (tblcmdopt != NULL) { argc -= optind; argv += optind; @@ -3400,7 +3403,7 @@ pfctl_kill_src_nodes(dev, ifaceopt, opts); if (tblcmdopt != NULL) { - error = pfctl_command_tables(argc, argv, tableopt, + error = pfctl_table(argc, argv, tableopt, tblcmdopt, rulesopt, anchorname, opts); rulesopt = NULL; } diff --git a/sbin/pfctl/pfctl_table.c b/sbin/pfctl/pfctl_table.c --- a/sbin/pfctl/pfctl_table.c +++ b/sbin/pfctl/pfctl_table.c @@ -55,8 +55,6 @@ #include "pfctl.h" extern void usage(void); -static int pfctl_table(int, char *[], char *, const char *, char *, - const char *, int); static void print_table(const struct pfr_table *, int, int); static int print_tstats(const struct pfr_tstats *, int); static int load_addr(struct pfr_buffer *, int, char *[], char *, int, int); @@ -118,15 +116,6 @@ exit(1); } -int -pfctl_command_tables(int argc, char *argv[], char *tname, - const char *command, char *file, const char *anchor, int opts) -{ - if (tname == NULL || command == NULL) - usage(); - return pfctl_table(argc, argv, tname, command, file, anchor, opts); -} - int pfctl_table(int argc, char *argv[], char *tname, const char *command, char *file, const char *anchor, int opts) @@ -214,7 +203,8 @@ xprintf(opts, "%d/%d addresses added", nadd, b.pfrb_size); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "delete")) { @@ -228,7 +218,8 @@ xprintf(opts, "%d/%d addresses deleted", ndel, b.pfrb_size); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "replace")) { @@ -259,7 +250,8 @@ xprintf(opts, "no changes"); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "expire")) { @@ -282,7 +274,7 @@ break; } PFRB_FOREACH(p, &b) { - ((struct pfr_astats *)p)->pfras_a.pfra_fback = 0; + ((struct pfr_astats *)p)->pfras_a.pfra_fback = PFR_FB_NONE; if (time(NULL) - ((struct pfr_astats *)p)->pfras_tzero > lifetime) if (pfr_buf_add(&b2, @@ -297,7 +289,8 @@ xprintf(opts, "%d/%d addresses expired", ndel, b2.pfrb_size); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b2) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "reset")) { diff --git a/share/examples/Makefile b/share/examples/Makefile --- a/share/examples/Makefile +++ b/share/examples/Makefile @@ -15,6 +15,7 @@ find_interface \ flua \ indent \ + inotify \ ipfw \ jails \ kld \ @@ -97,6 +98,10 @@ SE_DIRS+= indent SE_INDENT= indent.pro +SE_DIRS+= inotify +SE_INOTIFY= inotify.c \ + Makefile + .if ${MK_IPFILTER} != "no" SUBDIR+= ipfilter .endif diff --git a/share/examples/inotify/Makefile b/share/examples/inotify/Makefile new file mode 100644 --- /dev/null +++ b/share/examples/inotify/Makefile @@ -0,0 +1,6 @@ +PROG= inotify +MAN= + +LIBADD= xo + +.include diff --git a/share/examples/inotify/inotify.c 
b/share/examples/inotify/inotify.c new file mode 100644 --- /dev/null +++ b/share/examples/inotify/inotify.c @@ -0,0 +1,172 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +/* + * A simple program to demonstrate inotify. Given one or more paths, it watches + * all events on those paths and prints them to standard output. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +static void +usage(void) +{ + xo_errx(1, "usage: inotify [ ...]"); +} + +static const char * +ev2str(uint32_t event) +{ + switch (event & IN_ALL_EVENTS) { + case IN_ACCESS: + return ("IN_ACCESS"); + case IN_ATTRIB: + return ("IN_ATTRIB"); + case IN_CLOSE_WRITE: + return ("IN_CLOSE_WRITE"); + case IN_CLOSE_NOWRITE: + return ("IN_CLOSE_NOWRITE"); + case IN_CREATE: + return ("IN_CREATE"); + case IN_DELETE: + return ("IN_DELETE"); + case IN_DELETE_SELF: + return ("IN_DELETE_SELF"); + case IN_MODIFY: + return ("IN_MODIFY"); + case IN_MOVE_SELF: + return ("IN_MOVE_SELF"); + case IN_MOVED_FROM: + return ("IN_MOVED_FROM"); + case IN_MOVED_TO: + return ("IN_MOVED_TO"); + case IN_OPEN: + return ("IN_OPEN"); + default: + switch (event) { + case IN_IGNORED: + return ("IN_IGNORED"); + case IN_Q_OVERFLOW: + return ("IN_Q_OVERFLOW"); + case IN_UNMOUNT: + return ("IN_UNMOUNT"); + } + warnx("unknown event %#x", event); + assert(0); + } +} + +static void +set_handler(int kq, int sig) +{ + struct kevent kev; + + (void)signal(sig, SIG_IGN); + EV_SET(&kev, sig, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) + xo_err(1, "kevent"); +} + +int +main(int argc, char **argv) +{ + struct inotify_event *iev, *iev1; + struct kevent kev; + size_t ievsz; + int ifd, kq; + + argc = xo_parse_args(argc, argv); + if (argc < 2) + usage(); + argc--; + argv++; + + ifd = inotify_init1(IN_NONBLOCK); + if (ifd < 0) + xo_err(1, "inotify"); + for (int i = 0; i < argc; i++) { + int wd; + + wd = inotify_add_watch(ifd, argv[i], IN_ALL_EVENTS); + if (wd < 0) + xo_err(1, "inotify_add_watch(%s)", argv[i]); + } + + xo_set_version("1"); + xo_open_list("events"); + + kq = kqueue(); + if (kq < 0) + xo_err(1, "kqueue"); + + /* + * Handle signals in the event loop so that we can close the xo list. + */ + set_handler(kq, SIGINT); + set_handler(kq, SIGTERM); + set_handler(kq, SIGHUP); + set_handler(kq, SIGQUIT); + + /* + * Monitor the inotify descriptor for events. 
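+	 * A kevent() wakeup indicates that one or more records are queued;
+	 * the read() below returns as many of them as fit in the buffer.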
+ */ + EV_SET(&kev, ifd, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) + xo_err(1, "kevent"); + + ievsz = sizeof(*iev) + NAME_MAX + 1; + iev = malloc(ievsz); + if (iev == NULL) + err(1, "malloc"); + + for (;;) { + ssize_t n; + const char *ev; + + if (kevent(kq, NULL, 0, &kev, 1, NULL) < 0) + xo_err(1, "kevent"); + if (kev.filter == EVFILT_SIGNAL) + break; + + n = read(ifd, iev, ievsz); + if (n < 0) + xo_err(1, "read"); + assert(n >= (ssize_t)sizeof(*iev)); + + for (iev1 = iev; n > 0;) { + assert(n >= (ssize_t)sizeof(*iev1)); + + ev = ev2str(iev1->mask); + xo_open_instance("event"); + xo_emit("{:wd/%3d} {:event/%16s} {:name/%s}\n", + iev1->wd, ev, iev1->name); + xo_close_instance("event"); + + n -= sizeof(*iev1) + iev1->len; + iev1 = (struct inotify_event *)(void *) + ((char *)iev1 + sizeof(*iev1) + iev1->len); + } + (void)xo_flush(); + } + + xo_close_list("events"); + + if (xo_finish() < 0) + xo_err(1, "stdout"); + exit(0); +} diff --git a/share/man/man4/bridge.4 b/share/man/man4/bridge.4 --- a/share/man/man4/bridge.4 +++ b/share/man/man4/bridge.4 @@ -36,7 +36,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd May 28, 2025 +.Dd July 5, 2025 .Dt IF_BRIDGE 4 .Os .Sh NAME @@ -271,6 +271,54 @@ .Va net.link.bridge.log_stp node using .Xr sysctl 8 . +.Sh VLAN SUPPORT +The +.Nm +driver has full support for virtual LANs (VLANs). +The bridge implements independent VLAN learning, i.e. MAC addresses are +learned on a per-VLAN basis, and the same MAC address may be learned on +multiple interfaces on different VLANs. +Incoming frames with an 802.1Q tag will be assigned to the appropriate +VLAN. +.Pp +Traffic sent to or from the host is not assigned to a VLAN by default. +To allow the host to communicate on a VLAN, configure a +.Xr vlan 4 +interface on the bridge and (if necessary) assign IP addresses there. +.Pp +By default no access control is enabled, so any interface may +participate in any VLAN. +.Pp +VLAN filtering may be enabled on an interface using the +.Xr ifconfig 8 +.Cm vlanfilter +option. +When VLAN filtering is enabled, an interface may only send and receive +frames based on its configured VLAN access list. +.Pp +The interface's untagged VLAN ID may be configured using the +.Xr ifconfig 8 +.Cm untagged +option. +If an untagged VLAN ID is configured, incoming frames will be assigned +to that VLAN, and the interface may receive outgoing untagged frames +in that VLAN. +.Pp +The tagged VLAN access list may be configured using the +.Cm tagged , +.Cm +tagged +and +.Cm -tagged +options to +.Xr ifconfig 8 . +An interface may send and receive tagged frames for any VLAN in its +access list. +.Pp +The bridge will automatically insert or remove 802.1q tags as needed, +based on the interface configuration, when forwarding frames between +interfaces. +This tag processing is only done for interfaces with VLAN filtering +enabled. .Sh PACKET FILTERING Packet filtering can be used with any firewall package that hooks in via the .Xr pfil 9 @@ -538,6 +586,7 @@ .Xr ipfw 4 , .Xr netmap 4 , .Xr pf 4 , +.Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The diff --git a/share/man/man4/pf.4 b/share/man/man4/pf.4 --- a/share/man/man4/pf.4 +++ b/share/man/man4/pf.4 @@ -26,7 +26,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 1, 2025 +.Dd July 2, 2025 .Dt PF 4 .Os .Sh NAME @@ -1114,7 +1114,7 @@ .It Dv DIOCCLRSRCNODES Clear the tree of source tracking nodes. 
.It Dv DIOCIGETIFACES Fa "struct pfioc_iface *io" -Get the list of interfaces and interface drivers known to +Get the list of interfaces and interface groups known to .Nm . All the ioctls that manipulate interfaces use the same structure described below: @@ -1131,7 +1131,7 @@ .Pp If not empty, .Va pfiio_name -can be used to restrict the search to a specific interface or driver. +can be used to restrict the search to a specific interface or group. .Va pfiio_buffer[pfiio_size] is the user-supplied buffer for returning the data. On entry, diff --git a/share/man/man4/rights.4 b/share/man/man4/rights.4 --- a/share/man/man4/rights.4 +++ b/share/man/man4/rights.4 @@ -30,7 +30,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 1, 2024 +.Dd May 22, 2025 .Dt RIGHTS 4 .Os .Sh NAME @@ -319,6 +319,14 @@ .It Dv CAP_GETSOCKOPT Permit .Xr getsockopt 2 . +.It Dv CAP_INOTIFY_ADD +Permit +.Xr inotify_add_watch 2 +and +.Xr inotify_add_watch_at 2 . +.It Dv CAP_INOTIFY_RM +Permit +.Xr inotify_rm_watch 2 . .It Dv CAP_IOCTL Permit .Xr ioctl 2 . diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -434,6 +434,7 @@ VOP_GETEXTATTR.9 \ VOP_GETPAGES.9 \ VOP_INACTIVE.9 \ + VOP_INOTIFY.9 \ VOP_IOCTL.9 \ VOP_LINK.9 \ VOP_LISTEXTATTR.9 \ @@ -2460,6 +2461,7 @@ MLINKS+=VOP_FSYNC.9 VOP_FDATASYNC.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 +MLINKS+=VOP_INOTIFY.9 VOP_INOTIFY_ADD_WATCH.9 MLINKS+=VOP_LOCK.9 vn_lock.9 \ VOP_LOCK.9 VOP_ISLOCKED.9 \ VOP_LOCK.9 VOP_UNLOCK.9 diff --git a/share/man/man9/VOP_INOTIFY.9 b/share/man/man9/VOP_INOTIFY.9 new file mode 100644 --- /dev/null +++ b/share/man/man9/VOP_INOTIFY.9 @@ -0,0 +1,60 @@ +.\"- +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. +.\" +.Dd May 27, 2025 +.Dt VOP_INOTIFY 9 +.Os +.Sh NAME +.Nm VOP_INOTIFY +.Nd vnode inotify interface +.Sh SYNOPSIS +.In sys/param.h +.In sys/vnode.h +.Ft int +.Fo VOP_INOTIFY +.Fa struct vnode *vp +.Fa struct vnode *dvp +.Fa struct componentname *cnp +.Fa int event +.Fa uint32_t cookie +.Fc +.Ft int +.Fo VOP_INOTIFY_ADD_WATCH +.Fa struct vnode *vp +.Fa struct inotify_softc *sc +.Fa uint32_t mask +.Fa uint32_t *wdp +.Fa struct thread *td +.Fc +.Sh DESCRIPTION +The +.Fn VOP_INOTIFY +operation notifies the +.Xr inotify 2 +subsystem of a file system event on a vnode. +The +.Fa dvp +and +.Fa cnp +arguments are optional and are only used to obtain a file name for the event. +If they are omitted, the inotify subsystem will use the file name cache to +find a name for the vnode, but this is more expensive. +.Pp +The +.Fn VOP_INOTIFY_ADD_WATCH +operation is for internal use by the inotify subsystem to add a watch to a +vnode. +.Sh LOCKS +The +.Fn VOP_INOTIFY +operation does not assume any particular vnode lock state. +The +.Fn VOP_INOTIFY_ADD_WATCH +operation should be called with the vnode locked. +.Sh RETURN VALUES +Zero is returned on success, otherwise an error code is returned. +.Sh SEE ALSO +.Xr inotify 2 , +.Xr vnode 9 diff --git a/share/man/man9/firmware.9 b/share/man/man9/firmware.9 --- a/share/man/man9/firmware.9 +++ b/share/man/man9/firmware.9 @@ -201,7 +201,7 @@ or manually loaded with .Xr kldload 8 . However, a system can implement additional mechanisms to bring -these images in memory before calling +these images into memory before calling .Fn firmware_register . 
.Pp When @@ -347,7 +347,7 @@ .Fa imagename matches the trailing subpath of a registered image with a full path, that image is returned. .It -he kernel linker searches for a kernel module named +The kernel linker searches for a kernel module named .Fa imagename . If a kernel module is found, it is loaded, and the list of registered firmware images is searched again. diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c --- a/stand/efi/loader/main.c +++ b/stand/efi/loader/main.c @@ -1547,6 +1547,7 @@ } COMMAND_SET(poweroff, "poweroff", "power off the system", command_poweroff); +COMMAND_SET(halt, "halt", "power off the system", command_poweroff); static int command_poweroff(int argc __unused, char *argv[] __unused) diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -325,6 +325,10 @@ wrmsr(MSR_EFER, msr); pg_nx = PG_NX; } + if ((amd_feature2 & AMDID2_TCE) != 0) { + msr = rdmsr(MSR_EFER) | EFER_TCE; + wrmsr(MSR_EFER, msr); + } hw_ibrs_recalculate(false); hw_ssb_recalculate(false); amd64_syscall_ret_flush_l1d_recalc(); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1302,7 +1302,7 @@ static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static bool pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, - vm_offset_t va); + vm_offset_t va, vm_page_t m); static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, @@ -1334,7 +1334,7 @@ static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - struct spglist *free, struct rwlock **lockp); + bool remove_pt, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -5999,7 +5999,7 @@ SLIST_INIT(&free); sva = trunc_2mpage(va); - pmap_remove_pde(pmap, pde, sva, &free, lockp); + pmap_remove_pde(pmap, pde, sva, true, &free, lockp); if ((oldpde & pmap_global_bit(pmap)) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); vm_page_free_pages_toq(&free, true); @@ -6153,7 +6153,8 @@ * pmap_remove_kernel_pde: Remove a kernel superpage mapping. 
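+ * If remove_pt is true, the page table page is removed from the pmap's
+ * trie; otherwise it is left installed and merely looked up.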
*/ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, + bool remove_pt) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6161,7 +6162,10 @@ KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mpte = pmap_remove_pt_page(pmap, va); + if (remove_pt) + mpte = pmap_remove_pt_page(pmap, va); + else + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); @@ -6193,7 +6197,7 @@ * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6234,7 +6238,7 @@ } } if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva); + pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { @@ -6476,7 +6480,8 @@ */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; - pmap_remove_pde(pmap, pde, sva, &free, &lock); + pmap_remove_pde(pmap, pde, sva, true, &free, + &lock); continue; } else if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { @@ -7552,13 +7557,36 @@ /* * The reference to the PD page that was acquired by * pmap_alloc_pde() ensures that it won't be freed. - * However, if the PDE resulted from a promotion, then + * However, if the PDE resulted from a promotion, and + * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, + pmap != kernel_pmap, &free, lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { + if (va >= VM_MAXUSER_ADDRESS) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, false)) { + if (pdpg != NULL) + pdpg->ref_count--; + CTR1(KTR_PMAP, + "pmap_enter_pde: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_pde() and + * pmap_remove_ptes() will zero-fill + * the kernel page table page. + */ + } pmap_delayed_invl_start(); if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free, lockp)) @@ -7572,14 +7600,6 @@ } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_pde: freed kernel page table page")); - - /* - * Both pmap_remove_pde() and pmap_remove_ptes() will - * leave the kernel page table page zero filled. - */ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_pde: trie insert failed"); } } @@ -9547,7 +9567,7 @@ * Tries to demote a 1GB page mapping. 
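+ * If m is non-NULL, it supplies a preallocated page to be used as the new
+ * page directory page; otherwise one is allocated here, and the demotion
+ * fails if that allocation fails.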
*/ static bool -pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) +pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; @@ -9564,12 +9584,19 @@ oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); - pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, - VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT); - if (pdpg == NULL) { - CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" - " in pmap %p", va, pmap); - return (false); + if (m == NULL) { + pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, + VM_ALLOC_WIRED); + if (pdpg == NULL) { + CTR2(KTR_PMAP, + "pmap_demote_pdpe: failure for va %#lx in pmap %p", + va, pmap); + return (false); + } + } else { + pdpg = m; + pdpg->pindex = va >> PDPSHIFT; + pmap_pt_page_count_adj(pmap, 1); } pdpgpa = VM_PAGE_TO_PHYS(pdpg); firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); @@ -9779,7 +9806,7 @@ tmpva += NBPDP; continue; } - if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva, NULL)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); @@ -9937,17 +9964,20 @@ } /* - * Demotes any mapping within the direct map region that covers more than the - * specified range of physical addresses. This range's size must be a power - * of two and its starting address must be a multiple of its size. Since the - * demotion does not change any attributes of the mapping, a TLB invalidation - * is not mandatory. The caller may, however, request a TLB invalidation. + * Demotes any mapping within the direct map region that covers more + * than the specified range of physical addresses. This range's size + * must be a power of two and its starting address must be a multiple + * of its size, which means that any pdp from the mapping is fully + * covered by the range if len > NBPDP. Since the demotion does not + * change any attributes of the mapping, a TLB invalidation is not + * mandatory. The caller may, however, request a TLB invalidation. */ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) { pdp_entry_t *pdpe; pd_entry_t *pde; + vm_page_t m; vm_offset_t va; bool changed; @@ -9956,17 +9986,28 @@ KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "pmap_demote_DMAP"); + if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = false; + + /* + * Assume that it is fine to sleep there. + * The only existing caller of pmap_demote_DMAP() is the + * x86_mr_split_dmap() function. 
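+		 * The page is freed at the end of this function if the
+		 * demotion does not consume it.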
+ */ + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK); + PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { - if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, va, m)) panic("pmap_demote_DMAP: PDPE failed"); changed = true; + m = NULL; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); @@ -9981,6 +10022,10 @@ if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); + if (m != NULL) { + vm_page_unwire_noq(m); + vm_page_free(m); + } } } diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h --- a/sys/amd64/linux/linux_proto.h +++ b/sys/amd64/linux/linux_proto.h @@ -914,10 +914,13 @@ syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c --- a/sys/amd64/linux/linux_sysent.c +++ b/sys/amd64/linux/linux_sysent.c @@ -268,8 +268,8 @@ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 251 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 252 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 253 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 256 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 257 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 258 = linux_mkdirat */ diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c --- a/sys/amd64/linux/linux_systrace_args.c +++ b/sys/amd64/linux/linux_systrace_args.c @@ -1918,12 +1918,19 @@ } /* 
linux_inotify_add_watch */ case 254: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 255: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -5860,9 +5867,32 @@ break; /* linux_inotify_add_watch */ case 254: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 255: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 256: @@ -8353,8 +8383,14 @@ case 253: /* linux_inotify_add_watch */ case 254: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 255: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 256: /* linux_openat */ diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -1476,10 +1476,17 @@ int linux_inotify_init(void); } 254 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 255 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 256 AUE_NULL STD { int linux_migrate_pages(void); diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h --- a/sys/amd64/linux32/linux32_proto.h +++ b/sys/amd64/linux32/linux32_proto.h @@ -983,10 +983,13 @@ syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1184,7 +1187,7 @@ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c --- a/sys/amd64/linux32/linux32_sysent.c +++ b/sys/amd64/linux32/linux32_sysent.c @@ -307,8 +307,8 @@ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = 
(sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -347,7 +347,7 @@ { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c --- a/sys/amd64/linux32/linux32_systrace_args.c +++ b/sys/amd64/linux32/linux32_systrace_args.c @@ -2036,12 +2036,19 @@ } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2379,7 +2386,9 @@ } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* 
linux_preadv */ @@ -6536,9 +6545,32 @@ break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7116,6 +7148,13 @@ break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9809,8 +9848,14 @@ case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -9982,6 +10027,9 @@ break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -1589,10 +1589,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1860,7 +1867,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/arm/allwinner/aw_gpio.c b/sys/arm/allwinner/aw_gpio.c --- a/sys/arm/allwinner/aw_gpio.c +++ b/sys/arm/allwinner/aw_gpio.c @@ -1154,10 +1154,6 @@ aw_gpio_register_isrcs(sc); intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; - /* * Register as a pinctrl device */ @@ -1166,6 +1162,10 @@ fdt_pinctrl_register(dev, "allwinner,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; + config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc); return (0); diff --git a/sys/arm/allwinner/aw_rtc.c b/sys/arm/allwinner/aw_rtc.c --- a/sys/arm/allwinner/aw_rtc.c +++ b/sys/arm/allwinner/aw_rtc.c @@ -134,6 +134,7 @@ { "allwinner,sun7i-a20-rtc", (uintptr_t) &a20_conf }, { "allwinner,sun6i-a31-rtc", (uintptr_t) &a31_conf }, { "allwinner,sun8i-h3-rtc", (uintptr_t) &h3_conf }, + { "allwinner,sun20i-d1-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h5-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h6-rtc", (uintptr_t) &h3_conf }, { NULL, 0 } @@ -147,11 +148,13 @@ static struct clk_fixed_def aw_rtc_osc32k = { .clkdef.id = 0, + .clkdef.name = "osc32k", .freq = 32768, }; static struct clk_fixed_def aw_rtc_iosc = { .clkdef.id = 2, + .clkdef.name = "iosc", }; static void aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev); @@ -250,23 +253,33 @@ int nclocks; node = ofw_bus_get_node(dev); - nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); - /* No clocks to export */ - if (nclocks <= 0) - return; - if (nclocks != 3) { - device_printf(dev, "Having only %d clocks instead of 3, aborting\n", nclocks); + /* Nothing to do. 
*/ + if (!OF_hasprop(node, "clocks")) return; + + /* + * If the device tree gives us specific output names for the clocks, + * use them. + */ + nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); + if (nclocks > 0) { + if (nclocks != 3) { + device_printf(dev, + "Found %d clock names instead of 3, aborting\n", + nclocks); + return; + } + + aw_rtc_osc32k.clkdef.name = clknames[0]; + aw_rtc_iosc.clkdef.name = clknames[2]; } clkdom = clkdom_create(dev); - aw_rtc_osc32k.clkdef.name = clknames[0]; if (clknode_fixed_register(clkdom, &aw_rtc_osc32k) != 0) device_printf(dev, "Cannot register osc32k clock\n"); - aw_rtc_iosc.clkdef.name = clknames[2]; aw_rtc_iosc.freq = sc->conf->iosc_freq; if (clknode_fixed_register(clkdom, &aw_rtc_iosc) != 0) device_printf(dev, "Cannot register iosc clock\n"); diff --git a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c --- a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c @@ -837,12 +837,12 @@ } sc->sc_gpio_npins = i; bcm_gpio_sysctl_init(sc); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; fdt_pinctrl_register(dev, "brcm,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; return (0); diff --git a/sys/arm/mv/mvebu_gpio.c b/sys/arm/mv/mvebu_gpio.c --- a/sys/arm/mv/mvebu_gpio.c +++ b/sys/arm/mv/mvebu_gpio.c @@ -810,7 +810,6 @@ return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm/nvidia/as3722_gpio.c b/sys/arm/nvidia/as3722_gpio.c --- a/sys/arm/nvidia/as3722_gpio.c +++ b/sys/arm/nvidia/as3722_gpio.c @@ -544,7 +544,7 @@ sc->gpio_pins = malloc(sizeof(struct as3722_gpio_pin *) * sc->gpio_npins, M_AS3722_GPIO, M_WAITOK | M_ZERO); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); for (i = 0; i < sc->gpio_npins; i++) { diff --git a/sys/arm/nvidia/tegra_gpio.c b/sys/arm/nvidia/tegra_gpio.c --- a/sys/arm/nvidia/tegra_gpio.c +++ b/sys/arm/nvidia/tegra_gpio.c @@ -824,7 +824,6 @@ return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c --- a/sys/arm64/apple/apple_pinctrl.c +++ b/sys/arm64/apple/apple_pinctrl.c @@ -161,22 +161,22 @@ goto error; } + fdt_pinctrl_register(dev, "pinmux"); + fdt_pinctrl_configure_tree(dev); + + if (OF_hasprop(node, "interrupt-controller")) { + sc->sc_irqs = mallocarray(sc->sc_ngpios, + sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); + intr_pic_register(dev, + OF_xref_from_node(ofw_bus_get_node(dev))); + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "failed to attach gpiobus\n"); goto error; } - fdt_pinctrl_register(dev, "pinmux"); - fdt_pinctrl_configure_tree(dev); - - if (!OF_hasprop(node, "interrupt-controller")) - return (0); - - sc->sc_irqs = mallocarray(sc->sc_ngpios, - sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); - intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - return (0); error: mtx_destroy(&sc->sc_mtx); diff --git a/sys/arm64/linux/linux_proto.h b/sys/arm64/linux/linux_proto.h --- a/sys/arm64/linux/linux_proto.h +++ b/sys/arm64/linux/linux_proto.h @@ -141,10 +141,13 @@ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char 
fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_ioctl_args { char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)]; diff --git a/sys/arm64/linux/linux_sysent.c b/sys/arm64/linux/linux_sysent.c --- a/sys/arm64/linux/linux_sysent.c +++ b/sys/arm64/linux/linux_sysent.c @@ -41,8 +41,8 @@ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 24 = linux_dup3 */ { .sy_narg = AS(linux_fcntl_args), .sy_call = (sy_call_t *)linux_fcntl, .sy_auevent = AUE_FCNTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 25 = linux_fcntl */ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 26 = linux_inotify_init1 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ { .sy_narg = AS(linux_ioctl_args), .sy_call = (sy_call_t *)linux_ioctl, .sy_auevent = AUE_IOCTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 29 = linux_ioctl */ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 30 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 31 = linux_ioprio_get */ diff --git a/sys/arm64/linux/linux_systrace_args.c b/sys/arm64/linux/linux_systrace_args.c --- a/sys/arm64/linux/linux_systrace_args.c +++ b/sys/arm64/linux/linux_systrace_args.c @@ -210,12 +210,19 @@ } /* linux_inotify_add_watch */ case 27: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 28: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_ioctl */ @@ -2780,9 +2787,32 @@ break; /* linux_inotify_add_watch */ case 27: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 28: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_ioctl */ case 
29: @@ -6455,8 +6485,14 @@ break; /* linux_inotify_add_watch */ case 27: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 28: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_ioctl */ case 29: if (ndx == 0 || ndx == 1) diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master --- a/sys/arm64/linux/syscalls.master +++ b/sys/arm64/linux/syscalls.master @@ -170,10 +170,17 @@ ); } 27 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 28 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 29 AUE_IOCTL STD { int linux_ioctl( diff --git a/sys/arm64/nvidia/tegra210/max77620_gpio.c b/sys/arm64/nvidia/tegra210/max77620_gpio.c --- a/sys/arm64/nvidia/tegra210/max77620_gpio.c +++ b/sys/arm64/nvidia/tegra210/max77620_gpio.c @@ -672,7 +672,7 @@ sx_init(&sc->gpio_lock, "MAX77620 GPIO lock"); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -362,12 +362,6 @@ return (ENXIO); } - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) { - rk_gpio_detach(dev); - return (ENXIO); - } - /* Set the cached value to unknown */ for (i = 0; i < RK_GPIO_MAX_PINS; i++) sc->pin_cached[i].is_gpio = 2; @@ -377,6 +371,12 @@ sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR); RK_GPIO_UNLOCK(sc); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) { + rk_gpio_detach(dev); + return (ENXIO); + } + return (0); } diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -663,6 +663,7 @@ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ #define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ #define AUE_SETCRED 43271 /* FreeBSD-specific. */ +#define AUE_INOTIFY 43272 /* FreeBSD/Linux. 
*/ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/cam/mmc/mmc_da.c b/sys/cam/mmc/mmc_da.c --- a/sys/cam/mmc/mmc_da.c +++ b/sys/cam/mmc/mmc_da.c @@ -1198,27 +1198,6 @@ return (cts->host_caps); } -static uint32_t -sdda_get_max_data(struct cam_periph *periph, union ccb *ccb) -{ - struct ccb_trans_settings_mmc *cts; - - cts = &ccb->cts.proto_specific.mmc; - memset(cts, 0, sizeof(struct ccb_trans_settings_mmc)); - - ccb->ccb_h.func_code = XPT_GET_TRAN_SETTINGS; - ccb->ccb_h.flags = CAM_DIR_NONE; - ccb->ccb_h.retry_count = 0; - ccb->ccb_h.timeout = 100; - ccb->ccb_h.cbfcnp = NULL; - xpt_action(ccb); - - if (ccb->ccb_h.status != CAM_REQ_CMP) - panic("Cannot get host max data"); - KASSERT(cts->host_max_data != 0, ("host_max_data == 0?!")); - return (cts->host_max_data); -} - static void sdda_start_init(void *context, union ccb *start_ccb) { diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -511,4 +511,6 @@ #define FREEBSD32_SYS_fchroot 590 #define FREEBSD32_SYS_freebsd32_setcred 591 #define FREEBSD32_SYS_exterrctl 592 -#define FREEBSD32_SYS_MAXSYSCALL 593 +#define FREEBSD32_SYS_inotify_add_watch_at 593 +#define FREEBSD32_SYS_inotify_rm_watch 594 +#define FREEBSD32_SYS_MAXSYSCALL 595 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -598,4 +598,6 @@ "fchroot", /* 590 = fchroot */ "freebsd32_setcred", /* 591 = freebsd32_setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -660,4 +660,6 @@ { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(freebsd32_setcred_args), .sy_call = (sy_call_t *)freebsd32_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = freebsd32_setcred */ { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3395,6 +3395,24 @@ *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + 
struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9172,6 +9190,38 @@ break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11070,6 +11120,16 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c --- a/sys/compat/linux/linux_dummy.c +++ b/sys/compat/linux/linux_dummy.c @@ -74,9 +74,6 @@ DUMMY(add_key); DUMMY(request_key); DUMMY(keyctl); -/* Linux 2.6.13: */ -DUMMY(inotify_add_watch); -DUMMY(inotify_rm_watch); /* Linux 2.6.16: */ DUMMY(migrate_pages); DUMMY(unshare); @@ -87,7 +84,6 @@ DUMMY(move_pages); /* Linux 2.6.27: */ DUMMY(signalfd4); -DUMMY(inotify_init1); /* Linux 2.6.31: */ DUMMY(perf_event_open); /* Linux 2.6.36: */ diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h --- a/sys/compat/linux/linux_file.h +++ b/sys/compat/linux/linux_file.h @@ -189,6 +189,38 @@ #define LINUX_HUGETLB_FLAG_ENCODE_2GB (31 << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) #define LINUX_HUGETLB_FLAG_ENCODE_16GB (34U << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) +/* inotify flags */ +#define LINUX_IN_ACCESS 0x00000001 +#define LINUX_IN_MODIFY 0x00000002 +#define LINUX_IN_ATTRIB 0x00000004 +#define LINUX_IN_CLOSE_WRITE 0x00000008 +#define LINUX_IN_CLOSE_NOWRITE 0x00000010 +#define LINUX_IN_OPEN 0x00000020 +#define LINUX_IN_MOVED_FROM 0x00000040 +#define LINUX_IN_MOVED_TO 0x00000080 +#define LINUX_IN_CREATE 0x00000100 +#define LINUX_IN_DELETE 0x00000200 +#define LINUX_IN_DELETE_SELF 0x00000400 +#define LINUX_IN_MOVE_SELF 0x00000800 + +#define LINUX_IN_UNMOUNT 0x00002000 +#define LINUX_IN_Q_OVERFLOW 0x00004000 +#define LINUX_IN_IGNORED 0x00008000 + +#define LINUX_IN_ONLYDIR 0x01000000 +#define LINUX_IN_DONT_FOLLOW 0x02000000 +#define LINUX_IN_EXCL_UNLINK 0x04000000 +#define LINUX_IN_MASK_CREATE 0x10000000 +#define LINUX_IN_MASK_ADD 0x20000000 +#define LINUX_IN_ISDIR 0x40000000 +#define LINUX_IN_ONESHOT 0x80000000 + +#define LINUX_IN_ALL_EVENTS 0x00000fff +#define LINUX_IN_ALL_FLAGS 0xf700e000 + +#define LINUX_IN_NONBLOCK 0x00000800 +#define LINUX_IN_CLOEXEC 0x00080000 + #if defined(_KERNEL) struct l_file_handle { l_uint handle_bytes; diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -32,11 +32,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -1877,3 +1879,122 @@ freeuio(auio); return (linux_enobufs2eagain(td, args->fd, error)); } + +static int +linux_inotify_init_flags(int l_flags) +{ + int bsd_flags; + + if ((l_flags & ~(LINUX_IN_CLOEXEC | LINUX_IN_NONBLOCK)) != 0) + linux_msg(NULL, "inotify_init1 unsupported flags 0x%x", + l_flags); + + bsd_flags = 0; + if ((l_flags & LINUX_IN_CLOEXEC) != 0) + bsd_flags |= O_CLOEXEC; + if ((l_flags & LINUX_IN_NONBLOCK) != 0) + bsd_flags |= O_NONBLOCK; 
+ return (bsd_flags); +} + +static int +inotify_init_common(struct thread *td, int flags) +{ + struct specialfd_inotify si; + + si.flags = linux_inotify_init_flags(flags); + return (kern_specialfd(td, SPECIALFD_INOTIFY, &si)); +} + +#if defined(__i386__) || defined(__amd64__) +int +linux_inotify_init(struct thread *td, struct linux_inotify_init_args *args) +{ + return (inotify_init_common(td, 0)); +} +#endif + +int +linux_inotify_init1(struct thread *td, struct linux_inotify_init1_args *args) +{ + return (inotify_init_common(td, args->flags)); +} + +/* + * The native implementation uses the same values for inotify events as + * libinotify, which gives us binary compatibility with Linux. This simplifies + * the shim implementation a lot, as otherwise we would have to handle read(2) + * calls on inotify descriptors and translate events to Linux's ABI. + */ +_Static_assert(LINUX_IN_ACCESS == IN_ACCESS, + "IN_ACCESS mismatch"); +_Static_assert(LINUX_IN_MODIFY == IN_MODIFY, + "IN_MODIFY mismatch"); +_Static_assert(LINUX_IN_ATTRIB == IN_ATTRIB, + "IN_ATTRIB mismatch"); +_Static_assert(LINUX_IN_CLOSE_WRITE == IN_CLOSE_WRITE, + "IN_CLOSE_WRITE mismatch"); +_Static_assert(LINUX_IN_CLOSE_NOWRITE == IN_CLOSE_NOWRITE, + "IN_CLOSE_NOWRITE mismatch"); +_Static_assert(LINUX_IN_OPEN == IN_OPEN, + "IN_OPEN mismatch"); +_Static_assert(LINUX_IN_MOVED_FROM == IN_MOVED_FROM, + "IN_MOVED_FROM mismatch"); +_Static_assert(LINUX_IN_MOVED_TO == IN_MOVED_TO, + "IN_MOVED_TO mismatch"); +_Static_assert(LINUX_IN_CREATE == IN_CREATE, + "IN_CREATE mismatch"); +_Static_assert(LINUX_IN_DELETE == IN_DELETE, + "IN_DELETE mismatch"); +_Static_assert(LINUX_IN_DELETE_SELF == IN_DELETE_SELF, + "IN_DELETE_SELF mismatch"); +_Static_assert(LINUX_IN_MOVE_SELF == IN_MOVE_SELF, + "IN_MOVE_SELF mismatch"); + +_Static_assert(LINUX_IN_UNMOUNT == IN_UNMOUNT, + "IN_UNMOUNT mismatch"); +_Static_assert(LINUX_IN_Q_OVERFLOW == IN_Q_OVERFLOW, + "IN_Q_OVERFLOW mismatch"); +_Static_assert(LINUX_IN_IGNORED == IN_IGNORED, + "IN_IGNORED mismatch"); + +_Static_assert(LINUX_IN_ISDIR == IN_ISDIR, + "IN_ISDIR mismatch"); +_Static_assert(LINUX_IN_ONLYDIR == IN_ONLYDIR, + "IN_ONLYDIR mismatch"); +_Static_assert(LINUX_IN_DONT_FOLLOW == IN_DONT_FOLLOW, + "IN_DONT_FOLLOW mismatch"); +_Static_assert(LINUX_IN_MASK_CREATE == IN_MASK_CREATE, + "IN_MASK_CREATE mismatch"); +_Static_assert(LINUX_IN_MASK_ADD == IN_MASK_ADD, + "IN_MASK_ADD mismatch"); +_Static_assert(LINUX_IN_ONESHOT == IN_ONESHOT, + "IN_ONESHOT mismatch"); +_Static_assert(LINUX_IN_EXCL_UNLINK == IN_EXCL_UNLINK, + "IN_EXCL_UNLINK mismatch"); + +static int +linux_inotify_watch_flags(int l_flags) +{ + if ((l_flags & ~(LINUX_IN_ALL_EVENTS | LINUX_IN_ALL_FLAGS)) != 0) { + linux_msg(NULL, "inotify_add_watch unsupported flags 0x%x", + l_flags); + } + + return (l_flags); +} + +int +linux_inotify_add_watch(struct thread *td, + struct linux_inotify_add_watch_args *args) +{ + return (kern_inotify_add_watch(args->fd, AT_FDCWD, args->pathname, + linux_inotify_watch_flags(args->mask), td)); +} + +int +linux_inotify_rm_watch(struct thread *td, + struct linux_inotify_rm_watch_args *args) +{ + return (kern_inotify_rm_watch(args->fd, args->wd, td)); +} diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi.h b/sys/compat/linuxkpi/common/include/acpi/acpi.h --- a/sys/compat/linuxkpi/common/include/acpi/acpi.h +++ b/sys/compat/linuxkpi/common/include/acpi/acpi.h @@ -3,6 +3,10 @@ * * Copyright (c) 2017 Mark Johnston * Copyright (c) 2020 Vladimir Kondratyev + * Copyright (c) 2025 The FreeBSD Foundation + * + * 
Portions of this software were developed by Björn Zeeb + * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -30,6 +34,13 @@ #ifndef _LINUXKPI_ACPI_ACPI_H_ #define _LINUXKPI_ACPI_ACPI_H_ +/* + * LINUXKPI_WANT_LINUX_ACPI is a temporary workaround to allow drm-kmod + * to update all needed branches without breaking builds. + * Once that has happened and checks are implemented based on __FreeBSD_version, + * we will remove these conditions again. + */ + /* * FreeBSD import of ACPICA has a typedef for BOOLEAN which conflicts with * amdgpu driver. Workaround it on preprocessor level. @@ -46,8 +57,8 @@ #include #undef BOOLEAN +typedef ACPI_IO_ADDRESS acpi_io_address; typedef ACPI_HANDLE acpi_handle; -typedef ACPI_OBJECT acpi_object; typedef ACPI_OBJECT_HANDLER acpi_object_handler; typedef ACPI_OBJECT_TYPE acpi_object_type; typedef ACPI_STATUS acpi_status; @@ -55,12 +66,62 @@ typedef ACPI_SIZE acpi_size; typedef ACPI_WALK_CALLBACK acpi_walk_callback; +union linuxkpi_acpi_object { + acpi_object_type type; + struct { + acpi_object_type type; + UINT64 value; + } integer; + struct { + acpi_object_type type; + UINT32 length; + char *pointer; + } string; + struct { + acpi_object_type type; + UINT32 length; + UINT8 *pointer; + } buffer; + struct { + acpi_object_type type; + UINT32 count; + union linuxkpi_acpi_object *elements; + } package; + struct { + acpi_object_type type; + acpi_object_type actual_type; + acpi_handle handle; + } reference; + struct { + acpi_object_type type; + UINT32 proc_id; + acpi_io_address pblk_address; + UINT32 pblk_length; + } processor; + struct { + acpi_object_type type; + UINT32 system_level; + UINT32 resource_order; + } power_resource; +}; + +#ifdef LINUXKPI_WANT_LINUX_ACPI +struct linuxkpi_acpi_buffer { + acpi_size length; /* Length in bytes of the buffer */ + void *pointer; /* pointer to buffer */ +}; + +typedef struct linuxkpi_acpi_buffer lkpi_acpi_buffer_t; +#else +typedef ACPI_BUFFER lkpi_acpi_buffer_t; +#endif + static inline ACPI_STATUS acpi_evaluate_object(ACPI_HANDLE Object, ACPI_STRING Pathname, - ACPI_OBJECT_LIST *ParameterObjects, ACPI_BUFFER *ReturnObjectBuffer) + ACPI_OBJECT_LIST *ParameterObjects, lkpi_acpi_buffer_t *ReturnObjectBuffer) { return (AcpiEvaluateObject( - Object, Pathname, ParameterObjects, ReturnObjectBuffer)); + Object, Pathname, ParameterObjects, (ACPI_BUFFER *)ReturnObjectBuffer)); } static inline const char * @@ -83,9 +144,9 @@ } static inline ACPI_STATUS -acpi_get_name(ACPI_HANDLE Object, UINT32 NameType, ACPI_BUFFER *RetPathPtr) +acpi_get_name(ACPI_HANDLE Object, UINT32 NameType, lkpi_acpi_buffer_t *RetPathPtr) { - return (AcpiGetName(Object, NameType, RetPathPtr)); + return (AcpiGetName(Object, NameType, (ACPI_BUFFER *)RetPathPtr)); } static inline ACPI_STATUS @@ -101,4 +162,9 @@ AcpiPutTable(Table); } +#ifdef LINUXKPI_WANT_LINUX_ACPI +#define acpi_object linuxkpi_acpi_object +#define acpi_buffer linuxkpi_acpi_buffer +#endif + #endif /* _LINUXKPI_ACPI_ACPI_H_ */ diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h b/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h --- a/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h +++ b/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h @@ -58,4 +58,10 @@ struct acpi_device *lkpi_acpi_dev_get_first_match_dev(const char *hid, const char *uid, int64_t hrv); +union linuxkpi_acpi_object; + +union linuxkpi_acpi_object * +acpi_evaluate_dsm(ACPI_HANDLE 
ObjHandle, const guid_t *guid, + UINT64 rev, UINT64 func, union linuxkpi_acpi_object *arg); + #endif /* _LINUXKPI_ACPI_ACPI_BUS_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -90,6 +90,8 @@ struct device_driver { const char *name; const struct dev_pm_ops *pm; + + void (*shutdown) (struct device *); }; struct device_type { diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -72,6 +72,10 @@ uintptr_t driver_data; }; +#define MODULE_DEVICE_TABLE_BUS_pci(_bus, _table) \ +MODULE_PNP_INFO("U32:vendor;U32:device;V32:subvendor;V32:subdevice", \ + _bus, lkpi_ ## _table, _table, nitems(_table) - 1) + /* Linux has an empty element at the end of the ID table -> nitems() - 1. */ #define MODULE_DEVICE_TABLE(_bus, _table) \ \ @@ -85,11 +89,10 @@ 0 \ }; \ \ -DRIVER_MODULE(lkpi_ ## _table, pci, _ ## _bus ## _ ## _table ## _driver,\ +DRIVER_MODULE(lkpi_ ## _table, _bus, _ ## _bus ## _ ## _table ## _driver,\ 0, 0); \ \ -MODULE_PNP_INFO("U32:vendor;U32:device;V32:subvendor;V32:subdevice", \ - _bus, lkpi_ ## _table, _table, nitems(_table) - 1) +MODULE_DEVICE_TABLE_BUS_ ## _bus(_bus, _table) #define PCI_ANY_ID -1U diff --git a/sys/compat/linuxkpi/common/src/linux_acpi.c b/sys/compat/linuxkpi/common/src/linux_acpi.c --- a/sys/compat/linuxkpi/common/src/linux_acpi.c +++ b/sys/compat/linuxkpi/common/src/linux_acpi.c @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -99,6 +100,17 @@ argv4, &buf, type)) ? (ACPI_OBJECT *)buf.Pointer : NULL); } +union linuxkpi_acpi_object * +acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid, + UINT64 rev, UINT64 func, union linuxkpi_acpi_object *pkg) +{ + ACPI_BUFFER buf; + + return (ACPI_SUCCESS(acpi_EvaluateDSM(ObjHandle, (const uint8_t *)guid, + rev, func, (ACPI_OBJECT *)pkg, &buf)) ? + (union linuxkpi_acpi_object *)buf.Pointer : NULL); +} + static void linux_handle_power_suspend_event(void *arg __unused) { @@ -323,6 +335,13 @@ return (NULL); } +union linuxkpi_acpi_object * +acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid, + UINT64 rev, UINT64 func, union linuxkpi_acpi_object *pkg) +{ + return (NULL); +} + int register_acpi_notifier(struct notifier_block *nb) { diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3992,6 +3992,7 @@ kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard +kern/vfs_inotify.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard diff --git a/sys/contrib/dev/iwlwifi/iwl-debug.h b/sys/contrib/dev/iwlwifi/iwl-debug.h --- a/sys/contrib/dev/iwlwifi/iwl-debug.h +++ b/sys/contrib/dev/iwlwifi/iwl-debug.h @@ -47,7 +47,7 @@ IWL_DL_DROP = 0x00000010, IWL_DL_EEPROM = 0x00000020, IWL_DL_FW = 0x00000040, - /* = 0x00000080, */ + IWL_DL_DEV_RADIO = 0x00000080, IWL_DL_HC = 0x00000100, IWL_DL_HT = 0x00000200, IWL_DL_INFO = 0x00000400, @@ -195,6 +195,8 @@ IWL_DPRINTF(_subsys, IWL_DL_WEP, _fmt, ##__VA_ARGS__) #define IWL_DEBUG_WOWLAN(_subsys, _fmt, ...) \ IWL_DPRINTF(_subsys, IWL_DL_WOWLAN, _fmt, ##__VA_ARGS__) +#define IWL_DEBUG_DEV_RADIO(_dev, _fmt, ...) \ + IWL_DPRINTF_DEV((_dev), IWL_DL_DEV_RADIO, _fmt, ##__VA_ARGS__) #define IWL_DEBUG_PCI_RW(_subsys, _fmt, ...) 
\ IWL_DPRINTF(_subsys, IWL_DL_PCI_RW, _fmt, ##__VA_ARGS__) diff --git a/sys/contrib/dev/rtw89/acpi.c b/sys/contrib/dev/rtw89/acpi.c --- a/sys/contrib/dev/rtw89/acpi.c +++ b/sys/contrib/dev/rtw89/acpi.c @@ -8,7 +8,6 @@ #include "acpi.h" #include "debug.h" -#if defined(__linux__) static const guid_t rtw89_guid = GUID_INIT(0xD2A8C3E8, 0x4B69, 0x4F00, 0x82, 0xBD, 0xFE, 0x86, 0x07, 0x80, 0x3A, 0xA7); @@ -149,14 +148,6 @@ ACPI_FREE(obj); return ret; } -#elif defined(__FreeBSD__) -int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev, - enum rtw89_acpi_dsm_func func, - struct rtw89_acpi_dsm_result *res) -{ - return -ENOENT; -} -#endif int rtw89_acpi_evaluate_rtag(struct rtw89_dev *rtwdev, struct rtw89_acpi_rtag_result *res) @@ -180,28 +171,15 @@ if (ACPI_FAILURE(status)) return -EIO; -#if defined(__linux__) obj = buf.pointer; if (obj->type != ACPI_TYPE_BUFFER) { -#elif defined(__FreeBSD__) - obj = buf.Pointer; - if (obj->Type != ACPI_TYPE_BUFFER) { -#endif rtw89_debug(rtwdev, RTW89_DBG_ACPI, -#if defined(__linux__) "acpi: expect buffer but type: %d\n", obj->type); -#elif defined(__FreeBSD__) - "acpi: expect buffer but type: %d\n", obj->Type); -#endif ret = -EINVAL; goto out; } -#if defined(__linux__) buf_len = obj->buffer.length; -#elif defined(__FreeBSD__) - buf_len = obj->Buffer.Length; -#endif if (buf_len != sizeof(*res)) { rtw89_debug(rtwdev, RTW89_DBG_ACPI, "%s: invalid buffer length: %u\n", __func__, buf_len); @@ -209,11 +187,7 @@ goto out; } -#if defined(__linux__) *res = *(struct rtw89_acpi_rtag_result *)obj->buffer.pointer; -#elif defined(__FreeBSD__) - *res = *(struct rtw89_acpi_rtag_result *)obj->Buffer.Pointer; -#endif rtw89_hex_dump(rtwdev, RTW89_DBG_ACPI, "antenna_gain: ", res, sizeof(*res)); diff --git a/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h b/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h --- a/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h +++ b/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h @@ -39,6 +39,7 @@ char name[AUXILIARY_NAME_SIZE]; uint64_t driver_data; }; +#define MODULE_DEVICE_TABLE_BUS_auxiliary(_bus, _table) struct auxiliary_device { struct device dev; diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -36,6 +36,7 @@ #include #include +#include #include "gpiobus_if.h" diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -39,6 +39,7 @@ #include #include +#include #include "gpiobus_if.h" @@ -213,20 +214,40 @@ return (0); } +/* + * Note that this function should only + * be used in cases where a pre-existing + * gpiobus_pin structure exists. In most + * cases, the gpio_pin_get_by_* functions + * suffice. 
+ */ +int +gpio_pin_acquire(gpio_pin_t gpio) +{ + device_t busdev; + + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); + KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); + + busdev = GPIO_GET_BUS(gpio->dev); + if (busdev == NULL) + return (ENXIO); + + return (gpiobus_acquire_pin(busdev, gpio->pin)); +} + void gpio_pin_release(gpio_pin_t gpio) { device_t busdev; - if (gpio == NULL) - return; - + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); busdev = GPIO_GET_BUS(gpio->dev); - if (busdev != NULL) - gpiobus_release_pin(busdev, gpio->pin); + KASSERT(busdev != NULL, ("gpiobus dev is NULL.")); + gpiobus_release_pin(busdev, gpio->pin); free(gpio, M_DEVBUF); } @@ -293,7 +314,7 @@ } device_t -gpiobus_attach_bus(device_t dev) +gpiobus_add_bus(device_t dev) { device_t busdev; @@ -307,8 +328,24 @@ #ifdef FDT ofw_gpiobus_register_provider(dev); #endif - bus_attach_children(dev); + return (busdev); +} + +/* + * Attach a gpiobus child. + * Note that the controller is expected + * to be fully initialized at this point. + */ +device_t +gpiobus_attach_bus(device_t dev) +{ + device_t busdev; + busdev = gpiobus_add_bus(dev); + if (busdev == NULL) + return (NULL); + + bus_attach_children(dev); return (busdev); } @@ -385,14 +422,13 @@ sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max: %d\n", pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } /* Mark pin as mapped and give warning if it's already mapped. */ if (sc->sc_pins[pin].mapped) { device_printf(bus, "warning: pin %d is already mapped\n", pin); - return (-1); + return (EBUSY); } sc->sc_pins[pin].mapped = 1; @@ -400,7 +436,7 @@ } /* Release mapped pin */ -int +void gpiobus_release_pin(device_t bus, uint32_t pin) { struct gpiobus_softc *sc; @@ -408,19 +444,15 @@ sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max=%d\n", - pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } - if (!sc->sc_pins[pin].mapped) { - device_printf(bus, "pin %d is not mapped\n", pin); - return (-1); - } - sc->sc_pins[pin].mapped = 0; + if (!sc->sc_pins[pin].mapped) + panic("%s: pin %d is not mapped", device_get_nameunit(bus), + pin); - return (0); + sc->sc_pins[pin].mapped = 0; } static int @@ -435,8 +467,7 @@ device_printf(child, "cannot acquire pin %d\n", devi->pins[i]); while (--i >= 0) { - (void)gpiobus_release_pin(dev, - devi->pins[i]); + gpiobus_release_pin(dev, devi->pins[i]); } gpiobus_free_ivars(devi); return (EBUSY); diff --git a/sys/dev/gpio/pl061.h b/sys/dev/gpio/gpiobus_internal.h copy from sys/dev/gpio/pl061.h copy to sys/dev/gpio/gpiobus_internal.h --- a/sys/dev/gpio/pl061.h +++ b/sys/dev/gpio/gpiobus_internal.h @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. + * Copyright (c) 2009 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,34 +24,24 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
+ * */ -#ifndef __DEV_PL061_H__ -#define __DEV_PL061_H__ - -DECLARE_CLASS(pl061_driver); - -#define PL061_NUM_GPIO 8 +#ifndef __GPIOBUS_INTERNAL_H__ +#define __GPIOBUS_INTERNAL_H__ -struct pl061_pin_irqsrc { - struct intr_irqsrc isrc; - uint32_t irq; - uint32_t mode; -}; - -struct pl061_softc { - device_t sc_dev; - device_t sc_busdev; - struct mtx sc_mtx; - struct resource *sc_mem_res; - struct resource *sc_irq_res; - void *sc_irq_hdlr; - int sc_mem_rid; - int sc_irq_rid; - struct pl061_pin_irqsrc sc_isrcs[PL061_NUM_GPIO]; -}; - -int pl061_attach(device_t); -int pl061_detach(device_t); +/* + * Functions shared between gpiobus and other bus classes that derive from it; + * these should not be called directly by other drivers. + */ +int gpiobus_attach(device_t); +int gpiobus_detach(device_t); +int gpiobus_init_softc(device_t); +int gpiobus_alloc_ivars(struct gpiobus_ivar *); +void gpiobus_free_ivars(struct gpiobus_ivar *); +int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); +int gpiobus_acquire_pin(device_t, uint32_t); +void gpiobus_release_pin(device_t, uint32_t); -#endif /* __DEV_PL061_H__ */ +extern driver_t gpiobus_driver; +#endif diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h --- a/sys/dev/gpio/gpiobusvar.h +++ b/sys/dev/gpio/gpiobusvar.h @@ -156,6 +156,8 @@ /* Acquire a pin by child and index (used by direct children of gpiobus). */ int gpio_pin_get_by_child_index(device_t _child, uint32_t _idx, gpio_pin_t *_gp); +/* Acquire a pin from an existing gpio_pin_t. */ +int gpio_pin_acquire(gpio_pin_t gpio); /* Release a pin acquired via any gpio_pin_get_xxx() function. */ void gpio_pin_release(gpio_pin_t gpio); @@ -167,22 +169,9 @@ struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode); -/* - * Functions shared between gpiobus and other bus classes that derive from it; - * these should not be called directly by other drivers. 
- */ int gpio_check_flags(uint32_t, uint32_t); +device_t gpiobus_add_bus(device_t); device_t gpiobus_attach_bus(device_t); int gpiobus_detach_bus(device_t); -int gpiobus_attach(device_t); -int gpiobus_detach(device_t); -int gpiobus_init_softc(device_t); -int gpiobus_alloc_ivars(struct gpiobus_ivar *); -void gpiobus_free_ivars(struct gpiobus_ivar *); -int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); -int gpiobus_acquire_pin(device_t, uint32_t); -int gpiobus_release_pin(device_t, uint32_t); - -extern driver_t gpiobus_driver; #endif /* __GPIOBUS_H__ */ diff --git a/sys/dev/gpio/gpiopps.c b/sys/dev/gpio/gpiopps.c --- a/sys/dev/gpio/gpiopps.c +++ b/sys/dev/gpio/gpiopps.c @@ -160,7 +160,7 @@ if (sc->ires != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irid, sc->ires); if (sc->gpin != NULL) - gpiobus_release_pin(GPIO_GET_BUS(sc->gpin->dev), sc->gpin->pin); + gpio_pin_release(sc->gpin); return (0); } diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c --- a/sys/dev/gpio/ofw_gpiobus.c +++ b/sys/dev/gpio/ofw_gpiobus.c @@ -36,6 +36,7 @@ #include #include +#include #include #include "gpiobus_if.h" diff --git a/sys/dev/gpio/pl061.h b/sys/dev/gpio/pl061.h --- a/sys/dev/gpio/pl061.h +++ b/sys/dev/gpio/pl061.h @@ -46,6 +46,7 @@ struct resource *sc_mem_res; struct resource *sc_irq_res; void *sc_irq_hdlr; + intptr_t sc_xref; int sc_mem_rid; int sc_irq_rid; struct pl061_pin_irqsrc sc_isrcs[PL061_NUM_GPIO]; diff --git a/sys/dev/gpio/pl061.c b/sys/dev/gpio/pl061.c --- a/sys/dev/gpio/pl061.c +++ b/sys/dev/gpio/pl061.c @@ -487,14 +487,21 @@ } } + mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); + + if (sc->sc_xref != 0 && !intr_pic_register(dev, sc->sc_xref)) { + device_printf(dev, "couldn't register PIC\n"); + PL061_LOCK_DESTROY(sc); + goto free_isrc; + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "couldn't attach gpio bus\n"); + PL061_LOCK_DESTROY(sc); goto free_isrc; } - mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); - return (0); free_isrc: @@ -503,6 +510,7 @@ * for (irq = 0; irq < PL061_NUM_GPIO; irq++) * intr_isrc_deregister(PIC_INTR_ISRC(sc, irq)); */ + bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_hdlr); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, sc->sc_irq_res); free_pic: diff --git a/sys/dev/gpio/pl061_acpi.c b/sys/dev/gpio/pl061_acpi.c --- a/sys/dev/gpio/pl061_acpi.c +++ b/sys/dev/gpio/pl061_acpi.c @@ -67,19 +67,12 @@ static int pl061_acpi_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = ACPI_GPIO_XREF; - if (!intr_pic_register(dev, ACPI_GPIO_XREF)) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_acpi_methods[] = { diff --git a/sys/dev/gpio/pl061_fdt.c b/sys/dev/gpio/pl061_fdt.c --- a/sys/dev/gpio/pl061_fdt.c +++ b/sys/dev/gpio/pl061_fdt.c @@ -61,19 +61,12 @@ static int pl061_fdt_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev)); - if (!intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)))) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static 
device_method_t pl061_fdt_methods[] = { diff --git a/sys/dev/gpio/qoriq_gpio.c b/sys/dev/gpio/qoriq_gpio.c --- a/sys/dev/gpio/qoriq_gpio.c +++ b/sys/dev/gpio/qoriq_gpio.c @@ -369,11 +369,6 @@ for (i = 0; i <= MAXPIN; i++) sc->sc_pins[i].gp_caps = DEFAULT_CAPS; - sc->busdev = gpiobus_attach_bus(dev); - if (sc->busdev == NULL) { - qoriq_gpio_detach(dev); - return (ENOMEM); - } /* * Enable the GPIO Input Buffer for all GPIOs. * This is safe on devices without a GPIBE register, because those @@ -384,6 +379,12 @@ OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); + if (sc->busdev == NULL) { + qoriq_gpio_detach(dev); + return (ENOMEM); + } + return (0); } diff --git a/sys/dev/hwt/hwt_contexthash.c b/sys/dev/hwt/hwt_contexthash.c --- a/sys/dev/hwt/hwt_contexthash.c +++ b/sys/dev/hwt/hwt_contexthash.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include diff --git a/sys/dev/hwt/hwt_ioctl.c b/sys/dev/hwt/hwt_ioctl.c --- a/sys/dev/hwt/hwt_ioctl.c +++ b/sys/dev/hwt/hwt_ioctl.c @@ -305,9 +305,11 @@ return (error); CPU_FOREACH_ISSET(cpu_id, &cpu_map) { +#ifdef SMP /* Ensure CPU is not halted. */ if (CPU_ISSET(cpu_id, &hlt_cpus_mask)) return (ENXIO); +#endif #if 0 /* TODO: Check if the owner have this cpu configured already. */ ctx = hwt_owner_lookup_ctx_by_cpu(ho, halloc->cpu); diff --git a/sys/dev/hwt/hwt_vm.c b/sys/dev/hwt/hwt_vm.c --- a/sys/dev/hwt/hwt_vm.c +++ b/sys/dev/hwt/hwt_vm.c @@ -213,9 +213,11 @@ CPU_ZERO(&enable_cpus); CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { +#ifdef SMP /* Ensure CPU is not halted. */ if (CPU_ISSET(cpu_id, &hlt_cpus_mask)) continue; +#endif hwt_backend_configure(ctx, cpu_id, cpu_id); diff --git a/sys/dev/iicbus/gpio/tca64xx.c b/sys/dev/iicbus/gpio/tca64xx.c --- a/sys/dev/iicbus/gpio/tca64xx.c +++ b/sys/dev/iicbus/gpio/tca64xx.c @@ -261,14 +261,13 @@ sc->addr = iicbus_get_addr(dev); mtx_init(&sc->mtx, "tca64xx gpio", "gpio", MTX_DEF); + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Could not create busdev child\n"); return (ENXIO); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - #ifdef DEBUG switch (sc->chip) { case TCA6416_TYPE: diff --git a/sys/dev/mem/memutil.c b/sys/dev/mem/memutil.c --- a/sys/dev/mem/memutil.c +++ b/sys/dev/mem/memutil.c @@ -26,15 +26,14 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include +#include #include #include #include #include -#include -#include +#include -static struct rwlock mr_lock; +static struct sx mr_lock; /* * Implementation-neutral, kernel-callable functions for manipulating @@ -46,7 +45,7 @@ if (mem_range_softc.mr_op == NULL) return; - rw_init(&mr_lock, "memrange"); + sx_init(&mr_lock, "memrange"); mem_range_softc.mr_op->init(&mem_range_softc); } @@ -56,7 +55,7 @@ if (mem_range_softc.mr_op == NULL) return; - rw_destroy(&mr_lock); + sx_destroy(&mr_lock); } int @@ -67,12 +66,12 @@ if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); nd = *arg; - rw_rlock(&mr_lock); + sx_slock(&mr_lock); if (nd == 0) *arg = mem_range_softc.mr_ndesc; else bcopy(mem_range_softc.mr_desc, mrd, nd * sizeof(*mrd)); - rw_runlock(&mr_lock); + sx_sunlock(&mr_lock); return (0); } @@ -83,8 +82,8 @@ if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); - rw_wlock(&mr_lock); + sx_xlock(&mr_lock); ret = mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg); - rw_wunlock(&mr_lock); + sx_xunlock(&mr_lock); return (ret); } diff --git a/sys/dev/random/random_harvestq.h b/sys/dev/random/random_harvestq.h --- a/sys/dev/random/random_harvestq.h +++ b/sys/dev/random/random_harvestq.h @@ -27,6 +27,9 @@ #ifndef SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED #define SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED +#include +#include + #define HARVESTSIZE 2 /* Max length in words of each harvested entropy unit */ /* These are used to queue harvested packets of entropy. The entropy @@ -40,8 +43,10 @@ uint8_t he_source; /* origin of the entropy */ }; -#define RANDOM_HARVEST_INIT_LOCK(x) mtx_init(&harvest_context.hc_mtx, "entropy harvest mutex", NULL, MTX_SPIN) -#define RANDOM_HARVEST_LOCK(x) mtx_lock_spin(&harvest_context.hc_mtx) -#define RANDOM_HARVEST_UNLOCK(x) mtx_unlock_spin(&harvest_context.hc_mtx) +static inline uint32_t +random_get_cyclecount(void) +{ + return ((uint32_t)get_cyclecount()); +} #endif /* SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED */ diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -161,6 +161,11 @@ } hc_entropy_fast_accumulator; } harvest_context; +#define RANDOM_HARVEST_INIT_LOCK() mtx_init(&harvest_context.hc_mtx, \ + "entropy harvest mutex", NULL, MTX_SPIN) +#define RANDOM_HARVEST_LOCK() mtx_lock_spin(&harvest_context.hc_mtx) +#define RANDOM_HARVEST_UNLOCK() mtx_unlock_spin(&harvest_context.hc_mtx) + static struct kproc_desc random_proc_kp = { "rand_harvestq", random_kthread, @@ -212,9 +217,10 @@ kproc_exit(0); /* NOTREACHED */ } -/* This happens well after SI_SUB_RANDOM */ SYSINIT(random_device_h_proc, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, kproc_start, &random_proc_kp); +_Static_assert(SI_SUB_KICK_SCHEDULER > SI_SUB_RANDOM, + "random kthread starting before subsystem initialization"); static void rs_epoch_init(void *dummy __unused) @@ -305,7 +311,6 @@ explicit_bzero(entropy, sizeof(entropy)); } -/* ARGSUSED */ static int random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS) { @@ -336,7 +341,6 @@ random_check_uint_harvestmask, "IU", "Entropy harvesting mask"); -/* ARGSUSED */ static int random_print_harvestmask(SYSCTL_HANDLER_ARGS) { @@ -390,7 +394,6 @@ /* "ENTROPYSOURCE" */ }; -/* ARGSUSED */ static int random_print_harvestmask_symbolic(SYSCTL_HANDLER_ARGS) { @@ -423,7 +426,6 @@ random_print_harvestmask_symbolic, "A", "Entropy harvesting mask (symbolic)"); -/* ARGSUSED */ static void random_harvestq_init(void *unused __unused) { @@ -453,7 +455,7 @@ return (0); for 
(i = 0; i < len; i += sizeof(event.he_entropy)) { - event.he_somecounter = (uint32_t)get_cyclecount(); + event.he_somecounter = random_get_cyclecount(); event.he_size = sizeof(event.he_entropy); event.he_source = RANDOM_CACHED; event.he_destination = @@ -493,7 +495,6 @@ * known to the kernel, and inserting it directly into the hashing * module, currently Fortuna. */ -/* ARGSUSED */ static void random_harvestq_prime(void *unused __unused) { @@ -522,7 +523,6 @@ } SYSINIT(random_device_prime, SI_SUB_RANDOM, SI_ORDER_MIDDLE, random_harvestq_prime, NULL); -/* ARGSUSED */ static void random_harvestq_deinit(void *unused __unused) { @@ -560,7 +560,7 @@ if (ring_in != harvest_context.hc_entropy_ring.out) { /* The ring is not full */ event = harvest_context.hc_entropy_ring.ring + ring_in; - event->he_somecounter = (uint32_t)get_cyclecount(); + event->he_somecounter = random_get_cyclecount(); event->he_source = origin; event->he_destination = harvest_context.hc_destination[origin]++; if (size <= sizeof(event->he_entropy)) { @@ -589,7 +589,8 @@ u_int pos; pos = harvest_context.hc_entropy_fast_accumulator.pos; - harvest_context.hc_entropy_fast_accumulator.buf[pos] ^= jenkins_hash(entropy, size, (uint32_t)get_cyclecount()); + harvest_context.hc_entropy_fast_accumulator.buf[pos] ^= + jenkins_hash(entropy, size, random_get_cyclecount()); harvest_context.hc_entropy_fast_accumulator.pos = (pos + 1)%RANDOM_ACCUM_MAX; } @@ -606,7 +607,7 @@ KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin %d invalid\n", __func__, origin)); size = MIN(size, sizeof(event.he_entropy)); - event.he_somecounter = (uint32_t)get_cyclecount(); + event.he_somecounter = random_get_cyclecount(); event.he_size = size; event.he_source = origin; event.he_destination = harvest_context.hc_destination[origin]++; diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c --- a/sys/dev/random/randomdev.c +++ b/sys/dev/random/randomdev.c @@ -303,14 +303,14 @@ /* Extra timing here is helpful to scrape scheduler jitter entropy */ randomdev_hash_init(&hash); - timestamp = (uint32_t)get_cyclecount(); + timestamp = random_get_cyclecount(); randomdev_hash_iterate(&hash, &timestamp, sizeof(timestamp)); randomdev_hash_iterate(&hash, buf, count); - timestamp = (uint32_t)get_cyclecount(); + timestamp = random_get_cyclecount(); randomdev_hash_iterate(&hash, &timestamp, sizeof(timestamp)); randomdev_hash_finish(&hash, entropy_data); for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) { - event.he_somecounter = (uint32_t)get_cyclecount(); + event.he_somecounter = random_get_cyclecount(); event.he_size = sizeof(event.he_entropy); event.he_source = RANDOM_CACHED; event.he_destination = destination++; /* Harmless cheating */ diff --git a/sys/dev/regulator/regulator_fixed.c b/sys/dev/regulator/regulator_fixed.c --- a/sys/dev/regulator/regulator_fixed.c +++ b/sys/dev/regulator/regulator_fixed.c @@ -100,12 +100,8 @@ regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) { struct gpio_entry *entry, *tmp; - device_t busdev; int rv; - busdev = GPIO_GET_BUS(gpio_pin->dev); - if (busdev == NULL) - return (NULL); entry = malloc(sizeof(struct gpio_entry), M_FIXEDREGULATOR, M_WAITOK | M_ZERO); @@ -122,8 +118,8 @@ } /* Reserve pin. */ - /* XXX Can we call gpiobus_acquire_pin() with gpio_list_mtx held? */ - rv = gpiobus_acquire_pin(busdev, gpio_pin->pin); + /* XXX Can we call gpio_pin_acquire() with gpio_list_mtx held? 
*/ + rv = gpio_pin_acquire(gpio_pin); if (rv != 0) { mtx_unlock(&gpio_list_mtx); free(entry, M_FIXEDREGULATOR); diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -351,6 +351,7 @@ VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_STAT_DESC, 0), #if defined(__amd64__) && defined(COMPAT_FREEBSD12) VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -89,6 +89,8 @@ #include #include #include +#define EXTERR_CATEGORY EXTERR_CAT_FUSE +#include #include #include @@ -439,7 +441,8 @@ if (vnode_isvroot(vp)) { return 0; } - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!(data->dataflags & FSESS_INITED)) { if (vnode_isvroot(vp)) { @@ -448,7 +451,8 @@ return 0; } } - return EBADF; + return (EXTERROR(EBADF, "Access denied until FUSE session " + "is initialized")); } if (vnode_islnk(vp)) { return 0; @@ -489,7 +493,8 @@ dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } switch(ap->a_op) { @@ -506,7 +511,7 @@ op = FUSE_SETLK; break; default: - return EINVAL; + return (EXTERROR(EINVAL, "Unsupported lock flags")); } if (!(dataflags & FSESS_POSIX_LOCKS)) @@ -534,14 +539,14 @@ size = vattr.va_size; if (size > OFF_MAX || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { - err = EOVERFLOW; + err = EXTERROR(EOVERFLOW, "Offset is too large"); goto out; } start = size + fl->l_start; break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported offset type")); } err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); @@ -603,15 +608,14 @@ int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); switch (vp->v_type) { case VFIFO: return (ESPIPE); case VLNK: case VREG: - if (vfs_isrdonly(mp)) - return (EROFS); break; default: return (ENODEV); @@ -621,7 +625,8 @@ return (EROFS); if (fsess_not_impl(mp, FUSE_FALLOCATE)) - return (EINVAL); + return (EXTERROR(EINVAL, "This server does not implement " + "FUSE_FALLOCATE")); io.uio_offset = *offset; io.uio_resid = *len; @@ -651,13 +656,14 @@ if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FALLOCATE); - err = EINVAL; + err = EXTERROR(EINVAL, "This server does not implement " + "FUSE_ALLOCATE"); } else if (err == EOPNOTSUPP) { /* * The file system server does not support FUSE_FALLOCATE with * the supplied mode for this particular file. 
*/ - err = EINVAL; + err = EXTERROR(EINVAL, "This file can't be pre-allocated"); } else if (!err) { *offset += *len; *len = 0; @@ -703,7 +709,8 @@ int maxrun; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } mp = vnode_mount(vp); @@ -870,19 +877,21 @@ pid_t pid; int err; - err = ENOSYS; if (mp == NULL || mp != vnode_mount(outvp)) - goto fallback; + return (EXTERROR(ENOSYS, "Mount points do not match")); if (incred->cr_uid != outcred->cr_uid) - goto fallback; + return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " + "support different credentials for infd and outfd")); if (incred->cr_groups[0] != outcred->cr_groups[0]) - goto fallback; + return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " + "support different credentials for infd and outfd")); /* Caller busied mp, mnt_data can be safely accessed. */ if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) - goto fallback; + return (EXTERROR(ENOSYS, "This daemon does not " + "implement COPY_FILE_RANGE")); if (ap->a_fsizetd == NULL) td = curthread; @@ -892,7 +901,7 @@ vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); if (invp->v_data == NULL || outvp->v_data == NULL) { - err = EBADF; + err = EXTERROR(EBADF, "vnode got reclaimed"); goto unlock; } @@ -956,7 +965,6 @@ if (err == ENOSYS) fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); -fallback: /* * No need to call vn_rlimit_fsizex_res before return, since the uio is @@ -1024,7 +1032,8 @@ int flags; if (fuse_isdeadfs(dvp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); /* FUSE expects sockets to be created with FUSE_MKNOD */ if (vap->va_type == VSOCK) @@ -1040,7 +1049,7 @@ bzero(&fdi, sizeof(fdi)); if (vap->va_type != VREG) - return (EINVAL); + return (EXTERROR(EINVAL, "Only regular files can be created")); if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { /* Fallback to FUSE_MKNOD/FUSE_OPEN */ @@ -1221,8 +1230,8 @@ if (!(dataflags & FSESS_INITED)) { if (!vnode_isvroot(vp)) { fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); - err = ENOTCONN; - return err; + return (EXTERROR(ENOTCONN, "FUSE daemon is not " + "initialized")); } else { goto fake; } @@ -1351,10 +1360,11 @@ int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vnode_mount(tdvp) != vnode_mount(vp)) { - return EXDEV; + return (EXDEV); } /* @@ -1364,7 +1374,7 @@ * validating that nlink does not overflow. */ if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) - return EMLINK; + return (EMLINK); fli.oldnodeid = VTOI(vp); fdisp_init(&fdi, 0); @@ -1376,12 +1386,13 @@ feo = fdi.answ; if (fli.oldnodeid != feo->nodeid) { + static const char exterr[] = "Server assigned wrong inode " + "for a hard link."; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); - fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, - "Assigned wrong inode for a hard link."); + fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, exterr); fuse_vnode_clear_attr_cache(vp); fuse_vnode_clear_attr_cache(tdvp); - err = EIO; + err = EXTERROR(EIO, exterr); goto out; } @@ -1458,7 +1469,8 @@ if (fuse_isdeadfs(dvp)) { *vpp = NULL; - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!vnode_isdir(dvp)) return ENOTDIR; @@ -1478,7 +1490,8 @@ * Since the file system doesn't support ".." lookups, * we have no way to find this entry. */ - return ESTALE; + return (EXTERROR(ESTALE, "This server does not support " + "'..' 
lookups")); } nid = VTOFUD(dvp)->parent_nid; if (nid == 0) @@ -1601,11 +1614,11 @@ vref(dvp); *vpp = dvp; } else { + static const char exterr[] = "Server assigned " + "same inode to both parent and child."; fuse_warn(fuse_get_mpdata(mp), - FSESS_WARN_ILLEGAL_INODE, - "Assigned same inode to both parent and " - "child."); - err = EIO; + FSESS_WARN_ILLEGAL_INODE, exterr); + err = EXTERROR(EIO, exterr); } } else { @@ -1693,7 +1706,8 @@ struct fuse_mkdir_in fmdi; if (fuse_isdeadfs(dvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmdi.umask = curthread->td_proc->p_pd->pd_cmask; @@ -1720,7 +1734,8 @@ struct vattr *vap = ap->a_vap; if (fuse_isdeadfs(dvp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); return fuse_internal_mknod(dvp, vpp, cnp, vap); } @@ -1744,11 +1759,13 @@ pid_t pid = td->td_proc->p_pid; if (fuse_isdeadfs(vp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) - return (EOPNOTSUPP); + return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type", + vp->v_type)); if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) - return EINVAL; + return (EXTERROR(EINVAL, "Illegal mode", a_mode)); if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { fuse_vnode_open(vp, 0, td); @@ -1830,7 +1847,8 @@ return (0); } else if (fsess_not_impl(mp, FUSE_LSEEK)) { /* FUSE_LSEEK is not implemented */ - return (EINVAL); + return (EXTERROR(EINVAL, "This server does not " + "implement FUSE_LSEEK")); } else { return (err); } @@ -1864,7 +1882,8 @@ MPASS(vp->v_type == VREG || vp->v_type == VDIR); if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp)->flag & FN_DIRECTIO) { @@ -1941,10 +1960,11 @@ if (ap->a_eofflag) *ap->a_eofflag = 0; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (uio_resid(uio) < sizeof(struct dirent)) - return EINVAL; + return (EXTERROR(EINVAL, "Buffer is too small")); tresid = uio->uio_resid; err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); @@ -2014,7 +2034,8 @@ int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!vnode_islnk(vp)) { return EINVAL; @@ -2025,10 +2046,11 @@ goto out; } if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) { + static const char exterr[] = "Server returned an embedded NUL " + "from FUSE_READLINK."; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); - fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, - "Returned an embedded NUL from FUSE_READLINK."); - err = EIO; + fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, exterr); + err = EXTERROR(EIO, exterr); goto out; } if (((char *)fdi.answ)[0] == '/' && @@ -2112,10 +2134,11 @@ int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vnode_isdir(vp)) { - return EPERM; + return (EXTERROR(EPERM, "vnode is a directory")); } err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); @@ -2148,12 +2171,13 @@ int err = 0; if (fuse_isdeadfs(fdvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (fvp->v_mount != tdvp->v_mount || (tvp && fvp->v_mount != tvp->v_mount)) { SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device 
rename"); - err = EXDEV; + err = EXTERROR(EXDEV, "Cross-device rename"); goto out; } cache_purge(fvp); @@ -2224,10 +2248,12 @@ int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp) == VTOFUD(dvp)) { - return EINVAL; + return (EXTERROR(EINVAL, "Directory to be removed " + "contains itself")); } err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); @@ -2264,7 +2290,8 @@ checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vap->va_uid != (uid_t)VNOVAL) { @@ -2429,7 +2456,8 @@ size_t len; if (fuse_isdeadfs(dvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } /* * Unlike the other creator type calls, here we have to create a message @@ -2475,7 +2503,8 @@ MPASS(vp->v_type == VREG || vp->v_type == VDIR); if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp)->flag & FN_DIRECTIO) { @@ -2628,10 +2657,12 @@ int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_GETXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "extended attributes")); err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) @@ -2669,7 +2700,8 @@ if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_GETXATTR); - err = EOPNOTSUPP; + err = (EXTERROR(EOPNOTSUPP, "This server does not " + "implement extended attributes")); } goto out; } @@ -2715,10 +2747,12 @@ int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_SETXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "setting extended attributes")); if (vfs_isrdonly(mp)) return EROFS; @@ -2730,9 +2764,11 @@ * return EOPNOTSUPP. 
*/ if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) - return (EOPNOTSUPP); + return (EXTERROR(EOPNOTSUPP, "This server does not " + "implement removing extended attributess")); else - return (EINVAL); + return (EXTERROR(EINVAL, "DELETEEXTATTR should be used " + "to remove extattrs")); } err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, @@ -2778,7 +2814,8 @@ if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_SETXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not implement " + "setting extended attributes"); } if (err == ERESTART) { /* Can't restart after calling uiomove */ @@ -2889,10 +2926,12 @@ int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_LISTXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "extended attributes")); err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) @@ -2920,7 +2959,8 @@ if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LISTXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not " + "implement extended attributes"); } goto out; } @@ -3020,7 +3060,8 @@ bool closefufh = false; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (vfs_isrdonly(mp)) return (EROFS); @@ -3126,10 +3167,12 @@ int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "removing extended attributes")); if (vfs_isrdonly(mp)) return EROFS; @@ -3158,7 +3201,8 @@ err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_REMOVEXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not implement " + "removing extended attributes"); } fdisp_destroy(&fdi); @@ -3212,7 +3256,8 @@ /* NFS requires lookups for "." and ".." 
*/ SDT_PROBE2(fusefs, , vnops, trace, 1, "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server is " + "missing FUSE_EXPORT_SUPPORT")); } if ((mp->mnt_flag & MNT_EXPORTED) && fsess_is_impl(mp, FUSE_OPENDIR)) @@ -3230,7 +3275,8 @@ */ SDT_PROBE2(fusefs, , vnops, trace, 1, "VOP_VPTOFH with FUSE_OPENDIR"); - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server implements " + "FUSE_OPENDIR so is not compatible with getfh")); } err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); @@ -3244,6 +3290,7 @@ if (fvdat->generation <= UINT32_MAX) fhp->gen = fvdat->generation; else - return EOVERFLOW; + return (EXTERROR(EOVERFLOW, "inode generation " + "number overflow")); return (0); } diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c --- a/sys/fs/msdosfs/msdosfs_lookup.c +++ b/sys/fs/msdosfs/msdosfs_lookup.c @@ -845,7 +845,6 @@ *wait_scn = 0; pmp = target->de_pmp; - lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED); KASSERT(pmp == source->de_pmp, ("doscheckpath: source and target on different filesystems")); diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -575,7 +575,6 @@ pmp->pm_bo = bo; lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0); - lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0); TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp); @@ -871,7 +870,6 @@ } if (pmp != NULL) { lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; @@ -971,7 +969,6 @@ dev_rel(pmp->pm_dev); free(pmp->pm_inusemap, M_MSDOSFSFAT); lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; return (error); diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -945,7 +945,7 @@ struct denode *fdip, *fip, *tdip, *tip, *nip; u_char toname[12], oldname[11]; u_long to_diroffset; - bool checkpath_locked, doingdirectory, newparent; + bool doingdirectory, newparent; int error; u_long cn, pcl, blkoff; daddr_t bn, wait_scn, scn; @@ -986,8 +986,6 @@ if (tvp != NULL && tvp != tdvp) VOP_UNLOCK(tvp); - checkpath_locked = false; - relock: doingdirectory = newparent = false; @@ -1108,12 +1106,8 @@ if (doingdirectory && newparent) { if (error != 0) /* write access check above */ goto unlock; - lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL); - checkpath_locked = true; error = doscheckpath(fip, tdip, &wait_scn); if (wait_scn != 0) { - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); @@ -1276,8 +1270,6 @@ cache_purge(fvp); unlock: - if (checkpath_locked) - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); vput(fdvp); vput(fvp); if (tvp != NULL) { @@ -1289,7 +1281,6 @@ vput(tdvp); return (error); releout: - MPASS(!checkpath_locked); vrele(tdvp); if (tvp != NULL) vrele(tvp); diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h --- a/sys/fs/msdosfs/msdosfsmount.h +++ b/sys/fs/msdosfs/msdosfsmount.h @@ -118,7 +118,6 @@ void *pm_u2d; /* Unicode->DOS iconv handle */ void *pm_d2u; /* DOS->Local iconv handle */ struct lock pm_fatlock; /* lockmgr protecting allocations */ - struct lock pm_checkpath_lock; /* protects doscheckpath result */ struct task 
pm_rw2ro_task; /* context for emergency remount ro */ }; diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -245,6 +245,10 @@ vp->v_object = lowervp->v_object; vn_irflag_set(vp, VIRF_PGREAD); } + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0) + vn_irflag_set(vp, VIRF_INOTIFY); + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0) + vn_irflag_set(vp, VIRF_INOTIFY_PARENT); if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp) vp->v_vflag |= VV_ROOT; diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -189,6 +189,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); +/* + * Synchronize inotify flags with the lower vnode: + * - If the upper vnode has the flag set and the lower does not, then the lower + * vnode is unwatched and the upper vnode does not need to go through + * VOP_INOTIFY. + * - If the lower vnode is watched, then the upper vnode should go through + * VOP_INOTIFY, so copy the flag up. + */ +static void +null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag) +{ + if ((vn_irflag_read(vp) & flag) != 0) { + if (__predict_false((vn_irflag_read(lvp) & flag) == 0)) + vn_irflag_unset(vp, flag); + } else if ((vn_irflag_read(lvp) & flag) != 0) { + if (__predict_false((vn_irflag_read(vp) & flag) == 0)) + vn_irflag_set(vp, flag); + } +} + /* * This is the 10-Apr-92 bypass routine. * This version has been optimized for speed, throwing away some @@ -305,7 +325,10 @@ lvp = *(vps_p[i]); /* - * Get rid of the transient hold on lvp. + * Get rid of the transient hold on lvp. Copy inotify + * flags up in case something is watching the lower + * layer. + * * If lowervp was unlocked during VOP * operation, nullfs upper vnode could have * been reclaimed, which changes its v_vnlock @@ -314,6 +337,10 @@ * upper (reclaimed) vnode. 
*/ if (lvp != NULLVP) { + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY); + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY_PARENT); if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE && old_vps[i]->v_vnlock != lvp->v_vnlock) { VOP_UNLOCK(lvp); diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -981,10 +981,13 @@ syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1178,7 +1181,7 @@ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -306,8 +306,8 @@ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -346,7 +346,7 @@ { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, 
.sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c --- a/sys/i386/linux/linux_systrace_args.c +++ b/sys/i386/linux/linux_systrace_args.c @@ -2071,12 +2071,19 @@ } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2410,7 +2417,9 @@ } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6604,9 +6613,32 @@ break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7172,6 +7204,13 @@ break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9889,8 +9928,14 @@ case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -10062,6 +10107,9 @@ break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -1605,10 +1605,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t 
wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1872,7 +1879,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -659,4 +659,6 @@ { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = setcred */ { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1637,6 +1637,12 @@ if (uip->ui_pipecnt != 0) printf("freeing uidinfo: uid = %d, pipecnt = %ld\n", uip->ui_uid, uip->ui_pipecnt); + if (uip->ui_inotifycnt != 0) + printf("freeing uidinfo: uid = %d, inotifycnt = %ld\n", + uip->ui_uid, uip->ui_inotifycnt); + if (uip->ui_inotifywatchcnt != 0) + printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n", + uip->ui_uid, uip->ui_inotifywatchcnt); free(uip, M_UIDINFO); } @@ -1742,6 +1748,21 @@ return (chglimit(uip, &uip->ui_pipecnt, diff, max, "pipecnt")); } +int +chginotifycnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifycnt, diff, max, "inotifycnt")); +} + +int +chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifywatchcnt, diff, max, + "inotifywatchcnt")); +} + static int sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS) { diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -27,12 +27,12 @@ * SUCH DAMAGE. 
*/ -#include #include "opt_kern_tls.h" #include #include #include +#include #include #include #include @@ -1246,6 +1246,8 @@ */ if (error == 0) { td->td_retval[0] = 0; + if (sbytes > 0 && vp != NULL) + INOTIFY(vp, IN_ACCESS); } if (sent != NULL) { (*sent) = sbytes; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -4139,7 +4139,7 @@ struct flock lf; struct vattr vattr; size_t fullpathsize; - int error, error1, locked; + int error, error1, jid, locked, ppid, sig; char *name; /* name of corefile */ void *rl_cookie; off_t limit; @@ -4168,6 +4168,10 @@ PROC_UNLOCK(p); return (EFBIG); } + + ppid = p->p_oppid; + sig = p->p_sig; + jid = p->p_ucred->cr_prison->pr_id; PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, @@ -4253,6 +4257,9 @@ } devctl_safe_quote_sb(sb, name); sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); if (sbuf_finish(sb) == 0) devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); out2: diff --git a/sys/kern/subr_capability.c b/sys/kern/subr_capability.c --- a/sys/kern/subr_capability.c +++ b/sys/kern/subr_capability.c @@ -74,6 +74,10 @@ CAP_RIGHTS_INITIALIZER(CAP_GETSOCKOPT); const cap_rights_t cap_getsockname_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKNAME); +const cap_rights_t cap_inotify_add_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_ADD); +const cap_rights_t cap_inotify_rm_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_RM); const cap_rights_t cap_ioctl_rights = CAP_RIGHTS_INITIALIZER(CAP_IOCTL); const cap_rights_t cap_listen_rights = CAP_RIGHTS_INITIALIZER(CAP_LISTEN); const cap_rights_t cap_linkat_source_rights = diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -37,16 +37,17 @@ #include "opt_capsicum.h" #include "opt_ktrace.h" -#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC +#define EXTERR_CATEGORY EXTERR_CAT_GENIO #include #include #include #include +#include #include #include #include #include -#include +#include #include #include #include @@ -195,7 +196,7 @@ int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -233,7 +234,7 @@ int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -329,7 +330,7 @@ error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -396,7 +397,7 @@ int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -435,7 +436,7 @@ int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -531,7 +532,7 @@ error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -602,14 +603,14 @@ AUDIT_ARG_FD(fd); if (length < 0) - return (EINVAL); + return (EXTERROR(EINVAL, 
"negative length")); error = fget(td, fd, &cap_ftruncate_rights, &fp); if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); if (!(fp->f_flag & FWRITE)) { fdrop(fp, td); - return (EINVAL); + return (EXTERROR(EINVAL, "non-writable")); } error = fo_truncate(fp, length, td->td_ucred, td); fdrop(fp, td); @@ -840,8 +841,10 @@ int error; AUDIT_ARG_FD(fd); - if (offset < 0 || len <= 0) - return (EINVAL); + if (offset < 0) + return (EXTERROR(EINVAL, "negative offset")); + if (len <= 0) + return (EXTERROR(EINVAL, "negative length")); /* Check for wrap. */ if (offset > OFF_MAX - len) return (EFBIG); @@ -898,16 +901,21 @@ AUDIT_ARG_FFLAGS(flags); if (rqsr == NULL) - return (EINVAL); + return (EXTERROR(EINVAL, "no range")); rmsr = *rqsr; if (rmsrp != NULL) *rmsrp = rmsr; - if (cmd != SPACECTL_DEALLOC || - rqsr->r_offset < 0 || rqsr->r_len <= 0 || - rqsr->r_offset > OFF_MAX - rqsr->r_len || - (flags & ~SPACECTL_F_SUPPORTED) != 0) - return (EINVAL); + if (cmd != SPACECTL_DEALLOC) + return (EXTERROR(EINVAL, "cmd", cmd)); + if (rqsr->r_offset < 0) + return (EXTERROR(EINVAL, "neg offset")); + if (rqsr->r_len <= 0) + return (EXTERROR(EINVAL, "neg len")); + if (rqsr->r_offset > OFF_MAX - rqsr->r_len) + return (EXTERROR(EINVAL, "offset too large")); + if ((flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EXTERROR(EINVAL, "reserved flags", flags)); error = fget_write(td, fd, &cap_pwrite_rights, &fp); if (error != 0) @@ -939,7 +947,6 @@ kern_specialfd(struct thread *td, int type, void *arg) { struct file *fp; - struct specialfd_eventfd *ae; int error, fd, fflags; fflags = 0; @@ -948,14 +955,24 @@ return (error); switch (type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd *ae; + ae = arg; if ((ae->flags & EFD_CLOEXEC) != 0) fflags |= O_CLOEXEC; error = eventfd_create_file(td, fp, ae->initval, ae->flags); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify *si; + + si = arg; + error = inotify_create_file(td, fp, si->flags, &fflags); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "invalid type", type); break; } @@ -970,13 +987,14 @@ int sys___specialfd(struct thread *td, struct __specialfd_args *args) { - struct specialfd_eventfd ae; int error; switch (args->type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd ae; + if (args->len != sizeof(struct specialfd_eventfd)) { - error = EINVAL; + error = EXTERROR(EINVAL, "eventfd params ABI"); break; } error = copyin(args->req, &ae, sizeof(ae)); @@ -984,13 +1002,27 @@ break; if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) != 0) { - error = EINVAL; + error = EXTERROR(EINVAL, "reserved flag"); break; } error = kern_specialfd(td, args->type, &ae); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify si; + + if (args->len != sizeof(si)) { + error = EINVAL; + break; + } + error = copyin(args->req, &si, sizeof(si)); + if (error != 0) + break; + error = kern_specialfd(td, args->type, &si); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown type", args->type); break; } return (error); @@ -1166,7 +1198,7 @@ int error, lf, ndu; if (nd < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative ndescs")); fdp = td->td_proc->p_fd; ndu = nd; lf = fdp->fd_nfiles; @@ -1259,7 +1291,7 @@ rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) { - error = EINVAL; + error = EXTERROR(EINVAL, "invalid timeval"); goto done; } if (!timevalisset(&rtv)) @@ -1491,7 +1523,7 @@ if (uap->timeout != INFTIM) { if 
(uap->timeout < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeout")); ts.tv_sec = uap->timeout / 1000; ts.tv_nsec = (uap->timeout % 1000) * 1000000; tsp = &ts; @@ -1516,7 +1548,7 @@ precision = 0; if (tsp != NULL) { if (!timespecvalid_interval(tsp)) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timespec")); if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) sbt = 0; else { @@ -1619,7 +1651,7 @@ int error; if (kern_poll_maxfds(nfds)) - return (EINVAL); + return (EXTERROR(EINVAL, "too large nfds")); if (nfds > nitems(stackfds)) kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else @@ -1796,7 +1828,7 @@ rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeval")); if (!timevalisset(&rtv)) asbt = 0; else if (rtv.tv_sec <= INT32_MAX) { @@ -2173,7 +2205,7 @@ (uintptr_t)p2->p_vmspace); break; default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown op"); break; } @@ -2277,6 +2309,11 @@ return (EINVAL); td->td_pflags2 &= ~TDP2_UEXTERR; return (0); + case EXTERRCTL_UD: + /* + * Important: this code must always return EINVAL and never any + * extended error, for testing purposes. + */ default: return (EINVAL); } diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -598,4 +598,6 @@ "fchroot", /* 590 = fchroot */ "setcred", /* 591 = setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3356,4 +3356,19 @@ _In_reads_bytes_(4) void *ptr ); } +593 AUE_INOTIFY STD|CAPENABLED { + int inotify_add_watch_at( + int fd, + int dfd, + _In_z_ const char *path, + uint32_t mask + ); + } +594 AUE_INOTIFY STD|CAPENABLED { + int inotify_rm_watch( + int fd, + int wd + ); + } + ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3482,6 +3482,24 @@ *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9317,6 +9335,38 @@ break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11305,6 +11355,16 @@ if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include 
#include @@ -2628,6 +2629,14 @@ atomic_thread_fence_rel(); atomic_store_ptr(&dvp->v_cache_dd, ncp); } else if (vp != NULL) { + /* + * Take the slow path in INOTIFY(). This flag will be lazily + * cleared by cache_vop_inotify() once all directories referring + * to vp are unwatched. + */ + if (__predict_false((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) + vn_irflag_set_cond(vp, VIRF_INOTIFY_PARENT); + /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the @@ -4008,6 +4017,56 @@ return (error); } +void +cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie) +{ + struct mtx *vlp; + struct namecache *ncp; + int isdir; + bool logged, self; + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + self = (vn_irflag_read(vp) & VIRF_INOTIFY) != 0 && + (vp->v_type != VDIR || (event & ~_IN_DIR_EVENTS) != 0); + + if (self) { + int selfevent; + + if (event == _IN_ATTRIB_LINKCOUNT) + selfevent = IN_ATTRIB; + else + selfevent = event; + inotify_log(vp, NULL, 0, selfevent | isdir, cookie); + } + if ((event & IN_ALL_EVENTS) == 0) + return; + + logged = false; + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); + TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) { + if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) + continue; + if ((vn_irflag_read(ncp->nc_dvp) & VIRF_INOTIFY) != 0) { + /* + * XXX-MJ if the vnode has two links in the same + * dir, we'll log the same event twice. + */ + inotify_log(ncp->nc_dvp, ncp->nc_name, ncp->nc_nlen, + event | isdir, cookie); + logged = true; + } + } + if (!logged && (vn_irflag_read(vp) & VIRF_INOTIFY_PARENT) != 0) { + /* + * We didn't find a watched directory that contains this vnode, + * so stop calling VOP_INOTIFY for operations on the vnode. + */ + vn_irflag_unset(vp, VIRF_INOTIFY_PARENT); + } + mtx_unlock(vlp); +} + #ifdef DDB static void db_print_vpath(struct vnode *vp) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -119,6 +120,8 @@ .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, + .vop_inotify = vop_stdinotify, + .vop_inotify_add_watch = vop_stdinotify_add_watch, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, @@ -1305,6 +1308,20 @@ return (1); } +int +vop_stdinotify(struct vop_inotify_args *ap) +{ + vn_inotify(ap->a_vp, ap->a_dvp, ap->a_cnp, ap->a_event, ap->a_cookie); + return (0); +} + +int +vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *ap) +{ + return (vn_inotify_add_watch(ap->a_vp, ap->a_sc, ap->a_mask, + ap->a_wdp, ap->a_td)); +} + int vop_stdioctl(struct vop_ioctl_args *ap) { diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c new file mode 100644 --- /dev/null +++ b/sys/kern/vfs_inotify.c @@ -0,0 +1,1008 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. 
+ */ + +#include "opt_ktrace.h" + +#include +#include +#include +#include +#include +#define EXTERR_CATEGORY EXTERR_CAT_INOTIFY +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +uint32_t inotify_rename_cookie; + +static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "inotify configuration"); + +static int inotify_max_queued_events = 16384; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN, + &inotify_max_queued_events, 0, + "Maximum number of events to queue on an inotify descriptor"); + +static int inotify_max_user_instances = 256; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN, + &inotify_max_user_instances, 0, + "Maximum number of inotify descriptors per user"); + +static int inotify_max_user_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN, + &inotify_max_user_watches, 0, + "Maximum number of inotify watches per user"); + +static int inotify_max_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN, + &inotify_max_watches, 0, + "Maximum number of inotify watches system-wide"); + +static int inotify_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD, + &inotify_watches, 0, + "Total number of inotify watches currently in use"); + +static int inotify_coalesce = 1; +SYSCTL_INT(_vfs_inotify, OID_AUTO, coalesce, CTLFLAG_RWTUN, + &inotify_coalesce, 0, + "Coalesce inotify events when possible"); + +static COUNTER_U64_DEFINE_EARLY(inotify_event_drops); +SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD, + &inotify_event_drops, + "Number of inotify events dropped due to limits or allocation failures"); + +static fo_rdwr_t inotify_read; +static fo_ioctl_t inotify_ioctl; +static fo_poll_t inotify_poll; +static fo_kqfilter_t inotify_kqfilter; +static fo_stat_t inotify_stat; +static fo_close_t inotify_close; +static fo_fill_kinfo_t inotify_fill_kinfo; + +static const struct fileops inotifyfdops = { + .fo_read = inotify_read, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, + .fo_ioctl = inotify_ioctl, + .fo_poll = inotify_poll, + .fo_kqfilter = inotify_kqfilter, + .fo_stat = inotify_stat, + .fo_close = inotify_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = inotify_fill_kinfo, + .fo_cmp = file_kcmp_generic, + .fo_flags = DFLAG_PASSABLE, +}; + +static void filt_inotifydetach(struct knote *kn); +static int filt_inotifyevent(struct knote *kn, long hint); + +static const struct filterops inotify_rfiltops = { + .f_isfd = 1, + .f_detach = filt_inotifydetach, + .f_event = filt_inotifyevent, +}; + +static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); + +struct inotify_record { + STAILQ_ENTRY(inotify_record) link; + struct inotify_event ev; +}; + +static uint64_t inotify_ino = 1; + +/* + * On LP64 systems this occupies 64 bytes, so we don't get internal + * fragmentation by allocating watches with malloc(9). If the size changes, + * consider using a UMA zone to improve memory efficiency. 
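+ * (That figure breaks down as 24 bytes for the sc and vp pointers plus
+ * wd and mask, 24 bytes of RB_ENTRY linkage and 16 bytes of TAILQ_ENTRY
+ * linkage, which exactly fills a 64-byte malloc(9) bucket.)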
+ */ +struct inotify_watch { + struct inotify_softc *sc; /* back-pointer */ + int wd; /* unique ID */ + uint32_t mask; /* event mask */ + struct vnode *vp; /* vnode being watched, refed */ + RB_ENTRY(inotify_watch) ilink; /* inotify linkage */ + TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */ +}; + +static void +inotify_init(void *arg __unused) +{ + /* Don't let a user hold too many vnodes. */ + inotify_max_user_watches = desiredvnodes / 3; + /* Don't let the system hold too many vnodes. */ + inotify_max_watches = desiredvnodes / 2; +} +SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL); + +static int +inotify_watch_cmp(const struct inotify_watch *a, + const struct inotify_watch *b) +{ + if (a->wd < b->wd) + return (-1); + else if (a->wd > b->wd) + return (1); + else + return (0); +} +RB_HEAD(inotify_watch_tree, inotify_watch); +RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp); + +struct inotify_softc { + struct mtx lock; /* serialize all softc writes */ + STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */ + struct inotify_record overflow; /* preallocated record */ + int nextwatch; /* next watch ID to try */ + int npending; /* number of pending events */ + size_t nbpending; /* bytes available to read */ + uint64_t ino; /* unique identifier */ + struct inotify_watch_tree watches; /* active watches */ + struct selinfo sel; /* select/poll/kevent info */ + struct ucred *cred; /* credential ref */ +}; + +static struct inotify_record * +inotify_dequeue(struct inotify_softc *sc) +{ + struct inotify_record *rec; + + mtx_assert(&sc->lock, MA_OWNED); + KASSERT(!STAILQ_EMPTY(&sc->pending), + ("%s: queue for %p is empty", __func__, sc)); + + rec = STAILQ_FIRST(&sc->pending); + STAILQ_REMOVE_HEAD(&sc->pending, link); + sc->npending--; + sc->nbpending -= sizeof(rec->ev) + rec->ev.len; + return (rec); +} + +static void +inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head) +{ + mtx_assert(&sc->lock, MA_OWNED); + + if (head) + STAILQ_INSERT_HEAD(&sc->pending, rec, link); + else + STAILQ_INSERT_TAIL(&sc->pending, rec, link); + sc->npending++; + sc->nbpending += sizeof(rec->ev) + rec->ev.len; +} + +static int +inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags, + struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + int error; + bool first; + + sc = fp->f_data; + error = 0; + + mtx_lock(&sc->lock); + while (STAILQ_EMPTY(&sc->pending)) { + if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&sc->lock); + return (EWOULDBLOCK); + } + error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0); + if (error != 0) { + mtx_unlock(&sc->lock); + return (error); + } + } + for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) { + size_t len; + + rec = inotify_dequeue(sc); + len = sizeof(rec->ev) + rec->ev.len; + if (uio->uio_resid < (ssize_t)len) { + inotify_enqueue(sc, rec, true); + if (first) { + error = EXTERROR(EINVAL, + "read buffer is too small"); + } + break; + } + mtx_unlock(&sc->lock); + error = uiomove(&rec->ev, len, uio); +#ifdef KTRACE + if (error == 0 && KTRPOINT(td, KTR_STRUCT)) + ktrstruct("inotify", &rec->ev, len); +#endif + mtx_lock(&sc->lock); + if (error != 0) { + inotify_enqueue(sc, rec, true); + mtx_unlock(&sc->lock); + return (error); + } + if (rec == &sc->overflow) { + /* + * Signal to inotify_queue_record() that the overflow + * record can be reused. 
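+ * A zeroed mask is the marker it checks for: while the mask still
+ * reads IN_Q_OVERFLOW the record is assumed to be sitting in the
+ * queue and further overflow events are simply dropped.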
+ */ + memset(rec, 0, sizeof(*rec)); + } else { + free(rec, M_INOTIFY); + } + } + mtx_unlock(&sc->lock); + return (error); +} + +static int +inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred, + struct thread *td) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + switch (com) { + case FIONREAD: + *(int *)data = (int)sc->nbpending; + return (0); + case FIONBIO: + case FIOASYNC: + return (0); + default: + return (ENOTTY); + } + + return (0); +} + +static int +inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) +{ + struct inotify_softc *sc; + int revents; + + sc = fp->f_data; + revents = 0; + + mtx_lock(&sc->lock); + if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(td, &sc->sel); + mtx_unlock(&sc->lock); + return (revents); +} + +static void +filt_inotifydetach(struct knote *kn) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + knlist_remove(&sc->sel.si_note, kn, 0); +} + +static int +filt_inotifyevent(struct knote *kn, long hint) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + mtx_assert(&sc->lock, MA_OWNED); + kn->kn_data = sc->nbpending; + return (kn->kn_data > 0); +} + +static int +inotify_kqfilter(struct file *fp, struct knote *kn) +{ + struct inotify_softc *sc; + + if (kn->kn_filter != EVFILT_READ) + return (EINVAL); + sc = fp->f_data; + kn->kn_fop = &inotify_rfiltops; + kn->kn_hook = sc; + knlist_add(&sc->sel.si_note, kn, 0); + return (0); +} + +static int +inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + memset(sb, 0, sizeof(*sb)); + sb->st_mode = S_IFREG | S_IRUSR; + sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX); + mtx_lock(&sc->lock); + sb->st_size = sc->nbpending; + sb->st_blocks = sc->npending; + sb->st_uid = sc->cred->cr_ruid; + sb->st_gid = sc->cred->cr_rgid; + sb->st_ino = sc->ino; + mtx_unlock(&sc->lock); + return (0); +} + +static void +inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch) +{ + struct vnode *vp; + + vp = watch->vp; + mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED); + + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + + TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink); + if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify)) + vn_irflag_unset_locked(vp, VIRF_INOTIFY); +} + +/* + * Assumes that the watch has already been removed from its softc. 
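+ * That is, the caller must already have taken it out of sc->watches
+ * (as inotify_close() does with RB_REMOVE()); only the vnode-side
+ * linkage and the references are torn down here.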
+ */ +static void +inotify_remove_watch(struct inotify_watch *watch) +{ + struct inotify_softc *sc; + struct vnode *vp; + + sc = watch->sc; + + vp = watch->vp; + mtx_lock(&vp->v_pollinfo->vpi_lock); + inotify_unlink_watch_locked(sc, watch); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + vrele(vp); + free(watch, M_INOTIFY); +} + +static int +inotify_close(struct file *fp, struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch *watch; + + sc = fp->f_data; + + mtx_lock(&sc->lock); + (void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0); + while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + mtx_unlock(&sc->lock); + inotify_remove_watch(watch); + mtx_lock(&sc->lock); + } + while (!STAILQ_EMPTY(&sc->pending)) { + rec = inotify_dequeue(sc); + if (rec != &sc->overflow) + free(rec, M_INOTIFY); + } + mtx_unlock(&sc->lock); + seldrain(&sc->sel); + knlist_destroy(&sc->sel.si_note); + mtx_destroy(&sc->lock); + crfree(sc->cred); + free(sc, M_INOTIFY); + return (0); +} + +static int +inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif, + struct filedesc *fdp) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + kif->kf_type = KF_TYPE_INOTIFY; + kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending; + kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending; + return (0); +} + +int +inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp) +{ + struct inotify_softc *sc; + int fflags; + + if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0) + return (EINVAL); + + if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1, + inotify_max_user_instances)) + return (EMFILE); + + sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO); + sc->nextwatch = 1; /* Required for compatibility. */ + STAILQ_INIT(&sc->pending); + RB_INIT(&sc->watches); + mtx_init(&sc->lock, "inotify", NULL, MTX_DEF); + knlist_init_mtx(&sc->sel.si_note, &sc->lock); + sc->cred = crhold(td->td_ucred); + sc->ino = atomic_fetchadd_64(&inotify_ino, 1); + + fflags = FREAD; + if ((flags & IN_NONBLOCK) != 0) + fflags |= FNONBLOCK; + if ((flags & IN_CLOEXEC) != 0) + *fflagsp |= O_CLOEXEC; + finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops); + + return (0); +} + +static struct inotify_record * +inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event, + uint32_t cookie, int waitok) +{ + struct inotify_event *evp; + struct inotify_record *rec; + + rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY, + waitok | M_ZERO); + if (rec == NULL) + return (NULL); + evp = &rec->ev; + evp->wd = wd; + evp->mask = event; + evp->cookie = cookie; + evp->len = _IN_NAMESIZE(namelen); + if (name != NULL) + memcpy(evp->name, name, namelen); + return (rec); +} + +static bool +inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp) +{ + struct inotify_record *prev; + + mtx_assert(&sc->lock, MA_OWNED); + + prev = STAILQ_LAST(&sc->pending, inotify_record, link); + return (prev != NULL && prev->ev.mask == evp->mask && + prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie && + prev->ev.len == evp->len && + (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0)); +} + +static void +inotify_overflow_event(struct inotify_event *evp) +{ + evp->mask = IN_Q_OVERFLOW; + evp->wd = -1; + evp->cookie = 0; + evp->len = 0; +} + +/* + * Put an event record on the queue for an inotify desscriptor. Return false if + * the record was not enqueued for some reason, true otherwise. 
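+ * On a false return the record still belongs to the caller, which must
+ * free it unless it is the preallocated overflow record.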
+ */ +static bool +inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec) +{ + struct inotify_event *evp; + + mtx_assert(&sc->lock, MA_OWNED); + + evp = &rec->ev; + if (__predict_false(rec == &sc->overflow)) { + /* + * Is the overflow record already in the queue? If so, there's + * not much else we can do: we're here because a kernel memory + * shortage prevented new record allocations. + */ + counter_u64_add(inotify_event_drops, 1); + if (evp->mask == IN_Q_OVERFLOW) + return (false); + inotify_overflow_event(evp); + } else { + /* Try to coalesce duplicate events. */ + if (inotify_coalesce && inotify_can_coalesce(sc, evp)) + return (false); + + /* + * Would this one overflow the queue? If so, convert it to an + * overflow event and try again to coalesce. + */ + if (sc->npending >= inotify_max_queued_events) { + counter_u64_add(inotify_event_drops, 1); + inotify_overflow_event(evp); + if (inotify_can_coalesce(sc, evp)) + return (false); + } + } + inotify_enqueue(sc, rec, false); + selwakeup(&sc->sel); + KNOTE_LOCKED(&sc->sel.si_note, 0); + wakeup(&sc->pending); + return (true); +} + +static int +inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen, + int event, uint32_t cookie) +{ + struct inotify_watch key; + struct inotify_softc *sc; + struct inotify_record *rec; + int relecount; + bool allocfail; + + relecount = 0; + + sc = watch->sc; + rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie, + M_NOWAIT); + if (rec == NULL) { + rec = &sc->overflow; + allocfail = true; + } else { + allocfail = false; + } + + mtx_lock(&sc->lock); + if (!inotify_queue_record(sc, rec) && rec != &sc->overflow) + free(rec, M_INOTIFY); + if ((watch->mask & IN_ONESHOT) != 0 || + (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) { + if (!allocfail) { + rec = inotify_alloc_record(watch->wd, NULL, 0, + IN_IGNORED, 0, M_NOWAIT); + if (rec == NULL) + rec = &sc->overflow; + if (!inotify_queue_record(sc, rec) && + rec != &sc->overflow) + free(rec, M_INOTIFY); + } + + /* + * Remove the watch, taking care to handle races with + * inotify_close(). + */ + key.wd = watch->wd; + if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + inotify_unlink_watch_locked(sc, watch); + free(watch, M_INOTIFY); + + /* Defer vrele() to until locks are dropped. */ + relecount++; + } + } + mtx_unlock(&sc->lock); + return (relecount); +} + +void +inotify_log(struct vnode *vp, const char *name, size_t namelen, int event, + uint32_t cookie) +{ + struct inotify_watch *watch, *tmp; + int relecount; + + KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0, + ("inotify_log: invalid event %#x", event)); + + relecount = 0; + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) { + KASSERT(watch->vp == vp, + ("inotify_log: watch %p vp != vp", watch)); + if ((watch->mask & event) != 0 || event == IN_UNMOUNT) { + relecount += inotify_log_one(watch, name, namelen, event, + cookie); + } + } + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + for (int i = 0; i < relecount; i++) + vrele(vp); +} + +/* + * An inotify event occurred on a watched vnode. + */ +void +vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp, + int event, uint32_t cookie) +{ + int isdir; + + VNPASS(vp->v_holdcnt > 0, vp); + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + + if (dvp != NULL) { + VNPASS(dvp->v_holdcnt > 0, dvp); + + /* + * Should we log an event for the vnode itself? 
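+ * For example, removing the last hard link of a watched regular file
+ * logs IN_DELETE_SELF against the file itself, in addition to the
+ * IN_DELETE seen by a watched parent directory, and renaming it away
+ * is reported as IN_MOVE_SELF with the cookie cleared.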
+ */ + if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) { + int selfevent; + + switch (event) { + case _IN_MOVE_DELETE: + case IN_DELETE: + /* + * IN_DELETE_SELF is only generated when the + * last hard link of a file is removed. + */ + selfevent = IN_DELETE_SELF; + if (vp->v_type != VDIR) { + struct vattr va; + int error; + + error = VOP_GETATTR(vp, &va, cnp->cn_cred); + if (error == 0 && va.va_nlink != 0) + selfevent = 0; + } + break; + case IN_MOVED_FROM: + cookie = 0; + selfevent = IN_MOVE_SELF; + break; + case _IN_ATTRIB_LINKCOUNT: + selfevent = IN_ATTRIB; + break; + default: + selfevent = event; + break; + } + + if ((selfevent & ~_IN_DIR_EVENTS) != 0) { + inotify_log(vp, NULL, 0, selfevent | isdir, + cookie); + } + } + + /* + * Something is watching the directory through which this vnode + * was referenced, so we may need to log the event. + */ + if ((event & IN_ALL_EVENTS) != 0 && + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) { + inotify_log(dvp, cnp->cn_nameptr, + cnp->cn_namelen, event | isdir, cookie); + } + } else { + /* + * We don't know which watched directory might contain the + * vnode, so we have to fall back to searching the name cache. + */ + cache_vop_inotify(vp, event, cookie); + } +} + +int +vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask, + uint32_t *wdp, struct thread *td) +{ + struct inotify_watch *watch, *watch1; + uint32_t wd; + + /* + * If this is a directory, make sure all of its entries are present in + * the name cache so that we're able to look them up if an event occurs. + * The persistent reference on the directory prevents the outgoing name + * cache entries from being reclaimed. + */ + if (vp->v_type == VDIR) { + struct dirent *dp; + char *buf; + off_t off; + size_t buflen, len; + int eof, error; + + buflen = 128 * sizeof(struct dirent); + buf = malloc(buflen, M_TEMP, M_WAITOK); + + error = 0; + len = off = eof = 0; + for (;;) { + struct nameidata nd; + + error = vn_dir_next_dirent(vp, td, buf, buflen, &dp, + &len, &off, &eof); + if (error != 0) + break; + if (len == 0) + /* Finished reading. */ + break; + if (strcmp(dp->d_name, ".") == 0 || + strcmp(dp->d_name, "..") == 0) + continue; + + /* + * namei() consumes a reference on the starting + * directory if it's specified as a vnode. + */ + vrefact(vp); + NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, + dp->d_name, vp); + error = namei(&nd); + if (error != 0) + break; + vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT); + vrele(nd.ni_vp); + } + free(buf, M_TEMP); + if (error != 0) + return (error); + } + + /* + * The vnode referenced in kern_inotify_add_watch() might be different + * than this one if nullfs is in the picture. + */ + vrefact(vp); + watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO); + watch->sc = sc; + watch->vp = vp; + watch->mask = mask; + + /* + * Are we updating an existing watch? Search the vnode's list rather + * than that of the softc, as the former is likely to be shorter. + */ + v_addpollinfo(vp); + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) { + if (watch1->sc == sc) + break; + } + mtx_lock(&sc->lock); + if (watch1 != NULL) { + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + /* + * We found an existing watch, update it based on our flags. 
+ */ + if ((mask & IN_MASK_CREATE) != 0) { + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EEXIST); + } + if ((mask & IN_MASK_ADD) != 0) + watch1->mask |= mask; + else + watch1->mask = mask; + *wdp = watch1->wd; + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EJUSTRETURN); + } + + /* + * We're creating a new watch. Add it to the softc and vnode watch + * lists. + */ + do { + struct inotify_watch key; + + /* + * Search for the next available watch descriptor. This is + * implemented so as to avoid reusing watch descriptors for as + * long as possible. + */ + key.wd = wd = sc->nextwatch++; + watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key); + } while (watch1 != NULL || wd == 0); + watch->wd = wd; + RB_INSERT(inotify_watch_tree, &sc->watches, watch); + TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink); + mtx_unlock(&sc->lock); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + vn_irflag_set_cond(vp, VIRF_INOTIFY); + + *wdp = wd; + + return (0); +} + +void +vn_inotify_revoke(struct vnode *vp) +{ + if (vp->v_pollinfo == NULL) { + /* This is a nullfs vnode which shadows a watched vnode. */ + return; + } + inotify_log(vp, NULL, 0, IN_UNMOUNT, 0); +} + +static int +fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp, + struct file **fpp) +{ + struct file *fp; + int error; + + error = fget(td, fd, needrightsp, &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_INOTIFY) { + fdrop(fp, td); + return (EINVAL); + } + *fpp = fp; + return (0); +} + +int +kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask, + struct thread *td) +{ + struct nameidata nd; + struct file *fp; + struct inotify_softc *sc; + struct vnode *vp; + uint32_t wd; + int count, error; + + fp = NULL; + vp = NULL; + + if ((mask & IN_ALL_EVENTS) == 0) + return (EXTERROR(EINVAL, "no events specified")); + if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) == + (IN_MASK_ADD | IN_MASK_CREATE)) + return (EXTERROR(EINVAL, + "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive")); + if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0) + return (EXTERROR(EINVAL, "unrecognized flag")); + + error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + NDINIT_AT(&nd, LOOKUP, + ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF | + LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd); + error = namei(&nd); + if (error != 0) + goto out; + NDFREE_PNBUF(&nd); + vp = nd.ni_vp; + + error = VOP_ACCESS(vp, VREAD, td->td_ucred, td); + if (error != 0) + goto out; + + if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + + count = atomic_fetchadd_int(&inotify_watches, 1); + if (count > inotify_max_watches) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1, + inotify_max_user_watches)) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td); + if (error != 0) { + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + if (error == EJUSTRETURN) { + /* We updated an existing watch, everything is ok. 
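Editorial note, not part of the change: the flag handling above mirrors Linux semantics, where re-adding a watch for the same object updates the existing watch, IN_MASK_ADD ORs the new mask in, IN_MASK_CREATE refuses to touch an existing watch, and combining the two is rejected with EINVAL in kern_inotify_add_watch(). A hedged userspace illustration, with a hypothetical directory path:

#include <sys/inotify.h>
#include <errno.h>

static void
mask_flag_example(int fd)
{
	int wd1, wd2, wd3;

	wd1 = inotify_add_watch(fd, "/tmp/watched", IN_CREATE);
	/* Same path again: the existing watch is found and updated. */
	wd2 = inotify_add_watch(fd, "/tmp/watched", IN_DELETE | IN_MASK_ADD);
	/* wd2 == wd1; the watch now reports both IN_CREATE and IN_DELETE. */
	wd3 = inotify_add_watch(fd, "/tmp/watched", IN_OPEN | IN_MASK_CREATE);
	/* wd3 == -1 with errno == EEXIST, since a watch already exists. */
	(void)wd1;
	(void)wd2;
	(void)wd3;
}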
*/ + error = 0; + } else { + goto out; + } + } + td->td_retval[0] = wd; + +out: + if (vp != NULL) + vput(vp); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_add_watch_at(struct thread *td, + struct inotify_add_watch_at_args *uap) +{ + return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path, + uap->mask, td)); +} + +int +kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td) +{ + struct file *fp; + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch key, *watch; + int error; + + error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK); + + /* + * For compatibility with Linux, we do not remove pending events + * associated with the watch. Watch descriptors are implemented so as + * to avoid being reused for as long as possible, so one hopes that any + * pending events from the removed watch descriptor will be removed + * before the watch descriptor is recycled. + */ + key.wd = wd; + mtx_lock(&sc->lock); + watch = RB_FIND(inotify_watch_tree, &sc->watches, &key); + if (watch == NULL) { + free(rec, M_INOTIFY); + error = EINVAL; + } else { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + if (!inotify_queue_record(sc, rec)) { + free(rec, M_INOTIFY); + error = 0; + } + } + mtx_unlock(&sc->lock); + if (watch != NULL) + inotify_remove_watch(watch); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap) +{ + return (kern_inotify_rm_watch(uap->fd, uap->wd, td)); +} diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -74,15 +74,21 @@ #define NDVALIDATE(ndp) #endif +/* + * Reset ndp to its original state. + */ +#define NDRESET(ndp) do { \ + NDREINIT_DBG(ndp); \ + ndp->ni_resflags = 0; \ + ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ +} while (0) /* * Prepare namei() to restart. Reset components to its original state and set * ISRESTARTED flag which signals the underlying lookup code to change the root * from ABI root to actual root and prevents a further restarts. */ #define NDRESTART(ndp) do { \ - NDREINIT_DBG(ndp); \ - ndp->ni_resflags = 0; \ - ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ + NDRESET(ndp); \ ndp->ni_cnd.cn_flags |= ISRESTARTED; \ } while (0) @@ -162,8 +168,8 @@ */ struct nameicap_tracker { - struct vnode *dp; TAILQ_ENTRY(nameicap_tracker) nm_link; + struct mount *mp; }; /* Zone for cap mode tracker elements used for dotdot capability checks. 
*/ @@ -192,49 +198,75 @@ "enables \"..\" components in path lookup in capability mode " "on non-local mount"); -static void +static int nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; + struct mount *mp; + int error; if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) - return; + return (0); + mp = NULL; + error = VOP_GETWRITEMOUNT(dp, &mp); + if (error != 0) + return (error); nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); - if (nt != NULL && nt->dp == dp) - return; + if (nt != NULL && nt->mp == mp) { + vfs_rel(mp); + return (0); + } nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK); - vhold(dp); - nt->dp = dp; - TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + nt->mp = mp; + error = lockmgr(&mp->mnt_renamelock, LK_SHARED | LK_NOWAIT, 0); + if (error != 0) { + MPASS(ndp->ni_nctrack_mnt == NULL); + ndp->ni_nctrack_mnt = mp; + free(nt, M_NAMEITRACKER); + error = ERESTART; + } else { + TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + } + return (error); } static void -nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) +nameicap_cleanup(struct nameidata *ndp, int error) { struct nameicap_tracker *nt, *nt1; + struct mount *mp; + + KASSERT((ndp->ni_nctrack_mnt == NULL && + TAILQ_EMPTY(&ndp->ni_cap_tracker)) || + (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, + ("tracker active and not strictrelative")); - nt = first; - TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + mp = nt->mp; + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(mp); TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); - vdrop(nt->dp); free(nt, M_NAMEITRACKER); } -} -static void -nameicap_cleanup(struct nameidata *ndp) -{ - KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || - (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); - nameicap_cleanup_from(ndp, NULL); + mp = ndp->ni_nctrack_mnt; + if (mp != NULL) { + if (error == ERESTART) { + lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + } + vfs_rel(mp); + ndp->ni_nctrack_mnt = NULL; + } } /* - * For dotdot lookups in capability mode, only allow the component - * lookup to succeed if the resulting directory was already traversed - * during the operation. This catches situations where already - * traversed directory is moved to different parent, and then we walk - * over it with dotdots. + * For dotdot lookups in capability mode, disallow walking over the + * directory ni_rbeneath_dpp that was used as the starting point of + * the lookup. Since we take the mnt_renamelocks of all mounts we + * ever walked over during lookup, parallel renames are disabled. + * This prevents the situation where a walk over ni_rbeneath_dpp + * would otherwise go undetected in a chain of dotdot lookups.
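Editorial note, not part of the change: the userspace-visible effect of the check described above is the familiar Capsicum rule that a lookup started from a directory descriptor may not walk above that directory. A hedged sketch, assuming vfs.lookup_cap_dotdot is enabled (its default) and with hypothetical paths:

#include <sys/capsicum.h>
#include <fcntl.h>
#include <unistd.h>

static void
dotdot_example(void)
{
	int dfd, fd;

	dfd = open("/var/sandbox/work", O_RDONLY | O_DIRECTORY);
	(void)cap_enter();
	/* Stays at or below dfd, so the dotdot component is permitted. */
	fd = openat(dfd, "sub/../data", O_RDONLY);
	if (fd >= 0)
		(void)close(fd);
	/* Walks above the starting directory: fails with ENOTCAPABLE. */
	fd = openat(dfd, "../escape", O_RDONLY);
	if (fd >= 0)
		(void)close(fd);
	(void)close(dfd);
}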
* * Also allow to force failure of dotdot lookups for non-local * filesystems, where external agents might assist local lookups to @@ -243,7 +275,6 @@ static int nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) { - struct nameicap_tracker *nt; struct mount *mp; if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & @@ -253,22 +284,16 @@ NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) - return (ENOTCAPABLE); + goto violation; + if (dp == ndp->ni_rbeneath_dpp) + goto violation; mp = dp->v_mount; if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && (mp->mnt_flag & MNT_LOCAL) == 0) - goto capfail; - TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, - nm_link) { - if (dp == nt->dp) { - nt = TAILQ_NEXT(nt, nm_link); - if (nt != NULL) - nameicap_cleanup_from(ndp, nt); - return (0); - } - } + goto violation; + return (0); -capfail: +violation: if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); return (ENOTCAPABLE); @@ -394,6 +419,8 @@ NI_LCF_CAP_DOTDOT; } } + if (error == 0 && (ndp->ni_lcf & NI_LCF_STRICTREL) != 0) + ndp->ni_rbeneath_dpp = *dpp; /* * If we are auditing the kernel pathname, save the user pathname. @@ -631,6 +658,7 @@ error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); return (error); @@ -661,12 +689,12 @@ else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && (cnp->cn_flags & ISRESTARTED) == 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ERESTART); NDRESTART(ndp); goto restart; } return (error); case CACHE_FPL_STATUS_PARTIAL: - TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; case CACHE_FPL_STATUS_DESTROYED: @@ -674,18 +702,21 @@ error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } cnp->cn_nameptr = cnp->cn_pnbuf; /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: - TAILQ_INIT(&ndp->ni_cap_tracker); MPASS(ndp->ni_lcf == 0); if (*cnp->cn_pnbuf == '\0') { if ((cnp->cn_flags & EMPTYPATH) != 0) { - return (namei_emptypath(ndp)); + error = namei_emptypath(ndp); + nameicap_cleanup(ndp, error); + return (error); } namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ENOENT); SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL, false, ndp); return (ENOENT); @@ -693,6 +724,7 @@ error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } break; @@ -705,16 +737,23 @@ ndp->ni_startdir = dp; error = vfs_lookup(ndp); if (error != 0) { - if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && - error == ENOENT && - (cnp->cn_flags & ISRESTARTED) == 0)) { - nameicap_cleanup(ndp); - pwd_drop(pwd); - namei_cleanup_cnp(cnp); - NDRESTART(ndp); - goto restart; - } else + uint64_t was_restarted; + bool abi_restart; + + was_restarted = ndp->ni_cnd.cn_flags & + ISRESTARTED; + abi_restart = pwd->pwd_adir != pwd->pwd_rdir && + error == ENOENT && was_restarted == 0; + if (error != ERESTART && !abi_restart) goto out; + nameicap_cleanup(ndp, error); + pwd_drop(pwd); + namei_cleanup_cnp(cnp); + NDRESET(ndp); + if (abi_restart) + was_restarted = ISRESTARTED; + ndp->ni_cnd.cn_flags |= was_restarted; + goto restart; } /* @@ -723,7 +762,7 @@ if ((cnp->cn_flags & ISSYMLINK) == 0) { SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, 
false, ndp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, 0); pwd_drop(pwd); NDVALIDATE(ndp); return (0); @@ -756,10 +795,10 @@ ndp->ni_vp = NULL; vrele(ndp->ni_dvp); out: - MPASS(error != 0); + MPASS(error != 0 && error != ERESTART); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); namei_cleanup_cnp(cnp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, error); pwd_drop(pwd); return (error); } @@ -1185,7 +1224,9 @@ } } - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; /* * Make sure degenerate names don't get here, their handling was @@ -1210,9 +1251,7 @@ * the jail or chroot, don't let them out. * 5. If doing a capability lookup and lookup_cap_dotdot is * enabled, return ENOTCAPABLE if the lookup would escape - * from the initial file descriptor directory. Checks are - * done by ensuring that namei() already traversed the - * result of dotdot lookup. + * from the initial file descriptor directory. */ if (cnp->cn_flags & ISDOTDOT) { if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR | @@ -1238,7 +1277,7 @@ NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) { error = ENOTCAPABLE; - goto capdotdot; + goto bad; } } if (isroot || ((dp->v_vflag & VV_ROOT) != 0 && @@ -1261,11 +1300,6 @@ vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - error = nameicap_check_dotdot(ndp, dp); - if (error != 0) { -capdotdot: - goto bad; - } } } @@ -1314,7 +1348,9 @@ vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; goto unionlookup; } @@ -1415,7 +1451,7 @@ goto dirloop; } if (cnp->cn_flags & ISDOTDOT) { - error = nameicap_check_dotdot(ndp, ndp->ni_vp); + error = nameicap_check_dotdot(ndp, ndp->ni_dvp); if (error != 0) goto bad2; } @@ -1485,8 +1521,11 @@ } success_right_lock: if (ndp->ni_vp != NULL) { - if ((cnp->cn_flags & ISDOTDOT) == 0) - nameicap_tracker_add(ndp, ndp->ni_vp); + if ((cnp->cn_flags & ISDOTDOT) == 0) { + error = nameicap_tracker_add(ndp, ndp->ni_vp); + if (error != 0) + goto bad2; + } if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) return (vfs_lookup_failifexists(ndp)); } diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -156,6 +156,7 @@ mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF); lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); + lockinit(&mp->mnt_renamelock, PVFS, "rename", 0, 0); mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO); mp->mnt_ref = 0; mp->mnt_vfs_ops = 1; @@ -170,6 +171,7 @@ mp = (struct mount *)mem; uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu); + lockdestroy(&mp->mnt_renamelock); lockdestroy(&mp->mnt_explock); mtx_destroy(&mp->mnt_listmtx); mtx_destroy(&mp->mnt_mtx); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -38,7 +38,6 @@ * External virtual filesystem routines */ -#include #include "opt_ddb.h" #include "opt_watchdog.h" @@ -57,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -5246,7 +5246,8 @@ static void destroy_vpollinfo(struct vpollinfo *vi) { - + KASSERT(TAILQ_EMPTY(&vi->vpi_inotify), + ("%s: pollinfo %p has lingering watches", __func__, vi)); knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); @@ -5260,12 
+5261,13 @@ { struct vpollinfo *vi; - if (vp->v_pollinfo != NULL) + if (atomic_load_ptr(&vp->v_pollinfo) != NULL) return; vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_lock); + TAILQ_INIT(&vi->vpi_inotify); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); @@ -5851,6 +5853,8 @@ struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS + struct mount *tmp; + if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); @@ -5868,6 +5872,11 @@ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); + + tmp = NULL; + VOP_GETWRITEMOUNT(a->a_tdvp, &tmp); + lockmgr_assert(&tmp->mnt_renamelock, KA_XLOCKED); + vfs_rel(tmp); #endif /* * It may be tempting to add vn_seqc_write_begin/end calls here and @@ -6056,6 +6065,28 @@ } #endif +void +vop_allocate_post(void *ap, int rc) +{ + struct vop_allocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); +} + +void +vop_copy_file_range_post(void *ap, int rc) +{ + struct vop_copy_file_range_args *a; + + a = ap; + if (rc == 0) { + INOTIFY(a->a_invp, IN_ACCESS); + INOTIFY(a->a_outvp, IN_MODIFY); + } +} + void vop_create_pre(void *ap) { @@ -6076,8 +6107,20 @@ a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } +} + +void +vop_deallocate_post(void *ap, int rc) +{ + struct vop_deallocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); } void @@ -6122,8 +6165,10 @@ a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6153,6 +6198,8 @@ if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_LINK); VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE); + INOTIFY_NAME(vp, tdvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, tdvp, a->a_cnp, IN_CREATE); } } @@ -6176,8 +6223,10 @@ a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } #ifdef DEBUG_VFS_LOCKS @@ -6212,8 +6261,10 @@ a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6225,8 +6276,10 @@ a = ap; vp = a->a_vp; ASSERT_VOP_IN_SEQC(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_REVOKE); + INOTIFY_REVOKE(vp); + } } void @@ -6257,6 +6310,8 @@ if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6288,6 +6343,8 @@ VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); + INOTIFY_MOVE(a->a_fvp, a->a_fdvp, a->a_fcnp, a->a_tvp, + a->a_tdvp, a->a_tcnp); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); @@ -6327,6 +6384,7 @@ vp->v_vflag |= VV_UNLINKED; VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6350,8 +6408,10 @@ a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6396,8 +6456,10 @@ a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - 
if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6420,8 +6482,10 @@ a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6429,8 +6493,10 @@ { struct vop_open_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); + INOTIFY(a->a_vp, IN_OPEN); + } } void @@ -6442,6 +6508,8 @@ !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); + INOTIFY(a->a_vp, (a->a_fflag & FWRITE) != 0 ? + IN_CLOSE_WRITE : IN_CLOSE_NOWRITE); } } @@ -6450,8 +6518,10 @@ { struct vop_read_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } void @@ -6468,8 +6538,10 @@ { struct vop_readdir_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } static struct knlist fs_knlist; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3766,7 +3766,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg) { - struct mount *mp = NULL; + struct mount *mp, *tmp; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; uint64_t tondflags; @@ -3774,6 +3774,7 @@ short irflag; again: + tmp = mp = NULL; bwillwrite(); #ifdef MAC if (mac_vnode_check_rename_from_enabled()) { @@ -3809,6 +3810,7 @@ tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { +again1: NDFREE_PNBUF(&fromnd); NDFREE_PNBUF(&tond); if (tvp != NULL) @@ -3819,11 +3821,25 @@ vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); + if (tmp != NULL) { + lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); + vfs_rel(tmp); + tmp = NULL; + } error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); if (error != 0) return (error); goto again; } + error = VOP_GETWRITEMOUNT(tdvp, &tmp); + if (error != 0 || tmp == NULL) + goto again1; + error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); + if (error != 0) { + vn_finished_write(mp); + goto again1; + } irflag = vn_irflag_read(fvp); if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || (irflag & VIRF_NAMEDDIR) != 0) { @@ -3884,6 +3900,8 @@ vrele(fromnd.ni_dvp); vrele(fvp); } + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(tmp); vn_finished_write(mp); out1: if (error == ERESTART) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -308,7 +309,8 @@ NDREINIT(ndp); goto restart; } - if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) + if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0 || + (vn_irflag_read(ndp->ni_dvp) & VIRF_INOTIFY) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, @@ -484,6 +486,7 @@ if (vp->v_type != VFIFO && vp->v_type != VSOCK && VOP_ACCESS(vp, VREAD, cred, td) == 0) fp->f_flag |= FKQALLOWED; + INOTIFY(vp, IN_OPEN); return (0); } @@ -1746,6 +1749,8 @@ vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, cred); VOP_ADD_WRITECOUNT_CHECKED(vp, -1); + if (error == 0) + INOTIFY(vp, IN_MODIFY); } return (error); } diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -702,6 
+702,7 @@ %% allocate vp E E E +%! allocate post vop_allocate_post vop_allocate { IN struct vnode *vp; @@ -786,6 +787,7 @@ %% copy_file_range invp U U U %% copy_file_range outvp U U U +%! copy_file_range post vop_copy_file_range_post vop_copy_file_range { IN struct vnode *invp; @@ -810,6 +812,7 @@ %% deallocate vp L L L +%! deallocate post vop_deallocate_post vop_deallocate { IN struct vnode *vp; @@ -821,6 +824,27 @@ }; +%% inotify vp - - - + +vop_inotify { + IN struct vnode *vp; + IN struct vnode *dvp; + IN struct componentname *cnp; + IN int event; + IN uint32_t cookie; +}; + + +%% inotify_add_watch vp L L L + +vop_inotify_add_watch { + IN struct vnode *vp; + IN struct inotify_softc *sc; + IN uint32_t mask; + OUT uint32_t *wdp; + IN struct thread *td; +}; + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile --- a/sys/modules/iwlwifi/Makefile +++ b/sys/modules/iwlwifi/Makefile @@ -4,6 +4,7 @@ WITH_CONFIG_PM= 0 WITH_DEBUGFS= 1 +WITH_CONFIG_ACPI= 1 KMOD= if_iwlwifi @@ -40,6 +41,12 @@ CFLAGS+= -DCONFIG_PM_SLEEP .endif +.if defined(WITH_CONFIG_ACPI) && ${WITH_CONFIG_ACPI} > 0 +SRCS+= fw/acpi.c +CFLAGS+= -DCONFIG_ACPI +CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI +.endif + SRCS+= iwl-devtrace.c # Other @@ -56,7 +63,6 @@ # Helpful after fresh imports. #CFLAGS+= -ferror-limit=0 -#CFLAGS+= -DCONFIG_ACPI=1 #CFLAGS+= -DCONFIG_INET=1 # Need LKPI TSO implementation. #CFLAGS+= -DCONFIG_IPV6=1 CFLAGS+= -DCONFIG_IWLWIFI_DEBUG=1 diff --git a/sys/modules/rtw89/Makefile b/sys/modules/rtw89/Makefile --- a/sys/modules/rtw89/Makefile +++ b/sys/modules/rtw89/Makefile @@ -39,6 +39,7 @@ SRCS+= opt_wlan.h opt_inet6.h opt_inet.h opt_acpi.h CFLAGS+= -DKBUILD_MODNAME='"rtw89"' +CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI CFLAGS+= -I${DEVRTW89DIR} CFLAGS+= ${LINUXKPI_INCLUDES} diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -80,6 +80,23 @@ (((addr)[0] | (addr)[1] | (addr)[2] | \ (addr)[3] | (addr)[4] | (addr)[5]) == 0x00) +/* + * 802.1q VID constants from IEEE 802.1Q-2014, table 9-2. + */ + +/* Null VID: The tag contains only PCP (priority) and DEI information. */ +#define DOT1Q_VID_NULL 0x0 +/* The default PVID for a bridge port. NB: bridge(4) does not honor this. */ +#define DOT1Q_VID_DEF_PVID 0x1 +/* The default SR_PVID for SRP Stream related traffic. */ +#define DOT1Q_VID_DEF_SR_PVID 0x2 +/* A VID reserved for implementation use, not permitted on the wire. */ +#define DOT1Q_VID_RSVD_IMPL 0xfff +/* The lowest valid VID. */ +#define DOT1Q_VID_MIN 0x1 +/* The highest valid VID. */ +#define DOT1Q_VID_MAX 0xffe + /* * This is the type of the VLAN ID inside the tag, not the tag itself. */ diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -254,6 +254,8 @@ uint32_t bif_addrcnt; /* cur. 
# of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; + ether_vlanid_t bif_untagged; /* untagged vlan id */ + ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */ }; /* @@ -331,13 +333,12 @@ static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, - struct mbuf *); + struct mbuf *, struct bridge_iflist *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static bool bridge_member_ifaddrs(void); - static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, @@ -353,6 +354,9 @@ static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, ether_vlanid_t); +static bool bridge_vfilter_in(const struct bridge_iflist *, struct mbuf *); +static bool bridge_vfilter_out(const struct bridge_iflist *, + const struct mbuf *); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); @@ -400,6 +404,9 @@ static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); +static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *); +static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); +static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); @@ -618,6 +625,14 @@ { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + { bridge_ioctl_sifuntagged, sizeof(struct ifbreq), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_gifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_COPYOUT }, }; static const int bridge_control_table_size = nitems(bridge_control_table); @@ -832,6 +847,7 @@ ifp->if_softc = sc; if_initname(ifp, bridge_name, ifd->unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_capabilities = ifp->if_capenable = IFCAP_VLAN_HWTAGGING; ifp->if_ioctl = bridge_ioctl; #ifdef ALTQ ifp->if_start = bridge_altq_start; @@ -954,6 +970,7 @@ struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; + struct ifbif_vlan_req ifvlanreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; @@ -1495,6 +1512,7 @@ req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; + req->ifbr_untagged = bif->bif_untagged; /* Copy STP state options as flags */ if (bp->bp_operedge) @@ -1872,6 +1890,84 @@ return (0); } +static int +bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg) +{ + struct ifbreq *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbr_ifsname); + if (bif == NULL) + return (ENOENT); + + if (req->ifbr_untagged > DOT1Q_VID_MAX) + return (EINVAL); + + if (req->ifbr_untagged != DOT1Q_VID_NULL) + bif->bif_flags |= IFBIF_VLANFILTER; + bif->bif_untagged = req->ifbr_untagged; + return (0); +} + +static int +bridge_ioctl_sifvlanset(struct bridge_softc *sc, 
void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + /* Reject invalid VIDs. */ + if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || + BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) + return (EINVAL); + + switch (req->bv_op) { + /* Replace the existing vlan set with the new set */ + case BRDG_VLAN_OP_SET: + BIT_COPY(BRVLAN_SETSIZE, &req->bv_set, &bif->bif_vlan_set); + break; + + /* Modify the existing vlan set to add the given vlans */ + case BRDG_VLAN_OP_ADD: + BIT_OR(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Modify the existing vlan set to remove the given vlans */ + case BRDG_VLAN_OP_DEL: + BIT_ANDNOT(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Invalid or unknown operation */ + default: + return (EINVAL); + } + + /* + * The only reason to modify the VLAN access list is to use VLAN + * filtering on this interface, so enable it automatically. + */ + bif->bif_flags |= IFBIF_VLANFILTER; + + return (0); +} + +static int +bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + return (0); +} + static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { @@ -2150,12 +2246,25 @@ * */ static int -bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) +bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, + struct bridge_iflist *bif) { int len, err = 0; short mflags; struct mbuf *m0; + /* + * Find the bridge member port this packet is being sent on, if the + * caller didn't already provide it. + */ + if (bif == NULL) + bif = bridge_lookup_member_if(sc, dst_ifp); + if (bif == NULL) { + /* Perhaps the interface was removed from the bridge */ + m_freem(m); + return (EINVAL); + } + /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; @@ -2163,6 +2272,18 @@ len = m->m_pkthdr.len; mflags = m->m_flags; + /* + * If VLAN filtering is enabled, and the native VLAN ID of the + * outgoing interface matches the VLAN ID of the frame, remove + * the VLAN header. + */ + if ((bif->bif_flags & IFBIF_VLANFILTER) && + bif->bif_untagged != DOT1Q_VID_NULL && + VLANTAGOF(m) == bif->bif_untagged) { + m->m_flags &= ~M_VLANTAG; + m->m_pkthdr.ether_vtag = 0; + } + /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. 
@@ -2234,7 +2355,7 @@ return; } - bridge_enqueue(sc, ifp, m); + bridge_enqueue(sc, ifp, m, NULL); } /* @@ -2329,7 +2450,7 @@ } } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } if (used == 0) m_freem(m); @@ -2347,7 +2468,7 @@ return (0); } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, NULL); return (0); } @@ -2364,17 +2485,18 @@ struct ether_header *eh; struct ifnet *dst_if; int error = 0; + ether_vlanid_t vlan; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); + vlan = VLANTAGOF(m); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && - (dst_if = bridge_rtlookup(sc, eh->ether_dhost, DOT1Q_VID_NULL)) != - NULL) { - error = bridge_enqueue(sc, dst_if, m); + (dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan)) != NULL) { + error = bridge_enqueue(sc, dst_if, m, NULL); } else bridge_broadcast(sc, ifp, m, 0); @@ -2435,18 +2557,18 @@ struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; - uint16_t vlan; uint8_t *dst; int error; + ether_vlanid_t vlan; NET_EPOCH_ASSERT(); src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; + vlan = VLANTAGOF(m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); - vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) @@ -2555,6 +2677,10 @@ if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(dbif, m)) + goto drop; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; @@ -2566,7 +2692,7 @@ return; } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, dbif); return; drop: @@ -2636,6 +2762,15 @@ return (NULL); } + /* Do VLAN filtering. */ + if (!bridge_vfilter_in(bif, m)) { + if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (NULL); + } + /* bridge_vfilter_in() may add a tag */ + vlan = VLANTAGOF(m); + bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { @@ -2761,6 +2896,15 @@ } \ if ((iface) != bifp) \ ETHER_BPF_MTAP(iface, m); \ + /* Pass tagged packets to if_vlan, if it's loaded */ \ + if (VLANTAGOF(m) != 0) { \ + if (bifp->if_vlantrunk == NULL) { \ + m_freem(m); \ + return (NULL); \ + } \ + (*vlan_input_p)(bifp, m); \ + return (NULL); \ + } \ return (m); \ } \ \ @@ -2817,6 +2961,30 @@ { struct bridge_softc *sc; + if (ifp->if_type == IFT_L2VLAN) { + /* + * vlan(4) gives us the vlan ifnet, so we need to get the + * bridge softc to get a pointer to ether_input to send the + * packet to. + */ + struct ifnet *bifp = NULL; + + if (vlan_trunkdev_p == NULL) { + m_freem(m); + return; + } + + bifp = vlan_trunkdev_p(ifp); + if (bifp == NULL) { + m_freem(m); + return; + } + + sc = if_getsoftc(bifp); + sc->sc_if_input(ifp, m); + return; + } + KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0, ("%s: iface %s is not running in netmap mode", __func__, if_name(ifp))); @@ -2867,6 +3035,10 @@ if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(dbif, m)) + continue; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; @@ -2910,7 +3082,7 @@ continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, dbif); } if (used == 0) m_freem(m); @@ -2946,10 +3118,115 @@ continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } } +/* + * Incoming VLAN filtering. 
Given a frame and the member interface it was + received on, decide whether the port configuration allows it. + */ +static bool +bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) +{ + ether_vlanid_t vlan; + + vlan = VLANTAGOF(m); + /* Make sure the vlan id is reasonable. */ + if (vlan > DOT1Q_VID_MAX) + return (false); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((sbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + if (vlan == DOT1Q_VID_NULL) { + /* + * The frame doesn't have a tag. If the interface does not + * have an untagged vlan configured, drop the frame. + */ + if (sbif->bif_untagged == DOT1Q_VID_NULL) + return (false); + + /* + * Otherwise, insert a new tag based on the interface's + * untagged vlan id. + */ + m->m_pkthdr.ether_vtag = sbif->bif_untagged; + m->m_flags |= M_VLANTAG; + } else { + /* + * The frame has a tag, so check it matches the interface's + * vlan access list. We explicitly do not accept tagged + * frames for the untagged vlan id here (unless it's also + * in the access list). + */ + if (!BRVLAN_TEST(&sbif->bif_vlan_set, vlan)) + return (false); + } + + /* Accept the frame. */ + return (true); +} + +/* + * Outgoing VLAN filtering. Given a frame, its vlan, and the member interface + * we intend to send it to, decide whether the port configuration allows it to + * be sent. + */ +static bool +bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) +{ + struct ether_header *eh; + ether_vlanid_t vlan; + + NET_EPOCH_ASSERT(); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((dbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + vlan = VLANTAGOF(m); + + /* + * Always allow untagged 802.1D STP frames, even if they would + * otherwise be dropped. This is required for STP to work on + * a filtering bridge. + * + * Tagged STP (Cisco PVST+) is a non-standard extension, so + * handle those frames via the normal filtering path. + */ + eh = mtod(m, struct ether_header *); + if (vlan == DOT1Q_VID_NULL && + memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) + return (true); + + /* + * If the frame wasn't assigned to a vlan at ingress, drop it. + * We can't forward these frames to filtering ports because we + * don't know what VLAN they're supposed to be in. + */ + if (vlan == DOT1Q_VID_NULL) + return (false); + + /* + * If the frame's vlan matches the interface's untagged vlan, + * allow it. + */ + if (vlan == dbif->bif_untagged) + return (true); + + /* + * If the frame's vlan is on the interface's tagged access list, + * allow it. + */ + if (BRVLAN_TEST(&dbif->bif_vlan_set, vlan)) + return (true); + + /* The frame was not permitted, so drop it. */ + return (false); +} + /* * bridge_rtupdate: * diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -78,6 +78,8 @@ #define _NET_IF_BRIDGEVAR_H_ #include +#include +#include #include #include #include @@ -122,6 +124,9 @@ #define BRDGSPROTO 28 /* set protocol (ifbrparam) */ #define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */ +#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */ +#define BRDGSIFVLANSET 32 /* set if vlan set */ +#define BRDGGIFVLANSET 33 /* get if vlan set */ /* * Generic bridge control request.
@@ -139,6 +144,7 @@ uint32_t ifbr_addrcnt; /* member if addr number */ uint32_t ifbr_addrmax; /* member if addr max */ uint32_t ifbr_addrexceeded; /* member if addr violations */ + ether_vlanid_t ifbr_untagged; /* member if untagged vlan */ uint8_t pad[32]; }; @@ -155,10 +161,11 @@ #define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */ #define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */ #define IFBIF_PRIVATE 0x0800 /* if is a private segment */ +#define IFBIF_VLANFILTER 0x1000 /* if does vlan filtering */ #define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \ "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \ - "\011AUTOPTP" + "\011AUTOPTP\015VLANFILTER" #define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \ IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \ IFBIF_BSTP_ADMCOST) /* not saved */ @@ -304,6 +311,26 @@ eaddr[5] = pv >> 0; \ } while (0) +/* + * Bridge VLAN access request. + */ +#define BRVLAN_SETSIZE 4096 +typedef __BITSET_DEFINE(ifbvlan_set, BRVLAN_SETSIZE) ifbvlan_set_t; + +#define BRVLAN_SET(set, bit) __BIT_SET(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_CLR(set, bit) __BIT_CLR(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_TEST(set, bit) __BIT_ISSET(BRVLAN_SETSIZE, (bit), set) + +#define BRDG_VLAN_OP_SET 1 /* replace current vlan set */ +#define BRDG_VLAN_OP_ADD 2 /* add vlans to current set */ +#define BRDG_VLAN_OP_DEL 3 /* remove vlans from current set */ + +struct ifbif_vlan_req { + char bv_ifname[IFNAMSIZ]; + uint8_t bv_op; + ifbvlan_set_t bv_set; +}; + #ifdef _KERNEL #define BRIDGE_INPUT(_ifp, _m) do { \ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1673,6 +1673,7 @@ */ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && + p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -126,13 +126,6 @@ #define VLAN_PCP_MAX 7 -#define DOT1Q_VID_NULL 0x0 -#define DOT1Q_VID_DEF_PVID 0x1 -#define DOT1Q_VID_DEF_SR_PVID 0x2 -#define DOT1Q_VID_RSVD_IMPL 0xfff -#define DOT1Q_VID_MIN 1 /* minimum valid vlan id */ -#define DOT1Q_VID_MAX 4094 /* maximum valid vlan id */ - /* * 802.1q full tag. Proto and vid are stored in host byte order. 
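Editorial note, not part of the change: the new requests above are driven through the generic bridge ioctl path, the way ifconfig(8) issues other bridge requests. A hedged C sketch follows; the interface names are hypothetical, s is assumed to be an open datagram socket, and error handling is minimal.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_bridgevar.h>
#include <string.h>

static int
bridge_vlan_example(int s)
{
	struct ifbreq breq;
	struct ifbif_vlan_req vreq;
	struct ifdrv ifd;

	/* Untagged frames arriving on member em0 join VLAN 10. */
	memset(&breq, 0, sizeof(breq));
	strlcpy(breq.ifbr_ifsname, "em0", sizeof(breq.ifbr_ifsname));
	breq.ifbr_untagged = 10;

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
	ifd.ifd_cmd = BRDGSIFUNTAGGED;
	ifd.ifd_len = sizeof(breq);
	ifd.ifd_data = &breq;
	if (ioctl(s, SIOCSDRVSPEC, &ifd) != 0)
		return (-1);

	/* Additionally accept tagged frames for VLANs 100-110 on em0. */
	memset(&vreq, 0, sizeof(vreq));
	strlcpy(vreq.bv_ifname, "em0", sizeof(vreq.bv_ifname));
	vreq.bv_op = BRDG_VLAN_OP_ADD;
	for (int vid = 100; vid <= 110; vid++)
		BRVLAN_SET(&vreq.bv_set, vid);

	ifd.ifd_cmd = BRDGSIFVLANSET;
	ifd.ifd_len = sizeof(vreq);
	ifd.ifd_data = &vreq;
	return (ioctl(s, SIOCSDRVSPEC, &ifd));
}

As the kernel code above notes, setting either an untagged VLAN or a tagged access list automatically enables IFBIF_VLANFILTER on the member, and BRDGGIFVLANSET reads the current set back through the same structure.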
*/ diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -508,18 +508,6 @@ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - pf_addrcpy(a, b, f) - -#define PF_AINC(a, f) \ - pf_addr_inc(a, f) - -#define PF_POOLMASK(a, b, c, d, f) \ - pf_poolmask(a, b, c, d, f) - #else /* Just IPv6 */ @@ -544,18 +532,6 @@ !(a)->addr32[2] && \ !(a)->addr32[3] ) \ -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - pf_addrcpy(a, b, f) - -#define PF_AINC(a, f) \ - pf_addr_inc(a, f) - -#define PF_POOLMASK(a, b, c, d, f) \ - pf_poolmask(a, b, c, d, f) - #else /* Just IPv4 */ @@ -570,29 +546,11 @@ #define PF_AZERO(a, c) \ (!(a)->addr32[0]) -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - (a)->v4.s_addr = (b)->v4.s_addr - -#define PF_AINC(a, f) \ - do { \ - (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ - } while (0) - -#define PF_POOLMASK(a, b, c, d, f) \ - do { \ - (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ - (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ - } while (0) - #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ #ifdef _KERNEL -#ifdef INET6 static void inline pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) { @@ -602,12 +560,13 @@ memcpy(&dst->v4, &src->v4, sizeof(dst->v4)); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: memcpy(&dst->v6, &src->v6, sizeof(dst->v6)); break; +#endif /* INET6 */ } } -#endif /* INET6 */ #endif /* @@ -629,7 +588,7 @@ &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ + !pf_match_addr(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))))) != \ (neg) \ ) @@ -2477,11 +2436,11 @@ int pf_normalize_ip(u_short *, struct pf_pdesc *); #endif /* INET */ -#ifdef INET6 -int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, sa_family_t); void pf_addr_inc(struct pf_addr *, sa_family_t); +#ifdef INET6 +int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); int pf_max_frag_size(struct mbuf *); int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, struct ifnet *, bool); @@ -2674,11 +2633,10 @@ const struct pf_krule *, char *, size_t); int pf_kanchor_nvcopyout(const struct pf_kruleset *, const struct pf_krule *, nvlist_t *); -void pf_kanchor_remove(struct pf_krule *); +void pf_remove_kanchor(struct pf_krule *); void pf_remove_if_empty_kruleset(struct pf_kruleset *); struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_get_leaf_kruleset(char *, char **); -struct pf_kanchor *pf_create_kanchor(struct pf_kanchor *, const char *); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -682,7 +682,8 @@ 0); break; case AF_INET6: - PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + pf_addrcpy(pd->src, &nk->addr[pd->sidx], + pd->af); break; default: unhandled_af(pd->af); @@ -696,7 +697,8 @@ 0); break; case AF_INET6: - PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + pf_addrcpy(pd->dst, &nk->addr[pd->didx], + pd->af); break; default: unhandled_af(pd->af); @@ -1084,9 +1086,9 @@ 
(*sn)->af = af; (*sn)->rule = r_track; - PF_ACPY(&(*sn)->addr, src, af); + pf_addrcpy(&(*sn)->addr, src, af); if (raddr != NULL) - PF_ACPY(&(*sn)->raddr, raddr, af); + pf_addrcpy(&(*sn)->raddr, raddr, af); (*sn)->rkif = rkif; LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry); (*sn)->creation = time_uptime; @@ -1687,9 +1689,9 @@ copy: #endif /* INET6 */ if (saddr) - PF_ACPY(&key->addr[pd->sidx], saddr, pd->af); + pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af); if (daddr) - PF_ACPY(&key->addr[pd->didx], daddr, pd->af); + pf_addrcpy(&key->addr[pd->didx], daddr, pd->af); return (0); } @@ -1734,13 +1736,17 @@ bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0])); bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1])); if (pd->dir == PF_IN) { - PF_ACPY(&(*nk)->addr[pd->didx], &pd->nsaddr, pd->naf); - PF_ACPY(&(*nk)->addr[pd->sidx], &pd->ndaddr, pd->naf); + pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr, + pd->naf); + pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr, + pd->naf); (*nk)->port[pd->didx] = pd->nsport; (*nk)->port[pd->sidx] = pd->ndport; } else { - PF_ACPY(&(*nk)->addr[pd->sidx], &pd->nsaddr, pd->naf); - PF_ACPY(&(*nk)->addr[pd->didx], &pd->ndaddr, pd->naf); + pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr, + pd->naf); + pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr, + pd->naf); (*nk)->port[pd->sidx] = pd->nsport; (*nk)->port[pd->didx] = pd->ndport; } @@ -2053,11 +2059,11 @@ mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO); if (mapping == NULL) return (NULL); - PF_ACPY(&mapping->endpoints[0].addr, src_addr, af); + pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af); mapping->endpoints[0].port = src_port; mapping->endpoints[0].af = af; mapping->endpoints[0].mapping = mapping; - PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af); + pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af); mapping->endpoints[1].port = nat_port; mapping->endpoints[1].af = af; mapping->endpoints[1].mapping = mapping; @@ -3295,9 +3301,9 @@ MPASS(pd->ip_sum); } - PF_ACPY(&ao, a, pd->af); + pf_addrcpy(&ao, a, pd->af); if (pd->af == pd->naf) - PF_ACPY(a, an, pd->af); + pf_addrcpy(a, an, pd->af); if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) *pd->pcksum = ~*pd->pcksum; @@ -3426,8 +3432,8 @@ { struct pf_addr ao; - PF_ACPY(&ao, a, AF_INET6); - PF_ACPY(a, an, AF_INET6); + pf_addrcpy(&ao, a, AF_INET6); + pf_addrcpy(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( @@ -3450,9 +3456,9 @@ { struct pf_addr oia, ooa; - PF_ACPY(&oia, ia, af); + pf_addrcpy(&oia, ia, af); if (oa) - PF_ACPY(&ooa, oa, af); + pf_addrcpy(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { @@ -3469,7 +3475,7 @@ *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ - PF_ACPY(ia, na, af); + pf_addrcpy(ia, na, af); switch (af) { #ifdef INET case AF_INET: { @@ -3503,7 +3509,7 @@ } /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
*/ if (oa) { - PF_ACPY(oa, na, af); + pf_addrcpy(oa, na, af); switch (af) { #ifdef INET case AF_INET: @@ -4299,8 +4305,8 @@ { /* undo NAT changes, if they have taken place */ if (nr != NULL) { - PF_ACPY(pd->src, &pd->osrc, pd->af); - PF_ACPY(pd->dst, &pd->odst, pd->af); + pf_addrcpy(pd->src, &pd->osrc, pd->af); + pf_addrcpy(pd->dst, &pd->odst, pd->af); if (pd->sport) *pd->sport = pd->osport; if (pd->dport) @@ -4676,10 +4682,11 @@ } else { rv = pf_match_rule(ctx, &r->anchor->ruleset); /* - * Unless there was an error inside the anchor, - * retain its quick state. + * Unless errors occurred, stop iff any rule matched + * within quick anchors. */ - if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK) + if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && + *ctx->am == r) rv = PF_TEST_QUICK; } @@ -4790,7 +4797,6 @@ return (quick); } -#ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) @@ -4802,6 +4808,7 @@ ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); @@ -4812,6 +4819,7 @@ naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; +#endif /* INET6 */ } } @@ -4824,6 +4832,7 @@ addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; @@ -4843,9 +4852,9 @@ addr->addr32[3] = htonl(ntohl(addr->addr32[3]) + 1); break; +#endif /* INET6 */ } } -#endif /* INET6 */ void pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a) @@ -5744,8 +5753,8 @@ ctx.reason = *reason; SLIST_INIT(&ctx.rules); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); if (inp != NULL) { INP_LOCK_ASSERT(inp); @@ -6363,7 +6372,7 @@ &nk->addr[pd->sidx], nk->port[pd->sidx]); pd->sport = &th->th_sport; pd->nsport = th->th_sport; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || @@ -6372,7 +6381,7 @@ &nk->addr[pd->didx], nk->port[pd->didx]); pd->dport = &th->th_dport; pd->ndport = th->th_dport; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6385,7 +6394,7 @@ nk->port[pd->sidx]); pd->sport = &pd->hdr.udp.uh_sport; pd->nsport = pd->hdr.udp.uh_sport; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || @@ -6396,7 +6405,7 @@ nk->port[pd->didx]); pd->dport = &pd->hdr.udp.uh_dport; pd->ndport = pd->hdr.udp.uh_dport; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6409,7 +6418,7 @@ nk->port[pd->sidx]); pd->sport = &pd->hdr.sctp.src_port; pd->nsport = pd->hdr.sctp.src_port; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != pd->ndport) { @@ -6419,7 +6428,7 @@ nk->port[pd->didx]); pd->dport = &pd->hdr.sctp.dest_port; pd->ndport = pd->hdr.sctp.dest_port; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } break; } @@ -6428,13 +6437,13 @@ if
(PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) { pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) { pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } if (ctx->virtual_type == htons(ICMP_ECHO) && @@ -6453,13 +6462,13 @@ if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->sidx], 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->didx], 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6473,7 +6482,7 @@ pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, @@ -6481,7 +6490,7 @@ pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } break; #endif /* INET */ @@ -6489,14 +6498,17 @@ case AF_INET6: if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { - PF_ACPY(&pd->nsaddr, &nk->addr[pd->sidx], pd->af); - PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx], + pd->af); + pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { - PF_ACPY(&pd->ndaddr, &nk->addr[pd->didx], pd->af); - PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx], + pd->af); + pf_addrcpy(pd->dst, &nk->addr[pd->didx], + pd->af); } break; #endif /* INET6 */ @@ -7015,8 +7027,8 @@ bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->virtual_proto; - PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); - PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); + pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af); + pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af); key.port[pd->sidx] = pd->osport; key.port[pd->didx] = pd->odport; @@ -7207,8 +7219,8 @@ } if (afto) { - PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); - PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); pd->naf = nk->af; action = PF_AFRT; } @@ -7502,13 +7514,13 @@ key.af = j->pd.af; key.proto = IPPROTO_SCTP; if (j->pd.dir == PF_IN) { /* wire side, straight */ - PF_ACPY(&key.addr[0], j->pd.src, key.af); - PF_ACPY(&key.addr[1], j->pd.dst, key.af); + pf_addrcpy(&key.addr[0], j->pd.src, key.af); + pf_addrcpy(&key.addr[1], j->pd.dst, key.af); key.port[0] = j->pd.hdr.sctp.src_port; key.port[1] = j->pd.hdr.sctp.dest_port; } else { /* stack side, reverse */ - PF_ACPY(&key.addr[1], j->pd.src, key.af); - PF_ACPY(&key.addr[0], j->pd.dst, key.af); + pf_addrcpy(&key.addr[1], j->pd.src, key.af); + pf_addrcpy(&key.addr[0], j->pd.dst, key.af); key.port[1] = j->pd.hdr.sctp.src_port; key.port[0] = j->pd.hdr.sctp.dest_port; } @@ -7904,8 +7916,10 @@ #endif /* INET6 */ } if (afto) { - PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); - PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], + nk->af); + 
pf_addrcpy(&pd->ndaddr, &nk->addr[didx], + nk->af); pd->naf = nk->af; return (PF_AFRT); } @@ -8037,8 +8051,8 @@ key.af = pd2.af; key.proto = IPPROTO_TCP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th->th_sport; key.port[pd2.didx] = th->th_dport; @@ -8141,9 +8155,9 @@ &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); - PF_ACPY(&pd->nsaddr, &nk->addr[pd2.sidx], - nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->nsaddr, + &nk->addr[pd2.sidx], nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8232,8 +8246,8 @@ key.af = pd2.af; key.proto = IPPROTO_UDP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = uh->uh_sport; key.port[pd2.didx] = uh->uh_dport; @@ -8276,9 +8290,9 @@ &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8364,8 +8378,8 @@ key.af = pd2.af; key.proto = IPPROTO_SCTP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = sh->src_port; key.port[pd2.didx] = sh->dest_port; @@ -8431,9 +8445,9 @@ sh->src_port = nk->port[sidx]; sh->dest_port = nk->port[didx]; m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8574,9 +8588,9 @@ iih->icmp_id = nk->port[iidx]; m_copyback(pd2.m, pd2.off, ICMP_MINLEN, (c_caddr_t)iih); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); /* * IPv4 becomes IPv6 so we must copy @@ -8702,9 +8716,9 @@ iih->icmp6_id = nk->port[iidx]; m_copyback(pd2.m, pd2.off, sizeof(struct icmp6_hdr), (c_caddr_t)iih); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); pd->naf = nk->af; return (PF_AFRT); @@ -8746,8 +8760,8 @@ key.af = pd2.af; key.proto = pd2.proto; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; action = pf_find_state(&pd2, &key, state); @@ -9283,7 +9297,8 @@ bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); - PF_ACPY((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, AF_INET6); + pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, + AF_INET6); if (pd->dir == PF_IN) { if (ip6->ip6_hlim <= IPV6_HLIMDEC) { @@ -10083,8 +10098,8 @@ pd->src = (struct pf_addr *)&h->ip_src; pd->dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd->osrc, pd->src, af); - PF_ACPY(&pd->odst, pd->dst, af); + pf_addrcpy(&pd->osrc, pd->src, af); + 
pf_addrcpy(&pd->odst, pd->dst, af); pd->ip_sum = &h->ip_sum; pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; pd->ttl = h->ip_ttl; @@ -10121,8 +10136,8 @@ h = mtod(pd->m, struct ip6_hdr *); pd->src = (struct pf_addr *)&h->ip6_src; pd->dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd->osrc, pd->src, af); - PF_ACPY(&pd->odst, pd->dst, af); + pf_addrcpy(&pd->osrc, pd->src, af); + pf_addrcpy(&pd->odst, pd->dst, af); pd->ip_sum = NULL; pd->tos = IPV6_DSCP(h); pd->ttl = h->ip6_hlim; diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c --- a/sys/netpfil/pf/pf_if.c +++ b/sys/netpfil/pf/pf_if.c @@ -522,7 +522,7 @@ case 0: return (0); case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, + return (pf_match_addr(0, &dyn->pfid_addr4, &dyn->pfid_mask4, a, AF_INET)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); @@ -535,7 +535,7 @@ case 0: return (0); case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, + return (pf_match_addr(0, &dyn->pfid_addr6, &dyn->pfid_mask6, a, AF_INET6)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -615,7 +615,7 @@ pfi_kkif_unref(rule->kif); if (rule->rcv_kif) pfi_kkif_unref(rule->rcv_kif); - pf_kanchor_remove(rule); + pf_remove_kanchor(rule); pf_empty_kpool(&rule->rdr.list); pf_empty_kpool(&rule->nat.list); pf_empty_kpool(&rule->route.list); @@ -2350,15 +2350,17 @@ if (psk->psk_proto && psk->psk_proto != sk->proto) continue; - if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, + if (! pf_match_addr(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, srcaddr, sk->af)) continue; - if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, + if (! pf_match_addr(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af)) continue; - if (! PF_MATCHA(psk->psk_rt_addr.neg, + if (! 
pf_match_addr(psk->psk_rt_addr.neg, &psk->psk_rt_addr.addr.v.a.addr, &psk->psk_rt_addr.addr.v.a.mask, &s->act.rt_addr, sk->af)) @@ -2398,10 +2400,10 @@ match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; - PF_ACPY(&match_key.addr[0], + pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; - PF_ACPY(&match_key.addr[1], + pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } @@ -2738,7 +2740,7 @@ return (ENOENT); } pr->nr = 0; - if (ruleset->anchor == NULL) { + if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL) @@ -2770,7 +2772,7 @@ } pr->name[0] = 0; - if (ruleset->anchor == NULL) { + if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { @@ -4152,9 +4154,9 @@ bzero(&key, sizeof(key)); key.af = pnl->af; key.proto = pnl->proto; - PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af); key.port[sidx] = pnl->sport; - PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; state = pf_find_state_all(&key, direction, &m); @@ -4166,9 +4168,11 @@ error = E2BIG; /* more than one state */ } else { sk = state->key[sidx]; - PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pf_addrcpy(&pnl->rsaddr, + &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; - PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pf_addrcpy(&pnl->rdaddr, + &sk->addr[didx], sk->af); pnl->rdport = sk->port[didx]; PF_STATE_UNLOCK(state); } @@ -4606,7 +4610,7 @@ } pool->cur = TAILQ_FIRST(&pool->list); - PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); + pf_addrcpy(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); PF_RULES_WUNLOCK(); break; @@ -6024,11 +6028,11 @@ PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) if (psnk == NULL || - (PF_MATCHA(psnk->psnk_src.neg, + (pf_match_addr(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && - PF_MATCHA(psnk->psnk_dst.neg, + pf_match_addr(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af))) { @@ -6132,10 +6136,10 @@ match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; - PF_ACPY(&match_key.addr[0], + pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; - PF_ACPY(&match_key.addr[1], + pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -319,12 +319,14 @@ bzero(&udp_source, sizeof(udp_source)); udp_source.af = pd->af; - PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af); + pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { *udp_mapping = pf_udp_mapping_find(&udp_source); if (*udp_mapping) { - PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); + pf_addrcpy(naddr, + &(*udp_mapping)->endpoints[1].addr, + pd->af); *nport = (*udp_mapping)->endpoints[1].port; /* Try to find a src_node as per pf_map_addr(). 
*/ if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && @@ -369,12 +371,13 @@ key.proto = pd->proto; do { - PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af); - PF_ACPY(&key.addr[sidx], naddr, key.af); + pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); + pf_addrcpy(&key.addr[sidx], naddr, key.af); key.port[didx] = pd->ndport; if (udp_mapping && *udp_mapping) - PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); + pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr, + pd->af); /* * port search; start random, step; @@ -591,10 +594,10 @@ switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_NONE: - PF_ACPY(naddr, raddr, af); + pf_addrcpy(naddr, raddr, af); break; case PF_POOL_BITMASK: - PF_POOLMASK(naddr, raddr, rmask, saddr, af); + pf_poolmask(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: if (rpool->cur->addr.type == PF_ADDR_TABLE) { @@ -609,7 +612,7 @@ reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; if (cnt == 0) @@ -623,7 +626,7 @@ reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { switch (af) { #ifdef INET @@ -654,12 +657,12 @@ break; #endif /* INET6 */ } - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - PF_ACPY(init_addr, naddr, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); + pf_addrcpy(init_addr, naddr, af); } else { - PF_AINC(&rpool->counter, af); - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + pf_addr_inc(&rpool->counter, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); } break; case PF_POOL_SRCHASH: @@ -680,7 +683,7 @@ reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; if (cnt == 0) @@ -694,9 +697,9 @@ reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else { - PF_POOLMASK(naddr, raddr, rmask, + pf_poolmask(naddr, raddr, rmask, (struct pf_addr *)&hash, af); } break; @@ -743,14 +746,14 @@ } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - PF_ACPY(&rpool->counter, raddr, af); + pf_addrcpy(&rpool->counter, raddr, af); } get_addr: - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); if (init_addr != NULL && PF_AZERO(init_addr, af)) - PF_ACPY(init_addr, naddr, af); - PF_AINC(&rpool->counter, af); + pf_addrcpy(init_addr, naddr, af); + pf_addr_inc(&rpool->counter, af); break; } } @@ -798,7 +801,7 @@ goto done; } - PF_ACPY(naddr, &(*sn)->raddr, af); + pf_addrcpy(naddr, &(*sn)->raddr, af); if (nkif) *nkif = (*sn)->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { @@ -948,7 +951,7 @@ reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr4, &rpool->cur->addr.p.dyn->pfid_mask4, &pd->nsaddr, AF_INET); @@ -961,7 +964,7 @@ reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr6, &rpool->cur->addr.p.dyn->pfid_mask6, &pd->nsaddr, AF_INET6); @@ -969,7 +972,7 @@ #endif /* INET6 */ } } else - 
PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.v.a.addr, &rpool->cur->addr.v.a.mask, &pd->nsaddr, pd->af); @@ -983,7 +986,7 @@ reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr4, &r->src.addr.p.dyn->pfid_mask4, &pd->ndaddr, AF_INET); @@ -995,7 +998,7 @@ reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr6, &r->src.addr.p.dyn->pfid_mask6, &pd->ndaddr, AF_INET6); @@ -1003,7 +1006,7 @@ #endif /* INET6 */ } } else - PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + pf_poolmask(naddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); break; } @@ -1018,7 +1021,7 @@ if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, &rpool->cur->addr.v.a.mask, + pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask, &pd->ndaddr, pd->af); /* Do not change SCTP ports. */ @@ -1056,9 +1059,9 @@ key.af = pd->af; key.proto = pd->proto; key.port[0] = pd->nsport; - PF_ACPY(&key.addr[0], &pd->nsaddr, key.af); + pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af); key.port[1] = nport; - PF_ACPY(&key.addr[1], naddr, key.af); + pf_addrcpy(&key.addr[1], naddr, key.af); if (!pf_find_state_all_exists(&key, PF_OUT)) break; @@ -1220,8 +1223,8 @@ } } - PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf); - PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf); + pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); + pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: af-to %s done, prefixlen %d, ", diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -1308,9 +1308,9 @@ key.af = attrs.af; key.proto = attrs.proto; - PF_ACPY(&key.addr[sidx], &attrs.src, attrs.af); + pf_addrcpy(&key.addr[sidx], &attrs.src, attrs.af); key.port[sidx] = attrs.sport; - PF_ACPY(&key.addr[didx], &attrs.dst, attrs.af); + pf_addrcpy(&key.addr[didx], &attrs.dst, attrs.af); key.port[didx] = attrs.dport; state = pf_find_state_all(&key, attrs.direction, &m); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -232,7 +232,7 @@ return (ruleset); } -struct pf_kanchor * +static struct pf_kanchor * pf_create_kanchor(struct pf_kanchor *parent, const char *aname) { struct pf_kanchor *anchor, *dup; @@ -259,8 +259,8 @@ if ((dup = RB_INSERT(pf_kanchor_global, &V_pf_anchors, anchor)) != NULL) { - printf("pf_find_or_create_ruleset: RB_INSERT1 " - "'%s' '%s' collides with '%s' '%s'\n", + printf("%s: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", __func__, anchor->path, anchor->name, dup->path, dup->name); rs_free(anchor); return (NULL); @@ -270,10 +270,10 @@ anchor->parent = parent; if ((dup = RB_INSERT(pf_kanchor_node, &parent->children, anchor)) != NULL) { - printf("pf_find_or_create_ruleset: " + printf("%s: " "RB_INSERT2 '%s' '%s' collides with " - "'%s' '%s'\n", anchor->path, anchor->name, - dup->path, dup->name); + "'%s' '%s'\n", __func__, anchor->path, + anchor->name, dup->path, dup->name); RB_REMOVE(pf_kanchor_global, &V_pf_anchors, anchor); rs_free(anchor); @@ -339,7 +339,7 @@ int i; while (ruleset != NULL) { - if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + if (ruleset == &pf_main_ruleset || !RB_EMPTY(&ruleset->anchor->children) || ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || ruleset->topen) @@ -407,7 +407,7 @@ } ruleset = pf_find_or_create_kruleset(path); 
rs_free(path); - if (ruleset == NULL || ruleset->anchor == NULL) { + if (ruleset == NULL || ruleset == &pf_main_ruleset) { DPFPRINTF("%s: ruleset\n", __func__); return (1); } @@ -432,7 +432,7 @@ char a[MAXPATHLEN]; char *p; int i; - if (rs->anchor == NULL) + if (rs == &pf_main_ruleset) a[0] = 0; else strlcpy(a, rs->anchor->path, MAXPATHLEN); @@ -444,7 +444,7 @@ anchor_call_len); } if (strncmp(a, r->anchor->path, strlen(a))) { - printf("pf_anchor_copyout: '%s' '%s'\n", a, + printf("%s: '%s' '%s'\n", __func__, a, r->anchor->path); return (1); } @@ -525,16 +525,13 @@ } void -pf_kanchor_remove(struct pf_krule *r) +pf_remove_kanchor(struct pf_krule *r) { if (r->anchor == NULL) return; - if (r->anchor->refcnt <= 0) { - printf("pf_anchor_remove: broken refcount\n"); - r->anchor = NULL; - return; - } - if (!--r->anchor->refcnt) + if (r->anchor->refcnt <= 0) + printf("%s: broken refcount\n", __func__); + else if (!--r->anchor->refcnt) pf_remove_if_empty_kruleset(&r->anchor->ruleset); r->anchor = NULL; } diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -704,7 +704,7 @@ return (-1); if (ad->pfra_not && ad->pfra_not != 1) return (-1); - if (ad->pfra_fback) + if (ad->pfra_fback != PFR_FB_NONE) return (-1); return (0); } @@ -2340,16 +2340,16 @@ if (use_counter && !PF_AZERO(counter, af)) { /* is supplied address within block? */ - if (!PF_MATCHA(0, &cur, &mask, counter, af)) { + if (!pf_match_addr(0, &cur, &mask, counter, af)) { /* no, go to next block in table */ idx++; use_counter = 0; goto _next_block; } - PF_ACPY(addr, counter, af); + pf_addrcpy(addr, counter, af); } else { /* use first address of block */ - PF_ACPY(addr, &cur, af); + pf_addrcpy(addr, &cur, af); } if (!KENTRY_NETWORK(ke)) { @@ -2358,7 +2358,7 @@ idx++; goto _next_block; } - PF_ACPY(counter, addr, af); + pf_addrcpy(counter, addr, af); *pidx = idx; pfr_kstate_counter_add(&kt->pfrkt_match, 1); return (0); @@ -2382,7 +2382,7 @@ /* lookup return the same block - perfect */ if (filter && filter(af, addr)) goto _next_entry; - PF_ACPY(counter, addr, af); + pf_addrcpy(counter, addr, af); *pidx = idx; pfr_kstate_counter_add(&kt->pfrkt_match, 1); return (0); @@ -2392,9 +2392,9 @@ /* we need to increase the counter past the nested block */ pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net); pfr_sockaddr_to_pf_addr(&umask, &umask_addr); - PF_POOLMASK(addr, addr, &umask_addr, &pfr_ffaddr, af); - PF_AINC(addr, af); - if (!PF_MATCHA(0, &cur, &mask, addr, af)) { + pf_poolmask(addr, addr, &umask_addr, &pfr_ffaddr, af); + pf_addr_inc(addr, af); + if (!pf_match_addr(0, &cur, &mask, addr, af)) { /* ok, we reached the end of our main block */ /* go to next block in table */ idx++; diff --git a/sys/powerpc/mpc85xx/mpc85xx_gpio.c b/sys/powerpc/mpc85xx/mpc85xx_gpio.c --- a/sys/powerpc/mpc85xx/mpc85xx_gpio.c +++ b/sys/powerpc/mpc85xx/mpc85xx_gpio.c @@ -226,14 +226,14 @@ return (ENOMEM); } + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { mpc85xx_gpio_detach(dev); return (ENOMEM); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - return (0); } diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner --- a/sys/riscv/allwinner/files.allwinner +++ b/sys/riscv/allwinner/files.allwinner @@ -1,5 +1,6 @@ arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt +arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_syscon.c optional 
syscon arm/allwinner/aw_sid.c optional aw_sid nvmem arm/allwinner/aw_timer.c optional aw_timer fdt diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -7,6 +7,7 @@ device aw_ccu # Allwinner clock controller device aw_gpio # Allwinner GPIO controller +device aw_rtc # Allwinner Real-time Clock device aw_sid # Allwinner Secure ID EFUSE device aw_timer # Allwinner Timer device aw_usbphy # Allwinner USB PHY diff --git a/sys/sys/caprights.h b/sys/sys/caprights.h --- a/sys/sys/caprights.h +++ b/sys/sys/caprights.h @@ -79,6 +79,8 @@ extern const cap_rights_t cap_getpeername_rights; extern const cap_rights_t cap_getsockopt_rights; extern const cap_rights_t cap_getsockname_rights; +extern const cap_rights_t cap_inotify_add_rights; +extern const cap_rights_t cap_inotify_rm_rights; extern const cap_rights_t cap_ioctl_rights; extern const cap_rights_t cap_linkat_source_rights; extern const cap_rights_t cap_linkat_target_rights; diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h --- a/sys/sys/capsicum.h +++ b/sys/sys/capsicum.h @@ -279,11 +279,15 @@ #define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE) +/* Allows operations on inotify descriptors. */ +#define CAP_INOTIFY_ADD CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_INOTIFY_RM CAPRIGHT(1, 0x0000000000400000ULL) + /* All used bits for index 1. */ -#define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL) +#define CAP_ALL1 CAPRIGHT(1, 0x00000000007FFFFFULL) /* Available bits for index 1. */ -#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000800000ULL) /* ... */ #define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL) diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -16,6 +16,8 @@ #define EXTERR_KTRACE 3 /* To allow inclusion of this file into kern_ktrace.c */ #define EXTERR_CAT_FUSE 4 +#define EXTERR_CAT_INOTIFY 5 +#define EXTERR_CAT_GENIO 6 #endif diff --git a/sys/sys/exterrvar.h b/sys/sys/exterrvar.h --- a/sys/sys/exterrvar.h +++ b/sys/sys/exterrvar.h @@ -21,6 +21,7 @@ #define EXTERRCTL_ENABLE 1 #define EXTERRCTL_DISABLE 2 +#define EXTERRCTL_UD 3 #define EXTERRCTLF_FORCE 0x00000001 diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -71,6 +71,7 @@ #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_TIMERFD 14 /* timerfd */ +#define DTYPE_INOTIFY 15 /* inotify descriptor */ #ifdef _KERNEL diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h new file mode 100644 --- /dev/null +++ b/sys/sys/inotify.h @@ -0,0 +1,150 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#ifndef _INOTIFY_H_ +#define _INOTIFY_H_ + +#include + +/* Flags for inotify_init1(). */ +#define IN_NONBLOCK 0x00000004 /* O_NONBLOCK */ +#define IN_CLOEXEC 0x00100000 /* O_CLOEXEC */ + +struct inotify_event { + int wd; + __uint32_t mask; + __uint32_t cookie; + __uint32_t len; + char name[0]; +}; + +/* Events, set in the mask field. 
*/ +#define IN_ACCESS 0x00000001 +#define IN_MODIFY 0x00000002 +#define IN_ATTRIB 0x00000004 +#define IN_CLOSE_WRITE 0x00000008 +#define IN_CLOSE_NOWRITE 0x00000010 +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) +#define IN_OPEN 0x00000020 +#define IN_MOVED_FROM 0x00000040 +#define IN_MOVED_TO 0x00000080 +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) +#define IN_CREATE 0x00000100 +#define IN_DELETE 0x00000200 +#define IN_DELETE_SELF 0x00000400 +#define IN_MOVE_SELF 0x00000800 +#define IN_ALL_EVENTS 0x00000fff + +/* Events report only for entries in a watched dir, not the dir itself. */ +#define _IN_DIR_EVENTS (IN_CLOSE_WRITE | IN_DELETE | IN_MODIFY | \ + IN_MOVED_FROM | IN_MOVED_TO) + +#ifdef _KERNEL +/* + * An unlink that's done as part of a rename only records IN_DELETE if the + * unlinked vnode itself is watched, and not when the containing directory is + * watched. + */ +#define _IN_MOVE_DELETE 0x40000000 +/* + * Inode link count changes only trigger IN_ATTRIB events if the inode itself is + * watched, and not when the containing directory is watched. + */ +#define _IN_ATTRIB_LINKCOUNT 0x80000000 +#endif + +/* Flags, set in the mask field. */ +#define IN_ONLYDIR 0x01000000 +#define IN_DONT_FOLLOW 0x02000000 +#define IN_EXCL_UNLINK 0x04000000 +#define IN_MASK_CREATE 0x10000000 +#define IN_MASK_ADD 0x20000000 +#define IN_ONESHOT 0x80000000 +#define _IN_ALL_FLAGS (IN_ONLYDIR | IN_DONT_FOLLOW | \ + IN_EXCL_UNLINK | IN_MASK_CREATE | \ + IN_MASK_ADD | IN_ONESHOT) + +/* Flags returned by the kernel. */ +#define IN_UNMOUNT 0x00002000 +#define IN_Q_OVERFLOW 0x00004000 +#define IN_IGNORED 0x00008000 +#define IN_ISDIR 0x40000000 +#define _IN_ALL_RETFLAGS (IN_Q_OVERFLOW | IN_UNMOUNT | IN_IGNORED | \ + IN_ISDIR) + +#define _IN_ALIGN _Alignof(struct inotify_event) +#define _IN_NAMESIZE(namelen) \ + ((namelen) == 0 ? 0 : __align_up((namelen) + 1, _IN_ALIGN)) + +#ifdef _KERNEL +struct componentname; +struct file; +struct inotify_softc; +struct thread; +struct vnode; + +int inotify_create_file(struct thread *, struct file *, int, int *); +void inotify_log(struct vnode *, const char *, size_t, int, __uint32_t); + +int kern_inotify_rm_watch(int, uint32_t, struct thread *); +int kern_inotify_add_watch(int, int, const char *, uint32_t, + struct thread *); + +void vn_inotify(struct vnode *, struct vnode *, struct componentname *, int, + uint32_t); +int vn_inotify_add_watch(struct vnode *, struct inotify_softc *, + __uint32_t, __uint32_t *, struct thread *); +void vn_inotify_revoke(struct vnode *); + +/* Log an inotify event. */ +#define INOTIFY(vp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & (VIRF_INOTIFY | \ + VIRF_INOTIFY_PARENT)) != 0)) \ + VOP_INOTIFY((vp), NULL, NULL, (ev), 0); \ +} while (0) + +/* Log an inotify event using a specific name for the vnode. 
*/ +#define INOTIFY_NAME(vp, dvp, cnp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) \ + VOP_INOTIFY((vp), (dvp), (cnp), (ev), 0); \ +} while (0) + +extern __uint32_t inotify_rename_cookie; + +#define INOTIFY_MOVE(vp, fdvp, fcnp, tvp, tdvp, tcnp) do { \ + if (__predict_false((vn_irflag_read(fdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(tdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) { \ + __uint32_t cookie; \ + \ + cookie = atomic_fetchadd_32(&inotify_rename_cookie, 1); \ + VOP_INOTIFY((vp), (fdvp), (fcnp), IN_MOVED_FROM, cookie); \ + VOP_INOTIFY((vp), (tdvp), (tcnp), IN_MOVED_TO, cookie); \ + } \ + if ((tvp) != NULL) \ + INOTIFY_NAME((tvp), (tdvp), (tcnp), _IN_MOVE_DELETE); \ +} while (0) + +#define INOTIFY_REVOKE(vp) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) \ + vn_inotify_revoke((vp)); \ +} while (0) + +#else +#include + +__BEGIN_DECLS +int inotify_init(void); +int inotify_init1(int flags); +int inotify_add_watch(int fd, const char *pathname, __uint32_t mask); +int inotify_add_watch_at(int fd, int dfd, const char *pathname, + __uint32_t mask); +int inotify_rm_watch(int fd, int wd); +__END_DECLS +#endif /* !_KERNEL */ + +#endif /* !_INOTIFY_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -267,6 +267,7 @@ int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */ int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */ struct lock mnt_explock; /* vfs_export walkers lock */ + struct lock mnt_renamelock; /* renames and O_RESOLVE_BENEATH */ TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */ TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */ STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -108,7 +108,12 @@ * through the VOP interface. */ struct componentname ni_cnd; + + /* Serving RBENEATH. */ struct nameicap_tracker_head ni_cap_tracker; + struct vnode *ni_rbeneath_dpp; + struct mount *ni_nctrack_mnt; + /* * Private helper data for UFS, must be at the end. See * NDINIT_PREFILL(). @@ -235,6 +240,10 @@ panic("namei data not inited"); \ if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0) \ panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR"); \ + if ((arg)->ni_nctrack_mnt != NULL) \ + panic("NDREINIT on namei data with leaked ni_nctrack_mnt"); \ + if (!TAILQ_EMPTY(&(arg)->ni_cap_tracker)) \ + panic("NDREINIT on namei data with leaked ni_cap_tracker"); \ (arg)->ni_debugflags = NAMEI_DBG_INITED; \ } #else @@ -259,6 +268,9 @@ _ndp->ni_resflags = 0; \ filecaps_init(&_ndp->ni_filecaps); \ _ndp->ni_rightsneeded = _rightsp; \ + _ndp->ni_rbeneath_dpp = NULL; \ + _ndp->ni_nctrack_mnt = NULL; \ + TAILQ_INIT(&_ndp->ni_cap_tracker); \ } while (0) #define NDREINIT(ndp) do { \ diff --git a/sys/sys/param.h b/sys/sys/param.h --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1500049 +#define __FreeBSD_version 1500051 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -122,6 +122,8 @@ long ui_kqcnt; /* (b) number of kqueues */ long ui_umtxcnt; /* (b) number of shared umtxs */ long ui_pipecnt; /* (b) consumption of pipe buffers */ + long ui_inotifycnt; /* (b) number of inotify descriptors */ + long ui_inotifywatchcnt; /* (b) number of inotify watches */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ #ifdef RACCT @@ -144,6 +146,8 @@ int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max); +int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval); +int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval); int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp); struct plimit diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h --- a/sys/sys/specialfd.h +++ b/sys/sys/specialfd.h @@ -30,6 +30,7 @@ enum specialfd_type { SPECIALFD_EVENTFD = 1, + SPECIALFD_INOTIFY = 2, }; struct specialfd_eventfd { @@ -37,4 +38,8 @@ int flags; }; +struct specialfd_inotify { + int flags; +}; + #endif /* !_SYS_SPECIALFD_H_ */ diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -529,4 +529,6 @@ #define SYS_fchroot 590 #define SYS_setcred 591 #define SYS_exterrctl 592 -#define SYS_MAXSYSCALL 593 +#define SYS_inotify_add_watch_at 593 +#define SYS_inotify_rm_watch 594 +#define SYS_MAXSYSCALL 595 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -434,4 +434,6 @@ getrlimitusage.o \ fchroot.o \ setcred.o \ - exterrctl.o + exterrctl.o \ + inotify_add_watch_at.o \ + inotify_rm_watch.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -1891,6 +1891,16 @@ char flags_l_[PADL_(u_int)]; u_int flags; char flags_r_[PADR_(u_int)]; char ptr_l_[PADL_(void *)]; void * ptr; char ptr_r_[PADR_(void *)]; }; +struct inotify_add_watch_at_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char dfd_l_[PADL_(int)]; int dfd; char dfd_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; +}; +struct inotify_rm_watch_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)]; +}; int sys_exit(struct thread *, struct exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -2293,6 +2303,8 @@ int sys_fchroot(struct thread *, struct fchroot_args *); int sys_setcred(struct thread *, struct setcred_args *); int sys_exterrctl(struct thread *, struct exterrctl_args *); +int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *); +int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *); #ifdef COMPAT_43 @@ -3275,6 +3287,8 @@ #define SYS_AUE_fchroot AUE_NULL #define SYS_AUE_setcred AUE_SETCRED #define SYS_AUE_exterrctl AUE_NULL +#define SYS_AUE_inotify_add_watch_at AUE_INOTIFY +#define SYS_AUE_inotify_rm_watch AUE_INOTIFY #undef PAD_ #undef PADL_ diff --git 
a/sys/sys/user.h b/sys/sys/user.h --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -265,6 +265,7 @@ #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 +#define KF_TYPE_INOTIFY 15 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -456,6 +457,10 @@ int32_t kf_kqueue_count; int32_t kf_kqueue_state; } kf_kqueue; + struct { + uint64_t kf_inotify_npending; + uint64_t kf_inotify_nbpending; + } kf_inotify; } kf_un; }; uint16_t kf_status; /* Status flags. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -86,11 +86,13 @@ * it from v_data. If non-null, this area is freed in getnewvnode(). */ -struct namecache; struct cache_fpl; +struct inotify_watch; +struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ + TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ @@ -248,6 +250,9 @@ #define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */ #define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */ #define VIRF_NAMEDATTR 0x0040 /* Named attribute */ +#define VIRF_INOTIFY 0x0080 /* This vnode is being watched */ +#define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being + watched */ #define VI_UNUSED0 0x0001 /* unused */ #define VI_MOUNT 0x0002 /* Mount in progress */ @@ -667,6 +672,7 @@ void cache_symlink_free(char *string, size_t size); int cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len); +void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie); void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp); void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp); @@ -869,8 +875,10 @@ int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); -int vop_stdioctl(struct vop_ioctl_args *); int vop_stdneed_inactive(struct vop_need_inactive_args *); +int vop_stdinotify(struct vop_inotify_args *); +int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *); +int vop_stdioctl(struct vop_ioctl_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdunlock(struct vop_unlock_args *); @@ -910,9 +918,12 @@ int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. */ +void vop_allocate_post(void *a, int rc); +void vop_copy_file_range_post(void *ap, int rc); void vop_close_post(void *a, int rc); void vop_create_pre(void *a); void vop_create_post(void *a, int rc); +void vop_deallocate_post(void *a, int rc); void vop_whiteout_pre(void *a); void vop_whiteout_post(void *a, int rc); void vop_deleteextattr_pre(void *a); @@ -1020,9 +1031,12 @@ #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ - if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ - VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ - | (noffset > osize ? NOTE_EXTEND : 0)); \ + if (noffset > ooffset) { \ + if (VN_KNLIST_EMPTY((ap)->a_vp)) { \ + VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \ + (noffset > osize ? 
NOTE_EXTEND : 0)); \ + } \ + INOTIFY((ap)->a_vp, IN_MODIFY); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk --- a/sys/tools/vnode_if.awk +++ b/sys/tools/vnode_if.awk @@ -193,6 +193,7 @@ printc(common_head \ "#include \n" \ "#include \n" \ + "#include \n" \ "#include \n" \ "#include \n" \ "#include \n" \ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1012,7 +1012,6 @@ else ump->um_check_blkno = NULL; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); - sx_init(&ump->um_checkpath_lock, "uchpth"); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; @@ -1182,7 +1181,6 @@ } if (ump != NULL) { mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; @@ -1306,7 +1304,6 @@ vrele(ump->um_odevvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1412,7 +1412,6 @@ vp = tvp = ITOV(target); mp = vp->v_mount; *wait_ino = 0; - sx_assert(&VFSTOUFS(mp)->um_checkpath_lock, SA_XLOCKED); if (target->i_number == source_ino) return (EEXIST); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1273,9 +1273,9 @@ struct mount *mp; ino_t ino; seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; - bool checkpath_locked, want_seqc_end; + bool want_seqc_end; - checkpath_locked = want_seqc_end = false; + want_seqc_end = false; endoff = 0; mp = tdvp->v_mount; @@ -1427,10 +1427,6 @@ } vfs_ref(mp); MPASS(!want_seqc_end); - if (checkpath_locked) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; - } VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1484,8 +1480,6 @@ if (error) goto unlockout; - sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = true; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* @@ -1493,8 +1487,6 @@ * everything else and VGET before restarting. 
*/ if (ino) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); @@ -1574,9 +1566,6 @@ vn_seqc_write_end(fdvp); want_seqc_end = false; vfs_ref(mp); - MPASS(checkpath_locked); - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1763,9 +1752,6 @@ vn_seqc_write_end(fdvp); } - if (checkpath_locked) - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - vput(fdvp); vput(fvp); diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -97,8 +97,6 @@ uint64_t um_maxsymlinklen; /* (c) max size of short symlink */ struct mtx um_lock; /* (c) Protects ufsmount & fs */ - struct sx um_checkpath_lock; /* (c) Protects ufs_checkpath() - result */ struct mount_softdeps *um_softdep; /* (c) softdep mgmt structure */ struct vnode *um_quotas[MAXQUOTAS]; /* (q) pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* (q) quota file access cred */ diff --git a/sys/x86/linux/linux_dummy_x86.c b/sys/x86/linux/linux_dummy_x86.c --- a/sys/x86/linux/linux_dummy_x86.c +++ b/sys/x86/linux/linux_dummy_x86.c @@ -46,7 +46,5 @@ DUMMY(sysfs); DUMMY(quotactl); -/* Linux 2.6.13: */ -DUMMY(inotify_init); /* Linux 2.6.22: */ DUMMY(signalfd); diff --git a/tests/ci/tools/freebsdci b/tests/ci/tools/freebsdci --- a/tests/ci/tools/freebsdci +++ b/tests/ci/tools/freebsdci @@ -34,6 +34,7 @@ start_cmd="firstboot_ci_run" stop_cmd=":" os_arch=$(uname -p) +parallelism=$(nproc) tardev=/dev/vtbd1 metadir=/meta istar=$(file -s ${tardev} | grep "POSIX tar archive" | wc -l) @@ -74,7 +75,7 @@ tar xvf ${tardev} -C ${metadir} cd /usr/tests set +e - kyua test + kyua -v parallelism=${parallelism} test rc=$? set -e if [ ${rc} -ne 0 ] && [ ${rc} -ne 1 ]; then diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile --- a/tests/sys/kern/Makefile +++ b/tests/sys/kern/Makefile @@ -17,8 +17,10 @@ ATF_TESTS_C+= kern_descrip_test # One test modifies the maxfiles limit, which can cause spurious test failures. TEST_METADATA.kern_descrip_test+= is_exclusive="true" +ATF_TESTS_C+= exterr_test ATF_TESTS_C+= fdgrowtable_test ATF_TESTS_C+= jail_lookup_root +ATF_TESTS_C+= inotify_test ATF_TESTS_C+= kill_zombie .if ${MK_OPENSSL} != "no" ATF_TESTS_C+= ktls_test @@ -85,6 +87,7 @@ LIBADD.sys_getrandom+= pthread LIBADD.ptrace_test+= pthread LIBADD.unix_seqpacket_test+= pthread +LIBADD.inotify_test+= util LIBADD.kcov+= pthread CFLAGS.ktls_test+= -DOPENSSL_API_COMPAT=0x10100000L LIBADD.ktls_test+= crypto util diff --git a/tests/sys/kern/exterr_test.c b/tests/sys/kern/exterr_test.c new file mode 100644 --- /dev/null +++ b/tests/sys/kern/exterr_test.c @@ -0,0 +1,107 @@ +/*- + * Copyright (C) 2025 ConnectWise, LLC. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include + +ATF_TC(gettext_extended); +ATF_TC_HEAD(gettext_extended, tc) +{ + atf_tc_set_md_var(tc, "descr", "Retrieve an extended error message"); +} +ATF_TC_BODY(gettext_extended, tc) +{ + char exterr[UEXTERROR_MAXLEN]; + int r; + + /* + * Use an invalid call to mmap() because it supports extended error + * messages, requires no special resources, and does not need root. + */ + ATF_CHECK_ERRNO(ENOTSUP, + mmap(NULL, 0, PROT_MAX(PROT_READ) | PROT_WRITE, 0, -1, 0)); + r = uexterr_gettext(exterr, sizeof(exterr)); + ATF_CHECK_EQ(0, r); + printf("Extended error: %s\n", exterr); + ATF_CHECK(strstr(exterr, "prot is not subset of max_prot") != 0); +} + +ATF_TC(gettext_noextended); +ATF_TC_HEAD(gettext_noextended, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Fail to retrieve an extended error message because none exists"); +} +ATF_TC_BODY(gettext_noextended, tc) +{ + char exterr[UEXTERROR_MAXLEN]; + int r; + + ATF_CHECK_ERRNO(EINVAL, exterrctl(EXTERRCTL_UD, 0, NULL)); + r = uexterr_gettext(exterr, sizeof(exterr)); + ATF_CHECK_EQ(0, r); + ATF_CHECK_STREQ(exterr, "No error"); +} + +ATF_TC(gettext_noextended_after_extended); +ATF_TC_HEAD(gettext_noextended_after_extended, tc) +{ + atf_tc_set_md_var(tc, "descr", + "uexterr_gettext should not return a stale extended error message"); +} +ATF_TC_BODY(gettext_noextended_after_extended, tc) +{ + char exterr[UEXTERROR_MAXLEN]; + int r; + + /* + * First do something that will create an extended error message, but + * ignore it. + */ + ATF_CHECK_ERRNO(ENOTSUP, + mmap(NULL, 0, PROT_MAX(PROT_READ) | PROT_WRITE, 0, -1, 0)); + + /* Then do something that won't create an extended error message */ + ATF_CHECK_ERRNO(EINVAL, exterrctl(EXTERRCTL_UD, 0, NULL)); + + /* Hopefully we won't see the stale extended error message */ + r = uexterr_gettext(exterr, sizeof(exterr)); + ATF_CHECK_EQ(0, r); + ATF_CHECK_STREQ(exterr, "No error"); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, gettext_extended); + ATF_TP_ADD_TC(tp, gettext_noextended); + ATF_TP_ADD_TC(tp, gettext_noextended_after_extended); + + return (atf_no_error()); +} diff --git a/tests/sys/kern/inotify_test.c b/tests/sys/kern/inotify_test.c new file mode 100644 --- /dev/null +++ b/tests/sys/kern/inotify_test.c @@ -0,0 +1,862 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static const char * +ev2name(int event) +{ + switch (event) { + case IN_ACCESS: + return ("IN_ACCESS"); + case IN_ATTRIB: + return ("IN_ATTRIB"); + case IN_CLOSE_WRITE: + return ("IN_CLOSE_WRITE"); + case IN_CLOSE_NOWRITE: + return ("IN_CLOSE_NOWRITE"); + case IN_CREATE: + return ("IN_CREATE"); + case IN_DELETE: + return ("IN_DELETE"); + case IN_DELETE_SELF: + return ("IN_DELETE_SELF"); + case IN_MODIFY: + return ("IN_MODIFY"); + case IN_MOVE_SELF: + return ("IN_MOVE_SELF"); + case IN_MOVED_FROM: + return ("IN_MOVED_FROM"); + case IN_MOVED_TO: + return ("IN_MOVED_TO"); + case IN_OPEN: + return ("IN_OPEN"); + default: + return (NULL); + } +} + +static void +close_checked(int fd) +{ + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Make sure that no other events are pending, and close the inotify descriptor. + */ +static void +close_inotify(int fd) +{ + int n; + + ATF_REQUIRE(ioctl(fd, FIONREAD, &n) == 0); + ATF_REQUIRE(n == 0); + close_checked(fd); +} + +static uint32_t +consume_event_cookie(int ifd, int wd, int event, int flags, const char *name) +{ + struct inotify_event *ev; + size_t evsz, namelen; + ssize_t n; + uint32_t cookie; + + /* Only read one record. */ + namelen = name == NULL ? 0 : strlen(name); + evsz = sizeof(*ev) + _IN_NAMESIZE(namelen); + ev = malloc(evsz); + ATF_REQUIRE(ev != NULL); + + n = read(ifd, ev, evsz); + ATF_REQUIRE_MSG(n >= 0, "failed to read event %s", ev2name(event)); + ATF_REQUIRE((size_t)n >= sizeof(*ev)); + ATF_REQUIRE((size_t)n == sizeof(*ev) + ev->len); + ATF_REQUIRE((size_t)n == evsz); + + ATF_REQUIRE_MSG((ev->mask & IN_ALL_EVENTS) == event, + "expected event %#x, got %#x", event, ev->mask); + ATF_REQUIRE_MSG((ev->mask & _IN_ALL_RETFLAGS) == flags, + "expected flags %#x, got %#x", flags, ev->mask); + ATF_REQUIRE_MSG(ev->wd == wd, + "expected wd %d, got %d", wd, ev->wd); + ATF_REQUIRE_MSG(name == NULL || strcmp(name, ev->name) == 0, + "expected name '%s', got '%s'", name, ev->name); + cookie = ev->cookie; + if ((ev->mask & (IN_MOVED_FROM | IN_MOVED_TO)) == 0) + ATF_REQUIRE(cookie == 0); + free(ev); + return (cookie); +} + +/* + * Read an event from the inotify file descriptor and check that it + * matches the expected values. + */ +static void +consume_event(int ifd, int wd, int event, int flags, const char *name) +{ + (void)consume_event_cookie(ifd, wd, event, flags, name); +} + +static int +inotify(int flags) +{ + int ifd; + + ifd = inotify_init1(flags); + ATF_REQUIRE(ifd != -1); + return (ifd); +} + +static void +mount_nullfs(char *dir, char *src) +{ + struct iovec *iov; + char errmsg[1024]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + + build_iovec(&iov, &iovlen, "fstype", "nullfs", (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", dir, (size_t)-1); + build_iovec(&iov, &iovlen, "target", src, (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); + + errmsg[0] = '\0'; + error = nmount(iov, iovlen, 0); + ATF_REQUIRE_MSG(error == 0, + "mount nullfs %s %s: %s", src, dir, + errmsg[0] == '\0' ? 
strerror(errno) : errmsg); + + free_iovec(&iov, &iovlen); +} + +static void +mount_tmpfs(const char *dir) +{ + struct iovec *iov; + char errmsg[1024]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + + build_iovec(&iov, &iovlen, "fstype", "tmpfs", (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir), + (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); + + errmsg[0] = '\0'; + error = nmount(iov, iovlen, 0); + ATF_REQUIRE_MSG(error == 0, + "mount tmpfs %s: %s", dir, + errmsg[0] == '\0' ? strerror(errno) : errmsg); + + free_iovec(&iov, &iovlen); +} + +static int +watch_file(int ifd, int events, char *path) +{ + int fd, wd; + + strncpy(path, "test.XXXXXX", PATH_MAX); + fd = mkstemp(path); + ATF_REQUIRE(fd != -1); + close_checked(fd); + + wd = inotify_add_watch(ifd, path, events); + ATF_REQUIRE(wd != -1); + + return (wd); +} + +static int +watch_dir(int ifd, int events, char *path) +{ + char *p; + int wd; + + strlcpy(path, "test.XXXXXX", PATH_MAX); + p = mkdtemp(path); + ATF_REQUIRE(p == path); + + wd = inotify_add_watch(ifd, path, events); + ATF_REQUIRE(wd != -1); + + return (wd); +} + +/* + * Verify that Capsicum restrictions are applied as expected. + */ +ATF_TC_WITHOUT_HEAD(inotify_capsicum); +ATF_TC_BODY(inotify_capsicum, tc) +{ + int error, dfd, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + ATF_REQUIRE(ifd != -1); + + dfd = open(".", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + + error = mkdirat(dfd, "testdir", 0755); + ATF_REQUIRE(error == 0); + + error = cap_enter(); + ATF_REQUIRE(error == 0); + + /* + * Plain inotify_add_watch() is disallowed. + */ + wd = inotify_add_watch(ifd, ".", IN_DELETE_SELF); + ATF_REQUIRE_ERRNO(ECAPMODE, wd == -1); + wd = inotify_add_watch_at(ifd, dfd, "testdir", IN_DELETE_SELF); + ATF_REQUIRE(wd >= 0); + + /* + * Generate a record and consume it. + */ + error = unlinkat(dfd, "testdir", AT_REMOVEDIR); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_checked(dfd); + close_inotify(ifd); +} + +/* + * Make sure that duplicate, back-to-back events are coalesced. + */ +ATF_TC_WITHOUT_HEAD(inotify_coalesce); +ATF_TC_BODY(inotify_coalesce, tc) +{ + char file[PATH_MAX], path[PATH_MAX]; + int fd, fd1, ifd, n, wd; + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it. */ + wd = watch_dir(ifd, IN_OPEN, path); + /* Create a file in the directory and open it. */ + snprintf(file, sizeof(file), "%s/file", path); + fd = open(file, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + fd = open(file, O_RDWR); + ATF_REQUIRE(fd != -1); + fd1 = open(file, O_RDONLY); + ATF_REQUIRE(fd1 != -1); + close_checked(fd1); + close_checked(fd); + + consume_event(ifd, wd, IN_OPEN, 0, "file"); + ATF_REQUIRE(ioctl(ifd, FIONREAD, &n) == 0); + ATF_REQUIRE(n == 0); + + close_inotify(ifd); +} + +/* + * Check handling of IN_MASK_CREATE. + */ +ATF_TC_WITHOUT_HEAD(inotify_mask_create); +ATF_TC_BODY(inotify_mask_create, tc) +{ + char path[PATH_MAX]; + int ifd, wd, wd1; + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it. */ + wd = watch_dir(ifd, IN_CREATE, path); + /* Updating the watch with IN_MASK_CREATE should result in an error. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_CREATE); + ATF_REQUIRE_ERRNO(EEXIST, wd1 == -1); + /* It's an error to specify IN_MASK_ADD with IN_MASK_CREATE. 
*/ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_ADD | + IN_MASK_CREATE); + ATF_REQUIRE_ERRNO(EINVAL, wd1 == -1); + /* Updating the watch without IN_MASK_CREATE should work. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY); + ATF_REQUIRE(wd1 != -1); + ATF_REQUIRE_EQ(wd, wd1); + + close_inotify(ifd); +} + +/* + * Make sure that inotify cooperates with nullfs: if a lower vnode is the + * subject of an event, the upper vnode should be notified, and if the upper + * vnode is the subject of an event, the lower vnode should be notified. + */ +ATF_TC_WITH_CLEANUP(inotify_nullfs); +ATF_TC_HEAD(inotify_nullfs, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} +ATF_TC_BODY(inotify_nullfs, tc) +{ + char path[PATH_MAX], *p; + int dfd, error, fd, ifd, mask, wd; + + mask = IN_CREATE | IN_OPEN; + + ifd = inotify(IN_NONBLOCK); + + strlcpy(path, "./test.XXXXXX", sizeof(path)); + p = mkdtemp(path); + ATF_REQUIRE(p == path); + + error = mkdir("./mnt", 0755); + ATF_REQUIRE(error == 0); + + /* Mount the testdir onto ./mnt. */ + mount_nullfs("./mnt", path); + + wd = inotify_add_watch(ifd, "./mnt", mask); + ATF_REQUIRE(wd != -1); + + /* Create a file in the lower directory and open it. */ + dfd = open(path, O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + fd = openat(dfd, "file", O_RDWR | O_CREAT, 0644); + close_checked(fd); + close_checked(dfd); + + /* We should see events via the nullfs mount. */ + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + consume_event(ifd, wd, IN_CREATE, 0, "file"); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + error = inotify_rm_watch(ifd, wd); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + /* Watch the lower directory. */ + wd = inotify_add_watch(ifd, path, mask); + ATF_REQUIRE(wd != -1); + /* ... and create a file in the upper directory and open it. */ + dfd = open("./mnt", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + fd = openat(dfd, "file2", O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + close_checked(dfd); + + /* We should see events via the lower directory. */ + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + consume_event(ifd, wd, IN_CREATE, 0, "file2"); + consume_event(ifd, wd, IN_OPEN, 0, "file2"); + + close_inotify(ifd); +} +ATF_TC_CLEANUP(inotify_nullfs, tc) +{ + int error; + + error = unmount("./mnt", 0); + if (error != 0) { + perror("unmount"); + exit(1); + } +} + +/* + * Make sure that exceeding max_events pending events results in an overflow + * event. + */ +ATF_TC_WITHOUT_HEAD(inotify_queue_overflow); +ATF_TC_BODY(inotify_queue_overflow, tc) +{ + char path[PATH_MAX]; + size_t size; + int error, dfd, ifd, max, wd; + + size = sizeof(max); + error = sysctlbyname("vfs.inotify.max_queued_events", &max, &size, NULL, + 0); + ATF_REQUIRE(error == 0); + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it for file creation events. */ + wd = watch_dir(ifd, IN_CREATE, path); + dfd = open(path, O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + /* Generate max+1 file creation events. */ + for (int i = 0; i < max + 1; i++) { + char name[NAME_MAX]; + int fd; + + (void)snprintf(name, sizeof(name), "file%d", i); + fd = openat(dfd, name, O_CREAT | O_RDWR, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + } + + /* + * Read our events. We should see files 0..max-1 and then an overflow + * event. 
+ */ + for (int i = 0; i < max; i++) { + char name[NAME_MAX]; + + (void)snprintf(name, sizeof(name), "file%d", i); + consume_event(ifd, wd, IN_CREATE, 0, name); + } + + /* Look for an overflow event. */ + consume_event(ifd, -1, 0, IN_Q_OVERFLOW, NULL); + + close_checked(dfd); + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_access_file); +ATF_TC_BODY(inotify_event_access_file, tc) +{ + char path[PATH_MAX], buf[16]; + off_t nb; + ssize_t n; + int error, fd, fd1, ifd, s[2], wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_ACCESS, path); + + fd = open(path, O_RDWR); + n = write(fd, "test", 4); + ATF_REQUIRE(n == 4); + + /* A simple read(2) should generate an access. */ + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + n = read(fd, buf, sizeof(buf)); + ATF_REQUIRE(n == 4); + ATF_REQUIRE(memcmp(buf, "test", 4) == 0); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + + /* copy_file_range(2) should as well. */ + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + fd1 = open("sink", O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd1 != -1); + n = copy_file_range(fd, NULL, fd1, NULL, 4, 0); + ATF_REQUIRE(n == 4); + close_checked(fd1); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + + /* As should sendfile(2). */ + error = socketpair(AF_UNIX, SOCK_STREAM, 0, s); + ATF_REQUIRE(error == 0); + error = sendfile(fd, s[0], 0, 4, NULL, &nb, 0); + ATF_REQUIRE(error == 0); + ATF_REQUIRE(nb == 4); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + close_checked(s[0]); + close_checked(s[1]); + + close_checked(fd); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_access_dir); +ATF_TC_BODY(inotify_event_access_dir, tc) +{ + char root[PATH_MAX], path[PATH_MAX]; + struct dirent *ent; + DIR *dir; + int error, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_ACCESS, root); + snprintf(path, sizeof(path), "%s/dir", root); + error = mkdir(path, 0755); + ATF_REQUIRE(error == 0); + + /* Read an entry and generate an access. */ + dir = opendir(path); + ATF_REQUIRE(dir != NULL); + ent = readdir(dir); + ATF_REQUIRE(ent != NULL); + ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 || + strcmp(ent->d_name, "..") == 0); + ATF_REQUIRE(closedir(dir) == 0); + consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, "dir"); + + /* + * Reading the watched directory should generate an access event. + * This is contrary to Linux's inotify man page, which states that + * IN_ACCESS is only generated for accesses to objects in a watched + * directory. 
+ */ + dir = opendir(root); + ATF_REQUIRE(dir != NULL); + ent = readdir(dir); + ATF_REQUIRE(ent != NULL); + ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 || + strcmp(ent->d_name, "..") == 0); + ATF_REQUIRE(closedir(dir) == 0); + consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_attrib); +ATF_TC_BODY(inotify_event_attrib, tc) +{ + char path[PATH_MAX]; + int error, ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_ATTRIB, path); + + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + error = fchmod(fd, 0600); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_ATTRIB, 0, NULL); + + error = fchown(fd, getuid(), getgid()); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_ATTRIB, 0, NULL); + + close_checked(fd); + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_close_nowrite); +ATF_TC_BODY(inotify_event_close_nowrite, tc) +{ + char file[PATH_MAX], file1[PATH_MAX], dir[PATH_MAX]; + int ifd, fd, wd1, wd2; + + ifd = inotify(IN_NONBLOCK); + + wd1 = watch_dir(ifd, IN_CLOSE_NOWRITE, dir); + wd2 = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, file); + + fd = open(dir, O_DIRECTORY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd1, IN_CLOSE_NOWRITE, IN_ISDIR, NULL); + + fd = open(file, O_RDONLY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd2, IN_CLOSE_NOWRITE, 0, NULL); + + snprintf(file1, sizeof(file1), "%s/file", dir); + fd = open(file1, O_RDONLY | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd1, IN_CLOSE_NOWRITE, 0, "file"); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_close_write); +ATF_TC_BODY(inotify_event_close_write, tc) +{ + char path[PATH_MAX]; + int ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, path); + + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_CLOSE_WRITE, 0, NULL); + + close_inotify(ifd); +} + +/* Verify that various operations in a directory generate IN_CREATE events. */ +ATF_TC_WITHOUT_HEAD(inotify_event_create); +ATF_TC_BODY(inotify_event_create, tc) +{ + struct sockaddr_un sun; + char path[PATH_MAX], path1[PATH_MAX], root[PATH_MAX]; + ssize_t n; + int error, ifd, ifd1, fd, s, wd, wd1; + char b; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_CREATE, root); + + /* Regular file. */ + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + /* + * Make sure we get an event triggered by the fd used to create the + * file. + */ + ifd1 = inotify(IN_NONBLOCK); + wd1 = inotify_add_watch(ifd1, root, IN_MODIFY); + b = 42; + n = write(fd, &b, sizeof(b)); + ATF_REQUIRE(n == sizeof(b)); + close_checked(fd); + consume_event(ifd, wd, IN_CREATE, 0, "file"); + consume_event(ifd1, wd1, IN_MODIFY, 0, "file"); + close_inotify(ifd1); + + /* Hard link. */ + snprintf(path1, sizeof(path1), "%s/link", root); + error = link(path, path1); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "link"); + + /* Directory. */ + snprintf(path, sizeof(path), "%s/dir", root); + error = mkdir(path, 0755); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, IN_ISDIR, "dir"); + + /* Symbolic link. */ + snprintf(path1, sizeof(path1), "%s/symlink", root); + error = symlink(path, path1); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "symlink"); + + /* FIFO. 
*/ + snprintf(path, sizeof(path), "%s/fifo", root); + error = mkfifo(path, 0644); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "fifo"); + + /* Binding a socket. */ + s = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&sun, 0, sizeof(sun)); + sun.sun_family = AF_UNIX; + sun.sun_len = sizeof(sun); + snprintf(sun.sun_path, sizeof(sun.sun_path), "%s/socket", root); + error = bind(s, (struct sockaddr *)&sun, sizeof(sun)); + ATF_REQUIRE(error == 0); + close_checked(s); + consume_event(ifd, wd, IN_CREATE, 0, "socket"); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_delete); +ATF_TC_BODY(inotify_event_delete, tc) +{ + char root[PATH_MAX], path[PATH_MAX], file[PATH_MAX]; + int error, fd, ifd, wd, wd2; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_DELETE | IN_DELETE_SELF, root); + + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + error = unlink(path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE, 0, "file"); + close_checked(fd); + + /* + * Make sure that renaming over a file generates a delete event when and + * only when that file is watched. + */ + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + wd2 = inotify_add_watch(ifd, path, IN_DELETE | IN_DELETE_SELF); + ATF_REQUIRE(wd2 != -1); + snprintf(file, sizeof(file), "%s/file2", root); + fd = open(file, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + error = rename(file, path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd2, IN_DELETE_SELF, 0, NULL); + consume_event(ifd, wd2, 0, IN_IGNORED, NULL); + + error = unlink(path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE, 0, "file"); + error = rmdir(root); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_move); +ATF_TC_BODY(inotify_event_move, tc) +{ + char dir1[PATH_MAX], dir2[PATH_MAX], path1[PATH_MAX], path2[PATH_MAX]; + char path3[PATH_MAX]; + int error, ifd, fd, wd1, wd2, wd3; + uint32_t cookie1, cookie2; + + ifd = inotify(IN_NONBLOCK); + + wd1 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir1); + wd2 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir2); + + snprintf(path1, sizeof(path1), "%s/file", dir1); + fd = open(path1, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + snprintf(path2, sizeof(path2), "%s/file2", dir2); + error = rename(path1, path2); + ATF_REQUIRE(error == 0); + cookie1 = consume_event_cookie(ifd, wd1, IN_MOVED_FROM, 0, "file"); + cookie2 = consume_event_cookie(ifd, wd2, IN_MOVED_TO, 0, "file2"); + ATF_REQUIRE_MSG(cookie1 == cookie2, + "expected cookie %u, got %u", cookie1, cookie2); + + snprintf(path2, sizeof(path2), "%s/dir", dir2); + error = rename(dir1, path2); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd1, IN_MOVE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd2, IN_MOVED_TO, IN_ISDIR, "dir"); + + wd3 = watch_file(ifd, IN_MOVE_SELF, path3); + error = rename(path3, "foo"); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd3, IN_MOVE_SELF, 0, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_open); +ATF_TC_BODY(inotify_event_open, tc) +{ + char root[PATH_MAX], path[PATH_MAX]; + int error, ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_OPEN, root); + + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + 
ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + fd = open(path, O_PATH); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + fd = open(root, O_DIRECTORY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + + snprintf(path, sizeof(path), "%s/fifo", root); + error = mkfifo(path, 0644); + ATF_REQUIRE(error == 0); + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "fifo"); + + close_inotify(ifd); +} + +ATF_TC_WITH_CLEANUP(inotify_event_unmount); +ATF_TC_HEAD(inotify_event_unmount, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} +ATF_TC_BODY(inotify_event_unmount, tc) +{ + int error, fd, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + + error = mkdir("./root", 0755); + ATF_REQUIRE(error == 0); + + mount_tmpfs("./root"); + + error = mkdir("./root/dir", 0755); + ATF_REQUIRE(error == 0); + wd = inotify_add_watch(ifd, "./root/dir", IN_OPEN); + ATF_REQUIRE(wd >= 0); + + fd = open("./root/dir", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(fd != -1); + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + close_checked(fd); + + /* A regular unmount should fail, as inotify holds a vnode reference. */ + error = unmount("./root", 0); + ATF_REQUIRE_ERRNO(EBUSY, error == -1); + error = unmount("./root", MNT_FORCE); + ATF_REQUIRE_MSG(error == 0, + "unmounting ./root failed: %s", strerror(errno)); + + consume_event(ifd, wd, 0, IN_UNMOUNT, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_inotify(ifd); +} +ATF_TC_CLEANUP(inotify_event_unmount, tc) +{ + (void)unmount("./root", MNT_FORCE); +} + +ATF_TP_ADD_TCS(tp) +{ + /* Tests for the inotify syscalls. */ + ATF_TP_ADD_TC(tp, inotify_capsicum); + ATF_TP_ADD_TC(tp, inotify_coalesce); + ATF_TP_ADD_TC(tp, inotify_mask_create); + ATF_TP_ADD_TC(tp, inotify_nullfs); + ATF_TP_ADD_TC(tp, inotify_queue_overflow); + /* Tests for the various inotify event types. */ + ATF_TP_ADD_TC(tp, inotify_event_access_file); + ATF_TP_ADD_TC(tp, inotify_event_access_dir); + ATF_TP_ADD_TC(tp, inotify_event_attrib); + ATF_TP_ADD_TC(tp, inotify_event_close_nowrite); + ATF_TP_ADD_TC(tp, inotify_event_close_write); + ATF_TP_ADD_TC(tp, inotify_event_create); + ATF_TP_ADD_TC(tp, inotify_event_delete); + ATF_TP_ADD_TC(tp, inotify_event_move); + ATF_TP_ADD_TC(tp, inotify_event_open); + ATF_TP_ADD_TC(tp, inotify_event_unmount); + return (atf_no_error()); +} diff --git a/tests/sys/net/if_bridge_test.sh b/tests/sys/net/if_bridge_test.sh --- a/tests/sys/net/if_bridge_test.sh +++ b/tests/sys/net/if_bridge_test.sh @@ -829,6 +829,398 @@ vnet_cleanup } +atf_test_case "vlan_pvid" "cleanup" +vlan_pvid_head() +{ + atf_set descr 'bridge with two ports with pvid set' + atf_set require.user root +} + +vlan_pvid_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20 + + # With VLAN filtering enabled, traffic should be passed. 
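+ # Both member ports are untagged in VLAN 20, so the two jails should
+ # be able to reach each other.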
+ atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Removed the untagged VLAN on one port; traffic should not be passed. + ifconfig ${bridge} -untagged ${epone}a + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_filtered" "cleanup" +vlan_pvid_filtered_head() +{ + atf_set descr 'bridge with two ports with different pvids' + atf_set require.user root +} + +vlan_pvid_filtered_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 30 + + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_filtered_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_tagged" "cleanup" +vlan_pvid_tagged_head() +{ + atf_set descr 'bridge pvid with tagged frames for pvid' + atf_set require.user root +} + +vlan_pvid_tagged_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Create two tagged interfaces on the appropriate VLANs + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20 + + # Tagged frames should not be passed. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_tagged_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_1q" "cleanup" +vlan_pvid_1q_head() +{ + atf_set descr '802.1q tag addition and removal' + atf_set require.user root +} + +vlan_pvid_1q_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Set up one jail with an access port, and the other with a trunk port. + # This forces the bridge to add and remove .1q tags to bridge the + # traffic. + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_1q_cleanup() +{ + vnet_cleanup +} + +# +# Test vlan filtering. 
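+# With vlanfilter enabled on a member port, tagged frames are forwarded only
+# if their VLAN ID is on the port's tagged access list; untagged frames are
+# classified according to the port's untagged VLAN instead.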
+# +atf_test_case "vlan_filtering" "cleanup" +vlan_filtering_head() +{ + atf_set descr 'tagged traffic with filtering' + atf_set require.user root +} + +vlan_filtering_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a + ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a + + # Right now there are no VLANs on the access list, so everything + # should be blocked. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Set the untagged vlan on both ports to 20 and make sure traffic is + # still blocked. We intentionally do not pass tagged traffic for the + # untagged vlan. + atf_check -s exit:0 ifconfig ${bridge} untagged ${epone}a 20 + atf_check -s exit:0 ifconfig ${bridge} untagged ${eptwo}a 20 + + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + atf_check -s exit:0 ifconfig ${bridge} -untagged ${epone}a + atf_check -s exit:0 ifconfig ${bridge} -untagged ${eptwo}a + + # Add VLANs 10-30 to the access list; now access should be allowed. + ifconfig ${bridge} +tagged ${epone}a 10-30 + ifconfig ${bridge} +tagged ${eptwo}a 10-30 + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Remove vlan 20 from the access list, now access should be blocked + # again. + ifconfig ${bridge} -tagged ${epone}a 20 + ifconfig ${bridge} -tagged ${eptwo}a 20 + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_filtering_cleanup() +{ + vnet_cleanup +} + +# +# Test the ifconfig 'tagged' option. +# +atf_test_case "vlan_ifconfig_tagged" "cleanup" +vlan_ifconfig_tagged_head() +{ + atf_set descr 'test the ifconfig tagged option' + atf_set require.user root +} + +vlan_ifconfig_tagged_body() +{ + vnet_init + vnet_init_bridge + + ep=$(vnet_mkepair) + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} addm ${ep}a vlanfilter ${ep}a up + ifconfig ${ep}a up + + # To start with, no vlans should be configured. + atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge} + + # Add vlans 100-149. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 100-149 + atf_check -s exit:0 -o match:"tagged 100-149" ifconfig ${bridge} + + # Replace the vlan list with 139-199. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 139-199 + atf_check -s exit:0 -o match:"tagged 139-199" ifconfig ${bridge} + + # Add vlans 100-170. + atf_check -s exit:0 ifconfig ${bridge} +tagged ${ep}a 100-170 + atf_check -s exit:0 -o match:"tagged 100-199" ifconfig ${bridge} + + # Remove vlans 104, 105, and 150-159 + atf_check -s exit:0 ifconfig ${bridge} -tagged ${ep}a 104,105,150-159 + atf_check -s exit:0 -o match:"tagged 100-103,106-149,160-199" \ + ifconfig ${bridge} + + # Remove the entire vlan list. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a none + atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge} + + # Test some invalid vlans sets. 
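+ # Each value is either out of range, not a number, or a range extending
+ # outside the valid VLAN ID space, so ifconfig should reject it.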
+ for bad_vlan in -1 0 4096 4097 foo 0-10 4000-5000 foo-40 40-foo; do + atf_check -s exit:1 -e ignore \ + ifconfig ${bridge} tagged "$bad_vlan" + done +} + +vlan_ifconfig_tagged_cleanup() +{ + vnet_cleanup +} + +# +# Test a vlan(4) "SVI" interface on top of a bridge. +# +atf_test_case "vlan_svi" "cleanup" +vlan_svi_head() +{ + atf_set descr 'vlan bridge with an SVI' + atf_set require.user root +} + +vlan_svi_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${bridge} addm ${epone}a tagged ${epone}a 20 + + svi=$(vnet_mkvlan) + ifconfig ${svi} vlan 20 vlandev ${bridge} + ifconfig ${svi} inet 192.0.2.2/24 up + + atf_check -s exit:0 -o ignore ping -c 3 -t 1 192.0.2.1 +} + +vlan_svi_cleanup() +{ + vnet_cleanup +} + +# +# Test QinQ (802.1ad). +# +atf_test_case "vlan_qinq" "cleanup" +vlan_qinq_head() +{ + atf_set descr 'vlan filtering with QinQ traffic' + atf_set require.user root +} + +vlan_qinq_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Create a QinQ trunk between the two jails. The outer (provider) tag + # is 5, and the inner tag is 10. + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.5 create vlanproto 802.1ad up + jexec one ifconfig ${epone}b.5.10 create inet 192.0.2.1/24 up + + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.5 create vlanproto 802.1ad up + jexec two ifconfig ${eptwo}b.5.10 create inet 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a + ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a + + # Right now there are no VLANs on the access list, so everything + # should be blocked. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Add the provider tag to the access list; now traffic should be passed. 
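+ # Only the outer (802.1ad) provider tag is examined by the bridge, so the
+ # inner VLAN 10 does not need to be on the access list.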
+ ifconfig ${bridge} +tagged ${epone}a 5 + ifconfig ${bridge} +tagged ${eptwo}a 5 + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_qinq_cleanup() +{ + vnet_cleanup +} + atf_init_test_cases() { atf_add_test_case "bridge_transmit_ipv4_unicast" @@ -847,4 +1239,12 @@ atf_add_test_case "member_ifaddrs_enabled" atf_add_test_case "member_ifaddrs_disabled" atf_add_test_case "member_ifaddrs_vlan" + atf_add_test_case "vlan_pvid" + atf_add_test_case "vlan_pvid_1q" + atf_add_test_case "vlan_pvid_filtered" + atf_add_test_case "vlan_pvid_tagged" + atf_add_test_case "vlan_filtering" + atf_add_test_case "vlan_ifconfig_tagged" + atf_add_test_case "vlan_svi" + atf_add_test_case "vlan_qinq" } diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -105,6 +106,7 @@ static void ktrcsw_old(struct ktr_csw_old *); static void ktruser(int, void *); static void ktrcaprights(cap_rights_t *); +static void ktrinotify(struct inotify_event *); static void ktritimerval(struct itimerval *it); static void ktrsockaddr(struct sockaddr *); static void ktrsplice(struct splice *); @@ -1860,6 +1862,14 @@ printf("{%ld, %ld}", (long)tv->tv_sec, tv->tv_usec); } +static void +ktrinotify(struct inotify_event *ev) +{ + printf( + "inotify { .wd = %d, .mask = %#x, .cookie = %u, .len = %u, .name = %s }\n", + ev->wd, ev->mask, ev->cookie, ev->len, ev->name); +} + static void ktritimerval(struct itimerval *it) { @@ -2128,6 +2138,17 @@ goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); + } else if (strcmp(name, "inotify") == 0) { + struct inotify_event *ev; + + if (datalen < sizeof(struct inotify_event) || + datalen > sizeof(struct inotify_event) + NAME_MAX + 1) + goto invalid; + ev = malloc(datalen); + if (ev == NULL) + err(1, "malloc"); + memcpy(ev, data, datalen); + ktrinotify(ev); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto invalid; diff --git a/usr.bin/procstat/procstat.1 b/usr.bin/procstat/procstat.1 --- a/usr.bin/procstat/procstat.1 +++ b/usr.bin/procstat/procstat.1 @@ -377,6 +377,8 @@ fifo .It h shared memory +.It i +inotify descriptor .It k kqueue .It m @@ -862,6 +864,7 @@ .Xr sockstat 1 , .Xr cap_enter 2 , .Xr cap_rights_limit 2 , +.Xr inotify 2 , .Xr mlock 2 , .Xr mlockall 2 , .Xr libprocstat 3 , diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c --- a/usr.bin/procstat/procstat_files.c +++ b/usr.bin/procstat/procstat_files.c @@ -226,6 +226,10 @@ { CAP_BINDAT, "ba" }, { CAP_CONNECTAT, "ca" }, + /* Inotify descriptor rights. */ + { CAP_INOTIFY_ADD, "ina" }, + { CAP_INOTIFY_RM, "inr" }, + /* Aliases and defines that combine multiple rights. 
*/ { CAP_PREAD, "prd" }, { CAP_PWRITE, "pwr" }, @@ -416,6 +420,11 @@ xo_emit("{eq:fd_type/eventfd}"); break; + case PS_FST_TYPE_INOTIFY: + str = "i"; + xo_emit("{eq:fd_type/inotify}"); + break; + case PS_FST_TYPE_NONE: str = "?"; xo_emit("{eq:fd_type/none}"); diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c --- a/usr.bin/sockstat/sockstat.c +++ b/usr.bin/sockstat/sockstat.c @@ -951,7 +951,7 @@ } if (addrstr[0] == '\0') { error = cap_getnameinfo(capnet, sstosa(ss), ss->ss_len, - addrstr, sizeof(addrstr), buf, bufsize, NI_NUMERICHOST); + addrstr, sizeof(addrstr), NULL, 0, NI_NUMERICHOST); if (error) errx(1, "cap_getnameinfo()"); } diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h --- a/usr.bin/truss/syscall.h +++ b/usr.bin/truss/syscall.h @@ -99,6 +99,7 @@ Getfsstatmode, Idtype, Ioctl, + Inotifyflags, Itimerwhich, Kldsymcmd, Kldunloadflags, diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -31,7 +31,6 @@ * SUCH DAMAGE. */ -#include /* * This file has routines used to print out system calls and their * arguments. @@ -316,6 +315,9 @@ { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, + { .name = "inotify_add_watch_at", .ret_type = 1, .nargs = 4, + .args = { { Int, 0 }, { Atfd, 1 }, { Name | IN, 2 }, + { Inotifyflags, 3 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, @@ -2447,6 +2449,9 @@ print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; + case Inotifyflags: + print_mask_arg(sysdecode_inotifyflags, fp, args[sc->offset]); + break; case Itimerwhich: print_integer_arg(sysdecode_itimer, fp, args[sc->offset]); break;
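
Not part of the change itself, but as a rough sketch of how the new interface
composes from a consumer's point of view: the example below assumes the API is
exposed through <sys/inotify.h> and uses only the calls exercised above
(inotify_init1(), inotify_add_watch(), inotify_rm_watch() and read(2) of
packed, variable-length struct inotify_event records). The buffer size, flag
choices and output format are illustrative assumptions, not taken from the
patch.

    #include <sys/inotify.h>

    #include <err.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(int argc, char **argv)
    {
        /* Keep the buffer aligned so events can be decoded in place. */
        _Alignas(struct inotify_event) char buf[8192];
        ssize_t n;
        int ifd, wd;

        if (argc != 2)
            errx(1, "usage: %s <directory>", argv[0]);

        /* Create an inotify descriptor and watch a directory for changes. */
        ifd = inotify_init1(IN_CLOEXEC);
        if (ifd == -1)
            err(1, "inotify_init1");
        wd = inotify_add_watch(ifd, argv[1], IN_CREATE | IN_DELETE);
        if (wd == -1)
            err(1, "inotify_add_watch");

        /* Each read(2) returns one or more packed, variable-length records. */
        while ((n = read(ifd, buf, sizeof(buf))) > 0) {
            for (char *p = buf; p < buf + n;) {
                struct inotify_event *ev = (void *)p;

                printf("wd %d mask %#x name '%s'\n", ev->wd,
                    (unsigned int)ev->mask, ev->len > 0 ? ev->name : "");

                /* Step past this event and its (optional) name. */
                p += sizeof(*ev) + ev->len;
            }
        }
        if (n == -1)
            err(1, "read");

        (void)inotify_rm_watch(ifd, wd);
        close(ifd);
        return (0);
    }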