diff --git a/lib/libc/sys/access.2 b/lib/libc/sys/access.2 index 13bfd7e5a88a..12af63385780 100644 --- a/lib/libc/sys/access.2 +++ b/lib/libc/sys/access.2 @@ -1,261 +1,272 @@ .\" Copyright (c) 1980, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)access.2 8.2 (Berkeley) 4/1/94 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt ACCESS 2 .Os .Sh NAME .Nm access , .Nm eaccess , .Nm faccessat .Nd check accessibility of a file .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In unistd.h .Ft int .Fn access "const char *path" "int mode" .Ft int .Fn eaccess "const char *path" "int mode" .Ft int .Fn faccessat "int fd" "const char *path" "int mode" "int flag" .Sh DESCRIPTION The .Fn access and .Fn eaccess system calls check the accessibility of the file named by the .Fa path argument for the access permissions indicated by the .Fa mode argument. The value of .Fa mode is either the bitwise-inclusive OR of the access permissions to be checked .Dv ( R_OK for read permission, .Dv W_OK for write permission, and .Dv X_OK for execute/search permission), or the existence test .Pq Dv F_OK . .Pp For additional information, see the .Sx "File Access Permission" section of .Xr intro 2 . .Pp The .Fn eaccess system call uses the effective user ID and the group access list to authorize the request; the .Fn access system call uses the real user ID in place of the effective user ID, the real group ID in place of the effective group ID, and the rest of the group access list. .Pp The .Fn faccessat system call is equivalent to .Fn access except in the case where .Fa path specifies a relative path. In this case the file whose accessibility is to be determined is located relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. If .Fn faccessat is passed the special value .Dv AT_FDCWD in the .Fa fd parameter, the current working directory is used and the behavior is identical to a call to .Fn access . Values for .Fa flag are constructed by a bitwise-inclusive OR of flags from the following list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_EACCESS The checks for accessibility are performed using the effective user and group IDs instead of the real user and group ID as required in a call to .Fn access . .It Dv AT_RESOLVE_BENEATH Only walk paths below the directory specified by the .Ar fd descriptor. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path +argument is an empty string, operate on the file or directory +referenced by the descriptor +.Fa fd . +If +.Fa fd +is equal to +.Dv AT_FDCWD , +operate on the current working directory. .El .Pp Even if a process's real or effective user has appropriate privileges and indicates success for .Dv X_OK , the file may not actually have execute permission bits set. Likewise for .Dv R_OK and .Dv W_OK . .Sh RETURN VALUES .Rv -std .Sh ERRORS .Fn access , .Fn eaccess , or .Fn faccessat will fail if: .Bl -tag -width Er .It Bq Er EINVAL The value of the .Fa mode argument is invalid. .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er EROFS Write access is requested for a file on a read-only file system. .It Bq Er ETXTBSY Write access is requested for a pure procedure (shared text) file presently being executed. .It Bq Er EACCES Permission bits of the file mode do not permit the requested access, or search permission is denied on a component of the path prefix. .It Bq Er EFAULT The .Fa path argument points outside the process's allocated address space. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .El .Pp Also, the .Fn faccessat system call may fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa path argument does not specify an absolute path and the .Fa fd argument is neither .Dv AT_FDCWD nor a valid file descriptor. .It Bq Er EINVAL The value of the .Fa flag argument is not valid. .It Bq Er ENOTDIR The .Fa path argument is not an absolute path and .Fa fd is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode. .El .Sh SEE ALSO .Xr chmod 2 , .Xr intro 2 , .Xr stat 2 .Sh STANDARDS The .Fn access system call is expected to conform to .St -p1003.1-90 . The .Fn faccessat system call follows The Open Group Extended API Set 2 specification. .Sh HISTORY The .Fn access function appeared in .At v7 . The .Fn faccessat system call appeared in .Fx 8.0 . .Sh SECURITY CONSIDERATIONS The .Fn access system call is a potential security hole due to race conditions and should never be used. Set-user-ID and set-group-ID applications should restore the effective user or group ID, and perform actions directly rather than use .Fn access to simulate access checks for the real user or group ID. The .Fn eaccess system call likewise may be subject to races if used inappropriately. .Pp .Fn access remains useful for providing clues to users as to whether operations make sense for particular filesystem objects (e.g. 'delete' menu item only highlighted in a writable folder ... avoiding interpretation of the st_mode bits that the application might not understand -- e.g. in the case of AFS). It also allows a cheaper file existence test than .Xr stat 2 . diff --git a/lib/libc/sys/chflags.2 b/lib/libc/sys/chflags.2 index a44713904599..f8dfd59c39d3 100644 --- a/lib/libc/sys/chflags.2 +++ b/lib/libc/sys/chflags.2 @@ -1,348 +1,359 @@ .\" Copyright (c) 1989, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)chflags.2 8.3 (Berkeley) 5/2/95 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt CHFLAGS 2 .Os .Sh NAME .Nm chflags , .Nm lchflags , .Nm fchflags , .Nm chflagsat .Nd set file flags .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/stat.h .In unistd.h .Ft int .Fn chflags "const char *path" "unsigned long flags" .Ft int .Fn lchflags "const char *path" "unsigned long flags" .Ft int .Fn fchflags "int fd" "unsigned long flags" .Ft int .Fn chflagsat "int fd" "const char *path" "unsigned long flags" "int atflag" .Sh DESCRIPTION The file whose name is given by .Fa path or referenced by the descriptor .Fa fd has its flags changed to .Fa flags . .Pp The .Fn lchflags system call is like .Fn chflags except in the case where the named file is a symbolic link, in which case .Fn lchflags will change the flags of the link itself, rather than the file it points to. .Pp The .Fn chflagsat is equivalent to either .Fn chflags or .Fn lchflags depending on the .Fa atflag except in the case where .Fa path specifies a relative path. In this case the file to be changed is determined relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. The values for the .Fa atflag are constructed by a bitwise-inclusive OR of flags from the following list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_SYMLINK_NOFOLLOW If .Fa path names a symbolic link, then the flags of the symbolic link are changed. .It Dv AT_RESOLVE_BENEATH Only walk paths below the directory specified by the .Ar fd descriptor. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path +argument is an empty string, operate on the file or directory +referenced by the descriptor +.Fa fd . +If +.Fa fd +is equal to +.Dv AT_FDCWD , +operate on the current working directory. .El .Pp If .Fn chflagsat is passed the special value .Dv AT_FDCWD in the .Fa fd parameter, the current working directory is used. If also .Fa atflag is zero, the behavior is identical to a call to .Fn chflags . .Pp The flags specified are formed by .Em or Ns 'ing the following values .Pp .Bl -tag -width ".Dv SF_IMMUTABLE" -compact -offset indent .It Dv SF_APPEND The file may only be appended to. .It Dv SF_ARCHIVED The file has been archived. This flag means the opposite of the DOS, Windows and CIFS FILE_ATTRIBUTE_ARCHIVE attribute. This flag has been deprecated, and may be removed in a future release. .It Dv SF_IMMUTABLE The file may not be changed. .It Dv SF_NOUNLINK The file may not be renamed or deleted. .It Dv SF_SNAPSHOT The file is a snapshot file. .It Dv UF_APPEND The file may only be appended to. .It Dv UF_ARCHIVE The file needs to be archived. This flag has the same meaning as the DOS, Windows and CIFS FILE_ATTRIBUTE_ARCHIVE attribute. Filesystems in FreeBSD may or may not have special handling for this flag. For instance, ZFS tracks changes to files and will set this bit when a file is updated. UFS only stores the flag, and relies on the application to change it when needed. .It Dv UF_HIDDEN The file may be hidden from directory listings at the application's discretion. The file has the DOS, Windows and CIFS FILE_ATTRIBUTE_HIDDEN attribute. .It Dv UF_IMMUTABLE The file may not be changed. .It Dv UF_NODUMP Do not dump the file. .It Dv UF_NOUNLINK The file may not be renamed or deleted. .It Dv UF_OFFLINE The file is offline, or has the Windows and CIFS FILE_ATTRIBUTE_OFFLINE attribute. Filesystems in FreeBSD store and display this flag, but do not provide any special handling when it is set. .It Dv UF_OPAQUE The directory is opaque when viewed through a union stack. .It Dv UF_READONLY The file is read only, and may not be written or appended. Filesystems may use this flag to maintain compatibility with the DOS, Windows and CIFS FILE_ATTRIBUTE_READONLY attribute. .It Dv UF_REPARSE The file contains a Windows reparse point and has the Windows and CIFS FILE_ATTRIBUTE_REPARSE_POINT attribute. .It Dv UF_SPARSE The file has the Windows FILE_ATTRIBUTE_SPARSE_FILE attribute. This may also be used by a filesystem to indicate a sparse file. .It Dv UF_SYSTEM The file has the DOS, Windows and CIFS FILE_ATTRIBUTE_SYSTEM attribute. Filesystems in FreeBSD may store and display this flag, but do not provide any special handling when it is set. .El .Pp If one of .Dv SF_IMMUTABLE , SF_APPEND , or .Dv SF_NOUNLINK is set a non-super-user cannot change any flags and even the super-user can change flags only if securelevel is 0. (See .Xr init 8 for details.) .Pp The .Dv UF_IMMUTABLE , UF_APPEND , UF_NOUNLINK , UF_NODUMP , and .Dv UF_OPAQUE flags may be set or unset by either the owner of a file or the super-user. .Pp The .Dv SF_IMMUTABLE , SF_APPEND , SF_NOUNLINK , and .Dv SF_ARCHIVED flags may only be set or unset by the super-user. Attempts to toggle these flags by non-super-users are rejected. These flags may be set at any time, but normally may only be unset when the system is in single-user mode. (See .Xr init 8 for details.) .Pp The implementation of all flags is filesystem-dependent. See the description of the .Dv UF_ARCHIVE flag above for one example of the differences in behavior. Care should be exercised when writing applications to account for support or lack of support of these flags in various filesystems. .Pp The .Dv SF_SNAPSHOT flag is maintained by the system and cannot be toggled. .Sh RETURN VALUES .Rv -std .Sh ERRORS The .Fn chflags system call will fail if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er EACCES Search permission is denied for a component of the path prefix. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er EPERM The effective user ID does not match the owner of the file and the effective user ID is not the super-user. .It Bq Er EPERM One of .Dv SF_IMMUTABLE , SF_APPEND , or .Dv SF_NOUNLINK is set and the user is either not the super-user or securelevel is greater than 0. .It Bq Er EPERM A non-super-user attempted to toggle one of .Dv SF_ARCHIVED , SF_IMMUTABLE , SF_APPEND , or .Dv SF_NOUNLINK . .It Bq Er EPERM An attempt was made to toggle the .Dv SF_SNAPSHOT flag. .It Bq Er EROFS The named file resides on a read-only file system. .It Bq Er EFAULT The .Fa path argument points outside the process's allocated address space. .It Bq Er EIO An .Tn I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er EOPNOTSUPP The underlying file system does not support file flags, or does not support all of the flags set in .Fa flags . .El .Pp The .Fn fchflags system call will fail if: .Bl -tag -width Er .It Bq Er EBADF The descriptor is not valid. .It Bq Er EINVAL The .Fa fd argument refers to a socket, not to a file. .It Bq Er EPERM The effective user ID does not match the owner of the file and the effective user ID is not the super-user. .It Bq Er EPERM One of .Dv SF_IMMUTABLE , SF_APPEND , or .Dv SF_NOUNLINK is set and the user is either not the super-user or securelevel is greater than 0. .It Bq Er EPERM A non-super-user attempted to toggle one of .Dv SF_ARCHIVED , SF_IMMUTABLE , SF_APPEND , or .Dv SF_NOUNLINK . .It Bq Er EPERM An attempt was made to toggle the .Dv SF_SNAPSHOT flag. .It Bq Er EROFS The file resides on a read-only file system. .It Bq Er EIO An .Tn I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er EOPNOTSUPP The underlying file system does not support file flags, or does not support all of the flags set in .Fa flags . .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode or the .Dv AT_RESOLVE_BENEATH flag was specified. .El .Sh SEE ALSO .Xr chflags 1 , .Xr fflagstostr 3 , .Xr strtofflags 3 , .Xr init 8 , .Xr mount_unionfs 8 .Sh HISTORY The .Fn chflags and .Fn fchflags system calls first appeared in .Bx 4.4 . The .Fn lchflags system call first appeared in .Fx 5.0 . The .Fn chflagsat system call first appeared in .Fx 10.0 . diff --git a/lib/libc/sys/chmod.2 b/lib/libc/sys/chmod.2 index 0127a5b629e4..44a1b18718f1 100644 --- a/lib/libc/sys/chmod.2 +++ b/lib/libc/sys/chmod.2 @@ -1,349 +1,360 @@ .\" Copyright (c) 1980, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)chmod.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt CHMOD 2 .Os .Sh NAME .Nm chmod , .Nm fchmod , .Nm lchmod , .Nm fchmodat .Nd change mode of file .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/stat.h .Ft int .Fn chmod "const char *path" "mode_t mode" .Ft int .Fn fchmod "int fd" "mode_t mode" .Ft int .Fn lchmod "const char *path" "mode_t mode" .Ft int .Fn fchmodat "int fd" "const char *path" "mode_t mode" "int flag" .Sh DESCRIPTION The file permission bits of the file named specified by .Fa path or referenced by the file descriptor .Fa fd are changed to .Fa mode . The .Fn chmod system call verifies that the process owner (user) either owns the file specified by .Fa path (or .Fa fd ) , or is the super-user. The .Fn chmod system call follows symbolic links to operate on the target of the link rather than the link itself. .Pp The .Fn lchmod system call is similar to .Fn chmod but does not follow symbolic links. .Pp The .Fn fchmodat is equivalent to either .Fn chmod or .Fn lchmod depending on the .Fa flag except in the case where .Fa path specifies a relative path. In this case the file to be changed is determined relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. The values for the .Fa flag are constructed by a bitwise-inclusive OR of flags from the following list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_SYMLINK_NOFOLLOW If .Fa path names a symbolic link, then the mode of the symbolic link is changed. .It Dv AT_RESOLVE_BENEATH Only walk paths below the directory specified by the .Ar fd descriptor. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path +argument is an empty string, operate on the file or directory +referenced by the descriptor +.Fa fd . +If +.Fa fd +is equal to +.Dv AT_FDCWD , +operate on the current working directory. .El .Pp If .Fn fchmodat is passed the special value .Dv AT_FDCWD in the .Fa fd parameter, the current working directory is used. If also .Fa flag is zero, the behavior is identical to a call to .Fn chmod . .Pp A mode is created from .Em or'd permission bit masks defined in .In sys/stat.h : .Pp .Bd -literal -offset indent -compact #define S_IRWXU 0000700 /* RWX mask for owner */ #define S_IRUSR 0000400 /* R for owner */ #define S_IWUSR 0000200 /* W for owner */ #define S_IXUSR 0000100 /* X for owner */ #define S_IRWXG 0000070 /* RWX mask for group */ #define S_IRGRP 0000040 /* R for group */ #define S_IWGRP 0000020 /* W for group */ #define S_IXGRP 0000010 /* X for group */ #define S_IRWXO 0000007 /* RWX mask for other */ #define S_IROTH 0000004 /* R for other */ #define S_IWOTH 0000002 /* W for other */ #define S_IXOTH 0000001 /* X for other */ #define S_ISUID 0004000 /* set user id on execution */ #define S_ISGID 0002000 /* set group id on execution */ #define S_ISVTX 0001000 /* sticky bit */ .Ed .Pp The non-standard .Dv S_ISTXT is a synonym for .Dv S_ISVTX . .Pp The .Fx VM system totally ignores the sticky bit .Pq Dv S_ISVTX for executables. On UFS-based file systems (FFS, LFS) the sticky bit may only be set upon directories. .Pp If mode .Dv S_ISVTX (the `sticky bit') is set on a directory, an unprivileged user may not delete or rename files of other users in that directory. The sticky bit may be set by any user on a directory which the user owns or has appropriate permissions. For more details of the properties of the sticky bit, see .Xr sticky 7 . .Pp If mode ISUID (set UID) is set on a directory, and the MNT_SUIDDIR option was used in the mount of the file system, then the owner of any new files and sub-directories created within this directory are set to be the same as the owner of that directory. If this function is enabled, new directories will inherit the bit from their parents. Execute bits are removed from the file, and it will not be given to root. This behavior does not change the requirements for the user to be allowed to write the file, but only the eventual owner after it has been created. Group inheritance is not affected. .Pp This feature is designed for use on fileservers serving PC users via ftp, SAMBA, or netatalk. It provides security holes for shell users and as such should not be used on shell machines, especially on home directories. This option requires the SUIDDIR option in the kernel to work. Only UFS file systems support this option. For more details of the suiddir mount option, see .Xr mount 8 . .Pp Writing or changing the owner of a file turns off the set-user-id and set-group-id bits unless the user is the super-user. This makes the system somewhat more secure by protecting set-user-id (set-group-id) files from remaining set-user-id (set-group-id) if they are modified, at the expense of a degree of compatibility. .Sh RETURN VALUES .Rv -std .Sh ERRORS The .Fn chmod system call will fail and the file mode will be unchanged if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er EACCES Search permission is denied for a component of the path prefix. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er EPERM The effective user ID does not match the owner of the file and the effective user ID is not the super-user. .It Bq Er EPERM The effective user ID is not the super-user, the effective user ID do match the owner of the file, but the group ID of the file does not match the effective group ID nor one of the supplementary group IDs. .It Bq Er EPERM The named file has its immutable or append-only flag set, see the .Xr chflags 2 manual page for more information. .It Bq Er EROFS The named file resides on a read-only file system. .It Bq Er EFAULT The .Fa path argument points outside the process's allocated address space. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er EFTYPE The effective user ID is not the super-user, the mode includes the sticky bit .Dv ( S_ISVTX ) , and path does not refer to a directory. .El .Pp The .Fn fchmod system call will fail if: .Bl -tag -width Er .It Bq Er EBADF The descriptor is not valid. .It Bq Er EINVAL The .Fa fd argument refers to a socket, not to a file. .It Bq Er EROFS The file resides on a read-only file system. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .El .Pp In addition to the .Fn chmod errors, .Fn fchmodat fails if: .Bl -tag -width Er .It Bq Er EBADF The .Fa path argument does not specify an absolute path and the .Fa fd argument is neither .Fa AT_FDCWD nor a valid file descriptor open for searching. .It Bq Er EINVAL The value of the .Fa flag argument is not valid. .It Bq Er ENOTDIR The .Fa path argument is not an absolute path and .Fa fd is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode or the .Dv AT_RESOLVE_BENEATH flag was specified. .El .Sh SEE ALSO .Xr chmod 1 , .Xr chflags 2 , .Xr chown 2 , .Xr open 2 , .Xr stat 2 , .Xr sticky 7 .Sh STANDARDS The .Fn chmod system call is expected to conform to .St -p1003.1-90 , except for the return of .Er EFTYPE . The .Dv S_ISVTX bit on directories is expected to conform to .St -susv3 . The .Fn fchmodat system call is expected to conform to .St -p1003.1-2008 . .Sh HISTORY The .Fn chmod function appeared in .At v1 . The .Fn fchmod system call appeared in .Bx 4.2 . The .Fn lchmod system call appeared in .Fx 3.0 . The .Fn fchmodat system call appeared in .Fx 8.0 . diff --git a/lib/libc/sys/chown.2 b/lib/libc/sys/chown.2 index 4c45ce9174bb..467ff8a87e55 100644 --- a/lib/libc/sys/chown.2 +++ b/lib/libc/sys/chown.2 @@ -1,290 +1,301 @@ .\" Copyright (c) 1980, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)chown.2 8.4 (Berkeley) 4/19/94 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt CHOWN 2 .Os .Sh NAME .Nm chown , .Nm fchown , .Nm lchown , .Nm fchownat .Nd change owner and group of a file .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In unistd.h .Ft int .Fn chown "const char *path" "uid_t owner" "gid_t group" .Ft int .Fn fchown "int fd" "uid_t owner" "gid_t group" .Ft int .Fn lchown "const char *path" "uid_t owner" "gid_t group" .Ft int .Fn fchownat "int fd" "const char *path" "uid_t owner" "gid_t group" "int flag" .Sh DESCRIPTION The owner ID and group ID of the file named by .Fa path or referenced by .Fa fd is changed as specified by the arguments .Fa owner and .Fa group . The owner of a file may change the .Fa group to a group of which he or she is a member, but the change .Fa owner capability is restricted to the super-user. .Pp The .Fn chown system call clears the set-user-id and set-group-id bits on the file to prevent accidental or mischievous creation of set-user-id and set-group-id programs if not executed by the super-user. The .Fn chown system call follows symbolic links to operate on the target of the link rather than the link itself. .Pp The .Fn fchown system call is particularly useful when used in conjunction with the file locking primitives (see .Xr flock 2 ) . .Pp The .Fn lchown system call is similar to .Fn chown but does not follow symbolic links. .Pp The .Fn fchownat system call is equivalent to the .Fn chown and .Fn lchown except in the case where .Fa path specifies a relative path. In this case the file to be changed is determined relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. .Pp Values for .Fa flag are constructed by a bitwise-inclusive OR of flags from the following list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_SYMLINK_NOFOLLOW If .Fa path names a symbolic link, ownership of the symbolic link is changed. .It Dv AT_RESOLVE_BENEATH Only walk paths below the directory specified by the .Ar fd descriptor. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path +argument is an empty string, operate on the file or directory +referenced by the descriptor +.Fa fd . +If +.Fa fd +is equal to +.Dv AT_FDCWD , +operate on the current working directory. .El .Pp If .Fn fchownat is passed the special value .Dv AT_FDCWD in the .Fa fd parameter, the current working directory is used and the behavior is identical to a call to .Fn chown or .Fn lchown respectively, depending on whether or not the .Dv AT_SYMLINK_NOFOLLOW bit is set in the .Fa flag argument. .Pp One of the owner or group id's may be left unchanged by specifying it as -1. .Sh RETURN VALUES .Rv -std .Sh ERRORS The .Fn chown and .Fn lchown will fail and the file will be unchanged if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er EACCES Search permission is denied for a component of the path prefix. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er EPERM The operation would change the ownership, but the effective user ID is not the super-user. .It Bq Er EPERM The named file has its immutable or append-only flag set, see the .Xr chflags 2 manual page for more information. .It Bq Er EROFS The named file resides on a read-only file system. .It Bq Er EFAULT The .Fa path argument points outside the process's allocated address space. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .El .Pp The .Fn fchown system call will fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument does not refer to a valid descriptor. .It Bq Er EINVAL The .Fa fd argument refers to a socket, not a file. .It Bq Er EPERM The effective user ID is not the super-user. .It Bq Er EROFS The named file resides on a read-only file system. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .El .Pp In addition to the errors specified for .Fn chown and .Fn lchown , the .Fn fchownat system call may fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa path argument does not specify an absolute path and the .Fa fd argument is neither .Dv AT_FDCWD nor a valid file descriptor open for searching. .It Bq Er EINVAL The value of the .Fa flag argument is not valid. .It Bq Er ENOTDIR The .Fa path argument is not an absolute path and .Fa fd is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode or the .Dv AT_RESOLVE_BENEATH flag was specified. .El .Sh SEE ALSO .Xr chgrp 1 , .Xr chflags 2 , .Xr chmod 2 , .Xr flock 2 , .Xr chown 8 .Sh STANDARDS The .Fn chown system call is expected to conform to .St -p1003.1-90 . The .Fn fchownat system call follows The Open Group Extended API Set 2 specification. .Sh HISTORY The .Fn chown function appeared in .At v1 . The .Fn fchown system call appeared in .Bx 4.2 . .Pp The .Fn chown system call was changed to follow symbolic links in .Bx 4.4 . The .Fn lchown system call was added in .Fx 3.0 to compensate for the loss of functionality. .Pp The .Fn fchownat system call appeared in .Fx 8.0 . diff --git a/lib/libc/sys/link.2 b/lib/libc/sys/link.2 index bcf03f17f3bb..37225f9571d0 100644 --- a/lib/libc/sys/link.2 +++ b/lib/libc/sys/link.2 @@ -1,310 +1,319 @@ .\" Copyright (c) 1980, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)link.2 8.3 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt LINK 2 .Os .Sh NAME .Nm link , .Nm linkat .Nd make a hard file link .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In unistd.h .Ft int .Fn link "const char *name1" "const char *name2" .Ft int .Fo linkat .Fa "int fd1" "const char *name1" "int fd2" "const char *name2" "int flag" .Fc .Sh DESCRIPTION The .Fn link system call atomically creates the specified directory entry (hard link) .Fa name2 with the attributes of the underlying object pointed at by .Fa name1 . If the link is successful: the link count of the underlying object is incremented; .Fa name1 and .Fa name2 share equal access and rights to the underlying object. .Pp If .Fa name1 is removed, the file .Fa name2 is not deleted and the link count of the underlying object is decremented. .Pp The object pointed at by the .Fa name1 argument must exist for the hard link to succeed and both .Fa name1 and .Fa name2 must be in the same file system. The .Fa name1 argument may not be a directory. .Pp The .Fn linkat system call is equivalent to .Fa link except in the case where either .Fa name1 or .Fa name2 or both are relative paths. In this case a relative path .Fa name1 is interpreted relative to the directory associated with the file descriptor .Fa fd1 instead of the current working directory and similarly for .Fa name2 and the file descriptor .Fa fd2 . .Pp Values for .Fa flag are constructed by a bitwise-inclusive OR of flags from the following list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_SYMLINK_FOLLOW If .Fa name1 names a symbolic link, a new link for the target of the symbolic link is created. .It Dv AT_RESOLVE_BENEATH Only walk paths below the directory specified by the .Ar fd1 descriptor. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path2 +argument is an empty string, link the file referenced by the descriptor +.Fa fd2 . +The operation requires that the calling process has the +.Dv PRIV_VFS_FHOPEN +privilege, effectively being executed with effective user +.Dv root . .El .Pp If .Fn linkat is passed the special value .Dv AT_FDCWD in the .Fa fd1 or .Fa fd2 parameter, the current working directory is used for the respective .Fa name argument. If both .Fa fd1 and .Fa fd2 have value .Dv AT_FDCWD , the behavior is identical to a call to .Fn link . Unless .Fa flag contains the .Dv AT_SYMLINK_FOLLOW flag, if .Fa name1 names a symbolic link, a new link is created for the symbolic link .Fa name1 and not its target. .Sh RETURN VALUES .Rv -std link .Sh ERRORS The .Fn link system call will fail and no link will be created if: .Bl -tag -width Er .It Bq Er ENOTDIR A component of either path prefix is not a directory. .It Bq Er ENAMETOOLONG A component of either pathname exceeded 255 characters, or entire length of either path name exceeded 1023 characters. .It Bq Er ENOENT A component of either path prefix does not exist. .It Bq Er EOPNOTSUPP The file system containing the file named by .Fa name1 does not support links. .It Bq Er EMLINK The link count of the file named by .Fa name1 would exceed 32767. .It Bq Er EACCES A component of either path prefix denies search permission. .It Bq Er EACCES The requested link requires writing in a directory with a mode that denies write permission. .It Bq Er ELOOP Too many symbolic links were encountered in translating one of the pathnames. .It Bq Er ENOENT The file named by .Fa name1 does not exist. .It Bq Er EEXIST The link named by .Fa name2 does exist. .It Bq Er EPERM The file named by .Fa name1 is a directory. .It Bq Er EPERM The file named by .Fa name1 has its immutable or append-only flag set, see the .Xr chflags 2 manual page for more information. .It Bq Er EPERM The parent directory of the file named by .Fa name2 has its immutable flag set. .It Bq Er EXDEV The link named by .Fa name2 and the file named by .Fa name1 are on different file systems. .It Bq Er ENOSPC The directory in which the entry for the new link is being placed cannot be extended because there is no space left on the file system containing the directory. .It Bq Er EDQUOT The directory in which the entry for the new link is being placed cannot be extended because the user's quota of disk blocks on the file system containing the directory has been exhausted. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system to make the directory entry. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er EROFS The requested link requires writing in a directory on a read-only file system. .It Bq Er EFAULT One of the pathnames specified is outside the process's allocated address space. .El .Pp In addition to the errors returned by the .Fn link , the .Fn linkat system call may fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa name1 or .Fa name2 argument does not specify an absolute path and the .Fa fd1 or .Fa fd2 argument, respectively, is neither .Dv AT_FDCWD nor a valid file descriptor open for searching. .It Bq Er EINVAL The value of the .Fa flag argument is not valid. .It Bq Er ENOTDIR The .Fa name1 or .Fa name2 argument is not an absolute path and .Fa fd1 or .Fa fd2 , respectively, is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTCAPABLE .Fa name1 is not strictly relative to the starting directory. For example, .Fa name1 is absolute or includes a ".." component that escapes the directory hierarchy specified by .Fa fd , and the process is in capability mode or the .Dv AT_RESOLVE_BENEATH flag was specified. .El .Sh SEE ALSO .Xr chflags 2 , .Xr readlink 2 , .Xr symlink 2 , .Xr unlink 2 .Sh STANDARDS The .Fn link system call is expected to conform to .St -p1003.1-90 . The .Fn linkat system call follows The Open Group Extended API Set 2 specification. .Sh HISTORY The .Fn link function appeared in .At v1 . The .Fn linkat system call appeared in .Fx 8.0 . .Pp The .Fn link system call traditionally allows the super-user to link directories which corrupts the file system coherency. This implementation no longer permits it. diff --git a/lib/libc/sys/stat.2 b/lib/libc/sys/stat.2 index 0ed70620af63..55221d05a60e 100644 --- a/lib/libc/sys/stat.2 +++ b/lib/libc/sys/stat.2 @@ -1,481 +1,492 @@ .\" Copyright (c) 1980, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)stat.2 8.4 (Berkeley) 5/1/95 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt STAT 2 .Os .Sh NAME .Nm stat , .Nm lstat , .Nm fstat , .Nm fstatat .Nd get file status .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/stat.h .Ft int .Fn stat "const char * restrict path" "struct stat * restrict sb" .Ft int .Fn lstat "const char * restrict path" "struct stat * restrict sb" .Ft int .Fn fstat "int fd" "struct stat *sb" .Ft int .Fn fstatat "int fd" "const char *path" "struct stat *sb" "int flag" .Sh DESCRIPTION The .Fn stat system call obtains information about the file pointed to by .Fa path . Read, write or execute permission of the named file is not required, but all directories listed in the path name leading to the file must be searchable. .Pp The .Fn lstat system call is like .Fn stat except when the named file is a symbolic link, in which case .Fn lstat returns information about the link, while .Fn stat returns information about the file the link references. .Pp The .Fn fstat system call obtains the same information about an open file known by the file descriptor .Fa fd . .Pp The .Fn fstatat system call is equivalent to .Fn stat and .Fn lstat except when the .Fa path specifies a relative path. For .Fn fstatat and relative .Fa path , the status is retrieved from a file relative to the directory associated with the file descriptor .Fa fd instead of the current working directory. .Pp The values for the .Fa flag are constructed by a bitwise-inclusive OR of flags from this list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_SYMLINK_NOFOLLOW If .Fa path names a symbolic link, the status of the symbolic link is returned. .It Dv AT_RESOLVE_BENEATH Only walk paths below the starting directory. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path +argument is an empty string, operate on the file or directory +referenced by the descriptor +.Fa fd . +If +.Fa fd +is equal to +.Dv AT_FDCWD , +operate on the current working directory. .El .Pp If .Fn fstatat is passed the special value .Dv AT_FDCWD in the .Fa fd parameter, the current working directory is used and the behavior is identical to a call to .Fn stat or .Fn lstat respectively, depending on whether or not the .Dv AT_SYMLINK_NOFOLLOW bit is set in .Fa flag . .Pp When .Fn fstatat is called with an absolute .Fa path , it ignores the .Fa fd argument. .Pp The .Fa sb argument is a pointer to a .Vt stat structure as defined by .In sys/stat.h and into which information is placed concerning the file. .Pp The fields of .Vt "struct stat" related to the file system are: .Bl -tag -width ".Va st_nlink" .It Va st_dev Numeric ID of the device containing the file. .It Va st_ino The file's inode number. .It Va st_nlink Number of hard links to the file. .It Va st_flags Flags enabled for the file. See .Xr chflags 2 for the list of flags and their description. .El .Pp The .Va st_dev and .Va st_ino fields together identify the file uniquely within the system. .Pp The time-related fields of .Vt "struct stat" are: .Bl -tag -width ".Va st_birthtim" .It Va st_atim Time when file data was last accessed. Changed implicitly by syscalls such as .Xr read 2 and .Xr readv 2 , and explicitly by .Xr utimes 2 . .It Va st_mtim Time when file data was last modified. Changed implicitly by syscalls such as .Xr truncate 2 , .Xr write 2 , and .Xr writev 2 , and explicitly by .Xr utimes 2 . Also, any syscall which modifies directory content changes the .Va st_mtim for the affected directory. For instance, .Xr creat 2 , .Xr mkdir 2 , .Xr rename 2 , .Xr link 2 , and .Xr unlink 2 . .It Va st_ctim Time when file status was last changed (inode data modification). Changed implicitly by any syscall that affects file metadata, including .Va st_mtim , such as .Xr chflags 2 , .Xr chmod 2 , .Xr chown 2 , .Xr truncate 2 , .Xr utimes 2 , and .Xr write 2 . Also, any syscall which modifies directory content changes the .Va st_ctim for the affected directory. For instance, .Xr creat 2 , .Xr mkdir 2 , .Xr rename 2 , .Xr link 2 , and .Xr unlink 2 . .It Va st_birthtim Time when the inode was created. .El .Pp These time-related macros are defined for compatibility: .Bd -literal #define st_atime st_atim.tv_sec #define st_mtime st_mtim.tv_sec #define st_ctime st_ctim.tv_sec #ifndef _POSIX_SOURCE #define st_birthtime st_birthtim.tv_sec #endif #ifndef _POSIX_SOURCE #define st_atimespec st_atim #define st_mtimespec st_mtim #define st_ctimespec st_ctim #define st_birthtimespec st_birthtim #endif .Ed .Pp Size-related fields of the .Vt "struct stat" are: .Bl -tag -width ".Va st_blksize" .It Va st_size File size in bytes. .It Va st_blksize Optimal I/O block size for the file. .It Va st_blocks Actual number of blocks allocated for the file in 512-byte units. As short symbolic links are stored in the inode, this number may be zero. .El .Pp The access-related fields of .Vt "struct stat" are: .Bl -tag -width ".Va st_mode" .It Va st_uid User ID of the file's owner. .It Va st_gid Group ID of the file. .It Va st_mode Status of the file (see below). .El .Pp The status information word .Fa st_mode has these bits: .Bd -literal #define S_IFMT 0170000 /* type of file mask */ #define S_IFIFO 0010000 /* named pipe (fifo) */ #define S_IFCHR 0020000 /* character special */ #define S_IFDIR 0040000 /* directory */ #define S_IFBLK 0060000 /* block special */ #define S_IFREG 0100000 /* regular */ #define S_IFLNK 0120000 /* symbolic link */ #define S_IFSOCK 0140000 /* socket */ #define S_IFWHT 0160000 /* whiteout */ #define S_ISUID 0004000 /* set user id on execution */ #define S_ISGID 0002000 /* set group id on execution */ #define S_ISVTX 0001000 /* save swapped text even after use */ #define S_IRWXU 0000700 /* RWX mask for owner */ #define S_IRUSR 0000400 /* read permission, owner */ #define S_IWUSR 0000200 /* write permission, owner */ #define S_IXUSR 0000100 /* execute/search permission, owner */ #define S_IRWXG 0000070 /* RWX mask for group */ #define S_IRGRP 0000040 /* read permission, group */ #define S_IWGRP 0000020 /* write permission, group */ #define S_IXGRP 0000010 /* execute/search permission, group */ #define S_IRWXO 0000007 /* RWX mask for other */ #define S_IROTH 0000004 /* read permission, other */ #define S_IWOTH 0000002 /* write permission, other */ #define S_IXOTH 0000001 /* execute/search permission, other */ .Ed .Pp For a list of access modes, see .In sys/stat.h , .Xr access 2 and .Xr chmod 2 . These macros are available to test whether a .Va st_mode value passed in the .Fa m argument corresponds to a file of the specified type: .Bl -tag -width ".Fn S_ISFIFO m" .It Fn S_ISBLK m Test for a block special file. .It Fn S_ISCHR m Test for a character special file. .It Fn S_ISDIR m Test for a directory. .It Fn S_ISFIFO m Test for a pipe or FIFO special file. .It Fn S_ISLNK m Test for a symbolic link. .It Fn S_ISREG m Test for a regular file. .It Fn S_ISSOCK m Test for a socket. .It Fn S_ISWHT m Test for a whiteout. .El .Pp The macros evaluate to a non-zero value if the test is true or to the value 0 if the test is false. .Sh RETURN VALUES .Rv -std .Sh COMPATIBILITY Previous versions of the system used different types for the .Va st_dev , .Va st_uid , .Va st_gid , .Va st_rdev , .Va st_size , .Va st_blksize and .Va st_blocks fields. .Sh ERRORS The .Fn stat and .Fn lstat system calls will fail if: .Bl -tag -width Er .It Bq Er EACCES Search permission is denied for a component of the path prefix. .It Bq Er EFAULT The .Fa sb or .Fa path argument points to an invalid address. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er ENAMETOOLONG A component of a pathname exceeded 255 characters, or an entire path name exceeded 1023 characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er EOVERFLOW The file size in bytes cannot be represented correctly in the structure pointed to by .Fa sb . .El .Pp The .Fn fstat system call will fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument is not a valid open file descriptor. .It Bq Er EFAULT The .Fa sb argument points to an invalid address. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er EOVERFLOW The file size in bytes cannot be represented correctly in the structure pointed to by .Fa sb . .El .Pp In addition to the errors returned by the .Fn lstat , the .Fn fstatat may fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa path argument does not specify an absolute path and the .Fa fd argument is neither .Dv AT_FDCWD nor a valid file descriptor open for searching. .It Bq Er EINVAL The value of the .Fa flag argument is not valid. .It Bq Er ENOTDIR The .Fa path argument is not an absolute path and .Fa fd is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode or the .Dv AT_RESOLVE_BENEATH flag was specified. .El .Sh SEE ALSO .Xr access 2 , .Xr chmod 2 , .Xr chown 2 , .Xr fhstat 2 , .Xr statfs 2 , .Xr utimes 2 , .Xr sticky 7 , .Xr symlink 7 .Sh STANDARDS The .Fn stat and .Fn fstat system calls are expected to conform to .St -p1003.1-90 . The .Fn fstatat system call follows The Open Group Extended API Set 2 specification. .Sh HISTORY The .Fn stat and .Fn fstat system calls appeared in .At v1 . The .Fn lstat system call appeared in .Bx 4.2 . The .Fn fstatat system call appeared in .Fx 8.0 . .Sh BUGS Applying .Fn fstat to a socket returns a zeroed buffer, except for the blocksize field, and a unique device and inode number. diff --git a/lib/libc/sys/utimensat.2 b/lib/libc/sys/utimensat.2 index d31ee1f1515a..2af452898c9d 100644 --- a/lib/libc/sys/utimensat.2 +++ b/lib/libc/sys/utimensat.2 @@ -1,311 +1,322 @@ .\" $NetBSD: utimes.2,v 1.13 1999/03/22 19:45:11 garbled Exp $ .\" .\" Copyright (c) 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" Copyright (c) 2012, Jilles Tjoelker .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)utimes.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd February 23, 2021 +.Dd March 30, 2021 .Dt UTIMENSAT 2 .Os .Sh NAME .Nm futimens , .Nm utimensat .Nd set file access and modification times .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/stat.h .Ft int .Fn futimens "int fd" "const struct timespec times[2]" .Ft int .Fo utimensat .Fa "int fd" .Fa "const char *path" .Fa "const struct timespec times[2]" .Fa "int flag" .Fc .Sh DESCRIPTION The access and modification times of the file named by .Fa path or referenced by .Fa fd are changed as specified by the argument .Fa times . The inode-change-time of the file is set to the current time. .Pp If .Fa path specifies a relative path, it is relative to the current working directory if .Fa fd is .Dv AT_FDCWD and otherwise relative to the directory associated with the file descriptor .Fa fd . .Pp The .Va tv_nsec field of a .Vt timespec structure can be set to the special value .Dv UTIME_NOW to set the current time, or to .Dv UTIME_OMIT to leave the time unchanged. In either case, the .Va tv_sec field is ignored. .Pp If .Fa times is .No non- Ns Dv NULL , it is assumed to point to an array of two timespec structures. The access time is set to the value of the first element, and the modification time is set to the value of the second element. For file systems that support file birth (creation) times (such as .Dv UFS2 ) , the birth time will be set to the value of the second element if the second element is older than the currently set birth time. To set both a birth time and a modification time, two calls are required; the first to set the birth time and the second to set the (presumably newer) modification time. Ideally a new system call will be added that allows the setting of all three times at once. If .Fa times is .Dv NULL , this is equivalent to passing a pointer to an array of two timespec structures with both .Va tv_nsec fields set to .Dv UTIME_NOW . .Pp If both .Va tv_nsec fields are .Dv UTIME_OMIT , the timestamps remain unchanged and no permissions are needed for the file itself, although search permissions may be required for the path prefix. The call may or may not succeed if the named file does not exist. .Pp If both .Va tv_nsec fields are .Dv UTIME_NOW , the caller must be the owner of the file, have permission to write the file, or be the super-user. .Pp For all other values of the timestamps, the caller must be the owner of the file or be the super-user. .Pp The values for the .Fa flag argument of the .Fn utimensat system call are constructed by a bitwise-inclusive OR of flags from the following list, defined in .In fcntl.h : .Bl -tag -width indent .It Dv AT_SYMLINK_NOFOLLOW If .Fa path names a symbolic link, the symbolic link's times are changed. By default, .Fn utimensat changes the times of the file referenced by the symbolic link. .It Dv AT_RESOLVE_BENEATH Only walk paths below the directory specified by the .Ar fd descriptor. See the description of the .Dv O_RESOLVE_BENEATH flag in the .Xr open 2 manual page. +.It Dv AT_EMPTY_PATH +If the +.Fa path +argument is an empty string, operate on the file or directory +referenced by the descriptor +.Fa fd . +If +.Fa fd +is equal to +.Dv AT_FDCWD , +operate on the current working directory. .El .Sh RETURN VALUES .Rv -std .Sh COMPATIBILITY If the running kernel does not support this system call, a wrapper emulates it using .Xr fstatat 2 , .Xr futimesat 2 and .Xr lutimes 2 . As a result, timestamps will be rounded down to the nearest microsecond, .Dv UTIME_OMIT is not atomic and .Dv AT_SYMLINK_NOFOLLOW is not available with a path relative to a file descriptor. .Sh ERRORS These system calls will fail if: .Bl -tag -width Er .It Bq Er EACCES The .Fa times argument is .Dv NULL , or both .Va tv_nsec values are .Dv UTIME_NOW , and the effective user ID of the process does not match the owner of the file, and is not the super-user, and write access is denied. .It Bq Er EFAULT The .Fa times argument points outside the process's allocated address space. .It Bq Er EINVAL The .Va tv_nsec component of at least one of the values specified by the .Fa times argument has a value less than 0 or greater than 999999999 and is not equal to .Dv UTIME_NOW or .Dv UTIME_OMIT . .It Bq Er EIO An I/O error occurred while reading or writing the affected inode. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .It Bq Er EPERM The .Fa times argument is not .Dv NULL nor are both .Va tv_nsec values .Dv UTIME_NOW , nor are both .Va tv_nsec values .Dv UTIME_OMIT and the calling process's effective user ID does not match the owner of the file and is not the super-user. .It Bq Er EPERM The named file has its immutable or append-only flag set, see the .Xr chflags 2 manual page for more information. .It Bq Er EROFS The file system containing the file is mounted read-only. .El .Pp The .Fn futimens system call will fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument does not refer to a valid descriptor. .El .Pp The .Fn utimensat system call will fail if: .Bl -tag -width Er .It Bq Er EACCES Search permission is denied for a component of the path prefix. .It Bq Er EBADF The .Fa path argument does not specify an absolute path and the .Fa fd argument is neither .Dv AT_FDCWD nor a valid file descriptor. .It Bq Er EFAULT The .Fa path argument points outside the process's allocated address space. .It Bq Er ELOOP Too many symbolic links were encountered in translating the pathname. .It Bq Er ENAMETOOLONG A component of a pathname exceeded .Dv NAME_MAX characters, or an entire path name exceeded .Dv PATH_MAX characters. .It Bq Er ENOENT The named file does not exist. .It Bq Er ENOTDIR A component of the path prefix is not a directory. .It Bq Er ENOTDIR The .Fa path argument is not an absolute path and .Fa fd is neither .Dv AT_FDCWD nor a file descriptor associated with a directory. .It Bq Er ENOTCAPABLE .Fa path is an absolute path, or contained a ".." component leading to a directory outside of the directory hierarchy specified by .Fa fd , and the process is in capability mode or the .Dv AT_RESOLVE_BENEATH flag was specified. .El .Sh SEE ALSO .Xr chflags 2 , .Xr stat 2 , .Xr symlink 2 , .Xr utimes 2 , .Xr utime 3 , .Xr symlink 7 .Sh STANDARDS The .Fn futimens and .Fn utimensat system calls are expected to conform to .St -p1003.1-2008 . .Sh HISTORY The .Fn futimens and .Fn utimensat system calls appeared in .Fx 10.3 . diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 07c89e634de4..f4ec3cea9fff 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -1,1715 +1,1762 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef INVARIANTS #include #endif #include #include #include #define NAMEI_DIAGNOSTIC 1 #undef NAMEI_DIAGNOSTIC SDT_PROVIDER_DEFINE(vfs); SDT_PROBE_DEFINE4(vfs, namei, lookup, entry, "struct vnode *", "char *", "unsigned long", "bool"); SDT_PROBE_DEFINE4(vfs, namei, lookup, return, "int", "struct vnode *", "bool", "struct nameidata"); /* Allocation zone for namei. */ uma_zone_t namei_zone; /* Placeholder vnode for mp traversal. */ static struct vnode *vp_crossmp; static int crossmp_vop_islocked(struct vop_islocked_args *ap) { return (LK_SHARED); } static int crossmp_vop_lock1(struct vop_lock1_args *ap) { struct vnode *vp; struct lock *lk __unused; const char *file __unused; int flags, line __unused; vp = ap->a_vp; lk = vp->v_vnlock; flags = ap->a_flags; file = ap->a_file; line = ap->a_line; if ((flags & LK_SHARED) == 0) panic("invalid lock request for crossmp"); WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line, flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL); WITNESS_LOCK(&lk->lock_object, 0, file, line); if ((flags & LK_INTERLOCK) != 0) VI_UNLOCK(vp); LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, line); return (0); } static int crossmp_vop_unlock(struct vop_unlock_args *ap) { struct vnode *vp; struct lock *lk __unused; vp = ap->a_vp; lk = vp->v_vnlock; WITNESS_UNLOCK(&lk->lock_object, 0, LOCK_FILE, LOCK_LINE); LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE, LOCK_LINE); return (0); } static struct vop_vector crossmp_vnodeops = { .vop_default = &default_vnodeops, .vop_islocked = crossmp_vop_islocked, .vop_lock1 = crossmp_vop_lock1, .vop_unlock = crossmp_vop_unlock, }; /* * VFS_VOP_VECTOR_REGISTER(crossmp_vnodeops) is not used here since the vnode * gets allocated early. See nameiinit for the direct call below. */ struct nameicap_tracker { struct vnode *dp; TAILQ_ENTRY(nameicap_tracker) nm_link; }; /* Zone for cap mode tracker elements used for dotdot capability checks. */ MALLOC_DEFINE(M_NAMEITRACKER, "namei_tracker", "namei tracking for dotdot"); static void nameiinit(void *dummy __unused) { namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); vfs_vector_op_register(&crossmp_vnodeops); getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); static int lookup_cap_dotdot = 1; SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN, &lookup_cap_dotdot, 0, "enables \"..\" components in path lookup in capability mode"); static int lookup_cap_dotdot_nonlocal = 1; SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, &lookup_cap_dotdot_nonlocal, 0, "enables \"..\" components in path lookup in capability mode " "on non-local mount"); static void nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; struct componentname *cnp; if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) return; cnp = &ndp->ni_cnd; nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); if (nt != NULL && nt->dp == dp) return; nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK); vhold(dp); nt->dp = dp; TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); } static void nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) { struct nameicap_tracker *nt, *nt1; nt = first; TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); vdrop(nt->dp); free(nt, M_NAMEITRACKER); } } static void nameicap_cleanup(struct nameidata *ndp) { KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); nameicap_cleanup_from(ndp, NULL); } /* * For dotdot lookups in capability mode, only allow the component * lookup to succeed if the resulting directory was already traversed * during the operation. This catches situations where already * traversed directory is moved to different parent, and then we walk * over it with dotdots. * * Also allow to force failure of dotdot lookups for non-local * filesystems, where external agents might assist local lookups to * escape the compartment. */ static int nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; struct mount *mp; if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) return (0); if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) return (ENOTCAPABLE); mp = dp->v_mount; if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && (mp->mnt_flag & MNT_LOCAL) == 0) return (ENOTCAPABLE); TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, nm_link) { if (dp == nt->dp) { nt = TAILQ_NEXT(nt, nm_link); if (nt != NULL) nameicap_cleanup_from(ndp, nt); return (0); } } return (ENOTCAPABLE); } static void namei_cleanup_cnp(struct componentname *cnp) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif } static int namei_handle_root(struct nameidata *ndp, struct vnode **dpp) { struct componentname *cnp; cnp = &ndp->ni_cnd; if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif return (ENOTCAPABLE); } while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } *dpp = ndp->ni_rootdir; vrefact(*dpp); return (0); } static int namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) { struct componentname *cnp; struct file *dfp; struct thread *td; struct pwd *pwd; cap_rights_t rights; int error; bool startdir_used; cnp = &ndp->ni_cnd; td = cnp->cn_thread; startdir_used = false; *pwdp = NULL; *dpp = NULL; #ifdef CAPABILITY_MODE /* * In capability mode, lookups must be restricted to happen in * the subtree with the root specified by the file descriptor: * - The root must be real file descriptor, not the pseudo-descriptor * AT_FDCWD. * - The passed path must be relative and not absolute. * - If lookup_cap_dotdot is disabled, path must not contain the * '..' components. * - If lookup_cap_dotdot is enabled, we verify that all '..' * components lookups result in the directories which were * previously walked by us, which prevents an escape from * the relative root. */ if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; ndp->ni_resflags |= NIRES_STRICTREL; if (ndp->ni_dirfd == AT_FDCWD) { #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif return (ECAPMODE); } } #endif error = 0; /* * Get starting point for the translation. */ pwd = pwd_hold(td); /* * The reference on ni_rootdir is acquired in the block below to avoid * back-to-back atomics for absolute lookups. */ ndp->ni_rootdir = pwd->pwd_rdir; ndp->ni_topdir = pwd->pwd_jdir; if (cnp->cn_pnbuf[0] == '/') { ndp->ni_resflags |= NIRES_ABS; error = namei_handle_root(ndp, dpp); } else { if (ndp->ni_startdir != NULL) { *dpp = ndp->ni_startdir; startdir_used = true; } else if (ndp->ni_dirfd == AT_FDCWD) { *dpp = pwd->pwd_cdir; vrefact(*dpp); } else { rights = *ndp->ni_rightsneeded; cap_rights_set_one(&rights, CAP_LOOKUP); if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_ATFD1(ndp->ni_dirfd); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_ATFD2(ndp->ni_dirfd); /* * Effectively inlined fgetvp_rights, because we need to * inspect the file as well as grabbing the vnode. */ error = fget_cap(td, ndp->ni_dirfd, &rights, &dfp, &ndp->ni_filecaps); if (error != 0) { /* * Preserve the error; it should either be EBADF * or capability-related, both of which can be * safely returned to the caller. */ } else { if (dfp->f_ops == &badfileops) { error = EBADF; } else if (dfp->f_vnode == NULL) { error = ENOTDIR; } else { *dpp = dfp->f_vnode; vrefact(*dpp); if ((dfp->f_flag & FSEARCH) != 0) cnp->cn_flags |= NOEXECCHECK; } fdrop(dfp, td); } #ifdef CAPABILITIES /* * If file descriptor doesn't have all rights, * all lookups relative to it must also be * strictly relative. */ CAP_ALL(&rights); if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) || ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || ndp->ni_filecaps.fc_nioctls != -1) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; ndp->ni_resflags |= NIRES_STRICTREL; } #endif } - if (error == 0 && (*dpp)->v_type != VDIR) + if (error == 0 && (*dpp)->v_type != VDIR && + (cnp->cn_pnbuf[0] != '\0' || + (cnp->cn_flags & EMPTYPATH) == 0)) error = ENOTDIR; } if (error == 0 && (cnp->cn_flags & RBENEATH) != 0) { if (cnp->cn_pnbuf[0] == '/') { error = ENOTCAPABLE; } else if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT; } } /* * If we are auditing the kernel pathname, save the user pathname. */ if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (ndp->ni_startdir != NULL && !startdir_used) vrele(ndp->ni_startdir); if (error != 0) { if (*dpp != NULL) vrele(*dpp); pwd_drop(pwd); return (error); } if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 && lookup_cap_dotdot != 0) ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; SDT_PROBE4(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf, cnp->cn_flags, false); *pwdp = pwd; return (0); } static int namei_getpath(struct nameidata *ndp) { struct componentname *cnp; int error; cnp = &ndp->ni_cnd; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. */ cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (ndp->ni_segflg == UIO_SYSSPACE) { error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); } else { error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); } - if (__predict_false(error != 0)) { - namei_cleanup_cnp(cnp); + if (__predict_false(error != 0)) return (error); - } /* - * Don't allow empty pathnames. + * Don't allow empty pathnames unless EMPTYPATH is specified. + * Caller checks for ENOENT as an indication for the empty path. */ - if (__predict_false(*cnp->cn_pnbuf == '\0')) { - namei_cleanup_cnp(cnp); + if (__predict_false(*cnp->cn_pnbuf == '\0')) return (ENOENT); - } cnp->cn_nameptr = cnp->cn_pnbuf; return (0); } +static int +namei_emptypath(struct nameidata *ndp) +{ + struct componentname *cnp; + struct pwd *pwd; + struct vnode *dp; + int error; + + cnp = &ndp->ni_cnd; + MPASS(*cnp->cn_pnbuf == '\0'); + MPASS((cnp->cn_flags & EMPTYPATH) != 0); + MPASS((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) == 0); + + error = namei_setup(ndp, &dp, &pwd); + if (error != 0) { + namei_cleanup_cnp(cnp); + goto errout; + } + + ndp->ni_vp = dp; + vref(dp); + namei_cleanup_cnp(cnp); + pwd_drop(pwd); + ndp->ni_resflags |= NIRES_EMPTYPATH; + NDVALIDATE(ndp); + if ((cnp->cn_flags & LOCKLEAF) != 0) { + VOP_LOCK(dp, (cnp->cn_flags & LOCKSHARED) != 0 ? + LK_SHARED : LK_EXCLUSIVE); + if (VN_IS_DOOMED(dp)) { + vput(dp); + error = ENOENT; + goto errout; + } + } + SDT_PROBE4(vfs, namei, lookup, return, 0, ndp->ni_vp, false, ndp); + return (0); + +errout: + SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); + return (error); +} + /* * Convert a pathname into a pointer to a locked vnode. * * The FOLLOW flag is set when symbolic links are to be followed * when they occur at the end of the name translation process. * Symbolic links are always followed for all other pathname * components other than the last. * * The segflg defines whether the name is to be copied from user * space or kernel space. * * Overall outline of namei: * * copy in name * get starting directory * while (!done && !error) { * call lookup to search path. * if symbolic link, massage name in buffer and continue * } */ int namei(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ struct componentname *cnp; struct thread *td; struct pwd *pwd; struct uio auio; int error, linklen; enum cache_fpl_status status; cnp = &ndp->ni_cnd; td = cnp->cn_thread; #ifdef INVARIANTS KASSERT((ndp->ni_debugflags & NAMEI_DBG_CALLED) == 0, ("%s: repeated call to namei without NDREINIT", __func__)); KASSERT(ndp->ni_debugflags == NAMEI_DBG_INITED, ("%s: bad debugflags %d", __func__, ndp->ni_debugflags)); ndp->ni_debugflags |= NAMEI_DBG_CALLED; if (ndp->ni_startdir != NULL) ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR; if (cnp->cn_flags & FAILIFEXISTS) { KASSERT(cnp->cn_nameiop == CREATE, ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop)); /* * The limitation below is to restrict hairy corner cases. */ KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT, ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF", __func__)); } /* * For NDVALIDATE. * * While NDINIT may seem like a more natural place to do it, there are * callers which directly modify flags past invoking init. */ cnp->cn_origflags = cnp->cn_flags; #endif ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; KASSERT(ndp->ni_resflags == 0, ("%s: garbage in ni_resflags: %x\n", __func__, ndp->ni_resflags)); KASSERT(cnp->cn_cred && td->td_proc, ("namei: bad cred/proc")); KASSERT((cnp->cn_flags & NAMEI_INTERNAL_FLAGS) == 0, ("namei: unexpected flags: %" PRIx64 "\n", cnp->cn_flags & NAMEI_INTERNAL_FLAGS)); if (cnp->cn_flags & NOCACHE) KASSERT(cnp->cn_nameiop != LOOKUP, ("%s: NOCACHE passed with LOOKUP", __func__)); MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || ndp->ni_startdir->v_type == VBAD); ndp->ni_lcf = 0; ndp->ni_loopcnt = 0; ndp->ni_vp = NULL; error = namei_getpath(ndp); if (__predict_false(error != 0)) { + if (error == ENOENT && (cnp->cn_flags & EMPTYPATH) != 0) + return (namei_emptypath(ndp)); + namei_cleanup_cnp(cnp); + SDT_PROBE4(vfs, namei, lookup, return, error, NULL, + false, ndp); return (error); } #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { KASSERT(cnp->cn_thread == curthread, ("namei not using curthread")); ktrnamei(cnp->cn_pnbuf); } #endif /* * First try looking up the target without locking any vnodes. * * We may need to start from scratch or pick up where it left off. */ error = cache_fplookup(ndp, &status, &pwd); switch (status) { case CACHE_FPL_STATUS_UNSET: __assert_unreachable(); break; case CACHE_FPL_STATUS_HANDLED: if (error == 0) NDVALIDATE(ndp); return (error); case CACHE_FPL_STATUS_PARTIAL: TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; case CACHE_FPL_STATUS_DESTROYED: ndp->ni_loopcnt = 0; error = namei_getpath(ndp); if (__predict_false(error != 0)) { + namei_cleanup_cnp(cnp); return (error); } /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: TAILQ_INIT(&ndp->ni_cap_tracker); MPASS(ndp->ni_lcf == 0); error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); return (error); } break; } /* * Locked lookup. */ for (;;) { ndp->ni_startdir = dp; error = lookup(ndp); if (error != 0) goto out; /* * If not a symbolic link, we're done. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { SDT_PROBE4(vfs, namei, lookup, return, error, (error == 0 ? ndp->ni_vp : NULL), false, ndp); if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { namei_cleanup_cnp(cnp); } else cnp->cn_flags |= HASBUF; nameicap_cleanup(ndp); pwd_drop(pwd); if (error == 0) NDVALIDATE(ndp); return (error); } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; } #ifdef MAC if ((cnp->cn_flags & NOMACCHECK) == 0) { error = mac_vnode_check_readlink(td->td_ucred, ndp->ni_vp); if (error != 0) break; } #endif if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error != 0) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENOENT; break; } if (linklen + ndp->ni_pathlen > MAXPATHLEN) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENAMETOOLONG; break; } if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; vput(ndp->ni_vp); dp = ndp->ni_dvp; /* * Check if root directory should replace current directory. */ cnp->cn_nameptr = cnp->cn_pnbuf; if (*(cnp->cn_nameptr) == '/') { vrele(dp); error = namei_handle_root(ndp, &dp); if (error != 0) goto out; } } vput(ndp->ni_vp); ndp->ni_vp = NULL; vrele(ndp->ni_dvp); out: MPASS(error != 0); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); namei_cleanup_cnp(cnp); nameicap_cleanup(ndp); pwd_drop(pwd); return (error); } static int compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags) { if (mp == NULL || ((lkflags & LK_SHARED) && (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) || ((cnflags & ISDOTDOT) && (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) { lkflags &= ~LK_SHARED; lkflags |= LK_EXCLUSIVE; } lkflags |= LK_NODDLKTREAT; return (lkflags); } static __inline int needs_exclusive_leaf(struct mount *mp, int flags) { /* * Intermediate nodes can use shared locks, we only need to * force an exclusive lock for leaf nodes. */ if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) return (0); /* Always use exclusive locks if LOCKSHARED isn't set. */ if (!(flags & LOCKSHARED)) return (1); /* * For lookups during open(), if the mount point supports * extended shared operations, then use a shared lock for the * leaf node, otherwise use an exclusive lock. */ if ((flags & ISOPEN) != 0) return (!MNT_EXTENDED_SHARED(mp)); /* * Lookup requests outside of open() that specify LOCKSHARED * only need a shared lock on the leaf vnode. */ return (0); } /* * Various filesystems expect to be able to copy a name component with length * bounded by NAME_MAX into a directory entry buffer of size MAXNAMLEN. Make * sure that these are the same size. */ _Static_assert(MAXNAMLEN == NAME_MAX, "MAXNAMLEN and NAME_MAX have different values"); /* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_ptr and is of length ni_pathlen. * The starting directory is taken from ni_startdir. The pathname is * descended until done, or a symbolic link is encountered. The variable * ni_more is clear if the path is completed; it is set to one if a * symbolic link needing interpretation is encountered. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent * call VOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp */ int lookup(struct nameidata *ndp) { char *cp; /* pointer into pathname argument */ char *prev_ni_next; /* saved ndp->ni_next */ struct vnode *dp = NULL; /* the directory we are searching */ struct vnode *tdp; /* saved dp */ struct mount *mp; /* mount table entry */ struct prison *pr; size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; int dpunlocked = 0; /* dp has already been unlocked */ int relookup = 0; /* do not consume the path component */ struct componentname *cnp = &ndp->ni_cnd; int lkflags_save; int ni_dvp_unlocked; /* * Setup: break out flag bits into variables. */ ni_dvp_unlocked = 0; wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); /* * When set to zero, docache causes the last component of the * pathname to be deleted from the cache and the full lookup * of the name to be done (via VOP_CACHEDLOOKUP()). Often * filesystems need some pre-computed values that are made * during the full lookup, for instance UFS sets dp->i_offset. * * The docache variable is set to zero when requested by the * NOCACHE flag and for all modifying operations except CREATE. */ docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; if (cnp->cn_nameiop == DELETE || (wantparent && cnp->cn_nameiop != CREATE && cnp->cn_nameiop != LOOKUP)) docache = 0; rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; ndp->ni_dvp = NULL; /* * We use shared locks until we hit the parent of the last cn then * we adjust based on the requesting flags. */ cnp->cn_lkflags = LK_SHARED; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, cnp->cn_flags)); dirloop: /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) continue; cnp->cn_namelen = cp - cnp->cn_nameptr; if (cnp->cn_namelen > NAME_MAX) { error = ENAMETOOLONG; goto bad; } #ifdef NAMEI_DIAGNOSTIC { char c = *cp; *cp = '\0'; printf("{%s}: ", cnp->cn_nameptr); *cp = c; } #endif prev_ni_pathlen = ndp->ni_pathlen; ndp->ni_pathlen -= cnp->cn_namelen; KASSERT(ndp->ni_pathlen <= PATH_MAX, ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); prev_ni_next = ndp->ni_next; ndp->ni_next = cp; /* * Replace multiple slashes by a single slash and trailing slashes * by a null. This must be done before VOP_LOOKUP() because some * fs's don't know about trailing slashes. Remember if there were * trailing slashes to handle symlinks, existing non-directories * and non-existing files that won't be directories specially later. */ while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { cp++; ndp->ni_pathlen--; if (*cp == '\0') { *ndp->ni_next = '\0'; cnp->cn_flags |= TRAILINGSLASH; } } ndp->ni_next = cp; cnp->cn_flags |= MAKEENTRY; if (*cp == '\0' && docache == 0) cnp->cn_flags &= ~MAKEENTRY; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') cnp->cn_flags |= ISDOTDOT; else cnp->cn_flags &= ~ISDOTDOT; if (*ndp->ni_next == 0) cnp->cn_flags |= ISLASTCN; else cnp->cn_flags &= ~ISLASTCN; if ((cnp->cn_flags & ISLASTCN) != 0 && cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; goto bad; } nameicap_tracker_add(ndp, dp); /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') { if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (cnp->cn_nameiop != LOOKUP) { error = EISDIR; goto bad; } if (wantparent) { ndp->ni_dvp = dp; VREF(dp); } ndp->ni_vp = dp; if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_VNODE1(dp); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_VNODE2(dp); if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) VOP_UNLOCK(dp); /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); goto success; } /* * Handle "..": five special cases. * 0. If doing a capability lookup and lookup_cap_dotdot is * disabled, return ENOTCAPABLE. * 1. Return an error if this is the last component of * the name and the operation is DELETE or RENAME. * 2. If at root directory (e.g. after chroot) * or at absolute root directory * then ignore it so can't get out. * 3. If this vnode is the root of a mounted * filesystem, then replace it with the * vnode which was mounted on so we take the * .. in the other filesystem. * 4. If the vnode is the top directory of * the jail or chroot, don't let them out. * 5. If doing a capability lookup and lookup_cap_dotdot is * enabled, return ENOTCAPABLE if the lookup would escape * from the initial file descriptor directory. Checks are * done by ensuring that namei() already traversed the * result of dotdot lookup. */ if (cnp->cn_flags & ISDOTDOT) { if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT)) == NI_LCF_STRICTRELATIVE) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif error = ENOTCAPABLE; goto bad; } if ((cnp->cn_flags & ISLASTCN) != 0 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; goto bad; } for (;;) { for (pr = cnp->cn_cred->cr_prison; pr != NULL; pr = pr->pr_parent) if (dp == pr->pr_root) break; if (dp == ndp->ni_rootdir || dp == ndp->ni_topdir || dp == rootvnode || pr != NULL || ((dp->v_vflag & VV_ROOT) != 0 && (cnp->cn_flags & NOCROSSMOUNT) != 0)) { ndp->ni_dvp = dp; ndp->ni_vp = dp; VREF(dp); goto nextname; } if ((dp->v_vflag & VV_ROOT) == 0) break; if (VN_IS_DOOMED(dp)) { /* forced unmount */ error = ENOENT; goto bad; } tdp = dp; dp = dp->v_mount->mnt_vnodecovered; VREF(dp); vput(tdp); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, ISDOTDOT)); error = nameicap_check_dotdot(ndp, dp); if (error != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif goto bad; } } } /* * We now have a segment name to search for, and a directory to search. */ unionlookup: #ifdef MAC error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, cnp); if (error) goto bad; #endif ndp->ni_dvp = dp; ndp->ni_vp = NULL; ASSERT_VOP_LOCKED(dp, "lookup"); /* * If we have a shared lock we may need to upgrade the lock for the * last operation. */ if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) && dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED) vn_lock(dp, LK_UPGRADE|LK_RETRY); if (VN_IS_DOOMED(dp)) { error = ENOENT; goto bad; } /* * If we're looking up the last component and we need an exclusive * lock, adjust our lkflags. */ if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) cnp->cn_lkflags = LK_EXCLUSIVE; #ifdef NAMEI_DIAGNOSTIC vn_printf(dp, "lookup in "); #endif lkflags_save = cnp->cn_lkflags; cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags, cnp->cn_flags); error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp); cnp->cn_lkflags = lkflags_save; if (error != 0) { KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); #ifdef NAMEI_DIAGNOSTIC printf("not found\n"); #endif if ((error == ENOENT) && (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { tdp = dp; dp = dp->v_mount->mnt_vnodecovered; VREF(dp); vput(tdp); vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY, cnp->cn_flags)); nameicap_tracker_add(ndp, dp); goto unionlookup; } if (error == ERELOOKUP) { vref(dp); ndp->ni_vp = dp; error = 0; relookup = 1; goto good; } if (error != EJUSTRETURN) goto bad; /* * At this point, we know we're at the end of the * pathname. If creating / renaming, we can consider * allowing the file or directory to be created / renamed, * provided we're not on a read-only filesystem. */ if (rdonly) { error = EROFS; goto bad; } /* trailing slash only allowed for directories */ if ((cnp->cn_flags & TRAILINGSLASH) && !(cnp->cn_flags & WILLBEDIR)) { error = ENOENT; goto bad; } if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory vnode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } goto success; } good: #ifdef NAMEI_DIAGNOSTIC printf("found\n"); #endif dp = ndp->ni_vp; /* * Check to see if the vnode has been mounted on; * if so find the root of the mounted filesystem. */ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { if (vfs_busy(mp, 0)) continue; vput(dp); if (dp != ndp->ni_dvp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); vrefact(vp_crossmp); ndp->ni_dvp = vp_crossmp; error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags, cnp->cn_flags), &tdp); vfs_unbusy(mp); if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) panic("vp_crossmp exclusively locked or reclaimed"); if (error) { dpunlocked = 1; goto bad2; } ndp->ni_vp = dp = tdp; } /* * Check for symbolic link */ if ((dp->v_type == VLNK) && ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; if (VN_IS_DOOMED(dp)) { /* * We can't know whether the directory was mounted with * NOSYMFOLLOW, so we can't follow safely. */ error = ENOENT; goto bad2; } if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { error = EACCES; goto bad2; } /* * Symlink code always expects an unlocked dvp. */ if (ndp->ni_dvp != ndp->ni_vp) { VOP_UNLOCK(ndp->ni_dvp); ni_dvp_unlocked = 1; } goto success; } nextname: /* * Not a symbolic link that we will follow. Continue with the * next component if there is any; otherwise, we're done. */ KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', ("lookup: invalid path state.")); if (relookup) { relookup = 0; ndp->ni_pathlen = prev_ni_pathlen; ndp->ni_next = prev_ni_next; if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); goto dirloop; } if (cnp->cn_flags & ISDOTDOT) { error = nameicap_check_dotdot(ndp, ndp->ni_vp); if (error != 0) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif goto bad2; } } if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next; while (*cnp->cn_nameptr == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); goto dirloop; } /* * If we're processing a path with a trailing slash, * check that the end result is a directory. */ if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) { error = ENOTDIR; goto bad2; } /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad2; } if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } if (!wantparent) { ni_dvp_unlocked = 2; if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) { VOP_UNLOCK(ndp->ni_dvp); ni_dvp_unlocked = 1; } if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG_VNODE1(dp); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG_VNODE2(dp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp); success: /* * FIXME: for lookups which only cross a mount point to fetch the * root vnode, ni_dvp will be set to vp_crossmp. This can be a problem * if either WANTPARENT or LOCKPARENT is set. */ /* * Because of shared lookup we may have the vnode shared locked, but * the caller may want it to be exclusively locked. */ if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) && VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { vn_lock(dp, LK_UPGRADE | LK_RETRY); if (VN_IS_DOOMED(dp)) { error = ENOENT; goto bad2; } } if (ndp->ni_vp != NULL) { if ((cnp->cn_flags & ISDOTDOT) == 0) nameicap_tracker_add(ndp, ndp->ni_vp); if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) goto bad_eexist; } return (0); bad2: if (ni_dvp_unlocked != 2) { if (dp != ndp->ni_dvp && !ni_dvp_unlocked) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); } bad: if (!dpunlocked) vput(dp); ndp->ni_vp = NULL; return (error); bad_eexist: /* * FAILIFEXISTS handling. * * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange * behaviour of leaving the vnode unlocked if the target is the same * vnode as the parent. */ MPASS((cnp->cn_flags & ISSYMLINK) == 0); if (ndp->ni_vp == ndp->ni_dvp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); vrele(ndp->ni_vp); ndp->ni_dvp = NULL; ndp->ni_vp = NULL; NDFREE(ndp, NDF_ONLY_PNBUF); return (EEXIST); } /* * relookup - lookup a path name component * Used by lookup to re-acquire things. */ int relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct vnode *dp = NULL; /* the directory we are searching */ int rdonly; /* lookup read-only flag bit */ int error = 0; KASSERT(cnp->cn_flags & ISLASTCN, ("relookup: Not given last component.")); /* * Setup: break out flag bits into variables. */ KASSERT((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) != 0, ("relookup: parent not wanted")); rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; cnp->cn_lkflags = LK_EXCLUSIVE; vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ #ifdef NAMEI_DIAGNOSTIC printf("{%s}: ", cnp->cn_nameptr); #endif /* * Check for "" which represents the root directory after slash * removal. */ if (cnp->cn_nameptr[0] == '\0') { /* * Support only LOOKUP for "/" because lookup() * can't succeed for CREATE, DELETE and RENAME. */ KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP")); KASSERT(dp->v_type == VDIR, ("dp is not a directory")); if (!(cnp->cn_flags & LOCKLEAF)) VOP_UNLOCK(dp); *vpp = dp; /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); } if (cnp->cn_flags & ISDOTDOT) panic ("relookup: lookup on dot-dot"); /* * We now have a segment name to search for, and a directory to search. */ #ifdef NAMEI_DIAGNOSTIC vn_printf(dp, "search in "); #endif if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { KASSERT(*vpp == NULL, ("leaf should be empty")); if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory vnode in ndp->ni_dvp. */ return (0); } dp = *vpp; /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { if (dvp == dp) vrele(dvp); else vput(dvp); error = EROFS; goto bad; } /* * Set the parent lock/ref state to the requested state. */ if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) VOP_UNLOCK(dvp); /* * Check for symbolic link */ KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), ("relookup: symlink found.\n")); /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp); return (0); bad: vput(dp); *vpp = NULL; return (error); } /* * Free data allocated by namei(); see namei(9) for details. */ void NDFREE_PNBUF(struct nameidata *ndp) { if ((ndp->ni_cnd.cn_flags & HASBUF) != 0) { MPASS((ndp->ni_cnd.cn_flags & (SAVENAME | SAVESTART)) != 0); uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } } /* * NDFREE_PNBUF replacement for callers that know there is no buffer. * * This is a hack. Preferably the VFS layer would not produce anything more * than it was asked to do. Unfortunately several non-LOOKUP cases can add the * HASBUF flag to the result. Even then an interface could be implemented where * the caller specifies what they expected to see in the result and what they * are going to take care of. * * In the meantime provide this kludge as a trivial replacement for NDFREE_PNBUF * calls scattered throughout the kernel where we know for a fact the flag must not * be seen. */ #ifdef INVARIANTS void NDFREE_NOTHING(struct nameidata *ndp) { struct componentname *cnp; cnp = &ndp->ni_cnd; KASSERT(cnp->cn_nameiop == LOOKUP, ("%s: got non-LOOKUP op %d\n", __func__, cnp->cn_nameiop)); KASSERT((cnp->cn_flags & (SAVENAME | HASBUF)) == 0, ("%s: bad flags \%" PRIx64 "\n", __func__, cnp->cn_flags)); } #endif void (NDFREE)(struct nameidata *ndp, const u_int flags) { int unlock_dvp; int unlock_vp; unlock_dvp = 0; unlock_vp = 0; if (!(flags & NDF_NO_FREE_PNBUF)) { NDFREE_PNBUF(ndp); } if (!(flags & NDF_NO_VP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) unlock_vp = 1; if (!(flags & NDF_NO_DVP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKPARENT) && ndp->ni_dvp != ndp->ni_vp) unlock_dvp = 1; if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { if (unlock_vp) { vput(ndp->ni_vp); unlock_vp = 0; } else vrele(ndp->ni_vp); ndp->ni_vp = NULL; } if (unlock_vp) VOP_UNLOCK(ndp->ni_vp); if (!(flags & NDF_NO_DVP_RELE) && (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { if (unlock_dvp) { vput(ndp->ni_dvp); unlock_dvp = 0; } else vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; } if (unlock_dvp) VOP_UNLOCK(ndp->ni_dvp); if (!(flags & NDF_NO_STARTDIR_RELE) && (ndp->ni_cnd.cn_flags & SAVESTART)) { vrele(ndp->ni_startdir); ndp->ni_startdir = NULL; } } #ifdef INVARIANTS /* * Validate the final state of ndp after the lookup. * * Historically filesystems were allowed to modify cn_flags. Most notably they * can add SAVENAME to the request, resulting in HASBUF and pushing subsequent * clean up to the consumer. In practice this seems to only concern != LOOKUP * operations. * * As a step towards stricter API contract this routine validates the state to * clean up. Note validation is a work in progress with the intent of becoming * stricter over time. */ #define NDMODIFYINGFLAGS (LOCKLEAF | LOCKPARENT | WANTPARENT | SAVENAME | SAVESTART | HASBUF) void NDVALIDATE(struct nameidata *ndp) { struct componentname *cnp; u_int64_t used, orig; cnp = &ndp->ni_cnd; orig = cnp->cn_origflags; used = cnp->cn_flags; switch (cnp->cn_nameiop) { case LOOKUP: /* * For plain lookup we require strict conformance -- nothing * to clean up if it was not requested by the caller. */ orig &= NDMODIFYINGFLAGS; used &= NDMODIFYINGFLAGS; if ((orig & (SAVENAME | SAVESTART)) != 0) orig |= HASBUF; if (orig != used) { goto out_mismatch; } break; case CREATE: case DELETE: case RENAME: /* * Some filesystems set SAVENAME to provoke HASBUF, accomodate * for it until it gets fixed. */ orig &= NDMODIFYINGFLAGS; orig |= (SAVENAME | HASBUF); used &= NDMODIFYINGFLAGS; used |= (SAVENAME | HASBUF); if (orig != used) { goto out_mismatch; } break; } return; out_mismatch: panic("%s: mismatched flags for op %d: added %" PRIx64 ", " "removed %" PRIx64" (%" PRIx64" != %" PRIx64"; stored %" PRIx64" != %" PRIx64")", __func__, cnp->cn_nameiop, used & ~orig, orig &~ used, orig, used, cnp->cn_origflags, cnp->cn_flags); } #endif /* * Determine if there is a suitable alternate filename under the specified * prefix for the specified path. If the create flag is set, then the * alternate prefix will be used so long as the parent directory exists. * This is used by the various compatibility ABIs so that Linux binaries prefer * files under /compat/linux for example. The chosen path (whether under * the prefix or under /) is returned in a kernel malloc'd buffer pointed * to by pathbuf. The caller is responsible for free'ing the buffer from * the M_TEMP bucket if one is returned. */ int kern_alternate_path(struct thread *td, const char *prefix, const char *path, enum uio_seg pathseg, char **pathbuf, int create, int dirfd) { struct nameidata nd, ndroot; char *ptr, *buf, *cp; size_t len, sz; int error; buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); *pathbuf = buf; /* Copy the prefix into the new pathname as a starting point. */ len = strlcpy(buf, prefix, MAXPATHLEN); if (len >= MAXPATHLEN) { *pathbuf = NULL; free(buf, M_TEMP); return (EINVAL); } sz = MAXPATHLEN - len; ptr = buf + len; /* Append the filename to the prefix. */ if (pathseg == UIO_SYSSPACE) error = copystr(path, ptr, sz, &len); else error = copyinstr(path, ptr, sz, &len); if (error) { *pathbuf = NULL; free(buf, M_TEMP); return (error); } /* Only use a prefix with absolute pathnames. */ if (*ptr != '/') { error = EINVAL; goto keeporig; } if (dirfd != AT_FDCWD) { /* * We want the original because the "prefix" is * included in the already opened dirfd. */ bcopy(ptr, buf, len); return (0); } /* * We know that there is a / somewhere in this pathname. * Search backwards for it, to find the file's parent dir * to see if it exists in the alternate tree. If it does, * and we want to create a file (cflag is set). We don't * need to worry about the root comparison in this case. */ if (create) { for (cp = &ptr[len] - 1; *cp != '/'; cp--); *cp = '\0'; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); error = namei(&nd); *cp = '/'; if (error != 0) goto keeporig; } else { NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf, td); error = namei(&nd); if (error != 0) goto keeporig; /* * We now compare the vnode of the prefix to the one * vnode asked. If they resolve to be the same, then we * ignore the match so that the real root gets used. * This avoids the problem of traversing "../.." to find the * root directory and never finding it, because "/" resolves * to the emulation root directory. This is expensive :-( */ NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix, td); /* We shouldn't ever get an error from this namei(). */ error = namei(&ndroot); if (error == 0) { if (nd.ni_vp == ndroot.ni_vp) error = ENOENT; NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); keeporig: /* If there was an error, use the original path name. */ if (error) bcopy(ptr, buf, len); return (error); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 48df8a3e9051..45f155ebff3d 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4925 +1,4950 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); static int kern_chflagsat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, u_long flags, int atflag); static int setfflags(struct thread *td, struct vnode *, u_long); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int getutimens(const struct timespec *, enum uio_seg, struct timespec *, int *); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); static int kern_fhlinkat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, fhandle_t *fhp); static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, struct thread *td); static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, enum uio_seg segflag); static uint64_t at2cnpflags(u_int at_flags, u_int mask) { u_int64_t res; MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); res = 0; at_flags &= mask; if ((at_flags & AT_RESOLVE_BENEATH) != 0) res |= RBENEATH; if ((at_flags & AT_SYMLINK_FOLLOW) != 0) res |= FOLLOW; /* NOFOLLOW is pseudo flag */ if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : FOLLOW; } + if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) + res |= EMPTYPATH; return (res); } int kern_sync(struct thread *td) { struct mount *mp, *nmp; int save; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { save = curthread_pflags_set(TDP_SYNCIO); vfs_periodic(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT); curthread_pflags_restore(save); vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif /* ARGSUSED */ int sys_sync(struct thread *td, struct sync_args *uap) { return (kern_sync(td)); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif int sys_quotactl(struct thread *td, struct quotactl_args *uap) { struct mount *mp; struct nameidata nd; int error; AUDIT_ARG_CMD(uap->cmd); AUDIT_ARG_UID(uap->uid); if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) return (EPERM); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); mp = nd.ni_vp->v_mount; vfs_ref(mp); vput(nd.ni_vp); error = vfs_busy(mp, 0); if (error != 0) { vfs_rel(mp); return (error); } error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); /* * Since quota on operation typically needs to open quota * file, the Q_QUOTAON handler needs to unbusy the mount point * before calling into namei. Otherwise, unmount might be * started between two vfs_busy() invocations (first is our, * second is from mount point cross-walk code in lookup()), * causing deadlock. * * Require that Q_QUOTAON handles the vfs_busy() reference on * its own, always returning with ubusied mount point. */ if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) vfs_unbusy(mp); vfs_rel(mp); return (error); } /* * Used by statfs conversion routines to scale the block size up if * necessary so that all of the block counts are <= 'max_size'. Note * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero * value of 'n'. */ void statfs_scale_blocks(struct statfs *sf, long max_size) { uint64_t count; int shift; KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); /* * Attempt to scale the block counts to give a more accurate * overview to userland of the ratio of free space to used * space. To do this, find the largest block count and compute * a divisor that lets it fit into a signed integer <= max_size. */ if (sf->f_bavail < 0) count = -sf->f_bavail; else count = sf->f_bavail; count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); if (count <= max_size) return; count >>= flsl(max_size); shift = 0; while (count > 0) { shift++; count >>=1; } sf->f_bsize <<= shift; sf->f_blocks >>= shift; sf->f_bfree >>= shift; sf->f_bavail >>= shift; } static int kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) { int error; if (mp == NULL) return (EBADF); error = vfs_busy(mp, 0); vfs_rel(mp); if (error != 0) return (error); #ifdef MAC error = mac_mount_check_stat(td->td_ucred, mp); if (error != 0) goto out; #endif error = VFS_STATFS(mp, buf); if (error != 0) goto out; if (priv_check_cred_vfs_generation(td->td_ucred)) { buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, buf); } out: vfs_unbusy(mp); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif int sys_statfs(struct thread *td, struct statfs_args *uap) { struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(struct statfs)); free(sfp, M_STATFS); return (error); } int kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, struct statfs *buf) { struct mount *mp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); error = namei(&nd); if (error != 0) return (error); mp = vfs_ref_from_vp(nd.ni_vp); NDFREE_NOTHING(&nd); vrele(nd.ni_vp); return (kern_do_statfs(td, mp, buf)); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif int sys_fstatfs(struct thread *td, struct fstatfs_args *uap) { struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(struct statfs)); free(sfp, M_STATFS); return (error); } int kern_fstatfs(struct thread *td, int fd, struct statfs *buf) { struct file *fp; struct mount *mp; struct vnode *vp; int error; AUDIT_ARG_FD(fd); error = getvnode(td, fd, &cap_fstatfs_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; #ifdef AUDIT if (AUDITING_TD(td)) { vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); VOP_UNLOCK(vp); } #endif mp = vfs_ref_from_vp(vp); fdrop(fp, td); return (kern_do_statfs(td, mp, buf)); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int mode; }; #endif int sys_getfsstat(struct thread *td, struct getfsstat_args *uap) { size_t count; int error; if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) return (EINVAL); error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, UIO_USERSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; return (error); } /* * If (bufsize > 0 && bufseg == UIO_SYSSPACE) * The caller is responsible for freeing memory which will be allocated * in '*buf'. */ int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, size_t *countp, enum uio_seg bufseg, int mode) { struct mount *mp, *nmp; struct statfs *sfsp, *sp, *sptmp, *tofree; size_t count, maxcount; int error; switch (mode) { case MNT_WAIT: case MNT_NOWAIT: break; default: if (bufseg == UIO_SYSSPACE) *buf = NULL; return (EINVAL); } restart: maxcount = bufsize / sizeof(struct statfs); if (bufsize == 0) { sfsp = NULL; tofree = NULL; } else if (bufseg == UIO_USERSPACE) { sfsp = *buf; tofree = NULL; } else /* if (bufseg == UIO_SYSSPACE) */ { count = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { count++; } mtx_unlock(&mountlist_mtx); if (maxcount > count) maxcount = count; tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_STATFS, M_WAITOK); } count = 0; /* * If there is no target buffer they only want the count. * * This could be TAILQ_FOREACH but it is open-coded to match the original * code below. */ if (sfsp == NULL) { mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (prison_canseemount(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #ifdef MAC if (mac_mount_check_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif count++; nmp = TAILQ_NEXT(mp, mnt_list); } mtx_unlock(&mountlist_mtx); *countp = count; return (0); } /* * They want the entire thing. * * Short-circuit the corner case of no room for anything, avoids * relocking below. */ if (maxcount < 1) { goto out; } mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (prison_canseemount(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #ifdef MAC if (mac_mount_check_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (mode == MNT_WAIT) { if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { /* * If vfs_busy() failed, and MBF_NOWAIT * wasn't passed, then the mp is gone. * Furthermore, because of MBF_MNTLSTLOCK, * the mountlist_mtx was dropped. We have * no other choice than to start over. */ mtx_unlock(&mountlist_mtx); free(tofree, M_STATFS); goto restart; } } else { if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } } sp = &mp->mnt_stat; /* * If MNT_NOWAIT is specified, do not refresh * the fsstat cache. */ if (mode != MNT_NOWAIT) { error = VFS_STATFS(mp, sp); if (error != 0) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); continue; } } if (priv_check_cred_vfs_generation(td->td_ucred)) { sptmp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); *sptmp = *sp; sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, sptmp); sp = sptmp; } else sptmp = NULL; if (bufseg == UIO_SYSSPACE) { bcopy(sp, sfsp, sizeof(*sp)); free(sptmp, M_STATFS); } else /* if (bufseg == UIO_USERSPACE) */ { error = copyout(sp, sfsp, sizeof(*sp)); free(sptmp, M_STATFS); if (error != 0) { vfs_unbusy(mp); return (error); } } sfsp++; count++; if (count == maxcount) { vfs_unbusy(mp); goto out; } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); out: *countp = count; return (0); } #ifdef COMPAT_FREEBSD4 /* * Get old format filesystem statistics. */ static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); #ifndef _SYS_SYSPROTO_H_ struct freebsd4_statfs_args { char *path; struct ostatfs *buf; }; #endif int freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) { struct ostatfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) { freebsd4_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fstatfs_args { int fd; struct ostatfs *buf; }; #endif int freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) { struct ostatfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) { freebsd4_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_getfsstat_args { struct ostatfs *buf; long bufsize; int mode; }; #endif int freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) { struct statfs *buf, *sp; struct ostatfs osb; size_t count, size; int error; if (uap->bufsize < 0) return (EINVAL); count = uap->bufsize / sizeof(struct ostatfs); if (count > SIZE_MAX / sizeof(struct statfs)) return (EINVAL); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; if (size != 0) { sp = buf; while (count != 0 && error == 0) { freebsd4_cvtstatfs(sp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); sp++; uap->buf++; count--; } free(buf, M_STATFS); } return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fhstatfs_args { struct fhandle *u_fhp; struct ostatfs *buf; }; #endif int freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) { struct ostatfs osb; struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) { freebsd4_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Convert a new format statfs structure to an old format statfs structure. */ static void freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) { statfs_scale_blocks(nsp, LONG_MAX); bzero(osp, sizeof(*osp)); osp->f_bsize = nsp->f_bsize; osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); osp->f_blocks = nsp->f_blocks; osp->f_bfree = nsp->f_bfree; osp->f_bavail = nsp->f_bavail; osp->f_files = MIN(nsp->f_files, LONG_MAX); osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); osp->f_owner = nsp->f_owner; osp->f_type = nsp->f_type; osp->f_flags = nsp->f_flags; osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); strlcpy(osp->f_fstypename, nsp->f_fstypename, MIN(MFSNAMELEN, OMFSNAMELEN)); strlcpy(osp->f_mntonname, nsp->f_mntonname, MIN(MNAMELEN, OMNAMELEN)); strlcpy(osp->f_mntfromname, nsp->f_mntfromname, MIN(MNAMELEN, OMNAMELEN)); osp->f_fsid = nsp->f_fsid; } #endif /* COMPAT_FREEBSD4 */ #if defined(COMPAT_FREEBSD11) /* * Get old format filesystem statistics. */ static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); int freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) { struct freebsd11_statfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); if (error == 0) { freebsd11_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get filesystem statistics. */ int freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) { struct freebsd11_statfs osb; struct statfs *sfp; int error; sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sfp); if (error == 0) { freebsd11_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Get statistics on all filesystems. */ int freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) { struct freebsd11_statfs osb; struct statfs *buf, *sp; size_t count, size; int error; count = uap->bufsize / sizeof(struct ostatfs); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; if (size > 0) { sp = buf; while (count > 0 && error == 0) { freebsd11_cvtstatfs(sp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); sp++; uap->buf++; count--; } free(buf, M_STATFS); } return (error); } /* * Implement fstatfs() for (NFS) file handles. */ int freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) { struct freebsd11_statfs osb; struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) { freebsd11_cvtstatfs(sfp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); } free(sfp, M_STATFS); return (error); } /* * Convert a new format statfs structure to an old format statfs structure. */ static void freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) { bzero(osp, sizeof(*osp)); osp->f_version = FREEBSD11_STATFS_VERSION; osp->f_type = nsp->f_type; osp->f_flags = nsp->f_flags; osp->f_bsize = nsp->f_bsize; osp->f_iosize = nsp->f_iosize; osp->f_blocks = nsp->f_blocks; osp->f_bfree = nsp->f_bfree; osp->f_bavail = nsp->f_bavail; osp->f_files = nsp->f_files; osp->f_ffree = nsp->f_ffree; osp->f_syncwrites = nsp->f_syncwrites; osp->f_asyncwrites = nsp->f_asyncwrites; osp->f_syncreads = nsp->f_syncreads; osp->f_asyncreads = nsp->f_asyncreads; osp->f_namemax = nsp->f_namemax; osp->f_owner = nsp->f_owner; osp->f_fsid = nsp->f_fsid; strlcpy(osp->f_fstypename, nsp->f_fstypename, MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); strlcpy(osp->f_mntonname, nsp->f_mntonname, MIN(MNAMELEN, sizeof(osp->f_mntonname))); strlcpy(osp->f_mntfromname, nsp->f_mntfromname, MIN(MNAMELEN, sizeof(osp->f_mntfromname))); } #endif /* COMPAT_FREEBSD11 */ /* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif int sys_fchdir(struct thread *td, struct fchdir_args *uap) { struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; AUDIT_ARG_FD(uap->fd); error = getvnode(td, uap->fd, &cap_fchdir_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; vrefact(vp); fdrop(fp, td); vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); error = change_dir(vp, td); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0)) continue; error = VFS_ROOT(mp, LK_SHARED, &tdp); vfs_unbusy(mp); if (error != 0) break; vput(vp); vp = tdp; } if (error != 0) { vput(vp); return (error); } VOP_UNLOCK(vp); pwd_chdir(td, vp); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif int sys_chdir(struct thread *td, struct chdir_args *uap) { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); NDFREE_NOTHING(&nd); return (error); } VOP_UNLOCK(nd.ni_vp); NDFREE_NOTHING(&nd); pwd_chdir(td, nd.ni_vp); return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif int sys_chroot(struct thread *td, struct chroot_args *uap) { struct nameidata nd; int error; error = priv_check(td, PRIV_VFS_CHROOT); if (error != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error != 0) goto error; error = change_dir(nd.ni_vp, td); if (error != 0) goto e_vunlock; #ifdef MAC error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); if (error != 0) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp); error = pwd_chroot(td, nd.ni_vp); vrele(nd.ni_vp); NDFREE_NOTHING(&nd); return (error); e_vunlock: vput(nd.ni_vp); error: NDFREE_NOTHING(&nd); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. */ int change_dir(struct vnode *vp, struct thread *td) { #ifdef MAC int error; #endif ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_vnode_check_chdir(td->td_ucred, vp); if (error != 0) return (error); #endif return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); } static __inline void flags_to_rights(int flags, cap_rights_t *rightsp) { if (flags & O_EXEC) { cap_rights_set_one(rightsp, CAP_FEXECVE); } else { switch ((flags & O_ACCMODE)) { case O_RDONLY: cap_rights_set_one(rightsp, CAP_READ); break; case O_RDWR: cap_rights_set_one(rightsp, CAP_READ); /* FALLTHROUGH */ case O_WRONLY: cap_rights_set_one(rightsp, CAP_WRITE); if (!(flags & (O_APPEND | O_TRUNC))) cap_rights_set_one(rightsp, CAP_SEEK); break; } } if (flags & O_CREAT) cap_rights_set_one(rightsp, CAP_CREATE); if (flags & O_TRUNC) cap_rights_set_one(rightsp, CAP_FTRUNCATE); if (flags & (O_SYNC | O_FSYNC)) cap_rights_set_one(rightsp, CAP_FSYNC); if (flags & (O_EXLOCK | O_SHLOCK)) cap_rights_set_one(rightsp, CAP_FLOCK); } /* * Check permissions, allocate an open file structure, and call the device * open routine if any. */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int sys_open(struct thread *td, struct open_args *uap) { return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct openat_args { int fd; char *path; int flag; int mode; }; #endif int sys_openat(struct thread *td, struct openat_args *uap) { AUDIT_ARG_FD(uap->fd); return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, uap->mode)); } int kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp; struct pwddesc *pdp; struct file *fp; struct vnode *vp; struct nameidata nd; cap_rights_t rights; int cmode, error, indx; indx = -1; fdp = p->p_fd; pdp = p->p_pd; AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); cap_rights_init_one(&rights, CAP_LOOKUP); flags_to_rights(flags, &rights); /* * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags * may be specified. */ if (flags & O_EXEC) { if (flags & O_ACCMODE) return (EINVAL); } else if ((flags & O_ACCMODE) == O_ACCMODE) { return (EINVAL); } else { flags = FFLAGS(flags); } /* * Allocate a file structure. The descriptor to reference it * is allocated and used by finstall_refed() below. */ error = falloc_noinstall(td, &fp); if (error != 0) return (error); /* Set the flags early so the finit in devfs can pick them up. */ fp->f_flag = flags & FMASK; cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, &rights, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, fp); if (error != 0) { /* * If the vn_open replaced the method vector, something * wonderous happened deep below and we just pass it up * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) goto success; /* * Handle special fdopen() case. bleh. * * Don't do this for relative (capability) lookups; we don't * understand exactly what would happen, and we don't think * that it ever should. */ if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && (error == ENODEV || error == ENXIO) && td->td_dupfd >= 0) { error = dupfdopen(td, fdp, td->td_dupfd, flags, error, &indx); if (error == 0) goto success; } goto bad; } td->td_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * Store the vnode, for any f_type. Typically, the vnode use * count is decremented by direct call to vn_closefile() for * files that switched type in the cdevsw fdopen() method. */ fp->f_vnode = vp; /* * If the file wasn't claimed by devfs bind it to the normal * vnode operations here. */ if (fp->f_ops == &badfileops) { KASSERT(vp->v_type != VFIFO, ("Unexpected fifo fp %p vp %p", fp, vp)); finit_vnode(fp, flags, NULL, &vnops); } VOP_UNLOCK(vp); if (flags & O_TRUNC) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error != 0) goto bad; } success: /* * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { struct filecaps *fcaps; #ifdef CAPABILITIES if ((nd.ni_resflags & NIRES_STRICTREL) != 0) fcaps = &nd.ni_filecaps; else #endif fcaps = NULL; error = finstall_refed(td, fp, &indx, flags, fcaps); /* On success finstall_refed() consumes fcaps. */ if (error != 0) { filecaps_free(&nd.ni_filecaps); goto bad; } } else { filecaps_free(&nd.ni_filecaps); falloc_abort(td, fp); } td->td_retval[0] = indx; return (0); bad: KASSERT(indx == -1, ("indx=%d, should be -1", indx)); falloc_abort(td, fp); return (error); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(struct thread *td, struct ocreat_args *uap) { return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); } #endif /* COMPAT_43 */ /* * Create a special file. */ #ifndef _SYS_SYSPROTO_H_ struct mknodat_args { int fd; char *path; mode_t mode; dev_t dev; }; #endif int sys_mknodat(struct thread *td, struct mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } #if defined(COMPAT_FREEBSD11) int freebsd11_mknod(struct thread *td, struct freebsd11_mknod_args *uap) { return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int freebsd11_mknodat(struct thread *td, struct freebsd11_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } #endif /* COMPAT_FREEBSD11 */ int kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode, dev_t dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; struct nameidata nd; int error, whiteout = 0; AUDIT_ARG_MODE(mode); AUDIT_ARG_DEV(dev); switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = priv_check(td, PRIV_VFS_MKNOD_DEV); if (error == 0 && dev == VNOVAL) error = EINVAL; break; case S_IFWHT: error = priv_check(td, PRIV_VFS_MKNOD_WHT); break; case S_IFIFO: if (dev == 0) return (kern_mkfifoat(td, fd, path, pathseg, mode)); /* FALLTHROUGH */ default: error = EINVAL; break; } if (error != 0) return (error); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, pathseg, path, fd, &cap_mknodat_rights, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } else { VATTR_NULL(&vattr); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: panic("kern_mknod: invalid mode"); } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (error == 0) { if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); } } VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, true); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); if (error == ERELOOKUP) goto restart; return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif int sys_mkfifo(struct thread *td, struct mkfifo_args *uap) { return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct mkfifoat_args { int fd; char *path; mode_t mode; }; #endif int sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) { return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifoat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; struct nameidata nd; int error; AUDIT_ARG_MODE(mode); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out; #endif error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); if (error == ERELOOKUP) goto restart; return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif int sys_link(struct thread *td, struct link_args *uap) { return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, UIO_USERSPACE, FOLLOW)); } #ifndef _SYS_SYSPROTO_H_ struct linkat_args { int fd1; char *path1; int fd2; char *path2; int flag; }; #endif int sys_linkat(struct thread *td, struct linkat_args *uap) { int flag; flag = uap->flag; - if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH)) != 0) + if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | + AT_EMPTY_PATH)) != 0) return (EINVAL); return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, UIO_USERSPACE, at2cnpflags(flag, AT_SYMLINK_FOLLOW | - AT_RESOLVE_BENEATH))); + AT_RESOLVE_BENEATH | AT_EMPTY_PATH))); } int hardlink_check_uid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, &hardlink_check_uid, 0, "Unprivileged processes cannot create hard links to files owned by other " "users"); static int hardlink_check_gid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, &hardlink_check_gid, 0, "Unprivileged processes cannot create hard links to files owned by other " "groups"); static int can_hardlink(struct vnode *vp, struct ucred *cred) { struct vattr va; int error; if (!hardlink_check_uid && !hardlink_check_gid) return (0); error = VOP_GETATTR(vp, &va, cred); if (error != 0) return (error); if (hardlink_check_uid && cred->cr_uid != va.va_uid) { error = priv_check_cred(cred, PRIV_VFS_LINK); if (error != 0) return (error); } if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { error = priv_check_cred(cred, PRIV_VFS_LINK); if (error != 0) return (error); } return (0); } int kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, const char *path2, enum uio_seg segflag, int follow) { struct nameidata nd; int error; do { bwillwrite(); NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag, path1, fd1, &cap_linkat_source_rights, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); } while (error == EAGAIN || error == ERELOOKUP); return (error); } static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, enum uio_seg segflag) { struct nameidata nd; struct mount *mp; int error; if (vp->v_type == VDIR) { vrele(vp); return (EPERM); /* POSIX */ } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, &cap_linkat_target_rights, td); if ((error = namei(&nd)) == 0) { + if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { + error = priv_check(td, PRIV_VFS_FHOPEN); + if (error != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_vp != NULL) { + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vrele(nd.ni_vp); + } else { + vput(nd.ni_dvp); + } + vrele(vp); + return (error); + } + } if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); vrele(vp); return (EEXIST); } else if (nd.ni_dvp->v_mount != vp->v_mount) { /* * Cross-device link. No need to recheck * vp->v_type, since it cannot change, except * to VBAD. */ NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vrele(vp); return (EXDEV); } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { error = can_hardlink(vp, td->td_ucred); #ifdef MAC if (error == 0) error = mac_vnode_check_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); #endif if (error != 0) { vput(vp); vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } error = vn_start_write(vp, &mp, V_NOWAIT); if (error != 0) { vput(vp); vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error != 0) return (error); return (EAGAIN); } error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); vp = NULL; } else { vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); return (EAGAIN); } } if (vp != NULL) vrele(vp); return (error); } /* * Make a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif int sys_symlink(struct thread *td, struct symlink_args *uap) { return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct symlinkat_args { char *path; int fd; char *path2; }; #endif int sys_symlinkat(struct thread *td, struct symlinkat_args *uap) { return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, UIO_USERSPACE)); } int kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; const char *syspath; char *tmppath; struct nameidata nd; int error; if (segflg == UIO_SYSSPACE) { syspath = path1; } else { tmppath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) goto out; syspath = tmppath; } AUDIT_ARG_TEXT(syspath); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); nd.ni_vp = NULL; error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) goto out; goto restart; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; #ifdef MAC vattr.va_type = VLNK; error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out2; #endif error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); #ifdef MAC out2: #endif VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); vn_finished_write(mp); NDFREE(&nd, NDF_ONLY_PNBUF); if (error == ERELOOKUP) goto restart; out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, tmppath); return (error); } /* * Delete a whiteout from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct undelete_args { char *path; }; #endif int sys_undelete(struct thread *td, struct undelete_args *uap) { struct mount *mp; struct nameidata nd; int error; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error != 0) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); if (error == ERELOOKUP) goto restart; return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif int sys_unlink(struct thread *td, struct unlink_args *uap) { return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 0, 0)); } static int kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, int flag, enum uio_seg pathseg, ino_t oldinum) { if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) return (EINVAL); if ((flag & AT_REMOVEDIR) != 0) return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); } #ifndef _SYS_SYSPROTO_H_ struct unlinkat_args { int fd; char *path; int flag; }; #endif int sys_unlinkat(struct thread *td, struct unlinkat_args *uap) { return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, UIO_USERSPACE, 0)); } #ifndef _SYS_SYSPROTO_H_ struct funlinkat_args { int dfd; const char *path; int fd; int flag; }; #endif int sys_funlinkat(struct thread *td, struct funlinkat_args *uap) { return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, UIO_USERSPACE, 0)); } int kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag, ino_t oldinum) { struct mount *mp; struct file *fp; struct vnode *vp; struct nameidata nd; struct stat sb; int error; fp = NULL; if (fd != FD_NONE) { error = getvnode(td, fd, &cap_no_rights, &fp); if (error != 0) return (error); } restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH), pathseg, path, dfd, &cap_unlinkat_rights, td); if ((error = namei(&nd)) != 0) { if (error == EINVAL) error = EPERM; goto fdout; } vp = nd.ni_vp; if (vp->v_type == VDIR && oldinum == 0) { error = EPERM; /* POSIX */ } else if (oldinum != 0 && ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && sb.st_ino != oldinum) { error = EIDRM; /* Identifier removed */ } else if (fp != NULL && fp->f_vnode != vp) { if (VN_IS_DOOMED(fp->f_vnode)) error = EBADF; else error = EDEADLK; } else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) { goto fdout; } goto restart; } #ifdef MAC error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error != 0) goto out; #endif vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); if (error == ERELOOKUP) goto restart; fdout: if (fp != NULL) fdrop(fp, td); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int sys_lseek(struct thread *td, struct lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } int kern_lseek(struct thread *td, int fd, off_t offset, int whence) { struct file *fp; int error; AUDIT_ARG_FD(fd); error = fget(td, fd, &cap_seek_rights, &fp); if (error != 0) return (error); error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? fo_seek(fp, offset, whence, td) : ESPIPE; fdrop(fp, td); return (error); } #if defined(COMPAT_43) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(struct thread *td, struct olseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD6) /* Version with the 'pad' argument */ int freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif /* * Check access permissions using passed credentials. */ static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td) { accmode_t accmode; int error; /* Flags == 0 means only check for existence. */ if (user_flags == 0) return (0); accmode = 0; if (user_flags & R_OK) accmode |= VREAD; if (user_flags & W_OK) accmode |= VWRITE; if (user_flags & X_OK) accmode |= VEXEC; #ifdef MAC error = mac_vnode_check_access(cred, vp, accmode); if (error != 0) return (error); #endif if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, accmode, cred, td); return (error); } /* * Check access permissions using "real" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int amode; }; #endif int sys_access(struct thread *td, struct access_args *uap) { return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, uap->amode)); } #ifndef _SYS_SYSPROTO_H_ struct faccessat_args { int dirfd; char *path; int amode; int flag; } #endif int sys_faccessat(struct thread *td, struct faccessat_args *uap) { return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, uap->amode)); } int kern_accessat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int flag, int amode) { struct ucred *cred, *usecred; struct vnode *vp; struct nameidata nd; int error; - if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH)) != 0) + if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0) return (EINVAL); if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) return (EINVAL); /* * Create and modify a temporary credential instead of one that * is potentially shared (if we need one). */ cred = td->td_ucred; if ((flag & AT_EACCESS) == 0 && ((cred->cr_uid != cred->cr_ruid || cred->cr_rgid != cred->cr_groups[0]))) { usecred = crdup(cred); usecred->cr_uid = cred->cr_ruid; usecred->cr_groups[0] = cred->cr_rgid; td->td_ucred = usecred; } else usecred = cred; AUDIT_ARG_VALUE(amode); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | - AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH), - pathseg, path, fd, &cap_fstat_rights, td); + AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | + AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights, td); if ((error = namei(&nd)) != 0) goto out; vp = nd.ni_vp; error = vn_access(vp, amode, usecred, td); NDFREE_NOTHING(&nd); vput(vp); out: if (usecred != cred) { td->td_ucred = cred; crfree(usecred); } return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int amode; }; #endif int sys_eaccess(struct thread *td, struct eaccess_args *uap) { return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, AT_EACCESS, uap->amode)); } #if defined(COMPAT_43) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif int ostat(struct thread *td, struct ostat_args *uap) { struct stat sb; struct ostat osb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtstat(&sb, &osb); return (copyout(&osb, uap->ub, sizeof (osb))); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif int olstat(struct thread *td, struct olstat_args *uap) { struct stat sb; struct ostat osb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); cvtstat(&sb, &osb); return (copyout(&osb, uap->ub, sizeof (osb))); } /* * Convert from an old to a new stat structure. * XXX: many values are blindly truncated. */ void cvtstat(struct stat *st, struct ostat *ost) { bzero(ost, sizeof(*ost)); ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; ost->st_size = MIN(st->st_size, INT32_MAX); ost->st_atim = st->st_atim; ost->st_mtim = st->st_mtim; ost->st_ctim = st->st_ctim; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 */ #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) int ino64_trunc_error; SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, &ino64_trunc_error, 0, "Error on truncation of device, file or inode number, or link count"); int freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) { ost->st_dev = st->st_dev; if (ost->st_dev != st->st_dev) { switch (ino64_trunc_error) { default: /* * Since dev_t is almost raw, don't clamp to the * maximum for case 2, but ignore the error. */ break; case 1: return (EOVERFLOW); } } ost->st_ino = st->st_ino; if (ost->st_ino != st->st_ino) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: ost->st_ino = UINT32_MAX; break; } } ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; if (ost->st_nlink != st->st_nlink) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: ost->st_nlink = UINT16_MAX; break; } } ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (ost->st_rdev != st->st_rdev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } ost->st_atim = st->st_atim; ost->st_mtim = st->st_mtim; ost->st_ctim = st->st_ctim; ost->st_size = st->st_size; ost->st_blocks = st->st_blocks; ost->st_blksize = st->st_blksize; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; ost->st_lspare = 0; ost->st_birthtim = st->st_birthtim; bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), sizeof(*ost) - offsetof(struct freebsd11_stat, st_birthtim) - sizeof(ost->st_birthtim)); return (0); } int freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) { struct stat sb; struct freebsd11_stat osb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat(&sb, &osb); if (error == 0) error = copyout(&osb, uap->ub, sizeof(osb)); return (error); } int freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) { struct stat sb; struct freebsd11_stat osb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat(&sb, &osb); if (error == 0) error = copyout(&osb, uap->ub, sizeof(osb)); return (error); } int freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) { struct fhandle fh; struct stat sb; struct freebsd11_stat osb; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); error = freebsd11_cvtstat(&sb, &osb); if (error == 0) error = copyout(&osb, uap->sb, sizeof(osb)); return (error); } int freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) { struct stat sb; struct freebsd11_stat osb; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); error = freebsd11_cvtstat(&sb, &osb); if (error == 0) error = copyout(&osb, uap->buf, sizeof(osb)); return (error); } #endif /* COMPAT_FREEBSD11 */ /* * Get file status */ #ifndef _SYS_SYSPROTO_H_ struct fstatat_args { int fd; char *path; struct stat *buf; int flag; } #endif int sys_fstatat(struct thread *td, struct fstatat_args *uap) { struct stat sb; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb, NULL); if (error == 0) error = copyout(&sb, uap->buf, sizeof (sb)); return (error); } int kern_statat(struct thread *td, int flag, int fd, const char *path, enum uio_seg pathseg, struct stat *sbp, void (*hook)(struct vnode *vp, struct stat *sbp)) { struct nameidata nd; int error; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | + AT_EMPTY_PATH)) != 0) return (EINVAL); NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | - AT_SYMLINK_NOFOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, &cap_fstat_rights, td); + AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | + AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights, td); if ((error = namei(&nd)) != 0) return (error); error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td); if (error == 0) { if (__predict_false(hook != NULL)) hook(nd.ni_vp, sbp); } NDFREE_NOTHING(&nd); vput(nd.ni_vp); #ifdef __STAT_TIME_T_EXT sbp->st_atim_ext = 0; sbp->st_mtim_ext = 0; sbp->st_ctim_ext = 0; sbp->st_btim_ext = 0; #endif #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrstat_error(sbp, error); #endif return (error); } #if defined(COMPAT_FREEBSD11) /* * Implementation of the NetBSD [l]stat() functions. */ void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) { bzero(nsb, sizeof(*nsb)); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atim = sb->st_atim; nsb->st_mtim = sb->st_mtim; nsb->st_ctim = sb->st_ctim; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtim = sb->st_birthtim; } #ifndef _SYS_SYSPROTO_H_ struct freebsd11_nstat_args { char *path; struct nstat *ub; }; #endif int freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) { struct stat sb; struct nstat nsb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); freebsd11_cvtnstat(&sb, &nsb); return (copyout(&nsb, uap->ub, sizeof (nsb))); } /* * NetBSD lstat. Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd11_nlstat_args { char *path; struct nstat *ub; }; #endif int freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) { struct stat sb; struct nstat nsb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb, NULL); if (error != 0) return (error); freebsd11_cvtnstat(&sb, &nsb); return (copyout(&nsb, uap->ub, sizeof (nsb))); } #endif /* COMPAT_FREEBSD11 */ /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif int sys_pathconf(struct thread *td, struct pathconf_args *uap) { long value; int error; error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, &value); if (error == 0) td->td_retval[0] = value; return (error); } #ifndef _SYS_SYSPROTO_H_ struct lpathconf_args { char *path; int name; }; #endif int sys_lpathconf(struct thread *td, struct lpathconf_args *uap) { long value; int error; error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW, &value); if (error == 0) td->td_retval[0] = value; return (error); } int kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, int name, u_long flags, long *valuep) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); error = VOP_PATHCONF(nd.ni_vp, name, valuep); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; size_t count; }; #endif int sys_readlink(struct thread *td, struct readlink_args *uap) { return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } #ifndef _SYS_SYSPROTO_H_ struct readlinkat_args { int fd; char *path; char *buf; size_t bufsize; }; #endif int sys_readlinkat(struct thread *td, struct readlinkat_args *uap) { return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->bufsize)); } int kern_readlinkat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) { struct vnode *vp; struct nameidata nd; int error; if (count > IOSIZE_MAX) return (EINVAL); NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); vp = nd.ni_vp; error = kern_readlink_vp(vp, buf, bufseg, count, td); vput(vp); return (error); } /* * Helper function to readlink from a vnode */ static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, struct thread *td) { struct iovec aiov; struct uio auio; int error; ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); #ifdef MAC error = mac_vnode_check_readlink(td->td_ucred, vp); if (error != 0) return (error); #endif if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) return (EINVAL); aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); td->td_retval[0] = count - auio.uio_resid; return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(struct thread *td, struct vnode *vp, u_long flags) { struct mount *mp; struct vattr vattr; int error; /* We can't support the value matching VNOVAL. */ if (flags == VNOVAL) return (EOPNOTSUPP); /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); if (error != 0) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VATTR_NULL(&vattr); vattr.va_flags = flags; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); #ifdef MAC error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred); VOP_UNLOCK(vp); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { const char *path; u_long flags; }; #endif int sys_chflags(struct thread *td, struct chflags_args *uap) { return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, 0)); } #ifndef _SYS_SYSPROTO_H_ struct chflagsat_args { int fd; const char *path; u_long flags; int atflag; } #endif int sys_chflagsat(struct thread *td, struct chflagsat_args *uap) { - if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) + if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | + AT_EMPTY_PATH)) != 0) return (EINVAL); return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flags, uap->atflag)); } /* * Same as chflags() but doesn't follow symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchflags_args { const char *path; u_long flags; }; #endif int sys_lchflags(struct thread *td, struct lchflags_args *uap) { return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->flags, AT_SYMLINK_NOFOLLOW)); } static int kern_chflagsat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, u_long flags, int atflag) { struct nameidata nd; int error; AUDIT_ARG_FFLAGS(flags); NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | - AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, - &cap_fchflags_rights, td); + AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, + fd, &cap_fchflags_rights, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); error = setfflags(td, nd.ni_vp, flags); vrele(nd.ni_vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; u_long flags; }; #endif int sys_fchflags(struct thread *td, struct fchflags_args *uap) { struct file *fp; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_FFLAGS(uap->flags); error = getvnode(td, uap->fd, &cap_fchflags_rights, &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode); #endif error = setfflags(td, fp->f_vnode, uap->flags); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) { struct mount *mp; struct vattr vattr; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp); vn_finished_write(mp); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif int sys_chmod(struct thread *td, struct chmod_args *uap) { return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, 0)); } #ifndef _SYS_SYSPROTO_H_ struct fchmodat_args { int dirfd; char *path; mode_t mode; int flag; } #endif int sys_fchmodat(struct thread *td, struct fchmodat_args *uap) { - if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) + if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | + AT_EMPTY_PATH)) != 0) return (EINVAL); return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, uap->flag)); } /* * Change mode of a file given path name (don't follow links.) */ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif int sys_lchmod(struct thread *td, struct lchmod_args *uap) { return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode, AT_SYMLINK_NOFOLLOW)); } int kern_fchmodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, mode_t mode, int flag) { struct nameidata nd; int error; AUDIT_ARG_MODE(mode); NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | - AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, - &cap_fchmod_rights, td); + AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, + fd, &cap_fchmod_rights, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); error = setfmode(td, td->td_ucred, nd.ni_vp, mode); vrele(nd.ni_vp); return (error); } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif int sys_fchmod(struct thread *td, struct fchmod_args *uap) { struct file *fp; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_MODE(uap->mode); error = fget(td, uap->fd, &cap_fchmod_rights, &fp); if (error != 0) return (error); error = fo_chmod(fp, uap->mode, td->td_ucred, td); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, gid_t gid) { struct mount *mp; struct vattr vattr; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp); vn_finished_write(mp); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif int sys_chown(struct thread *td, struct chown_args *uap) { return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, uap->gid, 0)); } #ifndef _SYS_SYSPROTO_H_ struct fchownat_args { int fd; const char * path; uid_t uid; gid_t gid; int flag; }; #endif int sys_fchownat(struct thread *td, struct fchownat_args *uap) { - if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) + if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | + AT_EMPTY_PATH)) != 0) return (EINVAL); return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, uap->gid, uap->flag)); } int kern_fchownat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, int uid, int gid, int flag) { struct nameidata nd; int error; AUDIT_ARG_OWNER(uid, gid); NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | - AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, - &cap_fchown_rights, td); + AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, + fd, &cap_fchown_rights, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); vrele(nd.ni_vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif int sys_lchown(struct thread *td, struct lchown_args *uap) { return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif int sys_fchown(struct thread *td, struct fchown_args *uap) { struct file *fp; int error; AUDIT_ARG_FD(uap->fd); AUDIT_ARG_OWNER(uap->uid, uap->gid); error = fget(td, uap->fd, &cap_fchown_rights, &fp); if (error != 0) return (error); error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, struct timespec *tsp) { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { vfs_timestamp(&tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return (0); } /* * Common implementation code for futimens(), utimensat(). */ #define UTIMENS_NULL 0x1 #define UTIMENS_EXIT 0x2 static int getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, struct timespec *tsp, int *retflags) { struct timespec tsnow; int error; vfs_timestamp(&tsnow); *retflags = 0; if (usrtsp == NULL) { tsp[0] = tsnow; tsp[1] = tsnow; *retflags |= UTIMENS_NULL; return (0); } if (tspseg == UIO_SYSSPACE) { tsp[0] = usrtsp[0]; tsp[1] = usrtsp[1]; } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) return (error); if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) *retflags |= UTIMENS_EXIT; if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) *retflags |= UTIMENS_NULL; if (tsp[0].tv_nsec == UTIME_OMIT) tsp[0].tv_sec = VNOVAL; else if (tsp[0].tv_nsec == UTIME_NOW) tsp[0] = tsnow; else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) return (EINVAL); if (tsp[1].tv_nsec == UTIME_OMIT) tsp[1].tv_sec = VNOVAL; else if (tsp[1].tv_nsec == UTIME_NOW) tsp[1] = tsnow; else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) return (EINVAL); return (0); } /* * Common implementation code for utimes(), lutimes(), futimes(), futimens(), * and utimensat(). */ static int setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, int numtimes, int nullflag) { struct mount *mp; struct vattr vattr; int error, setbirthtime; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); setbirthtime = 0; if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred); VOP_UNLOCK(vp); vn_finished_write(mp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif int sys_utimes(struct thread *td, struct utimes_args *uap) { return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct futimesat_args { int fd; const char * path; const struct timeval * times; }; #endif int sys_futimesat(struct thread *td, struct futimesat_args *uap) { return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times, UIO_USERSPACE)); } int kern_utimesat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct nameidata nd; struct timespec ts[2]; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, &cap_futimes_rights, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif int sys_lutimes(struct thread *td, struct lutimes_args *uap) { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct nameidata nd; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE_NOTHING(&nd); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif int sys_futimes(struct thread *td, struct futimes_args *uap) { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; int error; AUDIT_ARG_FD(fd); error = getutimes(tptr, tptrseg, ts); if (error != 0) return (error); error = getvnode(td, fd, &cap_futimes_rights, &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode); #endif error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); fdrop(fp, td); return (error); } int sys_futimens(struct thread *td, struct futimens_args *uap) { return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); } int kern_futimens(struct thread *td, int fd, struct timespec *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; int error, flags; AUDIT_ARG_FD(fd); error = getutimens(tptr, tptrseg, ts, &flags); if (error != 0) return (error); if (flags & UTIMENS_EXIT) return (0); error = getvnode(td, fd, &cap_futimes_rights, &fp); if (error != 0) return (error); #ifdef AUDIT vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(fp->f_vnode); VOP_UNLOCK(fp->f_vnode); #endif error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); fdrop(fp, td); return (error); } int sys_utimensat(struct thread *td, struct utimensat_args *uap) { return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times, UIO_USERSPACE, uap->flag)); } int kern_utimensat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, int flag) { struct nameidata nd; struct timespec ts[2]; int error, flags; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | + AT_EMPTY_PATH)) != 0) return (EINVAL); if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) return (error); NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | - AT_RESOLVE_BENEATH) | AUDITVNODE1, + AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, fd, &cap_futimes_rights, td); if ((error = namei(&nd)) != 0) return (error); /* * We are allowed to call namei() regardless of 2xUTIME_OMIT. * POSIX states: * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." * "Search permission is denied by a component of the path prefix." */ NDFREE_NOTHING(&nd); if ((flags & UTIMENS_EXIT) == 0) error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); vrele(nd.ni_vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif int sys_truncate(struct thread *td, struct truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; void *rl_cookie; struct vattr vattr; struct nameidata nd; int error; if (length < 0) return (EINVAL); retry: NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vn_rangelock_unlock(vp, rl_cookie); vrele(vp); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred); } VOP_UNLOCK(vp); vn_finished_write(mp); vn_rangelock_unlock(vp, rl_cookie); vrele(vp); if (error == ERELOOKUP) goto retry; return (error); } #if defined(COMPAT_43) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif int otruncate(struct thread *td, struct otruncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD6) /* Versions with the pad argument */ int freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, uap->length)); } #endif int kern_fsync(struct thread *td, int fd, bool fullsync) { struct vnode *vp; struct mount *mp; struct file *fp; int error, lock_flags; AUDIT_ARG_FD(fd); error = getvnode(td, fd, &cap_fsync_rights, &fp); if (error != 0) return (error); vp = fp->f_vnode; #if 0 if (!fullsync) /* XXXKIB: compete outstanding aio writes */; #endif retry: error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error != 0) goto drop; if (MNT_SHARED_WRITES(mp) || ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { lock_flags = LK_SHARED; } else { lock_flags = LK_EXCLUSIVE; } vn_lock(vp, lock_flags | LK_RETRY); AUDIT_ARG_VNODE1(vp); if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); VOP_UNLOCK(vp); vn_finished_write(mp); if (error == ERELOOKUP) goto retry; drop: fdrop(fp, td); return (error); } /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif int sys_fsync(struct thread *td, struct fsync_args *uap) { return (kern_fsync(td, uap->fd, true)); } int sys_fdatasync(struct thread *td, struct fdatasync_args *uap) { return (kern_fsync(td, uap->fd, false)); } /* * Rename files. Source and destination must either both be directories, or * both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif int sys_rename(struct thread *td, struct rename_args *uap) { return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, uap->to, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct renameat_args { int oldfd; char *old; int newfd; char *new; }; #endif int sys_renameat(struct thread *td, struct renameat_args *uap) { return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, UIO_USERSPACE)); } #ifdef MAC static int kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg, struct nameidata *fromnd) { int error; NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td); if ((error = namei(fromnd)) != 0) return (error); error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, fromnd->ni_vp, &fromnd->ni_cnd); VOP_UNLOCK(fromnd->ni_dvp); if (fromnd->ni_dvp != fromnd->ni_vp) VOP_UNLOCK(fromnd->ni_vp); if (error != 0) { NDFREE(fromnd, NDF_ONLY_PNBUF); vrele(fromnd->ni_dvp); vrele(fromnd->ni_vp); if (fromnd->ni_startdir) vrele(fromnd->ni_startdir); } return (error); } #endif int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; u_int64_t tondflags; int error; again: bwillwrite(); #ifdef MAC if (mac_vnode_check_rename_from_enabled()) { error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, &fromnd); if (error != 0) return (error); } else { #endif NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td); if ((error = namei(&fromnd)) != 0) return (error); #ifdef MAC } #endif fvp = fromnd.ni_vp; tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2; if (fromnd.ni_vp->v_type == VDIR) tondflags |= WILLBEDIR; NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, &cap_renameat_target_rights, td); if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp != NULL) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); vrele(tond.ni_startdir); if (fromnd.ni_startdir != NULL) vrele(fromnd.ni_startdir); error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); if (error != 0) return (error); goto again; } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } #ifdef CAPABILITIES if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { /* * If the target already exists we require CAP_UNLINKAT * from 'newfd', when newfd was used for the lookup. */ error = cap_check(&tond.ni_filecaps.fc_rights, &cap_unlinkat_rights); if (error != 0) goto out; } #endif } if (fvp == tdvp) { error = EINVAL; goto out; } /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. */ if (fvp == tvp) error = ERESTART; #ifdef MAC else error = mac_vnode_check_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (error == 0) { error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp != NULL) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); vn_finished_write(mp); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); if (error == ERESTART) return (0); if (error == ERELOOKUP) goto again; return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif int sys_mkdir(struct thread *td, struct mkdir_args *uap) { return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode)); } #ifndef _SYS_SYSPROTO_H_ struct mkdirat_args { int fd; char *path; mode_t mode; }; #endif int sys_mkdirat(struct thread *td, struct mkdirat_args *uap) { return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vattr vattr; struct nameidata nd; int error; AUDIT_ARG_MODE(mode); restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, segflg, path, fd, &cap_mkdirat_rights, td); if ((error = namei(&nd)) != 0) return (error); if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error != 0) goto out; #endif error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); vn_finished_write(mp); if (error == ERELOOKUP) goto restart; return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif int sys_rmdir(struct thread *td, struct rmdir_args *uap) { return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 0)); } int kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, enum uio_seg pathseg, int flag) { struct mount *mp; struct vnode *vp; struct file *fp; struct nameidata nd; cap_rights_t rights; int error; fp = NULL; if (fd != FD_NONE) { error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), &fp); if (error != 0) return (error); } restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH), pathseg, path, dfd, &cap_unlinkat_rights, td); if ((error = namei(&nd)) != 0) goto fdout; vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } if (fp != NULL && fp->f_vnode != vp) { if (VN_IS_DOOMED(fp->f_vnode)) error = EBADF; else error = EDEADLK; goto out; } #ifdef MAC error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error != 0) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) goto fdout; goto restart; } vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (error == ERELOOKUP) goto restart; fdout: if (fp != NULL) fdrop(fp, td); return (error); } #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) int freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, long *basep, void (*func)(struct freebsd11_dirent *)) { struct freebsd11_dirent dstdp; struct dirent *dp, *edp; char *dirbuf; off_t base; ssize_t resid, ucount; int error; /* XXX arbitrary sanity limit on `count'. */ count = min(count, 64 * 1024); dirbuf = malloc(count, M_TEMP, M_WAITOK); error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, UIO_SYSSPACE); if (error != 0) goto done; if (basep != NULL) *basep = base; ucount = 0; for (dp = (struct dirent *)dirbuf, edp = (struct dirent *)&dirbuf[count - resid]; ucount < count && dp < edp; ) { if (dp->d_reclen == 0) break; MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); if (dp->d_namlen >= sizeof(dstdp.d_name)) continue; dstdp.d_type = dp->d_type; dstdp.d_namlen = dp->d_namlen; dstdp.d_fileno = dp->d_fileno; /* truncate */ if (dstdp.d_fileno != dp->d_fileno) { switch (ino64_trunc_error) { default: case 0: break; case 1: error = EOVERFLOW; goto done; case 2: dstdp.d_fileno = UINT32_MAX; break; } } dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + ((dp->d_namlen + 1 + 3) &~ 3); bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); bzero(dstdp.d_name + dstdp.d_namlen, dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - dstdp.d_namlen); MPASS(dstdp.d_reclen <= dp->d_reclen); MPASS(ucount + dstdp.d_reclen <= count); if (func != NULL) func(&dstdp); error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); if (error != 0) break; dp = (struct dirent *)((char *)dp + dp->d_reclen); ucount += dstdp.d_reclen; } done: free(dirbuf, M_TEMP); if (error == 0) td->td_retval[0] = ucount; return (error); } #endif /* COMPAT */ #ifdef COMPAT_43 static void ogetdirentries_cvt(struct freebsd11_dirent *dp) { #if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; #else /* * The dp->d_type is the high byte of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; #endif } /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) { long loff; int error; error = kern_ogetdirentries(td, uap, &loff); if (error == 0) error = copyout(&loff, uap->basep, sizeof(long)); return (error); } int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff) { long base; int error; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, ogetdirentries_cvt); if (error == 0 && uap->basep != NULL) error = copyout(&base, uap->basep, sizeof(long)); return (error); } #endif /* COMPAT_43 */ #if defined(COMPAT_FREEBSD11) #ifndef _SYS_SYSPROTO_H_ struct freebsd11_getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int freebsd11_getdirentries(struct thread *td, struct freebsd11_getdirentries_args *uap) { long base; int error; error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL); if (error == 0 && uap->basep != NULL) error = copyout(&base, uap->basep, sizeof(long)); return (error); } int freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) { struct freebsd11_getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (freebsd11_getdirentries(td, &ap)); } #endif /* COMPAT_FREEBSD11 */ /* * Read a block of directory entries in a filesystem independent format. */ int sys_getdirentries(struct thread *td, struct getdirentries_args *uap) { off_t base; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error != 0) return (error); if (uap->basep != NULL) error = copyout(&base, uap->basep, sizeof(off_t)); return (error); } int kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, off_t *basep, ssize_t *residp, enum uio_seg bufseg) { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; off_t loff; int error, eofflag; off_t foffset; AUDIT_ARG_FD(fd); if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; error = getvnode(td, fd, &cap_read_rights, &fp); if (error != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; foffset = foffset_lock(fp, 0); unionread: if (vp->v_type != VDIR) { error = EINVAL; goto fail; } aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); loff = auio.uio_offset = foffset; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); foffset = auio.uio_offset; if (error != 0) { VOP_UNLOCK(vp); goto fail; } if (count == auio.uio_resid && (vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; foffset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp); *basep = loff; if (residp != NULL) *residp = auio.uio_resid; td->td_retval[0] = count - auio.uio_resid; fail: foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int sys_umask(struct thread *td, struct umask_args *uap) { struct pwddesc *pdp; pdp = td->td_proc->p_pd; PWDDESC_XLOCK(pdp); td->td_retval[0] = pdp->pd_cmask; pdp->pd_cmask = uap->newmask & ALLPERMS; PWDDESC_XUNLOCK(pdp); return (0); } /* * Void all references to file by ripping underlying filesystem away from * vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif int sys_revoke(struct thread *td, struct revoke_args *uap) { struct vnode *vp; struct vattr vattr; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; NDFREE_NOTHING(&nd); if (vp->v_type != VCHR || vp->v_rdev == NULL) { error = EINVAL; goto out; } #ifdef MAC error = mac_vnode_check_revoke(td->td_ucred, vp); if (error != 0) goto out; #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred); if (error != 0) goto out; if (td->td_ucred->cr_uid != vattr.va_uid) { error = priv_check(td, PRIV_VFS_ADMIN); if (error != 0) goto out; } if (devfs_usecount(vp) > 0) VOP_REVOKE(vp, REVOKEALL); out: vput(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry and check that, if it * is a capability, the correct rights are present. A reference on the file * entry is held upon returning. */ int getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) { struct file *fp; int error; error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); if (error != 0) return (error); /* * The file could be not of the vnode type, or it may be not * yet fully initialized, in which case the f_vnode pointer * may be set, but f_ops is still badfileops. E.g., * devfs_open() transiently create such situation to * facilitate csw d_fdopen(). * * Dupfdopen() handling in kern_openat() installs the * half-baked file into the process descriptor table, allowing * other thread to dereference it. Guard against the race by * checking f_ops. */ if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { fdrop(fp, td); return (EINVAL); } *fpp = fp; return (0); } /* * Get an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct lgetfh_args { char *fname; fhandle_t *fhp; }; #endif int sys_lgetfh(struct thread *td, struct lgetfh_args *uap) { return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); } #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int sys_getfh(struct thread *td, struct getfh_args *uap) { return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); } /* * syscall for the rpc.lockd to use to translate an open descriptor into * a NFS file handle. * * warning: do not remove the priv_check() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct getfhat_args { int fd; char *path; fhandle_t *fhp; int flags; }; #endif int sys_getfhat(struct thread *td, struct getfhat_args *uap) { if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) return (EINVAL); return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); } int kern_getfhat(struct thread *td, int flags, int fd, const char *path, enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) { struct nameidata nd; fhandle_t fh; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, td); error = namei(&nd); if (error != 0) return (error); NDFREE_NOTHING(&nd); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VOP_VPTOFH(vp, &fh.fh_fid); vput(vp); if (error == 0) { if (fhseg == UIO_USERSPACE) error = copyout(&fh, fhp, sizeof (fh)); else memcpy(fhp, &fh, sizeof(fh)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct fhlink_args { fhandle_t *fhp; const char *to; }; #endif int sys_fhlink(struct thread *td, struct fhlink_args *uap) { return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); } #ifndef _SYS_SYSPROTO_H_ struct fhlinkat_args { fhandle_t *fhp; int tofd; const char *to; }; #endif int sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) { return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); } static int kern_fhlinkat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, fhandle_t *fhp) { fhandle_t fh; struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); error = copyin(fhp, &fh, sizeof(fh)); if (error != 0) return (error); do { bwillwrite(); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); vfs_unbusy(mp); if (error != 0) return (error); VOP_UNLOCK(vp); error = kern_linkat_vp(td, vp, fd, path, pathseg); } while (error == EAGAIN || error == ERELOOKUP); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fhreadlink_args { fhandle_t *fhp; char *buf; size_t bufsize; }; #endif int sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) { fhandle_t fh; struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_GETFH); if (error != 0) return (error); if (uap->bufsize > IOSIZE_MAX) return (EINVAL); error = copyin(uap->fhp, &fh, sizeof(fh)); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); vput(vp); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into an * open descriptor. * * warning: do not remove the priv_check() call or this becomes one giant * security hole. */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int sys_fhopen(struct thread *td, struct fhopen_args *uap) { return (kern_fhopen(td, uap->u_fhp, uap->flags)); } int kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) { struct mount *mp; struct vnode *vp; struct fhandle fhp; struct file *fp; int fmode, error; int indx; error = priv_check(td, PRIV_VFS_FHOPEN); if (error != 0) return (error); indx = -1; fmode = FFLAGS(flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(u_fhp, &fhp, sizeof(fhp)); if (error != 0) return(error); /* find the mount point */ mp = vfs_busyfs(&fhp.fh_fsid); if (mp == NULL) return (ESTALE); /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = falloc_noinstall(td, &fp); if (error != 0) { vput(vp); return (error); } /* * An extra reference on `fp' has been held for us by * falloc_noinstall(). */ #ifdef INVARIANTS td->td_dupfd = -1; #endif error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); if (error != 0) { KASSERT(fp->f_ops == &badfileops, ("VOP_OPEN in fhopen() set f_ops")); KASSERT(td->td_dupfd < 0, ("fhopen() encountered fdopen()")); vput(vp); goto bad; } #ifdef INVARIANTS td->td_dupfd = 0; #endif fp->f_vnode = vp; finit_vnode(fp, fmode, NULL, &vnops); VOP_UNLOCK(vp); if ((fmode & O_TRUNC) != 0) { error = fo_truncate(fp, 0, td->td_ucred, td); if (error != 0) goto bad; } error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; return (error); } /* * Stat an (NFS) file handle. */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int sys_fhstat(struct thread *td, struct fhstat_args *uap) { struct stat sb; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fh)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error == 0) error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } int kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) { struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_FHSTAT); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); vfs_unbusy(mp); if (error != 0) return (error); error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td); vput(vp); return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) { struct statfs *sfp; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sfp); if (error == 0) error = copyout(sfp, uap->buf, sizeof(*sfp)); free(sfp, M_STATFS); return (error); } int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) { struct mount *mp; struct vnode *vp; int error; error = priv_check(td, PRIV_VFS_FHSTATFS); if (error != 0) return (error); if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) return (ESTALE); error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); if (error != 0) { vfs_unbusy(mp); return (error); } vput(vp); error = prison_canseemount(td->td_ucred, mp); if (error != 0) goto out; #ifdef MAC error = mac_mount_check_stat(td->td_ucred, mp); if (error != 0) goto out; #endif error = VFS_STATFS(mp, buf); out: vfs_unbusy(mp); return (error); } /* * Unlike madvise(2), we do not make a best effort to remember every * possible caching hint. Instead, we remember the last setting with * the exception that we will allow POSIX_FADV_NORMAL to adjust the * region of any current setting. */ int kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, int advice) { struct fadvise_info *fa, *new; struct file *fp; struct vnode *vp; off_t end; int error; if (offset < 0 || len < 0 || offset > OFF_MAX - len) return (EINVAL); AUDIT_ARG_VALUE(advice); switch (advice) { case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_RANDOM: case POSIX_FADV_NOREUSE: new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); break; case POSIX_FADV_NORMAL: case POSIX_FADV_WILLNEED: case POSIX_FADV_DONTNEED: new = NULL; break; default: return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? */ AUDIT_ARG_FD(fd); error = fget(td, fd, &cap_no_rights, &fp); if (error != 0) goto out; AUDIT_ARG_FILE(td->td_proc, fp); if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { error = ESPIPE; goto out; } if (fp->f_type != DTYPE_VNODE) { error = ENODEV; goto out; } vp = fp->f_vnode; if (vp->v_type != VREG) { error = ENODEV; goto out; } if (len == 0) end = OFF_MAX; else end = offset + len - 1; switch (advice) { case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_RANDOM: case POSIX_FADV_NOREUSE: /* * Try to merge any existing non-standard region with * this new region if possible, otherwise create a new * non-standard region for this request. */ mtx_pool_lock(mtxpool_sleep, fp); fa = fp->f_advice; if (fa != NULL && fa->fa_advice == advice && ((fa->fa_start <= end && fa->fa_end >= offset) || (end != OFF_MAX && fa->fa_start == end + 1) || (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { if (offset < fa->fa_start) fa->fa_start = offset; if (end > fa->fa_end) fa->fa_end = end; } else { new->fa_advice = advice; new->fa_start = offset; new->fa_end = end; fp->f_advice = new; new = fa; } mtx_pool_unlock(mtxpool_sleep, fp); break; case POSIX_FADV_NORMAL: /* * If a the "normal" region overlaps with an existing * non-standard region, trim or remove the * non-standard region. */ mtx_pool_lock(mtxpool_sleep, fp); fa = fp->f_advice; if (fa != NULL) { if (offset <= fa->fa_start && end >= fa->fa_end) { new = fa; fp->f_advice = NULL; } else if (offset <= fa->fa_start && end >= fa->fa_start) fa->fa_start = end + 1; else if (offset <= fa->fa_end && end >= fa->fa_end) fa->fa_end = offset - 1; else if (offset >= fa->fa_start && end <= fa->fa_end) { /* * If the "normal" region is a middle * portion of the existing * non-standard region, just remove * the whole thing rather than picking * one side or the other to * preserve. */ new = fa; fp->f_advice = NULL; } } mtx_pool_unlock(mtxpool_sleep, fp); break; case POSIX_FADV_WILLNEED: case POSIX_FADV_DONTNEED: error = VOP_ADVISE(vp, offset, end, advice); break; } out: if (fp != NULL) fdrop(fp, td); free(new, M_FADVISE); return (error); } int sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, uap->advice); return (kern_posix_error(td, error)); } int kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, off_t *outoffp, size_t len, unsigned int flags) { struct file *infp, *outfp; struct vnode *invp, *outvp; int error; size_t retlen; void *rl_rcookie, *rl_wcookie; off_t savinoff, savoutoff; infp = outfp = NULL; rl_rcookie = rl_wcookie = NULL; savinoff = -1; error = 0; retlen = 0; if (flags != 0) { error = EINVAL; goto out; } if (len > SSIZE_MAX) /* * Although the len argument is size_t, the return argument * is ssize_t (which is signed). Therefore a size that won't * fit in ssize_t can't be returned. */ len = SSIZE_MAX; /* Get the file structures for the file descriptors. */ error = fget_read(td, infd, &cap_read_rights, &infp); if (error != 0) goto out; if (infp->f_ops == &badfileops) { error = EBADF; goto out; } if (infp->f_vnode == NULL) { error = EINVAL; goto out; } error = fget_write(td, outfd, &cap_write_rights, &outfp); if (error != 0) goto out; if (outfp->f_ops == &badfileops) { error = EBADF; goto out; } if (outfp->f_vnode == NULL) { error = EINVAL; goto out; } /* Set the offset pointers to the correct place. */ if (inoffp == NULL) inoffp = &infp->f_offset; if (outoffp == NULL) outoffp = &outfp->f_offset; savinoff = *inoffp; savoutoff = *outoffp; invp = infp->f_vnode; outvp = outfp->f_vnode; /* Sanity check the f_flag bits. */ if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || (infp->f_flag & FREAD) == 0) { error = EBADF; goto out; } /* If len == 0, just return 0. */ if (len == 0) goto out; /* * If infp and outfp refer to the same file, the byte ranges cannot * overlap. */ if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > savoutoff) || (savinoff > savoutoff && savoutoff + len > savinoff))) { error = EINVAL; goto out; } /* Range lock the byte ranges for both invp and outvp. */ for (;;) { rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + len); rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + len); if (rl_rcookie != NULL) break; vn_rangelock_unlock(outvp, rl_wcookie); rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); vn_rangelock_unlock(invp, rl_rcookie); } retlen = len; error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, flags, infp->f_cred, outfp->f_cred, td); out: if (rl_rcookie != NULL) vn_rangelock_unlock(invp, rl_rcookie); if (rl_wcookie != NULL) vn_rangelock_unlock(outvp, rl_wcookie); if (savinoff != -1 && (error == EINTR || error == ERESTART)) { *inoffp = savinoff; *outoffp = savoutoff; } if (outfp != NULL) fdrop(outfp, td); if (infp != NULL) fdrop(infp, td); td->td_retval[0] = retlen; return (error); } int sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) { off_t inoff, outoff, *inoffp, *outoffp; int error; inoffp = outoffp = NULL; if (uap->inoffp != NULL) { error = copyin(uap->inoffp, &inoff, sizeof(off_t)); if (error != 0) return (error); inoffp = &inoff; } if (uap->outoffp != NULL) { error = copyin(uap->outoffp, &outoff, sizeof(off_t)); if (error != 0) return (error); outoffp = &outoff; } error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, outoffp, uap->len, uap->flags); if (error == 0 && uap->inoffp != NULL) error = copyout(inoffp, uap->inoffp, sizeof(off_t)); if (error == 0 && uap->outoffp != NULL) error = copyout(outoffp, uap->outoffp, sizeof(off_t)); return (error); } diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index bc2011c31e88..0fa4e7758c9d 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -1,367 +1,368 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1983, 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fcntl.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_FCNTL_H_ #define _SYS_FCNTL_H_ /* * This file includes the definitions for open and fcntl * described by POSIX for ; it also includes * related kernel definitions. */ #include #include #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; #define _MODE_T_DECLARED #endif #ifndef _OFF_T_DECLARED typedef __off_t off_t; #define _OFF_T_DECLARED #endif #ifndef _PID_T_DECLARED typedef __pid_t pid_t; #define _PID_T_DECLARED #endif /* * File status flags: these are used by open(2), fcntl(2). * They are also used (indirectly) in the kernel file structure f_flags, * which is a superset of the open/fcntl flags. Open flags and f_flags * are inter-convertible using OFLAGS(fflags) and FFLAGS(oflags). * Open/fcntl flags begin with O_; kernel-internal flags begin with F. */ /* open-only flags */ #define O_RDONLY 0x0000 /* open for reading only */ #define O_WRONLY 0x0001 /* open for writing only */ #define O_RDWR 0x0002 /* open for reading and writing */ #define O_ACCMODE 0x0003 /* mask for above modes */ /* * Kernel encoding of open mode; separate read and write bits that are * independently testable: 1 greater than the above. * * XXX * FREAD and FWRITE are excluded from the #ifdef _KERNEL so that TIOCFLUSH, * which was documented to use FREAD/FWRITE, continues to work. */ #if __BSD_VISIBLE #define FREAD 0x0001 #define FWRITE 0x0002 #endif #define O_NONBLOCK 0x0004 /* no delay */ #define O_APPEND 0x0008 /* set append mode */ #if __BSD_VISIBLE #define O_SHLOCK 0x0010 /* open with shared file lock */ #define O_EXLOCK 0x0020 /* open with exclusive file lock */ #define O_ASYNC 0x0040 /* signal pgrp when data ready */ #define O_FSYNC 0x0080 /* synchronous writes */ #endif #define O_SYNC 0x0080 /* POSIX synonym for O_FSYNC */ #if __POSIX_VISIBLE >= 200809 #define O_NOFOLLOW 0x0100 /* don't follow symlinks */ #endif #define O_CREAT 0x0200 /* create if nonexistent */ #define O_TRUNC 0x0400 /* truncate to zero length */ #define O_EXCL 0x0800 /* error if already exists */ #ifdef _KERNEL #define FHASLOCK 0x4000 /* descriptor holds advisory lock */ #endif /* Defined by POSIX 1003.1; BSD default, but must be distinct from O_RDONLY. */ #define O_NOCTTY 0x8000 /* don't assign controlling terminal */ #if __BSD_VISIBLE /* Attempt to bypass buffer cache */ #define O_DIRECT 0x00010000 #endif #if __POSIX_VISIBLE >= 200809 #define O_DIRECTORY 0x00020000 /* Fail if not directory */ #define O_EXEC 0x00040000 /* Open for execute only */ #define O_SEARCH O_EXEC #endif #ifdef _KERNEL #define FEXEC O_EXEC #define FSEARCH O_SEARCH #endif #if __POSIX_VISIBLE >= 200809 /* Defined by POSIX 1003.1-2008; BSD default, but reserve for future use. */ #define O_TTY_INIT 0x00080000 /* Restore default termios attributes */ #define O_CLOEXEC 0x00100000 #endif #if __BSD_VISIBLE #define O_VERIFY 0x00200000 /* open only after verification */ /* #define O_UNUSED1 0x00400000 */ /* Was O_BENEATH */ #define O_RESOLVE_BENEATH 0x00800000 /* Do not allow name resolution to walk out of cwd */ #endif #define O_DSYNC 0x01000000 /* POSIX data sync */ /* * XXX missing O_RSYNC. */ #ifdef _KERNEL /* Only for devfs d_close() flags. */ #define FLASTCLOSE O_DIRECTORY #define FREVOKE O_VERIFY /* Only for fo_close() from half-succeeded open */ #define FOPENFAILED O_TTY_INIT /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */ #define FFLAGS(oflags) ((oflags) & O_EXEC ? (oflags) : (oflags) + 1) #define OFLAGS(fflags) ((fflags) & O_EXEC ? (fflags) : (fflags) - 1) /* bits to save after open */ #define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|O_DIRECT|FEXEC) /* bits settable by fcntl(F_SETFL, ...) */ #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FDSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT) #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) /* * Set by shm_open(3) in older libc's to get automatic MAP_ASYNC * behavior for POSIX shared memory objects (which are otherwise * implemented as plain files). */ #define FPOSIXSHM O_NOFOLLOW #undef FCNTLFLAGS #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|FRDAHEAD| \ O_DIRECT) #endif #endif /* * The O_* flags used to have only F* names, which were used in the kernel * and by fcntl. We retain the F* names for the kernel f_flag field * and for backward compatibility for fcntl. These flags are deprecated. */ #if __BSD_VISIBLE #define FAPPEND O_APPEND /* kernel/compat */ #define FASYNC O_ASYNC /* kernel/compat */ #define FFSYNC O_FSYNC /* kernel */ #define FDSYNC O_DSYNC /* kernel */ #define FNONBLOCK O_NONBLOCK /* kernel */ #define FNDELAY O_NONBLOCK /* compat */ #define O_NDELAY O_NONBLOCK /* compat */ #endif /* * Historically, we ran out of bits in f_flag (which was once a short). * However, the flag bits not set in FMASK are only meaningful in the * initial open syscall. Those bits were thus given a * different meaning for fcntl(2). */ #if __BSD_VISIBLE /* Read ahead */ #define FRDAHEAD O_CREAT #endif #if __POSIX_VISIBLE >= 200809 /* * Magic value that specify the use of the current working directory * to determine the target of relative file paths in the openat() and * similar syscalls. */ #define AT_FDCWD -100 /* * Miscellaneous flags for the *at() syscalls. */ #define AT_EACCESS 0x0100 /* Check access using effective user and group ID */ #define AT_SYMLINK_NOFOLLOW 0x0200 /* Do not follow symbolic links */ #define AT_SYMLINK_FOLLOW 0x0400 /* Follow symbolic link */ #define AT_REMOVEDIR 0x0800 /* Remove directory instead of file */ #endif /* __POSIX_VISIBLE >= 200809 */ #if __BSD_VISIBLE /* #define AT_UNUSED1 0x1000 *//* Was AT_BENEATH */ #define AT_RESOLVE_BENEATH 0x2000 /* Do not allow name resolution to walk out of dirfd */ +#define AT_EMPTY_PATH 0x4000 /* Operate on dirfd if path is empty */ #endif /* __BSD_VISIBLE */ /* * Constants used for fcntl(2) */ /* command values */ #define F_DUPFD 0 /* duplicate file descriptor */ #define F_GETFD 1 /* get file descriptor flags */ #define F_SETFD 2 /* set file descriptor flags */ #define F_GETFL 3 /* get file status flags */ #define F_SETFL 4 /* set file status flags */ #if __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 #define F_GETOWN 5 /* get SIGIO/SIGURG proc/pgrp */ #define F_SETOWN 6 /* set SIGIO/SIGURG proc/pgrp */ #endif #if __BSD_VISIBLE #define F_OGETLK 7 /* get record locking information */ #define F_OSETLK 8 /* set record locking information */ #define F_OSETLKW 9 /* F_SETLK; wait if blocked */ #define F_DUP2FD 10 /* duplicate file descriptor to arg */ #endif #define F_GETLK 11 /* get record locking information */ #define F_SETLK 12 /* set record locking information */ #define F_SETLKW 13 /* F_SETLK; wait if blocked */ #if __BSD_VISIBLE #define F_SETLK_REMOTE 14 /* debugging support for remote locks */ #define F_READAHEAD 15 /* read ahead */ #define F_RDAHEAD 16 /* Darwin compatible read ahead */ #endif #if __POSIX_VISIBLE >= 200809 #define F_DUPFD_CLOEXEC 17 /* Like F_DUPFD, but FD_CLOEXEC is set */ #endif #if __BSD_VISIBLE #define F_DUP2FD_CLOEXEC 18 /* Like F_DUP2FD, but FD_CLOEXEC is set */ #define F_ADD_SEALS 19 #define F_GET_SEALS 20 #define F_ISUNIONSTACK 21 /* Kludge for libc, don't use it. */ /* Seals (F_ADD_SEALS, F_GET_SEALS). */ #define F_SEAL_SEAL 0x0001 /* Prevent adding sealings */ #define F_SEAL_SHRINK 0x0002 /* May not shrink */ #define F_SEAL_GROW 0x0004 /* May not grow */ #define F_SEAL_WRITE 0x0008 /* May not write */ #endif /* __BSD_VISIBLE */ /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */ /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ #define F_RDLCK 1 /* shared or read lock */ #define F_UNLCK 2 /* unlock */ #define F_WRLCK 3 /* exclusive or write lock */ #if __BSD_VISIBLE #define F_UNLCKSYS 4 /* purge locks for a given system ID */ #define F_CANCEL 5 /* cancel an async lock request */ #endif #ifdef _KERNEL #define F_WAIT 0x010 /* Wait until lock is granted */ #define F_FLOCK 0x020 /* Use flock(2) semantics for lock */ #define F_POSIX 0x040 /* Use POSIX semantics for lock */ #define F_REMOTE 0x080 /* Lock owner is remote NFS client */ #define F_NOINTR 0x100 /* Ignore signals when waiting */ #define F_FIRSTOPEN 0x200 /* First right to advlock file */ #endif /* * Advisory file segment locking data type - * information passed to system by user */ struct flock { off_t l_start; /* starting offset */ off_t l_len; /* len = 0 means until end of file */ pid_t l_pid; /* lock owner */ short l_type; /* lock type: read/write, etc. */ short l_whence; /* type of l_start */ int l_sysid; /* remote system id or zero for local */ }; #if __BSD_VISIBLE /* * Old advisory file segment locking data type, * before adding l_sysid. */ struct __oflock { off_t l_start; /* starting offset */ off_t l_len; /* len = 0 means until end of file */ pid_t l_pid; /* lock owner */ short l_type; /* lock type: read/write, etc. */ short l_whence; /* type of l_start */ }; #endif #if __BSD_VISIBLE /* lock operations for flock(2) */ #define LOCK_SH 0x01 /* shared file lock */ #define LOCK_EX 0x02 /* exclusive file lock */ #define LOCK_NB 0x04 /* don't block when locking */ #define LOCK_UN 0x08 /* unlock file */ #endif #if __POSIX_VISIBLE >= 200112 /* * Advice to posix_fadvise */ #define POSIX_FADV_NORMAL 0 /* no special treatment */ #define POSIX_FADV_RANDOM 1 /* expect random page references */ #define POSIX_FADV_SEQUENTIAL 2 /* expect sequential page references */ #define POSIX_FADV_WILLNEED 3 /* will need these pages */ #define POSIX_FADV_DONTNEED 4 /* dont need these pages */ #define POSIX_FADV_NOREUSE 5 /* access data only once */ #endif #ifdef __BSD_VISIBLE /* * Magic value that specify that corresponding file descriptor to filename * is unknown and sanitary check should be omitted in the funlinkat() and * similar syscalls. */ #define FD_NONE -200 #endif #ifndef _KERNEL __BEGIN_DECLS int open(const char *, int, ...); int creat(const char *, mode_t); int fcntl(int, int, ...); #if __BSD_VISIBLE int flock(int, int); #endif #if __POSIX_VISIBLE >= 200809 int openat(int, const char *, int, ...); #endif #if __POSIX_VISIBLE >= 200112 int posix_fadvise(int, off_t, off_t, int); int posix_fallocate(int, off_t, off_t); #endif __END_DECLS #endif #endif /* !_SYS_FCNTL_H_ */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index b6985f1fa6ff..5f3d917083a5 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -1,315 +1,318 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1985, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)namei.h 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_NAMEI_H_ #define _SYS_NAMEI_H_ #include #include #include #include enum nameiop { LOOKUP, CREATE, DELETE, RENAME }; struct componentname { /* * Arguments to lookup. */ u_int64_t cn_origflags; /* flags to namei */ u_int64_t cn_flags; /* flags to namei */ struct thread *cn_thread;/* thread requesting lookup */ struct ucred *cn_cred; /* credentials */ enum nameiop cn_nameiop; /* namei operation */ int cn_lkflags; /* Lock flags LK_EXCLUSIVE or LK_SHARED */ /* * Shared between lookup and commit routines. */ char *cn_pnbuf; /* pathname buffer */ char *cn_nameptr; /* pointer to looked up name */ long cn_namelen; /* length of looked up component */ }; struct nameicap_tracker; TAILQ_HEAD(nameicap_tracker_head, nameicap_tracker); /* * Encapsulation of namei parameters. */ struct nameidata { /* * Arguments to namei/lookup. */ const char *ni_dirp; /* pathname pointer */ enum uio_seg ni_segflg; /* location of pathname */ cap_rights_t *ni_rightsneeded; /* rights required to look up vnode */ /* * Arguments to lookup. */ struct vnode *ni_startdir; /* starting directory */ struct vnode *ni_rootdir; /* logical root directory */ struct vnode *ni_topdir; /* logical top directory */ int ni_dirfd; /* starting directory for *at functions */ int ni_lcf; /* local call flags */ /* * Results: returned from namei */ struct filecaps ni_filecaps; /* rights the *at base has */ /* * Results: returned from/manipulated by lookup */ struct vnode *ni_vp; /* vnode of result */ struct vnode *ni_dvp; /* vnode of intermediate directory */ /* * Results: flags returned from namei */ u_int ni_resflags; /* * Debug for validating API use by the callers. */ u_short ni_debugflags; /* * Shared between namei and lookup/commit routines. */ u_short ni_loopcnt; /* count of symlinks encountered */ size_t ni_pathlen; /* remaining chars in path */ char *ni_next; /* next location in pathname */ /* * Lookup parameters: this structure describes the subset of * information from the nameidata structure that is passed * through the VOP interface. */ struct componentname ni_cnd; struct nameicap_tracker_head ni_cap_tracker; }; #ifdef _KERNEL enum cache_fpl_status { CACHE_FPL_STATUS_DESTROYED, CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL, CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET }; int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, struct pwd **pwdp); /* * Flags for namei. * * If modifying the list make sure to check whether NDVALIDATE needs updating. */ /* * Debug. */ #define NAMEI_DBG_INITED 0x0001 #define NAMEI_DBG_CALLED 0x0002 #define NAMEI_DBG_HADSTARTDIR 0x0004 /* * namei operational modifier flags, stored in ni_cnd.flags */ #define NC_NOMAKEENTRY 0x0001 /* name must not be added to cache */ #define NC_KEEPPOSENTRY 0x0002 /* don't evict a positive entry */ #define NOCACHE NC_NOMAKEENTRY /* for compatibility with older code */ #define LOCKLEAF 0x0004 /* lock vnode on return */ #define LOCKPARENT 0x0008 /* want parent vnode returned locked */ #define WANTPARENT 0x0010 /* want parent vnode returned unlocked */ #define FAILIFEXISTS 0x0020 /* return EEXIST if found */ #define FOLLOW 0x0040 /* follow symbolic links */ +#define EMPTYPATH 0x0080 /* Allow empty path for *at */ #define LOCKSHARED 0x0100 /* Shared lock leaf */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define RBENEATH 0x100000000ULL /* No escape, even tmp, from start dir */ #define MODMASK 0xf000001ffULL /* mask of operational modifiers */ + /* * Namei parameter descriptors. * * SAVENAME may be set by either the callers of namei or by VOP_LOOKUP. * If the caller of namei sets the flag (for example execve wants to * know the name of the program that is being executed), then it must * free the buffer. If VOP_LOOKUP sets the flag, then the buffer must * be freed by either the commit routine or the VOP_ABORT routine. * SAVESTART is set only by the callers of namei. It implies SAVENAME * plus the addition of saving the parent directory that contains the * name in ni_startdir. It allows repeated calls to lookup for the * name being sought. The caller is responsible for releasing the * buffer and for vrele'ing ni_startdir. */ #define RDONLY 0x00000200 /* lookup with read-only semantics */ #define SAVENAME 0x00000400 /* save pathname buffer */ #define SAVESTART 0x00000800 /* save starting directory */ #define ISWHITEOUT 0x00001000 /* found whiteout */ #define DOWHITEOUT 0x00002000 /* do whiteouts */ #define WILLBEDIR 0x00004000 /* new files will be dirs; allow trailing / */ #define ISOPEN 0x00008000 /* caller is opening; return a real vnode. */ #define NOCROSSMOUNT 0x00010000 /* do not cross mount points */ #define NOMACCHECK 0x00020000 /* do not perform MAC checks */ #define AUDITVNODE1 0x00040000 /* audit the looked up vnode information */ #define AUDITVNODE2 0x00080000 /* audit the looked up vnode information */ #define NOCAPCHECK 0x00100000 /* do not perform capability checks */ /* UNUSED 0x00200000 */ /* UNUSED 0x00400000 */ /* UNUSED 0x00800000 */ #define HASBUF 0x01000000 /* has allocated pathname buffer */ #define NOEXECCHECK 0x02000000 /* do not perform exec check on dir */ #define MAKEENTRY 0x04000000 /* entry is to be added to name cache */ #define ISSYMLINK 0x08000000 /* symlink needs interpretation */ #define ISLASTCN 0x10000000 /* this is last component of pathname */ #define ISDOTDOT 0x20000000 /* current component name is .. */ #define TRAILINGSLASH 0x40000000 /* path ended in a slash */ #define PARAMASK 0x7ffffe00 /* mask of parameter descriptors */ /* * Flags which must not be passed in by callers. */ #define NAMEI_INTERNAL_FLAGS \ (HASBUF | NOEXECCHECK | MAKEENTRY | ISSYMLINK | ISLASTCN | ISDOTDOT | \ TRAILINGSLASH) /* * Namei results flags */ #define NIRES_ABS 0x00000001 /* Path was absolute */ #define NIRES_STRICTREL 0x00000002 /* Restricted lookup result */ +#define NIRES_EMPTYPATH 0x00000004 /* EMPTYPATH used */ /* * Flags in ni_lcf, valid for the duration of the namei call. */ #define NI_LCF_STRICTRELATIVE 0x0001 /* relative lookup only */ #define NI_LCF_CAP_DOTDOT 0x0002 /* ".." in strictrelative case */ /* * Initialization of a nameidata structure. */ #define NDINIT(ndp, op, flags, segflg, namep, td) \ NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, &cap_no_rights, td) #define NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td) \ NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, &cap_no_rights, td) #define NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rightsp, td) \ NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rightsp, td) #define NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td) \ NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, &cap_no_rights, td) /* * Note the constant pattern may *hide* bugs. */ #ifdef INVARIANTS #define NDINIT_PREFILL(arg) memset(arg, 0xff, sizeof(*arg)) #define NDINIT_DBG(arg) { (arg)->ni_debugflags = NAMEI_DBG_INITED; } #define NDREINIT_DBG(arg) { \ if (((arg)->ni_debugflags & NAMEI_DBG_INITED) == 0) \ panic("namei data not inited"); \ if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0) \ panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR"); \ (arg)->ni_debugflags = NAMEI_DBG_INITED; \ } #else #define NDINIT_PREFILL(arg) do { } while (0) #define NDINIT_DBG(arg) do { } while (0) #define NDREINIT_DBG(arg) do { } while (0) #endif #define NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, startdir, rightsp, td) \ do { \ struct nameidata *_ndp = (ndp); \ cap_rights_t *_rightsp = (rightsp); \ MPASS(_rightsp != NULL); \ NDINIT_PREFILL(_ndp); \ NDINIT_DBG(_ndp); \ _ndp->ni_cnd.cn_nameiop = op; \ _ndp->ni_cnd.cn_flags = flags; \ _ndp->ni_segflg = segflg; \ _ndp->ni_dirp = namep; \ _ndp->ni_dirfd = dirfd; \ _ndp->ni_startdir = startdir; \ _ndp->ni_resflags = 0; \ filecaps_init(&_ndp->ni_filecaps); \ _ndp->ni_cnd.cn_thread = td; \ _ndp->ni_rightsneeded = _rightsp; \ } while (0) #define NDREINIT(ndp) do { \ struct nameidata *_ndp = (ndp); \ NDREINIT_DBG(_ndp); \ _ndp->ni_resflags = 0; \ _ndp->ni_startdir = NULL; \ } while (0) #define NDF_NO_DVP_RELE 0x00000001 #define NDF_NO_DVP_UNLOCK 0x00000002 #define NDF_NO_DVP_PUT 0x00000003 #define NDF_NO_VP_RELE 0x00000004 #define NDF_NO_VP_UNLOCK 0x00000008 #define NDF_NO_VP_PUT 0x0000000c #define NDF_NO_STARTDIR_RELE 0x00000010 #define NDF_NO_FREE_PNBUF 0x00000020 #define NDF_ONLY_PNBUF (~NDF_NO_FREE_PNBUF) void NDFREE_PNBUF(struct nameidata *); void NDFREE(struct nameidata *, const u_int); #define NDFREE(ndp, flags) do { \ struct nameidata *_ndp = (ndp); \ if (__builtin_constant_p(flags) && flags == NDF_ONLY_PNBUF) \ NDFREE_PNBUF(_ndp); \ else \ NDFREE(_ndp, flags); \ } while (0) #ifdef INVARIANTS void NDFREE_NOTHING(struct nameidata *); void NDVALIDATE(struct nameidata *); #else #define NDFREE_NOTHING(ndp) do { } while (0) #define NDVALIDATE(ndp) do { } while (0) #endif int namei(struct nameidata *ndp); int lookup(struct nameidata *ndp); int relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp); #endif /* * Stats on usefulness of namei caches. */ struct nchstats { long ncs_goodhits; /* hits that we can really use */ long ncs_neghits; /* negative hits that we can use */ long ncs_badhits; /* hits we must drop */ long ncs_falsehits; /* hits with id mismatch */ long ncs_miss; /* misses */ long ncs_long; /* long names that ignore cache */ long ncs_pass2; /* names found with passes == 2 */ long ncs_2passes; /* number of times we attempt it */ }; extern struct nchstats nchstats; #endif /* !_SYS_NAMEI_H_ */