diff --git a/crypto/openssh/blacklist.c b/crypto/openssh/blacklist.c index f118edab40cf..33d02607dd98 100644 --- a/crypto/openssh/blacklist.c +++ b/crypto/openssh/blacklist.c @@ -1,97 +1,97 @@ /*- * Copyright (c) 2015 The NetBSD Foundation, Inc. - * Copyright (c) 2016 The FreeBSD Foundation, Inc. + * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Kurt Lidl * under sponsorship from the FreeBSD Foundation. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include "includes.h" #include #include #include #include #include #include #include #include "ssh.h" #include "packet.h" #include "log.h" #include "misc.h" #include "servconf.h" #include #include "blacklist_client.h" /* blacklist handle; stays NULL until blacklist_init() obtains one from bl_create() */ static struct blacklist *blstate = NULL; /* import */ extern ServerOptions options; /* internal definition from bl.h */ struct blacklist *bl_create(bool, char *, void (*)(int, const char *, va_list)); /* impedance match vsyslog() to sshd's internal logging levels: LOG_ERR, LOG_DEBUG and LOG_INFO become SYSLOG_LEVEL_ERROR, SYSLOG_LEVEL_DEBUG1 and SYSLOG_LEVEL_INFO, any other priority becomes SYSLOG_LEVEL_DEBUG2, and the message plus its va_list are forwarded to do_log2(); blacklist_init() passes this function to bl_create() as the logging callback */ void im_log(int priority, const char *message, va_list args) { LogLevel imlevel; switch (priority) { case LOG_ERR: imlevel = SYSLOG_LEVEL_ERROR; break; case LOG_DEBUG: imlevel = SYSLOG_LEVEL_DEBUG1; break; case LOG_INFO: imlevel = SYSLOG_LEVEL_INFO; break; default: imlevel = SYSLOG_LEVEL_DEBUG2; } do_log2(imlevel, message, args); } /* create the blacklist handle, but only when options.use_blacklist is set; otherwise blstate is left untouched */ void blacklist_init(void) { if (options.use_blacklist) blstate = bl_create(false, NULL, im_log); } /* report action and msg through blacklist_r(), together with the value returned by ssh_packet_get_connection_in(ssh); does nothing when blstate is NULL or the connection is not on a socket, and the blacklist_r() result is deliberately discarded */ void blacklist_notify(struct ssh *ssh, int action, const char *msg) { if (blstate != NULL && ssh_packet_connection_is_on_socket(ssh)) (void)blacklist_r(blstate, action, ssh_packet_get_connection_in(ssh), msg); } diff --git a/crypto/openssh/blacklist_client.h b/crypto/openssh/blacklist_client.h index 236884092010..601a44461e20 100644 --- a/crypto/openssh/blacklist_client.h +++ b/crypto/openssh/blacklist_client.h @@ -1,61 +1,61 @@ /*- * Copyright (c) 2015 The NetBSD Foundation, Inc. - * Copyright (c) 2016 The FreeBSD Foundation, Inc. + * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Kurt Lidl * under sponsorship from the FreeBSD Foundation. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLACKLIST_CLIENT_H #define BLACKLIST_CLIENT_H /* event codes, numbered from 0 in declaration order; presumably the action values given to blacklist_notify() -- confirm against callers. The definition is skipped when BLACKLIST_API_ENUM is already defined (presumably by the blacklist library header -- confirm). */ #ifndef BLACKLIST_API_ENUM enum { BLACKLIST_AUTH_OK = 0, BLACKLIST_AUTH_FAIL, BLACKLIST_ABUSIVE_BEHAVIOR, BLACKLIST_BAD_USER }; #endif /* with USE_BLACKLIST defined the two macros forward to the functions declared here; without it both macros expand to nothing, so call sites need no conditionals of their own */ #ifdef USE_BLACKLIST void blacklist_init(void); void blacklist_notify(struct ssh *, int, const char *); #define BLACKLIST_INIT() blacklist_init() #define BLACKLIST_NOTIFY(ssh,x,msg) blacklist_notify(ssh,x,msg) #else #define BLACKLIST_INIT() #define BLACKLIST_NOTIFY(ssh,x,msg) #endif #endif /* BLACKLIST_CLIENT_H */ diff --git a/lib/libc/sys/_umtx_op.2 b/lib/libc/sys/_umtx_op.2 index 60f90c32ba35..974850fb8425 100644 --- a/lib/libc/sys/_umtx_op.2 +++ b/lib/libc/sys/_umtx_op.2 @@ -1,1539 +1,1539 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. 
+.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd November 23, 2020 .Dt _UMTX_OP 2 .Os .Sh NAME .Nm _umtx_op .Nd interface for implementation of userspace threading synchronization primitives .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/types.h .In sys/umtx.h .Ft int .Fn _umtx_op "void *obj" "int op" "u_long val" "void *uaddr" "void *uaddr2" .Sh DESCRIPTION The .Fn _umtx_op system call provides kernel support for userspace implementation of the threading synchronization primitives. 
The .Lb libthr uses the syscall to implement .St -p1003.1-2001 pthread locks, like mutexes, condition variables and so on. .Ss STRUCTURES The operations, performed by the .Fn _umtx_op syscall, operate on userspace objects which are described by the following structures. Reserved fields and paddings are omitted. All objects require ABI-mandated alignment, but this is not currently enforced consistently on all architectures. .Pp The following flags are defined for flag fields of all structures: .Bl -tag -width indent .It Dv USYNC_PROCESS_SHARED Allow selection of the process-shared sleep queue for the thread sleep container, when the lock ownership cannot be granted immediately, and the operation must sleep. The process-shared or process-private sleep queue is selected based on the attributes of the memory mapping which contains the first byte of the structure, see .Xr mmap 2 . Otherwise, if the flag is not specified, the process-private sleep queue is selected regardless of the memory mapping attributes, as an optimization. .Pp See the .Sx SLEEP QUEUES subsection below for more details on sleep queues. .El .Bl -hang -offset indent .It Sy Mutex .Bd -literal struct umutex { volatile lwpid_t m_owner; uint32_t m_flags; uint32_t m_ceilings[2]; uintptr_t m_rb_lnk; }; .Ed .Pp The .Dv m_owner field is the actual lock. It contains either the thread identifier of the lock owner in the locked state, or zero when the lock is unowned. The highest bit set indicates that there is contention on the lock. The constants are defined for special values: .Bl -tag -width indent .It Dv UMUTEX_UNOWNED Zero, the value stored in the unowned lock. .It Dv UMUTEX_CONTESTED The contention indicator. .It Dv UMUTEX_RB_OWNERDEAD A thread owning the robust mutex terminated. The mutex is in unlocked state. .It Dv UMUTEX_RB_NOTRECOV The robust mutex is in a non-recoverable state. It cannot be locked until reinitialized. 
.El .Pp The .Dv m_flags field may contain the following umutex-specific flags, in addition to the common flags: .Bl -tag -width indent .It Dv UMUTEX_PRIO_INHERIT Mutex implements .Em Priority Inheritance protocol. .It Dv UMUTEX_PRIO_PROTECT Mutex implements .Em Priority Protection protocol. .It Dv UMUTEX_ROBUST Mutex is robust, as described in the .Sx ROBUST UMUTEXES section below. .It Dv UMUTEX_NONCONSISTENT Robust mutex is in a transient non-consistent state. Not used by kernel. .El .Pp In the manual page, mutexes not having .Dv UMUTEX_PRIO_INHERIT and .Dv UMUTEX_PRIO_PROTECT flags set, are called normal mutexes. Each type of mutex .Pq normal, priority-inherited, and priority-protected has a separate sleep queue associated with the given key. .Pp For priority protected mutexes, the .Dv m_ceilings array contains priority ceiling values. The .Dv m_ceilings[0] is the ceiling value for the mutex, as specified by .St -p1003.1-2008 for the .Em Priority Protected mutex protocol. The .Dv m_ceilings[1] is used only for the unlock of a priority protected mutex, when unlock is done in an order other than the reversed lock order. In this case, .Dv m_ceilings[1] must contain the ceiling value for the last locked priority protected mutex, for proper priority reassignment. If, instead, the unlocking mutex was the last priority propagated mutex locked by the thread, .Dv m_ceilings[1] should contain \-1. This is required because kernel does not maintain the ordered lock list. .It Sy Condition variable .Bd -literal struct ucond { volatile uint32_t c_has_waiters; uint32_t c_flags; uint32_t c_clockid; }; .Ed .Pp A non-zero .Dv c_has_waiters value indicates that there are in-kernel waiters for the condition, executing the .Dv UMTX_OP_CV_WAIT request. .Pp The .Dv c_flags field contains flags. Only the common flags .Pq Dv USYNC_PROCESS_SHARED are defined for ucond. 
.Pp The .Dv c_clockid member provides the clock identifier to use for timeout, when the .Dv UMTX_OP_CV_WAIT request has both the .Dv CVWAIT_CLOCKID flag and the timeout specified. Valid clock identifiers are a subset of those for .Xr clock_gettime 2 : .Bl -bullet -compact .It .Dv CLOCK_MONOTONIC .It .Dv CLOCK_MONOTONIC_FAST .It .Dv CLOCK_MONOTONIC_PRECISE .It .Dv CLOCK_PROF .It .Dv CLOCK_REALTIME .It .Dv CLOCK_REALTIME_FAST .It .Dv CLOCK_REALTIME_PRECISE .It .Dv CLOCK_SECOND .It .Dv CLOCK_UPTIME .It .Dv CLOCK_UPTIME_FAST .It .Dv CLOCK_UPTIME_PRECISE .It .Dv CLOCK_VIRTUAL .El .It Sy Reader/writer lock .Bd -literal struct urwlock { volatile int32_t rw_state; uint32_t rw_flags; uint32_t rw_blocked_readers; uint32_t rw_blocked_writers; }; .Ed .Pp The .Dv rw_state field is the actual lock. It contains both the flags and counter of the read locks which were granted. Names of the .Dv rw_state bits are as follows: .Bl -tag -width indent .It Dv URWLOCK_WRITE_OWNER Write lock was granted. .It Dv URWLOCK_WRITE_WAITERS There are write lock waiters. .It Dv URWLOCK_READ_WAITERS There are read lock waiters. .It Dv URWLOCK_READER_COUNT(c) Returns the count of currently granted read locks. .El .Pp At any given time there may be only one thread to which the writer lock is granted on the .Vt struct urwlock , and no threads are granted read lock. Or, at the given time, up to .Dv URWLOCK_MAX_READERS threads may be granted the read lock simultaneously, but write lock is not granted to any thread. .Pp The following flags for the .Dv rw_flags member of .Vt struct urwlock are defined, in addition to the common flags: .Bl -tag -width indent .It Dv URWLOCK_PREFER_READER If specified, immediately grant read lock requests when .Dv urwlock is already read-locked, even in presence of unsatisfied write lock requests. By default, if there is a write lock waiter, further read requests are not granted, to prevent unfair write lock waiter starvation. 
.El .Pp The .Dv rw_blocked_readers and .Dv rw_blocked_writers members contain the count of threads which are sleeping in kernel, waiting for the associated request type to be granted. The fields are used by kernel to update the .Dv URWLOCK_READ_WAITERS and .Dv URWLOCK_WRITE_WAITERS flags of the .Dv rw_state lock after the requesting thread was woken up. .It Sy Semaphore .Bd -literal struct _usem2 { volatile uint32_t _count; uint32_t _flags; }; .Ed .Pp The .Dv _count word represents a counting semaphore. A non-zero value indicates an unlocked (posted) semaphore, while zero represents the locked state. The maximal supported semaphore count is .Dv USEM_MAX_COUNT . .Pp The .Dv _count word, besides the counter of posts (unlocks), also contains the .Dv USEM_HAS_WAITERS bit, which indicates that the locked semaphore has waiting threads. .Pp The .Dv USEM_COUNT() macro, applied to the .Dv _count word, returns the current semaphore counter, which is the number of posts issued on the semaphore. .Pp The following bits for the .Dv _flags member of .Vt struct _usem2 are defined, in addition to the common flags: .Bl -tag -width indent .It Dv USEM_NAMED Flag is ignored by kernel. .El .It Sy Timeout parameter .Bd -literal struct _umtx_time { struct timespec _timeout; uint32_t _flags; uint32_t _clockid; }; .Ed .Pp Several .Fn _umtx_op operations allow the blocking time to be limited, failing the request if it cannot be satisfied in the specified time period. The timeout is specified by passing either the address of .Vt struct timespec , or its extended variant, .Vt struct _umtx_time , as the .Fa uaddr2 argument of .Fn _umtx_op . They are distinguished by the .Fa uaddr value, which must be equal to the size of the structure pointed to by .Fa uaddr2 , cast to .Vt uintptr_t . .Pp The .Dv _timeout member specifies the time when the timeout should occur. 
Legal values for clock identifier .Dv _clockid are shared with the .Fa clock_id argument to the .Xr clock_gettime 2 function, and use the same underlying clocks. The specified clock is used to obtain the current time value. Interval counting is always performed by the monotonic wall clock. .Pp The .Dv _flags argument allows the following flags to further define the timeout behaviour: .Bl -tag -width indent .It Dv UMTX_ABSTIME The .Dv _timeout value is the absolute time. The thread will be unblocked and the request failed when the specified clock value is equal to or exceeds the .Dv _timeout . .Pp If the flag is absent, the timeout value is relative, that is the amount of time, measured by the monotonic wall clock from the moment of the request start. .El .El .Ss SLEEP QUEUES When a locking request cannot be immediately satisfied, the thread is typically put to .Em sleep , which is a non-runnable state terminated by the .Em wake operation. Lock operations include a .Em try variant which returns an error rather than sleeping if the lock cannot be obtained. Also, .Fn _umtx_op provides requests which explicitly put the thread to sleep. .Pp Wakes need to know which threads to make runnable, so sleeping threads are grouped into containers called .Em sleep queues . A sleep queue is identified by a key, which for .Fn _umtx_op is defined as the physical address of some variable. Note that the .Em physical address is used, which means that the same variable mapped multiple times will give one key value. This mechanism enables the construction of .Em process-shared locks. .Pp A related attribute of the key is shareability. Some requests always interpret keys as private for the current process, creating sleep queues with the scope of the current process even if the memory is shared. Others either select the shareability automatically from the mapping attributes, or take additional input as the .Dv USYNC_PROCESS_SHARED common flag. 
This is done as optimization, allowing the lock scope to be limited regardless of the kind of backing memory. .Pp Only the address of the start byte of the variable specified as key is important for determining corresponding sleep queue. The size of the variable does not matter, so, for example, sleep on the same address interpreted as .Vt uint32_t and .Vt long on a little-endian 64-bit platform would collide. .Pp The last attribute of the key is the object type. The sleep queue to which a sleeping thread is assigned is an individual one for simple wait requests, mutexes, rwlocks, condvars and other primitives, even when the physical address of the key is same. .Pp When waking up a limited number of threads from a given sleep queue, the highest priority threads that have been blocked for the longest on the queue are selected. .Ss ROBUST UMUTEXES The .Em robust umutexes are provided as a substrate for a userspace library to implement .Tn POSIX robust mutexes. A robust umutex must have the .Dv UMUTEX_ROBUST flag set. .Pp On thread termination, the kernel walks two lists of mutexes. The two lists head addresses must be provided by a prior call to .Dv UMTX_OP_ROBUST_LISTS request. The lists are singly-linked. The link to next element is provided by the .Dv m_rb_lnk member of the .Vt struct umutex . .Pp Robust list processing is aborted if the kernel finds a mutex with any of the following conditions: .Bl -dash -offset indent -compact .It the .Dv UMUTEX_ROBUST flag is not set .It not owned by the current thread, except when the mutex is pointed to by the .Dv robust_inactive member of the .Vt struct umtx_robust_lists_params , registered for the current thread .It the combination of mutex flags is invalid .It read of the umutex memory faults .It the list length limit described in .Xr libthr 3 is reached. 
.El .Pp Every mutex in both lists is unlocked as if the .Dv UMTX_OP_MUTEX_UNLOCK request is performed on it, but instead of the .Dv UMUTEX_UNOWNED value, the .Dv m_owner field is written with the .Dv UMUTEX_RB_OWNERDEAD value. When a mutex in the .Dv UMUTEX_RB_OWNERDEAD state is locked by kernel due to the .Dv UMTX_OP_MUTEX_TRYLOCK and .Dv UMTX_OP_MUTEX_LOCK requests, the lock is granted and .Er EOWNERDEAD error is returned. .Pp Also, the kernel handles the .Dv UMUTEX_RB_NOTRECOV value of the .Dv m_owner field specially, always returning the .Er ENOTRECOVERABLE error for lock attempts, without granting the lock. .Ss OPERATIONS The following operations, requested by the .Fa op argument to the function, are implemented: .Bl -tag -width indent .It Dv UMTX_OP_WAIT Wait. The arguments for the request are: .Bl -tag -width "obj" .It Fa obj Pointer to a variable of type .Vt long . .It Fa val Current value of the .Dv *obj . .El .Pp The current value of the variable pointed to by the .Fa obj argument is compared with the .Fa val . If they are equal, the requesting thread is put to interruptible sleep until woken up or the optionally specified timeout expires. .Pp The comparison and sleep are atomic. In other words, if another thread writes a new value to .Dv *obj and then issues .Dv UMTX_OP_WAKE , the request is guaranteed to not miss the wakeup, which might otherwise happen between comparison and blocking. .Pp The physical address of memory where the .Fa *obj variable is located, is used as a key to index sleeping threads. .Pp The read of the current value of the .Dv *obj variable is not guarded by barriers. In particular, it is the user's duty to ensure the lock acquire and release memory semantics, if the .Dv UMTX_OP_WAIT and .Dv UMTX_OP_WAKE requests are used as a substrate for implementing a simple lock. .Pp The request is not restartable. An unblocked signal delivered during the wait always results in sleep interruption and .Er EINTR error. 
.Pp Optionally, a timeout for the request may be specified. .It Dv UMTX_OP_WAKE Wake the threads possibly sleeping due to .Dv UMTX_OP_WAIT . The arguments for the request are: .Bl -tag -width "obj" .It Fa obj Pointer to a variable, used as a key to find sleeping threads. .It Fa val Up to .Fa val threads are woken up by this request. Specify .Dv INT_MAX to wake up all waiters. .El .It Dv UMTX_OP_MUTEX_TRYLOCK Try to lock umutex. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the umutex. .El .Pp Operates same as the .Dv UMTX_OP_MUTEX_LOCK request, but returns .Er EBUSY instead of sleeping if the lock cannot be obtained immediately. .It Dv UMTX_OP_MUTEX_LOCK Lock umutex. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the umutex. .El .Pp Locking is performed by writing the current thread id into the .Dv m_owner word of the .Vt struct umutex . The write is atomic, preserves the .Dv UMUTEX_CONTESTED contention indicator, and provides the acquire barrier for lock entrance semantic. .Pp If the lock cannot be obtained immediately because another thread owns the lock, the current thread is put to sleep, with .Dv UMUTEX_CONTESTED bit set before. Upon wake up, the lock conditions are re-tested. .Pp The request adheres to the priority protection or inheritance protocol of the mutex, specified by the .Dv UMUTEX_PRIO_PROTECT or .Dv UMUTEX_PRIO_INHERIT flag, respectively. .Pp Optionally, a timeout for the request may be specified. .Pp A request with a timeout specified is not restartable. An unblocked signal delivered during the wait always results in sleep interruption and .Er EINTR error. A request without timeout specified is always restarted after return from a signal handler. .It Dv UMTX_OP_MUTEX_UNLOCK Unlock umutex. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the umutex. 
.El .Pp Unlocks the mutex by writing the .Dv UMUTEX_UNOWNED (zero) value into the .Dv m_owner word of the .Vt struct umutex . The write is done with a release barrier, to provide lock leave semantic. .Pp If there are threads sleeping in the sleep queue associated with the umutex, one thread is woken up. If more than one thread sleeps in the sleep queue, the .Dv UMUTEX_CONTESTED bit is set together with the write of the .Dv UMUTEX_UNOWNED value into .Dv m_owner . .Pp The request adheres to the priority protection or inheritance protocol of the mutex, specified by the .Dv UMUTEX_PRIO_PROTECT or .Dv UMUTEX_PRIO_INHERIT flag, respectively. See description of the .Dv m_ceilings member of the .Vt struct umutex structure for additional details of the request operation on the priority protected protocol mutex. .It Dv UMTX_OP_SET_CEILING Set ceiling for the priority protected umutex. The arguments to the request are: .Bl -tag -width "uaddr" .It Fa obj Pointer to the umutex. .It Fa val New ceiling value. .It Fa uaddr Address of a variable of type .Vt uint32_t . If not .Dv NULL and the update was successful, the previous ceiling value is written to the location pointed to by .Fa uaddr . .El .Pp The request locks the umutex pointed to by the .Fa obj parameter, waiting for the lock if not immediately available. After the lock is obtained, the new ceiling value .Fa val is written to the .Dv m_ceilings[0] member of the .Vt struct umutex , after which the umutex is unlocked. .Pp The locking does not adhere to the priority protect protocol, to conform to the .Tn POSIX requirements for the .Xr pthread_mutex_setprioceiling 3 interface. .It Dv UMTX_OP_CV_WAIT Wait for a condition. The arguments to the request are: .Bl -tag -width "uaddr2" .It Fa obj Pointer to the .Vt struct ucond . .It Fa val Request flags, see below. .It Fa uaddr Pointer to the umutex. .It Fa uaddr2 Optional pointer to a .Vt struct timespec for timeout specification. 
.El .Pp The request must be issued by the thread owning the mutex pointed to by the .Fa uaddr argument. The .Dv c_has_waiters member of the .Vt struct ucond , pointed to by the .Fa obj argument, is set to an arbitrary non-zero value, after which the .Fa uaddr mutex is unlocked (following the appropriate protocol), and the current thread is put to sleep on the sleep queue keyed by the .Fa obj argument. The operations are performed atomically. It is guaranteed to not miss a wakeup from .Dv UMTX_OP_CV_SIGNAL or .Dv UMTX_OP_CV_BROADCAST sent between mutex unlock and putting the current thread on the sleep queue. .Pp Upon wakeup, if the timeout expired and no other threads are sleeping in the same sleep queue, the .Dv c_has_waiters member is cleared. After wakeup, the .Fa uaddr umutex is not relocked. .Pp The following flags are defined: .Bl -tag -width "CVWAIT_CLOCKID" .It Dv CVWAIT_ABSTIME Timeout is absolute. .It Dv CVWAIT_CLOCKID Clockid is provided. .El .Pp Optionally, a timeout for the request may be specified. Unlike other requests, the timeout value is specified directly by a .Vt struct timespec , pointed to by the .Fa uaddr2 argument. If the .Dv CVWAIT_CLOCKID flag is provided, the timeout uses the clock from the .Dv c_clockid member of the .Vt struct ucond , pointed to by the .Fa obj argument. Otherwise, .Dv CLOCK_REALTIME is used, regardless of the clock identifier possibly specified in the .Vt struct _umtx_time . If the .Dv CVWAIT_ABSTIME flag is supplied, the timeout specifies absolute time value, otherwise it denotes a relative time interval. .Pp The request is not restartable. An unblocked signal delivered during the wait always results in sleep interruption and .Er EINTR error. .It Dv UMTX_OP_CV_SIGNAL Wake up one condition waiter. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to .Vt struct ucond . .El .Pp The request wakes up at most one thread sleeping on the sleep queue keyed by the .Fa obj argument. 
If the woken up thread was the last on the sleep queue, the .Dv c_has_waiters member of the .Vt struct ucond is cleared. .It Dv UMTX_OP_CV_BROADCAST Wake up all condition waiters. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to .Vt struct ucond . .El .Pp The request wakes up all threads sleeping on the sleep queue keyed by the .Fa obj argument. The .Dv c_has_waiters member of the .Vt struct ucond is cleared. .It Dv UMTX_OP_WAIT_UINT Same as .Dv UMTX_OP_WAIT , but the type of the variable pointed to by .Fa obj is .Vt u_int .Pq a 32-bit integer . .It Dv UMTX_OP_RW_RDLOCK Read-lock a .Vt struct urwlock lock. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the lock (of type .Vt struct urwlock ) to be read-locked. .It Fa val Additional flags to augment locking behaviour. The valid flags in the .Fa val argument are: .Bl -tag -width indent .It Dv URWLOCK_PREFER_READER .El .El .Pp The request obtains the read lock on the specified .Vt struct urwlock by incrementing the count of readers in the .Dv rw_state word of the structure. If the .Dv URWLOCK_WRITE_OWNER bit is set in the word .Dv rw_state , the lock was granted to a writer which has not yet relinquished its ownership. In this case the current thread is put to sleep until it makes sense to retry. .Pp If the .Dv URWLOCK_PREFER_READER flag is set either in the .Dv rw_flags word of the structure, or in the .Fa val argument of the request, the presence of the threads trying to obtain the write lock on the same structure does not prevent the current thread from trying to obtain the read lock. Otherwise, if the flag is not set, and the .Dv URWLOCK_WRITE_WAITERS flag is set in .Dv rw_state , the current thread does not attempt to obtain read-lock. Instead it sets the .Dv URWLOCK_READ_WAITERS in the .Dv rw_state word and puts itself to sleep on corresponding sleep queue. Upon wakeup, the locking conditions are re-evaluated. 
.Pp Optionally, a timeout for the request may be specified. .Pp The request is not restartable. An unblocked signal delivered during the wait always results in sleep interruption and .Er EINTR error. .It Dv UMTX_OP_RW_WRLOCK Write-lock a .Vt struct urwlock lock. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the lock (of type .Vt struct urwlock ) to be write-locked. .El .Pp The request obtains a write lock on the specified .Vt struct urwlock , by setting the .Dv URWLOCK_WRITE_OWNER bit in the .Dv rw_state word of the structure. If there is already a write lock owner, as indicated by the .Dv URWLOCK_WRITE_OWNER bit being set, or there are read lock owners, as indicated by the read-lock counter, the current thread does not attempt to obtain the write-lock. Instead it sets the .Dv URWLOCK_WRITE_WAITERS in the .Dv rw_state word and puts itself to sleep on corresponding sleep queue. Upon wakeup, the locking conditions are re-evaluated. .Pp Optionally, a timeout for the request may be specified. .Pp The request is not restartable. An unblocked signal delivered during the wait always results in sleep interruption and .Er EINTR error. .It Dv UMTX_OP_RW_UNLOCK Unlock rwlock. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the lock (of type .Vt struct urwlock ) to be unlocked. .El .Pp The unlock type (read or write) is determined by the current lock state. Note that the .Vt struct urwlock does not save information about the identity of the thread which acquired the lock. .Pp If there are pending writers after the unlock, and the .Dv URWLOCK_PREFER_READER flag is not set in the .Dv rw_flags member of the .Fa *obj structure, one writer is woken up, selected as described in the .Sx SLEEP QUEUES subsection. If the .Dv URWLOCK_PREFER_READER flag is set, a pending writer is woken up only if there are no pending readers. 
.Pp If there are no pending writers, or, in the case that the .Dv URWLOCK_PREFER_READER flag is set, then all pending readers are woken up by unlock. .It Dv UMTX_OP_WAIT_UINT_PRIVATE Same as .Dv UMTX_OP_WAIT_UINT , but unconditionally select the process-private sleep queue. .It Dv UMTX_OP_WAKE_PRIVATE Same as .Dv UMTX_OP_WAKE , but unconditionally select the process-private sleep queue. .It Dv UMTX_OP_MUTEX_WAIT Wait for mutex availability. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Address of the mutex. .El .Pp Similarly to the .Dv UMTX_OP_MUTEX_LOCK , put the requesting thread to sleep if the mutex lock cannot be obtained immediately. The .Dv UMUTEX_CONTESTED bit is set in the .Dv m_owner word of the mutex to indicate that there is a waiter, before the thread is added to the sleep queue. Unlike the .Dv UMTX_OP_MUTEX_LOCK request, the lock is not obtained. .Pp The operation is not implemented for priority protected and priority inherited protocol mutexes. .Pp Optionally, a timeout for the request may be specified. .Pp A request with a timeout specified is not restartable. An unblocked signal delivered during the wait always results in sleep interruption and .Er EINTR error. A request without a timeout automatically restarts if the signal disposition requested restart via the .Dv SA_RESTART flag in .Vt struct sigaction member .Dv sa_flags . .It Dv UMTX_OP_NWAKE_PRIVATE Wake up a batch of sleeping threads. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the array of pointers. .It Fa val Number of elements in the array pointed to by .Fa obj . .El .Pp For each element in the array pointed to by .Fa obj , wakes up all threads waiting on the .Em private sleep queue with the key being the byte addressed by the array element. .It Dv UMTX_OP_MUTEX_WAKE Check if a normal umutex is unlocked and wake up a waiter. The arguments for the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the umutex. 
.El .Pp If the .Dv m_owner word of the mutex pointed to by the .Fa obj argument indicates unowned mutex, which has its contention indicator bit .Dv UMUTEX_CONTESTED set, clear the bit and wake up one waiter in the sleep queue associated with the byte addressed by the .Fa obj , if any. Only normal mutexes are supported by the request. The sleep queue is always one for a normal mutex type. .Pp This request is deprecated in favor of .Dv UMTX_OP_MUTEX_WAKE2 since mutexes using it cannot synchronize their own destruction. That is, the .Dv m_owner word has already been set to .Dv UMUTEX_UNOWNED when this request is made, so that another thread can lock, unlock and destroy the mutex (if no other thread uses the mutex afterwards). Clearing the .Dv UMUTEX_CONTESTED bit may then modify freed memory. .It Dv UMTX_OP_MUTEX_WAKE2 Check if a umutex is unlocked and wake up a waiter. The arguments for the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the umutex. .It Fa val The umutex flags. .El .Pp The request does not read the .Dv m_flags member of the .Vt struct umutex ; instead, the .Fa val argument supplies flag information, in particular, to determine the sleep queue where the waiters are found for wake up. .Pp If the mutex is unowned, one waiter is woken up. .Pp If the mutex memory cannot be accessed, all waiters are woken up. .Pp If there is more than one waiter on the sleep queue, or there is only one waiter but the mutex is owned by a thread, the .Dv UMUTEX_CONTESTED bit is set in the .Dv m_owner word of the .Vt struct umutex . .It Dv UMTX_OP_SEM2_WAIT Wait until semaphore is available. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the semaphore (of type .Vt struct _usem2 ) . .It Fa uaddr Size of the memory passed in via the .Fa uaddr2 argument. .It Fa uaddr2 Optional pointer to a structure of type .Vt struct _umtx_time , which may be followed by a structure of type .Vt struct timespec . 
.El .Pp Put the requesting thread onto a sleep queue if the semaphore counter is zero. If the thread is put to sleep, the .Dv USEM_HAS_WAITERS bit is set in the .Dv _count word to indicate waiters. The function returns either due to .Dv _count indicating the semaphore is available (non-zero count due to post), or due to a wakeup. The return does not guarantee that the semaphore is available, nor does it consume the semaphore lock on successful return. .Pp Optionally, a timeout for the request may be specified. .Pp A request with non-absolute timeout value is not restartable. An unblocked signal delivered during such wait results in sleep interruption and .Er EINTR error. .Pp If .Dv UMTX_ABSTIME was not set, and the operation was interrupted and the caller passed in a .Fa uaddr2 large enough to hold a .Vt struct timespec following the initial .Vt struct _umtx_time , then the .Vt struct timespec is updated to contain the unslept amount. .It Dv UMTX_OP_SEM2_WAKE Wake up waiters on semaphore lock. The arguments to the request are: .Bl -tag -width "obj" .It Fa obj Pointer to the semaphore (of type .Vt struct _usem2 ) . .El .Pp The request wakes up one waiter for the semaphore lock. The function does not increment the semaphore lock count. If the .Dv USEM_HAS_WAITERS bit was set in the .Dv _count word, and the last sleeping thread was woken up, the bit is cleared. .It Dv UMTX_OP_SHM Manage anonymous .Tn POSIX shared memory objects (see .Xr shm_open 2 ) , which can be attached to a byte of physical memory, mapped into the process address space. The objects are used to implement process-shared locks in .Dv libthr . .Pp The .Fa val argument specifies the sub-request of the .Dv UMTX_OP_SHM request: .Bl -tag -width indent .It Dv UMTX_SHM_CREAT Creates the anonymous shared memory object, which can be looked up with the specified key .Fa uaddr . If the object associated with the .Fa uaddr key already exists, it is returned instead of creating a new object. 
The object's size is one page. On success, the file descriptor referencing the object is returned. The descriptor can be used for mapping the object using .Xr mmap 2 , or for other shared memory operations. .It Dv UMTX_SHM_LOOKUP Same as the .Dv UMTX_SHM_CREAT request, but if there is no shared memory object associated with the specified key .Fa uaddr , an error is returned, and no new object is created. .It Dv UMTX_SHM_DESTROY De-associate the shared object with the specified key .Fa uaddr . The object is destroyed after the last open file descriptor is closed and the last mapping for it is destroyed. .It Dv UMTX_SHM_ALIVE Checks whether there is a live shared object associated with the supplied key .Fa uaddr . Returns zero if there is, and an error otherwise. This request is an optimization of the .Dv UMTX_SHM_LOOKUP request. It is cheaper when only the liveness of the associated object is asked for, since no file descriptor is installed in the process fd table on success. .El .Pp The .Fa uaddr argument specifies the virtual address whose backing physical memory byte identity is used as a key for the anonymous shared object creation or lookup. .It Dv UMTX_OP_ROBUST_LISTS Register the list heads for the current thread's robust mutex lists. The arguments to the request are: .Bl -tag -width "uaddr" .It Fa val Size of the structure passed in the .Fa uaddr argument. .It Fa uaddr Pointer to the structure of type .Vt struct umtx_robust_lists_params . .El .Pp The structure is defined as .Bd -literal struct umtx_robust_lists_params { uintptr_t robust_list_offset; uintptr_t robust_priv_list_offset; uintptr_t robust_inact_offset; }; .Ed .Pp The .Dv robust_list_offset member contains the address of the first element in the list of locked robust shared mutexes. The .Dv robust_priv_list_offset member contains the address of the first element in the list of locked robust private mutexes.
The private and shared robust locked lists are split to allow fast termination of the shared list on fork, in the child. .Pp The .Dv robust_inact_offset contains a pointer to the mutex which might be locked in the near future, or might have been just unlocked. It is typically set by the lock or unlock mutex implementation code around the whole operation, since lists can be only changed race-free when the thread owns the mutex. The kernel inspects the .Dv robust_inact_offset in addition to walking the shared and private lists. Also, the mutex pointed to by .Dv robust_inact_offset is handled more loosely at the thread termination time, than other mutexes on the list. That mutex is allowed to be not owned by the current thread, in which case list processing is continued. See .Sx ROBUST UMUTEXES subsection for details. .It Dv UMTX_OP_GET_MIN_TIMEOUT Writes out the current value of minimal umtx operations timeout, in nanoseconds, into the long integer variable pointed to by .Fa uaddr1 . .It Dv UMTX_OP_SET_MIN_TIMEOUT Set the minimal amount of time, in nanoseconds, the thread is required to sleep for umtx operations specifying a timeout using absolute clocks. The value is taken from the .Fa val argument of the call. Zero means no minimum. .El .Pp The .Fa op argument may be a bitwise OR of a single command from above with one or more of the following flags: .Bl -tag -width indent .It Dv UMTX_OP__I386 Request i386 ABI compatibility from the native .Nm system call. Specifically, this implies that: .Bl -hang -offset indent .It .Fa obj arguments that point to a word, point to a 32-bit integer. .It The .Dv UMTX_OP_NWAKE_PRIVATE .Fa obj argument is a pointer to an array of 32-bit pointers. .It The .Dv m_rb_lnk member of .Vt struct umutex is a 32-bit pointer. .It .Vt struct timespec uses a 32-bit time_t. .El .Pp .Dv UMTX_OP__32BIT has no effect if this flag is set. This flag is valid for all architectures, but it is ignored on i386.
.It Dv UMTX_OP__32BIT Request non-i386, 32-bit ABI compatibility from the native .Nm system call. Specifically, this implies that: .Bl -hang -offset indent .It .Fa obj arguments that point to a word, point to a 32-bit integer. .It The .Dv UMTX_OP_NWAKE_PRIVATE .Fa obj argument is a pointer to an array of 32-bit pointers. .It The .Dv m_rb_lnk member of .Vt struct umutex is a 32-bit pointer. .It .Vt struct timespec uses a 64-bit time_t. .El .Pp This flag has no effect if .Dv UMTX_OP__I386 is set. This flag is valid for all architectures. .El .Pp Note that if any 32-bit ABI compatibility is being requested, then care must be taken with robust lists. A single thread may not mix 32-bit compatible robust lists with native robust lists. The first .Dv UMTX_OP_ROBUST_LISTS call in a given thread determines which ABI that thread will use for robust lists going forward. .Sh RETURN VALUES If successful, all requests, except .Dv UMTX_SHM_CREAT and .Dv UMTX_SHM_LOOKUP sub-requests of the .Dv UMTX_OP_SHM request, will return zero. The .Dv UMTX_SHM_CREAT and .Dv UMTX_SHM_LOOKUP return a shared memory file descriptor on success. On error \-1 is returned, and the .Va errno variable is set to indicate the error. .Sh ERRORS The .Fn _umtx_op operations can fail with the following errors: .Bl -tag -width "[ETIMEDOUT]" .It Bq Er EFAULT One of the arguments points to invalid memory. .It Bq Er EINVAL The clock identifier, specified for the .Vt struct _umtx_time timeout parameter, or in the .Dv c_clockid member of .Vt struct ucond , is invalid. .It Bq Er EINVAL The type of the mutex, encoded by the .Dv m_flags member of .Vt struct umutex , is invalid. .It Bq Er EINVAL The .Dv m_owner member of the .Vt struct umutex has changed the lock owner thread identifier during unlock. .It Bq Er EINVAL The .Dv timeout.tv_sec or .Dv timeout.tv_nsec member of .Vt struct _umtx_time is less than zero, or .Dv timeout.tv_nsec is greater than 1000000000.
.It Bq Er EINVAL The .Fa op argument specifies invalid operation. .It Bq Er EINVAL The .Fa uaddr argument for the .Dv UMTX_OP_SHM request specifies invalid operation. .It Bq Er EINVAL The .Dv UMTX_OP_SET_CEILING request specifies non priority protected mutex. .It Bq Er EINVAL The new ceiling value for the .Dv UMTX_OP_SET_CEILING request, or one or more of the values read from the .Dv m_ceilings array during lock or unlock operations, is greater than .Dv RTP_PRIO_MAX . .It Bq Er EPERM Unlock attempted on an object not owned by the current thread. .It Bq Er EOWNERDEAD The lock was requested on an umutex where the .Dv m_owner field was set to the .Dv UMUTEX_RB_OWNERDEAD value, indicating terminated robust mutex. The lock was granted to the caller, so this error in fact indicates success with additional conditions. .It Bq Er ENOTRECOVERABLE The lock was requested on an umutex which .Dv m_owner field is equal to the .Dv UMUTEX_RB_NOTRECOV value, indicating abandoned robust mutex after termination. The lock was not granted to the caller. .It Bq Er ENOTTY The shared memory object, associated with the address passed to the .Dv UMTX_SHM_ALIVE sub-request of .Dv UMTX_OP_SHM request, was destroyed. .It Bq Er ESRCH For the .Dv UMTX_SHM_LOOKUP , .Dv UMTX_SHM_DESTROY , and .Dv UMTX_SHM_ALIVE sub-requests of the .Dv UMTX_OP_SHM request, there is no shared memory object associated with the provided key. .It Bq Er ENOMEM The .Dv UMTX_SHM_CREAT sub-request of the .Dv UMTX_OP_SHM request cannot be satisfied, because allocation of the shared memory object would exceed the .Dv RLIMIT_UMTXP resource limit, see .Xr setrlimit 2 . .It Bq Er EAGAIN The maximum number of readers .Dv ( URWLOCK_MAX_READERS ) were already granted ownership of the given .Vt struct rwlock for read. .It Bq Er EBUSY A try mutex lock operation was not able to obtain the lock. 
.It Bq Er ETIMEDOUT The request specified a timeout in the .Fa uaddr and .Fa uaddr2 arguments, and timed out before obtaining the lock or being woken up. .It Bq Er EINTR A signal was delivered during wait, for a non-restartable operation. Operations with timeouts are typically non-restartable, but timeouts specified in absolute time may be restartable. .It Bq Er ERESTART A signal was delivered during wait, for a restartable operation. Mutex lock requests without timeout specified are restartable. The error is not returned to userspace code since restart is handled by usual adjustment of the instruction counter. .El .Sh SEE ALSO .Xr clock_gettime 2 , .Xr mmap 2 , .Xr setrlimit 2 , .Xr shm_open 2 , .Xr sigaction 2 , .Xr thr_exit 2 , .Xr thr_kill 2 , .Xr thr_kill2 2 , .Xr thr_new 2 , .Xr thr_self 2 , .Xr thr_set_name 2 , .Xr signal 3 .Sh STANDARDS The .Fn _umtx_op system call is non-standard and is used by the .Lb libthr to implement .St -p1003.1-2001 .Xr pthread 3 functionality. .Sh BUGS A window between unlocking a robust mutex and resetting the pointer in the .Dv robust_inact_offset member of the registered .Vt struct umtx_robust_lists_params allows another thread to destroy the mutex, thus making the kernel inspect freed or reused memory. The .Li libthr implementation is only vulnerable to this race when operating on a shared mutex. A possible fix for the current implementation is to strengthen the checks for shared mutexes before terminating them, in particular, verifying that the mutex memory is mapped from a shared memory object allocated by the .Dv UMTX_OP_SHM request. This is not done because it is believed that the race is adequately covered by other consistency checks, while adding the check would prevent alternative implementations of .Li libpthread .
diff --git a/lib/libc/sys/fsync.2 b/lib/libc/sys/fsync.2 index 02cdf9bd656b..24435e018815 100644 --- a/lib/libc/sys/fsync.2 +++ b/lib/libc/sys/fsync.2 @@ -1,132 +1,132 @@ .\" Copyright (c) 1983, 1993 .\" The Regents of the University of California. All rights reserved. -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" Parts of this documentation were written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" @(#)fsync.2 8.1 (Berkeley) 6/4/93 .\" .Dd March 30, 2020 .Dt FSYNC 2 .Os .Sh NAME .Nm fdatasync , .Nm fsync .Nd "synchronise changes to a file" .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In unistd.h .Ft int .Fn fdatasync "int fd" .Ft int .Fn fsync "int fd" .Sh DESCRIPTION The .Fn fsync system call causes all modified data and attributes of the file referenced by the file descriptor .Fa fd to be moved to a permanent storage device. This normally results in all in-core modified copies of buffers for the associated file to be written to a disk. .Pp The .Fn fdatasync system call causes all modified data of .Fa fd to be moved to a permanent storage device. Unlike .Fn fsync , the system call does not guarantee that file attributes or metadata necessary to access the file are committed to the permanent storage. .Pp The .Fn fsync system call should be used by programs that require a file to be in a known state, for example, in building a simple transaction facility. If the file metadata has already been committed, using .Fn fdatasync can be more efficient than .Fn fsync . .Pp Both .Fn fdatasync and .Fn fsync calls are cancellation points. .Sh RETURN VALUES .Rv -std fsync .Sh ERRORS The .Fn fsync and .Fn fdatasync calls fail if: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument is not a valid descriptor. .It Bq Er EINVAL The .Fa fd argument refers to a socket, not to a file. .It Bq Er EIO An I/O error occurred while reading from or writing to the file system. .It Bq Er EINTEGRITY Corrupted data was detected while reading from the file system. .El .Sh SEE ALSO .Xr fsync 1 , .Xr sync 2 , .Xr syncer 4 , .Xr sync 8 .Sh HISTORY The .Fn fsync system call appeared in .Bx 4.2 . The .Fn fdatasync system call appeared in .Fx 11.1 . .Sh BUGS The .Fn fdatasync system call currently does not guarantee that enqueued .Xr aio 4 requests for the file referenced by .Fa fd are completed before the syscall returns. 
diff --git a/lib/libc/sys/sigfastblock.2 b/lib/libc/sys/sigfastblock.2 index 72897191c4bc..19d649e63db9 100644 --- a/lib/libc/sys/sigfastblock.2 +++ b/lib/libc/sys/sigfastblock.2 @@ -1,164 +1,164 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .Dd December 13, 2019 .Dt SIGFASTBLOCK 2 .Os .Sh NAME .Nm sigfastblock .Nd controls signals blocking with a simple memory write .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/signalvar.h .Ft int .Fn sigfastblock "int cmd" "void *ptr" .Sh DESCRIPTION .Bf -symbolic This function is not intended for a direct usage by applications. The functionality is provided for implementing some optimizations in .Xr ld-elf.so.1 8 and .Lb libthr . .Ef .Pp The function configures the kernel facility that allows a thread to block asynchronous signals delivery with a single write to userspace memory, avoiding overhead of system calls like .Xr sigprocmask 2 for establishing critical sections. The C runtime uses it to optimize implementation of async-signal-safe functionality. .Pp A thread might register a .Dv sigblock variable of type .Vt int as a location which is consulted by kernel when calculating the blocked signal mask for delivery of asynchronous signals. If the variable indicates that blocking is requested, then the kernel effectively operates as if the mask containing all blockable signals was supplied to .Xr sigprocmask 2 . .Pp The variable is supposed to be modified only from the owning thread, there is no way to guarantee visibility of update from other thread to kernel when signals are delivered. .Pp Lower bits of the sigblock variable are reserved as flags, which might be set or cleared by kernel at arbitrary moments. Userspace code should use .Xr atomic 9 operations of incrementing and decrementing by .Dv SIGFASTBLOCK_INC quantity to recursively block or unblock signals delivery. .Pp If a signal would be delivered when unmasked, kernel might set the .Dv SIGFASTBLOCK_PEND .Dq pending signal flag in the sigblock variable. Userspace should perform .Dv SIGFASTBLOCK_UNBLOCK operation when clearing the variable if it notes the pending signal bit is set, which would deliver the pending signals immediately. Otherwise, signals delivery might be postponed. 
.Pp The .Fa cmd argument specifies one of the following operations: .Bl -tag -width SIGFASTBLOCK_UNSETPTR .It Dv SIGFASTBLOCK_SETPTR Register the variable of type .Vt int at location pointed to by the .Fa ptr argument as sigblock variable for the calling thread. .It Dv SIGFASTBLOCK_UNSETPTR Unregister the currently registered sigblock location. Kernel stops inferring the blocked mask from non-zero value of its blocked count. New location can be registered after previous one is deregistered. .It Dv SIGFASTBLOCK_UNBLOCK If there are pending signals which should be delivered to the calling thread, they are delivered before returning from the call. The sigblock variable should have zero blocking count, and indicate that the pending signal exists. Effectively this means that the variable should have the value .Dv SIGFASTBLOCK_PEND . .El .Sh RETURN VALUES .Rv -std .Sh ERRORS The operation may fail with the following errors: .Bl -tag -width Er .It Bq Er EBUSY The .Dv SIGFASTBLOCK_SETPTR attempted while the sigblock address was already registered. The .Dv SIGFASTBLOCK_UNBLOCK was called while sigblock variable value is not equal to .Dv SIGFASTBLOCK_PEND . .It Bq Er EINVAL The variable address passed to .Dv SIGFASTBLOCK_SETPTR is not aligned naturally. The .Dv SIGFASTBLOCK_UNSETPTR operation was attempted without prior successful call to .Dv SIGFASTBLOCK_SETPTR . .It Bq Er EFAULT Attempt to read or write to the sigblock variable failed. Note that kernel generates the .Dv SIGSEGV signal if an attempt to read from the sigblock variable faulted during implicit accesses from syscall entry. .El .Sh SEE ALSO .Xr kill 2 , .Xr signal 2 , .Xr sigprocmask 2 , .Xr libthr 3 , .Xr ld-elf.so.1 8 .Sh STANDARDS The .Nm function is non-standard, although a similar functionality is a common optimization provided by several other systems. .Sh HISTORY The .Nm function was introduced in .Fx 13.0 . .Sh BUGS The .Nm symbol is currently not exported by libc, on purpose. 
Consumers should either use the .Dv __sys_fast_sigblock symbol from the private libc namespace, or utilize .Xr syscall 2 . diff --git a/lib/libc/sys/thr_exit.2 b/lib/libc/sys/thr_exit.2 index 62d6e6da22ce..98c6dd63f7ec 100644 --- a/lib/libc/sys/thr_exit.2 +++ b/lib/libc/sys/thr_exit.2 @@ -1,92 +1,92 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .Dd May 5, 2020 .Dt THR_EXIT 2 .Os .Sh NAME .Nm thr_exit .Nd terminate current thread .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft void .Fn thr_exit "long *state" .Sh DESCRIPTION .Bf -symbolic This function is intended for implementing threading. Normal applications should call .Xr pthread_exit 3 instead. .Ef .Pp The .Fn thr_exit system call terminates the current kernel-scheduled thread. .Pp If the .Fa state argument is not .Dv NULL , the location pointed to by the argument is updated with an arbitrary non-zero value, and an .Xr _umtx_op 2 .Dv UMTX_OP_WAKE operation is consequently performed on the location. .Pp Attempts to terminate the last thread in the process are silently ignored. Use .Xr _exit 2 syscall to terminate the process. .Sh RETURN VALUES The function does not return a value. A return from the function indicates that the calling thread was the last one in the process. .Sh SEE ALSO .Xr _exit 2 , .Xr _umtx_op 2 , .Xr thr_kill 2 , .Xr thr_kill2 2 , .Xr thr_new 2 , .Xr thr_self 2 , .Xr thr_set_name 2 , .Xr pthread_exit 3 .Sh STANDARDS The .Fn thr_exit system call is non-standard and is used by .Lb libthr to implement .St -p1003.1-2001 .Xr pthread 3 functionality. .Sh HISTORY The .Fn thr_exit system call first appeared in .Fx 5.2 . diff --git a/lib/libc/sys/thr_kill.2 b/lib/libc/sys/thr_kill.2 index a18e53e61008..8db645a77e54 100644 --- a/lib/libc/sys/thr_kill.2 +++ b/lib/libc/sys/thr_kill.2 @@ -1,137 +1,137 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 5, 2020 .Dt THR_KILL 2 .Os .Sh NAME .Nm thr_kill .Nd send signal to thread .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft int .Fn thr_kill "long id" "int sig" .Ft int .Fn thr_kill2 "pid_t pid" "long id" "int sig" .Sh DESCRIPTION The .Fn thr_kill and .Fn thr_kill2 system calls allow sending a signal, specified by the .Fa sig argument, to some threads in a process. For the .Fn thr_kill function, signalled threads are always limited to the current process. For the .Fn thr_kill2 function, the .Fa pid argument specifies the process with threads to be signalled. .Pp The .Fa id argument specifies which threads get the signal. If .Fa id is equal to \-1, all threads in the specified process are signalled. Otherwise, only the thread with the thread identifier equal to the argument is signalled. .Pp The .Fa sig argument defines the delivered signal. It must be a valid signal number or zero.
In the latter case no signal is actually sent, and the call is used to verify the liveness of the thread. .Pp The signal is delivered with .Dv siginfo .Dv si_code set to .Dv SI_LWP . .Sh RETURN VALUES If successful, .Fn thr_kill and .Fn thr_kill2 will return zero, otherwise \-1 is returned, and .Va errno is set to indicate the error. .Sh ERRORS The .Fn thr_kill and .Fn thr_kill2 operations return the following errors: .Bl -tag -width Er .It Bq Er EINVAL The .Fa sig argument is not zero and does not specify valid signal. .It Bq Er ESRCH The specified process or thread was not found. .El .Pp Additionally, the .Fn thr_kill2 may return the following errors: .Bl -tag -width Er .It Bq Er EPERM The current process does not have sufficient privilege to check existence or send a signal to the specified process. .El .Sh SEE ALSO .Xr kill 2 , .Xr thr_exit 2 , .Xr thr_new 2 , .Xr thr_self 2 , .Xr thr_set_name 2 , .Xr _umtx_op 2 , .Xr pthread_kill 3 , .Xr signal 3 .Sh STANDARDS The .Fn thr_kill and .Fn thr_kill2 system calls are non-standard and are used by the .Lb libthr to implement .St -p1003.1-2001 .Xr pthread 3 functionality. .Sh HISTORY The .Fn thr_kill and .Fn thr_kill2 system calls first appeared in .Fx 5.2 . diff --git a/lib/libc/sys/thr_new.2 b/lib/libc/sys/thr_new.2 index eb57fb67ac2a..0576acebef47 100644 --- a/lib/libc/sys/thr_new.2 +++ b/lib/libc/sys/thr_new.2 @@ -1,247 +1,247 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 5, 2020 .Dt THR_NEW 2 .Os .Sh NAME .Nm thr_new .Nd create new thread of execution .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft int .Fn thr_new "struct thr_param *param" "int param_size" .Sh DESCRIPTION .Bf -symbolic This function is intended for implementing threading. Normal applications should call .Xr pthread_create 3 instead. .Ef .Pp The .Fn thr_new system call creates a new kernel-scheduled thread of execution in the context of the current process. The newly created thread shares all attributes of the process with the existing kernel-scheduled threads in the process, but has private processor execution state. The machine context for the new thread is copied from the creating thread's context, including coprocessor state. FPU state and specific machine registers are excluded from the copy. These are set according to ABI requirements and syscall parameters. The FPU state for the new thread is reinitialized to clean. 
.Pp The .Fa param structure supplies parameters affecting the thread creation. The structure is defined in the .In sys/thr.h header as follows .Bd -literal struct thr_param { void (*start_func)(void *); void *arg; char *stack_base; size_t stack_size; char *tls_base; size_t tls_size; long *child_tid; long *parent_tid; int flags; struct rtprio *rtp; }; .Ed and contains the following fields: .Bl -tag -width ".Va parent_tid" .It Va start_func Pointer to the thread entry function. The kernel arranges for the new thread to start executing the function upon the first return to userspace. .It Va arg Opaque argument supplied to the entry function. .It Va stack_base Stack base address. The stack must be allocated by the caller. On some architectures, the ABI might require that the system put information on the stack to ensure the execution environment for .Va start_func . .It Va stack_size Stack size. .It Va tls_base TLS base address. The value of TLS base is loaded into the ABI-defined machine register in the new thread context. .It Va tls_size TLS size. .It Va child_tid Address to store the new thread identifier, for the child's use. .It Va parent_tid Address to store the new thread identifier, for the parent's use. .Pp Both .Va child_tid and .Va parent_tid are provided, with the intent that .Va child_tid is used by the new thread to get its thread identifier without issuing the .Xr thr_self 2 syscall, while .Va parent_tid is used by the thread creator. The latter is separate from .Va child_tid because the new thread might exit and free its thread data before the parent has a chance to execute far enough to access it. .It Va flags Thread creation flags. The .Va flags member may specify the following flags: .Bl -tag -width ".Dv THR_SYSTEM_SCOPE" .It Dv THR_SUSPENDED Create the new thread in the suspended state. The flag is not currently implemented. .It Dv THR_SYSTEM_SCOPE Create the system scope thread. The flag is not currently implemented. 
.El .It Va rtp Real-time scheduling priority for the new thread. May be .Dv NULL to inherit the priority from the creating thread. .El .Pp The .Fa param_size argument should be set to the size of the .Fa param structure. .Pp After the first successful creation of an additional thread, the process is marked by the kernel as multi-threaded. In particular, the .Dv P_HADTHREADS flag is set in the process' .Dv p_flag (visible in the .Xr ps 1 output), and several operations are executed in multi-threaded mode. For instance, the .Xr execve 2 system call terminates all threads but the calling one on successful execution. .Sh RETURN VALUES If successful, .Fn thr_new will return zero, otherwise \-1 is returned, and .Va errno is set to indicate the error. .Sh ERRORS The .Fn thr_new operation returns the following errors: .Bl -tag -width Er .\" When changing this list, consider updating share/man/man3/pthread_create.3, .\" since that function can return any of these errors. .It Bq Er EFAULT The memory pointed to by the .Fa param argument is not valid. .It Bq Er EFAULT The memory pointed to by the .Fa param structure .Fa child_tid , parent_tid or .Fa rtp arguments is not valid. .It Bq Er EFAULT The specified stack base is invalid, or the kernel was unable to put required initial data on the stack. .It Bq Er EINVAL The .Fa param_size argument specifies a negative value, or the value is greater than the largest .Fa struct param size the kernel can interpret. .It Bq Er EINVAL The .Fa rtp member is not .Dv NULL and specifies invalid scheduling parameters. .It Bq Er EINVAL The specified TLS base is invalid. .It Bq Er EPERM The caller does not have permission to set the scheduling parameters or scheduling policy. .It Bq Er EPROCLIM Creation of the new thread would exceed the .Dv RACCT_NTHR limit, see .Xr racct 2 . .It Bq Er EPROCLIM Creation of the new thread would exceed the .Dv kern.threads.max_threads_per_proc .Xr sysctl 2 limit. 
.It Bq Er ENOMEM There was not enough kernel memory to allocate the new thread structures. .El .Sh SEE ALSO .Xr ps 1 , .Xr _umtx_op 2 , .Xr execve 2 , .Xr racct 2 , .Xr thr_exit 2 , .Xr thr_kill 2 , .Xr thr_kill2 2 , .Xr thr_self 2 , .Xr thr_set_name 2 , .Xr pthread_create 3 .Sh STANDARDS The .Fn thr_new system call is non-standard and is used by the .Lb libthr to implement .St -p1003.1-2001 .Xr pthread 3 functionality. .Sh HISTORY The .Fn thr_new system call first appeared in .Fx 5.2 . diff --git a/lib/libc/sys/thr_self.2 b/lib/libc/sys/thr_self.2 index 0637dca1d7b7..42d146448c05 100644 --- a/lib/libc/sys/thr_self.2 +++ b/lib/libc/sys/thr_self.2 @@ -1,92 +1,92 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 5, 2020 .Dt THR_SELF 2 .Os .Sh NAME .Nm thr_self .Nd return thread identifier for the calling thread .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft int .Fn thr_self "long *id" .Sh DESCRIPTION The .Fn thr_self system call stores the system-wide thread identifier for the current kernel-scheduled thread in the variable pointed to by the argument .Va id . .Pp The thread identifier is an integer in the range from .Dv PID_MAX + 2 (100001) to .Dv INT_MAX . The thread identifier is guaranteed to be unique at any given time, for each running thread in the system. After the thread exits, the identifier may be reused. .Sh RETURN VALUES If successful, .Fn thr_self will return zero, otherwise \-1 is returned, and .Va errno is set to indicate the error. .Sh ERRORS The .Fn thr_self operation may return the following errors: .Bl -tag -width Er .It Bq Er EFAULT The memory pointed to by the .Fa id argument is not valid. .El .Sh SEE ALSO .Xr _umtx_op 2 , .Xr thr_exit 2 , .Xr thr_kill 2 , .Xr thr_kill2 2 , .Xr thr_new 2 , .Xr thr_set_name 2 , .Xr pthread_getthreadid_np 3 , .Xr pthread_self 3 .Sh STANDARDS The .Fn thr_self system call is non-standard and is used by .Lb libthr to implement .St -p1003.1-2001 .Xr pthread 3 functionality. .Sh HISTORY The .Fn thr_self system call first appeared in .Fx 5.2 . 
diff --git a/lib/libc/sys/thr_set_name.2 b/lib/libc/sys/thr_set_name.2 index f43a60a4b3a5..38205f7f30b8 100644 --- a/lib/libc/sys/thr_set_name.2 +++ b/lib/libc/sys/thr_set_name.2 @@ -1,96 +1,96 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .Dd May 5, 2020 .Dt THR_SET_NAME 2 .Os .Sh NAME .Nm thr_set_name .Nd set user-visible thread name .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft int .Fn thr_set_name "long id" "const char *name" .Sh DESCRIPTION The .Fn thr_set_name system call sets the user-visible name for the thread with the identifier .Va id in the current process to the NUL-terminated string .Va name . The name will be silently truncated to fit into a buffer of .Dv MAXCOMLEN + 1 bytes. The thread name can be seen in the output of the .Xr ps 1 and .Xr top 1 commands, in the kernel debuggers and kernel tracing facility outputs, and in userland debuggers and program core files, as notes. .Sh RETURN VALUES If successful, .Fn thr_set_name returns zero; otherwise, \-1 is returned, and .Va errno is set to indicate the error. .Sh ERRORS The .Fn thr_set_name system call may return the following errors: .Bl -tag -width Er .It Bq Er EFAULT The memory pointed to by the .Fa name argument is not valid. .It Bq Er ESRCH The thread with the identifier .Fa id does not exist in the current process. .El .Sh SEE ALSO .Xr ps 1 , .Xr _umtx_op 2 , .Xr thr_exit 2 , .Xr thr_kill 2 , .Xr thr_kill2 2 , .Xr thr_new 2 , .Xr thr_self 2 , .Xr pthread_set_name_np 3 , .Xr ddb 4 , .Xr ktr 9 .Sh STANDARDS The .Fn thr_set_name system call is non-standard and is used by the .Lb libthr . .Sh HISTORY The .Fn thr_set_name system call first appeared in .Fx 5.2 . diff --git a/lib/libc/sys/thr_suspend.2 b/lib/libc/sys/thr_suspend.2 index ee4261b3676c..74eacb0daa3c 100644 --- a/lib/libc/sys/thr_suspend.2 +++ b/lib/libc/sys/thr_suspend.2 @@ -1,131 +1,131 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 5, 2020 .Dt THR_SUSPEND 2 .Os .Sh NAME .Nm thr_suspend .Nd suspend the calling thread .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft int .Fn thr_suspend "struct timespec *timeout" .Sh DESCRIPTION .Bf -symbolic This function is intended for implementing threading. Normal applications should use .Xr pthread_cond_timedwait 3 together with .Xr pthread_cond_broadcast 3 for typical safe suspension with cooperation of the thread being suspended, or .Xr pthread_suspend_np 3 and .Xr pthread_resume_np 3 in some specific situations, instead. .Ef .Pp The .Fn thr_suspend system call puts the calling thread in a suspended state, where it is not eligible for CPU time. 
This state is exited by another thread calling .Xr thr_wake 2 , when the time interval specified by .Fa timeout has elapsed, or by the delivery of a signal to the suspended thread. .Pp If the .Fa timeout argument is .Dv NULL , the suspended state can be only terminated by explicit .Fn thr_wake or signal. .Pp If a wake from .Xr thr_wake 2 was delivered before the .Nm call, the thread is not put into a suspended state. Instead, the call returns immediately without an error. .Pp If a thread previously called .Xr thr_wake 2 with its own thread identifier, which resulted in setting the internal kernel flag to immediately abort interruptible sleeps with an .Er EINTR error .Po see .Xr thr_wake 2 .Pc , the flag is cleared. As with .Xr thr_wake 2 called from another thread, the next .Nm call does not result in suspension. .Sh RETURN VALUES .Rv -std thr_suspend .Sh ERRORS The .Fn thr_suspend operation returns the following errors: .Bl -tag -width Er .It Bq Er EFAULT The memory pointed to by the .Fa timeout argument is not valid. .It Bq Er ETIMEDOUT The specified timeout expired. .It Bq Er ETIMEDOUT The .Fa timeout argument specified a zero time interval. .It Bq Er EINTR The sleep was interrupted by a signal. .El .Sh SEE ALSO .Xr ps 1 , .Xr thr_wake 2 , .Xr pthread_resume_np 3 , .Xr pthread_suspend_np 3 .Sh STANDARDS The .Fn thr_suspend system call is non-standard. .Sh HISTORY The .Fn thr_suspend system call first appeared in .Fx 5.2 . diff --git a/lib/libc/sys/thr_wake.2 b/lib/libc/sys/thr_wake.2 index 0a196c2dd18b..9091a2b0c06e 100644 --- a/lib/libc/sys/thr_wake.2 +++ b/lib/libc/sys/thr_wake.2 @@ -1,114 +1,114 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. 
.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 5, 2020 .Dt THR_WAKE 2 .Os .Sh NAME .Nm thr_wake .Nd wake up the suspended thread .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/thr.h .Ft int .Fn thr_wake "long id" .Sh DESCRIPTION .Bf -symbolic This function is intended for implementing threading. Normal applications should use .Xr pthread_cond_timedwait 3 together with .Xr pthread_cond_broadcast 3 for typical safe suspension with cooperation of the thread being suspended, or .Xr pthread_suspend_np 3 and .Xr pthread_resume_np 3 in some specific situations, instead. 
.Ef .Pp Passing the thread identifier of the calling thread .Po see .Xr thr_self 2 .Pc to .Fn thr_wake sets a thread's flag to cause the next signal-interruptible sleep of that thread in the kernel to fail immediately with the .Er EINTR error. The flag is cleared by an interruptible sleep attempt or by a call to .Xr thr_suspend 2 . This is used by the system threading library to implement cancellation. .Pp If .Fa id is not equal to the current thread identifier, the specified thread is woken up if suspended by the .Xr thr_suspend 2 system call. If the thread is not suspended at the time of the .Nm call, the wake is remembered and the next attempt of the thread to suspend itself with the .Xr thr_suspend 2 results in immediate return with success. Only one wake is remembered. .Sh RETURN VALUES .Rv -std thr_wake .Sh ERRORS The .Fn thr_wake operation returns these errors: .Bl -tag -width Er .It Bq Er ESRCH The specified thread was not found or does not belong to the process of the calling thread. .El .Sh SEE ALSO .Xr ps 1 , .Xr thr_self 2 , .Xr thr_suspend 2 , .Xr pthread_cancel 3 , .Xr pthread_resume_np 3 , .Xr pthread_suspend_np 3 .Sh STANDARDS The .Fn thr_wake system call is non-standard and is used by .Lb libthr to implement .St -p1003.1-2001 .Xr pthread 3 functionality. .Sh HISTORY The .Fn thr_wake system call first appeared in .Fx 5.2 . diff --git a/lib/libc/x86/sys/pkru.3 b/lib/libc/x86/sys/pkru.3 index 2bcb6a64baaa..43c6e2423abe 100644 --- a/lib/libc/x86/sys/pkru.3 +++ b/lib/libc/x86/sys/pkru.3 @@ -1,203 +1,203 @@ -.\" Copyright (c) 2019 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2019 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. 
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd February 16, 2019 .Dt PKRU 3 .Os .Sh NAME .Nm Protection Key Rights for User pages .Nd provide fast user-managed key-based access control for pages .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In machine/sysarch.h .Ft int .Fn x86_pkru_get_perm "unsigned int keyidx" "int *access" "int *modify" .Ft int .Fn x86_pkru_set_perm "unsigned int keyidx" "int access" "int modify" .Ft int .Fo x86_pkru_protect_range .Fa "void *addr" .Fa "unsigned long len" .Fa "unsigned int keyidx" .Fa "int flag" .Fc .Ft int .Fn x86_pkru_unprotect_range "void *addr" "unsigned long len" .Sh DESCRIPTION The protection keys feature provides an additional mechanism, besides the normal page permissions as established by .Xr mmap 2 and .Xr mprotect 2 , to control access to user-mode addresses. 
The mechanism gives safety measures which can be used to avoid incidental read or modification of sensitive memory, or as a debugging feature. It cannot guard against conscious accesses since permissions are user-controllable. .Pp If supported by hardware, each mapped user linear address has an associated 4-bit protection key. A new per-thread PKRU hardware register determines, for each protection key, whether user-mode addresses with that protection key may be read or written. .Pp Only one key may apply to a given range at a time. The default protection key index is zero; it is used even if no key was explicitly assigned to the address, or if the key was removed. .Pp The protection prevents the system from accessing user addresses as well as the user applications. When a system call was unable to read or write user memory due to key protection, it returns the .Er EFAULT error code. Note that some side effects may have occurred if this error is reported. .Pp Protection keys require that the system uses 4-level paging (also called long mode), which means that it is only available on amd64 systems. Both 64-bit and 32-bit applications can use protection keys. More information about the hardware feature is provided in the IA32 Software Developer's Manual published by Intel Corp. .Pp The key indexes written into the page table entries are managed by the .Fn sysarch syscall. Per-key permissions are managed using the user-mode instructions .Em RDPKRU and .Em WRPKRU . The system provides convenient library helpers for both the syscall and the instructions, described below. .Pp The .Fn x86_pkru_protect_range function assigns key .Fa keyidx to the range starting at .Fa addr and having length .Fa len . Starting address is truncated to the page start, and the end is rounded up to the end of the page. After a successful call, the range has the specified key assigned, even if the key is zero and it did not change the page table entries. 
.Pp The .Fa flags argument takes the logical OR of the following values: .Bl -tag -width .It Bq Va AMD64_PKRU_EXCL Only assign the key if the range does not have any other keys assigned (including the zero key). You must first remove any existing key with .Fn x86_pkru_unprotect_range in order for this request to succeed. If the .Va AMD64_PKRU_EXCL flag is not specified, .Fn x86_pkru_protect_range replaces any existing key. .It Bq Va AMD64_PKRU_PERSIST The keys assigned to the range are persistent. They are re-established when the current mapping is destroyed and a new mapping is created in any sub-range of the specified range. You must use a .Fn x86_pkru_unprotect_range call to forget the key. .El .Pp The .Fn x86_pkru_unprotect_range function removes any keys assigned to the specified range. Existing mappings are changed to use key index zero in page table entries. Keys are no longer considered installed for all mappings in the range, for the purposes of .Fn x86_pkru_protect_range with the .Va AMD64_PKRU_EXCL flag. .Pp The .Fn x86_pkru_get_perm function returns access rights for the key specified by the .Fa keyidx argument. If the value pointed to by .Fa access is zero after the call, no read or write permissions are granted for mappings which are assigned the key .Fa keyidx . If .Fa access is not zero, read access is permitted. The non-zero value of the variable pointed to by the .Fa modify argument indicates that write access is permitted. .Pp Conversely, the .Fn x86_pkru_set_perm establishes the access and modify permissions for the given key index as specified by its arguments. .Sh RETURN VALUES .Rv -std .Sh ERRORS .Bl -tag -width Er .It Bq Er EOPNOTSUPP The hardware does not support protection keys. .It Bq Er EINVAL The supplied key index is invalid (greater than 15). .It Bq Er EINVAL The supplied .Fa flags argument for .Fn x86_pkru_protect_range has reserved bits set. 
.It Bq Er EFAULT The supplied address range does not completely fit into the user-managed address range. .It Bq Er ENOMEM The memory shortage prevents the completion of the operation. .It Bq Er EBUSY The .Va AMD64_PKRU_EXCL flag was specified for .Fn x86_pkru_protect_range and the range already has defined protection keys. .El .Sh SEE ALSO .Xr mmap 2 , .Xr mprotect 2 , .Xr munmap 2 , .Xr sysarch 2 . .Sh STANDARDS The .Nm functions are non-standard and first appeared in .Fx 13.0 . diff --git a/lib/libthr/libthr.3 b/lib/libthr/libthr.3 index 7d0c7669654a..a89c6a4cb4c7 100644 --- a/lib/libthr/libthr.3 +++ b/lib/libthr/libthr.3 @@ -1,362 +1,362 @@ .\" Copyright (c) 2005 Robert N. M. Watson -.\" Copyright (c) 2014,2015,2021 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2014,2015,2021 The FreeBSD Foundation .\" All rights reserved. .\" .\" Part of this documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd October 1, 2021 .Dt LIBTHR 3 .Os .Sh NAME .Nm libthr .Nd "1:1 POSIX threads library" .Sh LIBRARY .Lb libthr .Sh SYNOPSIS .In pthread.h .Sh DESCRIPTION The .Nm library provides a 1:1 implementation of the .Xr pthread 3 library interfaces for application threading. It has been optimized for use by applications expecting system scope thread semantics. .Pp The library is tightly integrated with the run-time link editor .Xr ld-elf.so.1 1 and .Lb libc ; all three components must be built from the same source tree. Mixing .Li libc and .Nm libraries from different versions of .Fx is not supported. The run-time linker .Xr ld-elf.so.1 1 has some code to ensure backward-compatibility with older versions of .Nm . .Pp The man page documents the quirks and tunables of the .Nm . When linking with .Li -lpthread , the run-time dependency .Li libthr.so.3 is recorded in the produced object. .Sh MUTEX ACQUISITION A locked mutex (see .Xr pthread_mutex_lock 3 ) is represented by a volatile variable of type .Dv lwpid_t , which records the global system identifier of the thread owning the lock. .Nm performs a contested mutex acquisition in three stages, each of which is more resource-consuming than the previous. The first two stages are only applied for a mutex of .Dv PTHREAD_MUTEX_ADAPTIVE_NP type and .Dv PTHREAD_PRIO_NONE protocol (see .Xr pthread_mutexattr 3 ) . 
.Pp First, on SMP systems, a spin loop is performed, where the library attempts to acquire the lock by .Xr atomic 9 operations. The loop count is controlled by the .Ev LIBPTHREAD_SPINLOOPS environment variable, with a default value of 2000. .Pp If the spin loop was unable to acquire the mutex, a yield loop is executed, performing the same .Xr atomic 9 acquisition attempts as the spin loop, but each attempt is followed by a yield of the CPU time of the thread using the .Xr sched_yield 2 syscall. By default, the yield loop is not executed. This is controlled by the .Ev LIBPTHREAD_YIELDLOOPS environment variable. .Pp If both the spin and yield loops failed to acquire the lock, the thread is taken off the CPU and put to sleep in the kernel with the .Xr _umtx_op 2 syscall. The kernel wakes up a thread and hands the ownership of the lock to the woken thread when the lock becomes available. .Sh THREAD STACKS Each thread is provided with a private user-mode stack area used by the C runtime. The size of the main (initial) thread stack is set by the kernel, and is controlled by the .Dv RLIMIT_STACK process resource limit (see .Xr getrlimit 2 ) . .Pp By default, the main thread's stack size is equal to the value of .Dv RLIMIT_STACK for the process. If the .Ev LIBPTHREAD_SPLITSTACK_MAIN environment variable is present in the process environment (its value does not matter), the main thread's stack is reduced to 4MB on 64bit architectures, and to 2MB on 32bit architectures, when the threading library is initialized. The rest of the address space area which has been reserved by the kernel for the initial process stack is used for non-initial thread stacks in this case. The presence of the .Ev LIBPTHREAD_BIGSTACK_MAIN environment variable overrides .Ev LIBPTHREAD_SPLITSTACK_MAIN ; it is kept for backward-compatibility. 
.Pp The size of stacks for threads created by the process at run-time with the .Xr pthread_create 3 call is controlled by thread attributes: see .Xr pthread_attr 3 , in particular, the .Xr pthread_attr_setstacksize 3 , .Xr pthread_attr_setguardsize 3 and .Xr pthread_attr_setstackaddr 3 functions. If no attributes for the thread stack size are specified, the default non-initial thread stack size is 2MB for 64bit architectures, and 1MB for 32bit architectures. .Sh RUN-TIME SETTINGS The following environment variables are recognized by .Nm and adjust the operation of the library at run-time: .Bl -tag -width "Ev LIBPTHREAD_SPLITSTACK_MAIN" .It Ev LIBPTHREAD_BIGSTACK_MAIN Disables the reduction of the initial thread stack enabled by .Ev LIBPTHREAD_SPLITSTACK_MAIN . .It Ev LIBPTHREAD_SPLITSTACK_MAIN Causes a reduction of the initial thread stack, as described in the section .Sx THREAD STACKS . This was the default behaviour of .Nm before .Fx 11.0 . .It Ev LIBPTHREAD_SPINLOOPS The integer value of the variable overrides the default count of iterations in the .Li spin loop of the mutex acquisition. The default count is 2000, set by the .Dv MUTEX_ADAPTIVE_SPINS constant in the .Nm sources. .It Ev LIBPTHREAD_YIELDLOOPS A non-zero integer value enables the yield loop in the process of the mutex acquisition. The value is the count of loop operations. .It Ev LIBPTHREAD_QUEUE_FIFO The integer value of the variable specifies how often blocked threads are inserted at the head of the sleep queue, instead of its tail. Bigger values reduce the frequency of the FIFO discipline. The value must be between 0 and 255. .It Dv LIBPTHREAD_UMTX_MIN_TIMEOUT The minimal amount of time, in nanoseconds, the thread is required to sleep for pthread operations specifying a timeout. If the operation requests a timeout less than the value provided, it is silently increased to the value. The value of zero means no minimum (default). 
.Pp .El The following .Dv sysctl MIBs affect the operation of the library: .Bl -tag -width "Dv debug.umtx.robust_faults_verbose" .It Dv kern.ipc.umtx_vnode_persistent By default, a shared lock backed by a mapped file in memory is automatically destroyed on the last unmap of the corresponding file's page, which is allowed by POSIX. Setting the sysctl to 1 makes such a shared lock object persist until the vnode is recycled by the Virtual File System. Note that in case the file is not opened and not mapped, the kernel might recycle it at any moment, making this sysctl less useful than it sounds. .It Dv kern.ipc.umtx_max_robust The maximal number of robust mutexes allowed for one thread. The kernel will not unlock more mutexes than specified, see .Xr _umtx_op 2 for more details. The default value is large enough for most useful applications. .It Dv debug.umtx.robust_faults_verbose A non-zero value makes the kernel emit a diagnostic when the robust mutexes unlock was prematurely aborted after detecting some inconsistency, as a measure to prevent memory corruption. .El .Pp The .Dv RLIMIT_UMTXP limit (see .Xr getrlimit 2 ) defines how many shared locks a given user may create simultaneously. .Sh INTERACTION WITH RUN-TIME LINKER On load, .Nm installs interposing handlers into the hooks exported by .Li libc . The interposers provide a real locking implementation instead of the stubs for single-threaded processes in .Li libc , cancellation support and some modifications to the signal operations. .Pp .Nm cannot be unloaded; the .Xr dlclose 3 function does not perform any action when called with a handle for .Nm . One of the reasons is that the internal interposing of .Li libc functions cannot be undone. .Sh SIGNALS The implementation interposes the user-installed .Xr signal 3 handlers. This interposing is done to postpone signal delivery to threads which entered (libthr-internal) critical sections, where the calling of the user-provided signal handler is unsafe. 
An example of such a situation is owning the internal library lock. When a signal is delivered while the signal handler cannot be safely called, the call is postponed and performed after the exit from the critical section. This should be taken into account when interpreting .Xr ktrace 1 logs. .Sh PROCESS-SHARED SYNCHRONIZATION OBJECTS In the .Li libthr implementation, user-visible types for all synchronization objects (e.g. pthread_mutex_t) are pointers to internal structures, allocated either by the corresponding .Fn pthread_*_init method call, or implicitly on first use when a static initializer was specified. The initial implementation of process-private locking objects used this model with internal allocation, and the addition of process-shared objects was done in a way that did not break the application binary interface. .Pp For process-private objects, the internal structure is allocated using either .Xr malloc 3 or, for .Xr pthread_mutex_init 3 , an internal memory allocator implemented in .Nm . The internal allocator for mutexes is used to avoid bootstrap issues with many .Xr malloc 3 implementations which need working mutexes to function. The same allocator is used for thread-specific data, see .Xr pthread_setspecific 3 , for the same reason. .Pp For process-shared objects, the internal structure is created by first allocating a shared memory segment using .Xr _umtx_op 2 operation .Dv UMTX_OP_SHM , and then mapping it into process address space with .Xr mmap 2 with the .Dv MAP_SHARED flag. The POSIX standard requires that: .Bd -literal only the process-shared synchronization object itself can be used for performing synchronization. It need not be referenced at the address used to initialize it (that is, another mapping of the same object can be used). .Ed .Pp With the .Fx implementation, process-shared objects require initialization in each process that uses them.
In particular, if you map the shared memory containing the user portion of a process-shared object already initialized in a different process, locking functions do not work on it. .Pp Another broken case is a forked child creating the object in memory shared with the parent, which cannot be used from the parent. Note that processes should not use non-async-signal safe functions after .Xr fork 2 anyway. .Sh SEE ALSO .Xr ktrace 1 , .Xr ld-elf.so.1 1 , .Xr getrlimit 2 , .Xr errno 2 , .Xr thr_exit 2 , .Xr thr_kill 2 , .Xr thr_kill2 2 , .Xr thr_new 2 , .Xr thr_self 2 , .Xr thr_set_name 2 , .Xr _umtx_op 2 , .Xr dlclose 3 , .Xr dlopen 3 , .Xr getenv 3 , .Xr pthread_attr 3 , .Xr pthread_attr_setstacksize 3 , .Xr pthread_create 3 , .Xr signal 3 , .Xr atomic 9 .Sh HISTORY The .Nm library first appeared in .Fx 5.2 . .Sh AUTHORS .An -nosplit The .Nm library was originally created by .An Jeff Roberson Aq Mt jeff@FreeBSD.org , and enhanced by .An Jonathan Mini Aq Mt mini@FreeBSD.org and .An Mike Makonnen Aq Mt mtm@FreeBSD.org . It has been substantially rewritten and optimized by .An David Xu Aq Mt davidxu@FreeBSD.org . diff --git a/sbin/ldconfig/ldconfig.8 b/sbin/ldconfig/ldconfig.8 index 47e0dfa99b50..c9552a974ccc 100644 --- a/sbin/ldconfig/ldconfig.8 +++ b/sbin/ldconfig/ldconfig.8 @@ -1,173 +1,173 @@ .\" .\" Copyright (c) 1993 Paul Kranenburg .\" All rights reserved. -.\" Copyright (c) 2021 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2021 The FreeBSD Foundation .\" .\" Portions of this documentation were written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2.
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by Paul Kranenburg. .\" 3. The name of the author may not be used to endorse or promote products .\" derived from this software without specific prior written permission .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .Dd May 15, 2021 .Dt LDCONFIG 8 .Os .Sh NAME .Nm ldconfig .Nd configure the dynamic linker search path for shared libraries .Sh SYNOPSIS .Nm .Op Fl 32 .Op Fl Rimrv .Op Fl f Ar hints_file .Op Ar directory | Ar .Sh DESCRIPTION .Nm utility is used to configure the set of paths used by the dynamic linker .Xr ld-elf.so.1 1 when searching for shared libraries. The dynamic linker looks for libraries in a set of built-in system directories and any directories specified in the hints file. This obviates the need for storing search paths within the executable, see the .Fl rpath option for the static linker .Xr ld 1 . .Pp The hints file is maintained by .Nm . 
The .Ar directories list to be stored in the hints file is supplied on the command line. .Pp Alternatively to the .Ar directories list on the command line, .Ar files may be specified; these are expected to contain directories to scan for shared libraries. Each directory's pathname must start on a new line. Blank lines and lines starting with the comment character .Ql \&# are ignored. .Pp For security reasons, directories which are world or group-writable or which are not owned by root produce warning messages and are skipped, unless the .Fl i option is present. .Pp The .Ev LD_LIBRARY_PATH environment variable can be used to specify additional shared library search directories. .Ev LD_LIBRARY_PATH is a .Sq \&: separated list of directory paths which are searched by the dynamic linker when it needs to load a shared library. It can be viewed as the run-time equivalent of the .Fl L switch of .Xr ld 1 . .Pp The following options are recognized by .Nm : .Bl -tag -width indent .It Fl 32 Generate the hints for 32-bit ABI shared libraries on 64-bit systems that support running 32-bit binaries. .It Fl elf Ignored for backwards compatibility. .It Fl R Appends pathnames on the command line to the directory list from the hints file. .Pp This is the default action when no options are given. .It Fl f Ar hints_file Read and/or update the specified hints file, instead of the standard file. This option is provided primarily for testing. .It Fl i Run in insecure mode. The security checks will not be performed. .It Fl m Instead of replacing the list of the directories to search with the directories specified on the command line, merge existing list with the specified directories, and write the result to the hints file. .It Fl r List the current list of the directories from the hints file on the standard output. The hints file is not modified. .Pp Scan and print all libraries found on the directories list. .It Fl v Switch on verbose mode. 
.El .Sh SECURITY Special care must be taken when loading shared libraries into the address space of .Ev set-user-Id programs. Whenever such a program is run by any user except the owner of the program, the dynamic linker will only load shared libraries from paths found in the hints file. In particular, the .Ev LD_LIBRARY_PATH is not used to search for libraries. Thus, .Nm serves to specify the trusted collection of directories from which shared objects can be safely loaded. .Sh FILES .Bl -tag -width /var/run/ld-elf.so.hintsxxx -compact .It Pa /var/run/ld-elf.so.hints Standard hints file for the ELF dynamic linker. .It Pa /etc/ld-elf.so.conf Conventional configuration file containing directory names for invocations with .Fl elf . .It Pa /var/run/ld-elf32.so.hints Conventional configuration files containing directory names for invocations with .Fl 32 . .El .Sh SEE ALSO .Xr ld 1 , .Xr ld-elf.so.1 1 , .Xr link 5 .Sh HISTORY A .Nm utility first appeared in SunOS 4.0; it appeared in its current form in .Fx 1.1 . .Pp The name 'hints file' is historic from the times when the file also contained hints to the dynamic linker. This functionality is not provided for ELF. diff --git a/share/man/man3/pthread_mutex_consistent.3 b/share/man/man3/pthread_mutex_consistent.3 index ac44a8d99718..e22be52ba618 100644 --- a/share/man/man3/pthread_mutex_consistent.3 +++ b/share/man/man3/pthread_mutex_consistent.3 @@ -1,91 +1,91 @@ -.\" Copyright (c) 2016 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2016 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2.
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd March 27, 2017 .Dt PTHREAD_MUTEX_CONSISTENT 3 .Os .Sh NAME .Nm pthread_mutex_consistent .Nd mark state protected by robust mutex as consistent .Sh LIBRARY .Lb libpthread .Sh SYNOPSIS .In pthread.h .Ft int .Fn pthread_mutex_consistent "pthread_mutex_t *mutex" .Sh DESCRIPTION If the process containing the thread owning a robust mutex terminates while holding the mutex, the mutex becomes inconsistent and the next thread that acquires the mutex lock is notified of the state by the return value .Er EOWNERDEAD . In this case, the mutex does not become normally usable again until the state is marked consistent. .Pp The .Fn pthread_mutex_consistent , when called with the .Fa mutex argument, which points to the initialized robust mutex in an inconsistent state, marks the state protected by the mutex as consistent again. The consequent unlock of the mutex, by either .Fn pthread_mutex_unlock or other methods, allows other contenders to lock the mutex.
.Pp If the mutex in the inconsistent state is not marked consistent by the call to .Fn pthread_mutex_consistent before unlock, further attempts to lock the .Fa mutex result in the .Er ENOTRECOVERABLE condition reported by the locking functions. .Sh RETURN VALUES If successful, .Fn pthread_mutex_consistent will return zero, otherwise an error number will be returned to indicate the error. .Sh ERRORS The .Fn pthread_mutex_consistent function will fail if: .Bl -tag -width Er .It Bq Er EINVAL The mutex pointed to by the .Fa mutex argument is not robust, or is not in the inconsistent state. .El .Sh SEE ALSO .Xr pthread_mutex_init 3 , .Xr pthread_mutex_lock 3 , .Xr pthread_mutex_unlock 3 , .Xr pthread_mutexattr_setrobust 3 .Sh STANDARDS The .Fn pthread_mutex_consistent function conforms to .St -susv4 . diff --git a/share/man/man3/pthread_mutexattr.3 b/share/man/man3/pthread_mutexattr.3 index 8f4d0d9a06a4..b18d93e2e13e 100644 --- a/share/man/man3/pthread_mutexattr.3 +++ b/share/man/man3/pthread_mutexattr.3 @@ -1,368 +1,368 @@ .\" Copyright (C) 2000 Jason Evans . -.\" Copyright (c) 2021 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2021 The FreeBSD Foundation .\" All rights reserved. .\" .\" Part of this documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice(s), this list of conditions and the following disclaimer as .\" the first lines of this file unmodified other than the possible .\" addition of one or more copyright notices. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice(s), this list of conditions and the following disclaimer in .\" the documentation and/or other materials provided with the .\" distribution.
.\" .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY .\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR .\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, .\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE .\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, .\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .Dd October 27, 2023 .Dt PTHREAD_MUTEXATTR 3 .Os .Sh NAME .Nm pthread_mutexattr_init , .Nm pthread_mutexattr_destroy , .Nm pthread_mutexattr_setprioceiling , .Nm pthread_mutexattr_getprioceiling , .Nm pthread_mutexattr_setprotocol , .Nm pthread_mutexattr_getprotocol , .Nm pthread_mutexattr_setpshared , .Nm pthread_mutexattr_getpshared , .Nm pthread_mutexattr_setrobust , .Nm pthread_mutexattr_getrobust , .Nm pthread_mutexattr_settype , .Nm pthread_mutexattr_gettype .Nd mutex attribute operations .Sh LIBRARY .Lb libpthread .Sh SYNOPSIS .In pthread.h .Ft int .Fn pthread_mutexattr_init "pthread_mutexattr_t *attr" .Ft int .Fn pthread_mutexattr_destroy "pthread_mutexattr_t *attr" .Ft int .Fo pthread_mutexattr_setprioceiling .Fa "pthread_mutexattr_t *attr" "int prioceiling" .Fc .Ft int .Fo pthread_mutexattr_getprioceiling .Fa "const pthread_mutexattr_t *attr" "int *prioceiling" .Fc .Ft int .Fn pthread_mutexattr_setprotocol "pthread_mutexattr_t *attr" "int protocol" .Ft int .Fo pthread_mutexattr_getprotocol .Fa "const pthread_mutexattr_t *restrict attr" "int *restrict protocol" .Fc .Ft int .Fo pthread_mutexattr_setpshared .Fa "pthread_mutexattr_t *attr" "int shared" .Fc .Ft int .Fo pthread_mutexattr_getpshared 
.Fa "const pthread_mutexattr_t *attr" "int *shared" .Fc .Ft int .Fn pthread_mutexattr_setrobust "pthread_mutexattr_t *attr" "int robust" .Ft int .Fn pthread_mutexattr_getrobust "pthread_mutexattr_t *attr" "int *robust" .Ft int .Fn pthread_mutexattr_settype "pthread_mutexattr_t *attr" "int type" .Ft int .Fo pthread_mutexattr_gettype .Fa "const pthread_mutexattr_t *restrict attr" "int *restrict type" .Fc .Sh DESCRIPTION Mutex attributes are used to specify parameters to .Fn pthread_mutex_init . One attribute object can be used in multiple calls to .Fn pthread_mutex_init , with or without modifications between calls. .Pp The .Fn pthread_mutexattr_init function initializes .Fa attr with all the default mutex attributes. .Pp The .Fn pthread_mutexattr_destroy function destroys .Fa attr . .Pp The .Fn pthread_mutexattr_setprioceiling function sets the priority ceiling for the mutex, used by threads executed under the .Dv PTHREAD_PRIO_PROTECT protocol. .Pp The .Fn pthread_mutexattr_setprotocol function specifies the protocol to be followed in utilizing mutexes. The .Fa protocol argument can take one of the following values: .Bl -tag -width PTHREAD_PRIO_PROTECT .It PTHREAD_PRIO_NONE Priority and scheduling of the thread owning this mutex is not affected by its mutex ownership. .It PTHREAD_PRIO_INHERIT Request priority-inheritance protocol, where the thread owning the mutex is executed at the highest priority among priorities of all threads waiting on any mutex owned by this thread. .It PTHREAD_PRIO_PROTECT Request priority-inheritance protocol, where the thread owning the mutex is executed at highest priority among priorities or priority ceilings of all threads waiting on any mutex owned by this thread. .El .Pp The .Fn pthread_mutexattr_setpshared function sets the process-shared attribute of .Fa attr to the value specified in .Fa pshared . 
The argument .Fa pshared may have one of the following values: .Bl -tag -width ".Dv PTHREAD_PROCESS_PRIVATE" .It Dv PTHREAD_PROCESS_PRIVATE The mutex may only be used by threads in the same process as the one that created the object. .It Dv PTHREAD_PROCESS_SHARED The mutex may be used by threads in processes other than the one that created the object, assuming other processes share access to the memory where the mutex was allocated. .El See .Xr libthr 3 for details of the implementation of the shared mutexes, and their limitations. .Pp The .Fn pthread_mutexattr_setrobust function specifies robustness attribute of the mutex. Possible values for the .Fa robust argument are .Bl -tag -width PTHREAD_MUTEX_STALLED .It PTHREAD_MUTEX_STALLED No special actions are taken if the thread owning the mutex is terminated without unlocking the mutex lock. This can lead to deadlocks if no other thread can unlock the mutex. This is the default value. .It PTHREAD_MUTEX_ROBUST If the process containing the owning thread of a robust mutex, or owning thread, terminates while holding the mutex lock, the next thread that acquires the mutex is notified about the termination by the return value .Ev EOWNERDEAD from the locking function. Then, either .Xr pthread_mutex_consistent 3 can be used to repair the mutex lock state, or .Xr pthread_mutex_unlock 3 can unlock the mutex lock but also put it in an unusable state, where all further attempts to acquire it result in the .Ev ENOTRECOVERABLE error. .El .Pp The .Fn pthread_mutexattr_settype function sets the type of the mutex. The type affects the behavior of calls which lock and unlock the mutex. The possible values for the .Fa type argument are .Bl -tag -width PTHREAD_MUTEX_ERRORCHECK .It PTHREAD_MUTEX_NORMAL Both recursive locking, and unlocking when the lock is not owned by the current thread, cause an error to be returned from the corresponding functions.
This matches .Dv PTHREAD_MUTEX_ERRORCHECK but somewhat contradicts the behavior mandated by POSIX. .It PTHREAD_MUTEX_ERRORCHECK Both recursive locking, and unlocking when the lock is not owned by the current thread, cause an error to be returned from the corresponding functions. .It PTHREAD_MUTEX_RECURSIVE Recursive locking is allowed. Attempt to unlock when current thread is not an owner of the lock causes an error to be returned. .It PTHREAD_MUTEX_DEFAULT The .Fx implementation maps this type to .Dv PTHREAD_MUTEX_ERRORCHECK type. .El .Pp The .Fn pthread_mutexattr_get* functions copy the value of the attribute that corresponds to each function name to the location pointed to by the second function parameter. .Sh RETURN VALUES If successful, these functions return 0. Otherwise, an error number is returned to indicate the error. .Sh ERRORS The .Fn pthread_mutexattr_init function will fail if: .Bl -tag -width Er .It Bq Er ENOMEM Out of memory. .El .Pp The .Fn pthread_mutexattr_destroy function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr . .El .Pp The .Fn pthread_mutexattr_setprioceiling function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr , or invalid value for .Fa prioceiling . .El .Pp The .Fn pthread_mutexattr_getprioceiling function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr . .El .Pp The .Fn pthread_mutexattr_setprotocol function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr , or invalid value for .Fa protocol . .El .Pp The .Fn pthread_mutexattr_getprotocol function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr . .El .Pp The .Fn pthread_mutexattr_setpshared function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr , or invalid value for .Fa shared . .El .Pp The .Fn pthread_mutexattr_getpshared function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr .
.El .Pp The .Fn pthread_mutexattr_settype function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr , or invalid value for .Fa type . .El .Pp The .Fn pthread_mutexattr_gettype function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr . .El .Pp The .Fn pthread_mutexattr_setrobust function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr , or invalid value for .Fa robust . .El .Pp The .Fn pthread_mutexattr_getrobust function will fail if: .Bl -tag -width Er .It Bq Er EINVAL Invalid value for .Fa attr . .El .Sh SEE ALSO .Xr libthr 3 , .Xr pthread_mutex_init 3 .Sh STANDARDS The .Fn pthread_mutexattr_init and .Fn pthread_mutexattr_destroy functions conform to .St -p1003.1-96 . .Pp The .Fn pthread_mutexattr_setprioceiling , .Fn pthread_mutexattr_getprioceiling , .Fn pthread_mutexattr_setprotocol , .Fn pthread_mutexattr_getprotocol , .Fn pthread_mutexattr_setpshared , .Fn pthread_mutexattr_getpshared , .Fn pthread_mutexattr_settype , and .Fn pthread_mutexattr_gettype functions conform to .St -susv2 . The .Fn pthread_mutexattr_setrobust and .Fn pthread_mutexattr_getrobust functions conform to .St -susv4 . diff --git a/share/man/man4/nvdimm.4 b/share/man/man4/nvdimm.4 index f7eab8f97e88..125fadd851f3 100644 --- a/share/man/man4/nvdimm.4 +++ b/share/man/man4/nvdimm.4 @@ -1,132 +1,132 @@ -.\" Copyright (c) 2019 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2019 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2.
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd September 5, 2019 .Dt NVDIMM 4 .Os .Sh NAME .Nm nvdimm .Nd ACPI NVDIMM driver .Sh SYNOPSIS To load the driver as a module at boot, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent nvdimm_load="YES" .Ed .Sh DESCRIPTION .Bf -symbolic Note: The .Nm driver is under development and has some important limitations described below. .Ef .Pp The .Nm driver provides access to Non-Volatile DIMM (NVDIMM) persistent memory devices, which are ACPI-enumerated under the root NVDIMM device with a .Va _HID of .Dv ACPI0012 and in the .Dv NFIT table. .Pp For each System Physical Address (SPA) Range described by NFIT, a device node .Pa /dev/nvdimm_spaNNN is created, where .Dv NNN is the SPA position in the table. The node can be used to .Xr read 2 , .Xr write 2 , or .Xr mmap 2 the device. .Pp Also, for each SPA, the geom provider .Pa spaNNN is created, which can be used to create a conventional filesystem (e.g., by .Xr newfs 8 ) and .Xr mount 8 it as any storage volume. 
Content accessible by .Pa /dev/nvdimm_spaNNN and .Pa /dev/spaNNN is coherent. .Pp The .Nm driver has support for reading NVDIMM namespaces (if supported by your hardware and already configured by some other mechanism, e.g., a BIOS configuration screen). The driver will provide a .Pa /dev/nvdimm_spaNNNnsMMM device node and .Pa spaNNNnsMMM geom provider for each namespace in a SPA, which behave analogously to their full-SPA cousins described above. .Sh SEE ALSO .Xr ACPI 4 , .Xr GEOM 4 , .Xr geom 8 , .Xr mount 8 , .Xr newfs 8 , .Xr disk 9 .Sh HISTORY The .Nm driver first appeared in .Fx 12.0 . .Sh AUTHORS .An -nosplit The .Nm driver was originally written by .An Konstantin Belousov Aq Mt kib@FreeBSD.org , and then updated by .An D. Scott Phillips Aq Mt scottph@FreeBSD.org . .Sh BUGS The .Nm driver does not utilize the Block Window interface, so if a write to an NVDIMM is interrupted due to a system crash or power outage, the corresponding page might be left in a partially updated state. .Pp There is no support for Device-Specific Methods (DSM), used to report and control device health and wearing. .Pp The driver depends on the .Xr pmap_largemap 9 pmap interface, which is currently only implemented on amd64. The interface can only be reasonably implemented on 64bit architectures. diff --git a/share/man/man5/fdescfs.5 b/share/man/man5/fdescfs.5 index fa260cbc12f3..0d060685b0b5 100644 --- a/share/man/man5/fdescfs.5 +++ b/share/man/man5/fdescfs.5 @@ -1,218 +1,218 @@ -.\" Copyright (c) 2021 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2021 The FreeBSD Foundation .\" .\" Copyright (c) 1996 .\" Mike Pritchard . All rights reserved. .\" .\" Copyright (c) 1992, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" All rights reserved. .\" .\" This code is derived from software donated to Berkeley by .\" Jan-Simon Pendry.
.\" .\" Parts of this documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd Jul 11, 2023 .Dt FDESCFS 5 .Os .Sh NAME .Nm fdescfs .Nd file-descriptor file system .Sh SYNOPSIS .Bd -literal fdescfs /dev/fd fdescfs rw 0 0 .Ed .Sh DESCRIPTION The file-descriptor file system, or .Nm , provides access to the per-process file descriptor namespace in the global file system namespace. The conventional mount point is .Pa /dev/fd . 
.Pp The file system's contents appear as a list of numbered files which correspond to the open files of the process reading the directory. The files .Pa /dev/fd/0 through .Pa /dev/fd/# refer to file descriptors which can be accessed through the file system. .Pp The following mount options can be used when mounting .Nm filesystem: .Bl -tag -width linrdlnk .It Cm nodup For file descriptors referencing vnodes, instead of the .Xr dup 2 semantic described above, implement re-opening of the referenced vnode. See below for more details. .It Cm linrdlnk Report the type of the .Nm vnode as .Dv VLNK instead of .Fx traditional .Dv VCHR . For .Xr linux 4 ABI compatibility mount .Nm volume with the .Cm linrdlnk option. .It Cm rdlnk Treat .Nm vnodes as symbolic links consistently, in particular, follow the resolved name for the name lookups. This option is strictly stronger than the .Cm linrdlnk option, it changes not only the type returned by .Xr stat 2 , but also causes the .Nm files to behave as symlinks. .El .Pp For .Nm mounted without the .Cm nodup mount option, if the file descriptor is open and the mode the file is being opened with is a subset of the mode of the existing descriptor, the call: .Bd -literal -offset indent fd = open("/dev/fd/0", mode); .Ed .Pp and the call: .Bd -literal -offset indent fd = fcntl(0, F_DUPFD, 0); .Ed .Pp are equivalent. Flags to the .Xr open 2 call other than .Dv O_RDONLY , .Dv O_WRONLY and .Dv O_RDWR are ignored. .Pp For .Nm mounted with the .Cm nodup option, and file descriptor referencing a vnode, the call: .Bd -literal -offset indent fd = open("/dev/fd/0", mode); .Ed .Pp reopens the referenced vnode with the specified .Fa mode . 
In other words, the .Fn open call above is equivalent to .Bd -literal -offset indent fd = openat(0, "", O_EMPTY_PATH, mode); .Ed .Pp In particular, if the file descriptor was opened with the .Dv O_PATH flag, then either .Dv O_EMPTY_PATH or .Fn open over .Nm mount with .Cm nodup option allows one to convert it to a regularly opened file, assuming that the current permissions allow the requested .Fa mode . .Pp .Em "Note:" .Pa /dev/fd/0 , .Pa /dev/fd/1 and .Pa /dev/fd/2 files are created by default when devfs alone is mounted. .Nm creates entries for all file descriptors opened by the process. .Sh FILES .Bl -tag -width /dev/stderr -compact .It Pa /dev/fd/# .El .Sh EXAMPLES To mount a .Nm volume located on .Pa /dev/fd : .Pp .Dl "mount -t fdescfs none /dev/fd" .Pp For .Xr linux 4 ABI compatibility: .Pp .Dl "mount -t fdescfs -o linrdlnk none /compat/linux/dev/fd" .Pp As a substitute for the .Dv O_EMPTY_PATH flag, use: .Pp .Dl "mount -t fdescfs -o nodup none /dev/fdpath" .Sh SEE ALSO .Xr devfs 5 , .Xr mount 8 .Sh HISTORY The .Nm file system first appeared in .Bx 4.4 . The .Nm manual page first appeared in .Fx 2.2 . .Sh AUTHORS .An -nosplit The .Nm manual page was written by .An Mike Pritchard Aq Mt mpp@FreeBSD.org , and was based on the manual page written by .An Jan-Simon Pendry . diff --git a/share/man/man5/tmpfs.5 b/share/man/man5/tmpfs.5 index c01aefd8550a..186bb812db6f 100644 --- a/share/man/man5/tmpfs.5 +++ b/share/man/man5/tmpfs.5 @@ -1,211 +1,211 @@ .\"- .\" Copyright (c) 2007 Xin LI -.\" Copyright (c) 2017 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2017 The FreeBSD Foundation .\" .\" Part of this documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1.
Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS DOCUMENTATION IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, .\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .\"- .\" Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" .Dd September 18, 2023 .Dt TMPFS 5 .Os .Sh NAME .Nm tmpfs .Nd "in-memory file system" .Sh SYNOPSIS To compile this driver into the kernel, place the following line in your kernel configuration file: .Bd -ragged -offset indent .Cd "options TMPFS" .Ed .Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : .Bd -literal -offset indent tmpfs_load="YES" .Ed .Sh DESCRIPTION The .Nm driver implements an in-memory, or .Nm file system. The filesystem stores both file metadata and data in main memory. This allows very fast and low latency accesses to the data. The data is volatile. An umount or system reboot invalidates it. These properties make the filesystem's mounts suitable for fast scratch storage, like .Pa /tmp . .Pp If the system becomes low on memory and swap is configured .Po see .Xr swapon 8 Pc , the system can transfer file data to swap space, freeing memory for other needs. Metadata, including the directory content, is never swapped out by the current implementation. Keep this in mind when planning the mount limits, especially when expecting to place many small files on a tmpfs mount. .Pp When .Xr mmap 2 is used on a file from a tmpfs mount, the swap VM object managing the file pages is used to implement mapping and avoid double-copying of the file data. 
This quirk causes process inspection tools, like .Xr procstat 1 , to report anonymous memory mappings instead of file mappings. .Sh OPTIONS The following options are available when mounting .Nm file systems: .Bl -tag -width "maxfilesize" .It Cm easize Set the maximum memory size used by extended attributes in bytes. The default is 16 megabytes. .It Cm export Accept the .Cm export option for compatibility with .Xr nfsv4 4 . This option does nothing. .It Cm gid Set the group ID of the root inode of the file system. The default is the mount point's GID. .It Cm inodes Set the maximum number of nodes available to the file system. If not specified, the file system chooses a reasonable maximum based on the file system size, which can be limited with the .Cm size option. .It Cm maxfilesize Set the maximum file size in bytes. The default is the maximum possible value. .It Cm mode Set the mode (in octal notation) of the root inode of the file system. The default is the mount point's mode. .It Cm nomtime Disable the tracking of mtime updates caused by writes to the shared mapped areas backed by .Nm files. This option removes periodic scans, which downgrade read-write-mapped pages to read-only to note the writes. .It Cm nonc Do not use namecache to resolve names to files for the created mount. This saves memory, but currently might impair scalability for highly used mounts on large machines. .It Cm nosymfollow Do not follow .Xr symlink 7 Ap s on the mounted file system. .It Cm pgread Enable pgcache read for the mount. .It Cm size Set the total file system size in bytes, unless suffixed with one of k, m, g, t, or p, which denote kilobyte, megabyte, gigabyte, terabyte and petabyte respectively. If zero (the default) or a value larger than SIZE_MAX - PAGE_SIZE is given, the available amount of memory (including main memory and swap space) will be used. .It Cm uid Set the user ID of the root inode of the file system. The default is the mount point's UID. 
.It Cm union Refer to .Xr mount 8 . .El .Sh EXAMPLES Mount a .Nm memory file system: .Pp .Dl "mount -t tmpfs tmpfs /tmp" .Pp Configure a .Nm mount via .Xr fstab 5 : .Bd -literal -offset indent tmpfs /tmp tmpfs rw 0 0 .Ed .Sh SEE ALSO .Xr procstat 1 , .Xr mmap 2 , .Xr nmount 2 , .Xr unmount 2 , .Xr fstab 5 , .Xr mdmfs 8 , .Xr mount 8 , .Xr swapinfo 8 , .Xr swapon 8 .Sh HISTORY The .Nm driver first appeared in .Fx 7.0 . .Sh AUTHORS .An -nosplit The .Nm kernel implementation was written by .An Julio M. Merino Vidal Aq Mt jmmv@NetBSD.org as a Google Summer of Code project. .Pp .An Rohit Jalan and others ported it from .Nx to .Fx . .Pp This manual page was written by .An Xin LI Aq Mt delphij@FreeBSD.org . diff --git a/share/man/man7/security.7 b/share/man/man7/security.7 index 785cfcb6c7e5..4b5b792777f9 100644 --- a/share/man/man7/security.7 +++ b/share/man/man7/security.7 @@ -1,1115 +1,1115 @@ .\" Copyright (C) 1998 Matthew Dillon. All rights reserved. -.\" Copyright (c) 2019 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2019 The FreeBSD Foundation .\" .\" Parts of this documentation were written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd August 18, 2023 .Dt SECURITY 7 .Os .Sh NAME .Nm security .Nd introduction to security under FreeBSD .Sh DESCRIPTION Security is a function that begins and ends with the system administrator. While all .Bx multi-user systems have some inherent security, the job of building and maintaining additional security mechanisms to keep users .Dq honest is probably one of the single largest undertakings of the sysadmin. Machines are only as secure as you make them, and security concerns are ever competing with the human necessity for convenience. .Ux systems, in general, are capable of running a huge number of simultaneous processes and many of these processes operate as servers \(em meaning that external entities can connect and talk to them. As yesterday's mini-computers and mainframes become today's desktops, and as computers become networked and internetworked, security becomes an ever bigger issue. .Pp Security is best implemented through a layered onion approach. In a nutshell, what you want to do is to create as many layers of security as are convenient and then carefully monitor the system for intrusions. .Pp System security also pertains to dealing with various forms of attacks, including attacks that attempt to crash or otherwise make a system unusable but do not attempt to break root. 
Security concerns can be split up into several categories: .Bl -enum -offset indent .It Denial of Service attacks (DoS) .It User account compromises .It Root compromise through accessible servers .It Root compromise via user accounts .It Backdoor creation .El .Pp A denial of service attack is an action that deprives the machine of needed resources. Typically, DoS attacks are brute-force mechanisms that attempt to crash or otherwise make a machine unusable by overwhelming its servers or network stack. Some DoS attacks try to take advantage of bugs in the networking stack to crash a machine with a single packet. The latter can only be fixed by applying a bug fix to the kernel. Attacks on servers can often be fixed by properly specifying options to limit the load the servers incur on the system under adverse conditions. Brute-force network attacks are harder to deal with. A spoofed-packet attack, for example, is nearly impossible to stop short of cutting your system off from the Internet. It may not be able to take your machine down, but it can fill up your Internet pipe. .Pp A user account compromise is even more common than a DoS attack. Many sysadmins still run standard .Xr telnetd 8 and .Xr ftpd 8 servers on their machines. These servers, by default, do not operate over encrypted connections. The result is that if you have any moderate-sized user base, one or more of your users logging into your system from a remote location (which is the most common and convenient way to log in to a system) will have his or her password sniffed. The attentive system administrator will analyze his remote access logs looking for suspicious source addresses even for successful logins. .Pp One must always assume that once an attacker has access to a user account, the attacker can break root. However, the reality is that in a well secured and maintained system, access to a user account does not necessarily give the attacker access to root. 
The distinction is important because without access to root the attacker cannot generally hide his tracks and may, at best, be able to do nothing more than mess with the user's files or crash the machine. User account compromises are very common because users tend not to take the precautions that sysadmins take. .Pp System administrators must keep in mind that there are potentially many ways to break root on a machine. The attacker may know the root password, the attacker may find a bug in a root-run server and be able to break root over a network connection to that server, or the attacker may know of a bug in an SUID-root program that allows the attacker to break root once he has broken into a user's account. If an attacker has found a way to break root on a machine, the attacker may not have a need to install a backdoor. Many of the root holes found and closed to date involve a considerable amount of work by the attacker to clean up after himself, so most attackers do install backdoors. This gives you a convenient way to detect the attacker. Making it impossible for an attacker to install a backdoor may actually be detrimental to your security because it will not close off the hole the attacker used to break in originally. .Pp Security remedies should always be implemented with a multi-layered .Dq onion peel approach and can be categorized as follows: .Bl -enum -offset indent .It Securing root and staff accounts .It Securing root \(em root-run servers and SUID/SGID binaries .It Securing user accounts .It Securing the password file .It Securing the kernel core, raw devices, and file systems .It Quick detection of inappropriate changes made to the system .It Paranoia .El .Sh SECURING THE ROOT ACCOUNT AND SECURING STAFF ACCOUNTS Do not bother securing staff accounts if you have not secured the root account. Most systems have a password assigned to the root account. The first thing you do is assume that the password is .Em always compromised. 
This does not mean that you should remove the password. The password is almost always necessary for console access to the machine. What it does mean is that you should not make it possible to use the password outside of the console or possibly even with a .Xr su 1 utility. For example, make sure that your PTYs are specified as being .Dq Li insecure in the .Pa /etc/ttys file so that direct root logins via .Xr telnet 1 are disallowed. If using other login services such as .Xr sshd 8 , make sure that direct root logins are disabled there as well. Consider every access method \(em services such as .Xr ftp 1 often fall through the cracks. Direct root logins should only be allowed via the system console. .Pp Of course, as a sysadmin you have to be able to get to root, so we open up a few holes. But we make sure these holes require additional password verification to operate. One way to make root accessible is to add appropriate staff accounts to the .Dq Li wheel group (in .Pa /etc/group ) . The staff members placed in the .Li wheel group are allowed to .Xr su 1 to root. You should never give staff members native .Li wheel access by putting them in the .Li wheel group in their password entry. Staff accounts should be placed in a .Dq Li staff group, and then added to the .Li wheel group via the .Pa /etc/group file. Only those staff members who actually need to have root access should be placed in the .Li wheel group. It is also possible, when using an authentication method such as Kerberos, to use Kerberos's .Pa .k5login file in the root account to allow a .Xr ksu 1 to root without having to place anyone at all in the .Li wheel group. This may be the better solution since the .Li wheel mechanism still allows an intruder to break root if the intruder has gotten hold of your password file and can break into a staff account. While having the .Li wheel mechanism is better than having nothing at all, it is not necessarily the safest option. 
.Pp An indirect way to secure the root account is to secure your staff accounts by using an alternative login access method and *'ing out the crypted password for the staff accounts. This way an intruder may be able to steal the password file but will not be able to break into any staff accounts or root, even if root has a crypted password associated with it (assuming, of course, that you have limited root access to the console). Staff members get into their staff accounts through a secure login mechanism such as .Xr kerberos 8 or .Xr ssh 1 using a private/public key pair. When you use something like Kerberos you generally must secure the machines which run the Kerberos servers and your desktop workstation. When you use a public/private key pair with SSH, you must generally secure the machine you are logging in .Em from (typically your workstation), but you can also add an additional layer of protection to the key pair by password protecting the keypair when you create it with .Xr ssh-keygen 1 . Being able to star-out the passwords for staff accounts also guarantees that staff members can only log in through secure access methods that you have set up. You can thus force all staff members to use secure, encrypted connections for all their sessions which closes an important hole used by many intruders: that of sniffing the network from an unrelated, less secure machine. .Pp The more indirect security mechanisms also assume that you are logging in from a more restrictive server to a less restrictive server. For example, if your main box is running all sorts of servers, your workstation should not be running any. In order for your workstation to be reasonably secure you should run as few servers as possible, up to and including no servers at all, and you should run a password-protected screen blanker. Of course, given physical access to a workstation, an attacker can break any sort of security you put on it. 
This is definitely a problem that you should consider but you should also consider the fact that the vast majority of break-ins occur remotely, over a network, from people who do not have physical access to your workstation or servers. .Pp Using something like Kerberos also gives you the ability to disable or change the password for a staff account in one place and have it immediately affect all the machines the staff member may have an account on. If a staff member's account gets compromised, the ability to instantly change his password on all machines should not be underrated. With discrete passwords, changing a password on N machines can be a mess. You can also impose re-passwording restrictions with Kerberos: not only can a Kerberos ticket be made to timeout after a while, but the Kerberos system can require that the user choose a new password after a certain period of time (say, once a month). .Sh SECURING ROOT \(em ROOT-RUN SERVERS AND SUID/SGID BINARIES The prudent sysadmin only runs the servers he needs to, no more, no less. Be aware that third party servers are often the most bug-prone. For example, running an old version of .Xr imapd 8 or .Xr popper 8 Pq Pa ports/mail/popper is like giving a universal root ticket out to the entire world. Never run a server that you have not checked out carefully. Many servers do not need to be run as root. For example, the .Xr talkd 8 , .Xr comsat 8 , and .Xr fingerd 8 daemons can be run in special user .Dq sandboxes . A sandbox is not perfect unless you go to a large amount of trouble, but the onion approach to security still stands: if someone is able to break in through a server running in a sandbox, they still have to break out of the sandbox. The more layers the attacker must break through, the lower the likelihood of his success. Root holes have historically been found in virtually every server ever run as root, including basic system servers. 
If you are running a machine through which people only log in via .Xr sshd 8 and never log in via .Xr telnetd 8 then turn off that service! .Pp .Fx now defaults to running .Xr talkd 8 , .Xr comsat 8 , and .Xr fingerd 8 in a sandbox. Depending on whether you are installing a new system or upgrading an existing system, the special user accounts used by these sandboxes may not be installed. The prudent sysadmin would research and implement sandboxes for servers whenever possible. .Pp There are a number of other servers that typically do not run in sandboxes: .Xr sendmail 8 , .Xr popper 8 , .Xr imapd 8 , .Xr ftpd 8 , and others. There are alternatives to some of these, but installing them may require more work than you are willing to put in (the convenience factor strikes again). You may have to run these servers as root and rely on other mechanisms to detect break-ins that might occur through them. .Pp The other big potential root holes in a system are the SUID-root and SGID binaries installed on the system. Most of these binaries, such as .Xr su 1 , reside in .Pa /bin , /sbin , /usr/bin , or .Pa /usr/sbin . While nothing is 100% safe, the system-default SUID and SGID binaries can be considered reasonably safe. Still, root holes are occasionally found in these binaries. A root hole was found in Xlib in 1998 that made .Xr xterm 1 Pq Pa ports/x11/xterm (which is typically SUID) vulnerable. It is better to be safe than sorry and the prudent sysadmin will restrict SUID binaries that only staff should run to a special group that only staff can access, and get rid of .Pq Dq Li "chmod 000" any SUID binaries that nobody uses. A server with no display generally does not need an .Xr xterm 1 binary. SGID binaries can be almost as dangerous. If an intruder can break an SGID-kmem binary the intruder might be able to read .Pa /dev/kmem and thus read the crypted password file, potentially compromising any passworded account. 
Alternatively an intruder who breaks group .Dq Li kmem can monitor keystrokes sent through PTYs, including PTYs used by users who log in through secure methods. An intruder that breaks the .Dq Li tty group can write to almost any user's TTY. If a user is running a terminal program or emulator with a keyboard-simulation feature, the intruder can potentially generate a data stream that causes the user's terminal to echo a command, which is then run as that user. .Sh SECURING USER ACCOUNTS User accounts are usually the most difficult to secure. While you can impose draconian access restrictions on your staff and *-out their passwords, you may not be able to do so with any general user accounts you might have. If you do have sufficient control then you may win out and be able to secure the user accounts properly. If not, you simply have to be more vigilant in your monitoring of those accounts. Use of SSH and Kerberos for user accounts is more problematic due to the extra administration and technical support required, but still a very good solution compared to a crypted password file. .Sh SECURING THE PASSWORD FILE The only sure fire way is to *-out as many passwords as you can and use SSH or Kerberos for access to those accounts. Even though the crypted password file .Pq Pa /etc/spwd.db can only be read by root, it may be possible for an intruder to obtain read access to that file even if the attacker cannot obtain root-write access. .Pp Your security scripts should always check for and report changes to the password file (see .Sx CHECKING FILE INTEGRITY below). .Sh SECURING THE KERNEL CORE, RAW DEVICES, AND FILE SYSTEMS If an attacker breaks root he can do just about anything, but there are certain conveniences. For example, most modern kernels have a packet sniffing device driver built in. Under .Fx it is called the .Xr bpf 4 device. An intruder will commonly attempt to run a packet sniffer on a compromised machine. 
You do not need to give the intruder the capability and most systems should not have the .Xr bpf 4 device compiled in. .Pp But even if you turn off the .Xr bpf 4 device, you still have .Pa /dev/mem and .Pa /dev/kmem to worry about. For that matter, the intruder can still write to raw disk devices. Also, there is another kernel feature called the module loader, .Xr kldload 8 . An enterprising intruder can use a KLD module to install his own .Xr bpf 4 device or other sniffing device on a running kernel. To avoid these problems you have to run the kernel at a higher security level, at least level 1. The security level can be set with a .Xr sysctl 8 on the .Va kern.securelevel variable. Once you have set the security level to 1, write access to raw devices will be denied and special .Xr chflags 1 flags, such as .Cm schg , will be enforced. You must also ensure that the .Cm schg flag is set on critical startup binaries, directories, and script files \(em everything that gets run up to the point where the security level is set. This might be overdoing it, and upgrading the system is much more difficult when you operate at a higher security level. You may compromise and run the system at a higher security level but not set the .Cm schg flag for every system file and directory under the sun. Another possibility is to simply mount .Pa / and .Pa /usr read-only. It should be noted that being too draconian in what you attempt to protect may prevent the all-important detection of an intrusion. .Pp The kernel runs with five different security levels. Any super-user process can raise the level, but no process can lower it. The security levels are: .Bl -tag -width flag .It Ic -1 Permanently insecure mode \- always run the system in insecure mode. This is the default initial value. .It Ic 0 Insecure mode \- immutable and append-only flags may be turned off. All devices may be read or written subject to their permissions. 
.It Ic 1 Secure mode \- the system immutable and system append-only flags may not be turned off; disks for mounted file systems, .Pa /dev/mem and .Pa /dev/kmem may not be opened for writing; .Pa /dev/io (if your platform has it) may not be opened at all; kernel modules (see .Xr kld 4 ) may not be loaded or unloaded. The kernel debugger may not be entered using the .Va debug.kdb.enter sysctl. A panic or trap cannot be forced using the .Va debug.kdb.panic , .Va debug.kdb.panic_str and other sysctl's. .It Ic 2 Highly secure mode \- same as secure mode, plus disks may not be opened for writing (except by .Xr mount 2 ) whether mounted or not. This level precludes tampering with file systems by unmounting them, but also inhibits running .Xr newfs 8 while the system is multi-user. .Pp In addition, kernel time changes are restricted to less than or equal to one second. Attempts to change the time by more than this will log the message .Dq Time adjustment clamped to +1 second . .It Ic 3 Network secure mode \- same as highly secure mode, plus IP packet filter rules (see .Xr ipfw 8 , .Xr ipfirewall 4 and .Xr pfctl 8 ) cannot be changed and .Xr dummynet 4 or .Xr pf 4 configuration cannot be adjusted. .El .Pp The security level can be configured with variables documented in .Xr rc.conf 5 . .Sh CHECKING FILE INTEGRITY: BINARIES, CONFIG FILES, ETC When it comes right down to it, you can only protect your core system configuration and control files so much before the convenience factor rears its ugly head. For example, using .Xr chflags 1 to set the .Cm schg bit on most of the files in .Pa / and .Pa /usr is probably counterproductive because while it may protect the files, it also closes a detection window. The last layer of your security onion is perhaps the most important \(em detection. The rest of your security is pretty much useless (or, worse, presents you with a false sense of safety) if you cannot detect potential incursions. 
Half the job of the onion is to slow down the attacker rather than stop him in order to give the detection layer a chance to catch him in the act. .Pp The best way to detect an incursion is to look for modified, missing, or unexpected files. The best way to look for modified files is from another (often centralized) limited-access system. Writing your security scripts on the extra-secure limited-access system makes them mostly invisible to potential attackers, and this is important. In order to take maximum advantage you generally have to give the limited-access box significant access to the other machines in the business, usually either by doing a read-only NFS export of the other machines to the limited-access box, or by setting up SSH keypairs to allow the limited-access box to SSH to the other machines. Except for its network traffic, NFS is the least visible method \(em allowing you to monitor the file systems on each client box virtually undetected. If your limited-access server is connected to the client boxes through a switch, the NFS method is often the better choice. If your limited-access server is connected to the client boxes through a hub or through several layers of routing, the NFS method may be too insecure (network-wise) and using SSH may be the better choice even with the audit-trail tracks that SSH lays. .Pp Once you give a limited-access box at least read access to the client systems it is supposed to monitor, you must write scripts to do the actual monitoring. Given an NFS mount, you can write scripts out of simple system utilities such as .Xr find 1 and .Xr md5 1 . It is best to physically .Xr md5 1 the client-box files at least once a day, and to test control files such as those found in .Pa /etc and .Pa /usr/local/etc even more often. When mismatches are found relative to the base MD5 information the limited-access machine knows is valid, it should scream at a sysadmin to go check it out. 
A good security script will also check for inappropriate SUID binaries and for new or deleted files on system partitions such as .Pa / and .Pa /usr . .Pp When using SSH rather than NFS, writing the security script is much more difficult. You essentially have to .Xr scp 1 the scripts to the client box in order to run them, making them visible, and for safety you also need to .Xr scp 1 the binaries (such as .Xr find 1 ) that those scripts use. The .Xr sshd 8 daemon on the client box may already be compromised. All in all, using SSH may be necessary when running over insecure links, but it is also a lot harder to deal with. .Pp A good security script will also check for changes to user and staff members' access configuration files: .Pa .rhosts , .shosts , .ssh/authorized_keys and so forth, files that might fall outside the purview of the MD5 check. .Pp If you have a huge amount of user disk space it may take too long to run through every file on those partitions. In this case, setting mount flags to disallow SUID binaries on those partitions is a good idea. The .Cm nosuid option (see .Xr mount 8 ) is what you want to look into. I would scan them anyway at least once a week, since the object of this layer is to detect a break-in whether or not the break-in is effective. .Pp Process accounting (see .Xr accton 8 ) is a relatively low-overhead feature of the operating system which I recommend using as a post-break-in evaluation mechanism. It is especially useful in tracking down how an intruder has actually broken into a system, assuming the file is still intact after the break-in occurs. .Pp Finally, security scripts should process the log files and the logs themselves should be generated in as secure a manner as possible \(em remote syslog can be very useful. An intruder tries to cover his tracks, and log files are critical to the sysadmin trying to track down the time and method of the initial break-in. 
One way to keep a permanent record of the log files is to run the system console to a serial port and collect the information on a continuing basis through a secure machine monitoring the consoles. .Sh PARANOIA A little paranoia never hurts. As a rule, a sysadmin can add any number of security features as long as they do not affect convenience, and can add security features that do affect convenience with some added thought. Even more importantly, a security administrator should mix it up a bit \(em if you use recommendations such as those given by this manual page verbatim, you give away your methodologies to the prospective attacker who also has access to this manual page. .Sh SPECIAL SECTION ON DoS ATTACKS This section covers Denial of Service attacks. A DoS attack is typically a packet attack. While there is not much you can do about modern spoofed packet attacks that saturate your network, you can generally limit the damage by ensuring that the attacks cannot take down your servers. .Bl -enum -offset indent .It Limiting server forks .It Limiting springboard attacks (ICMP response attacks, ping broadcast, etc.) .It Kernel Route Cache .El .Pp A common DoS attack is against a forking server that attempts to cause the server to eat processes, file descriptors, and memory until the machine dies. The .Xr inetd 8 server has several options to limit this sort of attack. It should be noted that while it is possible to prevent a machine from going down it is not generally possible to prevent a service from being disrupted by the attack. Read the .Xr inetd 8 manual page carefully and pay specific attention to the .Fl c , C , and .Fl R options. Note that spoofed-IP attacks will circumvent the .Fl C option to .Xr inetd 8 , so typically a combination of options must be used. Some standalone servers have self-fork-limitation parameters. 
.Pp The .Xr sendmail 8 daemon has its .Fl OMaxDaemonChildren option which tends to work much better than trying to use .Xr sendmail 8 Ns 's load limiting options due to the load lag. You should specify a .Va MaxDaemonChildren parameter when you start .Xr sendmail 8 high enough to handle your expected load but not so high that the computer cannot handle that number of .Nm sendmail Ns 's without falling on its face. It is also prudent to run .Xr sendmail 8 in .Dq queued mode .Pq Fl ODeliveryMode=queued and to run the daemon .Pq Dq Nm sendmail Fl bd separate from the queue-runs .Pq Dq Nm sendmail Fl q15m . If you still want real-time delivery you can run the queue at a much lower interval, such as .Fl q1m , but be sure to specify a reasonable .Va MaxDaemonChildren option for that .Xr sendmail 8 to prevent cascade failures. .Pp The .Xr syslogd 8 daemon can be attacked directly and it is strongly recommended that you use the .Fl s option whenever possible, and the .Fl a option otherwise. .Pp You should also be fairly careful with connect-back services such as tcpwrapper's reverse-identd, which can be attacked directly. You generally do not want to use the reverse-ident feature of tcpwrappers for this reason. .Pp It is a very good idea to protect internal services from external access by firewalling them off at your border routers. The idea here is to prevent saturation attacks from outside your LAN, not so much to protect internal services from network-based root compromise. Always configure an exclusive firewall, i.e., .So firewall everything .Em except ports A, B, C, D, and M-Z .Sc . This way you can firewall off all of your low ports except for certain specific services such as .Xr talkd 8 , .Xr sendmail 8 , and other internet-accessible services. 
If you try to configure the firewall the other way \(em as an inclusive or permissive firewall, there is a good chance that you will forget to .Dq close a couple of services or that you will add a new internal service and forget to update the firewall. You can still open up the high-numbered port range on the firewall to allow permissive-like operation without compromising your low ports. Also take note that .Fx allows you to control the range of port numbers used for dynamic binding via the various .Va net.inet.ip.portrange sysctl's .Pq Dq Li "sysctl net.inet.ip.portrange" , which can also ease the complexity of your firewall's configuration. I usually use a normal first/last range of 4000 to 5000, and a hiport range of 49152 to 65535, then block everything under 4000 off in my firewall (except for certain specific internet-accessible ports, of course). .Pp Another common DoS attack is called a springboard attack \(em to attack a server in a manner that causes the server to generate responses which then overload the server, the local network, or some other machine. The most common attack of this nature is the ICMP PING BROADCAST attack. The attacker spoofs ping packets sent to your LAN's broadcast address with the source IP address set to the actual machine they wish to attack. If your border routers are not configured to stomp on ping's to broadcast addresses, your LAN winds up generating sufficient responses to the spoofed source address to saturate the victim, especially when the attacker uses the same trick on several dozen broadcast addresses over several dozen different networks at once. Broadcast attacks of over a hundred and twenty megabits have been measured. A second common springboard attack is against the ICMP error reporting system. By constructing packets that generate ICMP error responses, an attacker can saturate a server's incoming network and cause the server to saturate its outgoing network with ICMP responses. 
This type of attack can also crash the server by running it out of .Vt mbuf Ns 's , especially if the server cannot drain the ICMP responses it generates fast enough. The .Fx kernel has a new kernel compile option called .Dv ICMP_BANDLIM which limits the effectiveness of these sorts of attacks. The last major class of springboard attacks is related to certain internal .Xr inetd 8 services such as the UDP echo service. An attacker simply spoofs a UDP packet with the source address being server A's echo port, and the destination address being server B's echo port, where server A and B are both on your LAN. The two servers then bounce this one packet back and forth between each other. The attacker can overload both servers and their LANs simply by injecting a few packets in this manner. Similar problems exist with the internal chargen port. A competent sysadmin will turn off all of these .Xr inetd 8 Ns -internal test services. .Sh ACCESS ISSUES WITH KERBEROS AND SSH There are a few issues with both Kerberos and SSH that need to be addressed if you intend to use them. Kerberos5 is an excellent authentication protocol but the kerberized .Xr telnet 1 sucks rocks. There are bugs that make it unsuitable for dealing with binary streams. Also, by default Kerberos does not encrypt a session unless you use the .Fl x option. SSH encrypts everything by default. .Pp SSH works quite well in every respect except when it is set up to forward encryption keys. What this means is that if you have a secure workstation holding keys that give you access to the rest of the system, and you .Xr ssh 1 to an unsecure machine, your keys become exposed. The actual keys themselves are not exposed, but .Xr ssh 1 installs a forwarding port for the duration of your login and if an attacker has broken root on the unsecure machine he can utilize that port to use your keys to gain access to any other machine that your keys unlock.
.Pp We recommend that you use SSH in combination with Kerberos whenever possible for staff logins. SSH can be compiled with Kerberos support. This reduces your reliance on potentially exposable SSH keys while at the same time protecting passwords via Kerberos. SSH keys should only be used for automated tasks from secure machines (something that Kerberos is unsuited to). We also recommend that you either turn off key-forwarding in the SSH configuration, or that you make use of the .Va from Ns = Ns Ar IP/DOMAIN option that SSH allows in its .Pa authorized_keys file to make the key only usable to entities logging in from specific machines. .Sh KNOBS AND TWEAKS .Fx provides several knobs and tweak handles that make some introspection information access more restricted. Some people consider this to improve system security, so the knobs are briefly listed here, together with controls which enable some mitigations of the hardware state leaks. .Pp Hardware mitigation sysctl knobs described below have been moved under .Pa machdep.mitigations , with backwards-compatibility shims to accept the existing names. A future change will rationalize the sense of the individual sysctls (so that enabled / true always indicates that the mitigation is active). For that reason the previous names remain the canonical way to set the mitigations, and are documented here. Backwards compatibility shims for the interim sysctls under .Pa machdep.mitigations will not be added. .Bl -tag -width security.bsd.unprivileged_proc_debug .It Dv security.bsd.see_other_uids Controls visibility and reachability of subjects (e.g., processes) and objects (e.g., sockets) owned by a different uid. The knob directly affects the .Dv kern.proc sysctls filtering of data, which results in restricted output from utilities like .Xr ps 1 . .It Dv security.bsd.see_other_gids Same, for subjects and objects owned by a different gid.
.It Dv security.bsd.see_jail_proc Same, for subjects and objects belonging to a different jail, including sub-jails. .It Dv security.bsd.conservative_signals When enabled, unprivileged users are only allowed to send job control and usual termination signals like .Dv SIGKILL , .Dv SIGINT , and .Dv SIGTERM , to the processes executing programs with changed uids. .It Dv security.bsd.unprivileged_proc_debug Controls availability of the process debugging facilities to non-root users. See also .Xr proccontrol 1 mode .Dv trace . .It Dv vm.pmap.pti Tunable, amd64-only. Enables a mode of operation of the virtual memory system where usermode page tables are sanitized to prevent the so-called Meltdown information leak on some Intel CPUs. By default, the system detects whether the CPU needs the workaround, and enables it automatically. See also .Xr proccontrol 1 mode .Dv kpti . .It Dv machdep.mitigations.flush_rsb_ctxsw amd64. Controls Return Stack Buffer flush on context switch, to prevent cross-process ret2spec attacks. Only needed, and only enabled by default, if the machine supports SMEP, otherwise IBRS would do necessary flushing on kernel entry anyway. .It Dv hw.mds_disable amd64 and i386. Controls Microarchitectural Data Sampling hardware information leak mitigation. .It Dv hw.spec_store_bypass_disable amd64 and i386. Controls Speculative Store Bypass hardware information leak mitigation. .It Dv hw.ibrs_disable amd64 and i386. Controls Indirect Branch Restricted Speculation hardware information leak mitigation. .It Dv machdep.syscall_ret_flush_l1d amd64. Controls force-flush of L1D cache on return from syscalls which report errors other than .Ev EEXIST , .Ev EAGAIN , .Ev EXDEV , .Ev ENOENT , .Ev ENOTCONN , and .Ev EINPROGRESS . This is mostly a paranoid setting added to prevent hypothetical exploitation of unknown gadgets for unknown hardware issues. The error code exclusion list is composed of the most common errors which typically occur during normal system operation.
.It Dv machdep.nmi_flush_l1d_sw amd64. Controls force-flush of L1D cache on NMI; this provides software assist for bhyve mitigation of L1 terminal fault hardware information leak. .It Dv hw.vmm.vmx.l1d_flush amd64. Controls the mitigation of L1 Terminal Fault in bhyve hypervisor. .It Dv vm.pmap.allow_2m_x_ept amd64. Allows the use of superpages for executable mappings under the EPT page table format used by hypervisors on Intel CPUs to map the guest physical address space to machine physical memory. May be disabled to work around a CPU Erratum called Machine Check Error Avoidance on Page Size Change. .It Dv machdep.mitigations.rngds.enable amd64 and i386. Controls mitigation of Special Register Buffer Data Sampling versus optimization of the MCU access. When set to zero, the mitigation is disabled, and the RDSEED and RDRAND instructions do not incur serialization overhead for shared buffer accesses, and do not serialize off-core memory accesses. .It Dv kern.elf32.aslr.enable Controls system-global Address Space Layout Randomization (ASLR) for normal non-PIE (Position Independent Executable) 32-bit ELF binaries. See also the .Xr proccontrol 1 .Dv aslr mode, also affected by the per-image control note flag. .It Dv kern.elf32.aslr.pie_enable Controls system-global Address Space Layout Randomization for position-independent (PIE) 32-bit binaries. .It Dv kern.elf32.aslr.honor_sbrk Makes ASLR less aggressive and more compatible with old binaries relying on the sbrk area. .It Dv kern.elf32.aslr.stack If ASLR is enabled for a binary, a non-zero value enables randomization of the stack. Otherwise, the stack is mapped at a fixed location determined by the process ABI. .It Dv kern.elf64.aslr.enable ASLR control for 64-bit ELF binaries. .It Dv kern.elf64.aslr.pie_enable ASLR control for 64-bit ELF PIEs. .It Dv kern.elf64.aslr.honor_sbrk ASLR sbrk compatibility control for 64-bit binaries. .It Dv kern.elf64.aslr.stack Controls stack address randomization for 64-bit binaries.
.It Dv kern.elf32.nxstack Enables non-executable stack for 32-bit processes. Enabled by default if supported by hardware and corresponding binary. .It Dv kern.elf64.nxstack Enables non-executable stack for 64-bit processes. .It Dv kern.elf32.allow_wx Enables mapping of simultaneously writable and executable pages for 32-bit processes. .It Dv kern.elf64.allow_wx Enables mapping of simultaneously writable and executable pages for 64-bit processes. .El .Sh SEE ALSO .Xr chflags 1 , .Xr find 1 , .Xr md5 1 , .Xr netstat 1 , .Xr openssl 1 , .Xr proccontrol 1 , .Xr ps 1 , .Xr ssh 1 , .Xr xdm 1 Pq Pa ports/x11/xorg-clients , .Xr group 5 , .Xr ttys 5 , .Xr accton 8 , .Xr init 8 , .Xr sshd 8 , .Xr sysctl 8 , .Xr syslogd 8 , .Xr vipw 8 .Sh HISTORY The .Nm manual page was originally written by .An Matthew Dillon and first appeared in .Fx 3.1 , December 1998. diff --git a/share/man/man9/VOP_READ_PGCACHE.9 b/share/man/man9/VOP_READ_PGCACHE.9 index f8f67eb00f13..8a99365eba28 100644 --- a/share/man/man9/VOP_READ_PGCACHE.9 +++ b/share/man/man9/VOP_READ_PGCACHE.9 @@ -1,131 +1,131 @@ -.\" Copyright (c) 2021 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2021 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd February 28, 2021 .Dt VOP_READ_PGCACHE 9 .Os .Sh NAME .Nm VOP_READ_PGCACHE .Nd read a file, fast .Sh SYNOPSIS .In sys/param.h .In sys/vnode.h .In sys/uio.h .Ft int .Fo VOP_READ_PGCACHE .Fa "struct vnode *vp" .Fa "struct uio *uio" .Fa "int ioflag" .Fa "struct ucred *cred" .Fc .Sh DESCRIPTION This entry point reads the contents of a file. The intent is to provide the data from caches, which do not require expensive operations or any disk IO. For instance, if filesystem uses normal VM page cache and maintains .Dv v_object lifetime, it can use .Xr vn_read_from_obj 9 helper to return data from the resident .Dv vp->v_object pages. .Pp The filesystem indicates support for the .Nm on specific vnode by setting the .Dv VIRF_PGREAD flag in .Dv vp->v_irflag . .Pp The function does not need to satisfy the whole request; it also might choose to not provide any data. In these cases, the .Fa uio must be advanced by the amount of read data, .Nm should return .Er EJUSTRETURN , and VFS would handle the rest of the read operation using the .Xr VOP_READ 9 . .Pp The VFS layer does the same deadlock avoidance for accessing userspace pages from .Nm as for .Xr VOP_READ 9 . 
.Pp Vnode is not locked on the call entry and should not be locked on return. For a filesystem that requires vnode lock to return any data, it does not make sense to implement .Nm (and set .Dv VIRF_PGREAD flag) since VFS arranges the call to .Xr VOP_READ 9 as needed. .Pp The arguments are: .Bl -tag -width ioflag .It Fa vp The vnode of the file. .It Fa uio The location of the data to be read. .It Fa ioflag Various flags, see .Xr VOP_READ 9 for the list. .It Fa cred The credentials of the caller. .El .Pp .Nm does not handle a non-zero .Fa ioflag argument. .Sh LOCKS The file should be referenced on entry and will still be referenced on exit. Rangelock covering the whole read range should be owned around the call. .Sh RETURN VALUES Zero is returned on success, when the whole request is satisfied, and no more data can be provided for it by any means. If more data can be returned, but .Nm was unable to provide it, .Er EJUSTRETURN must be returned. The .Dv uio records should be updated according to the partial operation done. .Pp Otherwise an error code is returned, same as from .Xr VOP_READ 9 . .Sh SEE ALSO .Xr uiomove 9 , .Xr vnode 9 , .Xr VOP_READ 9 diff --git a/share/man/man9/refcount.9 b/share/man/man9/refcount.9 index 0c8e4380aed3..78631f9a865a 100644 --- a/share/man/man9/refcount.9 +++ b/share/man/man9/refcount.9 @@ -1,194 +1,194 @@ .\" .\" Copyright (c) 2009 Hudson River Trading LLC .\" Written by: John H. Baldwin .\" All rights reserved. .\" -.\" Copyright (c) 2019 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2019 The FreeBSD Foundation .\" .\" Parts of this documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd October 12, 2022 .Dt REFCOUNT 9 .Os .Sh NAME .Nm refcount , .Nm refcount_init , .Nm refcount_acquire , .Nm refcount_release .Nd manage a simple reference counter .Sh SYNOPSIS .In sys/param.h .In sys/refcount.h .Ft void .Fn refcount_init "volatile u_int *count" "u_int value" .Ft u_int .Fn refcount_load "volatile u_int *count" .Ft u_int .Fn refcount_acquire "volatile u_int *count" .Ft bool .Fn refcount_acquire_checked "volatile u_int *count" .Ft bool .Fn refcount_acquire_if_not_zero "volatile u_int *count" .Ft bool .Fn refcount_release "volatile u_int *count" .Ft bool .Fn refcount_release_if_last "volatile u_int *count" .Ft bool .Fn refcount_release_if_not_last "volatile u_int *count" .Sh DESCRIPTION The .Nm functions provide an API to manage a simple reference counter. The caller provides the storage for the counter in an unsigned integer. A pointer to this integer is passed via .Fa count . 
Usually the counter is used to manage the lifetime of an object and is stored as a member of the object. .Pp Currently all functions are implemented as static inline. .Pp The .Fn refcount_init function is used to set the initial value of the counter to .Fa value . It is normally used when creating a reference-counted object. .Pp The .Fn refcount_load function returns a snapshot of the counter value. This value may immediately become out-of-date in the absence of external synchronization. .Fn refcount_load should be used instead of relying on the properties of the .Vt volatile qualifier. .Pp The .Fn refcount_acquire function is used to acquire a new reference. It returns the counter value before the new reference was acquired. The caller is responsible for ensuring that it holds a valid reference while obtaining a new reference. For example, if an object is stored on a list and the list holds a reference on the object, then holding a lock that protects the list provides sufficient protection for acquiring a new reference. .Pp The .Fn refcount_acquire_checked variant performs the same operation as .Fn refcount_acquire , but additionally checks that the .Fa count value does not overflow as a result of the operation. It returns .Dv true if the reference was successfully obtained, and .Dv false if it was not, due to the overflow. .Pp The .Fn refcount_acquire_if_not_zero function is yet another variant of .Fn refcount_acquire , which only obtains the reference when some reference already exists. In other words, .Fa *count must be already greater than zero for the function to succeed, in which case the return value is .Dv true , otherwise .Dv false is returned. .Pp The .Fn refcount_release function is used to release an existing reference. The function returns true if the reference being released was the last reference; otherwise, it returns false.
.Pp The .Fn refcount_release_if_last and .Fn refcount_release_if_not_last functions are variants of .Fn refcount_release which only drop the reference when it is or is not the last reference, respectively. In other words, .Fn refcount_release_if_last returns .Dv true when .Fa *count is equal to one, in which case it is decremented to zero. Otherwise, .Fa *count is not modified and the function returns .Dv false . Similarly, .Fn refcount_release_if_not_last returns .Dv true when .Fa *count is greater than one, in which case .Fa *count is decremented. Otherwise, if .Fa *count is equal to one, the reference is not released and the function returns .Dv false . .Pp Note that these routines do not provide any inter-CPU synchronization or data protection for managing the counter. The caller is responsible for any additional synchronization needed by consumers of any containing objects. In addition, the caller is also responsible for managing the life cycle of any containing objects including explicitly releasing any resources when the last reference is released. .Pp The .Fn refcount_release unconditionally executes a release fence (see .Xr atomic 9 ) before releasing the reference, which synchronizes with an acquire fence executed right before returning the .Dv true value. This ensures that the destructor, supposedly executed by the caller after the last reference was dropped, sees all updates done during the lifetime of the object. .Sh RETURN VALUES The .Nm refcount_release function returns true when releasing the last reference and false when releasing any other reference. .Sh HISTORY These functions were introduced in .Fx 6.0 . 
diff --git a/stand/man/loader.efi.8 b/stand/man/loader.efi.8 index 2128cc99c2a5..82b9aa091de8 100644 --- a/stand/man/loader.efi.8 +++ b/stand/man/loader.efi.8 @@ -1,482 +1,482 @@ .\" .\" SPDX-License-Identifier: BSD-2-Clause .\" .\" Copyright (c) 2019-2022 Netflix, Inc .\" Copyright (c) 2022 Mateusz Piotrowski <0mp@FreeBSD.org> -.\" Copyright 2022 The FreeBSD Foundation, Inc. +.\" Copyright 2022 The FreeBSD Foundation .\" .\" Part of this documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd September 3, 2024 .Dt LOADER.EFI 8 .Os .Sh NAME .Nm loader.efi .Nd UEFI kernel loader .Sh DESCRIPTION On UEFI systems, .Nm loads the kernel. 
.Pp .Xr boot1.efi 8 is used to load .Nm when it is placed within a UFS or ZFS file system. Alternatively, .Nm is used directly when configured with .Xr efibootmgr 8 , or when placed directly as the default boot program as described in .Xr uefi 8 . When a system is built using .Xr bsdinstall 8 , .Nm will be used directly. .Ss Console Considerations The EFI BIOS provides a generic console. In .Nm this is selected by specifying .Dq efi using the .Dv console variable. .Nm examines the .Dv 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConOut UEFI environment variable to guess what the .Dq efi console points to. .Nm will output its prompts and menus to all the places specified by ConOut. However, the .Fx kernel has a limitation when more than one console is present. The kernel outputs to all configured consoles. Only the primary console will get the log messages from the .Xr rc 8 system, and prompts for things like .Xr geli 8 passwords. If .Nm finds a video device first, then .Nm tells the kernel to use the video console as primary. Likewise, if a serial device is first in the .Dv ConOut list, the serial port will be the primary console. .Pp If there is no .Dv ConOut variable, both serial and video are attempted. .Nm uses the .Dq efi console for the video (which may or may not work) and .Dq comconsole for the serial on .Dv COM1 at the default baud rate. The kernel will use a dual console, with the video console primary if a UEFI graphics device is detected, or the serial console as primary if not. .Pp On x86 platforms, if you wish to redirect the loader's output to a serial port when the EFI BIOS doesn't support it, or to a serial port that isn't the one the EFI BIOS redirects its output to, set .Dv console to .Dq comconsole . The default port is .Dv COM1 with an I/O address of 0x3f8. .Dv comconsole_port is used to set this to a different port address. .Dv comconsole_speed is used to set the speed of the serial port (the default is 9600).
If you have .Dv console set to .Dq efi,comconsole you will get output on both the EFI console and the serial port. If this causes a doubling of characters, set .Dv console to .Dq efi , since your EFI BIOS is redirecting to the serial port already. .Pp If your EFI BIOS redirects the serial port, you may need to tell the kernel which address to use. EFI uses ACPI's UID to identify the serial port, but .Nm does not have an ACPI parser, so it cannot convert that to an I/O port. The .Fx kernel initializes its consoles before it can decode ACPI resources. The .Fx kernel will look at the .Dv hw.uart.console variable to set its serial console. Its format should be described in .Xr uart 4 but is not. Set it to .Dq io:0x3f8,br:115200 with the proper port address. PCI or memory mapped ports are beyond the scope of this man page. .Pp The serial ports are assigned as follows on IBM PC compatible systems: .Bl -column -offset indent ".Sy Windows Name" ".Sy I/O Port Address" ".Sy Typical FreeBSD device" .It Sy Windows Name Ta Sy I/O Port Address Ta Sy Typical FreeBSD device .It COM1 Ta 0x3f8 Ta Pa /dev/uart0 .It COM2 Ta 0x2f8 Ta Pa /dev/uart1 .It COM3 Ta 0x3e8 Ta Pa /dev/uart2 .It COM4 Ta 0x2e8 Ta Pa /dev/uart3 .El Though .Dv COM3 and .Dv COM4 can vary. .Pp .Ss Primary Console The primary console is set using the boot flags. These command line arguments set corresponding flags for the kernel. These flags can be controlled by setting loader environment variables to .Dq yes or .Dq no . Boot flags may be set on the command line to the boot command. Inside the kernel, the RB_ flags are used to control behavior, sometimes in architecturally specific ways and are included to aid in discovery of any behavior not covered in this document. 
.Bl -column -offset indent ".Sy boot flag" ".Sy loader variable" ".Sy Kernel RB_ flag" .It Sy boot flag Ta Sy loader variable Ta Sy Kernel RB_ flag .It Fl a Ta Dv boot_askname Ta Va RB_ASKNAME .It Fl c Ta Dv boot_cdrom Ta Va RB_CDROM .It Fl d Ta Dv boot_ddb Ta Va RB_KDB .It Fl r Ta Dv boot_dfltroot Ta Va RB_DFLTROOT .It Fl D Ta Dv boot_multiple Ta Va RB_MULTIPLE .It Fl m Ta Dv boot_mute Ta Va RB_MUTE .It Fl g Ta Dv boot_gdb Ta Va RB_GDB .It Fl h Ta Dv boot_serial Ta Va RB_SERIAL .It Fl p Ta Dv boot_pause Ta Va RB_PAUSE .It Fl P Ta Dv boot_probe Ta Va RB_PROBE .It Fl s Ta Dv boot_single Ta Va RB_SINGLE .It Fl v Ta Dv boot_verbose Ta Va RB_VERBOSE .El And the following flags determine the primary console: .Bl -column -offset indent ".Sy Flags" ".Sy Kernel Flags" ".Sy Kernel Consoles" ".Sy Primary Console" .It Sy Flags Ta Sy Kernel Flags Ta Sy Kernel Consoles Ta Sy Primary Console .It none Ta 0 Ta Video Ta Video .It Fl h Ta RB_SERIAL Ta Serial Ta Serial .It Fl D Ta RB_MULTIPLE Ta Serial, Video Ta Video .It Fl Dh Ta RB_SERIAL | RB_MULTIPLE Ta Serial, Video Ta Serial .El .Pp .Nm does not implement the probe .Fl P functionality where we use the video console if a keyboard is connected and a serial console otherwise. .Ss Staging Slop The kernel must parse the firmware memory map tables to know what memory it can use. Since it must allocate memory to do this, .Nm ensures there's extra memory available, called .Dq slop , after everything it loads .Po the kernel, modules and metadata .Pc for the kernel to bootstrap the memory allocator. .Pp By default, amd64 reserves 8MB. The .Ic staging_slop command allows for tuning the slop size. It takes a single argument, the size of the slop in bytes. .Ss amd64 Nocopy .Nm will load the kernel into memory that is 2MB aligned below 4GB. It cannot load to a fixed address because the UEFI firmware may reserve arbitrary memory for its use at runtime. Prior to .Fx 13.1 , kernels retained the old BIOS-boot protocol of loading at exactly 2MB.
Such kernels must be copied from their loaded location to 2MB prior to starting them up. The .Ic copy_staging command is used to enable this copying for older kernels. It takes a single argument which can be one of .Bl -tag -width disable .It Ar disable Force-disable copying staging area to .Ad 2M . .It Ar enable Force-enable copying staging area to .Ad 2M . .It Ar auto Selects the behaviour based on the kernel's capability of bootstrapping from non-2M physical base. The kernel reports this capability by exporting the symbol .Va kernphys . .El .Pp Arm64 loaders have operated in the .Sq nocopy mode from their inception, so there is no .Ic copy_staging command on that platform. Riscv, 32-bit arm and arm64 have always loaded at any .Ad 2MB aligned location, so do not provide .Ic copy_staging . .Pp .Bd -ragged -offset indent .Sy Note. BIOS loaders on i386 and amd64 put the staging area starting at the physical address .Ad 2M , then enable paging with identical mapping for the low .Ad 1G . The initial port of .Nm followed the same scheme for handing control to the kernel, since it avoided modifications for the loader/kernel hand-off protocol, and for the kernel page table bootstrap. .Pp This approach is incompatible with the UEFI specification, and as a practical matter, caused trouble on many boards, because UEFI firmware is free to use any memory for its own needs. Applications like .Nm must only use memory explicitly allocated using boot interfaces. The original way also potentially destroyed UEFI runtime interfaces data. .Pp Eventually, .Nm and the kernel were improved to avoid this problem. .Ed .Ss amd64 Faults Because it executes in x86 protected mode, the amd64 version of .Nm is susceptible to CPU faults due to programmer mistakes and memory corruption. To make debugging such faults easier, amd64 .Nm can provide detailed reporting of the CPU state at the time of the fault.
.Pp The .Ic grab_faults command installs a handler for faults directly in the IDT, avoiding the use of the UEFI debugging interface .Fn EFI_DEBUG_SUPPORT_PROTOCOL.RegisterExceptionCallback . That interface is left available for advanced debuggers in the UEFI environment. The .Ic ungrab_faults command tries to deinstall the fault handler, returning TSS and IDT CPU tables to their pre-installation state. The .Ic fault command produces a fault in the .Nm environment for testing purposes, by executing the .Ic ud2 processor instruction. .Sh FILES .Bl -tag -width "/boot/loader.efi" .It Pa /boot/loader.efi The location of the UEFI kernel loader within the system. .El .Ss EFI System Partition .Nm is installed on the ESP (EFI System Partition) in one of the following locations: .Bl -tag -width "efi/freebsd/loader.efi" .It Pa efi/boot/bootXXX.efi The default location for any EFI loader .Po see .Xr uefi 8 for values to replace .Ql XXX with .Pc . .It Pa efi/freebsd/loader.efi The location reserved specifically for the .Fx EFI loader. .El .Pp The default location for the ESP mount point is documented in .Xr hier 7 . .Sh EXAMPLES .Ss Updating loader.efi on the ESP The following example shows how to install a new .Nm on the ESP. The exact placement is complicated due to the diversity of installations, setups and situations. In this section, paths that are all lower case are Unix paths. Paths that are all upper case are relative to the ESP mount point, though they may appear as lower case on your system because the FAT filesystem of the ESP is case insensitive. .Pp Locate the ESP, which has its own partition type of .Dq efi : .Bd -literal -offset indent # gpart show nda0 => 40 7501476448 nda0 GPT (3.5T) 40 614400 1 efi (300M) 614440 7500862048 2 freebsd-zfs (3.5T) .Ed .Pp The name of the ESP on this system is .Pa nda0p1 . By default, this will be mounted on .Pa /boot/efi .
To check: .Bd -literal -offset indent # mount | grep nda0p1 /dev/nda0p1 on /boot/efi (msdosfs, local) .Ed If it's not mounted, you will need to mount it: .Bd -literal -offset indent # mount -t msdosfs /dev/nda0p1 /boot/efi .Ed .Pp .Xr efibootmgr 8 reports what we booted from. .Bd -literal -offset indent # efibootmgr -v Boot to FW : false BootCurrent: 0001 Timeout : 2 seconds BootOrder : 0000, 0001, 0003, 0004, 0005, 0006, 0001, 0008, 000A, 000B, 000C, 000E, 0007 ... +Boot0001* FreeBSD ZPOOL HD(1,GPT,b5d0f86b-265d-1e1b-18aa-0ed55e1e73bd,0x28,0x96000)/File(\EFI\FREEBSD\LOADER.EFI) nda0p1:/EFI/FREEBSD/LOADER.EFI /boot/efi//EFI/FREEBSD/LOADER.EFI ... .Ed Often there are several options, depending on the BIOS. The entry that we booted with is marked with a .Sq + at the start of the line, as shown above. So in this case, this firmware is using .Pa /EFI/FREEBSD/LOADER.EFI from the ESP. Often times it will be the UEFI .Dq default loader, which varies by architecture. .Bl -column -offset indent "Architecture" "Default Path" .It Sy Architecture Ta Sy Default Path .It amd64 Ta Pa /EFI/BOOT/BOOTX64.EFI .It arm Ta Pa /EFI/BOOT/BOOTARM.EFI .It arm64 Ta Pa /EFI/BOOT/BOOTAA64.EFI .It i386 Ta Pa /EFI/BOOT/BOOTIA32.EFI .It riscv Ta Pa /EFI/BOOT/BOOTRISCV64.EFI .El However, care must be taken: some multiple-boot environments rely on a special .Pa bootXXX.efi to function. Before updating a .Pa bootXXX.efi file, make sure it is the FreeBSD boot loader before updating it: .Bd -literal -offset indent # strings /boot/efi/EFI/BOOT/BOOTX64.EFI | grep FreeBSD | grep EFI FreeBSD/amd64 EFI loader, Revision 3.0 .Ed .Pp .Xr bsdinstall 8 copies .Pa loader.efi to the default name if there wasn't one there before. 
Check to see if they are copies before updating (with X64 substituted using the above table): .Bd -literal -offset indent # cmp /boot/efi/EFI/FREEBSD/LOADER.EFI /boot/efi/EFI/BOOT/BOOTX64.EFI .Ed Copy the loader: .Bd -literal -offset indent # cp /boot/loader.efi /boot/efi/EFI/FREEBSD/LOADER.EFI .Ed replacing the all caps part of the example with the proper path. .Pp If the ESP path was .Pa /FREEBSD/LOADER.EFI and LOADER.EFI and BOOTX64.EFI were identical in the cmp step, copy the loader to the default location: .Bd -literal -offset indent # cp /boot/loader.efi /boot/efi/EFI/BOOT/BOOTX64.EFI .Ed .Pp Finally, if you mounted the ESP, you may wish to unmount it. .Bd -literal -offset indent # umount /boot/efi .Ed .Sh SEE ALSO .Xr loader 8 , .Xr uefi 8 .Sh BUGS Non-x86 serial console handling is even more confusing and less well documented. .Pp Sometimes when the serial port speed isn't set, 9600 is used. Other times the result is typically 115200 since the speed remains unchanged from the default. .Pp U-Boot implements a subset of the UEFI standard. Some versions do not support fetching loader variables, so .Pa efibootmgr may not work. In addition, .Pa efibootmgr is not supported on armv7 or riscv. In these instances, the user has to understand what was booted to update it properly (and in most cases, it will be the FreeBSD path and the UEFI default so just copy loader.efi there if there are loaders there). Typically in these embedded situations, there is only one .efi file (loader.efi or a copy of loader.efi). The path to this file is typically the default removable path above. .Pp Managing booting multiple OSes on UEFI varies greatly, so exercise extra caution when updating the UEFI default loader. .Pp The old, now obsolete, boot1.efi was installed as bootx64.efi in .Fx 10 and earlier. Since it was quite limited in functionality, we created very small ESPs by default. A modern loader.efi will not fit.
However, if the old boot1.efi still works, there's no need to update it since it will chain boot /boot/loader.efi from a copy that make installworld updates. diff --git a/sys/dev/mgb/if_mgb.c b/sys/dev/mgb/if_mgb.c index 7ac93ec020c4..9308dd9b97b7 100644 --- a/sys/dev/mgb/if_mgb.c +++ b/sys/dev/mgb/if_mgb.c @@ -1,1617 +1,1617 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2019 The FreeBSD Foundation, Inc. + * Copyright (c) 2019 The FreeBSD Foundation * * This driver was written by Gerald ND Aryeetey * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include /* * Microchip LAN7430/LAN7431 PCIe to Gigabit Ethernet Controller driver. 
/*
 * PCI vendor/device IDs claimed by this driver, with the description
 * string attached to each.  The table is referenced from
 * mgb_sctx_init.isc_vendor_info and from IFLIB_PNP_INFO(); PVID_END
 * terminates it.
 */
static const pci_vendor_info_t mgb_vendor_info_array[] = {
	PVID(MGB_MICROCHIP_VENDOR_ID, MGB_LAN7430_DEVICE_ID,
	    "Microchip LAN7430 PCIe Gigabit Ethernet Controller"),
	PVID(MGB_MICROCHIP_VENDOR_ID, MGB_LAN7431_DEVICE_ID,
	    "Microchip LAN7431 PCIe Gigabit Ethernet Controller"),
	PVID_END
};
ifdi_intr_enable_t mgb_intr_enable_all; static ifdi_intr_disable_t mgb_intr_disable_all; /* IFLIB_TXRX methods */ static int mgb_isc_txd_encap(void *, if_pkt_info_t); static void mgb_isc_txd_flush(void *, uint16_t, qidx_t); static int mgb_isc_txd_credits_update(void *, uint16_t, bool); static int mgb_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t); static int mgb_isc_rxd_pkt_get(void *, if_rxd_info_t); static void mgb_isc_rxd_refill(void *, if_rxd_update_t); static void mgb_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t); /* Interrupts */ static driver_filter_t mgb_legacy_intr; static driver_filter_t mgb_admin_intr; static driver_filter_t mgb_rxq_intr; static bool mgb_intr_test(struct mgb_softc *); /* MII methods */ static miibus_readreg_t mgb_miibus_readreg; static miibus_writereg_t mgb_miibus_writereg; static miibus_linkchg_t mgb_miibus_linkchg; static miibus_statchg_t mgb_miibus_statchg; static int mgb_media_change(if_t); static void mgb_media_status(if_t, struct ifmediareq *); /* Helper/Test functions */ static int mgb_test_bar(struct mgb_softc *); static int mgb_alloc_regs(struct mgb_softc *); static int mgb_release_regs(struct mgb_softc *); static void mgb_get_ethaddr(struct mgb_softc *, struct ether_addr *); static int mgb_wait_for_bits(struct mgb_softc *, int, int, int); /* H/W init, reset and teardown helpers */ static int mgb_hw_init(struct mgb_softc *); static int mgb_hw_teardown(struct mgb_softc *); static int mgb_hw_reset(struct mgb_softc *); static int mgb_mac_init(struct mgb_softc *); static int mgb_dmac_reset(struct mgb_softc *); static int mgb_phy_reset(struct mgb_softc *); static int mgb_dma_init(struct mgb_softc *); static int mgb_dma_tx_ring_init(struct mgb_softc *, int); static int mgb_dma_rx_ring_init(struct mgb_softc *, int); static int mgb_dmac_control(struct mgb_softc *, int, int, enum mgb_dmac_cmd); static int mgb_fct_control(struct mgb_softc *, int, int, enum mgb_fct_cmd); 
/*
 * newbus method table for the "mgb" driver (see mgb_driver below).
 * Probe, attach, detach, shutdown, suspend and resume are routed to the
 * generic iflib_device_* entry points; the only driver-specific entries
 * are device_register and the four miibus callbacks.
 */
static device_method_t mgb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_register,	mgb_register),
	DEVMETHOD(device_probe,		iflib_device_probe),
	DEVMETHOD(device_attach,	iflib_device_attach),
	DEVMETHOD(device_detach,	iflib_device_detach),
	DEVMETHOD(device_shutdown,	iflib_device_shutdown),
	DEVMETHOD(device_suspend,	iflib_device_suspend),
	DEVMETHOD(device_resume,	iflib_device_resume),

	/* MII Interface */
	DEVMETHOD(miibus_readreg,	mgb_miibus_readreg),
	DEVMETHOD(miibus_writereg,	mgb_miibus_writereg),
	DEVMETHOD(miibus_linkchg,	mgb_miibus_linkchg),
	DEVMETHOD(miibus_statchg,	mgb_miibus_statchg),

	/* Table terminator. */
	DEVMETHOD_END
};
*/ DRIVER_MODULE_ORDERED(miibus, mgb, miibus_driver, miibus_devclass, NULL, NULL, SI_ORDER_ANY); #endif /* MIIBUS_DEBUG */ DRIVER_MODULE(miibus, mgb, miibus_driver, miibus_devclass, NULL, NULL); MODULE_DEPEND(mgb, pci, 1, 1, 1); MODULE_DEPEND(mgb, ether, 1, 1, 1); MODULE_DEPEND(mgb, miibus, 1, 1, 1); MODULE_DEPEND(mgb, iflib, 1, 1, 1); static device_method_t mgb_iflib_methods[] = { DEVMETHOD(ifdi_attach_pre, mgb_attach_pre), DEVMETHOD(ifdi_attach_post, mgb_attach_post), DEVMETHOD(ifdi_detach, mgb_detach), DEVMETHOD(ifdi_init, mgb_init), DEVMETHOD(ifdi_stop, mgb_stop), DEVMETHOD(ifdi_tx_queues_alloc, mgb_tx_queues_alloc), DEVMETHOD(ifdi_rx_queues_alloc, mgb_rx_queues_alloc), DEVMETHOD(ifdi_queues_free, mgb_queues_free), DEVMETHOD(ifdi_msix_intr_assign, mgb_msix_intr_assign), DEVMETHOD(ifdi_tx_queue_intr_enable, mgb_tx_queue_intr_enable), DEVMETHOD(ifdi_rx_queue_intr_enable, mgb_rx_queue_intr_enable), DEVMETHOD(ifdi_intr_enable, mgb_intr_enable_all), DEVMETHOD(ifdi_intr_disable, mgb_intr_disable_all), #if 0 /* Not yet implemented IFLIB methods */ /* * Set multicast addresses, mtu and promiscuous mode */ DEVMETHOD(ifdi_multi_set, mgb_multi_set), DEVMETHOD(ifdi_mtu_set, mgb_mtu_set), DEVMETHOD(ifdi_promisc_set, mgb_promisc_set), /* * Needed for VLAN support */ DEVMETHOD(ifdi_vlan_register, mgb_vlan_register), DEVMETHOD(ifdi_vlan_unregister, mgb_vlan_unregister), DEVMETHOD(ifdi_needs_restart, mgb_if_needs_restart), /* * Needed for WOL support * at the very least. 
/*
 * Static iflib shared-context template; mgb_register() returns a pointer
 * to it.  Each queue set is described as two queues: index 0 is the
 * descriptor ring and index 1 is the head write-back queue, which is why
 * the per-queue descriptor counts below are given as {ring, 1}.
 */
static struct if_shared_ctx mgb_sctx_init = {
	.isc_magic = IFLIB_MAGIC,
	.isc_q_align = PAGE_SIZE,
	.isc_admin_intrcnt = 1,
	.isc_flags = IFLIB_DRIVER_MEDIA /* | IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ*/,
	.isc_vendor_info = mgb_vendor_info_array,
	.isc_driver_version = "1",
	.isc_driver = &mgb_iflib_driver,
	/* 2 queues per set for TX and RX (ring queue, head writeback queue) */
	.isc_ntxqs = 2,

	.isc_tx_maxsize = MGB_DMA_MAXSEGS * MCLBYTES,
	/* isc_tx_nsegments is filled in at attach time by mgb_attach_pre(). */
	/* .isc_tx_nsegments = MGB_DMA_MAXSEGS, */
	.isc_tx_maxsegsize = MCLBYTES,

	.isc_ntxd_min = {1, 1}, /* Will want to make this bigger */
	.isc_ntxd_max = {MGB_DMA_RING_SIZE, 1},
	.isc_ntxd_default = {MGB_DMA_RING_SIZE, 1},

	.isc_nrxqs = 2,

	/* Receive buffers are a single segment of at most MCLBYTES. */
	.isc_rx_maxsize = MCLBYTES,
	.isc_rx_nsegments = 1,
	.isc_rx_maxsegsize = MCLBYTES,

	.isc_nrxd_min = {1, 1}, /* Will want to make this bigger */
	.isc_nrxd_max = {MGB_DMA_RING_SIZE, 1},
	.isc_nrxd_default = {MGB_DMA_RING_SIZE, 1},

	.isc_nfl = 1, /*one free list since there is only one queue */
#if 0 /* UNUSED_CTX */
	/* Compiled out: TSO limits, unused by the current driver. */

	.isc_tso_maxsize = MGB_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
	.isc_tso_maxsegsize = MGB_TX_MAXSEGSIZE,
#endif /* UNUSED_CTX */
};
ether_addr hwaddr; struct mii_data *miid; sc = iflib_get_softc(ctx); sc->ctx = ctx; sc->dev = iflib_get_dev(ctx); scctx = iflib_get_softc_ctx(ctx); /* IFLIB required setup */ scctx->isc_txrx = &mgb_txrx; scctx->isc_tx_nsegments = MGB_DMA_MAXSEGS; /* Ring desc queues */ scctx->isc_txqsizes[0] = sizeof(struct mgb_ring_desc) * scctx->isc_ntxd[0]; scctx->isc_rxqsizes[0] = sizeof(struct mgb_ring_desc) * scctx->isc_nrxd[0]; /* Head WB queues */ scctx->isc_txqsizes[1] = sizeof(uint32_t) * scctx->isc_ntxd[1]; scctx->isc_rxqsizes[1] = sizeof(uint32_t) * scctx->isc_nrxd[1]; /* XXX: Must have 1 txqset, but can have up to 4 rxqsets */ scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; /* scctx->isc_tx_csum_flags = (CSUM_TCP | CSUM_UDP) | (CSUM_TCP_IPV6 | CSUM_UDP_IPV6) | CSUM_TSO */ scctx->isc_tx_csum_flags = 0; scctx->isc_capabilities = scctx->isc_capenable = 0; #if 0 /* * CSUM, TSO and VLAN support are TBD */ IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 | IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO | IFCAP_JUMBO_MTU; scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER; #endif /* get the BAR */ error = mgb_alloc_regs(sc); if (error != 0) { device_printf(sc->dev, "Unable to allocate bus resource: registers.\n"); goto fail; } error = mgb_test_bar(sc); if (error != 0) goto fail; error = mgb_hw_init(sc); if (error != 0) { device_printf(sc->dev, "MGB device init failed. 
(err: %d)\n", error); goto fail; } switch (pci_get_device(sc->dev)) { case MGB_LAN7430_DEVICE_ID: phyaddr = 1; break; case MGB_LAN7431_DEVICE_ID: default: phyaddr = MII_PHY_ANY; break; } /* XXX: Would be nice(r) if locked methods were here */ error = mii_attach(sc->dev, &sc->miibus, iflib_get_ifp(ctx), mgb_media_change, mgb_media_status, BMSR_DEFCAPMASK, phyaddr, MII_OFFSET_ANY, MIIF_DOPAUSE); if (error != 0) { device_printf(sc->dev, "Failed to attach MII interface\n"); goto fail; } miid = device_get_softc(sc->miibus); scctx->isc_media = &miid->mii_media; scctx->isc_msix_bar = pci_msix_table_bar(sc->dev); /** Setup PBA BAR **/ rid = pci_msix_pba_bar(sc->dev); if (rid != scctx->isc_msix_bar) { sc->pba = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->pba == NULL) { error = ENXIO; device_printf(sc->dev, "Failed to setup PBA BAR\n"); goto fail; } } mgb_get_ethaddr(sc, &hwaddr); if (ETHER_IS_BROADCAST(hwaddr.octet) || ETHER_IS_MULTICAST(hwaddr.octet) || ETHER_IS_ZERO(hwaddr.octet)) ether_gen_addr(iflib_get_ifp(ctx), &hwaddr); /* * XXX: if the MAC address was generated the linux driver * writes it back to the device. */ iflib_set_mac(ctx, hwaddr.octet); /* Map all vectors to vector 0 (admin interrupts) by default. */ CSR_WRITE_REG(sc, MGB_INTR_VEC_RX_MAP, 0); CSR_WRITE_REG(sc, MGB_INTR_VEC_TX_MAP, 0); CSR_WRITE_REG(sc, MGB_INTR_VEC_OTHER_MAP, 0); return (0); fail: mgb_detach(ctx); return (error); } static int mgb_attach_post(if_ctx_t ctx) { struct mgb_softc *sc; sc = iflib_get_softc(ctx); device_printf(sc->dev, "Interrupt test: %s\n", (mgb_intr_test(sc) ? "PASS" : "FAIL")); return (0); } static int mgb_detach(if_ctx_t ctx) { struct mgb_softc *sc; int error; sc = iflib_get_softc(ctx); /* XXX: Should report errors but still detach everything. 
*/ error = mgb_hw_teardown(sc); /* Release IRQs */ iflib_irq_free(ctx, &sc->rx_irq); iflib_irq_free(ctx, &sc->admin_irq); if (sc->miibus != NULL) device_delete_child(sc->dev, sc->miibus); if (sc->pba != NULL) error = bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->pba), sc->pba); sc->pba = NULL; error = mgb_release_regs(sc); return (error); } static int mgb_media_change(if_t ifp) { struct mii_data *miid; struct mii_softc *miisc; struct mgb_softc *sc; if_ctx_t ctx; int needs_reset; ctx = if_getsoftc(ifp); sc = iflib_get_softc(ctx); miid = device_get_softc(sc->miibus); LIST_FOREACH(miisc, &miid->mii_phys, mii_list) PHY_RESET(miisc); needs_reset = mii_mediachg(miid); if (needs_reset != 0) ifp->if_init(ctx); return (needs_reset); } static void mgb_media_status(if_t ifp, struct ifmediareq *ifmr) { struct mgb_softc *sc; struct mii_data *miid; sc = iflib_get_softc(if_getsoftc(ifp)); miid = device_get_softc(sc->miibus); if ((if_getflags(ifp) & IFF_UP) == 0) return; mii_pollstat(miid); ifmr->ifm_active = miid->mii_media_active; ifmr->ifm_status = miid->mii_media_status; } static int mgb_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int ntxqs, int ntxqsets) { struct mgb_softc *sc; struct mgb_ring_data *rdata; int q; sc = iflib_get_softc(ctx); KASSERT(ntxqsets == 1, ("ntxqsets = %d", ntxqsets)); rdata = &sc->tx_ring_data; for (q = 0; q < ntxqsets; q++) { KASSERT(ntxqs == 2, ("ntxqs = %d", ntxqs)); /* Ring */ rdata->ring = (struct mgb_ring_desc *) vaddrs[q * ntxqs + 0]; rdata->ring_bus_addr = paddrs[q * ntxqs + 0]; /* Head WB */ rdata->head_wb = (uint32_t *) vaddrs[q * ntxqs + 1]; rdata->head_wb_bus_addr = paddrs[q * ntxqs + 1]; } return (0); } static int mgb_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs, int nrxqs, int nrxqsets) { struct mgb_softc *sc; struct mgb_ring_data *rdata; int q; sc = iflib_get_softc(ctx); KASSERT(nrxqsets == 1, ("nrxqsets = %d", nrxqsets)); rdata = &sc->rx_ring_data; for (q = 0; q < nrxqsets; q++) { 
KASSERT(nrxqs == 2, ("nrxqs = %d", nrxqs)); /* Ring */ rdata->ring = (struct mgb_ring_desc *) vaddrs[q * nrxqs + 0]; rdata->ring_bus_addr = paddrs[q * nrxqs + 0]; /* Head WB */ rdata->head_wb = (uint32_t *) vaddrs[q * nrxqs + 1]; rdata->head_wb_bus_addr = paddrs[q * nrxqs + 1]; } return (0); } static void mgb_queues_free(if_ctx_t ctx) { struct mgb_softc *sc; sc = iflib_get_softc(ctx); memset(&sc->rx_ring_data, 0, sizeof(struct mgb_ring_data)); memset(&sc->tx_ring_data, 0, sizeof(struct mgb_ring_data)); } static void mgb_init(if_ctx_t ctx) { struct mgb_softc *sc; struct mii_data *miid; int error; sc = iflib_get_softc(ctx); miid = device_get_softc(sc->miibus); device_printf(sc->dev, "running init ...\n"); mgb_dma_init(sc); /* XXX: Turn off perfect filtering, turn on (broad|multi|uni)cast rx */ CSR_CLEAR_REG(sc, MGB_RFE_CTL, MGB_RFE_ALLOW_PERFECT_FILTER); CSR_UPDATE_REG(sc, MGB_RFE_CTL, MGB_RFE_ALLOW_BROADCAST | MGB_RFE_ALLOW_MULTICAST | MGB_RFE_ALLOW_UNICAST); error = mii_mediachg(miid); /* Not much we can do if this fails. 
*/ if (error) device_printf(sc->dev, "%s: mii_mediachg returned %d", __func__, error); } #ifdef DEBUG static void mgb_dump_some_stats(struct mgb_softc *sc) { int i; int first_stat = 0x1200; int last_stat = 0x12FC; for (i = first_stat; i <= last_stat; i += 4) if (CSR_READ_REG(sc, i) != 0) device_printf(sc->dev, "0x%04x: 0x%08x\n", i, CSR_READ_REG(sc, i)); char *stat_names[] = { "MAC_ERR_STS ", "FCT_INT_STS ", "DMAC_CFG ", "DMAC_CMD ", "DMAC_INT_STS ", "DMAC_INT_EN ", "DMAC_RX_ERR_STS0 ", "DMAC_RX_ERR_STS1 ", "DMAC_RX_ERR_STS2 ", "DMAC_RX_ERR_STS3 ", "INT_STS ", "INT_EN ", "INT_VEC_EN ", "INT_VEC_MAP0 ", "INT_VEC_MAP1 ", "INT_VEC_MAP2 ", "TX_HEAD0", "TX_TAIL0", "DMAC_TX_ERR_STS0 ", NULL }; int stats[] = { 0x114, 0xA0, 0xC00, 0xC0C, 0xC10, 0xC14, 0xC60, 0xCA0, 0xCE0, 0xD20, 0x780, 0x788, 0x794, 0x7A0, 0x7A4, 0x780, 0xD58, 0xD5C, 0xD60, 0x0 }; i = 0; printf("==============================\n"); while (stats[i++]) device_printf(sc->dev, "%s at offset 0x%04x = 0x%08x\n", stat_names[i - 1], stats[i - 1], CSR_READ_REG(sc, stats[i - 1])); printf("==== TX RING DESCS ====\n"); for (i = 0; i < MGB_DMA_RING_SIZE; i++) device_printf(sc->dev, "ring[%d].data0=0x%08x\n" "ring[%d].data1=0x%08x\n" "ring[%d].data2=0x%08x\n" "ring[%d].data3=0x%08x\n", i, sc->tx_ring_data.ring[i].ctl, i, sc->tx_ring_data.ring[i].addr.low, i, sc->tx_ring_data.ring[i].addr.high, i, sc->tx_ring_data.ring[i].sts); device_printf(sc->dev, "==== DUMP_TX_DMA_RAM ====\n"); CSR_WRITE_REG(sc, 0x24, 0xF); // DP_SEL & TX_RAM_0 for (i = 0; i < 128; i++) { CSR_WRITE_REG(sc, 0x2C, i); // DP_ADDR CSR_WRITE_REG(sc, 0x28, 0); // DP_CMD while ((CSR_READ_REG(sc, 0x24) & 0x80000000) == 0) // DP_SEL & READY DELAY(1000); device_printf(sc->dev, "DMAC_TX_RAM_0[%u]=%08x\n", i, CSR_READ_REG(sc, 0x30)); // DP_DATA } } #endif static void mgb_stop(if_ctx_t ctx) { struct mgb_softc *sc ; if_softc_ctx_t scctx; int i; sc = iflib_get_softc(ctx); scctx = iflib_get_softc_ctx(ctx); /* XXX: Could potentially timeout */ for (i = 0; i < 
scctx->isc_nrxqsets; i++) { mgb_dmac_control(sc, MGB_DMAC_RX_START, 0, DMAC_STOP); mgb_fct_control(sc, MGB_FCT_RX_CTL, 0, FCT_DISABLE); } for (i = 0; i < scctx->isc_ntxqsets; i++) { mgb_dmac_control(sc, MGB_DMAC_TX_START, 0, DMAC_STOP); mgb_fct_control(sc, MGB_FCT_TX_CTL, 0, FCT_DISABLE); } } static int mgb_legacy_intr(void *xsc) { struct mgb_softc *sc; sc = xsc; iflib_admin_intr_deferred(sc->ctx); return (FILTER_HANDLED); } static int mgb_rxq_intr(void *xsc) { struct mgb_softc *sc; if_softc_ctx_t scctx; uint32_t intr_sts, intr_en; int qidx; sc = xsc; scctx = iflib_get_softc_ctx(sc->ctx); intr_sts = CSR_READ_REG(sc, MGB_INTR_STS); intr_en = CSR_READ_REG(sc, MGB_INTR_ENBL_SET); intr_sts &= intr_en; for (qidx = 0; qidx < scctx->isc_nrxqsets; qidx++) { if ((intr_sts & MGB_INTR_STS_RX(qidx))){ CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, MGB_INTR_STS_RX(qidx)); CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_RX(qidx)); } } return (FILTER_SCHEDULE_THREAD); } static int mgb_admin_intr(void *xsc) { struct mgb_softc *sc; if_softc_ctx_t scctx; uint32_t intr_sts, intr_en; int qidx; sc = xsc; scctx = iflib_get_softc_ctx(sc->ctx); intr_sts = CSR_READ_REG(sc, MGB_INTR_STS); intr_en = CSR_READ_REG(sc, MGB_INTR_ENBL_SET); intr_sts &= intr_en; /* TODO: shouldn't continue if suspended */ if ((intr_sts & MGB_INTR_STS_ANY) == 0) return (FILTER_STRAY); if ((intr_sts & MGB_INTR_STS_TEST) != 0) { sc->isr_test_flag = true; CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TEST); return (FILTER_HANDLED); } if ((intr_sts & MGB_INTR_STS_RX_ANY) != 0) { for (qidx = 0; qidx < scctx->isc_nrxqsets; qidx++) { if ((intr_sts & MGB_INTR_STS_RX(qidx))){ iflib_rx_intr_deferred(sc->ctx, qidx); } } return (FILTER_HANDLED); } /* XXX: TX interrupts should not occur */ if ((intr_sts & MGB_INTR_STS_TX_ANY) != 0) { for (qidx = 0; qidx < scctx->isc_ntxqsets; qidx++) { if ((intr_sts & MGB_INTR_STS_RX(qidx))) { /* clear the interrupt sts and run handler */ CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, MGB_INTR_STS_TX(qidx)); 
CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TX(qidx)); iflib_tx_intr_deferred(sc->ctx, qidx); } } return (FILTER_HANDLED); } return (FILTER_SCHEDULE_THREAD); } static int mgb_msix_intr_assign(if_ctx_t ctx, int msix) { struct mgb_softc *sc; if_softc_ctx_t scctx; int error, i, vectorid; char irq_name[16]; sc = iflib_get_softc(ctx); scctx = iflib_get_softc_ctx(ctx); KASSERT(scctx->isc_nrxqsets == 1 && scctx->isc_ntxqsets == 1, ("num rxqsets/txqsets != 1 ")); /* * First vector should be admin interrupts, others vectors are TX/RX * * RIDs start at 1, and vector ids start at 0. */ vectorid = 0; error = iflib_irq_alloc_generic(ctx, &sc->admin_irq, vectorid + 1, IFLIB_INTR_ADMIN, mgb_admin_intr, sc, 0, "admin"); if (error) { device_printf(sc->dev, "Failed to register admin interrupt handler\n"); return (error); } for (i = 0; i < scctx->isc_nrxqsets; i++) { vectorid++; snprintf(irq_name, sizeof(irq_name), "rxq%d", i); error = iflib_irq_alloc_generic(ctx, &sc->rx_irq, vectorid + 1, IFLIB_INTR_RXTX, mgb_rxq_intr, sc, i, irq_name); if (error) { device_printf(sc->dev, "Failed to register rxq %d interrupt handler\n", i); return (error); } CSR_UPDATE_REG(sc, MGB_INTR_VEC_RX_MAP, MGB_INTR_VEC_MAP(vectorid, i)); } /* Not actually mapping hw TX interrupts ... */ for (i = 0; i < scctx->isc_ntxqsets; i++) { snprintf(irq_name, sizeof(irq_name), "txq%d", i); iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i, irq_name); } return (0); } static void mgb_intr_enable_all(if_ctx_t ctx) { struct mgb_softc *sc; if_softc_ctx_t scctx; int i, dmac_enable = 0, intr_sts = 0, vec_en = 0; sc = iflib_get_softc(ctx); scctx = iflib_get_softc_ctx(ctx); intr_sts |= MGB_INTR_STS_ANY; vec_en |= MGB_INTR_STS_ANY; for (i = 0; i < scctx->isc_nrxqsets; i++) { intr_sts |= MGB_INTR_STS_RX(i); dmac_enable |= MGB_DMAC_RX_INTR_ENBL(i); vec_en |= MGB_INTR_RX_VEC_STS(i); } /* TX interrupts aren't needed ... 
*/ CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, intr_sts); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_SET, vec_en); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, dmac_enable); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_SET, dmac_enable); } static void mgb_intr_disable_all(if_ctx_t ctx) { struct mgb_softc *sc; sc = iflib_get_softc(ctx); CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, UINT32_MAX); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_CLR, UINT32_MAX); CSR_WRITE_REG(sc, MGB_INTR_STS, UINT32_MAX); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_CLR, UINT32_MAX); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, UINT32_MAX); } static int mgb_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { /* called after successful rx isr */ struct mgb_softc *sc; sc = iflib_get_softc(ctx); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_SET, MGB_INTR_RX_VEC_STS(qid)); CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, MGB_INTR_STS_RX(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, MGB_DMAC_RX_INTR_ENBL(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_SET, MGB_DMAC_RX_INTR_ENBL(qid)); return (0); } static int mgb_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid) { /* XXX: not called (since tx interrupts not used) */ struct mgb_softc *sc; sc = iflib_get_softc(ctx); CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, MGB_INTR_STS_TX(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_STS, MGB_DMAC_TX_INTR_ENBL(qid)); CSR_WRITE_REG(sc, MGB_DMAC_INTR_ENBL_SET, MGB_DMAC_TX_INTR_ENBL(qid)); return (0); } static bool mgb_intr_test(struct mgb_softc *sc) { int i; sc->isr_test_flag = false; CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TEST); CSR_WRITE_REG(sc, MGB_INTR_VEC_ENBL_SET, MGB_INTR_STS_ANY); CSR_WRITE_REG(sc, MGB_INTR_ENBL_SET, MGB_INTR_STS_ANY | MGB_INTR_STS_TEST); CSR_WRITE_REG(sc, MGB_INTR_SET, MGB_INTR_STS_TEST); if (sc->isr_test_flag) return (true); for (i = 0; i < MGB_TIMEOUT; i++) { DELAY(10); if (sc->isr_test_flag) break; } CSR_WRITE_REG(sc, MGB_INTR_ENBL_CLR, MGB_INTR_STS_TEST); CSR_WRITE_REG(sc, MGB_INTR_STS, MGB_INTR_STS_TEST); return (sc->isr_test_flag); } static int mgb_isc_txd_encap(void *xsc , 
if_pkt_info_t ipi) { struct mgb_softc *sc; struct mgb_ring_data *rdata; struct mgb_ring_desc *txd; bus_dma_segment_t *segs; qidx_t pidx, nsegs; int i; KASSERT(ipi->ipi_qsidx == 0, ("tried to refill TX Channel %d.\n", ipi->ipi_qsidx)); sc = xsc; rdata = &sc->tx_ring_data; pidx = ipi->ipi_pidx; segs = ipi->ipi_segs; nsegs = ipi->ipi_nsegs; /* For each seg, create a descriptor */ for (i = 0; i < nsegs; ++i) { KASSERT(nsegs == 1, ("Multisegment packet !!!!!\n")); txd = &rdata->ring[pidx]; txd->ctl = htole32( (segs[i].ds_len & MGB_DESC_CTL_BUFLEN_MASK ) | /* * XXX: This will be wrong in the multipacket case * I suspect FS should be for the first packet and * LS should be for the last packet */ MGB_TX_DESC_CTL_FS | MGB_TX_DESC_CTL_LS | MGB_DESC_CTL_FCS); txd->addr.low = htole32(CSR_TRANSLATE_ADDR_LOW32( segs[i].ds_addr)); txd->addr.high = htole32(CSR_TRANSLATE_ADDR_HIGH32( segs[i].ds_addr)); txd->sts = htole32( (segs[i].ds_len << 16) & MGB_DESC_FRAME_LEN_MASK); pidx = MGB_NEXT_RING_IDX(pidx); } ipi->ipi_new_pidx = pidx; return (0); } static void mgb_isc_txd_flush(void *xsc, uint16_t txqid, qidx_t pidx) { struct mgb_softc *sc; struct mgb_ring_data *rdata; KASSERT(txqid == 0, ("tried to flush TX Channel %d.\n", txqid)); sc = xsc; rdata = &sc->tx_ring_data; if (rdata->last_tail != pidx) { rdata->last_tail = pidx; CSR_WRITE_REG(sc, MGB_DMA_TX_TAIL(txqid), rdata->last_tail); } } static int mgb_isc_txd_credits_update(void *xsc, uint16_t txqid, bool clear) { struct mgb_softc *sc; struct mgb_ring_desc *txd; struct mgb_ring_data *rdata; int processed = 0; /* * > If clear is true, we need to report the number of TX command ring * > descriptors that have been processed by the device. If clear is * > false, we just need to report whether or not at least one TX * > command ring descriptor has been processed by the device. 
* - vmx driver */ KASSERT(txqid == 0, ("tried to credits_update TX Channel %d.\n", txqid)); sc = xsc; rdata = &sc->tx_ring_data; while (*(rdata->head_wb) != rdata->last_head) { if (!clear) return (1); txd = &rdata->ring[rdata->last_head]; memset(txd, 0, sizeof(struct mgb_ring_desc)); rdata->last_head = MGB_NEXT_RING_IDX(rdata->last_head); processed++; } return (processed); } static int mgb_isc_rxd_available(void *xsc, uint16_t rxqid, qidx_t idx, qidx_t budget) { struct mgb_softc *sc; struct mgb_ring_data *rdata; int avail = 0; sc = xsc; KASSERT(rxqid == 0, ("tried to check availability in RX Channel %d.\n", rxqid)); rdata = &sc->rx_ring_data; for (; idx != *(rdata->head_wb); idx = MGB_NEXT_RING_IDX(idx)) { avail++; /* XXX: Could verify desc is device owned here */ if (avail == budget) break; } return (avail); } static int mgb_isc_rxd_pkt_get(void *xsc, if_rxd_info_t ri) { struct mgb_softc *sc; struct mgb_ring_data *rdata; struct mgb_ring_desc rxd; int total_len; KASSERT(ri->iri_qsidx == 0, ("tried to check availability in RX Channel %d\n", ri->iri_qsidx)); sc = xsc; total_len = 0; rdata = &sc->rx_ring_data; while (*(rdata->head_wb) != rdata->last_head) { /* copy ring desc and do swapping */ rxd = rdata->ring[rdata->last_head]; rxd.ctl = le32toh(rxd.ctl); rxd.addr.low = le32toh(rxd.ctl); rxd.addr.high = le32toh(rxd.ctl); rxd.sts = le32toh(rxd.ctl); if ((rxd.ctl & MGB_DESC_CTL_OWN) != 0) { device_printf(sc->dev, "Tried to read descriptor ... " "found that it's owned by the driver\n"); return (EINVAL); } if ((rxd.ctl & MGB_RX_DESC_CTL_FS) == 0) { device_printf(sc->dev, "Tried to read descriptor ... " "found that FS is not set.\n"); device_printf(sc->dev, "Tried to read descriptor ... that it FS is not set.\n"); return (EINVAL); } /* XXX: Multi-packet support */ if ((rxd.ctl & MGB_RX_DESC_CTL_LS) == 0) { device_printf(sc->dev, "Tried to read descriptor ... " "found that LS is not set. 
(Multi-buffer packets not yet supported)\n"); return (EINVAL); } ri->iri_frags[0].irf_flid = 0; ri->iri_frags[0].irf_idx = rdata->last_head; ri->iri_frags[0].irf_len = MGB_DESC_GET_FRAME_LEN(&rxd); total_len += ri->iri_frags[0].irf_len; rdata->last_head = MGB_NEXT_RING_IDX(rdata->last_head); break; } ri->iri_nfrags = 1; ri->iri_len = total_len; return (0); } static void mgb_isc_rxd_refill(void *xsc, if_rxd_update_t iru) { struct mgb_softc *sc; struct mgb_ring_data *rdata; struct mgb_ring_desc *rxd; uint64_t *paddrs; qidx_t *idxs; qidx_t idx; int count, len; count = iru->iru_count; len = iru->iru_buf_size; idxs = iru->iru_idxs; paddrs = iru->iru_paddrs; KASSERT(iru->iru_qsidx == 0, ("tried to refill RX Channel %d.\n", iru->iru_qsidx)); sc = xsc; rdata = &sc->rx_ring_data; while (count > 0) { idx = idxs[--count]; rxd = &rdata->ring[idx]; rxd->sts = 0; rxd->addr.low = htole32(CSR_TRANSLATE_ADDR_LOW32(paddrs[count])); rxd->addr.high = htole32(CSR_TRANSLATE_ADDR_HIGH32(paddrs[count])); rxd->ctl = htole32(MGB_DESC_CTL_OWN | (len & MGB_DESC_CTL_BUFLEN_MASK)); } return; } static void mgb_isc_rxd_flush(void *xsc, uint16_t rxqid, uint8_t flid, qidx_t pidx) { struct mgb_softc *sc; sc = xsc; KASSERT(rxqid == 0, ("tried to flush RX Channel %d.\n", rxqid)); /* * According to the programming guide, last_tail must be set to * the last valid RX descriptor, rather than to the one past that. * Note that this is not true for the TX ring! 
*/ sc->rx_ring_data.last_tail = MGB_PREV_RING_IDX(pidx); CSR_WRITE_REG(sc, MGB_DMA_RX_TAIL(rxqid), sc->rx_ring_data.last_tail); return; } static int mgb_test_bar(struct mgb_softc *sc) { uint32_t id_rev, dev_id; id_rev = CSR_READ_REG(sc, 0); dev_id = id_rev >> 16; if (dev_id == MGB_LAN7430_DEVICE_ID || dev_id == MGB_LAN7431_DEVICE_ID) { return (0); } else { device_printf(sc->dev, "ID check failed.\n"); return (ENXIO); } } static int mgb_alloc_regs(struct mgb_softc *sc) { int rid; rid = PCIR_BAR(MGB_BAR); pci_enable_busmaster(sc->dev); sc->regs = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->regs == NULL) return (ENXIO); return (0); } static int mgb_release_regs(struct mgb_softc *sc) { int error = 0; if (sc->regs != NULL) error = bus_release_resource(sc->dev, SYS_RES_MEMORY, rman_get_rid(sc->regs), sc->regs); sc->regs = NULL; pci_disable_busmaster(sc->dev); return (error); } static int mgb_dma_init(struct mgb_softc *sc) { if_softc_ctx_t scctx; int ch, error = 0; scctx = iflib_get_softc_ctx(sc->ctx); for (ch = 0; ch < scctx->isc_nrxqsets; ch++) if ((error = mgb_dma_rx_ring_init(sc, ch))) goto fail; for (ch = 0; ch < scctx->isc_nrxqsets; ch++) if ((error = mgb_dma_tx_ring_init(sc, ch))) goto fail; fail: return (error); } static int mgb_dma_rx_ring_init(struct mgb_softc *sc, int channel) { struct mgb_ring_data *rdata; int ring_config, error = 0; rdata = &sc->rx_ring_data; mgb_dmac_control(sc, MGB_DMAC_RX_START, 0, DMAC_RESET); KASSERT(MGB_DMAC_STATE_IS_INITIAL(sc, MGB_DMAC_RX_START, channel), ("Trying to init channels when not in init state\n")); /* write ring address */ if (rdata->ring_bus_addr == 0) { device_printf(sc->dev, "Invalid ring bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_RX_BASE_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->ring_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_RX_BASE_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->ring_bus_addr)); /* write head pointer writeback address */ if (rdata->head_wb_bus_addr == 0) { 
device_printf(sc->dev, "Invalid head wb bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_RX_HEAD_WB_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->head_wb_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_RX_HEAD_WB_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->head_wb_bus_addr)); /* Enable head pointer writeback */ CSR_WRITE_REG(sc, MGB_DMA_RX_CONFIG0(channel), MGB_DMA_HEAD_WB_ENBL); ring_config = CSR_READ_REG(sc, MGB_DMA_RX_CONFIG1(channel)); /* ring size */ ring_config &= ~MGB_DMA_RING_LEN_MASK; ring_config |= (MGB_DMA_RING_SIZE & MGB_DMA_RING_LEN_MASK); /* packet padding (PAD_2 is better for IP header alignment ...) */ ring_config &= ~MGB_DMA_RING_PAD_MASK; ring_config |= (MGB_DMA_RING_PAD_0 & MGB_DMA_RING_PAD_MASK); CSR_WRITE_REG(sc, MGB_DMA_RX_CONFIG1(channel), ring_config); rdata->last_head = CSR_READ_REG(sc, MGB_DMA_RX_HEAD(channel)); mgb_fct_control(sc, MGB_FCT_RX_CTL, channel, FCT_RESET); if (error != 0) { device_printf(sc->dev, "Failed to reset RX FCT.\n"); goto fail; } mgb_fct_control(sc, MGB_FCT_RX_CTL, channel, FCT_ENABLE); if (error != 0) { device_printf(sc->dev, "Failed to enable RX FCT.\n"); goto fail; } mgb_dmac_control(sc, MGB_DMAC_RX_START, channel, DMAC_START); if (error != 0) device_printf(sc->dev, "Failed to start RX DMAC.\n"); fail: return (error); } static int mgb_dma_tx_ring_init(struct mgb_softc *sc, int channel) { struct mgb_ring_data *rdata; int ring_config, error = 0; rdata = &sc->tx_ring_data; if ((error = mgb_fct_control(sc, MGB_FCT_TX_CTL, channel, FCT_RESET))) { device_printf(sc->dev, "Failed to reset TX FCT.\n"); goto fail; } if ((error = mgb_fct_control(sc, MGB_FCT_TX_CTL, channel, FCT_ENABLE))) { device_printf(sc->dev, "Failed to enable TX FCT.\n"); goto fail; } if ((error = mgb_dmac_control(sc, MGB_DMAC_TX_START, channel, DMAC_RESET))) { device_printf(sc->dev, "Failed to reset TX DMAC.\n"); goto fail; } KASSERT(MGB_DMAC_STATE_IS_INITIAL(sc, MGB_DMAC_TX_START, channel), ("Trying to init channels in not init state\n")); /* write ring 
address */ if (rdata->ring_bus_addr == 0) { device_printf(sc->dev, "Invalid ring bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_TX_BASE_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->ring_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_TX_BASE_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->ring_bus_addr)); /* write ring size */ ring_config = CSR_READ_REG(sc, MGB_DMA_TX_CONFIG1(channel)); ring_config &= ~MGB_DMA_RING_LEN_MASK; ring_config |= (MGB_DMA_RING_SIZE & MGB_DMA_RING_LEN_MASK); CSR_WRITE_REG(sc, MGB_DMA_TX_CONFIG1(channel), ring_config); /* Enable interrupt on completion and head pointer writeback */ ring_config = (MGB_DMA_HEAD_WB_LS_ENBL | MGB_DMA_HEAD_WB_ENBL); CSR_WRITE_REG(sc, MGB_DMA_TX_CONFIG0(channel), ring_config); /* write head pointer writeback address */ if (rdata->head_wb_bus_addr == 0) { device_printf(sc->dev, "Invalid head wb bus addr.\n"); goto fail; } CSR_WRITE_REG(sc, MGB_DMA_TX_HEAD_WB_H(channel), CSR_TRANSLATE_ADDR_HIGH32(rdata->head_wb_bus_addr)); CSR_WRITE_REG(sc, MGB_DMA_TX_HEAD_WB_L(channel), CSR_TRANSLATE_ADDR_LOW32(rdata->head_wb_bus_addr)); rdata->last_head = CSR_READ_REG(sc, MGB_DMA_TX_HEAD(channel)); KASSERT(rdata->last_head == 0, ("MGB_DMA_TX_HEAD was not reset.\n")); rdata->last_tail = 0; CSR_WRITE_REG(sc, MGB_DMA_TX_TAIL(channel), rdata->last_tail); if ((error = mgb_dmac_control(sc, MGB_DMAC_TX_START, channel, DMAC_START))) device_printf(sc->dev, "Failed to start TX DMAC.\n"); fail: return (error); } static int mgb_dmac_control(struct mgb_softc *sc, int start, int channel, enum mgb_dmac_cmd cmd) { int error = 0; switch (cmd) { case DMAC_RESET: CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_RESET(start, channel)); error = mgb_wait_for_bits(sc, MGB_DMAC_CMD, 0, MGB_DMAC_CMD_RESET(start, channel)); break; case DMAC_START: /* * NOTE: this simplifies the logic, since it will never * try to start in STOP_PENDING, but it also increases work. 
*/ error = mgb_dmac_control(sc, start, channel, DMAC_STOP); if (error != 0) return (error); CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_START(start, channel)); break; case DMAC_STOP: CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_STOP(start, channel)); error = mgb_wait_for_bits(sc, MGB_DMAC_CMD, MGB_DMAC_CMD_STOP(start, channel), MGB_DMAC_CMD_START(start, channel)); break; } return (error); } static int mgb_fct_control(struct mgb_softc *sc, int reg, int channel, enum mgb_fct_cmd cmd) { switch (cmd) { case FCT_RESET: CSR_WRITE_REG(sc, reg, MGB_FCT_RESET(channel)); return (mgb_wait_for_bits(sc, reg, 0, MGB_FCT_RESET(channel))); case FCT_ENABLE: CSR_WRITE_REG(sc, reg, MGB_FCT_ENBL(channel)); return (0); case FCT_DISABLE: CSR_WRITE_REG(sc, reg, MGB_FCT_DSBL(channel)); return (mgb_wait_for_bits(sc, reg, 0, MGB_FCT_ENBL(channel))); } } static int mgb_hw_teardown(struct mgb_softc *sc) { int err = 0; /* Stop MAC */ CSR_CLEAR_REG(sc, MGB_MAC_RX, MGB_MAC_ENBL); CSR_WRITE_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL); if ((err = mgb_wait_for_bits(sc, MGB_MAC_RX, MGB_MAC_DSBL, 0))) return (err); if ((err = mgb_wait_for_bits(sc, MGB_MAC_TX, MGB_MAC_DSBL, 0))) return (err); return (err); } static int mgb_hw_init(struct mgb_softc *sc) { int error = 0; error = mgb_hw_reset(sc); if (error != 0) goto fail; mgb_mac_init(sc); error = mgb_phy_reset(sc); if (error != 0) goto fail; error = mgb_dmac_reset(sc); if (error != 0) goto fail; fail: return (error); } static int mgb_hw_reset(struct mgb_softc *sc) { CSR_UPDATE_REG(sc, MGB_HW_CFG, MGB_LITE_RESET); return (mgb_wait_for_bits(sc, MGB_HW_CFG, 0, MGB_LITE_RESET)); } static int mgb_mac_init(struct mgb_softc *sc) { /** * enable automatic duplex detection and * automatic speed detection */ CSR_UPDATE_REG(sc, MGB_MAC_CR, MGB_MAC_ADD_ENBL | MGB_MAC_ASD_ENBL); CSR_UPDATE_REG(sc, MGB_MAC_TX, MGB_MAC_ENBL); CSR_UPDATE_REG(sc, MGB_MAC_RX, MGB_MAC_ENBL); return (MGB_STS_OK); } static int mgb_phy_reset(struct mgb_softc *sc) { CSR_UPDATE_BYTE(sc, MGB_PMT_CTL, 
MGB_PHY_RESET); if (mgb_wait_for_bits(sc, MGB_PMT_CTL, 0, MGB_PHY_RESET) == MGB_STS_TIMEOUT) return (MGB_STS_TIMEOUT); return (mgb_wait_for_bits(sc, MGB_PMT_CTL, MGB_PHY_READY, 0)); } static int mgb_dmac_reset(struct mgb_softc *sc) { CSR_WRITE_REG(sc, MGB_DMAC_CMD, MGB_DMAC_RESET); return (mgb_wait_for_bits(sc, MGB_DMAC_CMD, 0, MGB_DMAC_RESET)); } static int mgb_wait_for_bits(struct mgb_softc *sc, int reg, int set_bits, int clear_bits) { int i, val; i = 0; do { /* * XXX: Datasheets states delay should be > 5 microseconds * for device reset. */ DELAY(100); val = CSR_READ_REG(sc, reg); if ((val & set_bits) == set_bits && (val & clear_bits) == 0) return (MGB_STS_OK); } while (i++ < MGB_TIMEOUT); return (MGB_STS_TIMEOUT); } static void mgb_get_ethaddr(struct mgb_softc *sc, struct ether_addr *dest) { CSR_READ_REG_BYTES(sc, MGB_MAC_ADDR_BASE_L, &dest->octet[0], 4); CSR_READ_REG_BYTES(sc, MGB_MAC_ADDR_BASE_H, &dest->octet[4], 2); } static int mgb_miibus_readreg(device_t dev, int phy, int reg) { struct mgb_softc *sc; int mii_access; sc = iflib_get_softc(device_get_softc(dev)); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return (EIO); mii_access = (phy & MGB_MII_PHY_ADDR_MASK) << MGB_MII_PHY_ADDR_SHIFT; mii_access |= (reg & MGB_MII_REG_ADDR_MASK) << MGB_MII_REG_ADDR_SHIFT; mii_access |= MGB_MII_BUSY | MGB_MII_READ; CSR_WRITE_REG(sc, MGB_MII_ACCESS, mii_access); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return (EIO); return (CSR_READ_2_BYTES(sc, MGB_MII_DATA)); } static int mgb_miibus_writereg(device_t dev, int phy, int reg, int data) { struct mgb_softc *sc; int mii_access; sc = iflib_get_softc(device_get_softc(dev)); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return (EIO); mii_access = (phy & MGB_MII_PHY_ADDR_MASK) << MGB_MII_PHY_ADDR_SHIFT; mii_access |= (reg & MGB_MII_REG_ADDR_MASK) << MGB_MII_REG_ADDR_SHIFT; mii_access |= MGB_MII_BUSY | MGB_MII_WRITE; 
CSR_WRITE_REG(sc, MGB_MII_DATA, data); CSR_WRITE_REG(sc, MGB_MII_ACCESS, mii_access); if (mgb_wait_for_bits(sc, MGB_MII_ACCESS, 0, MGB_MII_BUSY) == MGB_STS_TIMEOUT) return (EIO); return (0); } /* XXX: May need to lock these up */ static void mgb_miibus_statchg(device_t dev) { struct mgb_softc *sc; struct mii_data *miid; sc = iflib_get_softc(device_get_softc(dev)); miid = device_get_softc(sc->miibus); /* Update baudrate in iflib */ sc->baudrate = ifmedia_baudrate(miid->mii_media_active); iflib_link_state_change(sc->ctx, sc->link_state, sc->baudrate); } static void mgb_miibus_linkchg(device_t dev) { struct mgb_softc *sc; struct mii_data *miid; int link_state; sc = iflib_get_softc(device_get_softc(dev)); miid = device_get_softc(sc->miibus); /* XXX: copied from miibus_linkchg **/ if (miid->mii_media_status & IFM_AVALID) { if (miid->mii_media_status & IFM_ACTIVE) link_state = LINK_STATE_UP; else link_state = LINK_STATE_DOWN; } else link_state = LINK_STATE_UNKNOWN; sc->link_state = link_state; iflib_link_state_change(sc->ctx, sc->link_state, sc->baudrate); } diff --git a/sys/dev/mgb/if_mgb.h b/sys/dev/mgb/if_mgb.h index fa49805d73b7..19f6d4a20cc7 100644 --- a/sys/dev/mgb/if_mgb.h +++ b/sys/dev/mgb/if_mgb.h @@ -1,329 +1,329 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2019 The FreeBSD Foundation, Inc. + * Copyright (c) 2019 The FreeBSD Foundation * * This driver was written by Gerald ND Aryeetey * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _IF_MGB_H_ #define _IF_MGB_H_ #define MGB_MICROCHIP_VENDOR_ID 0x1055 #define MGB_LAN7430_DEVICE_ID 0x7430 #define MGB_LAN7431_DEVICE_ID 0x7431 #define MGB_TIMEOUT (500) /** Control/Status Registers **/ #define MGB_BAR 0 /* PCI Base Address */ /** Reset **/ #define MGB_HW_CFG 0x10 /** H/W Configuration Register **/ #define MGB_LITE_RESET 0x2 /** MAC **/ #define MGB_MAC_CR 0x0100 /** MAC Crontrol Register **/ #define MGB_MAC_ADD_ENBL 0x1000 /* Automatic Duplex Detection */ #define MGB_MAC_ASD_ENBL 0x0800 /* Automatic Speed Detection */ #define MGB_MAC_ADDR_BASE_L 0x11C /** MAC address lower 4 bytes (read) register **/ #define MGB_MAC_ADDR_BASE_H 0x118 /** MAC address upper 2 bytes (read) register **/ #define MGB_MAC_TX 0x0104 #define MGB_MAC_RX 0x0108 #define MGB_MAC_ENBL (1 << 0) #define MGB_MAC_DSBL (1 << 1) /** MAC Statistics **/ #define MGB_MAC_STAT_RX_FCS_ERR_CNT 0x1200 #define MGB_MAC_STAT_RX_ALIGN_ERR_CNT 0x1204 #define MGB_MAC_STAT_RX_FRAG_ERR_CNT 0x1208 #define MGB_MAC_STAT_RX_JABBER_ERR_CNT 0x120C #define MGB_MAC_STAT_RX_UNDER_ERR_CNT 0x1210 #define MGB_MAC_STAT_RX_OVER_ERR_CNT 0x1214 #define MGB_MAC_STAT_RX_DROPPED_CNT 0x1218 #define MGB_MAC_STAT_RX_BROADCAST_CNT1 0x1220 #define 
MGB_MAC_STAT_RX_BROADCAST_CNT 0x122C #define MGB_MAC_STAT_RX_FRAME_CNT 0x1254 #define MGB_MAC_STAT_RX_DROPPED_CNT 0x1218 #define MGB_MAC_STAT_RX_BROADCAST_CNT1 0x1220 #define MGB_MAC_STAT_RX_BROADCAST_CNT 0x122C #define MGB_MAC_STAT_RX_FRAME_CNT 0x1254 /* etc. */ /** Receive Filtering Engine **/ #define MGB_RFE_CTL 0x508 #define MGB_RFE_ALLOW_BROADCAST (1 << 10) #define MGB_RFE_ALLOW_MULTICAST (1 << 9) #define MGB_RFE_ALLOW_UNICAST (1 << 8) #define MGB_RFE_ALLOW_PERFECT_FILTER (1 << 1) /** PHY Reset (via power management control) **/ #define MGB_PMT_CTL 0x14 /** Power Management Control Register **/ #define MGB_PHY_RESET 0x10 #define MGB_PHY_READY 0x80 /** FIFO Controller **/ #define MGB_FCT_TX_CTL 0xC4 #define MGB_FCT_RX_CTL 0xAC #define MGB_FCT_ENBL(_channel) (1 << (28 + (_channel))) #define MGB_FCT_DSBL(_channel) (1 << (24 + (_channel))) #define MGB_FCT_RESET(_channel) (1 << (20 + (_channel))) /** DMA Controller **/ #define MGB_DMAC_CMD 0xC0C #define MGB_DMAC_RESET (1 << 31) #define MGB_DMAC_TX_START 16 #define MGB_DMAC_RX_START 0 #define MGB_DMAC_CMD_VAL(s, o, ch) (1 << ((s) + (o) + (ch))) #define MGB_DMAC_CMD_RESET(_s, _ch) MGB_DMAC_CMD_VAL(_s, 8, _ch) #define MGB_DMAC_CMD_START(_s, _ch) MGB_DMAC_CMD_VAL(_s, 4, _ch) #define MGB_DMAC_CMD_STOP( _s, _ch) MGB_DMAC_CMD_VAL(_s, 0, _ch) #define MGB_DMAC_STATE(_start, _stop) \ (((_start) ? 2 : 0) | ((_stop) ? 
1 : 0)) #define MGB_DMAC_STATE_INITIAL MGB_DMAC_STATE(0, 0) #define MGB_DMAC_STATE_STARTED MGB_DMAC_STATE(1, 0) #define MGB_DMAC_STATE_STOP_PENDING MGB_DMAC_STATE(1, 1) #define MGB_DMAC_STATE_STOPPED MGB_DMAC_STATE(0, 1) #define MGB_DMAC_CMD_STATE(sc, _s, _ch) \ (MGB_DMAC_STATE( \ CSR_READ_REG(sc, MGB_DMAC_CMD) & MGB_DMAC_CMD_START(_s, _ch), \ CSR_READ_REG(sc, MGB_DMAC_CMD) & MGB_DMAC_CMD_STOP(_s, _ch))) #define MGB_DMAC_STATE_IS_INITIAL(sc, _s, _ch) \ (MGB_DMAC_CMD_STATE(sc, _s, _ch) == MGB_DMAC_STATE_INITIAL) #define MGB_DMAC_INTR_STS 0xC10 #define MGB_DMAC_INTR_ENBL_SET 0xC14 #define MGB_DMAC_INTR_ENBL_CLR 0xC18 #define MGB_DMAC_TX_INTR_ENBL(_ch) (1 << (_ch)) #define MGB_DMAC_RX_INTR_ENBL(_ch) (1 << (16 + (_ch))) /** DMA Rings **/ /** * Page size is 256 bytes * * Ring size, however, these could be tunable (for RX & TX) * to be a multiple of 4 (max is 65532) * **/ /* In linux driver these numbers are 50 and 65 for tx and rx .... */ #define MGB_DMA_RING_SIZE 16 /* in programming guide, this number is 100 */ #define MGB_DMA_MAXSEGS 32 #define MGB_DMA_REG(reg, _channel) ((reg) | ((_channel) << 6)) #define MGB_DMA_RING_LIST_SIZE \ (sizeof(struct mgb_ring_desc) * MGB_DMA_RING_SIZE) #define MGB_DMA_RING_INFO_SIZE \ (sizeof(uint32_t) + MGB_DMA_RING_LIST_SIZE) #define MGB_DMA_TX_CONFIG0(_channel) MGB_DMA_REG(0x0D40, _channel) #define MGB_DMA_TX_CONFIG1(_channel) MGB_DMA_REG(0x0D44, _channel) #define MGB_DMA_TX_BASE_H(_channel) MGB_DMA_REG(0x0D48, _channel) #define MGB_DMA_TX_BASE_L(_channel) MGB_DMA_REG(0x0D4C, _channel) #define MGB_DMA_TX_HEAD_WB_H(_channel) MGB_DMA_REG(0x0D50, _channel) /* head Writeback */ #define MGB_DMA_TX_HEAD_WB_L(_channel) MGB_DMA_REG(0x0D54, _channel) #define MGB_DMA_TX_HEAD(_channel) MGB_DMA_REG(0x0D58, _channel) #define MGB_DMA_TX_TAIL(_channel) MGB_DMA_REG(0x0D5C, _channel) #define MGB_DMA_RX_CONFIG0(_channel) MGB_DMA_REG(0x0C40, _channel) #define MGB_DMA_RX_CONFIG1(_channel) MGB_DMA_REG(0x0C44, _channel) #define MGB_DMA_RX_BASE_H(_channel) 
MGB_DMA_REG(0x0C48, _channel) #define MGB_DMA_RX_BASE_L(_channel) MGB_DMA_REG(0x0C4C, _channel) #define MGB_DMA_RX_HEAD_WB_H(_channel) MGB_DMA_REG(0x0C50, _channel) /* head Writeback */ #define MGB_DMA_RX_HEAD_WB_L(_channel) MGB_DMA_REG(0x0C54, _channel) #define MGB_DMA_RX_HEAD(_channel) MGB_DMA_REG(0x0C58, _channel) #define MGB_DMA_RX_TAIL(_channel) MGB_DMA_REG(0x0C5C, _channel) #define MGB_DMA_RING_LEN_MASK 0xFFFF #define MGB_DMA_IOC_ENBL 0x10000000 #define MGB_DMA_HEAD_WB_LS_ENBL 0x20000000 #define MGB_DMA_HEAD_WB_ENBL (1 << 5) #define MGB_DMA_RING_PAD_MASK 0x03000000 #define MGB_DMA_RING_PAD_0 0x00000000 #define MGB_DMA_RING_PAD_2 0x02000000 #define MGB_DESC_CTL_OWN (1 << 15) #define MGB_DESC_CTL_FCS (1 << 17) #define MGB_DESC_CTL_IOC (1 << 26) #define MGB_TX_DESC_CTL_LS (1 << 28) #define MGB_TX_DESC_CTL_FS (1 << 29) #define MGB_RX_DESC_CTL_LS (1 << 30) #define MGB_RX_DESC_CTL_FS (1 << 31) #define MGB_DESC_CTL_BUFLEN_MASK (0x0000FFFF) #define MGB_DESC_STS_BUFLEN_MASK (0x00003FFF) #define MGB_DESC_FRAME_LEN_MASK (0x3FFF0000) #define MGB_DESC_GET_FRAME_LEN(_desc) \ (((_desc)->ctl & MGB_DESC_FRAME_LEN_MASK) >> 16) #define MGB_NEXT_RING_IDX(_idx) (((_idx) == MGB_DMA_RING_SIZE - 1) ? 0 : ((_idx) + 1)) #define MGB_PREV_RING_IDX(_idx) (((_idx) == 0) ? 
(MGB_DMA_RING_SIZE - 1) : ((_idx) - 1)) #define MGB_RING_SPACE(_sc) \ ((((_sc)->tx_ring_data.last_head - (_sc)->tx_ring_data.last_tail - 1) \ + MGB_DMA_RING_SIZE ) % MGB_DMA_RING_SIZE ) /** PHY **/ #define MGB_MII_ACCESS 0x120 #define MGB_MII_DATA 0x124 #define MGB_MII_PHY_ADDR_MASK 0x1F #define MGB_MII_PHY_ADDR_SHIFT 11 #define MGB_MII_REG_ADDR_MASK 0x1F #define MGB_MII_REG_ADDR_SHIFT 6 #define MGB_MII_READ 0x0 #define MGB_MII_WRITE 0x2 #define MGB_MII_BUSY 0x1 /** Interrupt registers **/ #define MGB_INTR_STS 0x780 #define MGB_INTR_SET 0x784 /* This triggers a particular interrupt */ #define MGB_INTR_ENBL_SET 0x788 #define MGB_INTR_STS_ANY (0x1) #define MGB_INTR_STS_RX(_channel) (1 << (24 + (_channel))) #define MGB_INTR_STS_RX_ANY (0xF << 24) #define MGB_INTR_STS_TX(_channel) (1 << (16 + (_channel))) #define MGB_INTR_STS_TX_ANY (0xF << 16) #define MGB_INTR_STS_TEST (1 << 9) #define MGB_INTR_ENBL_CLR 0x78C #define MGB_INTR_VEC_ENBL_SET 0x794 #define MGB_INTR_VEC_ENBL_CLR 0x798 #define MGB_INTR_VEC_ENBL_AUTO_CLR 0x79C #define MGB_INTR_VEC_RX_MAP 0x7A0 #define MGB_INTR_VEC_TX_MAP 0x7A4 #define MGB_INTR_VEC_OTHER_MAP 0x7A8 #define MGB_INTR_VEC_MAP(_vsts, _ch) ((_vsts) << ((_ch) << 2)) #define MGB_INTR_VEC_STS(_v) (1 << (_v)) #define MGB_INTR_RX_VEC_STS(_qid) MGB_INTR_VEC_STS((_qid) + 1) #define MGB_STS_OK ( 0 ) #define MGB_STS_TIMEOUT (-1 ) #define CSR_READ_BYTE(sc, reg) \ bus_read_1((sc)->regs, reg) #define CSR_WRITE_BYTE(sc, reg, val) \ bus_write_1((sc)->regs, reg, val) #define CSR_UPDATE_BYTE(sc, reg, val) \ CSR_WRITE_BYTE(sc, reg, CSR_READ_BYTE(sc, reg) | (val)) #define CSR_READ_REG(sc, reg) \ bus_read_4((sc)->regs, reg) #define CSR_WRITE_REG(sc, reg, val) \ bus_write_4((sc)->regs, reg, val) #define CSR_CLEAR_REG(sc, reg, bits) \ CSR_WRITE_REG(sc, reg, CSR_READ_REG(sc, reg) & ~(bits)) #define CSR_UPDATE_REG(sc, reg, val) \ CSR_WRITE_REG(sc, reg, CSR_READ_REG(sc, reg) | (val)) #define CSR_READ_2_BYTES(sc, reg) \ bus_read_2((sc)->regs, reg) #define 
CSR_READ_REG_BYTES(sc, reg, dest, cnt) \ bus_read_region_1((sc)->regs, reg, dest, cnt) #define CSR_TRANSLATE_ADDR_LOW32(addr) ((uint64_t) (addr) & 0xFFFFFFFF) #define CSR_TRANSLATE_ADDR_HIGH32(addr) ((uint64_t) (addr) >> 32) struct mgb_irq { struct resource *res; void *handler; }; enum mgb_dmac_cmd { DMAC_RESET, DMAC_START, DMAC_STOP }; enum mgb_fct_cmd { FCT_RESET, FCT_ENABLE, FCT_DISABLE }; struct mgb_ring_desc_addr { uint32_t low; uint32_t high; } __packed; /* TODO: With descriptor bit information * this could be done without masks etc. * (using bitwise structs like vmx, * would have to separate rx/tx ring desc * definitions) */ struct mgb_ring_desc { uint32_t ctl; /* data0 */ struct mgb_ring_desc_addr addr; /* data(1|2) */ uint32_t sts; /* data3 */ } __packed; #if 0 struct mgb_ring_info { uint32_t head_wb; struct mgb_ring_desc *ring; } #endif #define MGB_HEAD_WB_PTR(_ring_info_ptr) \ ((uint32_t *)(_ring_info_ptr)) #define MGB_RING_PTR(_ring_info_ptr) \ ((struct mgb_ring_desc *)(MGB_HEAD_WB_PTR(_ring_info_ptr) + 1)) struct mgb_ring_data { uint32_t *head_wb; struct mgb_ring_desc *ring; bus_addr_t head_wb_bus_addr; bus_addr_t ring_bus_addr; uint32_t last_head; uint32_t last_tail; }; struct mgb_softc { if_ctx_t ctx; device_t dev; struct resource *regs; struct resource *pba; struct if_irq admin_irq; struct if_irq rx_irq; bool isr_test_flag; device_t miibus; int link_state; int baudrate; int if_flags; int ethaddr; int flags; struct mtx mtx; struct callout watchdog; int timer; bus_dma_tag_t dma_parent_tag; struct mgb_ring_data rx_ring_data; struct mgb_ring_data tx_ring_data; }; #endif /* _IF_MGB_H_ */ diff --git a/usr.bin/posixshmcontrol/posixshmcontrol.1 b/usr.bin/posixshmcontrol/posixshmcontrol.1 index 84c2b845b99b..bfa43a0bbeab 100644 --- a/usr.bin/posixshmcontrol/posixshmcontrol.1 +++ b/usr.bin/posixshmcontrol/posixshmcontrol.1 @@ -1,175 +1,175 @@ -.\" Copyright (c) 2019 The FreeBSD Foundation, Inc. 
+.\" Copyright (c) 2019 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 25, 2023 .Dt POSIXSHMCONTROL 1 .Os .Sh NAME .Nm posixshmcontrol .Nd Control POSIX shared memory segments .Sh SYNOPSIS .Nm .Ar create .Op Fl l Ar pagesize .Op Fl m Ar mode .Op Pa path \&... .Nm .Ar rm .Op Pa path \&... .Nm .Ar ls .Op Fl h .Op Fl n .Op Fl j Ar jail .Nm .Ar dump .Op Pa path \&... .Nm .Ar stat .Op Fl h .Op Fl n .Op Pa path \&... .Nm .Ar truncate .Op Fl s Ar length .Op Pa path \&... .Sh DESCRIPTION The .Nm command manipulates the named POSIX shared memory segments. 
It allows inspecting existing segments, dumping their metadata or contents, and unlinking them. .Pp Unlinking removes the name from the system and, when the last process unmaps the segment and closes the file descriptor pointing to the segment, frees the underlying memory. .Pp The number of hard links, as displayed by the .Ic stat subcommand, is equal to the number of references to the underlying VM object. It is almost always equal to the number of mappings +1, except for transient references. .Pp The following subcommands are provided: .Bl -tag -width truncate .It Ic create Create segments with the specified paths, if they do not already exist. .Pp The optional .Ar pagesize argument specifies the size of the virtual pages used to map the object with .Xr mmap 2 . By default, the system page size is used, but on some platforms a larger page size can be specified. The size of an object backed by large pages must be a multiple of the specified page size. The .Va hw.pagesizes sysctl variable lists the available page sizes. .Pp The optional numerical .Ar mode argument specifies the initial access mode. .It Ic rm Unlink the paths specified. .It Ic ls List all linked named shared memory segments visible to the caller. For each segment, the user and group owner, size, and path are displayed. The .Fl j option limits the output to segments within the specified .Ar jail name or id. .It Ic dump Output raw byte values from the segment to standard output. .It Ic stat Print metadata for the specified path, in a format similar to the .Xr stat 1 utility. .It Ic truncate Change the length of the segments. The argument to the .Fl s option specifies the new length. The human-friendly 'k', 'm', 'g' suffixes can be used, see .Xr expand_number 3 . If the option is not specified, the assumed length is zero. .El .Pp For some commands, the following options may be provided: .Bl -tag -width XXX .It Fl h If specified, requests human-readable display of size, see .Xr humanize_number 3 . 
.It Fl n Prevent translation of owner and group into symbolic names using name-switch services, instead the raw numeric values are printed. .El .Sh EXIT STATUS .Ex -std .Sh EXAMPLES .Bl -bullet .It To show content of the shared memory segment with the path .Pa /1 , use the command .Dl "posixshmcontrol dump /1 | hexdump -C" .It To create a segment with the path .Pa /2 and then enlarge it to 1M, use the sequence of commands .Dl "posixshmcontrol create /2" .Dl "posixshmcontrol truncate -s 1m /2" .El .Sh SEE ALSO .Xr hexdump 1 , .Xr stat 1 , .Xr ftruncate 2 , .Xr mmap 2 , .Xr read 2 , .Xr shm_open 2 , .Xr shm_unlink 2 , .Xr stat 2 , .Xr expand_number 3 , .Xr humanize_number 3 , .Xr shm_create_largepage 3 , .Xr sysctl 3 .Sh HISTORY The .Nm command appeared in .Fx 12.1 . .Sh AUTHORS The .Nm command and this manual page were written by .An Konstantin Belousov Aq Mt kib@freebsd.org under sponsorship from The .Fx Foundation. diff --git a/usr.bin/proccontrol/proccontrol.1 b/usr.bin/proccontrol/proccontrol.1 index 8ab63d079bbe..820b0f403b01 100644 --- a/usr.bin/proccontrol/proccontrol.1 +++ b/usr.bin/proccontrol/proccontrol.1 @@ -1,139 +1,139 @@ -.\" Copyright (c) 2019 The FreeBSD Foundation, Inc. +.\" Copyright (c) 2019 The FreeBSD Foundation .\" .\" This documentation was written by .\" Konstantin Belousov under sponsorship .\" from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd September 2, 2021 .Dt PROCCONTROL 1 .Os .Sh NAME .Nm proccontrol .Nd Control some process execution aspects .Sh SYNOPSIS .Nm .Fl m Ar mode .Op Fl s Ar control .Op Fl q .Fl p Ar pid | command .Sh DESCRIPTION The .Nm command modifies the execution parameter of an existing process specified by the .Ar pid argument, or starts execution of a new program .Ar command with the execution parameter set for it. .Pp The execution parameter to change is selected by the mandatory parameter .Ar mode . Possible values for .Ar mode are: .Bl -tag -width trapcap .It Ar aslr Control the Address Space Layout Randomization. Only applicable to the new process spawned. .It Ar trace Control the permission for debuggers to attach. Note that a process is only allowed to enable tracing for itself, not for any other process. .It Ar trapcap Controls the signalling of capability mode access violations. .It Ar protmax Controls the implicit PROT_MAX application for .Xr mmap 2 . .It Ar nonewprivs Controls disabling the setuid and sgid bits for .Xr execve 2 . .It Ar wxmap Controls the write exclusive execute mode for mappings. .It Ar kpti Controls the KPTI enable, AMD64 only. 
.It Ar la48 Control limiting usermode process address space to 48 bits of address, AMD64 only, on machines capable of 57-bit addressing. .El .Pp The .Ar control argument specifies if the selected .Ar mode should be enabled or disabled. Possible values are .Ar enable and .Ar disable , with the default value being .Ar enable if not specified. See .Xr procctl 2 for a detailed description of each mode's effects and interaction with other process control facilities. .Pp The .Fl q switch makes the utility query and print the current setting for the selected mode. The .Fl q switch requires the query target process specification with .Fl p . .Sh EXIT STATUS .Ex -std .Sh EXAMPLES .Bl -bullet .It To disable debugger attachment to the process 1020, execute .Dl "proccontrol -m trace -s disable -p 1020" .It To execute the .Xr uniq 1 program in a mode where capability access violations cause .Dv SIGTRAP delivery, do .Dl "proccontrol -m trapcap uniq" .It To query the current ASLR enablement mode for the running process 1020, do .Dl "proccontrol -m aslr -q -p 1020" .El .Sh SEE ALSO .Xr kill 2 , .Xr procctl 2 , .Xr ptrace 2 .Sh HISTORY The .Nm command appeared in .Fx 10.0 . .Sh AUTHORS The .Nm command and this manual page were written by .An Konstantin Belousov Aq Mt kib@freebsd.org under sponsorship from The FreeBSD Foundation.