Index: head/sys/kern/uipc_usrreq.c =================================================================== --- head/sys/kern/uipc_usrreq.c +++ head/sys/kern/uipc_usrreq.c @@ -65,13 +65,13 @@ #include #include #include -#include -#include /* XXX must be before */ #include +#include #include #include #include #include +#include #include #include #include @@ -106,9 +106,7 @@ MALLOC_DECLARE(M_FILECAPS); /* - * Locking key: - * (l) Locked using list lock - * (g) Locked using linkage lock + * See unpcb.h for the locking key. */ static uma_zone_t unp_zone; @@ -196,40 +194,31 @@ /* * Locking and synchronization: * - * Three types of locks exist in the local domain socket implementation: a - * a global linkage rwlock, the mtxpool lock, and per-unpcb mutexes. - * The linkage lock protects the socket count, global generation number, - * and stream/datagram global lists. + * Several types of locks exist in the local domain socket implementation: + * - a global linkage lock + * - a global connection list lock + * - the mtxpool lock + * - per-unpcb mutexes * - * The mtxpool lock protects the vnode from being modified while referenced. - * Lock ordering requires that it be acquired before any unpcb locks. + * The linkage lock protects the global socket lists, the generation number + * counter and garbage collector state. * - * The unpcb lock (unp_mtx) protects all fields in the unpcb. Of particular - * note is that this includes the unp_conn field. So long as the unpcb lock - * is held the reference to the unpcb pointed to by unp_conn is valid. If we - * require that the unpcb pointed to by unp_conn remain live in cases where - * we need to drop the unp_mtx as when we need to acquire the lock for a - * second unpcb the caller must first acquire an additional reference on the - * second unpcb and then revalidate any state (typically check that unp_conn - * is non-NULL) upon requiring the initial unpcb lock. The lock ordering - * between unpcbs is the conventional ascending address order. Two helper - * routines exist for this: + * The connection list lock protects the list of referring sockets in a datagram + * socket PCB. This lock is also overloaded to protect a global list of + * sockets whose buffers contain socket references in the form of SCM_RIGHTS + * messages. To avoid recursion, such references are released by a dedicated + * thread. * - * - unp_pcb_lock2(unp, unp2) - which just acquires the two locks in the - * safe ordering. + * The mtxpool lock protects the vnode from being modified while referenced. + * Lock ordering rules require that it be acquired before any PCB locks. * - * - unp_pcb_owned_lock2(unp, unp2, freed) - the lock for unp is held - * when called. If unp is unlocked and unp2 is subsequently freed - * freed will be set to 1. - * - * The helper routines for references are: - * - * - unp_pcb_hold(unp): Can be called any time we currently hold a valid - * reference to unp. - * - * - unp_pcb_rele(unp): The caller must hold the unp lock. If we are - * releasing the last reference, detach must have been called thus - * unp->unp_socket be NULL. + * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the + * unpcb. This includes the unp_conn field, which either links two connected + * PCBs together (for connected socket types) or points at the destination + * socket (for connectionless socket types). The operations of creating or + * destroying a connection therefore involve locking multiple PCBs. To avoid + * lock order reversals, in some cases this involves dropping a PCB lock and + * using a reference counter to maintain liveness. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pru_attach() and freed in pru_detach(). The validity of that Index: head/sys/sys/unpcb.h =================================================================== --- head/sys/sys/unpcb.h +++ head/sys/sys/unpcb.h @@ -65,30 +65,37 @@ * Stream sockets keep copies of receive sockbuf sb_cc and sb_mbcnt * so that changes in the sockbuf may be computed to modify * back pressure on the sender accordingly. + * + * Locking key: + * (a) Atomic + * (c) Constant + * (g) Locked using linkage lock + * (l) Locked using list lock + * (p) Locked using pcb lock */ LIST_HEAD(unp_head, unpcb); struct unpcb { /* Cache line 1 */ - struct mtx unp_mtx; /* mutex */ - struct unpcb *unp_conn; /* control block of connected socket */ - volatile u_int unp_refcount; - short unp_flags; /* flags */ - short unp_gcflag; /* Garbage collector flags. */ - struct sockaddr_un *unp_addr; /* bound address of socket */ - struct socket *unp_socket; /* pointer back to socket */ + struct mtx unp_mtx; /* PCB mutex */ + struct unpcb *unp_conn; /* (p) connected socket */ + volatile u_int unp_refcount; /* (a, p) atomic refcount */ + short unp_flags; /* (p) PCB flags */ + short unp_gcflag; /* (g) Garbage collector flags */ + struct sockaddr_un *unp_addr; /* (p) bound address of socket */ + struct socket *unp_socket; /* (c) pointer back to socket */ /* Cache line 2 */ - struct vnode *unp_vnode; /* if associated with file */ - struct xucred unp_peercred; /* peer credentials, if applicable */ - LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */ - LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */ - struct unp_head unp_refs; /* referencing socket linked list */ - unp_gen_t unp_gencnt; /* generation count of this instance */ - struct file *unp_file; /* back-pointer to file for gc. */ - u_int unp_msgcount; /* references from message queue */ - u_int unp_gcrefs; /* garbage collector refcount */ - ino_t unp_ino; /* fake inode number */ - LIST_ENTRY(unpcb) unp_dead; /* link in dead list */ + struct vnode *unp_vnode; /* (p) associated file if applicable */ + struct xucred unp_peercred; /* (p) peer credentials if applicable */ + LIST_ENTRY(unpcb) unp_reflink; /* (l) link in unp_refs list */ + LIST_ENTRY(unpcb) unp_link; /* (g) glue on list of all PCBs */ + struct unp_head unp_refs; /* (l) referencing socket linked list */ + unp_gen_t unp_gencnt; /* (g) generation count of this item */ + struct file *unp_file; /* (g) back-pointer to file for gc */ + u_int unp_msgcount; /* (g) references from message queue */ + u_int unp_gcrefs; /* (g) garbage collector refcount */ + ino_t unp_ino; /* (g) fake inode number */ + LIST_ENTRY(unpcb) unp_dead; /* (g) link in dead list */ } __aligned(CACHE_LINE_SIZE); /*