Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107966217
D15430.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
26 KB
Referenced Files
None
Subscribers
None
D15430.diff
View Options
Index: head/sys/kern/uipc_usrreq.c
===================================================================
--- head/sys/kern/uipc_usrreq.c
+++ head/sys/kern/uipc_usrreq.c
@@ -191,13 +191,41 @@
/*
* Locking and synchronization:
*
- * Two types of locks exist in the local domain socket implementation: a
- * a global linkage rwlock and per-unpcb mutexes. The linkage lock protects
- * the socket count, global generation number, stream/datagram global lists and
- * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
- * held exclusively over the acquisition of multiple unpcb locks to prevent
- * deadlock.
+ * Three types of locks exist in the local domain socket implementation: a
+ * global linkage rwlock, the mtxpool lock, and per-unpcb mutexes.
+ * The linkage lock protects the socket count, global generation number,
+ * and stream/datagram global lists.
*
+ * The mtxpool lock protects the vnode from being modified while referenced.
+ * Lock ordering requires that it be acquired before any unpcb locks.
+ *
+ * The unpcb lock (unp_mtx) protects all fields in the unpcb. Of particular
+ * note is that this includes the unp_conn field. So long as the unpcb lock
+ * is held the reference to the unpcb pointed to by unp_conn is valid. If we
+ * require that the unpcb pointed to by unp_conn remain live in cases where
+ * we need to drop the unp_mtx, as when we need to acquire the lock for a
+ * second unpcb, the caller must first acquire an additional reference on the
+ * second unpcb and then revalidate any state (typically check that unp_conn
+ * is non-NULL) upon reacquiring the initial unpcb lock. The lock ordering
+ * between unpcbs is the conventional ascending address order. Two helper
+ * routines exist for this:
+ *
+ * - unp_pcb_lock2(unp, unp2) - which just acquires the two locks in the
+ * safe ordering.
+ *
+ * - unp_pcb_owned_lock2(unp, unp2, freed) - the lock for unp is held
+ * when called. If unp had to be unlocked and unp2 was freed in the
+ * meantime, freed will be set to 1 and unp2 to NULL.
+ *
+ * The helper routines for references are:
+ *
+ * - unp_pcb_hold(unp): Can be called any time we currently hold a valid
+ * reference to unp.
+ *
+ * - unp_pcb_rele(unp): The caller must hold the unp lock. If we are
+ * releasing the last reference, detach must have been called, thus
+ * unp->unp_socket must be NULL.
+ *
* UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
* allocated in pru_attach() and freed in pru_detach(). The validity of that
* pointer is an invariant, so no lock is required to dereference the so_pcb
@@ -210,16 +238,9 @@
* to the unpcb is held. Typically, this reference will be from the socket,
* or from another unpcb when the referring unpcb's lock is held (in order
* that the reference not be invalidated during use). For example, to follow
- * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn,
- * as unp_socket remains valid as long as the reference to unp_conn is valid.
+ * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
+ * that detach is not run clearing unp_socket.
*
- * Fields of unpcbss are locked using a per-unpcb lock, unp_mtx. Individual
- * atomic reads without the lock may be performed "lockless", but more
- * complex reads and read-modify-writes require the mutex to be held. No
- * lock order is defined between unpcb locks -- multiple unpcb locks may be
- * acquired at the same time only when holding the linkage rwlock
- * exclusively, which prevents deadlocks.
- *
* Blocking with UNIX domain sockets is a tricky issue: unlike most network
* protocols, bind() is a non-atomic operation, and connect() requires
* potential sleeping in the protocol, due to potentially waiting on local or
@@ -257,13 +278,19 @@
#define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock)
#define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock)
+#define UNP_REF_LIST_LOCK() UNP_DEFERRED_LOCK() /* unp_refs lists reuse the deferred lock; no trailing ';' so call sites stay one statement */
+#define UNP_REF_LIST_UNLOCK() UNP_DEFERRED_UNLOCK()
+
#define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \
"unp_mtx", "unp_mtx", \
- MTX_DUPOK|MTX_DEF|MTX_RECURSE)
+ MTX_DUPOK|MTX_DEF)
#define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx)
#define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx)
+#define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx)
#define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx)
+#define UNP_PCB_OWNED(unp) mtx_owned(&(unp)->unp_mtx)
#define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED)
+#define UNP_PCB_UNLOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
static int uipc_connect2(struct socket *, struct socket *);
static int uipc_ctloutput(struct socket *, struct sockopt *);
@@ -289,6 +316,75 @@
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
static void unp_process_defers(void * __unused, int);
+
+static void
+unp_pcb_hold(struct unpcb *unp)
+{
+ MPASS(unp->unp_refcount); /* only valid while a reference already exists */
+ refcount_acquire(&unp->unp_refcount);
+}
+
+static int
+unp_pcb_rele(struct unpcb *unp)
+{
+ int freed;
+
+ UNP_PCB_LOCK_ASSERT(unp);
+ MPASS(unp->unp_refcount);
+ if ((freed = refcount_release(&unp->unp_refcount))) {
+ /* Last reference dropped: detach must already have cleared unp_socket. */
+ MPASS(unp->unp_socket == NULL);
+ UNP_PCB_UNLOCK(unp);
+ UNP_PCB_LOCK_DESTROY(unp);
+ uma_zfree(unp_zone, unp);
+ }
+ return (freed); /* non-zero: unp was freed, so the caller must not unlock it */
+}
+
+static void
+unp_pcb_lock2(struct unpcb *unp, struct unpcb *unp2)
+{
+ UNP_PCB_UNLOCK_ASSERT(unp); /* neither lock may be held on entry */
+ UNP_PCB_UNLOCK_ASSERT(unp2);
+ if ((uintptr_t)unp2 > (uintptr_t)unp) { /* lock in ascending address order */
+ UNP_PCB_LOCK(unp);
+ UNP_PCB_LOCK(unp2);
+ } else { /* NOTE(review): if unp == unp2 this locks one mutex twice */
+ UNP_PCB_LOCK(unp2);
+ UNP_PCB_LOCK(unp);
+ }
+}
+
+static __noinline void
+unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p, int *freed)
+
+{
+ struct unpcb *unp2;
+
+ unp2 = *unp2p;
+ unp_pcb_hold((unp2)); /* keep unp2 allocated while unp is unlocked */
+ UNP_PCB_UNLOCK((unp));
+ UNP_PCB_LOCK((unp2)); /* relock both: unp2 first, then unp */
+ UNP_PCB_LOCK((unp));
+ *freed = unp_pcb_rele((unp2)); /* drop the temporary reference taken above */
+ if (*freed)
+ *unp2p = NULL; /* unp2 was freed; clear the caller's pointer */
+}
+
+#define unp_pcb_owned_lock2(unp, unp2, freed) do { \
+ freed = 0; \
+ UNP_PCB_LOCK_ASSERT((unp)); \
+ UNP_PCB_UNLOCK_ASSERT((unp2)); \
+ if (__predict_true(UNP_PCB_TRYLOCK((unp2)))) /* got unp2 without blocking */ \
+ break; \
+ else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) /* already in address order */ \
+ UNP_PCB_LOCK((unp2)); \
+ else { /* unp must be dropped and retaken after unp2 */ \
+ unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \
+ } \
+} while (0)
+
+
/*
* Definitions of protocols supported in the LOCAL domain.
*/
@@ -344,17 +440,16 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
+ UNP_PCB_UNLOCK_ASSERT(unp);
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
unp2 = unp->unp_conn;
if (unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
+ unp_pcb_hold(unp2);
+ UNP_PCB_UNLOCK(unp);
unp_drop(unp2);
- UNP_PCB_UNLOCK(unp2);
- }
- UNP_PCB_UNLOCK(unp);
- UNP_LINK_WUNLOCK();
+ } else
+ UNP_PCB_UNLOCK(unp);
}
static int
@@ -551,14 +646,12 @@
ASSERT_VOP_ELOCKED(vp, "uipc_bind");
soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
VOP_UNP_BIND(vp, unp);
unp->unp_vnode = vp;
unp->unp_addr = soun;
unp->unp_flags &= ~UNP_BINDING;
UNP_PCB_UNLOCK(unp);
- UNP_LINK_WUNLOCK();
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
free(buf, M_TEMP);
@@ -585,9 +678,7 @@
int error;
KASSERT(td == curthread, ("uipc_connect: td != curthread"));
- UNP_LINK_WLOCK();
error = unp_connect(so, nam, td);
- UNP_LINK_WUNLOCK();
return (error);
}
@@ -598,9 +689,7 @@
int error;
KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
- UNP_LINK_WLOCK();
error = unp_connectat(fd, so, nam, td);
- UNP_LINK_WUNLOCK();
return (error);
}
@@ -609,26 +698,41 @@
{
struct unpcb *unp, *unp2;
struct vnode *vp = NULL;
-
+ struct mtx *vplock;
+ int freed;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
- UNP_LINK_WLOCK();
+
+ vplock = NULL;
+ if ((vp = unp->unp_vnode) != NULL) {
+ vplock = mtx_pool_find(mtxpool_sleep, vp);
+ mtx_lock(vplock);
+ }
UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if (unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
- unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
+ if (vp && unp->unp_vnode == NULL) {
+ mtx_unlock(vplock);
+ vp = NULL;
}
- if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
+ if (vp != NULL) {
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
}
- UNP_PCB_UNLOCK(unp);
- UNP_LINK_WUNLOCK();
- if (vp)
+ unp2 = unp->unp_conn;
+ unp_pcb_hold(unp);
+ if (unp2 != NULL) {
+ unp_pcb_hold(unp2);
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ unp_disconnect(unp, unp2);
+ if (unp_pcb_rele(unp2) == 0)
+ UNP_PCB_UNLOCK(unp2);
+ }
+ if (unp_pcb_rele(unp) == 0)
+ UNP_PCB_UNLOCK(unp);
+ if (vp) {
+ mtx_unlock(vplock);
vrele(vp);
+ }
}
static int
@@ -637,17 +741,14 @@
struct unpcb *unp, *unp2;
int error;
- UNP_LINK_WLOCK();
unp = so1->so_pcb;
KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
- UNP_PCB_LOCK(unp);
unp2 = so2->so_pcb;
KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
- UNP_PCB_LOCK(unp2);
+ unp_pcb_lock2(unp, unp2);
error = unp_connect2(so1, so2, PRU_CONNECT2);
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
- UNP_LINK_WUNLOCK();
return (error);
}
@@ -655,6 +756,7 @@
uipc_detach(struct socket *so)
{
struct unpcb *unp, *unp2;
+ struct mtx *vplock;
struct sockaddr_un *saved_unp_addr;
struct vnode *vp;
int freeunp, local_unp_rights;
@@ -669,49 +771,77 @@
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
+ UNP_LINK_WUNLOCK();
+
+ UNP_PCB_UNLOCK_ASSERT(unp);
+ restart:
+ if ((vp = unp->unp_vnode) != NULL) {
+ vplock = mtx_pool_find(mtxpool_sleep, vp);
+ mtx_lock(vplock);
+ }
UNP_PCB_LOCK(unp);
- if ((unp->unp_flags & UNP_NASCENT) != 0)
+ if ((unp2 = unp->unp_conn) != NULL) {
+ unp_pcb_owned_lock2(unp, unp2, freeunp);
+ if (freeunp)
+ unp2 = NULL;
+ }
+ if (unp->unp_vnode != vp &&
+ unp->unp_vnode != NULL) {
+ mtx_unlock(vplock);
+ UNP_PCB_UNLOCK(unp);
+ if (unp2)
+ UNP_PCB_UNLOCK(unp2);
+ goto restart;
+ }
+ if ((unp->unp_flags & UNP_NASCENT) != 0) {
+ if (unp2)
+ UNP_PCB_UNLOCK(unp2);
goto teardown;
-
+ }
if ((vp = unp->unp_vnode) != NULL) {
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
}
- unp2 = unp->unp_conn;
+ unp_pcb_hold(unp);
if (unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
+ unp_pcb_hold(unp2);
unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
+ if (unp_pcb_rele(unp2) == 0)
+ UNP_PCB_UNLOCK(unp2);
}
-
- /*
- * We hold the linkage lock exclusively, so it's OK to acquire
- * multiple pcb locks at a time.
- */
+ UNP_PCB_UNLOCK(unp);
+ UNP_REF_LIST_LOCK();
while (!LIST_EMPTY(&unp->unp_refs)) {
struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
- UNP_PCB_LOCK(ref);
+ unp_pcb_hold(ref);
+ UNP_REF_LIST_UNLOCK();
+
+ MPASS(ref != unp);
+ UNP_PCB_UNLOCK_ASSERT(ref);
unp_drop(ref);
- UNP_PCB_UNLOCK(ref);
+ UNP_REF_LIST_LOCK();
}
+
+ UNP_REF_LIST_UNLOCK();
+ UNP_PCB_LOCK(unp);
+ freeunp = unp_pcb_rele(unp);
+ MPASS(freeunp == 0);
local_unp_rights = unp_rights;
teardown:
- UNP_LINK_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
- unp->unp_refcount--;
- freeunp = (unp->unp_refcount == 0);
+ unp->unp_socket = NULL;
+ freeunp = unp_pcb_rele(unp);
if (saved_unp_addr != NULL)
free(saved_unp_addr, M_SONAME);
- if (freeunp) {
- UNP_PCB_LOCK_DESTROY(unp);
- uma_zfree(unp_zone, unp);
- } else
+ if (!freeunp)
UNP_PCB_UNLOCK(unp);
- if (vp)
+ if (vp) {
+ mtx_unlock(vplock);
vrele(vp);
+ }
if (local_unp_rights)
taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
}
@@ -720,20 +850,28 @@
uipc_disconnect(struct socket *so)
{
struct unpcb *unp, *unp2;
+ int freed;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
- unp2 = unp->unp_conn;
- if (unp2 != NULL) {
- UNP_PCB_LOCK(unp2);
- unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
+ if ((unp2 = unp->unp_conn) == NULL) {
+ UNP_PCB_UNLOCK(unp);
+ return (0);
}
- UNP_PCB_UNLOCK(unp);
- UNP_LINK_WUNLOCK();
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ if (__predict_false(freed)) {
+ UNP_PCB_UNLOCK(unp);
+ return (0);
+ }
+ unp_pcb_hold(unp2);
+ unp_pcb_hold(unp);
+ unp_disconnect(unp, unp2);
+ if (unp_pcb_rele(unp) == 0)
+ UNP_PCB_UNLOCK(unp);
+ if (unp_pcb_rele(unp2) == 0)
+ UNP_PCB_UNLOCK(unp2);
return (0);
}
@@ -852,13 +990,35 @@
}
static int
+connect_internal(struct socket *so, struct sockaddr *nam, struct thread *td)
+{
+ int error;
+ struct unpcb *unp;
+
+ unp = so->so_pcb;
+ if (unp->unp_conn != NULL)
+ return (EISCONN);
+ error = unp_connect(so, nam, td);
+ if (error)
+ return (error);
+ /* On success (0) we return with unp locked; on any error it is unlocked. */
+ UNP_PCB_LOCK(unp);
+ if (unp->unp_conn == NULL) {
+ UNP_PCB_UNLOCK(unp);
+ error = ENOTCONN;
+ }
+ return (error);
+}
+
+
+static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
struct mbuf *control, struct thread *td)
{
struct unpcb *unp, *unp2;
struct socket *so2;
u_int mbcnt, sbcc;
- int error = 0;
+ int freed, error;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
@@ -866,49 +1026,66 @@
so->so_type == SOCK_SEQPACKET,
("%s: socktype %d", __func__, so->so_type));
+ freed = error = 0;
if (flags & PRUS_OOB) {
error = EOPNOTSUPP;
goto release;
}
if (control != NULL && (error = unp_internalize(&control, td)))
goto release;
- if ((nam != NULL) || (flags & PRUS_EOF))
- UNP_LINK_WLOCK();
- else
- UNP_LINK_RLOCK();
+
+ unp2 = NULL;
switch (so->so_type) {
case SOCK_DGRAM:
{
const struct sockaddr *from;
- unp2 = unp->unp_conn;
if (nam != NULL) {
- UNP_LINK_WLOCK_ASSERT();
- if (unp2 != NULL) {
- error = EISCONN;
+ /*
+ * We return with UNP_PCB_LOCK_HELD so we know that
+ * the reference is live if the pointer is valid.
+ */
+ if ((error = connect_internal(so, nam, td)))
break;
- }
- error = unp_connect(so, nam, td);
- if (error)
- break;
+ MPASS(unp->unp_conn != NULL);
unp2 = unp->unp_conn;
- }
+ } else {
+ UNP_PCB_LOCK(unp);
+ /*
+ * Because connect() and send() are non-atomic in a sendto()
+ * with a target address, it's possible that the socket will
+ * have disconnected before the send() can run. In that case
+ * return the slightly counter-intuitive but otherwise
+ * correct error that the socket is not connected.
+ */
+ if ((unp2 = unp->unp_conn) == NULL) {
+ UNP_PCB_UNLOCK(unp);
+ error = ENOTCONN;
+ break;
+ }
+ }
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ if (__predict_false(freed)) {
+ UNP_PCB_UNLOCK(unp);
+ error = ENOTCONN;
+ break;
+ }
/*
- * Because connect() and send() are non-atomic in a sendto()
- * with a target address, it's possible that the socket will
- * have disconnected before the send() can run. In that case
- * return the slightly counter-intuitive but otherwise
- * correct error that the socket is not connected.
+ * The socket referencing unp2 may have been closed
+ * or unp may have been disconnected if the unp lock
+ * was dropped to acquire unp2.
*/
- if (unp2 == NULL) {
+ if (__predict_false(unp->unp_conn == NULL) ||
+ unp2->unp_socket == NULL) {
+ UNP_PCB_UNLOCK(unp);
+ if (unp_pcb_rele(unp2) == 0)
+ UNP_PCB_UNLOCK(unp2);
error = ENOTCONN;
break;
}
- /* Lockless read. */
if (unp2->unp_flags & UNP_WANTCRED)
control = unp_addsockcred(td, control);
- UNP_PCB_LOCK(unp);
if (unp->unp_addr != NULL)
from = (struct sockaddr *)unp->unp_addr;
else
@@ -924,12 +1101,9 @@
SOCKBUF_UNLOCK(&so2->so_rcv);
error = ENOBUFS;
}
- if (nam != NULL) {
- UNP_LINK_WLOCK_ASSERT();
- UNP_PCB_LOCK(unp2);
+ if (nam != NULL)
unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
- }
+ UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
break;
}
@@ -938,42 +1112,37 @@
case SOCK_STREAM:
if ((so->so_state & SS_ISCONNECTED) == 0) {
if (nam != NULL) {
- UNP_LINK_WLOCK_ASSERT();
- error = unp_connect(so, nam, td);
- if (error)
- break; /* XXX */
- } else {
+ if ((error = connect_internal(so, nam, td)))
+ break;
+ } else {
error = ENOTCONN;
break;
}
- }
-
- /* Lockless read. */
- if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ } else if ((unp2 = unp->unp_conn) == NULL) {
+ error = ENOTCONN;
+ break;
+ } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
error = EPIPE;
break;
+ } else {
+ UNP_PCB_LOCK(unp);
+ if ((unp2 = unp->unp_conn) == NULL) {
+ UNP_PCB_UNLOCK(unp);
+ error = ENOTCONN;
+ break;
+ }
}
-
- /*
- * Because connect() and send() are non-atomic in a sendto()
- * with a target address, it's possible that the socket will
- * have disconnected before the send() can run. In that case
- * return the slightly counter-intuitive but otherwise
- * correct error that the socket is not connected.
- *
- * Locking here must be done carefully: the linkage lock
- * prevents interconnections between unpcbs from changing, so
- * we can traverse from unp to unp2 without acquiring unp's
- * lock. Socket buffer locks follow unpcb locks, so we can
- * acquire both remote and lock socket buffer locks.
- */
- unp2 = unp->unp_conn;
- if (unp2 == NULL) {
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ UNP_PCB_UNLOCK(unp);
+ if (__predict_false(freed)) {
error = ENOTCONN;
break;
}
- so2 = unp2->unp_socket;
- UNP_PCB_LOCK(unp2);
+ if ((so2 = unp2->unp_socket) == NULL) {
+ UNP_PCB_UNLOCK(unp2);
+ error = ENOTCONN;
+ break;
+ }
SOCKBUF_LOCK(&so2->so_rcv);
if (unp2->unp_flags & UNP_WANTCRED) {
/*
@@ -1046,12 +1215,6 @@
unp_shutdown(unp);
UNP_PCB_UNLOCK(unp);
}
-
- if ((nam != NULL) || (flags & PRUS_EOF))
- UNP_LINK_WUNLOCK();
- else
- UNP_LINK_RUNLOCK();
-
if (control != NULL && error != 0)
unp_dispose_mbuf(control);
@@ -1124,12 +1287,10 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
socantsendmore(so);
unp_shutdown(unp);
UNP_PCB_UNLOCK(unp);
- UNP_LINK_WUNLOCK();
return (0);
}
@@ -1333,16 +1494,11 @@
char buf[SOCK_MAXADDRLEN];
struct sockaddr *sa;
cap_rights_t rights;
- int error, len;
+ int error, len, freed;
+ struct mtx *vplock;
if (nam->sa_family != AF_UNIX)
return (EAFNOSUPPORT);
-
- UNP_LINK_WLOCK_ASSERT();
-
- unp = sotounpcb(so);
- KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
-
if (nam->sa_len > sizeof(struct sockaddr_un))
return (EINVAL);
len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
@@ -1351,12 +1507,12 @@
bcopy(soun->sun_path, buf, len);
buf[len] = 0;
+ unp = sotounpcb(so);
UNP_PCB_LOCK(unp);
if (unp->unp_flags & UNP_CONNECTING) {
UNP_PCB_UNLOCK(unp);
return (EALREADY);
}
- UNP_LINK_WUNLOCK();
unp->unp_flags |= UNP_CONNECTING;
UNP_PCB_UNLOCK(unp);
@@ -1389,11 +1545,8 @@
unp = sotounpcb(so);
KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
- /*
- * Lock linkage lock for two reasons: make sure v_socket is stable,
- * and to protect simultaneous locking of multiple pcbs.
- */
- UNP_LINK_WLOCK();
+ vplock = mtx_pool_find(mtxpool_sleep, vp);
+ mtx_lock(vplock);
VOP_UNP_CONNECT(vp, &unp2);
if (unp2 == NULL) {
error = ECONNREFUSED;
@@ -1404,8 +1557,7 @@
error = EPROTOTYPE;
goto bad2;
}
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+ unp_pcb_lock2(unp, unp2);
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
if (so2->so_options & SO_ACCEPTCONN) {
CURVNET_SET(so2->so_vnet);
@@ -1418,7 +1570,9 @@
goto bad3;
}
unp3 = sotounpcb(so2);
- UNP_PCB_LOCK(unp3);
+ UNP_PCB_UNLOCK(unp);
+ unp_pcb_owned_lock2(unp2, unp3, freed);
+ MPASS(!freed);
if (unp2->unp_addr != NULL) {
bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
unp3->unp_addr = (struct sockaddr_un *) sa;
@@ -1445,6 +1599,8 @@
unp3->unp_flags |= UNP_WANTCRED;
UNP_PCB_UNLOCK(unp2);
unp2 = unp3;
+ unp_pcb_owned_lock2(unp2, unp, freed);
+ MPASS(!freed);
#ifdef MAC
mac_socketpeer_set_from_socket(so, so2);
mac_socketpeer_set_from_socket(so2, so);
@@ -1459,12 +1615,12 @@
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
bad2:
- UNP_LINK_WUNLOCK();
+ mtx_unlock(vplock);
bad:
- if (vp != NULL)
+ if (vp != NULL) {
vput(vp);
+ }
free(sa, M_SONAME);
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
unp->unp_flags &= ~UNP_CONNECTING;
UNP_PCB_UNLOCK(unp);
@@ -1482,7 +1638,6 @@
unp2 = sotounpcb(so2);
KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
- UNP_LINK_WLOCK_ASSERT();
UNP_PCB_LOCK_ASSERT(unp);
UNP_PCB_LOCK_ASSERT(unp2);
@@ -1490,10 +1645,13 @@
return (EPROTOTYPE);
unp2->unp_flags &= ~UNP_NASCENT;
unp->unp_conn = unp2;
-
+ unp_pcb_hold(unp2);
+ unp_pcb_hold(unp);
switch (so->so_type) {
case SOCK_DGRAM:
+ UNP_REF_LIST_LOCK();
LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
+ UNP_REF_LIST_UNLOCK();
soisconnected(so);
break;
@@ -1517,31 +1675,48 @@
static void
unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
{
- struct socket *so;
+ struct socket *so, *so2;
+ int rele, freed;
KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL"));
- UNP_LINK_WLOCK_ASSERT();
UNP_PCB_LOCK_ASSERT(unp);
UNP_PCB_LOCK_ASSERT(unp2);
+ if (unp->unp_conn == NULL && unp2->unp_conn == NULL)
+ return;
+
+ MPASS(unp->unp_conn == unp2);
unp->unp_conn = NULL;
+ rele = 0;
+ so = unp->unp_socket;
+ so2 = unp2->unp_socket;
switch (unp->unp_socket->so_type) {
case SOCK_DGRAM:
+ UNP_REF_LIST_LOCK();
LIST_REMOVE(unp, unp_reflink);
- so = unp->unp_socket;
- SOCK_LOCK(so);
- so->so_state &= ~SS_ISCONNECTED;
- SOCK_UNLOCK(so);
+ UNP_REF_LIST_UNLOCK();
+ if (so) {
+ SOCK_LOCK(so);
+ so->so_state &= ~SS_ISCONNECTED;
+ SOCK_UNLOCK(so);
+ }
break;
case SOCK_STREAM:
case SOCK_SEQPACKET:
- soisdisconnected(unp->unp_socket);
+ if (so)
+ soisdisconnected(so);
+ MPASS(unp2->unp_conn == unp);
unp2->unp_conn = NULL;
- soisdisconnected(unp2->unp_socket);
+ if (so2)
+ soisdisconnected(so2);
break;
}
+ freed = unp_pcb_rele(unp);
+ MPASS(freed == 0);
+ freed = unp_pcb_rele(unp2);
+ MPASS(freed == 0);
}
/*
@@ -1625,7 +1800,7 @@
continue;
}
unp_list[i++] = unp;
- unp->unp_refcount++;
+ unp_pcb_hold(unp);
}
UNP_PCB_UNLOCK(unp);
}
@@ -1637,8 +1812,9 @@
for (i = 0; i < n; i++) {
unp = unp_list[i];
UNP_PCB_LOCK(unp);
- unp->unp_refcount--;
- if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) {
+ freeunp = unp_pcb_rele(unp);
+
+ if (freeunp == 0 && unp->unp_gencnt <= gencnt) {
xu->xu_len = sizeof *xu;
xu->xu_unpp = unp;
/*
@@ -1665,14 +1841,8 @@
sotoxsocket(unp->unp_socket, &xu->xu_socket);
UNP_PCB_UNLOCK(unp);
error = SYSCTL_OUT(req, xu, sizeof *xu);
- } else {
- freeunp = (unp->unp_refcount == 0);
+ } else if (freeunp == 0)
UNP_PCB_UNLOCK(unp);
- if (freeunp) {
- UNP_PCB_LOCK_DESTROY(unp);
- uma_zfree(unp_zone, unp);
- }
- }
}
free(xu, M_TEMP);
if (!error) {
@@ -1709,7 +1879,6 @@
struct unpcb *unp2;
struct socket *so;
- UNP_LINK_WLOCK_ASSERT();
UNP_PCB_LOCK_ASSERT(unp);
unp2 = unp->unp_conn;
@@ -1726,22 +1895,28 @@
{
struct socket *so = unp->unp_socket;
struct unpcb *unp2;
+ int freed;
- UNP_LINK_WLOCK_ASSERT();
- UNP_PCB_LOCK_ASSERT(unp);
-
/*
* Regardless of whether the socket's peer dropped the connection
* with this socket by aborting or disconnecting, POSIX requires
* that ECONNRESET is returned.
*/
- so->so_error = ECONNRESET;
+ /* acquire a reference so that unp isn't freed from underneath us */
+
+ UNP_PCB_LOCK(unp);
+ if (so)
+ so->so_error = ECONNRESET;
unp2 = unp->unp_conn;
- if (unp2 == NULL)
- return;
- UNP_PCB_LOCK(unp2);
- unp_disconnect(unp, unp2);
- UNP_PCB_UNLOCK(unp2);
+ if (unp2 != NULL) {
+ unp_pcb_hold(unp2);
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ unp_disconnect(unp, unp2);
+ if (unp_pcb_rele(unp2) == 0)
+ UNP_PCB_UNLOCK(unp2);
+ }
+ if (unp_pcb_rele(unp) == 0)
+ UNP_PCB_UNLOCK(unp);
}
static void
@@ -1881,7 +2056,7 @@
return;
#endif
unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
+ NULL, NULL, UMA_ALIGN_CACHE, 0);
if (unp_zone == NULL)
panic("unp_init");
uma_zone_set_max(unp_zone, maxsockets);
@@ -2464,13 +2639,15 @@
{
struct unpcb *unp;
int active;
+ struct mtx *vplock;
ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
KASSERT(vp->v_type == VSOCK,
("vfs_unp_reclaim: vp->v_type != VSOCK"));
active = 0;
- UNP_LINK_WLOCK();
+ vplock = mtx_pool_find(mtxpool_sleep, vp);
+ mtx_lock(vplock);
VOP_UNP_CONNECT(vp, &unp);
if (unp == NULL)
goto done;
@@ -2481,8 +2658,8 @@
active = 1;
}
UNP_PCB_UNLOCK(unp);
-done:
- UNP_LINK_WUNLOCK();
+ done:
+ mtx_unlock(vplock);
if (active)
vunref(vp);
}
Index: head/sys/sys/unpcb.h
===================================================================
--- head/sys/sys/unpcb.h
+++ head/sys/sys/unpcb.h
@@ -69,23 +69,25 @@
LIST_HEAD(unp_head, unpcb);
struct unpcb {
- LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */
- struct socket *unp_socket; /* pointer back to socket */
- struct file *unp_file; /* back-pointer to file for gc. */
- struct vnode *unp_vnode; /* if associated with file */
- ino_t unp_ino; /* fake inode number */
+ /* Cache line 1 */
+ struct mtx unp_mtx; /* mutex */
struct unpcb *unp_conn; /* control block of connected socket */
- struct unp_head unp_refs; /* referencing socket linked list */
- LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
- struct sockaddr_un *unp_addr; /* bound address of socket */
- unp_gen_t unp_gencnt; /* generation count of this instance */
+ volatile u_int unp_refcount;
short unp_flags; /* flags */
short unp_gcflag; /* Garbage collector flags. */
+ struct sockaddr_un *unp_addr; /* bound address of socket */
+ struct socket *unp_socket; /* pointer back to socket */
+ /* Cache line 2 */
+ struct vnode *unp_vnode; /* if associated with file */
struct xucred unp_peercred; /* peer credentials, if applicable */
- u_int unp_refcount;
+ LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
+ LIST_ENTRY(unpcb) unp_link; /* glue on list of all PCBs */
+ struct unp_head unp_refs; /* referencing socket linked list */
+ unp_gen_t unp_gencnt; /* generation count of this instance */
+ struct file *unp_file; /* back-pointer to file for gc. */
u_int unp_msgcount; /* references from message queue */
- struct mtx unp_mtx; /* mutex */
-};
+ ino_t unp_ino; /* fake inode number */
+} __aligned(CACHE_LINE_SIZE);
/*
* Flags in unp_flags.
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Jan 21, 1:12 AM (18 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15990957
Default Alt Text
D15430.diff (26 KB)
Attached To
Mode
D15430: make unix socket locking finer grained
Attached
Detach File
Event Timeline
Log In to Comment