Index: head/sys/kern/uipc_usrreq.c =================================================================== --- head/sys/kern/uipc_usrreq.c +++ head/sys/kern/uipc_usrreq.c @@ -279,6 +279,7 @@ "unp", "unp", \ MTX_DUPOK|MTX_DEF) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) +#define UNP_PCB_LOCKPTR(unp) (&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) #define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) @@ -368,35 +369,55 @@ UNP_PCB_UNLOCK(unp2); } -static __noinline void -unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p, - int *freed) +/* + * Try to lock the connected peer of an already locked socket. In some cases + * this requires that we unlock the current socket. The pairbusy counter is + * used to block concurrent connection attempts while the lock is dropped. The + * caller must be careful to revalidate PCB state. + */ +static struct unpcb * +unp_pcb_lock_peer(struct unpcb *unp) { struct unpcb *unp2; - unp2 = *unp2p; + UNP_PCB_LOCK_ASSERT(unp); + unp2 = unp->unp_conn; + if (__predict_false(unp2 == NULL)) + return (NULL); + if (__predict_false(unp == unp2)) + return (unp); + + UNP_PCB_UNLOCK_ASSERT(unp2); + + if (__predict_true(UNP_PCB_TRYLOCK(unp2))) + return (unp2); + if ((uintptr_t)unp2 > (uintptr_t)unp) { + UNP_PCB_LOCK(unp2); + return (unp2); + } + unp->unp_pairbusy++; unp_pcb_hold(unp2); UNP_PCB_UNLOCK(unp); + UNP_PCB_LOCK(unp2); UNP_PCB_LOCK(unp); - *freed = unp_pcb_rele(unp2); - if (*freed) - *unp2p = NULL; + KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL, + ("%s: socket %p was reconnected", __func__, unp)); + if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) { + unp->unp_flags &= ~UNP_WAITING; + wakeup(unp); + } + if (unp_pcb_rele(unp2)) { + /* unp2 is unlocked. */ + return (NULL); + } + if (unp->unp_conn == NULL) { + UNP_PCB_UNLOCK(unp2); + return (NULL); + } + return (unp2); } -#define unp_pcb_owned_lock2(unp, unp2, freed) do { \ - freed = 0; \ - UNP_PCB_LOCK_ASSERT(unp); \ - UNP_PCB_UNLOCK_ASSERT(unp2); \ - MPASS((unp) != (unp2)); \ - if (__predict_true(UNP_PCB_TRYLOCK(unp2))) \ - break; \ - else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \ - UNP_PCB_LOCK(unp2); \ - else \ - unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \ -} while (0) - /* * Definitions of protocols supported in the LOCAL domain. */ @@ -710,7 +731,7 @@ struct unpcb *unp, *unp2; struct vnode *vp = NULL; struct mtx *vplock; - int freed; + unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); @@ -728,15 +749,10 @@ VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } - unp2 = unp->unp_conn; - if (__predict_false(unp == unp2)) { + if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) unp_disconnect(unp, unp2); - } else if (unp2 != NULL) { - unp_pcb_owned_lock2(unp, unp2, freed); - unp_disconnect(unp, unp2); - } else { + else UNP_PCB_UNLOCK(unp); - } if (vp) { mtx_unlock(vplock); vrele(vp); @@ -765,14 +781,13 @@ struct unpcb *unp, *unp2; struct mtx *vplock; struct vnode *vp; - int freeunp, local_unp_rights; + int local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); vp = NULL; vplock = NULL; - local_unp_rights = 0; SOCK_LOCK(so); if (!SOLISTENING(so)) { @@ -811,21 +826,11 @@ VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } - if (__predict_false(unp == unp->unp_conn)) { - unp_disconnect(unp, unp); - unp2 = NULL; - } else { - if ((unp2 = unp->unp_conn) != NULL) { - unp_pcb_owned_lock2(unp, unp2, freeunp); - if (freeunp) - unp2 = NULL; - } - unp_pcb_hold(unp); - if (unp2 != NULL) - unp_disconnect(unp, unp2); - else - UNP_PCB_UNLOCK(unp); - } + if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) + unp_disconnect(unp, unp2); + else + UNP_PCB_UNLOCK(unp); + UNP_REF_LIST_LOCK(); while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); @@ -838,11 +843,9 @@ unp_drop(ref); UNP_REF_LIST_LOCK(); } - UNP_REF_LIST_UNLOCK(); + UNP_PCB_LOCK(unp); - freeunp = unp_pcb_rele(unp); - MPASS(freeunp == 0); local_unp_rights = unp_rights; unp->unp_socket->so_pcb = NULL; unp->unp_socket = NULL; @@ -862,24 +865,15 @@ uipc_disconnect(struct socket *so) { struct unpcb *unp, *unp2; - int freed; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); UNP_PCB_LOCK(unp); - if ((unp2 = unp->unp_conn) == NULL) { + if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) + unp_disconnect(unp, unp2); + else UNP_PCB_UNLOCK(unp); - return (0); - } - if (__predict_true(unp != unp2)) { - unp_pcb_owned_lock2(unp, unp2, freed); - if (__predict_false(freed)) { - UNP_PCB_UNLOCK(unp); - return (0); - } - } - unp_disconnect(unp, unp2); return (0); } @@ -998,27 +992,6 @@ } static int -connect_internal(struct socket *so, struct sockaddr *nam, struct thread *td) -{ - int error; - struct unpcb *unp; - - unp = so->so_pcb; - if (unp->unp_conn != NULL) - return (EISCONN); - error = unp_connect(so, nam, td); - if (error) - return (error); - UNP_PCB_LOCK(unp); - if (unp->unp_conn == NULL) { - UNP_PCB_UNLOCK(unp); - if (error == 0) - error = ENOTCONN; - } - return (error); -} - -static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { @@ -1048,57 +1021,26 @@ const struct sockaddr *from; if (nam != NULL) { - /* - * We return with UNP_PCB_LOCK_HELD so we know that - * the reference is live if the pointer is valid. - */ - if ((error = connect_internal(so, nam, td))) + error = unp_connect(so, nam, td); + if (error != 0) break; - MPASS(unp->unp_conn != NULL); - unp2 = unp->unp_conn; - } else { - UNP_PCB_LOCK(unp); - - /* - * Because connect() and send() are non-atomic in a sendto() - * with a target address, it's possible that the socket will - * have disconnected before the send() can run. In that case - * return the slightly counter-intuitive but otherwise - * correct error that the socket is not connected. - */ - if ((unp2 = unp->unp_conn) == NULL) { - UNP_PCB_UNLOCK(unp); - error = ENOTCONN; - break; - } } - if (__predict_false(unp == unp2)) { - if (unp->unp_socket == NULL) { - error = ENOTCONN; - break; - } - goto connect_self; - } - unp_pcb_owned_lock2(unp, unp2, freed); - if (__predict_false(freed)) { - UNP_PCB_UNLOCK(unp); - error = ENOTCONN; - break; - } + UNP_PCB_LOCK(unp); + /* - * The socket referencing unp2 may have been closed - * or unp may have been disconnected if the unp lock - * was dropped to acquire unp2. + * Because connect() and send() are non-atomic in a sendto() + * with a target address, it's possible that the socket will + * have disconnected before the send() can run. In that case + * return the slightly counter-intuitive but otherwise + * correct error that the socket is not connected. */ - if (__predict_false(unp->unp_conn == NULL) || - unp2->unp_socket == NULL) { + unp2 = unp_pcb_lock_peer(unp); + if (unp2 == NULL) { UNP_PCB_UNLOCK(unp); - if (unp_pcb_rele(unp2) == 0) - UNP_PCB_UNLOCK(unp2); error = ENOTCONN; break; } - connect_self: + if (unp2->unp_flags & UNP_WANTCRED) control = unp_addsockcred(td, control); if (unp->unp_addr != NULL) @@ -1127,36 +1069,26 @@ case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { - error = connect_internal(so, nam, td); + error = unp_connect(so, nam, td); if (error != 0) break; } else { error = ENOTCONN; break; } - } else { - UNP_PCB_LOCK(unp); } - if ((unp2 = unp->unp_conn) == NULL) { + UNP_PCB_LOCK(unp); + if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) { UNP_PCB_UNLOCK(unp); error = ENOTCONN; break; } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) { - UNP_PCB_UNLOCK(unp); + unp_pcb_unlock_pair(unp, unp2); error = EPIPE; break; - } else if ((unp2 = unp->unp_conn) == NULL) { - UNP_PCB_UNLOCK(unp); - error = ENOTCONN; - break; } - unp_pcb_owned_lock2(unp, unp2, freed); UNP_PCB_UNLOCK(unp); - if (__predict_false(freed)) { - error = ENOTCONN; - break; - } if ((so2 = unp2->unp_socket) == NULL) { UNP_PCB_UNLOCK(unp2); error = ENOTCONN; @@ -1291,30 +1223,19 @@ ("%s: unexpected socket type for %p", __func__, so)); UNP_PCB_LOCK(unp); - if ((unp2 = unp->unp_conn) == NULL) { + if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) { UNP_PCB_UNLOCK(unp); - goto search; + so2 = unp2->unp_socket; + SOCKBUF_LOCK(&so2->so_rcv); + if ((error = sbready(&so2->so_rcv, m, count)) == 0) + sorwakeup_locked(so2); + else + SOCKBUF_UNLOCK(&so2->so_rcv); + UNP_PCB_UNLOCK(unp2); + return (error); } - if (unp != unp2) { - if (UNP_PCB_TRYLOCK(unp2) == 0) { - unp_pcb_hold(unp2); - UNP_PCB_UNLOCK(unp); - UNP_PCB_LOCK(unp2); - if (unp_pcb_rele(unp2)) - goto search; - } else - UNP_PCB_UNLOCK(unp); - } - so2 = unp2->unp_socket; - SOCKBUF_LOCK(&so2->so_rcv); - if ((error = sbready(&so2->so_rcv, m, count)) == 0) - sorwakeup_locked(so2); - else - SOCKBUF_UNLOCK(&so2->so_rcv); - UNP_PCB_UNLOCK(unp2); - return (error); + UNP_PCB_UNLOCK(unp); -search: /* * The receiving socket has been disconnected, but may still be valid. * In this case, the now-ready mbufs are still present in its socket @@ -1566,7 +1487,7 @@ char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; - int error, len, freed; + int error, len; bool connreq; if (nam->sa_family != AF_UNIX) @@ -1582,9 +1503,33 @@ unp = sotounpcb(so); UNP_PCB_LOCK(unp); - if (unp->unp_flags & UNP_CONNECTING) { - UNP_PCB_UNLOCK(unp); - return (EALREADY); + for (;;) { + /* + * Wait for connection state to stabilize. If a connection + * already exists, give up. For datagram sockets, which permit + * multiple consecutive connect(2) calls, upper layers are + * responsible for disconnecting in advance of a subsequent + * connect(2), but this is not synchronized with PCB connection + * state. + * + * Also make sure that no threads are currently attempting to + * lock the peer socket, to ensure that unp_conn cannot + * transition between two valid sockets while locks are dropped. + */ + if (unp->unp_conn != NULL) { + UNP_PCB_UNLOCK(unp); + return (EISCONN); + } + if ((unp->unp_flags & UNP_CONNECTING) != 0) { + UNP_PCB_UNLOCK(unp); + return (EALREADY); + } + if (unp->unp_pairbusy > 0) { + unp->unp_flags |= UNP_WAITING; + mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0); + continue; + } + break; } unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1657,12 +1602,12 @@ UNP_PCB_UNLOCK(unp2); unp2 = unp3; - unp_pcb_owned_lock2(unp2, unp, freed); - if (__predict_false(freed)) { - UNP_PCB_UNLOCK(unp2); - error = ECONNREFUSED; - goto bad2; - } + + /* + * It is safe to block on the PCB lock here since unp2 is + * nascent and cannot be connected to any other sockets. + */ + UNP_PCB_LOCK(unp); #ifdef MAC mac_socketpeer_set_from_socket(so, so2); mac_socketpeer_set_from_socket(so2, so); @@ -1683,6 +1628,8 @@ } free(sa, M_SONAME); UNP_PCB_LOCK(unp); + KASSERT((unp->unp_flags & UNP_CONNECTING) != 0, + ("%s: unp %p has UNP_CONNECTING clear", __func__, unp)); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); return (error); @@ -1722,6 +1669,8 @@ UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); + KASSERT(unp->unp_conn == NULL, + ("%s: socket %p is already connected", __func__, unp)); if (so2->so_type != so->so_type) return (EPROTOTYPE); @@ -1738,6 +1687,8 @@ case SOCK_STREAM: case SOCK_SEQPACKET: + KASSERT(unp2->unp_conn == NULL, + ("%s: socket %p is already connected", __func__, unp2)); unp2->unp_conn = unp; if (req == PRU_CONNECT && ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) @@ -1992,25 +1943,18 @@ { struct socket *so = unp->unp_socket; struct unpcb *unp2; - int freed; /* * Regardless of whether the socket's peer dropped the connection * with this socket by aborting or disconnecting, POSIX requires * that ECONNRESET is returned. */ - /* acquire a reference so that unp isn't freed from underneath us */ UNP_PCB_LOCK(unp); if (so) so->so_error = ECONNRESET; - unp2 = unp->unp_conn; - /* Last reference dropped in unp_disconnect(). */ - if (unp2 == unp) { - unp_pcb_rele_notlast(unp); - unp_disconnect(unp, unp2); - } else if (unp2 != NULL) { - unp_pcb_owned_lock2(unp, unp2, freed); + if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) { + /* Last reference dropped in unp_disconnect(). */ unp_pcb_rele_notlast(unp); unp_disconnect(unp, unp2); } else if (!unp_pcb_rele(unp)) { Index: head/sys/sys/unpcb.h =================================================================== --- head/sys/sys/unpcb.h +++ head/sys/sys/unpcb.h @@ -85,6 +85,7 @@ struct sockaddr_un *unp_addr; /* (p) bound address of socket */ struct socket *unp_socket; /* (c) pointer back to socket */ /* Cache line 2 */ + u_int unp_pairbusy; /* (p) threads acquiring peer locks */ struct vnode *unp_vnode; /* (p) associated file if applicable */ struct xucred unp_peercred; /* (p) peer credentials if applicable */ LIST_ENTRY(unpcb) unp_reflink; /* (l) link in unp_refs list */ @@ -117,6 +118,7 @@ */ #define UNP_CONNECTING 0x010 /* Currently connecting. */ #define UNP_BINDING 0x020 /* Currently binding. */ +#define UNP_WAITING 0x040 /* Peer state is changing. */ /* * Flags in unp_gcflag.