Index: sys/kern/uipc_usrreq.c =================================================================== --- sys/kern/uipc_usrreq.c +++ sys/kern/uipc_usrreq.c @@ -257,13 +257,28 @@ #define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) #define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) +#define UNP_REF_LIST_LOCK() UNP_DEFERRED_LOCK(); +#define UNP_REF_LIST_UNLOCK() UNP_DEFERRED_UNLOCK(); + #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ - MTX_DUPOK|MTX_DEF|MTX_RECURSE) + MTX_DUPOK|MTX_DEF) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) +#define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) +#define UNP_PCB_OWNED(unp) mtx_owned(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) +#define UNP_PCB_UNLOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED) + +#define UNP_PCB_CONN_LOCK_INIT(unp) mtx_init(&(unp)->unp_conn_mtx, \ + "unp_conn_mtx", "unp_conn_mtx", \ + MTX_DUPOK|MTX_DEF) +#define UNP_PCB_CONN_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_conn_mtx) +#define UNP_PCB_CONN_LOCK(unp) mtx_lock(&(unp)->unp_conn_mtx) +#define UNP_PCB_CONN_UNLOCK(unp) mtx_unlock(&(unp)->unp_conn_mtx) +#define UNP_PCB_CONN_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_conn_mtx, MA_OWNED) +#define UNP_PCB_CONN_UNLOCK_ASSERT(unp) mtx_assert(&(unp)->unp_conn_mtx, MA_NOTOWNED) static int uipc_connect2(struct socket *, struct socket *); static int uipc_ctloutput(struct socket *, struct sockopt *); @@ -289,6 +304,40 @@ static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); static void unp_process_defers(void * __unused, int); +static void +unp_pcb_lock2(struct unpcb *unp, struct unpcb *unp2) +{ + UNP_PCB_UNLOCK_ASSERT(unp); + UNP_PCB_UNLOCK_ASSERT(unp2); + if ((uintptr_t)unp2 > (uintptr_t)unp) { + UNP_PCB_LOCK(unp); + UNP_PCB_LOCK(unp2); + } else { + UNP_PCB_LOCK(unp2); + UNP_PCB_LOCK(unp); + } +} + +static int +unp_pcb_owned_lock2(struct unpcb *unp, struct unpcb *unp2) +{ + + UNP_PCB_LOCK_ASSERT(unp); + if (UNP_PCB_OWNED(unp2)) + return (0); + if (__predict_true(UNP_PCB_TRYLOCK(unp2))) + return (1); + if ((uintptr_t)unp2 > (uintptr_t)unp) { + UNP_PCB_LOCK(unp2); + } else { + UNP_PCB_UNLOCK(unp); + UNP_PCB_LOCK(unp2); + UNP_PCB_LOCK(unp); + } + return (1); +} + + /* * Definitions of protocols supported in the LOCAL domain. */ @@ -344,17 +393,17 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); + UNP_PCB_UNLOCK_ASSERT(unp); - UNP_LINK_WLOCK(); + UNP_PCB_CONN_LOCK(unp); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); + UNP_PCB_UNLOCK_ASSERT(unp2); unp_drop(unp2); - UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); + UNP_PCB_CONN_UNLOCK(unp); } static int @@ -424,6 +473,7 @@ return (ENOBUFS); LIST_INIT(&unp->unp_refs); UNP_PCB_LOCK_INIT(unp); + UNP_PCB_CONN_LOCK_INIT(unp); unp->unp_socket = so; so->so_pcb = unp; unp->unp_refcount = 1; @@ -551,14 +601,12 @@ ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); VOP_UNP_BIND(vp, unp); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); VOP_UNLOCK(vp, 0); vn_finished_write(mp); free(buf, M_TEMP); @@ -585,9 +633,7 @@ int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); - UNP_LINK_WLOCK(); error = unp_connect(so, nam, td); - UNP_LINK_WUNLOCK(); return (error); } @@ -598,9 +644,7 @@ int error; KASSERT(td == curthread, ("uipc_connectat: td != curthread")); - UNP_LINK_WLOCK(); error = unp_connectat(fd, so, nam, td); - UNP_LINK_WUNLOCK(); return (error); } @@ -609,24 +653,30 @@ { struct unpcb *unp, *unp2; struct vnode *vp = NULL; + struct mtx *vplock; + int unlock; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); - UNP_LINK_WLOCK(); + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); + UNP_PCB_CONN_LOCK(unp); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); + unlock = unp_pcb_owned_lock2(unp, unp2); unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); + if (unlock) + UNP_PCB_UNLOCK(unp2); } + UNP_PCB_CONN_UNLOCK(unp); if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); + mtx_unlock(vplock); if (vp) vrele(vp); } @@ -637,17 +687,16 @@ struct unpcb *unp, *unp2; int error; - UNP_LINK_WLOCK(); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); - UNP_PCB_LOCK(unp); + UNP_PCB_CONN_LOCK(unp); unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); - UNP_PCB_LOCK(unp2); + unp_pcb_lock2(unp, unp2); error = unp_connect2(so1, so2, PRU_CONNECT2); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); + UNP_PCB_CONN_UNLOCK(unp); return (error); } @@ -658,6 +707,7 @@ struct sockaddr_un *saved_unp_addr; struct vnode *vp; int freeunp, local_unp_rights; + struct mtx *vplock; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); @@ -669,35 +719,58 @@ LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; - UNP_PCB_LOCK(unp); - if ((unp->unp_flags & UNP_NASCENT) != 0) - goto teardown; + UNP_LINK_WUNLOCK(); + UNP_PCB_UNLOCK_ASSERT(unp); + restart: + vp = unp->unp_vnode; + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); + UNP_PCB_CONN_LOCK(unp); + if ((unp2 = unp->unp_conn) != NULL) + unp_pcb_lock2(unp, unp2); + else + UNP_PCB_LOCK(unp); + if (unp->unp_vnode != vp && + unp->unp_vnode != NULL) { + mtx_unlock(vplock); + UNP_PCB_CONN_UNLOCK(unp); + UNP_PCB_UNLOCK(unp); + if (unp2) + UNP_PCB_UNLOCK(unp2); + goto restart; + } + if ((unp->unp_flags & UNP_NASCENT) != 0) { + if (unp2) + UNP_PCB_UNLOCK(unp2); + goto teardown; + } if ((vp = unp->unp_vnode) != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } - unp2 = unp->unp_conn; if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); unp_disconnect(unp, unp2); UNP_PCB_UNLOCK(unp2); } - - /* - * We hold the linkage lock exclusively, so it's OK to acquire - * multiple pcb locks at a time. - */ + unp->unp_refcount++; + UNP_PCB_UNLOCK(unp); + UNP_PCB_CONN_UNLOCK(unp); + UNP_REF_LIST_LOCK(); while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); - UNP_PCB_LOCK(ref); + UNP_REF_LIST_UNLOCK(); + UNP_PCB_UNLOCK_ASSERT(ref); unp_drop(ref); - UNP_PCB_UNLOCK(ref); + UNP_REF_LIST_LOCK(); } + + UNP_REF_LIST_UNLOCK(); + UNP_PCB_LOCK(unp); + unp->unp_refcount--; local_unp_rights = unp_rights; teardown: - UNP_LINK_WUNLOCK(); unp->unp_socket->so_pcb = NULL; saved_unp_addr = unp->unp_addr; unp->unp_addr = NULL; @@ -707,9 +780,11 @@ free(saved_unp_addr, M_SONAME); if (freeunp) { UNP_PCB_LOCK_DESTROY(unp); + UNP_PCB_CONN_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } else UNP_PCB_UNLOCK(unp); + mtx_unlock(vplock); if (vp) vrele(vp); if (local_unp_rights) @@ -724,16 +799,15 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); - UNP_LINK_WLOCK(); - UNP_PCB_LOCK(unp); + UNP_PCB_CONN_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); + unp_pcb_lock2(unp, unp2); unp_disconnect(unp, unp2); + UNP_PCB_UNLOCK(unp); UNP_PCB_UNLOCK(unp2); } - UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); + UNP_PCB_CONN_UNLOCK(unp); return (0); } @@ -858,7 +932,7 @@ struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; - int error = 0; + int unlock, error = 0; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); @@ -873,9 +947,8 @@ if (control != NULL && (error = unp_internalize(&control, td))) goto release; if ((nam != NULL) || (flags & PRUS_EOF)) - UNP_LINK_WLOCK(); - else - UNP_LINK_RLOCK(); + UNP_PCB_CONN_LOCK(unp); + switch (so->so_type) { case SOCK_DGRAM: { @@ -883,7 +956,7 @@ unp2 = unp->unp_conn; if (nam != NULL) { - UNP_LINK_WLOCK_ASSERT(); + UNP_PCB_CONN_LOCK_ASSERT(unp); if (unp2 != NULL) { error = EISCONN; break; @@ -925,10 +998,11 @@ error = ENOBUFS; } if (nam != NULL) { - UNP_LINK_WLOCK_ASSERT(); - UNP_PCB_LOCK(unp2); + UNP_PCB_CONN_LOCK_ASSERT(unp); + unlock = unp_pcb_owned_lock2(unp, unp2); unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); + if (unlock) + UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); break; @@ -938,7 +1012,6 @@ case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { - UNP_LINK_WLOCK_ASSERT(); error = unp_connect(so, nam, td); if (error) break; /* XXX */ @@ -1048,9 +1121,7 @@ } if ((nam != NULL) || (flags & PRUS_EOF)) - UNP_LINK_WUNLOCK(); - else - UNP_LINK_RUNLOCK(); + UNP_PCB_CONN_UNLOCK(unp); if (control != NULL && error != 0) unp_dispose_mbuf(control); @@ -1124,12 +1195,12 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); - UNP_LINK_WLOCK(); + UNP_PCB_CONN_LOCK(unp); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); + UNP_PCB_CONN_UNLOCK(unp); return (0); } @@ -1333,16 +1404,11 @@ char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; - int error, len; + int error, len, unlock2; + struct mtx *vplock; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); - - UNP_LINK_WLOCK_ASSERT(); - - unp = sotounpcb(so); - KASSERT(unp != NULL, ("unp_connect: unp == NULL")); - if (nam->sa_len > sizeof(struct sockaddr_un)) return (EINVAL); len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); @@ -1351,12 +1417,13 @@ bcopy(soun->sun_path, buf, len); buf[len] = 0; + unp = sotounpcb(so); + KASSERT(unp != NULL, ("unp_connect: unp == NULL")); UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_CONNECTING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } - UNP_LINK_WUNLOCK(); unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1393,8 +1460,10 @@ * Lock linkage lock for two reasons: make sure v_socket is stable, * and to protect simultaneous locking of multiple pcbs. */ - UNP_LINK_WLOCK(); + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp2); + UNP_PCB_CONN_LOCK(unp); if (unp2 == NULL) { error = ECONNREFUSED; goto bad2; @@ -1404,8 +1473,8 @@ error = EPROTOTYPE; goto bad2; } - UNP_PCB_LOCK(unp); - UNP_PCB_LOCK(unp2); + unp_pcb_lock2(unp, unp2); + unlock2 = 1; if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if (so2->so_options & SO_ACCEPTCONN) { CURVNET_SET(so2->so_vnet); @@ -1418,7 +1487,9 @@ goto bad3; } unp3 = sotounpcb(so2); - UNP_PCB_LOCK(unp3); + UNP_PCB_UNLOCK(unp); + + unlock2 = unp_pcb_owned_lock2(unp2, unp3); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; @@ -1445,6 +1516,7 @@ unp3->unp_flags |= UNP_WANTCRED; UNP_PCB_UNLOCK(unp2); unp2 = unp3; + unp_pcb_owned_lock2(unp2, unp); #ifdef MAC mac_socketpeer_set_from_socket(so, so2); mac_socketpeer_set_from_socket(so2, so); @@ -1456,15 +1528,16 @@ ("%s: unp2 %p so2 %p", __func__, unp2, so2)); error = unp_connect2(so, so2, PRU_CONNECT); bad3: - UNP_PCB_UNLOCK(unp2); + if (unlock2) + UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); bad2: - UNP_LINK_WUNLOCK(); + mtx_unlock(vplock); + UNP_PCB_CONN_UNLOCK(unp); bad: if (vp != NULL) vput(vp); free(sa, M_SONAME); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1482,7 +1555,7 @@ unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); - UNP_LINK_WLOCK_ASSERT(); + UNP_PCB_CONN_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); @@ -1493,7 +1566,9 @@ switch (so->so_type) { case SOCK_DGRAM: + UNP_REF_LIST_LOCK(); LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); + UNP_REF_LIST_UNLOCK(); soisconnected(so); break; @@ -1521,14 +1596,16 @@ KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); - UNP_LINK_WLOCK_ASSERT(); + UNP_PCB_CONN_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); unp->unp_conn = NULL; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: + UNP_REF_LIST_LOCK(); LIST_REMOVE(unp, unp_reflink); + UNP_REF_LIST_UNLOCK(); so = unp->unp_socket; SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; @@ -1670,6 +1747,7 @@ UNP_PCB_UNLOCK(unp); if (freeunp) { UNP_PCB_LOCK_DESTROY(unp); + UNP_PCB_CONN_LOCK_DESTROY(unp); uma_zfree(unp_zone, unp); } } @@ -1709,7 +1787,7 @@ struct unpcb *unp2; struct socket *so; - UNP_LINK_WLOCK_ASSERT(); + UNP_PCB_CONN_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; @@ -1726,22 +1804,28 @@ { struct socket *so = unp->unp_socket; struct unpcb *unp2; + int unlock; - UNP_LINK_WLOCK_ASSERT(); - UNP_PCB_LOCK_ASSERT(unp); /* * Regardless of whether the socket's peer dropped the connection * with this socket by aborting or disconnecting, POSIX requires * that ECONNRESET is returned. */ + /* acquire a reference so that unp isn't freed from underneath us */ + + UNP_PCB_CONN_LOCK(unp); + UNP_PCB_LOCK(unp); so->so_error = ECONNRESET; unp2 = unp->unp_conn; - if (unp2 == NULL) - return; - UNP_PCB_LOCK(unp2); - unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); + if (unp2 != NULL) { + unlock = unp_pcb_owned_lock2(unp, unp2); + unp_disconnect(unp, unp2); + if (unlock) + UNP_PCB_UNLOCK(unp2); + } + UNP_PCB_UNLOCK(unp); + UNP_PCB_CONN_UNLOCK(unp); } static void @@ -2464,16 +2548,19 @@ { struct unpcb *unp; int active; + struct mtx *vplock; ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim"); KASSERT(vp->v_type == VSOCK, ("vfs_unp_reclaim: vp->v_type != VSOCK")); active = 0; - UNP_LINK_WLOCK(); + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp); if (unp == NULL) goto done; + UNP_PCB_CONN_LOCK(unp); UNP_PCB_LOCK(unp); if (unp->unp_vnode == vp) { VOP_UNP_DETACH(vp); @@ -2481,8 +2568,9 @@ active = 1; } UNP_PCB_UNLOCK(unp); -done: - UNP_LINK_WUNLOCK(); + UNP_PCB_CONN_UNLOCK(unp); + done: + mtx_unlock(vplock); if (active) vunref(vp); } Index: sys/sys/unpcb.h =================================================================== --- sys/sys/unpcb.h +++ sys/sys/unpcb.h @@ -85,6 +85,7 @@ u_int unp_refcount; u_int unp_msgcount; /* references from message queue */ struct mtx unp_mtx; /* mutex */ + struct mtx unp_conn_mtx; /* mutex */ }; /*