Index: sys/kern/uipc_usrreq.c =================================================================== --- sys/kern/uipc_usrreq.c +++ sys/kern/uipc_usrreq.c @@ -191,12 +191,40 @@ /* * Locking and synchronization: * - * Two types of locks exist in the local domain socket implementation: a - * a global linkage rwlock and per-unpcb mutexes. The linkage lock protects - * the socket count, global generation number, stream/datagram global lists and - * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be - * held exclusively over the acquisition of multiple unpcb locks to prevent - * deadlock. + * Three types of locks exist in the local domain socket implementation: a + * a global linkage rwlock, the mtxpool lock, and per-unpcb mutexes. + * The linkage lock protects the socket count, global generation number, + * and stream/datagram global lists. + * + * The mtxpool lock protects the vnode from being modified while referenced. + * Lock ordering requires that it be acquired before any unpcb locks. + * + * The unpcb lock (unp_mtx) protects all fields in the unpcb. Of particular + * note is that this includes the unp_conn field. So long as the unpcb lock + * is held the reference to the unpcb pointed to by unp_conn is valid. If we + * require that the unpcb pointed to by unp_conn remain live in cases where + * we need to drop the unp_mtx as when we need to acquire the lock for a + * second unpcb the caller must first acquire an additional reference on the + * second unpcb and then revalidate any state (typically check that unp_conn + * is non-NULL) upon requiring the initial unpcb lock. The lock ordering + * between unpcbs is the conventional ascending address order. Two helper + * routines exist for this: + * + * - unp_pcb_lock2(unp, unp2) - which just acquires the two locks in the + * safe ordering. + * + * - unp_pcb_owned_lock2(unp, unp2, freed) - the lock for unp is held + * when called. If unp is unlocked and unp2 is subsequently freed + * freed will be set to 1. + * + * The helper routines for references are: + * + * - unp_pcb_hold(unp): Can be called any time we currently hold a valid + * reference to unp. + * + * - unp_pcb_rele(unp): The caller must hold the unp lock. If we are + * releasing the last reference, detach must have been called thus + * unp->unp_socket be NULL. * * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, * allocated in pru_attach() and freed in pru_detach(). The validity of that @@ -210,15 +238,8 @@ * to the unpcb is held. Typically, this reference will be from the socket, * or from another unpcb when the referring unpcb's lock is held (in order * that the reference not be invalidated during use). For example, to follow - * unp->unp_conn->unp_socket, you need unlock the lock on unp, not unp_conn, - * as unp_socket remains valid as long as the reference to unp_conn is valid. - * - * Fields of unpcbss are locked using a per-unpcb lock, unp_mtx. Individual - * atomic reads without the lock may be performed "lockless", but more - * complex reads and read-modify-writes require the mutex to be held. No - * lock order is defined between unpcb locks -- multiple unpcb locks may be - * acquired at the same time only when holding the linkage rwlock - * exclusively, which prevents deadlocks. + * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee + * that detach is not run clearing unp_socket. * * Blocking with UNIX domain sockets is a tricky issue: unlike most network * protocols, bind() is a non-atomic operation, and connect() requires @@ -257,13 +278,19 @@ #define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) #define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) +#define UNP_REF_LIST_LOCK() UNP_DEFERRED_LOCK(); +#define UNP_REF_LIST_UNLOCK() UNP_DEFERRED_UNLOCK(); + #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ - MTX_DUPOK|MTX_DEF|MTX_RECURSE) + MTX_DUPOK|MTX_DEF) #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) +#define UNP_PCB_TRYLOCK(unp) mtx_trylock(&(unp)->unp_mtx) #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) +#define UNP_PCB_OWNED(unp) mtx_owned(&(unp)->unp_mtx) #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) +#define UNP_PCB_UNLOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED) static int uipc_connect2(struct socket *, struct socket *); static int uipc_ctloutput(struct socket *, struct sockopt *); @@ -289,6 +316,63 @@ static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); static void unp_process_defers(void * __unused, int); + +static void +unp_pcb_hold(struct unpcb *unp) +{ + MPASS(unp->unp_refcount); + refcount_acquire(&unp->unp_refcount); +} + +static int +unp_pcb_rele(struct unpcb *unp) +{ + int freed; + + UNP_PCB_LOCK_ASSERT(unp); + MPASS(unp->unp_refcount); + if ((freed = refcount_release(&unp->unp_refcount))) { + /* we got here with having detached? */ + MPASS(unp->unp_socket == NULL); + UNP_PCB_UNLOCK(unp); + UNP_PCB_LOCK_DESTROY(unp); + uma_zfree(unp_zone, unp); + } + return (freed); +} + +static void +unp_pcb_lock2(struct unpcb *unp, struct unpcb *unp2) +{ + UNP_PCB_UNLOCK_ASSERT(unp); + UNP_PCB_UNLOCK_ASSERT(unp2); + if ((uintptr_t)unp2 > (uintptr_t)unp) { + UNP_PCB_LOCK(unp); + UNP_PCB_LOCK(unp2); + } else { + UNP_PCB_LOCK(unp2); + UNP_PCB_LOCK(unp); + } +} + +#define unp_pcb_owned_lock2(unp, unp2, freed) do { \ + freed = 0; \ + UNP_PCB_LOCK_ASSERT((unp)); \ + UNP_PCB_UNLOCK_ASSERT((unp2)); \ + if (__predict_true(UNP_PCB_TRYLOCK((unp2)))) \ + ; \ + else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \ + UNP_PCB_LOCK((unp2)); \ + else { \ + unp_pcb_hold((unp2)); \ + UNP_PCB_UNLOCK((unp)); \ + UNP_PCB_LOCK((unp2)); \ + UNP_PCB_LOCK((unp)); \ + freed = unp_pcb_rele((unp2)); \ + } \ +} while (0) + + /* * Definitions of protocols supported in the LOCAL domain. */ @@ -344,17 +428,15 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); + UNP_PCB_UNLOCK_ASSERT(unp); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); + unp_pcb_hold(unp2); unp_drop(unp2); - UNP_PCB_UNLOCK(unp2); } UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); } static int @@ -551,14 +633,12 @@ ASSERT_VOP_ELOCKED(vp, "uipc_bind"); soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); VOP_UNP_BIND(vp, unp); unp->unp_vnode = vp; unp->unp_addr = soun; unp->unp_flags &= ~UNP_BINDING; UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); VOP_UNLOCK(vp, 0); vn_finished_write(mp); free(buf, M_TEMP); @@ -585,9 +665,7 @@ int error; KASSERT(td == curthread, ("uipc_connect: td != curthread")); - UNP_LINK_WLOCK(); error = unp_connect(so, nam, td); - UNP_LINK_WUNLOCK(); return (error); } @@ -598,9 +676,7 @@ int error; KASSERT(td == curthread, ("uipc_connectat: td != curthread")); - UNP_LINK_WLOCK(); error = unp_connectat(fd, so, nam, td); - UNP_LINK_WUNLOCK(); return (error); } @@ -609,26 +685,36 @@ { struct unpcb *unp, *unp2; struct vnode *vp = NULL; - + struct mtx *vplock; + int freed; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_close: unp == NULL")); - UNP_LINK_WLOCK(); + + if ((vp = unp->unp_vnode) != NULL) { + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); + } UNP_PCB_LOCK(unp); unp2 = unp->unp_conn; - if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); - unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); - } - if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) { + if (vp != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } - UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); - if (vp) + unp_pcb_hold(unp); + if (unp2 != NULL) { + unp_pcb_hold(unp2); + unp_pcb_owned_lock2(unp, unp2, freed); + unp_disconnect(unp, unp2); + if (unp_pcb_rele(unp2) == 0) + UNP_PCB_UNLOCK(unp2); + } + if (unp_pcb_rele(unp) == 0) + UNP_PCB_UNLOCK(unp); + if (vp) { + mtx_unlock(vplock); vrele(vp); + } } static int @@ -637,17 +723,14 @@ struct unpcb *unp, *unp2; int error; - UNP_LINK_WLOCK(); unp = so1->so_pcb; KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); - UNP_PCB_LOCK(unp); unp2 = so2->so_pcb; KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); - UNP_PCB_LOCK(unp2); + unp_pcb_lock2(unp, unp2); error = unp_connect2(so1, so2, PRU_CONNECT2); UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); return (error); } @@ -658,6 +741,7 @@ struct sockaddr_un *saved_unp_addr; struct vnode *vp; int freeunp, local_unp_rights; + struct mtx *vplock; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); @@ -669,49 +753,77 @@ LIST_REMOVE(unp, unp_link); unp->unp_gencnt = ++unp_gencnt; --unp_count; + UNP_LINK_WUNLOCK(); + + UNP_PCB_UNLOCK_ASSERT(unp); + restart: + if ((vp = unp->unp_vnode) != NULL) { + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); + } UNP_PCB_LOCK(unp); - if ((unp->unp_flags & UNP_NASCENT) != 0) + if ((unp2 = unp->unp_conn) != NULL) { + unp_pcb_owned_lock2(unp, unp2, freeunp); + if (freeunp) + unp2 = NULL; + } + if (unp->unp_vnode != vp && + unp->unp_vnode != NULL) { + mtx_unlock(vplock); + UNP_PCB_UNLOCK(unp); + if (unp2) + UNP_PCB_UNLOCK(unp2); + goto restart; + } + if ((unp->unp_flags & UNP_NASCENT) != 0) { + if (unp2) + UNP_PCB_UNLOCK(unp2); goto teardown; - + } if ((vp = unp->unp_vnode) != NULL) { VOP_UNP_DETACH(vp); unp->unp_vnode = NULL; } - unp2 = unp->unp_conn; + unp_pcb_hold(unp); if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); + unp_pcb_hold(unp2); unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); + if (unp_pcb_rele(unp2) == 0) + UNP_PCB_UNLOCK(unp2); } - - /* - * We hold the linkage lock exclusively, so it's OK to acquire - * multiple pcb locks at a time. - */ + UNP_PCB_UNLOCK(unp); + UNP_REF_LIST_LOCK(); while (!LIST_EMPTY(&unp->unp_refs)) { struct unpcb *ref = LIST_FIRST(&unp->unp_refs); - UNP_PCB_LOCK(ref); + unp_pcb_hold(ref); + UNP_REF_LIST_UNLOCK(); + + MPASS(ref != unp); + UNP_PCB_UNLOCK_ASSERT(ref); unp_drop(ref); - UNP_PCB_UNLOCK(ref); + UNP_REF_LIST_LOCK(); } + + UNP_REF_LIST_UNLOCK(); + UNP_PCB_LOCK(unp); + freeunp = unp_pcb_rele(unp); + MPASS(freeunp == 0); local_unp_rights = unp_rights; teardown: - UNP_LINK_WUNLOCK(); unp->unp_socket->so_pcb = NULL; saved_unp_addr = unp->unp_addr; unp->unp_addr = NULL; - unp->unp_refcount--; - freeunp = (unp->unp_refcount == 0); + unp->unp_socket = NULL; + freeunp = unp_pcb_rele(unp); if (saved_unp_addr != NULL) free(saved_unp_addr, M_SONAME); - if (freeunp) { - UNP_PCB_LOCK_DESTROY(unp); - uma_zfree(unp_zone, unp); - } else + if (!freeunp) UNP_PCB_UNLOCK(unp); - if (vp) + if (vp) { + mtx_unlock(vplock); vrele(vp); + } if (local_unp_rights) taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); } @@ -720,20 +832,28 @@ uipc_disconnect(struct socket *so) { struct unpcb *unp, *unp2; + int freed; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); - unp2 = unp->unp_conn; - if (unp2 != NULL) { - UNP_PCB_LOCK(unp2); - unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); + if ((unp2 = unp->unp_conn) == NULL) { + UNP_PCB_UNLOCK(unp); + return (0); } - UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); + unp_pcb_owned_lock2(unp, unp2, freed); + if (__predict_false(freed)) { + UNP_PCB_UNLOCK(unp); + return (0); + } + unp_pcb_hold(unp2); + unp_pcb_hold(unp); + unp_disconnect(unp, unp2); + if (unp_pcb_rele(unp) == 0) + UNP_PCB_UNLOCK(unp); + if (unp_pcb_rele(unp2) == 0) + UNP_PCB_UNLOCK(unp2); return (0); } @@ -851,6 +971,28 @@ return (0); } +static int +connect_internal(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + int error; + struct unpcb *unp; + + unp = so->so_pcb; + if (unp->unp_conn != NULL) + return (EISCONN); + error = unp_connect(so, nam, td); + if (error) + return (error); + UNP_PCB_LOCK(unp); + if (unp->unp_conn == NULL) { + UNP_PCB_UNLOCK(unp); + if (error == 0) + error = ENOTCONN; + } + return (error); +} + + static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) @@ -858,7 +1000,7 @@ struct unpcb *unp, *unp2; struct socket *so2; u_int mbcnt, sbcc; - int error = 0; + int freed, error; unp = sotounpcb(so); KASSERT(unp != NULL, ("%s: unp == NULL", __func__)); @@ -866,49 +1008,62 @@ so->so_type == SOCK_SEQPACKET, ("%s: socktype %d", __func__, so->so_type)); + freed = error = 0; if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control != NULL && (error = unp_internalize(&control, td))) goto release; - if ((nam != NULL) || (flags & PRUS_EOF)) - UNP_LINK_WLOCK(); - else - UNP_LINK_RLOCK(); + + unp2 = NULL; switch (so->so_type) { case SOCK_DGRAM: { const struct sockaddr *from; - unp2 = unp->unp_conn; if (nam != NULL) { - UNP_LINK_WLOCK_ASSERT(); - if (unp2 != NULL) { - error = EISCONN; - break; - } - error = unp_connect(so, nam, td); - if (error) + /* + * We return with UNP_PCB_LOCK_HELD so we know that + * the reference is live if the pointer is valid. + */ + if ((error = connect_internal(so, nam, td))) break; + MPASS(unp->unp_conn != NULL); unp2 = unp->unp_conn; - } + } else { + UNP_PCB_LOCK(unp); + /* + * Because connect() and send() are non-atomic in a sendto() + * with a target address, it's possible that the socket will + * have disconnected before the send() can run. In that case + * return the slightly counter-intuitive but otherwise + * correct error that the socket is not connected. + */ + if ((unp2 = unp->unp_conn) == NULL) { + UNP_PCB_UNLOCK(unp); + error = ENOTCONN; + break; + } + } + unp_pcb_hold(unp2); + unp_pcb_owned_lock2(unp, unp2, freed); /* - * Because connect() and send() are non-atomic in a sendto() - * with a target address, it's possible that the socket will - * have disconnected before the send() can run. In that case - * return the slightly counter-intuitive but otherwise - * correct error that the socket is not connected. + * The socket referencing unp2 may have been closed + * or unp may have been disconnected if the unp lock + * was dropped to acquire unp2. */ - if (unp2 == NULL) { + if (__predict_false(unp->unp_conn == NULL) || + unp2->unp_socket == NULL) { + UNP_PCB_UNLOCK(unp); + if (unp_pcb_rele(unp2) == 0) + UNP_PCB_UNLOCK(unp2); error = ENOTCONN; break; } - /* Lockless read. */ if (unp2->unp_flags & UNP_WANTCRED) control = unp_addsockcred(td, control); - UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; else @@ -925,11 +1080,11 @@ error = ENOBUFS; } if (nam != NULL) { - UNP_LINK_WLOCK_ASSERT(); - UNP_PCB_LOCK(unp2); + unp_pcb_owned_lock2(unp, unp2, freed); unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); } + if (unp_pcb_rele(unp2) == 0) + UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); break; } @@ -938,42 +1093,36 @@ case SOCK_STREAM: if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam != NULL) { - UNP_LINK_WLOCK_ASSERT(); - error = unp_connect(so, nam, td); - if (error) - break; /* XXX */ - } else { + if ((error = connect_internal(so, nam, td))) + break; + } else { error = ENOTCONN; break; } - } - - /* Lockless read. */ - if (so->so_snd.sb_state & SBS_CANTSENDMORE) { + } else if ((unp2 = unp->unp_conn) == NULL) { + error = ENOTCONN; + break; + } else if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; break; + } else { + UNP_PCB_LOCK(unp); + if ((unp2 = unp->unp_conn) == NULL) { + UNP_PCB_UNLOCK(unp); + error = ENOTCONN; + break; + } } - - /* - * Because connect() and send() are non-atomic in a sendto() - * with a target address, it's possible that the socket will - * have disconnected before the send() can run. In that case - * return the slightly counter-intuitive but otherwise - * correct error that the socket is not connected. - * - * Locking here must be done carefully: the linkage lock - * prevents interconnections between unpcbs from changing, so - * we can traverse from unp to unp2 without acquiring unp's - * lock. Socket buffer locks follow unpcb locks, so we can - * acquire both remote and lock socket buffer locks. - */ - unp2 = unp->unp_conn; - if (unp2 == NULL) { + unp_pcb_hold(unp2); + UNP_PCB_UNLOCK(unp); + UNP_PCB_LOCK(unp2); + if ((so2 = unp2->unp_socket) == NULL) { + UNP_PCB_UNLOCK(unp2); + unp_pcb_rele(unp2); error = ENOTCONN; break; } - so2 = unp2->unp_socket; - UNP_PCB_LOCK(unp2); + unp_pcb_rele(unp2); SOCKBUF_LOCK(&so2->so_rcv); if (unp2->unp_flags & UNP_WANTCRED) { /* @@ -1046,12 +1195,6 @@ unp_shutdown(unp); UNP_PCB_UNLOCK(unp); } - - if ((nam != NULL) || (flags & PRUS_EOF)) - UNP_LINK_WUNLOCK(); - else - UNP_LINK_RUNLOCK(); - if (control != NULL && error != 0) unp_dispose_mbuf(control); @@ -1124,12 +1267,10 @@ unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); socantsendmore(so); unp_shutdown(unp); UNP_PCB_UNLOCK(unp); - UNP_LINK_WUNLOCK(); return (0); } @@ -1333,16 +1474,11 @@ char buf[SOCK_MAXADDRLEN]; struct sockaddr *sa; cap_rights_t rights; - int error, len; + int error, len, freed; + struct mtx *vplock; if (nam->sa_family != AF_UNIX) return (EAFNOSUPPORT); - - UNP_LINK_WLOCK_ASSERT(); - - unp = sotounpcb(so); - KASSERT(unp != NULL, ("unp_connect: unp == NULL")); - if (nam->sa_len > sizeof(struct sockaddr_un)) return (EINVAL); len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); @@ -1351,12 +1487,13 @@ bcopy(soun->sun_path, buf, len); buf[len] = 0; + unp = sotounpcb(so); + KASSERT(unp != NULL, ("unp_connect: unp == NULL")); UNP_PCB_LOCK(unp); if (unp->unp_flags & UNP_CONNECTING) { UNP_PCB_UNLOCK(unp); return (EALREADY); } - UNP_LINK_WUNLOCK(); unp->unp_flags |= UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1393,7 +1530,8 @@ * Lock linkage lock for two reasons: make sure v_socket is stable, * and to protect simultaneous locking of multiple pcbs. */ - UNP_LINK_WLOCK(); + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp2); if (unp2 == NULL) { error = ECONNREFUSED; @@ -1404,8 +1542,7 @@ error = EPROTOTYPE; goto bad2; } - UNP_PCB_LOCK(unp); - UNP_PCB_LOCK(unp2); + unp_pcb_lock2(unp, unp2); if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if (so2->so_options & SO_ACCEPTCONN) { CURVNET_SET(so2->so_vnet); @@ -1418,7 +1555,9 @@ goto bad3; } unp3 = sotounpcb(so2); - UNP_PCB_LOCK(unp3); + UNP_PCB_UNLOCK(unp); + unp_pcb_owned_lock2(unp2, unp3, freed); + MPASS(!freed); if (unp2->unp_addr != NULL) { bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); unp3->unp_addr = (struct sockaddr_un *) sa; @@ -1445,6 +1584,8 @@ unp3->unp_flags |= UNP_WANTCRED; UNP_PCB_UNLOCK(unp2); unp2 = unp3; + unp_pcb_owned_lock2(unp2, unp, freed); + MPASS(!freed); #ifdef MAC mac_socketpeer_set_from_socket(so, so2); mac_socketpeer_set_from_socket(so2, so); @@ -1459,12 +1600,12 @@ UNP_PCB_UNLOCK(unp2); UNP_PCB_UNLOCK(unp); bad2: - UNP_LINK_WUNLOCK(); + mtx_unlock(vplock); bad: - if (vp != NULL) + if (vp != NULL) { vput(vp); + } free(sa, M_SONAME); - UNP_LINK_WLOCK(); UNP_PCB_LOCK(unp); unp->unp_flags &= ~UNP_CONNECTING; UNP_PCB_UNLOCK(unp); @@ -1482,7 +1623,6 @@ unp2 = sotounpcb(so2); KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); - UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); @@ -1490,10 +1630,13 @@ return (EPROTOTYPE); unp2->unp_flags &= ~UNP_NASCENT; unp->unp_conn = unp2; - + unp_pcb_hold(unp2); + unp_pcb_hold(unp); switch (so->so_type) { case SOCK_DGRAM: + UNP_REF_LIST_LOCK(); LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); + UNP_REF_LIST_UNLOCK(); soisconnected(so); break; @@ -1517,31 +1660,48 @@ static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2) { - struct socket *so; + struct socket *so, *so2; + int rele, freed; KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); - UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); UNP_PCB_LOCK_ASSERT(unp2); + if (unp->unp_conn == NULL && unp2->unp_conn == NULL) + return; + + MPASS(unp->unp_conn == unp2); unp->unp_conn = NULL; + rele = 0; + so = unp->unp_socket; + so2 = unp2->unp_socket; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: + UNP_REF_LIST_LOCK(); LIST_REMOVE(unp, unp_reflink); - so = unp->unp_socket; - SOCK_LOCK(so); - so->so_state &= ~SS_ISCONNECTED; - SOCK_UNLOCK(so); + UNP_REF_LIST_UNLOCK(); + if (so) { + SOCK_LOCK(so); + so->so_state &= ~SS_ISCONNECTED; + SOCK_UNLOCK(so); + } break; case SOCK_STREAM: case SOCK_SEQPACKET: - soisdisconnected(unp->unp_socket); + if (so) + soisdisconnected(so); + MPASS(unp2->unp_conn == unp); unp2->unp_conn = NULL; - soisdisconnected(unp2->unp_socket); + if (so2) + soisdisconnected(so2); break; } + freed = unp_pcb_rele(unp); + MPASS(freed == 0); + freed = unp_pcb_rele(unp2); + MPASS(freed == 0); } /* @@ -1625,7 +1785,7 @@ continue; } unp_list[i++] = unp; - unp->unp_refcount++; + unp_pcb_hold(unp); } UNP_PCB_UNLOCK(unp); } @@ -1637,8 +1797,9 @@ for (i = 0; i < n; i++) { unp = unp_list[i]; UNP_PCB_LOCK(unp); - unp->unp_refcount--; - if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { + freeunp = unp_pcb_rele(unp); + + if (freeunp == 0 && unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; xu->xu_unpp = unp; /* @@ -1665,14 +1826,8 @@ sotoxsocket(unp->unp_socket, &xu->xu_socket); UNP_PCB_UNLOCK(unp); error = SYSCTL_OUT(req, xu, sizeof *xu); - } else { - freeunp = (unp->unp_refcount == 0); + } else if (freeunp == 0) UNP_PCB_UNLOCK(unp); - if (freeunp) { - UNP_PCB_LOCK_DESTROY(unp); - uma_zfree(unp_zone, unp); - } - } } free(xu, M_TEMP); if (!error) { @@ -1709,7 +1864,6 @@ struct unpcb *unp2; struct socket *so; - UNP_LINK_WLOCK_ASSERT(); UNP_PCB_LOCK_ASSERT(unp); unp2 = unp->unp_conn; @@ -1726,22 +1880,27 @@ { struct socket *so = unp->unp_socket; struct unpcb *unp2; - - UNP_LINK_WLOCK_ASSERT(); - UNP_PCB_LOCK_ASSERT(unp); + int freed; /* * Regardless of whether the socket's peer dropped the connection * with this socket by aborting or disconnecting, POSIX requires * that ECONNRESET is returned. */ + /* acquire a reference so that unp isn't freed from underneath us */ + + UNP_PCB_LOCK(unp); so->so_error = ECONNRESET; unp2 = unp->unp_conn; - if (unp2 == NULL) - return; - UNP_PCB_LOCK(unp2); - unp_disconnect(unp, unp2); - UNP_PCB_UNLOCK(unp2); + if (unp2 != NULL) { + unp_pcb_hold(unp2); + unp_pcb_owned_lock2(unp, unp2, freed); + unp_disconnect(unp, unp2); + if (unp_pcb_rele(unp2) == 0) + UNP_PCB_UNLOCK(unp2); + } + if (unp_pcb_rele(unp) == 0) + UNP_PCB_UNLOCK(unp); } static void @@ -2464,13 +2623,15 @@ { struct unpcb *unp; int active; + struct mtx *vplock; ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim"); KASSERT(vp->v_type == VSOCK, ("vfs_unp_reclaim: vp->v_type != VSOCK")); active = 0; - UNP_LINK_WLOCK(); + vplock = mtx_pool_find(mtxpool_sleep, vp); + mtx_lock(vplock); VOP_UNP_CONNECT(vp, &unp); if (unp == NULL) goto done; @@ -2481,8 +2642,8 @@ active = 1; } UNP_PCB_UNLOCK(unp); -done: - UNP_LINK_WUNLOCK(); + done: + mtx_unlock(vplock); if (active) vunref(vp); } Index: sys/sys/unpcb.h =================================================================== --- sys/sys/unpcb.h +++ sys/sys/unpcb.h @@ -82,7 +82,7 @@ short unp_flags; /* flags */ short unp_gcflag; /* Garbage collector flags. */ struct xucred unp_peercred; /* peer credentials, if applicable */ - u_int unp_refcount; + volatile u_int unp_refcount; u_int unp_msgcount; /* references from message queue */ struct mtx unp_mtx; /* mutex */ };