diff --git a/sys/dev/hyperv/hvsock/hv_sock.c b/sys/dev/hyperv/hvsock/hv_sock.c --- a/sys/dev/hyperv/hvsock/hv_sock.c +++ b/sys/dev/hyperv/hvsock/hv_sock.c @@ -450,13 +450,15 @@ { struct hvs_pcb *pcb = so2hvspcb(so); struct socket *bound_so; - int error; HVSOCK_DBG(HVSOCK_DBG_VERBOSE, "%s: HyperV Socket hvs_trans_listen called\n", __func__); - if (pcb == NULL) + if (pcb == NULL) { + HVSOCK_DBG(HVSOCK_DBG_VERBOSE, + "%s: HyperV Socket NULL pcb\n", __func__); return (EINVAL); + } /* Check if the address is already bound and it was by us. */ bound_so = hvs_find_socket_on_list(&pcb->local_addr, HVS_LIST_BOUND); @@ -466,15 +468,7 @@ return (EADDRNOTAVAIL); } - SOCK_LOCK(so); - error = solisten_proto_check(so); - if (error == 0) - solisten_proto(so, backlog); - SOCK_UNLOCK(so); - - HVSOCK_DBG(HVSOCK_DBG_VERBOSE, - "%s: HyperV Socket listen error = %d\n", __func__, error); - return (error); + return (0); } int diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -828,7 +828,7 @@ return (EINVAL); } - if (SOLISTENING(so)) { + if (SOMAYLISTEN(so)) { SOCK_BUF_UNLOCK(so, which); return (EINVAL); } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1392,38 +1392,27 @@ /* * solisten() transitions a socket from a non-listening state to a listening * state, but can also be used to update the listen queue depth on an - * existing listen socket. The protocol will call back into the sockets - * layer using solisten_proto_check() and solisten_proto() to check and set - * socket-layer listen state. Call backs are used so that the protocol can - * acquire both protocol and socket layer locks in whatever order is required - * by the protocol. - * - * Protocol implementors are advised to hold the socket lock across the - * socket-layer test and set to avoid races at the socket layer. + * existing listen socket. */ int solisten(struct socket *so, int backlog, struct thread *td) { + int sbrcv_lowat, sbsnd_lowat; + u_int sbrcv_hiwat, sbsnd_hiwat; + short sbrcv_flags, sbsnd_flags; + sbintime_t sbrcv_timeo, sbsnd_timeo; int error; + bool sxlocked = false; CURVNET_SET(so->so_vnet); - error = so->so_proto->pr_listen(so, backlog, td); - CURVNET_RESTORE(); - return (error); -} - -/* - * Prepare for a call to solisten_proto(). Acquire all socket buffer locks in - * order to interlock with socket I/O. - */ -int -solisten_proto_check(struct socket *so) -{ - SOCK_LOCK_ASSERT(so); - + SOCK_LOCK(so); + if (SOLISTENING(so)) + goto listening; if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | - SS_ISDISCONNECTING)) != 0) - return (EINVAL); + SS_ISDISCONNECTING)) != 0) { + error = EINVAL; + goto out; + } /* * Sleeping is not permitted here, so simply fail if userspace is @@ -1431,63 +1420,56 @@ * transient failure is not ideal, but it should occur only if userspace * is misusing the socket interfaces. */ - if (!sx_try_xlock(&so->so_snd_sx)) - return (EAGAIN); + if (!sx_try_xlock(&so->so_snd_sx)) { + error = EAGAIN; + goto out; + } if (!sx_try_xlock(&so->so_rcv_sx)) { sx_xunlock(&so->so_snd_sx); - return (EAGAIN); + error = EAGAIN; + goto out; } + sxlocked = true; + + /* + * Interlock with soo_aio_queue() and KTLS. The aio(4) doesn't use + * soiolock(), it uses only socket buffer mutexes. We mark the + * socket as transitioning and release locks, to avoid LORs with any + * protocol level locks. + */ mtx_lock(&so->so_snd_mtx); mtx_lock(&so->so_rcv_mtx); - - /* Interlock with soo_aio_queue() and KTLS. */ - if (!SOLISTENING(so)) { - bool ktls; - + if ( #ifdef KERN_TLS - ktls = so->so_snd.sb_tls_info != NULL || - so->so_rcv.sb_tls_info != NULL; -#else - ktls = false; + so->so_snd.sb_tls_info != NULL || + so->so_rcv.sb_tls_info != NULL || #endif - if (ktls || - (so->so_snd.sb_flags & (SB_AIO | SB_AIO_RUNNING)) != 0 || - (so->so_rcv.sb_flags & (SB_AIO | SB_AIO_RUNNING)) != 0) { - solisten_proto_abort(so); - return (EINVAL); - } + (so->so_snd.sb_flags & (SB_AIO | SB_AIO_RUNNING)) != 0 || + (so->so_rcv.sb_flags & (SB_AIO | SB_AIO_RUNNING)) != 0) { + mtx_unlock(&so->so_snd_mtx); + mtx_unlock(&so->so_rcv_mtx); + error = EINVAL; + goto out; } - return (0); -} - -/* - * Undo the setup done by solisten_proto_check(). - */ -void -solisten_proto_abort(struct socket *so) -{ + so->so_options |= SO_INLISTEN; mtx_unlock(&so->so_snd_mtx); mtx_unlock(&so->so_rcv_mtx); - sx_xunlock(&so->so_snd_sx); - sx_xunlock(&so->so_rcv_sx); -} - -void -solisten_proto(struct socket *so, int backlog) -{ - int sbrcv_lowat, sbsnd_lowat; - u_int sbrcv_hiwat, sbsnd_hiwat; - short sbrcv_flags, sbsnd_flags; - sbintime_t sbrcv_timeo, sbsnd_timeo; + SOCK_UNLOCK(so); - SOCK_LOCK_ASSERT(so); - KASSERT((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | - SS_ISDISCONNECTING)) == 0, - ("%s: bad socket state %p", __func__, so)); + error = so->so_proto->pr_listen(so, backlog, td); - if (SOLISTENING(so)) - goto listening; + /* + * Protocols may return locked in case of success and may set + * SO_ACCEPTCONN early. + */ + if (SOCK_OWNED(so)) + MPASS(error == 0); + else { + SOCK_LOCK(so); + if (error) + goto out; + } /* * Change this socket to listening state. @@ -1541,11 +1523,15 @@ if (backlog < 0 || backlog > V_somaxconn) backlog = V_somaxconn; so->sol_qlimit = backlog; - - mtx_unlock(&so->so_snd_mtx); - mtx_unlock(&so->so_rcv_mtx); - sx_xunlock(&so->so_snd_sx); - sx_xunlock(&so->so_rcv_sx); +out: + so->so_options &= ~SO_INLISTEN; + if (sxlocked) { + sx_xunlock(&so->so_snd_sx); + sx_xunlock(&so->so_rcv_sx); + } + SOCK_UNLOCK(so); + CURVNET_RESTORE(); + return (error); } /* diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -853,34 +853,23 @@ static int uipc_listen(struct socket *so, int backlog, struct thread *td) { - struct unpcb *unp; - int error; + struct unpcb *unp = sotounpcb(so); + int error = 0; MPASS(so->so_type != SOCK_DGRAM); /* * Synchronize with concurrent connection attempts. */ - error = 0; - unp = sotounpcb(so); UNP_PCB_LOCK(unp); if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0) error = EINVAL; else if (unp->unp_vnode == NULL) error = EDESTADDRREQ; - if (error != 0) { - UNP_PCB_UNLOCK(unp); - return (error); - } - - SOCK_LOCK(so); - error = solisten_proto_check(so); - if (error == 0) { + else cru2xt(td, &unp->unp_peercred); - solisten_proto(so, backlog); - } - SOCK_UNLOCK(so); UNP_PCB_UNLOCK(unp); + return (error); } diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -2482,32 +2482,14 @@ ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so); - int error; - SOCK_LOCK(so); - error = solisten_proto_check(so); - if (error != 0) - goto out; - if (pcb == NULL) { - solisten_proto_abort(so); - error = EINVAL; - goto out; - } - if (ng_btsocket_l2cap_node == NULL) { - solisten_proto_abort(so); - error = EINVAL; - goto out; - } - if (pcb->psm == 0) { - solisten_proto_abort(so); - error = EADDRNOTAVAIL; - goto out; - } - solisten_proto(so, backlog); -out: - SOCK_UNLOCK(so); - return (error); -} /* ng_btsocket_listen */ + if (ng_btsocket_l2cap_node == NULL) + return (EINVAL); + if (pcb->psm == 0) + return (EADDRNOTAVAIL); + + return (0); +} /* * Return peer address for getpeername(2) or for accept(2). For the latter diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -822,6 +822,7 @@ return (EADDRNOTAVAIL); usedchannels = 0; + error = 0; mtx_lock(&pcb->pcb_mtx); @@ -863,12 +864,6 @@ * for collisions first, as there can only be one. */ mtx_lock(&ng_btsocket_rfcomm_sessions_mtx); - SOCK_LOCK(so); - error = solisten_proto_check(so); - SOCK_UNLOCK(so); - if (error != 0) - goto out; - LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next) if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING) break; @@ -880,7 +875,6 @@ * from socreate() */ if (l2so == NULL) { - solisten_proto_abort(so); error = socreate_error; goto out; } @@ -895,14 +889,10 @@ error = ng_btsocket_rfcomm_session_create(&s, l2so, NG_HCI_BDADDR_ANY, NULL, td); if (error != 0) { - solisten_proto_abort(so); goto out; } l2so = NULL; } - SOCK_LOCK(so); - solisten_proto(so, backlog); - SOCK_UNLOCK(so); out: mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx); /* @@ -912,7 +902,7 @@ if (l2so != NULL) soclose(l2so); return (error); -} /* ng_btsocket_listen */ +} /* * Return peer address for getpeername(2) or for accept(2). For the latter diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c --- a/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c @@ -1582,32 +1582,12 @@ ng_btsocket_sco_listen(struct socket *so, int backlog, struct thread *td) { ng_btsocket_sco_pcb_p pcb = so2sco_pcb(so); - int error; - if (pcb == NULL) + if (pcb == NULL || ng_btsocket_sco_node == NULL) return (EINVAL); - if (ng_btsocket_sco_node == NULL) - return (EINVAL); - - SOCK_LOCK(so); - mtx_lock(&pcb->pcb_mtx); - - error = solisten_proto_check(so); - if (error != 0) - goto out; -#if 0 - if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0) { - error = EDESTADDRREQ; - goto out; - } -#endif - solisten_proto(so, backlog); -out: - mtx_unlock(&pcb->pcb_mtx); - SOCK_UNLOCK(so); - - return (error); -} /* ng_btsocket_listen */ + else + return (0); +} /* * Return peer address for getpeername(2) or for accept(2). For the latter diff --git a/sys/netinet/sctp_usrreq.c b/sys/netinet/sctp_usrreq.c --- a/sys/netinet/sctp_usrreq.c +++ b/sys/netinet/sctp_usrreq.c @@ -7212,16 +7212,8 @@ goto out; } } - SOCK_LOCK(so); - error = solisten_proto_check(so); - if (error) { - SOCK_UNLOCK(so); - goto out; - } if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) { - SOCK_UNLOCK(so); - solisten_proto_abort(so); error = EADDRINUSE; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); goto out; @@ -7229,27 +7221,19 @@ if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && ((inp->sctp_flags & SCTP_PCB_FLAGS_WAS_CONNECTED) || (inp->sctp_flags & SCTP_PCB_FLAGS_WAS_ABORTED))) { - SOCK_UNLOCK(so); - solisten_proto_abort(so); error = EINVAL; SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error); goto out; } if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) { if ((error = sctp_inpcb_bind_locked(inp, NULL, NULL, p))) { - SOCK_UNLOCK(so); - solisten_proto_abort(so); /* bind error, probably perm */ goto out; } } if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) { - solisten_proto(so, backlog); - SOCK_UNLOCK(so); inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING; } else { - solisten_proto_abort(so); - SOCK_UNLOCK(so); if (backlog > 0) { inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING; } else { diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -348,131 +348,72 @@ } #endif /* INET6 */ -#ifdef INET /* * Prepare to accept connections. */ static int tcp_usr_listen(struct socket *so, int backlog, struct thread *td) { - struct inpcb *inp; - struct tcpcb *tp; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp = intotcpcb(inp); int error = 0; - bool already_listening; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL")); INP_WLOCK(inp); if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - return (EINVAL); - } - tp = intotcpcb(inp); - - SOCK_LOCK(so); - already_listening = SOLISTENING(so); - error = solisten_proto_check(so); - if (error != 0) { - SOCK_UNLOCK(so); + /* XXXGL: can this happen? */ + error = EINVAL; goto out; } + if (inp->inp_lport == 0) { INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbbind(inp, NULL, td->td_ucred); - INP_HASH_WUNLOCK(&V_tcbinfo); - } - if (error == 0) { - tcp_state_change(tp, TCPS_LISTEN); - solisten_proto(so, backlog); -#ifdef TCP_OFFLOAD - if ((so->so_options & SO_NO_OFFLOAD) == 0) - tcp_offload_listen_start(tp); -#endif - } else { - solisten_proto_abort(so); - } - SOCK_UNLOCK(so); - if (already_listening) - goto out; - - if (error == 0) - in_pcblisten(inp); - if (tp->t_flags & TF_FASTOPEN) - tp->t_tfo_pending = tcp_fastopen_alloc_counter(); - -out: - tcp_bblog_pru(tp, PRU_LISTEN, error); - TCP_PROBE2(debug__user, tp, PRU_LISTEN); - INP_WUNLOCK(inp); - return (error); -} -#endif /* INET */ - #ifdef INET6 -static int -tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) -{ - struct inpcb *inp; - struct tcpcb *tp; - u_char vflagsav; - int error = 0; - bool already_listening; - - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL")); - INP_WLOCK(inp); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - return (EINVAL); + if (inp->inp_vflag & INP_IPV6PROTO) { + u_char vflagsav; + + vflagsav = inp->inp_vflag; + inp->inp_vflag &= ~INP_IPV4; + if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) + inp->inp_vflag |= INP_IPV4; + error = in6_pcbbind(inp, NULL, td->td_ucred); + if (error) + inp->inp_vflag = vflagsav; + } +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + error = in_pcbbind(inp, NULL, td->td_ucred); +#endif + INP_HASH_WUNLOCK(&V_tcbinfo); + if (error) + goto out; } - tp = intotcpcb(inp); - - vflagsav = inp->inp_vflag; - SOCK_LOCK(so); - already_listening = SOLISTENING(so); - error = solisten_proto_check(so); - if (error != 0) { - SOCK_UNLOCK(so); - goto out; - } - INP_HASH_WLOCK(&V_tcbinfo); - if (inp->inp_lport == 0) { - inp->inp_vflag &= ~INP_IPV4; - if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) - inp->inp_vflag |= INP_IPV4; - error = in6_pcbbind(inp, NULL, td->td_ucred); - } - INP_HASH_WUNLOCK(&V_tcbinfo); - if (error == 0) { - tcp_state_change(tp, TCPS_LISTEN); - solisten_proto(so, backlog); + in_pcblisten(inp); #ifdef TCP_OFFLOAD - if ((so->so_options & SO_NO_OFFLOAD) == 0) - tcp_offload_listen_start(tp); + if ((so->so_options & SO_NO_OFFLOAD) == 0) + tcp_offload_listen_start(tp); #endif - } else { - solisten_proto_abort(so); - } - SOCK_UNLOCK(so); - if (already_listening) - goto out; - - if (error == 0) - in_pcblisten(inp); if (tp->t_flags & TF_FASTOPEN) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); - if (error != 0) - inp->inp_vflag = vflagsav; - -out: - tcp_bblog_pru(tp, PRU_LISTEN, error); + /* + * TCP needs both pcb lock and socket lock to switch on listening, + * cause tcp_input() checks SOLISTENING() using pcb lock only. Then + * syncache_socket() will SOLISTEN_LOCK() and see fully transitioned + * socket. + */ + SOCK_LOCK(so); + so->so_options |= SO_ACCEPTCONN; + tcp_state_change(tp, TCPS_LISTEN); + tcp_bblog_pru(tp, PRU_LISTEN, 0); TCP_PROBE2(debug__user, tp, PRU_LISTEN); +out: INP_WUNLOCK(inp); return (error); } -#endif /* INET6 */ #ifdef INET /* @@ -1441,7 +1382,7 @@ .pr_control = in6_control, .pr_detach = tcp_usr_detach, .pr_disconnect = tcp_usr_disconnect, - .pr_listen = tcp6_usr_listen, + .pr_listen = tcp_usr_listen, .pr_peeraddr = in6_mapped_peeraddr, .pr_rcvd = tcp_usr_rcvd, .pr_rcvoob = tcp_usr_rcvoob, diff --git a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c --- a/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c +++ b/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c @@ -510,17 +510,10 @@ error = EINVAL; goto out; } - if (error == 0 && ssk->lport == 0) + if (ssk->lport == 0) error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred); - SOCK_LOCK(so); if (error == 0) - error = solisten_proto_check(so); - if (error == 0) { - solisten_proto(so, backlog); ssk->state = TCPS_LISTEN; - } - SOCK_UNLOCK(so); - out: SDP_WUNLOCK(ssk); if (error == 0) diff --git a/sys/sys/socket.h b/sys/sys/socket.h --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -146,6 +146,9 @@ #define SO_NO_DDP 0x00008000 /* disable direct data placement */ #define SO_REUSEPORT_LB 0x00010000 /* reuse with load balancing */ #define SO_RERROR 0x00020000 /* keep track of receive errors */ +#if __BSD_VISIBLE +#define SO_INLISTEN 0x00040000 /* in solisten() now, used by aio(4) */ +#endif /* * Additional options, not kept in so_options. diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -249,6 +249,9 @@ #define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED) #define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED) +#define SOMAYLISTEN(so) \ + ((so)->so_options & (SO_ACCEPTCONN | SO_INLISTEN)) + #define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0) #define SOLISTEN_LOCK(sol) do { \ mtx_lock(&(sol)->so_lock); \ @@ -514,9 +517,6 @@ struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); void sohasoutofband(struct socket *so); int solisten(struct socket *so, int backlog, struct thread *td); -void solisten_proto(struct socket *so, int backlog); -void solisten_proto_abort(struct socket *so); -int solisten_proto_check(struct socket *so); bool solisten_enqueue(struct socket *, int); int solisten_dequeue(struct socket *, struct socket **, int); struct socket *