diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -761,9 +761,10 @@ #define INPLOOKUP_WILDCARD 0x00000001 /* Allow wildcard sockets. */ #define INPLOOKUP_RLOCKPCB 0x00000002 /* Return inpcb read-locked. */ #define INPLOOKUP_WLOCKPCB 0x00000004 /* Return inpcb write-locked. */ +#define INPLOOKUP_RLOCKLISTEN 0x00000008 /* Rlock if listening, W if !.*/ #define INPLOOKUP_MASK (INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB | \ - INPLOOKUP_WLOCKPCB) + INPLOOKUP_WLOCKPCB | INPLOOKUP_RLOCKLISTEN) #define sotoinpcb(so) ((struct inpcb *)(so)->so_pcb) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -2518,31 +2518,24 @@ struct inpcb *inp; inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, - (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp, - numa_domain); + lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain); if (inp != NULL) { if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); - if (__predict_false(inp->inp_flags2 & INP_FREED)) { - INP_WUNLOCK(inp); - inp = NULL; - } } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); - if (__predict_false(inp->inp_flags2 & INP_FREED)) { - INP_RUNLOCK(inp); - inp = NULL; - } + } else if (lookupflags & INPLOOKUP_RLOCKLISTEN) { + if (inp->inp_socket != NULL && + SOLISTENING(inp->inp_socket)) + INP_RLOCK(inp); + else + INP_WLOCK(inp); } else panic("%s: locking bug", __func__); -#ifdef INVARIANTS - if (inp != NULL) { - if (lookupflags & INPLOOKUP_WLOCKPCB) - INP_WLOCK_ASSERT(inp); - else - INP_RLOCK_ASSERT(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_UNLOCK(inp); + inp = NULL; } -#endif } return (inp); @@ -2564,8 +2557,8 @@ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, - ("%s: LOCKPCB not set", __func__)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB | + INPLOOKUP_RLOCKLISTEN)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the @@ -2600,8 +2593,8 @@ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, - ("%s: LOCKPCB not set", __func__)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB | + INPLOOKUP_RLOCKLISTEN)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP /* diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -626,6 +626,7 @@ int drop_hdrlen; int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ + int lookupflag; uint8_t iptos; struct m_tag *fwd_tag = NULL; #ifdef INET6 @@ -825,6 +826,12 @@ ) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + /* + * For initial SYN packets arriving on listening socket, + * we don't need write lock. + */ + lookupflag = (thflags & (TH_ACK|TH_SYN)) == TH_SYN ? + INPLOOKUP_RLOCKLISTEN : INPLOOKUP_WLOCKPCB; findpcb: #ifdef INET6 if (isipv6 && fwd_tag != NULL) { @@ -837,7 +844,7 @@ */ inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, - INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m); + lookupflag, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. @@ -847,14 +854,13 @@ inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &next_hop6->sin6_addr, next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) : - th->th_dport, INPLOOKUP_WILDCARD | - INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); + th->th_dport, INPLOOKUP_WILDCARD | lookupflag, + m->m_pkthdr.rcvif); } } else if (isipv6) { inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, - INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, - m->m_pkthdr.rcvif, m); + INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m); } #endif /* INET6 */ #if defined(INET6) && defined(INET) @@ -870,8 +876,7 @@ * already got one like this? */ inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, - ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB, - m->m_pkthdr.rcvif, m); + ip->ip_dst, th->th_dport, lookupflag, m->m_pkthdr.rcvif, m); if (!inp) { /* * It's new. Try to find the ambushing socket. @@ -881,14 +886,13 @@ inp = in_pcblookup(&V_tcbinfo, ip->ip_src, th->th_sport, next_hop->sin_addr, next_hop->sin_port ? ntohs(next_hop->sin_port) : - th->th_dport, INPLOOKUP_WILDCARD | - INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif); + th->th_dport, INPLOOKUP_WILDCARD | lookupflag, + m->m_pkthdr.rcvif); } } else inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, - INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB, - m->m_pkthdr.rcvif, m); + INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m); #endif /* INET */ /* @@ -918,14 +922,14 @@ rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } - INP_WLOCK_ASSERT(inp); + INP_LOCK_ASSERT(inp); /* * While waiting for inp lock during the lookup, another thread * can have dropped the inpcb, in which case we need to loop back * and try to find a new inpcb to deliver to. */ if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); + INP_UNLOCK(inp); inp = NULL; goto findpcb; } @@ -1014,7 +1018,6 @@ #endif #ifdef MAC - INP_WLOCK_ASSERT(inp); if (mac_inpcb_check_deliver(inp, m)) goto dropunlock; #endif @@ -1124,8 +1127,10 @@ * Socket is created in state SYN_RECEIVED. * Unlock the listen socket, lock the newly * created socket and update the tp variable. + * If we came here via jump to tfo_socket_result, + * then listening socket is read-locked. */ - INP_WUNLOCK(inp); /* listen socket */ + INP_UNLOCK(inp); /* listen socket */ inp = sotoinpcb(so); /* * New connection inpcb is already locked by @@ -1213,6 +1218,7 @@ ("%s: Listen socket: TH_RST or TH_ACK set", __func__)); KASSERT(thflags & (TH_SYN), ("%s: Listen socket: TH_SYN not set", __func__)); + INP_RLOCK_ASSERT(inp); #ifdef INET6 /* * If deprecated address is forbidden, @@ -1381,7 +1387,7 @@ if (inp != NULL) { tcp_dropwithreset(m, th, tp, tlen, rstreason); - INP_WUNLOCK(inp); + INP_UNLOCK(inp); } else tcp_dropwithreset(m, th, NULL, tlen, rstreason); m = NULL; /* mbuf chain got consumed. */ @@ -1392,7 +1398,7 @@ TCP_PROBE5(receive, NULL, tp, m, tp, th); if (inp != NULL) - INP_WUNLOCK(inp); + INP_UNLOCK(inp); drop: INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo); @@ -3360,7 +3366,7 @@ #endif if (tp != NULL) { - INP_WLOCK_ASSERT(tp->t_inpcb); + INP_LOCK_ASSERT(tp->t_inpcb); } /* Don't bother if destination was broadcast/multicast. */ diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1428,7 +1428,7 @@ if (tp != NULL) { inp = tp->t_inpcb; KASSERT(inp != NULL, ("tcp control block w/o inpcb")); - INP_WLOCK_ASSERT(inp); + INP_LOCK_ASSERT(inp); } else inp = NULL; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1397,7 +1397,7 @@ int tfo_response_cookie_valid = 0; bool locked; - INP_WLOCK_ASSERT(inp); /* listen socket */ + INP_RLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, ("%s: unexpected tcp flags", __func__)); @@ -1469,13 +1469,13 @@ #ifdef MAC if (mac_syncache_init(&maclabel) != 0) { - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); goto done; } else mac_syncache_create(maclabel, inp); #endif if (!tfo_cookie_valid) - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); /* * Remember the IP options, if any. @@ -1528,7 +1528,7 @@ } if (sc != NULL) { if (tfo_cookie_valid) - INP_WUNLOCK(inp); + INP_RUNLOCK(inp); TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { /* @@ -1735,7 +1735,7 @@ if (tfo_cookie_valid) { syncache_tfo_expand(sc, lsop, m, tfo_response_cookie); - /* INP_WUNLOCK(inp) will be performed by the caller */ + /* INP_RUNLOCK(inp) will be performed by the caller */ rv = 1; goto tfo_expanded; } diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1293,31 +1293,24 @@ struct inpcb *inp; inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, - (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp, - numa_domain); + lookupflags & INPLOOKUP_WILDCARD, ifp, numa_domain); if (inp != NULL) { if (lookupflags & INPLOOKUP_WLOCKPCB) { INP_WLOCK(inp); - if (__predict_false(inp->inp_flags2 & INP_FREED)) { - INP_WUNLOCK(inp); - inp = NULL; - } } else if (lookupflags & INPLOOKUP_RLOCKPCB) { INP_RLOCK(inp); - if (__predict_false(inp->inp_flags2 & INP_FREED)) { - INP_RUNLOCK(inp); - inp = NULL; - } + } else if (lookupflags & INPLOOKUP_RLOCKLISTEN) { + if (inp->inp_socket != NULL && + SOLISTENING(inp->inp_socket)) + INP_RLOCK(inp); + else + INP_WLOCK(inp); } else panic("%s: locking bug", __func__); -#ifdef INVARIANTS - if (inp != NULL) { - if (lookupflags & INPLOOKUP_WLOCKPCB) - INP_WLOCK_ASSERT(inp); - else - INP_RLOCK_ASSERT(inp); + if (__predict_false(inp->inp_flags2 & INP_FREED)) { + INP_UNLOCK(inp); + inp = NULL; } -#endif } return (inp); } @@ -1338,8 +1331,8 @@ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, - ("%s: LOCKPCB not set", __func__)); + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB | + INPLOOKUP_RLOCKLISTEN)) != 0, ("%s: LOCKPCB not set", __func__)); /* * When not using RSS, use connection groups in preference to the @@ -1374,7 +1367,8 @@ KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); - KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, + KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB | + INPLOOKUP_RLOCKLISTEN)) != 0, ("%s: LOCKPCB not set", __func__)); #ifdef PCBGROUP diff --git a/sys/security/mac/mac_inet.c b/sys/security/mac/mac_inet.c --- a/sys/security/mac/mac_inet.c +++ b/sys/security/mac/mac_inet.c @@ -487,7 +487,7 @@ mac_syncache_create(struct label *label, struct inpcb *inp) { - INP_WLOCK_ASSERT(inp); + INP_LOCK_ASSERT(inp); MAC_POLICY_PERFORM_NOSLEEP(syncache_create, label, inp); }