Lock inpcbs inside the hash lookup.

in_pcblookup_hash_locked() and in6_pcblookup_hash_locked() take the pcb
lock on every candidate while walking the hash chain (write lock when
INPLOOKUP_WLOCKPCB is set, read lock otherwise), skip entries marked
INP_FREED, and return the selected pcb still locked when
INPLOOKUP_WLOCKPCB or INPLOOKUP_RLOCKPCB is set.  With neither flag the
pcb is read-unlocked before it is returned.  in_pcblookup_hash() and
in6_pcblookup_hash() pass the lock flags through, drop the hash lock
right after the lookup, and under INVARIANTS only assert that the
returned pcb is locked as requested.

A pcb returned by the lbgroup lookup is locked only after that lookup
has returned it.  If it turns out to be INP_FREED it is unlocked and
NULL is returned, which is what the wrappers did before this change.

NOTE(review): candidates kept during the walk (tmpinp, local_exact,
local_wild, jail_wild, local_wild_mapped) stay locked while further
entries on the chain are locked.  Confirm that no caller enters these
functions with one of those pcbs already locked.

Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -2236,6 +2236,16 @@
 }
 #endif /* PCBGROUP */
 
+#define LOCAL_UNLOCK(_inp)						\
+	do {								\
+		if ((_inp) != NULL) {					\
+									\
+			if (lookupflags & INPLOOKUP_WLOCKPCB)		\
+				INP_WUNLOCK((_inp));			\
+			else						\
+				INP_RUNLOCK((_inp));			\
+		}							\
+	} while (0)
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation assumes
  * that the caller has locked the hash list, and will not perform any further
@@ -2248,14 +2258,17 @@
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
+	struct inpcb *local_wild, *local_exact, *jail_wild;
+#ifdef INET6
+	struct inpcb *local_wild_mapped;
+#endif
 	u_short fport = fport_arg, lport = lport_arg;
 
-#ifdef INVARIANTS
-	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+	KASSERT((lookupflags & (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB)) !=
+	    (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB),
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
-	if (!mtx_owned(&pcbinfo->ipi_hash_lock))
-		MPASS(in_epoch_verbose(net_epoch_preempt, 1));
-#endif
+	INP_HASH_LOCK_ASSERT(pcbinfo);
+
 	/*
 	 * First look for an exact match.
 	 */
@@ -2263,10 +2276,15 @@
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 	    pcbinfo->ipi_hashmask)];
 	CK_LIST_FOREACH(inp, head, inp_hash) {
+		if (lookupflags & INPLOOKUP_WLOCKPCB)
+			INP_WLOCK(inp);
+		else
+			INP_RLOCK(inp);
+		if (__predict_false(inp->inp_flags2 & INP_FREED))
+			goto next1;
 #ifdef INET6
-		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
-			continue;
+			goto next1;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
@@ -2277,14 +2295,28 @@
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
-			if (prison_flag(inp->inp_cred, PR_IP4))
+			if (prison_flag(inp->inp_cred, PR_IP4)) {
+				LOCAL_UNLOCK(tmpinp);
+				if ((lookupflags &
+				    (INPLOOKUP_WLOCKPCB |
+				    INPLOOKUP_RLOCKPCB)) == 0)
+					INP_RUNLOCK(inp);
 				return (inp);
-			if (tmpinp == NULL)
+			}
+			if (tmpinp == NULL) {
 				tmpinp = inp;
+				continue;
+			}
 		}
+next1:
+		LOCAL_UNLOCK(inp);
 	}
-	if (tmpinp != NULL)
+	if (tmpinp != NULL) {
+		if ((lookupflags &
+		    (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB)) == 0)
+			INP_RUNLOCK(tmpinp);
 		return (tmpinp);
+	}
 
 	/*
 	 * Then look in lb group (for wildcard match).
@@ -2294,6 +2326,20 @@
 		inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr,
 		    fport, lookupflags);
 		if (inp != NULL) {
+			if (lookupflags & INPLOOKUP_WLOCKPCB)
+				INP_WLOCK(inp);
+			else if (lookupflags & INPLOOKUP_RLOCKPCB)
+				INP_RLOCK(inp);
+			else
+				return (inp);
+			/*
+			 * The pcb is locked only now, after the lbgroup
+			 * lookup returned it; drop it if it was freed.
+			 */
+			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+				LOCAL_UNLOCK(inp);
+				return (NULL);
+			}
 			return (inp);
 		}
 	}
@@ -2302,13 +2348,12 @@
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
-		struct inpcb *local_wild = NULL, *local_exact = NULL;
-#ifdef INET6
-		struct inpcb *local_wild_mapped = NULL;
-#endif
-		struct inpcb *jail_wild = NULL;
 		int injail;
 
+		local_wild = local_exact = jail_wild = NULL;
+#ifdef INET6
+		local_wild_mapped = NULL;
+#endif
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
@@ -2320,57 +2365,99 @@
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		CK_LIST_FOREACH(inp, head, inp_hash) {
+			if (lookupflags & INPLOOKUP_WLOCKPCB)
+				INP_WLOCK(inp);
+			else
+				INP_RLOCK(inp);
+			if (__predict_false(inp->inp_flags2 & INP_FREED))
+				goto nextw;
 #ifdef INET6
-			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
-				continue;
+				goto nextw;
 #endif
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
-				continue;
+				goto nextw;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
-					continue;
+					goto nextw;
 			} else {
 				if (local_exact != NULL)
-					continue;
+					goto nextw;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
-					return (inp);
-				else
+					goto out;
+				else {
+					LOCAL_UNLOCK(local_exact);
 					local_exact = inp;
+				}
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
-				/* XXX inp locking, NULL check */
-				if (inp->inp_vflag & INP_IPV6PROTO)
+				if (inp->inp_vflag & INP_IPV6PROTO) {
+					LOCAL_UNLOCK(local_wild_mapped);
 					local_wild_mapped = inp;
-				else
+				} else
 #endif
-					if (injail)
+					if (injail) {
+						LOCAL_UNLOCK(jail_wild);
 						jail_wild = inp;
-					else
+					} else {
+						LOCAL_UNLOCK(local_wild);
 						local_wild = inp;
-			}
+					}
+			} else
+				goto nextw;
+			continue;
+nextw:
+			LOCAL_UNLOCK(inp);
 		} /* LIST_FOREACH */
-		if (jail_wild != NULL)
-			return (jail_wild);
-		if (local_exact != NULL)
-			return (local_exact);
-		if (local_wild != NULL)
-			return (local_wild);
+
+		if (jail_wild != NULL) {
+			inp = jail_wild;
+			jail_wild = NULL;
+			goto out;
+		}
+		if (local_exact != NULL) {
+			inp = local_exact;
+			local_exact = NULL;
+			goto out;
+		}
+		if (local_wild != NULL) {
+			inp = local_wild;
+			local_wild = NULL;
+			goto out;
+		}
 #ifdef INET6
-		if (local_wild_mapped != NULL)
-			return (local_wild_mapped);
+		if (local_wild_mapped != NULL) {
+			inp = local_wild_mapped;
+			local_wild_mapped = NULL;
+			goto out;
+		}
 #endif
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 
+	/*
+	 * Not found.
+	 */
 	return (NULL);
+
+out:
+	LOCAL_UNLOCK(jail_wild);
+	LOCAL_UNLOCK(local_exact);
+	LOCAL_UNLOCK(local_wild);
+#ifdef INET6
+	LOCAL_UNLOCK(local_wild_mapped);
+#endif
+	if ((lookupflags & (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB)) == 0)
+		INP_RUNLOCK(inp);
+	return (inp);
 }
+#undef LOCAL_UNLOCK
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
@@ -2386,32 +2473,18 @@
 
 	INP_HASH_RLOCK(pcbinfo);
 	inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
-	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+	    lookupflags, ifp);
+	INP_HASH_RUNLOCK(pcbinfo);
+#ifdef INVARIANTS
 	if (inp != NULL) {
-		if (lookupflags & INPLOOKUP_WLOCKPCB) {
-			INP_WLOCK(inp);
-			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
-				INP_WUNLOCK(inp);
-				inp = NULL;
-			}
-		} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
-			INP_RLOCK(inp);
-			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
-				INP_RUNLOCK(inp);
-				inp = NULL;
-			}
-		} else
+		if (lookupflags & INPLOOKUP_WLOCKPCB)
+			INP_WLOCK_ASSERT(inp);
+		else if (lookupflags & INPLOOKUP_RLOCKPCB)
+			INP_RLOCK_ASSERT(inp);
+		else
 			panic("%s: locking bug", __func__);
-#ifdef INVARIANTS
-		if (inp != NULL) {
-			if (lookupflags & INPLOOKUP_WLOCKPCB)
-				INP_WLOCK_ASSERT(inp);
-			else
-				INP_RLOCK_ASSERT(inp);
-		}
-#endif
 	}
-	INP_HASH_RUNLOCK(pcbinfo);
+#endif
 	return (inp);
 }
 
Index: sys/netinet6/in6_pcb.c
===================================================================
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -1123,6 +1123,16 @@
 }
 #endif /* PCBGROUP */
 
+#define LOCAL_UNLOCK(_inp)						\
+	do {								\
+		if ((_inp) != NULL) {					\
+									\
+			if (lookupflags & INPLOOKUP_WLOCKPCB)		\
+				INP_WUNLOCK((_inp));			\
+			else						\
+				INP_RUNLOCK((_inp));			\
+		}							\
+	} while (0)
 /*
  * Lookup PCB in hash list.
  */
@@ -1133,11 +1143,12 @@
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
+	struct inpcb *local_wild, *local_exact, *jail_wild;
 	u_short fport = fport_arg, lport = lport_arg;
 
-	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
+	KASSERT((lookupflags & (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB)) !=
+	    (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB),
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
-
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	/*
@@ -1147,9 +1158,14 @@
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
 	    INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
 	CK_LIST_FOREACH(inp, head, inp_hash) {
-		/* XXX inp locking */
+		if (lookupflags & INPLOOKUP_WLOCKPCB)
+			INP_WLOCK(inp);
+		else
+			INP_RLOCK(inp);
+		if (__predict_false(inp->inp_flags2 & INP_FREED))
+			goto next1;
 		if ((inp->inp_vflag & INP_IPV6) == 0)
-			continue;
+			goto next1;
 		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
 		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
 		    inp->inp_fport == fport &&
@@ -1159,14 +1175,28 @@
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
-			if (prison_flag(inp->inp_cred, PR_IP6))
+			if (prison_flag(inp->inp_cred, PR_IP6)) {
+				LOCAL_UNLOCK(tmpinp);
+				if ((lookupflags &
+				    (INPLOOKUP_WLOCKPCB |
+				    INPLOOKUP_RLOCKPCB)) == 0)
+					INP_RUNLOCK(inp);
 				return (inp);
-			if (tmpinp == NULL)
+			}
+			if (tmpinp == NULL) {
 				tmpinp = inp;
+				continue;
+			}
 		}
+next1:
+		LOCAL_UNLOCK(inp);
 	}
-	if (tmpinp != NULL)
+	if (tmpinp != NULL) {
+		if ((lookupflags &
+		    (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB)) == 0)
+			INP_RUNLOCK(tmpinp);
 		return (tmpinp);
+	}
 
 	/*
 	 * Then look in lb group (for wildcard match).
@@ -1176,6 +1206,20 @@
 		inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr,
 		    fport, lookupflags);
 		if (inp != NULL) {
+			if (lookupflags & INPLOOKUP_WLOCKPCB)
+				INP_WLOCK(inp);
+			else if (lookupflags & INPLOOKUP_RLOCKPCB)
+				INP_RLOCK(inp);
+			else
+				return (inp);
+			/*
+			 * The pcb is locked only now, after the lbgroup
+			 * lookup returned it; drop it if it was freed.
+			 */
+			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
+				LOCAL_UNLOCK(inp);
+				return (NULL);
+			}
 			return (inp);
 		}
 	}
@@ -1184,10 +1228,9 @@
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
-		struct inpcb *local_wild = NULL, *local_exact = NULL;
-		struct inpcb *jail_wild = NULL;
 		int injail;
 
+		local_wild = local_exact = jail_wild = NULL;
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
@@ -1199,51 +1242,81 @@
 		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
 		    pcbinfo->ipi_hashmask)];
 		CK_LIST_FOREACH(inp, head, inp_hash) {
-			/* XXX inp locking */
+			if (lookupflags & INPLOOKUP_WLOCKPCB)
+				INP_WLOCK(inp);
+			else
+				INP_RLOCK(inp);
+			if (__predict_false(inp->inp_flags2 & INP_FREED))
+				goto nextw;
 			if ((inp->inp_vflag & INP_IPV6) == 0)
-				continue;
+				goto nextw;
 
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
-			    inp->inp_lport != lport) {
-				continue;
-			}
+			    inp->inp_lport != lport)
+				goto nextw;
 
 			injail = prison_flag(inp->inp_cred, PR_IP6);
 			if (injail) {
 				if (prison_check_ip6(inp->inp_cred,
 				    laddr) != 0)
-					continue;
+					goto nextw;
 			} else {
 				if (local_exact != NULL)
-					continue;
+					goto nextw;
 			}
-
 			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
 				if (injail)
-					return (inp);
-				else
+					goto out;
+				else {
+					LOCAL_UNLOCK(local_exact);
 					local_exact = inp;
+				}
 			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
-				if (injail)
+				if (injail) {
+					LOCAL_UNLOCK(jail_wild);
 					jail_wild = inp;
-				else
+				} else {
+					LOCAL_UNLOCK(local_wild);
 					local_wild = inp;
-			}
+				}
+			} else
+				goto nextw;
+			continue;
+nextw:
+			LOCAL_UNLOCK(inp);
 		} /* LIST_FOREACH */
 
-		if (jail_wild != NULL)
-			return (jail_wild);
-		if (local_exact != NULL)
-			return (local_exact);
-		if (local_wild != NULL)
-			return (local_wild);
+		if (jail_wild != NULL) {
+			inp = jail_wild;
+			jail_wild = NULL;
+			goto out;
+		}
+		if (local_exact != NULL) {
+			inp = local_exact;
+			local_exact = NULL;
+			goto out;
+		}
+		if (local_wild != NULL) {
+			inp = local_wild;
+			local_wild = NULL;
+			goto out;
+		}
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 
 	/*
 	 * Not found.
 	 */
 	return (NULL);
+
+out:
+	LOCAL_UNLOCK(jail_wild);
+	LOCAL_UNLOCK(local_exact);
+	LOCAL_UNLOCK(local_wild);
+	if ((lookupflags & (INPLOOKUP_WLOCKPCB|INPLOOKUP_RLOCKPCB)) == 0)
+		INP_RUNLOCK(inp);
+	return (inp);
 }
+#undef LOCAL_UNLOCK
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
@@ -1259,32 +1332,19 @@
 
 	INP_HASH_RLOCK(pcbinfo);
 	inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
-	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
+	    lookupflags, ifp);
+	INP_HASH_RUNLOCK(pcbinfo);
+#ifdef INVARIANTS
 	if (inp != NULL) {
-		if (lookupflags & INPLOOKUP_WLOCKPCB) {
-			INP_WLOCK(inp);
-			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
-				INP_WUNLOCK(inp);
-				inp = NULL;
-			}
-		} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
-			INP_RLOCK(inp);
-			if (__predict_false(inp->inp_flags2 & INP_FREED)) {
-				INP_RUNLOCK(inp);
-				inp = NULL;
-			}
-		} else
+		if (lookupflags & INPLOOKUP_WLOCKPCB)
+			INP_WLOCK_ASSERT(inp);
+		else if (lookupflags & INPLOOKUP_RLOCKPCB)
+			INP_RLOCK_ASSERT(inp);
+		else
 			panic("%s: locking bug", __func__);
-#ifdef INVARIANTS
-		if (inp != NULL) {
-			if (lookupflags & INPLOOKUP_WLOCKPCB)
-				INP_WLOCK_ASSERT(inp);
-			else
-				INP_RLOCK_ASSERT(inp);
-		}
-#endif
 	}
-	INP_HASH_RUNLOCK(pcbinfo);
+#endif
+
 	return (inp);
 }
 