Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -956,6 +956,7 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); + struct mbuf *options; int error, optval; #ifdef RSS uint32_t rss_bucket; @@ -1226,7 +1227,10 @@ #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: if (IPSEC_ENABLED(ipv4)) { + INP_WLOCK(inp); error = IPSEC_PCBCTL(ipv4, inp, sopt); + if (!error) + INP_WUNLOCK(inp); break; } /* FALLTHROUGH */ @@ -1242,12 +1246,22 @@ switch (sopt->sopt_name) { case IP_OPTIONS: case IP_RETOPTS: - if (inp->inp_options) - error = sooptcopyout(sopt, - mtod(inp->inp_options, - char *), - inp->inp_options->m_len); - else + /* Duplicate the options mbuf under the lock; copy out unlocked. */ + INP_RLOCK(inp); + if (inp->inp_options) { + options = m_copym(inp->inp_options, 0, + M_COPYALL, M_NOWAIT); + INP_RUNLOCK(inp); + if (options != NULL) { + error = sooptcopyout(sopt, + mtod(options, char *), + options->m_len); + m_freem(options); + } else + error = ENOMEM; + } else { + INP_RUNLOCK(inp); sopt->sopt_valsize = 0; + } break; @@ -1315,12 +1329,14 @@ break; case IP_PORTRANGE: + INP_RLOCK(inp); if (inp->inp_flags & INP_HIGHPORT) optval = IP_PORTRANGE_HIGH; else if (inp->inp_flags & INP_LOWPORT) optval = IP_PORTRANGE_LOW; else optval = 0; + INP_RUNLOCK(inp); break; case IP_ONESBCAST: @@ -1346,9 +1362,11 @@ break; #ifdef RSS case IP_RSSBUCKETID: + INP_RLOCK(inp); retval = rss_hash2bucket(inp->inp_flowid, inp->inp_flowtype, &rss_bucket); + INP_RUNLOCK(inp); if (retval == 0) optval = rss_bucket; else @@ -1380,7 +1398,9 @@ #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: if (IPSEC_ENABLED(ipv4)) { + /* On get, IPSEC_PCBCTL releases the wlock itself. */ + INP_WLOCK(inp); error = IPSEC_PCBCTL(ipv4, inp, sopt); break; } /* FALLTHROUGH */ Index: sys/netinet/raw_ip.c =================================================================== --- sys/netinet/raw_ip.c +++ sys/netinet/raw_ip.c @@ -637,10 +637,12 @@ sizeof optval); if (error) break; +
INP_WLOCK(inp); if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; + INP_WUNLOCK(inp); break; case IP_FW3: /* generic ipfw v.3 functions */ Index: sys/netinet6/ip6_output.c =================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -133,7 +133,7 @@ struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); -static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); +static int ip6_getpcbopt(struct inpcb *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); @@ -1496,8 +1496,10 @@ error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; + INP_WLOCK(in6p); error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); + INP_WUNLOCK(in6p); m_freem(m); /* XXX */ break; } @@ -1594,23 +1596,34 @@ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) -#define OPTSET2(bit, val) do { \ - INP_WLOCK(in6p); \ +#define OPTSET2_N(bit, val) do { \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ +} while (0) +#define OPTSET2(bit, val) do { \ + INP_WLOCK(in6p); \ + OPTSET2_N(bit, val); \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 
1 : 0) +#define OPTSET2292_EXCLUSIVE(bit) \ +do { \ + INP_WLOCK(in6p); \ + if (OPTBIT(IN6P_RFC2292)) { \ + error = EINVAL; \ + } else { \ + if (optval) \ + in6p->inp_flags |= (bit); \ + else \ + in6p->inp_flags &= ~(bit); \ + } \ + INP_WUNLOCK(in6p); \ +} while (/*CONSTCOND*/ 0) case IPV6_RECVPKTINFO: - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_PKTINFO); + OPTSET2292_EXCLUSIVE(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: @@ -1622,57 +1635,34 @@ error = EINVAL; break; } + INP_WLOCK(in6p); optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); + INP_WUNLOCK(in6p); break; } case IPV6_RECVHOPLIMIT: - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_HOPLIMIT); + OPTSET2292_EXCLUSIVE(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_HOPOPTS); + OPTSET2292_EXCLUSIVE(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_DSTOPTS); + OPTSET2292_EXCLUSIVE(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_RTHDRDSTOPTS); + OPTSET2292_EXCLUSIVE(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_RTHDR); + OPTSET2292_EXCLUSIVE(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: @@ -1702,8 +1692,10 @@ * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. 
*/ + INP_WLOCK(in6p); if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { + INP_WUNLOCK(in6p); error = EINVAL; break; } @@ -1712,14 +1704,11 @@ in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; + INP_WUNLOCK(in6p); break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - OPTSET(IN6P_TCLASS); + OPTSET2292_EXCLUSIVE(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); @@ -1739,8 +1728,10 @@ case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { + INP_WLOCK(in6p); in6p->inp_rss_listen_bucket = optval; - OPTSET2(INP_RSS_BUCKET_SET, 1); + OPTSET2_N(INP_RSS_BUCKET_SET, 1); + INP_WUNLOCK(in6p); } else { error = EINVAL; } @@ -1763,11 +1754,13 @@ break; { struct ip6_pktopts **optp; + INP_WLOCK(in6p); optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); + INP_WUNLOCK(in6p); break; } @@ -1832,12 +1825,6 @@ int optlen; struct ip6_pktopts **optp; - /* cannot mix with RFC2292 */ - if (OPTBIT(IN6P_RFC2292)) { - error = EINVAL; - break; - } - /* * We only ensure valsize is not too large * here. Further validation will be done @@ -1847,12 +1834,21 @@ sizeof(optbuf_storage), 0); if (error) break; + + INP_WLOCK(in6p); + /* cannot mix with RFC2292 */ + if (OPTBIT(IN6P_RFC2292)) { + INP_WUNLOCK(in6p); + error = EINVAL; + break; + } optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? 
td->td_ucred : NULL, uproto); + INP_WUNLOCK(in6p); break; } #undef OPTSET @@ -1905,7 +1901,10 @@ #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: if (IPSEC_ENABLED(ipv6)) { + INP_WLOCK(in6p); error = IPSEC_PCBCTL(ipv6, in6p, sopt); + if (!error) + INP_WUNLOCK(in6p); break; } /* FALLTHROUGH */ @@ -2037,10 +2036,12 @@ break; #ifdef RSS case IPV6_RSSBUCKETID: + INP_RLOCK(in6p); retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); + INP_RUNLOCK(in6p); if (retval == 0) optval = rss_bucket; else @@ -2057,16 +2058,15 @@ break; } - if (error) - break; error = sooptcopyout(sopt, &optval, sizeof optval); - break; + return (error); case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; + struct in6_addr addr; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); @@ -2074,9 +2074,14 @@ * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. + * Copy faddr out of in6p to avoid holding lock + * on inp during route lookup. */ + INP_RLOCK(in6p); + bcopy(&in6p->in6p_faddr, &addr, sizeof(addr)); + INP_RUNLOCK(in6p); error = ip6_getpmtu_ctl(so->so_fibnum, - &in6p->in6p_faddr, &pmtu); + &addr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) @@ -2126,8 +2131,7 @@ case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: - error = ip6_getpcbopt(in6p->in6p_outputopts, - optname, sopt); + error = ip6_getpcbopt(in6p, optname, sopt); break; case IPV6_MULTICAST_IF: @@ -2140,7 +2144,9 @@ #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: if (IPSEC_ENABLED(ipv6)) { + /* On get, IPSEC_PCBCTL needs the wlock and drops it. */ + INP_WLOCK(in6p); error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; } /* FALLTHROUGH */ @@ -2238,6 +2244,8 @@ int error = 0; struct thread *td = sopt->sopt_td; + INP_WLOCK_ASSERT(sotoinpcb(so)); + /* turn off any old options.
*/ if (opt) { #ifdef DIAGNOSTIC @@ -2302,17 +2310,50 @@ return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } +#define GET_PKTOPT_VAR(field, lenexpr) do { \ + if (pktopt && pktopt->field) { \ + INP_RUNLOCK(in6p); \ + optdata = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK); \ + malloc_optdata = true; \ + INP_RLOCK(in6p); \ + if (in6p->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { \ + INP_RUNLOCK(in6p); \ + free(optdata, M_TEMP); \ + return (ECONNRESET); \ + } \ + pktopt = in6p->in6p_outputopts; \ + if (pktopt && pktopt->field) { \ + optdatalen = min(lenexpr, sopt->sopt_valsize); \ + bcopy(pktopt->field, optdata, optdatalen); \ + } else { \ + free(optdata, M_TEMP); \ + optdata = NULL; \ + malloc_optdata = false; \ + } \ + } \ +} while(0) + +#define GET_PKTOPT_EXT_HDR(field) GET_PKTOPT_VAR(field, \ + (((struct ip6_ext *)pktopt->field)->ip6e_len + 1) << 3) + +#define GET_PKTOPT_SOCKADDR(field) GET_PKTOPT_VAR(field, \ + pktopt->field->sa_len) + static int -ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) +ip6_getpcbopt(struct inpcb *in6p, int optname, struct sockopt *sopt) { void *optdata = NULL; + bool malloc_optdata = false; int optdatalen = 0; - struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; + struct ip6_pktopts *pktopt; + + INP_RLOCK(in6p); + pktopt = in6p->in6p_outputopts; switch (optname) { case IPV6_PKTINFO: @@ -2329,50 +2370,29 @@ break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) - optdata = (void *)&pktopt->ip6po_tclass; - else - optdata = (void *)&deftclass; + deftclass = pktopt->ip6po_tclass; + optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: - if (pktopt && pktopt->ip6po_hbh) { - optdata = (void *)pktopt->ip6po_hbh; - ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; - optdatalen = (ip6e->ip6e_len + 1) << 3; - } + GET_PKTOPT_EXT_HDR(ip6po_hbh);
break; case IPV6_RTHDR: - if (pktopt && pktopt->ip6po_rthdr) { - optdata = (void *)pktopt->ip6po_rthdr; - ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; - optdatalen = (ip6e->ip6e_len + 1) << 3; - } + GET_PKTOPT_EXT_HDR(ip6po_rthdr); break; case IPV6_RTHDRDSTOPTS: - if (pktopt && pktopt->ip6po_dest1) { - optdata = (void *)pktopt->ip6po_dest1; - ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; - optdatalen = (ip6e->ip6e_len + 1) << 3; - } + GET_PKTOPT_EXT_HDR(ip6po_dest1); break; case IPV6_DSTOPTS: - if (pktopt && pktopt->ip6po_dest2) { - optdata = (void *)pktopt->ip6po_dest2; - ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; - optdatalen = (ip6e->ip6e_len + 1) << 3; - } + GET_PKTOPT_EXT_HDR(ip6po_dest2); break; case IPV6_NEXTHOP: - if (pktopt && pktopt->ip6po_nexthop) { - optdata = (void *)pktopt->ip6po_nexthop; - optdatalen = pktopt->ip6po_nexthop->sa_len; - } + GET_PKTOPT_SOCKADDR(ip6po_nexthop); break; case IPV6_USE_MIN_MTU: if (pktopt) - optdata = (void *)&pktopt->ip6po_minmtu; - else - optdata = (void *)&defminmtu; + defminmtu = pktopt->ip6po_minmtu; + optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: @@ -2385,19 +2405,22 @@ break; case IPV6_PREFER_TEMPADDR: if (pktopt) - optdata = (void *)&pktopt->ip6po_prefer_tempaddr; - else - optdata = (void *)&defpreftemp; + defpreftemp = pktopt->ip6po_prefer_tempaddr; + optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif + INP_RUNLOCK(in6p); return (ENOPROTOOPT); } + INP_RUNLOCK(in6p); error = sooptcopyout(sopt, optdata, optdatalen); + if (malloc_optdata) + free(optdata, M_TEMP); return (error); } @@ -3082,6 +3105,8 @@ { int len; + INP_WLOCK_ASSERT(in6p); + if (!in6p->in6p_outputopts) return 0; Index: sys/netipsec/ipsec_pcb.c =================================================================== --- sys/netipsec/ipsec_pcb.c +++ sys/netipsec/ipsec_pcb.c @@ -276,6 +276,8 @@ struct 
secpolicy **spp, *newsp; int error, flags; + INP_WLOCK_ASSERT(inp); + xpl = (struct sadb_x_policy *)request; /* Select direction. */ switch (xpl->sadb_x_policy_dir) { @@ -332,7 +334,6 @@ return (EINVAL); } - INP_WLOCK(inp); if (xpl->sadb_x_policy_dir == IPSEC_DIR_INBOUND) { spp = &inp->inp_sp->sp_in; flags = INP_INBOUND_POLICY; @@ -352,7 +353,6 @@ inp->inp_sp->flags |= flags; KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); } - INP_WUNLOCK(inp); return (0); } @@ -365,7 +365,7 @@ xpl = (struct sadb_x_policy *)request; - INP_RLOCK(inp); + INP_WLOCK_ASSERT(inp); flags = inp->inp_sp->flags; /* Select direction. */ switch (xpl->sadb_x_policy_dir) { @@ -378,7 +378,6 @@ flags &= INP_OUTBOUND_POLICY; break; default: - INP_RUNLOCK(inp); ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, xpl->sadb_x_policy_dir)); return (EINVAL); @@ -386,7 +385,6 @@ if (flags == 0) { /* Return ENTRUST policy */ - INP_RUNLOCK(inp); xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = IPSEC_POLICY_ENTRUST; xpl->sadb_x_policy_id = 0; @@ -400,7 +398,6 @@ ("sp is NULL, but flags is 0x%04x", inp->inp_sp->flags)); key_addref(sp); - INP_RUNLOCK(inp); error = key_sp2msg(sp, request, len); key_freesp(&sp); if (error == EINVAL) @@ -421,30 +418,43 @@ size_t optlen; int error; - if (inp->inp_sp == NULL) + INP_WLOCK_ASSERT(inp); + + if (inp->inp_sp == NULL) { + INP_WUNLOCK(inp); return (ENOPROTOOPT); + } /* Limit maximum request size to PAGE_SIZE */ optlen = sopt->sopt_valsize; - if (optlen < sizeof(struct sadb_x_policy) || optlen > PAGE_SIZE) + if (optlen < sizeof(struct sadb_x_policy) || optlen > PAGE_SIZE) { + INP_WUNLOCK(inp); return (EINVAL); + } optdata = malloc(optlen, M_TEMP, sopt->sopt_td ? M_WAITOK: M_NOWAIT); - if (optdata == NULL) + if (optdata == NULL) { + INP_WUNLOCK(inp); return (ENOBUFS); + } /* * We need a hint from the user, what policy is requested - input * or output? User should specify it in the buffer, even for * setsockopt(). 
*/ + INP_WUNLOCK(inp); error = sooptcopyin(sopt, optdata, optlen, optlen); if (error == 0) { - if (sopt->sopt_dir == SOPT_SET) + INP_WLOCK(inp); + if (sopt->sopt_dir == SOPT_SET) { error = ipsec_set_pcbpolicy(inp, sopt->sopt_td ? sopt->sopt_td->td_ucred: NULL, optdata, optlen); - else { + if (error != 0) + INP_WUNLOCK(inp); + } else { error = ipsec_get_pcbpolicy(inp, optdata, &optlen); + INP_WUNLOCK(inp); if (error == 0) error = sooptcopyout(sopt, optdata, optlen); } Index: sys/netipsec/ipsec_support.h =================================================================== --- sys/netipsec/ipsec_support.h +++ sys/netipsec/ipsec_support.h @@ -44,6 +44,14 @@ int ipsec_delete_pcbpolicy(struct inpcb *); int ipsec_copy_pcbpolicy(struct inpcb *, struct inpcb *); +/* + * Locking contract for the pcbctl function: the caller holds the inp + * write lock on entry. On setting values, pcbctl itself releases the + * lock when it returns an error; on success the lock is still held and + * the caller must release it. + * On getting values, pcbctl always releases the lock before returning, + * so the caller must not unlock it again. + */ struct ipsec_methods { int (*input)(struct mbuf *, int, int); int (*check_policy)(const struct mbuf *, struct inpcb *);