Index: sys/netinet/tcp_lro.c =================================================================== --- sys/netinet/tcp_lro.c +++ sys/netinet/tcp_lro.c @@ -1236,10 +1236,11 @@ static int tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le) { + struct epoch_tracker et; struct inpcb *inp; struct tcpcb *tp; struct mbuf **pp, *cmp, *mv_to; - bool bpf_req, should_wake; + bool bpf_req, should_wake, needs_epoch; /* Check if packet doesn't belongs to our network interface. */ if ((tcplro_stacks_wanting_mbufq == 0) || @@ -1264,11 +1265,17 @@ IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6))) return (TCP_LRO_CANNOT); #endif + needs_epoch = !(lc->ifp->if_flags & IFF_KNOWSEPOCH); + if (needs_epoch) + NET_EPOCH_ENTER(et); /* Lookup inp, if any. */ inp = tcp_lro_lookup(lc->ifp, (le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner); - if (inp == NULL) + if (inp == NULL) { + if (needs_epoch) + NET_EPOCH_EXIT(et); return (TCP_LRO_CANNOT); + } counter_u64_add(tcp_inp_lro_locks_taken, 1); @@ -1280,6 +1287,8 @@ (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || (inp->inp_flags2 & INP_FREED)) { INP_WUNLOCK(inp); + if (needs_epoch) + NET_EPOCH_EXIT(et); return (TCP_LRO_CANNOT); } if ((inp->inp_irq_cpu_set == 0) && (lc->lro_cpu_is_set == 1)) { @@ -1289,6 +1298,8 @@ /* Check if the transport doesn't support the needed optimizations. */ if ((inp->inp_flags2 & (INP_SUPPORTS_MBUFQ | INP_MBUF_ACKCMP)) == 0) { INP_WUNLOCK(inp); + if (needs_epoch) + NET_EPOCH_EXIT(et); return (TCP_LRO_CANNOT); } @@ -1333,6 +1344,8 @@ } if (inp != NULL) INP_WUNLOCK(inp); + if (needs_epoch) + NET_EPOCH_EXIT(et); return (0); /* Success. */ } #endif @@ -1555,7 +1568,7 @@ struct ip6_hdr *ip6; } l3; struct mbuf *m; - struct mbuf *nm; + struct mbuf *nm, *msave; struct tcphdr *th; struct tcp_ackent *ack_ent; uint32_t *ts_ptr; @@ -1563,7 +1576,7 @@ bool other_opts, can_compress; uint16_t lro_type; uint16_t iptos; - int tcp_hdr_offset; + int tcp_hdr_offset, tcpoptlen; int idx; /* Get current mbuf. */ @@ -1609,11 +1622,34 @@ m->m_flags |= M_LRO_EHDRSTRP; m->m_flags &= ~M_ACKCMP; m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset; - th = tcp_lro_get_th(m); - + tcpoptlen = th->th_off << 2; + if (__predict_false(m->m_len < (sizeof(*th) + m->m_pkthdr.lro_tcp_h_off + tcpoptlen))) { + /* + * This code is also highly unlikely to run, a + * driver would have to have placed + * Enet+IP+TCPheader but split the options + * into the next packet. However unlikely + * lets go ahead and validate it before passing + * in a bogus packet to TCP potentially. + */ + msave = m->m_nextpkt; + m->m_nextpkt = NULL; + m = m_pullup(m, (sizeof(*th)+ m->m_pkthdr.lro_tcp_h_off + tcpoptlen)); + if (m == NULL) { + /* + * We lost the mbuf, so we can only + * lie and tell them it was compressed + * when it was not. + */ + *pp = msave; + return (true); + } else { + /* Restore the m_nextpkt */ + m->m_nextpkt = msave; + } + } th->th_sum = 0; /* TCP checksum is valid. */ - /* Check if ACK can be compressed */ can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts); @@ -1744,9 +1780,9 @@ return (TCP_LRO_CANNOT); #endif if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != - ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || + ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { - /* + /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. @@ -1777,13 +1813,32 @@ #endif /* If no hardware or arrival stamp on the packet add timestamp */ if ((m->m_flags & (M_TSTMP_LRO | M_TSTMP)) == 0) { - m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); + m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); m->m_flags |= M_TSTMP_LRO; } - /* Get pointer to TCP header. */ th = pa->tcp; + m->m_pkthdr.lro_tcp_h_off = ((uint8_t *)th - (uint8_t *)m->m_data); + if (__predict_false(m->m_len < (m->m_pkthdr.lro_tcp_h_off + sizeof(struct tcphdr)))) { + /* + * This code is highly unlikely to run, a + * driver should not be passing a packet in + * without the Enet+IP+TCP header in the first + * mbuf. But we place this here as a paranoid + * safety measure just in case :-) + */ + m = m_pullup(m, (m->m_pkthdr.lro_tcp_h_off + sizeof(struct tcphdr))); + if (m == NULL) { + /* + * We lost the mbuf, so we can only + * lie and tell them it was accepted, we + * don't want the caller to push the + * freed mbuf into the stack. + */ + return (0); + } + } /* Don't process SYN packets. */ if (__predict_false(th->th_flags & TH_SYN)) return (TCP_LRO_CANNOT); @@ -1811,9 +1866,7 @@ m->m_pkthdr.rcvif = lc->ifp; m->m_pkthdr.lro_tcp_d_csum = tcp_data_sum; m->m_pkthdr.lro_tcp_d_len = tcp_data_len; - m->m_pkthdr.lro_tcp_h_off = ((uint8_t *)th - (uint8_t *)m->m_data); m->m_pkthdr.lro_nsegs = 1; - /* Get hash bucket. */ if (!use_hash) { bucket = &lc->lro_hash[0]; @@ -1872,9 +1925,9 @@ int error; if (((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) != - ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || + ((CSUM_DATA_VALID | CSUM_PSEUDO_HDR))) || (m->m_pkthdr.csum_data != 0xffff)) { - /* + /* * The checksum either did not have hardware offload * or it was a bad checksum. We can't LRO such * a packet. Index: sys/netinet/tcp_stacks/rack_bbr_common.c =================================================================== --- sys/netinet/tcp_stacks/rack_bbr_common.c +++ sys/netinet/tcp_stacks/rack_bbr_common.c @@ -210,7 +210,6 @@ m = m_pullup(m, sizeof(*ip6) + sizeof(*th)); if (m == NULL) { KMOD_TCPSTAT_INC(tcps_rcvshort); - m_freem(m); return (-1); } } @@ -243,7 +242,6 @@ m = m_pullup(m, sizeof (struct tcpiphdr)); if (m == NULL) { KMOD_TCPSTAT_INC(tcps_rcvshort); - m_freem(m); return (-1); } }