Changeset View
Standalone View
sys/netinet/tcp_lro.c
Context not available. | |||||
static int | static int | ||||
tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le) | tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le) | ||||
{ | { | ||||
struct epoch_tracker et; | |||||
struct inpcb *inp; | struct inpcb *inp; | ||||
struct tcpcb *tp; | struct tcpcb *tp; | ||||
struct mbuf **pp, *cmp, *mv_to; | struct mbuf **pp, *cmp, *mv_to; | ||||
bool bpf_req, should_wake; | bool bpf_req, should_wake, needs_epoch; | ||||
/* Check if packet doesn't belongs to our network interface. */ | /* Check if packet doesn't belongs to our network interface. */ | ||||
if ((tcplro_stacks_wanting_mbufq == 0) || | if ((tcplro_stacks_wanting_mbufq == 0) || | ||||
Context not available. | |||||
IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6))) | IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6))) | ||||
return (TCP_LRO_CANNOT); | return (TCP_LRO_CANNOT); | ||||
#endif | #endif | ||||
needs_epoch = !(lc->ifp->if_flags & IFF_KNOWSEPOCH); | |||||
if (needs_epoch) | |||||
tuexen: Please use `__predict_false(needs_epoch)` instead of `needs_epoch`.
See the latest patch I sent...
| |||||
NET_EPOCH_ENTER(et); | |||||
/* Lookup inp, if any. */ | /* Lookup inp, if any. */ | ||||
inp = tcp_lro_lookup(lc->ifp, | inp = tcp_lro_lookup(lc->ifp, | ||||
(le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner); | (le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner); | ||||
if (inp == NULL) | if (inp == NULL) { | ||||
if (needs_epoch) | |||||
Not Done Inline ActionsSee above. tuexen: See above. | |||||
NET_EPOCH_EXIT(et); | |||||
return (TCP_LRO_CANNOT); | return (TCP_LRO_CANNOT); | ||||
} | |||||
counter_u64_add(tcp_inp_lro_locks_taken, 1); | counter_u64_add(tcp_inp_lro_locks_taken, 1); | ||||
Context not available. | |||||
(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || | (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) || | ||||
(inp->inp_flags2 & INP_FREED)) { | (inp->inp_flags2 & INP_FREED)) { | ||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
if (needs_epoch) | |||||
Not Done Inline ActionsSee above. tuexen: See above. | |||||
NET_EPOCH_EXIT(et); | |||||
return (TCP_LRO_CANNOT); | return (TCP_LRO_CANNOT); | ||||
} | } | ||||
if ((inp->inp_irq_cpu_set == 0) && (lc->lro_cpu_is_set == 1)) { | if ((inp->inp_irq_cpu_set == 0) && (lc->lro_cpu_is_set == 1)) { | ||||
Context not available. | |||||
/* Check if the transport doesn't support the needed optimizations. */ | /* Check if the transport doesn't support the needed optimizations. */ | ||||
if ((inp->inp_flags2 & (INP_SUPPORTS_MBUFQ | INP_MBUF_ACKCMP)) == 0) { | if ((inp->inp_flags2 & (INP_SUPPORTS_MBUFQ | INP_MBUF_ACKCMP)) == 0) { | ||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
if (needs_epoch) | |||||
Not Done Inline ActionsSee above. tuexen: See above. | |||||
NET_EPOCH_EXIT(et); | |||||
return (TCP_LRO_CANNOT); | return (TCP_LRO_CANNOT); | ||||
} | } | ||||
Context not available. | |||||
} | } | ||||
if (inp != NULL) | if (inp != NULL) | ||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
if (needs_epoch) | |||||
Not Done Inline ActionsSee above. tuexen: See above. | |||||
NET_EPOCH_EXIT(et); | |||||
return (0); /* Success. */ | return (0); /* Success. */ | ||||
} | } | ||||
#endif | #endif | ||||
Context not available. | |||||
struct ip6_hdr *ip6; | struct ip6_hdr *ip6; | ||||
} l3; | } l3; | ||||
struct mbuf *m; | struct mbuf *m; | ||||
struct mbuf *nm; | struct mbuf *nm, *msave; | ||||
struct tcphdr *th; | struct tcphdr *th; | ||||
struct tcp_ackent *ack_ent; | struct tcp_ackent *ack_ent; | ||||
uint32_t *ts_ptr; | uint32_t *ts_ptr; | ||||
Context not available. | |||||
bool other_opts, can_compress; | bool other_opts, can_compress; | ||||
uint16_t lro_type; | uint16_t lro_type; | ||||
uint16_t iptos; | uint16_t iptos; | ||||
int tcp_hdr_offset; | int tcp_hdr_offset, tcpoptlen; | ||||
int idx; | int idx; | ||||
/* Get current mbuf. */ | /* Get current mbuf. */ | ||||
Context not available. | |||||
case LRO_TYPE_IPV4_TCP: | case LRO_TYPE_IPV4_TCP: | ||||
tcp_hdr_offset -= sizeof(*le->outer.ip4); | tcp_hdr_offset -= sizeof(*le->outer.ip4); | ||||
m->m_pkthdr.lro_etype = ETHERTYPE_IP; | m->m_pkthdr.lro_etype = ETHERTYPE_IP; | ||||
break; | break; | ||||
case LRO_TYPE_IPV6_TCP: | case LRO_TYPE_IPV6_TCP: | ||||
tcp_hdr_offset -= sizeof(*le->outer.ip6); | tcp_hdr_offset -= sizeof(*le->outer.ip6); | ||||
Context not available. | |||||
m->m_flags |= M_LRO_EHDRSTRP; | m->m_flags |= M_LRO_EHDRSTRP; | ||||
m->m_flags &= ~M_ACKCMP; | m->m_flags &= ~M_ACKCMP; | ||||
m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset; | m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset; | ||||
th = tcp_lro_get_th(m); | th = tcp_lro_get_th(m); | ||||
tcpoptlen = th->th_off << 2; | |||||
Not Done Inline ActionsThis code will never trigger, because the tcp_lro_parser() already checks that the full payload, including the TCP header and options, fit into m_len. If it doesn't it returns NULL, and so the packet is not LRO'ed. /* We expect a contiguous header [eh, ip, tcp]. */ pa = tcp_lro_parser(m, &po, &pi, true); if (__predict_false(pa == NULL)) return (TCP_LRO_NOT_SUPPORTED); hselasky: This code will never trigger, because the tcp_lro_parser() already checks that the full payload… | |||||
Done Inline Actions: So I have now been looking at the parser; can you explain to me how it is checking this? The first thing I see that tcp_lro_parser() does is call the low-level parser, but... Now, I agree that most drivers will not be sending these short mbufs. Maybe I am missing something; can you please point out to me how this works? Thanks. rrs: So I have now been looking at the parser, can you explain to me how
it is checking this?
The… | |||||
Not Done Inline Actions: Sure. When packets enter the tcp_lro_parser() I've factored out the generic parts into the tcp_lro_low_level_parser() so that it can be used for both inner and outer headers. Before the tcp_lro_low_level_parser() function returns non-NULL it updates parser->total_hdr_len, which includes all TCP options, if any. Then before tcp_lro_parser() returns, it checks total_hdr_len against m_len: if (data_ptr == NULL || po->total_hdr_len > m->m_len) return (NULL); If the m_len is not big enough to contain all inner and outer headers, including TCP options, then it fails passing the packet into the LRO engine, basically. --HPS hselasky: Sure.
When packets enter the tcp_lro_parser() I've factored out the generic parts into the… | |||||
Done Inline Actions: Yes, I see that this prevents us from having a packet that fails that particular test... But what happens if I send in (from some errant driver) an m_pkthdr with the Ethernet and IP header but not the TCP header? For example, if you see it is IPPROTO_TCP then you just set parser->tcp = ptr and then dereference it. We probably need to send in the m_len to the low-level parser and make sure it does not overrun. rrs: Yes I see that this prevents us from having a packet that fails that particular test...
But… | |||||
Not Done Inline Actions: Hi Randall, the tcp_lro_parser() function returns NULL if there are only ETH+IP headers, if you look carefully. The tcp_lro_parser() function, by default, only returns non-NULL if the inner header is TCP and all of it fits into the first mbuf in the chain. hselasky: Hi Randall,
The tcp_lro_parser() function returns NULL if there is only ETH+IP headers, if you… | |||||
Done Inline Actions: Hans, this is broken if the caller sends you ETH+IP+TCP and the TCP options in the next mbuf. If the caller passes you ETH+IP and the TCP header plus options is in the next mbuf, you thus end up with unknown results and problems. rrs: Hans
This is broken if the caller sends you ETH+IP+TCP and the TCP options in the next mbuf… | |||||
Not Done Inline Actions
Hi Randall, The current LRO code will just forward such packets AS-IS to the TCP stack, because the tcp_lro_parser() will return NULL then! mbuf chains like you describe are _not_ touched by the current LRO code. --HPS hselasky: > This is broken if the caller send you ETH+IP+TCP and the TCP_Options in the next mbuf, you
>… | |||||
if (__predict_false(m->m_len < (sizeof(*th) + m->m_pkthdr.lro_tcp_h_off + tcpoptlen))) { | |||||
/* | |||||
* This code is also highly unlikely to run, a | |||||
* driver would have to have placed | |||||
* Enet+IP+TCPheader but split the options | |||||
* into the next packet. However unlikely | |||||
* lets go ahead and validate it before passing | |||||
* in a bogus packet to TCP potentially. | |||||
*/ | |||||
msave = m->m_nextpkt; | |||||
m->m_nextpkt = NULL; | |||||
m = m_pullup(m, (sizeof(*th)+ m->m_pkthdr.lro_tcp_h_off + tcpoptlen)); | |||||
if (m == NULL) { | |||||
/* | |||||
* We lost the mbuf, so we can only | |||||
* lie and tell them it was compressed | |||||
* when it was not. | |||||
*/ | |||||
*pp = msave; | |||||
return (true); | |||||
} else { | |||||
/* Restore the m_nextpkt */ | |||||
m->m_nextpkt = msave; | |||||
} | |||||
} | |||||
th->th_sum = 0; /* TCP checksum is valid. */ | th->th_sum = 0; /* TCP checksum is valid. */ | ||||
/* Check if ACK can be compressed */ | /* Check if ACK can be compressed */ | ||||
can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts); | can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts); | ||||
Context not available. | |||||
m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); | m->m_pkthdr.rcv_tstmp = bintime2ns(&lc->lro_last_queue_time); | ||||
m->m_flags |= M_TSTMP_LRO; | m->m_flags |= M_TSTMP_LRO; | ||||
} | } | ||||
/* Get pointer to TCP header. */ | /* Get pointer to TCP header. */ | ||||
th = pa->tcp; | th = pa->tcp; | ||||
m->m_pkthdr.lro_tcp_h_off = ((uint8_t *)th - (uint8_t *)m->m_data); | |||||
if (__predict_false(m->m_len < (m->m_pkthdr.lro_tcp_h_off + sizeof(struct tcphdr)))) { | |||||
/* | |||||
* This code is highly unlikely to run, a | |||||
* driver should not be passing a packet in | |||||
* without the Enet+IP+TCP header in the first | |||||
* mbuf. But we place this here as a paranoid | |||||
* safety measure just in case :-) | |||||
*/ | |||||
m = m_pullup(m, (m->m_pkthdr.lro_tcp_h_off + sizeof(struct tcphdr))); | |||||
if (m == NULL) { | |||||
/* | |||||
* We lost the mbuf, so we can only | |||||
* lie and tell them it was accepted, we | |||||
* don't want the caller to push the | |||||
* freed mbuf into the stack. | |||||
*/ | |||||
return (0); | |||||
} | |||||
} | |||||
/* Don't process SYN packets. */ | /* Don't process SYN packets. */ | ||||
if (__predict_false(th->th_flags & TH_SYN)) | if (__predict_false(th->th_flags & TH_SYN)) | ||||
return (TCP_LRO_CANNOT); | return (TCP_LRO_CANNOT); | ||||
Context not available. | |||||
m->m_pkthdr.rcvif = lc->ifp; | m->m_pkthdr.rcvif = lc->ifp; | ||||
m->m_pkthdr.lro_tcp_d_csum = tcp_data_sum; | m->m_pkthdr.lro_tcp_d_csum = tcp_data_sum; | ||||
m->m_pkthdr.lro_tcp_d_len = tcp_data_len; | m->m_pkthdr.lro_tcp_d_len = tcp_data_len; | ||||
m->m_pkthdr.lro_tcp_h_off = ((uint8_t *)th - (uint8_t *)m->m_data); | |||||
m->m_pkthdr.lro_nsegs = 1; | m->m_pkthdr.lro_nsegs = 1; | ||||
/* Get hash bucket. */ | /* Get hash bucket. */ | ||||
if (!use_hash) { | if (!use_hash) { | ||||
bucket = &lc->lro_hash[0]; | bucket = &lc->lro_hash[0]; | ||||
Context not available. |
Please use __predict_false(needs_epoch) instead of needs_epoch.
See the latest patch I sent...