Index: sys/netinet/tcp_lro.h =================================================================== --- sys/netinet/tcp_lro.h +++ sys/netinet/tcp_lro.h @@ -38,9 +38,6 @@ #define TCP_LRO_ENTRIES 8 #endif -#define TCP_LRO_SEQUENCE(mb) \ - (mb)->m_pkthdr.PH_loc.thirtytwo[0] - struct lro_entry { LIST_ENTRY(lro_entry) next; struct mbuf *m_head; @@ -80,10 +77,15 @@ #define source_ip6 lesource.s_ip6 #define dest_ip6 ledest.d_ip6 +struct lro_mbuf_sort { + uint64_t seq; + struct mbuf *mb; +}; + /* NB: This is part of driver structs. */ struct lro_ctrl { struct ifnet *ifp; - struct mbuf **lro_mbuf_data; + struct lro_mbuf_sort *lro_mbuf_data; uint64_t lro_queued; uint64_t lro_flushed; uint64_t lro_bad_csum; Index: sys/netinet/tcp_lro.c =================================================================== --- sys/netinet/tcp_lro.c +++ sys/netinet/tcp_lro.c @@ -111,9 +111,9 @@ LIST_INIT(&lc->lro_active); /* compute size to allocate */ - size = (lro_mbufs * sizeof(struct mbuf *)) + + size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) + (lro_entries * sizeof(*le)); - lc->lro_mbuf_data = (struct mbuf **) + lc->lro_mbuf_data = (struct lro_mbuf_sort *) malloc(size, M_LRO, M_NOWAIT | M_ZERO); /* check for out of memory */ @@ -149,7 +149,7 @@ /* free mbuf array, if any */ for (x = 0; x != lc->lro_mbuf_count; x++) - m_freem(lc->lro_mbuf_data[x]); + m_freem(lc->lro_mbuf_data[x].mb); lc->lro_mbuf_count = 0; /* free allocated memory, if any */ @@ -365,32 +365,93 @@ LIST_INSERT_HEAD(&lc->lro_free, le, next); } -static int -tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb) +#ifdef HAVE_INLINE_FLSLL +#define tcp_lro_msb_64(x) (1ULL << (flsll(x) - 1)) +#else +static inline uint64_t +tcp_lro_msb_64(uint64_t x) { - const struct mbuf *ma = *((const struct mbuf * const *)ppa); - const struct mbuf *mb = *((const struct mbuf * const *)ppb); - int ret; + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + return (x & ~(x >> 1)); +} +#endif - ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb); - if (ret != 0) - goto done; +/* + * The tcp_lro_sort() routine is comparable to qsort(), except it has + * a worst case complexity limit of O(MIN(N,64)*N), where N is the + * number of elements to sort and 64 is the number of sequence bits + * available. The algorithm is bit-slicing the 64-bit sequence number, + * sorting one bit at a time from the most significant bit until the + * least significant one, skipping the constant bits. + */ +static void +tcp_lro_sort(struct lro_mbuf_sort *parray, uint32_t size) +{ + uint64_t ones; + uint64_t zeros; + uint32_t x; + uint32_t y; - if (ma->m_pkthdr.flowid > mb->m_pkthdr.flowid) - return (1); - else if (ma->m_pkthdr.flowid < mb->m_pkthdr.flowid) - return (-1); +repeat: + if (size <= 1) + return; - ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb); -done: - return (ret); + /* compute sequence bits which are constant */ + ones = 0; + zeros = 0; + for (x = 0; x != size; x++) { + ones |= parray[x].seq; + zeros |= ~parray[x].seq; + } + + /* compute bits which are not constant into "ones" */ + ones &= zeros; + if (ones == 0) + return; + + /* pick the most significant bit which is not constant */ + ones = tcp_lro_msb_64(ones); + + /* + * Move entries having cleared sequence bits to the beginning + * of the array: + */ + for (x = y = 0; y != size; y++) { + struct lro_mbuf_sort temp; + + /* skip set bits */ + if (parray[y].seq & ones) + continue; + /* swap entries, if any */ + if (x != y) { + temp = parray[x]; + parray[x] = parray[y]; + parray[y] = temp; + } + x++; + } + + KASSERT(x != 0 && x != size, ("Memory is corrupted\n")); + + /* sort zeros */ + tcp_lro_sort(parray, x); + + /* sort ones */ + parray += x; + size -= x; + goto repeat; } void tcp_lro_flush_all(struct lro_ctrl *lc) { - uint32_t hashtype; - uint32_t flowid; + uint64_t seq; + uint64_t nseq; unsigned x; /* check if no mbufs to flush */ @@ -398,30 +459,27 @@ goto done; /* sort all mbufs according to stream */ - qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *), - &tcp_lro_mbuf_compare_header); + tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count); /* input data into LRO engine, stream by stream */ - flowid = 0; - hashtype = M_HASHTYPE_NONE; + seq = 0; for (x = 0; x != lc->lro_mbuf_count; x++) { struct mbuf *mb; - mb = lc->lro_mbuf_data[x]; + /* get mbuf */ + mb = lc->lro_mbuf_data[x].mb; + + /* get sequence number, masking away the packet index */ + nseq = lc->lro_mbuf_data[x].seq & (-1ULL << 24); /* check for new stream */ - if (mb->m_pkthdr.flowid != flowid || - M_HASHTYPE_GET(mb) != hashtype) { - flowid = mb->m_pkthdr.flowid; - hashtype = M_HASHTYPE_GET(mb); + if (seq != nseq) { + seq = nseq; /* flush active streams */ tcp_lro_rx_done(lc); } -#ifdef TCP_LRO_RESET_SEQUENCE - /* reset sequence number */ - TCP_LRO_SEQUENCE(mb) = 0; -#endif + /* add packet to LRO engine */ if (tcp_lro_rx(lc, mb, 0) != 0) { /* input packet to network layer */ @@ -799,11 +857,14 @@ if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max)) tcp_lro_flush_all(lc); - /* store sequence number */ - TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count; + /* create sequence number */ + lc->lro_mbuf_data[lc->lro_mbuf_count].seq = + (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | + (((uint64_t)mb->m_pkthdr.flowid) << 24) | + ((uint64_t)lc->lro_mbuf_count); /* enter mbuf */ - lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb; + lc->lro_mbuf_data[lc->lro_mbuf_count++].mb = mb; } /* end */