Index: sys/netinet/tcp_lro.h =================================================================== --- sys/netinet/tcp_lro.h +++ sys/netinet/tcp_lro.h @@ -38,9 +38,6 @@ #define TCP_LRO_ENTRIES 8 #endif -#define TCP_LRO_SEQUENCE(mb) \ - (mb)->m_pkthdr.PH_loc.thirtytwo[0] - struct lro_entry { SLIST_ENTRY(lro_entry) next; Index: sys/netinet/tcp_lro.c =================================================================== --- sys/netinet/tcp_lro.c +++ sys/netinet/tcp_lro.c @@ -92,7 +92,7 @@ SLIST_INIT(&lc->lro_active); /* compute size to allocate */ - size = (lro_mbufs * sizeof(struct mbuf *)) + + size = (2 * lro_mbufs * sizeof(struct mbuf *)) + (lro_entries * sizeof(*le)); lc->lro_mbuf_data = (struct mbuf **) malloc(size, M_LRO, M_NOWAIT | M_ZERO); @@ -104,7 +104,7 @@ } /* compute offset for LRO entries */ le = (struct lro_entry *) - (lc->lro_mbuf_data + lro_mbufs); + (lc->lro_mbuf_data + 2 * lro_mbufs); /* setup linked list */ for (i = 0; i != lro_entries; i++) @@ -335,24 +335,85 @@ SLIST_INSERT_HEAD(&lc->lro_free, le, next); } -static int -tcp_lro_mbuf_compare_header(const void *ppa, const void *ppb) +/* + * Compare two mbufs by hash type and then by flow ID. Return true if + * the first one is less than or equal to the second one: + */ +static inline int +tcp_lro_mbuf_lt_eq(const void *ppa, const void *ppb) { const struct mbuf *ma = *((const struct mbuf * const *)ppa); const struct mbuf *mb = *((const struct mbuf * const *)ppb); - int ret; + int64_t ret; ret = M_HASHTYPE_GET(ma) - M_HASHTYPE_GET(mb); - if (ret != 0) - goto done; + if (ret < 0) + return (1); + else if (ret > 0) + return (0); - ret = ma->m_pkthdr.flowid - mb->m_pkthdr.flowid; - if (ret != 0) - goto done; + ret = ((uint64_t)(uint32_t)ma->m_pkthdr.flowid) - + ((uint64_t)(uint32_t)mb->m_pkthdr.flowid); + if (ret <= 0) + return (1); + else + return (0); +} - ret = TCP_LRO_SEQUENCE(ma) - TCP_LRO_SEQUENCE(mb); -done: - return (ret); +/* Optimised, stable mergesort using lro_mbuf_data + max as scratch space */ +static void +tcp_lro_mbuf_sort(struct lro_ctrl *lc, unsigned max) +{ + 
struct mbuf **ptemp = lc->lro_mbuf_data + max; + unsigned x; + unsigned y; + + /* select partition size, doubling it on every pass */ + for (x = 1; x < max; x *= 2) { + /* iterate all adjacent partition pairs */ + for (y = 0; (y + x) < max; y += 2 * x) { + unsigned a; + unsigned b; + unsigned c; + unsigned d; + + a = 0; /* read index, lower partition */ + b = x; /* read index, upper partition */ + c = y; /* partition start, later write index */ + d = 2 * x; /* total length of both partitions */ + + /* clamp length if upper partition is too small */ + if (d > (max - y)) + d = (max - y); + + /* skip if partitions are already in order */ + if (tcp_lro_mbuf_lt_eq( + lc->lro_mbuf_data + c + b - 1, + lc->lro_mbuf_data + c + b)) + continue; + + /* copy both partitions to temporary buffer */ + memcpy(ptemp, lc->lro_mbuf_data + c, d * sizeof(void *)); + + /* merge - preserving the sequence of equal elements */ + while (1) { + if (a == x) { + memcpy(lc->lro_mbuf_data + c, + ptemp + b, (d - b) * sizeof(void *)); + break; + } else if (b == d) { + memcpy(lc->lro_mbuf_data + c, + ptemp + a, (x - a) * sizeof(void *)); + break; + } + if (tcp_lro_mbuf_lt_eq(ptemp + a, ptemp + b)) { + lc->lro_mbuf_data[c++] = ptemp[a++]; + } else { + lc->lro_mbuf_data[c++] = ptemp[b++]; + } + } + } + } } void @@ -368,8 +429,7 @@ goto done; /* sort all mbufs according to stream */ - qsort(lc->lro_mbuf_data, lc->lro_mbuf_count, sizeof(struct mbuf *), - &tcp_lro_mbuf_compare_header); + tcp_lro_mbuf_sort(lc, lc->lro_mbuf_count); /* input data into LRO engine, stream by stream */ flowid = 0; @@ -391,10 +451,6 @@ tcp_lro_flush(lc, le); } } -#ifdef TCP_LRO_RESET_SEQUENCE - /* reset sequence number */ - TCP_LRO_SEQUENCE(mb) = 0; -#endif /* add packet to LRO engine */ if (tcp_lro_rx(lc, mb, 0) != 0) { /* input packet to network layer */ @@ -766,9 +822,6 @@ if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max)) tcp_lro_flush_all(lc); - /* store sequence number */ - TCP_LRO_SEQUENCE(mb) = lc->lro_mbuf_count; - /* enter mbuf */ lc->lro_mbuf_data[lc->lro_mbuf_count++] = mb; }