Index: sys/net/iflib.h
===================================================================
--- sys/net/iflib.h
+++ sys/net/iflib.h
@@ -131,7 +131,9 @@
 	uint8_t			ipi_mflags;	/* packet mbuf flags */
 
 	uint32_t		ipi_tcp_seq;	/* tcp seqno */
-	uint32_t		__spare0__;
+	uint8_t			ipi_ip_tos;	/* IP ToS field data */
+	uint8_t			__spare0__;
+	uint16_t		__spare1__;
 } *if_pkt_info_t;
 
 typedef struct if_irq {
@@ -188,6 +190,7 @@
 	void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx);
 	int (*ift_legacy_intr) (void *);
 	qidx_t (*ift_txq_select) (void *, struct mbuf *);
+	qidx_t (*ift_txq_select_v2) (void *, struct mbuf *, if_pkt_info_t);
 } *if_txrx_t;
 
 typedef struct if_softc_ctx {
@@ -416,6 +419,13 @@
  * as ift_txq_select in struct if_txrx
  */
 #define IFLIB_FEATURE_QUEUE_SELECT	1400050
+/*
+ * Driver can set its own TX queue selection function
+ * as ift_txq_select_v2 in struct if_txrx. This includes
+ * having iflib send L3+ extra header information to the
+ * function.
+ */
+#define IFLIB_FEATURE_QUEUE_SELECT_V2	1400057
 
 /*
  * These enum values are used in iflib_needs_restart to indicate to iflib
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -210,6 +210,7 @@
 #define isc_rxd_flush ifc_txrx.ift_rxd_flush
 #define isc_legacy_intr ifc_txrx.ift_legacy_intr
 #define isc_txq_select ifc_txrx.ift_txq_select
+#define isc_txq_select_v2 ifc_txrx.ift_txq_select_v2
 	eventhandler_tag ifc_vlan_attach_event;
 	eventhandler_tag ifc_vlan_detach_event;
 	struct ether_addr ifc_mac;
@@ -3188,6 +3189,141 @@
 #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
 #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
 
+/**
+ * Parse up to the L3 header; extract IPv4/IPv6 header information.
+ * Currently: IP ToS value, IP header version/presence
+ *
+ * Always sets ipi_etype and ipi_ehdrlen in *pi; for IPv4/IPv6 frames also
+ * sets ipi_ip_hlen, ipi_ipproto, ipi_ip_tos and IPI_TX_IPV4/IPI_TX_IPV6.
+ * m_dup()/m_pullup() may replace the chain; *mp is updated on success.
+ *
+ * Returns 0 on success.  On ENOMEM the chain has already been freed and
+ * the caller must not use *mp again.
+ */
+static int
+iflib_parse_header_partial(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+{
+	if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
+	struct ether_vlan_header *eh;
+	struct mbuf *m;
+
+	m = *mp;
+	if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
+	    M_WRITABLE(m) == 0) {
+		if ((m = m_dup(m, M_NOWAIT)) == NULL) {
+			/*
+			 * The caller hands the packet to us for good, so free
+			 * it here just as a failed m_pullup() below would.
+			 */
+			m_freem(*mp);
+			DBG_COUNTER_INC(tx_frees);
+			*mp = NULL;
+			return (ENOMEM);
+		} else {
+			m_freem(*mp);
+			DBG_COUNTER_INC(tx_frees);
+			*mp = m;
+		}
+	}
+
+	/*
+	 * Determine where frame payload starts.
+	 * Jump over vlan headers if already present,
+	 * helpful for QinQ too.
+	 */
+	if (__predict_false(m->m_len < sizeof(*eh))) {
+		txq->ift_pullups++;
+		if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL))
+			return (ENOMEM);
+	}
+	eh = mtod(m, struct ether_vlan_header *);
+	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+		pi->ipi_etype = ntohs(eh->evl_proto);
+		pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+	} else {
+		pi->ipi_etype = ntohs(eh->evl_encap_proto);
+		pi->ipi_ehdrlen = ETHER_HDR_LEN;
+	}
+
+	switch (pi->ipi_etype) {
+#ifdef INET
+	case ETHERTYPE_IP:
+	{
+		struct mbuf *n;
+		struct ip *ip = NULL;
+		int miniplen;
+
+		miniplen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip));
+		if (__predict_false(m->m_len < miniplen)) {
+			/*
+			 * Check for common case where the first mbuf only contains
+			 * the Ethernet header
+			 */
+			if (m->m_len == pi->ipi_ehdrlen) {
+				n = m->m_next;
+				MPASS(n);
+				/* If next mbuf contains at least the minimal IP header, then stop */
+				if (n->m_len >= sizeof(*ip)) {
+					ip = (struct ip *)n->m_data;
+				} else {
+					txq->ift_pullups++;
+					if (__predict_false((m = m_pullup(m, miniplen)) == NULL))
+						return (ENOMEM);
+					ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+				}
+			} else {
+				txq->ift_pullups++;
+				if (__predict_false((m = m_pullup(m, miniplen)) == NULL))
+					return (ENOMEM);
+				ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+			}
+		} else {
+			ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
+		}
+
+		pi->ipi_ip_hlen = ip->ip_hl << 2;
+		pi->ipi_ipproto = ip->ip_p;
+		pi->ipi_ip_tos = ip->ip_tos;
+		pi->ipi_flags |= IPI_TX_IPV4;
+
+		if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP))
+			ip->ip_sum = 0;
+
+		break;
+	}
+#endif
+#ifdef INET6
+	case ETHERTYPE_IPV6:
+	{
+		struct ip6_hdr *ip6;
+
+		if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) {
+			txq->ift_pullups++;
+			if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL))
+				return (ENOMEM);
+		}
+		ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
+
+		/* XXX-BZ this will go badly in case of ext hdrs. */
+		pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
+		pi->ipi_ipproto = ip6->ip6_nxt;
+		pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6);
+		pi->ipi_flags |= IPI_TX_IPV6;
+
+		break;
+	}
+#endif
+	default:
+		pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
+		pi->ipi_ip_hlen = 0;
+		break;
+	}
+	*mp = m;
+
+	return (0);
+
+}
+
 static int
 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
 {
@@ -3269,6 +3405,7 @@
 		}
 		pi->ipi_ip_hlen = ip->ip_hl << 2;
 		pi->ipi_ipproto = ip->ip_p;
+		pi->ipi_ip_tos = ip->ip_tos;
 		pi->ipi_flags |= IPI_TX_IPV4;
 
 		/* TCP checksum offload may require TCP header length */
@@ -3322,6 +3459,7 @@
 
 		/* XXX-BZ this will go badly in case of ext hdrs. */
 		pi->ipi_ipproto = ip6->ip6_nxt;
+		pi->ipi_ip_tos = IPV6_TRAFFIC_CLASS(ip6);
 		pi->ipi_flags |= IPI_TX_IPV6;
 
 		/* TCP checksum offload may require TCP header length */
@@ -4155,7 +4293,28 @@
 	/* ALTQ-enabled interfaces always use queue 0. */
 	qidx = 0;
 	/* Use driver-supplied queue selection method if it exists */
-	if (ctx->isc_txq_select)
+	if (ctx->isc_txq_select_v2) {
+		/* HACK: Parse header in order to make IP ToS information available
+		 * to isc_txq_select_v2.
+		 */
+		struct if_pkt_info pi;
+		pkt_info_zero(&pi);
+		/* XXX: This will make txq->ift_pullups count only really valid
+		 * on queue 0
+		 */
+		txq = &ctx->ifc_txqs[0];
+		err = iflib_parse_header_partial(txq, &pi, &m);
+		if (__predict_false(err != 0)) {
+			/* The parser freed the chain; nothing left to release. */
+			DBG_COUNTER_INC(encap_txd_encap_fail);
+			return (err);
+		}
+		/* Let driver make queueing decision */
+		qidx = ctx->isc_txq_select_v2(ctx->ifc_softc, m, &pi);
+		/* XXX: packet info is discarded; hand it off somehow instead? */
+	}
+	/* Backwards compatibility */
+	else if (ctx->isc_txq_select)
 		qidx = ctx->isc_txq_select(ctx->ifc_softc, m);
 	/* If not, use iflib's standard method */
 	else if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd))
Index: sys/sys/param.h
===================================================================
--- sys/sys/param.h
+++ sys/sys/param.h
@@ -76,7 +76,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1400056
+#define __FreeBSD_version 1400057
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,