Page MenuHomeFreeBSD

D30691.id90559.diff
No OneTemporary

D30691.id90559.diff

Index: sys/dev/ixl/if_ixl.c
===================================================================
--- sys/dev/ixl/if_ixl.c
+++ sys/dev/ixl/if_ixl.c
@@ -118,6 +118,8 @@
static void ixl_if_timer(if_ctx_t ctx, uint16_t qid);
static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag);
static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
+static void ixl_if_vxlan_register(if_ctx_t ctx, u16 port);
+static void ixl_if_vxlan_unregister(if_ctx_t ctx, u16 port);
static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt);
static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
@@ -190,6 +192,8 @@
DEVMETHOD(ifdi_timer, ixl_if_timer),
DEVMETHOD(ifdi_vlan_register, ixl_if_vlan_register),
DEVMETHOD(ifdi_vlan_unregister, ixl_if_vlan_unregister),
+ DEVMETHOD(ifdi_vxlan_register, ixl_if_vxlan_register),
+ DEVMETHOD(ifdi_vxlan_unregister, ixl_if_vxlan_unregister),
DEVMETHOD(ifdi_get_counter, ixl_if_get_counter),
DEVMETHOD(ifdi_i2c_req, ixl_if_i2c_req),
DEVMETHOD(ifdi_priv_ioctl, ixl_if_priv_ioctl),
@@ -726,6 +730,9 @@
/* Add protocol filters to list */
ixl_init_filters(vsi);
+ /* Initialize udp_ports array for VXLAN offloads */
+ memset(&pf->udp_ports, 0, sizeof(pf->udp_ports));
+
/* Init queue allocation manager */
error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp);
if (error) {
@@ -996,6 +1003,9 @@
/* Re-add configure filters to HW */
ixl_reconfigure_filters(vsi);
+ /* Sync all UDP filters */
+ ixl_sync_udp_filters(pf, true);
+
/* Configure promiscuous mode */
ixl_if_promisc_set(ctx, if_getflags(ifp));
@@ -1424,6 +1434,9 @@
ixl_process_adminq(pf, &pending);
ixl_update_link_status(pf);
+ if (IXL_PF_HAS_PENDING_UDP_FILTER_SYNC(pf))
+ ixl_sync_udp_filters(pf, false);
+
/*
* If there are still messages to process, reschedule ourselves.
* Otherwise, re-enable our interrupt and go to sleep.
@@ -1729,6 +1742,73 @@
}
}
+static void
+ixl_if_vxlan_register(if_ctx_t ctx, u16 port)
+{
+ struct ixl_pf *pf = iflib_get_softc(ctx);
+ int idx;
+
+ /* Check if port already exists */
+ idx = ixl_get_udp_port_idx(pf, port);
+ if (idx != -1) {
+ device_printf(pf->dev, "port %d already offloaded\n", port);
+ return;
+ }
+
+ /* Now check if there is space to add the new port */
+ idx = ixl_get_udp_port_idx(pf, 0);
+ if (idx == -1) {
+ device_printf(pf->dev,
+ "maximum number of offloaded UDP ports reached, not adding port %d\n",
+ port);
+ return;
+ }
+
+ pf->udp_ports[idx].port = port;
+ pf->udp_ports[idx].filter_index = IXL_UDP_PORT_INDEX_UNUSED;
+ pf->udp_ports[idx].is_marked_for_deletion = FALSE;
+ pf->pending_udp_bitmap |= BIT_ULL(idx);
+
+ atomic_set_32(&pf->state, IXL_PF_STATE_UDP_FILTER_SYNC_PENDING);
+
+ if (if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
+ iflib_admin_intr_deferred(ctx);
+}
+
+static void
+ixl_if_vxlan_unregister(if_ctx_t ctx, u16 port)
+{
+ struct ixl_pf *pf = iflib_get_softc(ctx);
+ int idx;
+
+ /* Check if port already exists */
+ idx = ixl_get_udp_port_idx(pf, port);
+ if (idx == -1) {
+ device_printf(pf->dev,
+ "UDP port %d was not found, not deleting\n", port);
+ return;
+ }
+
+ /* If port exists, set the value to 0. When ixl_if_vxlan_register looks for
+ * an empty entry for a new tunnel, it looks for entries with port set to 0.
+ * Also, mark current entry for deletion and make the deletion pending.
+ */
+ pf->udp_ports[idx].port = 0;
+ pf->udp_ports[idx].is_marked_for_deletion = TRUE;
+
+ /* Toggle pending bit instead of setting it. This way if we are
+ * deleting a port that has yet to be added we just clear the pending
+ * bit and don't have to worry about it.
+ */
+ pf->pending_udp_bitmap ^= BIT_ULL(idx);
+
+ atomic_set_32(&pf->state, IXL_PF_STATE_UDP_FILTER_SYNC_PENDING);
+
+ if (if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
+ iflib_admin_intr_deferred(ctx);
+}
+
+
static uint64_t
ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt)
{
@@ -1839,6 +1919,7 @@
{
switch (event) {
case IFLIB_RESTART_VLAN_CONFIG:
+ case IFLIB_RESTART_VXLAN_CONFIG:
default:
return (false);
}
Index: sys/dev/ixl/ixl.h
===================================================================
--- sys/dev/ixl/ixl.h
+++ sys/dev/ixl/ixl.h
@@ -199,7 +199,12 @@
#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP)
#define CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6|CSUM_SCTP_IPV6)
-#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6|CSUM_TSO)
+#define CSUM_INNER_IPV4 (CSUM_INNER_IP|CSUM_INNER_IP_TCP|CSUM_INNER_IP_UDP)
+#define CSUM_INNER_IPV6 (CSUM_INNER_IP6_TCP|CSUM_INNER_IP6_UDP)
+#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6|CSUM_TSO| \
+ CSUM_ENCAP_VXLAN|CSUM_INNER_IPV4|CSUM_INNER_IPV6| \
+ CSUM_INNER_IP_TSO|CSUM_INNER_IP6_TSO)
+#define IXL_NEEDS_CTXD(flags) (flags & (CSUM_TSO | CSUM_ENCAP_VXLAN))
/* Misc flags for ixl_vsi.flags */
#define IXL_FLAGS_KEEP_TSO4 (1 << 0)
@@ -261,7 +266,8 @@
IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | \
IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO | \
IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \
- IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LRO)
+ IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LRO | \
+ IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO)
#define IXL_CSUM_TCP \
(CSUM_IP_TCP|CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP6_TCP)
@@ -271,6 +277,9 @@
(CSUM_IP_SCTP|CSUM_IP6_SCTP)
#define IXL_CSUM_IPV4 \
(CSUM_IP|CSUM_IP_TSO)
+#define IXL_CSUM_TSO \
+ (CSUM_IP_TSO | CSUM_IP6_TSO | \
+ CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO)
/* Pre-11 counter(9) compatibility */
#if __FreeBSD_version >= 1100036
@@ -404,6 +413,7 @@
/* Stats */
u64 irqs;
u64 tso;
+ u64 tx_vxlan;
};
struct ixl_rx_queue {
Index: sys/dev/ixl/ixl_pf.h
===================================================================
--- sys/dev/ixl/ixl_pf.h
+++ sys/dev/ixl/ixl_pf.h
@@ -89,6 +89,7 @@
IXL_PF_STATE_FW_LLDP_DISABLED = (1 << 9),
IXL_PF_STATE_EEE_ENABLED = (1 << 10),
IXL_PF_STATE_LINK_ACTIVE_ON_DOWN = (1 << 11),
+ IXL_PF_STATE_UDP_FILTER_SYNC_PENDING = (1 << 12),
};
#define IXL_PF_IN_RECOVERY_MODE(pf) \
@@ -97,6 +98,19 @@
#define IXL_PF_IS_RESETTING(pf) \
((atomic_load_acq_32(&pf->state) & IXL_PF_STATE_RESETTING) != 0)
+#define IXL_PF_HAS_PENDING_UDP_FILTER_SYNC(pf) \
+ ((atomic_load_acq_32(&pf->state) & \
+ IXL_PF_STATE_UDP_FILTER_SYNC_PENDING) != 0)
+
+#define IXL_UDP_PORT_INDEX_UNUSED 255
+struct ixl_udp_port_config {
+ /* AdminQ command interface expects port number in Host byte order */
+ u16 port;
+ u8 filter_index;
+ bool is_marked_for_deletion;
+};
+
+
struct ixl_vf {
struct ixl_vsi vsi;
u32 vf_flags;
@@ -172,6 +186,10 @@
int num_vfs;
uint16_t veb_seid;
int vc_debug_lvl;
+
+ /* VXLAN */
+ struct ixl_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
+ u16 pending_udp_bitmap;
};
/*
@@ -438,4 +456,7 @@
int ixl_attach_get_link_status(struct ixl_pf *);
int ixl_sysctl_set_flowcntl(SYSCTL_HANDLER_ARGS);
+int ixl_get_udp_port_idx(struct ixl_pf *, u16);
+void ixl_sync_udp_filters(struct ixl_pf *, bool all);
+
#endif /* _IXL_PF_H_ */
Index: sys/dev/ixl/ixl_pf_main.c
===================================================================
--- sys/dev/ixl/ixl_pf_main.c
+++ sys/dev/ixl/ixl_pf_main.c
@@ -1031,6 +1031,81 @@
ixl_set_rss_hlut(pf);
}
+int
+ixl_get_udp_port_idx(struct ixl_pf *pf, u16 port)
+{
+ u8 i;
+
+ for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; ++i) {
+ /* Do not report ports with pending deletions as
+ * being available.
+ */
+ if (!port && (pf->pending_udp_bitmap & BIT_ULL(i)))
+ continue;
+ if (pf->udp_ports[i].is_marked_for_deletion == TRUE)
+ continue;
+ if (pf->udp_ports[i].port == port)
+ return i;
+ }
+
+ return -1;
+}
+
+void
+ixl_sync_udp_filters(struct ixl_pf *pf, bool all)
+{
+ struct ixl_udp_port_config *udp_port;
+ struct i40e_hw *hw = &pf->hw;
+ enum i40e_status_code status;
+ bool is_marked_for_deletion;
+ u8 i, filter_index;
+ u16 port;
+
+ for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; ++i) {
+ if (all || pf->pending_udp_bitmap & BIT_ULL(i)) {
+ status = I40E_SUCCESS;
+
+ udp_port = &pf->udp_ports[i];
+ pf->pending_udp_bitmap &= ~BIT_ULL(i);
+
+ port = udp_port->port;
+ is_marked_for_deletion = udp_port->is_marked_for_deletion;
+ filter_index = udp_port->filter_index;
+
+ if (!is_marked_for_deletion && port > 0) {
+ status = i40e_aq_add_udp_tunnel(hw, port,
+ I40E_AQC_TUNNEL_TYPE_VXLAN, &filter_index, NULL);
+ } else if (filter_index != IXL_UDP_PORT_INDEX_UNUSED)
+ status = i40e_aq_del_udp_tunnel(hw, filter_index, NULL);
+
+ if (status) {
+ device_printf(pf->dev,
+ "VXLAN %s port %d, index %d failed, err %s aq_err %s\n",
+ is_marked_for_deletion ? "delete" : "add",
+ port, filter_index, i40e_stat_str(&pf->hw, status),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+ if (is_marked_for_deletion == FALSE) {
+ /* failed to add, just reset port,
+ * drop pending bit for any deletion
+ */
+ udp_port->port = 0;
+ pf->pending_udp_bitmap &= ~BIT_ULL(i);
+ }
+ } else {
+ if (is_marked_for_deletion == FALSE) {
+ /* record filter index on success */
+ udp_port->filter_index = filter_index;
+ }
+ /* clear the deletion flag */
+ udp_port->is_marked_for_deletion = FALSE;
+ }
+ }
+ }
+
+ atomic_clear_32(&pf->state, IXL_PF_STATE_UDP_FILTER_SYNC_PENDING);
+}
+
/*
* In some firmware versions there is default MAC/VLAN filter
* configured which interferes with filters managed by driver.
Index: sys/dev/ixl/ixl_txrx.c
===================================================================
--- sys/dev/ixl/ixl_txrx.c
+++ sys/dev/ixl/ixl_txrx.c
@@ -272,13 +272,18 @@
}
}
-/**********************************************************************
- *
- * Setup context for hardware segmentation offload (TSO)
+#if defined(INET6) || defined(INET)
+/**
+ * Setup context descriptor for TSO or VXLAN Offload
+ * @txr: TX ring which handles transmission
+ * @pi: information extracted from packet headers
*
- **********************************************************************/
+ * Configure TX descriptor with information extracted
+ * from a packet header required for HW to calculate
+ * requested checksum and perform TCP segmentation.
+ */
static int
-ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
+ixl_ctxd_setup(struct tx_ring *txr, if_pkt_info_t pi)
{
if_softc_ctx_t scctx;
struct i40e_tx_context_desc *TXD;
@@ -288,43 +293,74 @@
idx = pi->ipi_pidx;
TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx];
- total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
- tsolen = pi->ipi_len - total_hdr_len;
scctx = txr->que->vsi->shared;
+ type_cmd_tso_mss =
+ (u64)I40E_TX_DESC_DTYPE_CONTEXT << I40E_TXD_CTX_QW1_DTYPE_SHIFT;
+
type = I40E_TX_DESC_DTYPE_CONTEXT;
- cmd = I40E_TX_CTX_DESC_TSO;
- /*
- * TSO MSS must not be less than 64; this prevents a
- * BAD_LSO_MSS MDD event when the MSS is too small.
- */
- if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
- txr->mss_too_small++;
- pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
+ if (pi->ipi_csum_flags & IXL_CSUM_TSO) {
+ cmd = I40E_TX_CTX_DESC_TSO;
+ /*
+ * TSO MSS must not be less than 64; this prevents a
+ * BAD_LSO_MSS MDD event when the MSS is too small.
+ */
+ if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
+ txr->mss_too_small++;
+ pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
+ }
+ mss = pi->ipi_tso_segsz;
+
+ total_hdr_len = pi->ipi_ehdrlen +
+ pi->ipi_ip_hlen + pi->ipi_tcp_hlen +
+ pi->ipi_outer_ip_hlen + pi->ipi_tun_hlen;
+ tsolen = pi->ipi_len - total_hdr_len;
+
+ /* Check for BAD_LSO_MSS MDD event (mss too large) */
+ MPASS(mss <= IXL_MAX_TSO_MSS);
+ /* Check for NO_HEAD MDD event (header lengths are 0) */
+ MPASS(pi->ipi_ehdrlen != 0);
+ MPASS(pi->ipi_ip_hlen != 0);
+ /* Partial check for BAD_LSO_LEN MDD event */
+ MPASS(tsolen != 0);
+ /* Partial check for WRONG_SIZE MDD event (during TSO) */
+ MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
+
+ type_cmd_tso_mss |=
+ ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+ ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+ ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
+ txr->que->tso++;
}
- mss = pi->ipi_tso_segsz;
-
- /* Check for BAD_LS0_MSS MDD event (mss too large) */
- MPASS(mss <= IXL_MAX_TSO_MSS);
- /* Check for NO_HEAD MDD event (header lengths are 0) */
- MPASS(pi->ipi_ehdrlen != 0);
- MPASS(pi->ipi_ip_hlen != 0);
- /* Partial check for BAD_LSO_LEN MDD event */
- MPASS(tsolen != 0);
- /* Partial check for WRONG_SIZE MDD event (during TSO) */
- MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
-
- type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
- ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
- ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
- ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
- TXD->tunneling_params = htole32(0);
- txr->que->tso++;
+ if (pi->ipi_csum_flags & CSUM_ENCAP_VXLAN) {
+ u32 tun_params = I40E_TXD_CTX_UDP_TUNNELING;
+ switch (pi->ipi_outer_etype) {
+ case ETHERTYPE_IP:
+ if (pi->ipi_csum_flags & CSUM_INNER_IP_TSO)
+ tun_params |= I40E_TX_CTX_EXT_IP_IPV4;
+ else
+ tun_params |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+ break;
+ case ETHERTYPE_IPV6:
+ tun_params |= I40E_TX_CTX_EXT_IP_IPV6;
+ break;
+ default:
+ break;
+ }
+ tun_params |=
+ (pi->ipi_outer_ip_hlen >> 2) << I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
+ (pi->ipi_tun_hlen >> 1) << I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+ TXD->tunneling_params = htole32(tun_params);
+ txr->que->tx_vxlan++;
+ } else
+ TXD->tunneling_params = htole32(0);
+
return ((idx + 1) & (scctx->isc_ntxd[0]-1));
}
+#endif
/*********************************************************************
*
@@ -355,12 +391,12 @@
/* Set up the TSO/CSUM offload */
if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
- /* Set up the TSO context descriptor if required */
- if (pi->ipi_csum_flags & CSUM_TSO) {
+ /* Set up the context descriptor if required */
+ if (IXL_NEEDS_CTXD(pi->ipi_csum_flags)) {
/* Prevent MAX_BUFF MDD event (for TSO) */
if (ixl_tso_detect_sparse(segs, nsegs, pi))
return (EFBIG);
- i = ixl_tso_setup(txr, pi);
+ i = ixl_ctxd_setup(txr, pi);
}
ixl_tx_setup_offload(que, pi, &cmd, &off);
}
@@ -730,13 +766,17 @@
return (0);
}
-/*********************************************************************
- *
- * Verify that the hardware indicated that the checksum is valid.
- * Inform the stack about the status of checksum so that stack
- * doesn't spend time verifying the checksum.
+/**
+ * ixl_rx_checksum - Verify whether the hardware indicated that the checksum is valid
+ * @ri: iflib RXD info
+ * @status: RX descriptor status data
+ * @error: RX descriptor error data
+ * @ptype: packet type
*
- *********************************************************************/
+ * Determine whether the hardware indicated that RX checksums were verified
+ * and are valid. Inform the stack about the status of checksum so that stack
+ * doesn't spend time verifying them.
+ */
static u8
ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
{
@@ -750,6 +790,10 @@
decoded = decode_rx_desc_ptype(ptype);
+ /* Cannot proceed if packet type is unknown or not an IP packet */
+ if (decoded.known == 0 || decoded.outer_ip != I40E_RX_PTYPE_OUTER_IP)
+ return (0);
+
/* IPv6 with extension headers likely have bad csum */
if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
@@ -760,21 +804,68 @@
}
}
- ri->iri_csum_flags |= CSUM_L3_CALC;
-
- /* IPv4 checksum error */
- if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
- return (1);
-
- ri->iri_csum_flags |= CSUM_L3_VALID;
- ri->iri_csum_flags |= CSUM_L4_CALC;
-
- /* L4 checksum error */
- if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
- return (1);
+ switch (decoded.tunnel_type) {
+ case I40E_RX_PTYPE_TUNNEL_NONE:
+ /* L3 checksum is calculated only for IPv4 packets */
+ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
+ ri->iri_csum_flags |= CSUM_L3_CALC;
+ /* IPv4 checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_L3_VALID;
+ }
- ri->iri_csum_flags |= CSUM_L4_VALID;
- ri->iri_csum_data |= htons(0xffff);
+ switch (decoded.inner_prot) {
+ case I40E_RX_PTYPE_INNER_PROT_UDP:
+ case I40E_RX_PTYPE_INNER_PROT_TCP:
+ case I40E_RX_PTYPE_INNER_PROT_SCTP:
+ ri->iri_csum_flags |= CSUM_L4_CALC;
+ /* L4 checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_L4_VALID;
+ ri->iri_csum_data |= htons(0xffff);
+ break;
+ default:
+ break;
+ }
+ break;
+ case I40E_RX_PTYPE_TUNNEL_IP_GRENAT:
+ case I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC:
+ case I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN:
+ /* L3 checksum of outer IPv4 packets */
+ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
+ ri->iri_csum_flags = CSUM_L3_CALC;
+ /* IP checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_L3_VALID;
+ }
+ /* L3 checksum of most inner IPv4 packets */
+ if (decoded.tunnel_end_prot == I40E_RX_PTYPE_TUNNEL_END_IPV4) {
+ ri->iri_csum_flags = CSUM_INNER_L3_CALC;
+ /* IP checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_INNER_L3_VALID;
+ }
+ switch (decoded.inner_prot) {
+ case I40E_RX_PTYPE_INNER_PROT_UDP:
+ case I40E_RX_PTYPE_INNER_PROT_TCP:
+ case I40E_RX_PTYPE_INNER_PROT_SCTP:
+ ri->iri_csum_flags |= CSUM_INNER_L4_CALC;
+ if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_INNER_L4_VALID;
+ ri->iri_csum_data |= htons(0xffff);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
return (0);
}
@@ -950,6 +1041,9 @@
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso",
CTLFLAG_RD, &(tx_que->tso),
"TSO");
+ SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_vxlan",
+ CTLFLAG_RD, &(tx_que->tx_vxlan),
+ "VXLAN HW Offload");
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
CTLFLAG_RD, &(txr->mss_too_small),
"TSO sends with an MSS less than 64");
Index: sys/net/ifdi_if.m
===================================================================
--- sys/net/ifdi_if.m
+++ sys/net/ifdi_if.m
@@ -97,6 +97,11 @@
{
}
+ static void
+ null_vxlan_register_op(if_ctx_t _ctx __unused, uint16_t port __unused)
+ {
+ }
+
static int
null_q_setup(if_ctx_t _ctx __unused, uint32_t _qid __unused)
{
@@ -458,6 +463,16 @@
uint16_t _vtag;
} DEFAULT null_vlan_register_op;
+METHOD void vxlan_register {
+ if_ctx_t _ctx;
+ uint16_t _port;
+} DEFAULT null_vxlan_register_op;
+
+METHOD void vxlan_unregister {
+ if_ctx_t _ctx;
+ uint16_t _port;
+} DEFAULT null_vxlan_register_op;
+
METHOD int sysctl_int_delay {
if_ctx_t _sctx;
if_int_delay_info_t _iidi;
Index: sys/net/iflib.h
===================================================================
--- sys/net/iflib.h
+++ sys/net/iflib.h
@@ -131,7 +131,11 @@
uint8_t ipi_mflags; /* packet mbuf flags */
uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t __spare0__;
+
+ /* Tunneled packets offload handling */
+ uint16_t ipi_outer_etype; /* outer ethernet header type */
+ uint8_t ipi_outer_ip_hlen; /* outer ip header length */
+ uint8_t ipi_tun_hlen; /* tunnel headers length */
} *if_pkt_info_t;
typedef struct if_irq {
@@ -410,6 +414,7 @@
*/
enum iflib_restart_event {
IFLIB_RESTART_VLAN_CONFIG,
+ IFLIB_RESTART_VXLAN_CONFIG,
};
/*
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -56,6 +56,7 @@
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_media.h>
+#include <net/if_vxlan.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/mp_ring.h>
@@ -71,6 +72,7 @@
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <netinet/ip_var.h>
#include <netinet6/ip6_var.h>
@@ -211,6 +213,8 @@
#define isc_legacy_intr ifc_txrx.ift_legacy_intr
eventhandler_tag ifc_vlan_attach_event;
eventhandler_tag ifc_vlan_detach_event;
+ eventhandler_tag ifc_vxlan_attach_event;
+ eventhandler_tag ifc_vxlan_detach_event;
struct ether_addr ifc_mac;
};
@@ -708,7 +712,7 @@
#endif
static int iflib_register(if_ctx_t);
static void iflib_deregister(if_ctx_t);
-static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
+static void iflib_unregister_event_handlers(if_ctx_t ctx);
static uint16_t iflib_get_mbuf_size_for(unsigned int size);
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
@@ -2464,6 +2468,7 @@
iflib_txq_t txq;
iflib_rxq_t rxq;
int i, j, tx_ip_csum_flags, tx_ip6_csum_flags;
+ int vxlan_hwcsum_flags, vxlan_hwtso_flags;
if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
@@ -2476,6 +2481,12 @@
tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP);
tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP);
+ /* CSUM_ENCAP_VXLAN has to be set if any of VXLAN offloads is enabled */
+ vxlan_hwcsum_flags = scctx->isc_tx_csum_flags & (CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP |
+ CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_ENCAP_VXLAN);
+ vxlan_hwtso_flags = scctx->isc_tx_csum_flags & (CSUM_INNER_IP6_TSO | CSUM_INNER_IP_TSO |
+ CSUM_ENCAP_VXLAN);
/* Set hardware offload abilities */
if_clearhwassist(ifp);
if (if_getcapenable(ifp) & IFCAP_TXCSUM)
@@ -2486,6 +2497,10 @@
if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
if (if_getcapenable(ifp) & IFCAP_TSO6)
if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
+ if (if_getcapenable(ifp) & IFCAP_VXLAN_HWCSUM)
+ if_sethwassistbits(ifp, vxlan_hwcsum_flags, 0);
+ if (if_getcapenable(ifp) & IFCAP_VXLAN_HWTSO)
+ if_sethwassistbits(ifp, vxlan_hwtso_flags, 0);
for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
CALLOUT_LOCK(txq);
@@ -3157,13 +3172,178 @@
pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag);
printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n",
pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto);
+ printf("pi outer_etype: %d outer_ip_len: %d tun_len: %d\n",
+ pi->ipi_outer_etype, pi->ipi_outer_ip_hlen, pi->ipi_tun_hlen);
}
#endif
#define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO)
#define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO))
+#define IS_TX_INNER_OFFLOAD4(pi) ((pi)->ipi_csum_flags & \
+ (CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO))
+#define IS_INNER_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_INNER_IP_TSO)
#define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
#define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
+#define IS_TX_INNER_OFFLOAD6(pi) ((pi)->ipi_csum_flags & \
+ (CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO))
+#define IS_INNER_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_INNER_IP6_TSO)
+
+static int
+iflib_parse_inner_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+{
+ if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
+ struct ether_vlan_header *eh;
+ struct ip *outer_ip;
+ int ehdrlen;
+ struct mbuf *m;
+ size_t off;
+
+ if (pi->ipi_ipproto != IPPROTO_UDP)
+ return (ENXIO);
+
+ m = *mp;
+ /*
+ * Save outer frame info and reuse etype and ip_hlen for inner frame.
+ */
+ pi->ipi_outer_etype = pi->ipi_etype;
+ pi->ipi_outer_ip_hlen = pi->ipi_ip_hlen;
+ pi->ipi_tun_hlen = sizeof(struct udphdr) + sizeof(struct vxlan_header);
+
+ /* size of outer frame header */
+ off = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tun_hlen;
+ outer_ip = (struct ip *)((caddr_t)m->m_data + pi->ipi_ehdrlen);
+
+ /* For VXLAN first mbuf usually contains only outer frame headers */
+ if (m->m_len == off) {
+ m = m->m_next;
+ off = 0;
+ }
+
+ if (__predict_false((size_t)m->m_len < off + sizeof(*eh)))
+ return (ENOMEM);
+
+ eh = (struct ether_vlan_header *)((caddr_t)m->m_data + off);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ pi->ipi_etype = ntohs(eh->evl_proto);
+ ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ pi->ipi_etype = ntohs(eh->evl_encap_proto);
+ ehdrlen = ETHER_HDR_LEN;
+ }
+ pi->ipi_tun_hlen += ehdrlen;
+
+ switch (pi->ipi_etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip = NULL;
+ struct tcphdr *th = NULL;
+ int minhlen = off + ehdrlen + sizeof(*ip);
+
+ if (m->m_pkthdr.csum_flags & (CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO))
+ minhlen += sizeof(*th);
+ minhlen = min(m->m_pkthdr.len, minhlen);
+ if (__predict_false(m->m_len < minhlen)) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, minhlen)) == NULL))
+ return (ENOMEM);
+ }
+ ip = (struct ip *)(m->m_data + ehdrlen);
+ if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
+ th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+
+ pi->ipi_ip_hlen = ip->ip_hl << 2;
+ pi->ipi_ipproto = ip->ip_p;
+
+ /* TCP checksum offload may require TCP header length */
+ if (IS_TX_INNER_OFFLOAD4(pi)) {
+ if (__predict_false(ip->ip_p != IPPROTO_TCP))
+ return (ENXIO);
+
+ if (__predict_false(th == NULL)) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
+ return (ENOMEM);
+ th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+
+ if (IS_INNER_TSO4(pi)) {
+ /*
+ * TSO always requires hardware checksum offload.
+ */
+ pi->ipi_csum_flags |= (CSUM_INNER_IP_TCP | CSUM_INNER_IP);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
+ pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
+ if (sctx->isc_flags & IFLIB_TSO_INIT_IP) {
+ ip->ip_sum = 0;
+ ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
+ }
+ }
+ }
+ if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_INNER_IP)) {
+ ip->ip_sum = 0;
+ outer_ip->ip_sum = 0;
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
+ struct tcphdr *th;
+ pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
+
+ if (__predict_false(m->m_len < ehdrlen + sizeof(struct ip6_hdr))) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, ehdrlen + sizeof(struct ip6_hdr))) == NULL))
+ return (ENOMEM);
+ }
+ th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen);
+
+ /* XXX-BZ this will go badly in case of ext hdrs. */
+ pi->ipi_ipproto = ip6->ip6_nxt;
+ pi->ipi_flags |= IPI_TX_IPV6;
+
+ /* TCP checksum offload may require TCP header length */
+ if (IS_TX_INNER_OFFLOAD6(pi)) {
+ if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
+ return (ENXIO);
+
+ if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
+ return (ENOMEM);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+
+ if (IS_INNER_TSO6(pi)) {
+ /*
+ * TSO always requires hardware checksum offload.
+ */
+ pi->ipi_csum_flags |= CSUM_INNER_IP6_TCP;
+ th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
+ pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
+ pi->ipi_ip_hlen = 0;
+ break;
+ }
+ *mp = m;
+
+ return (0);
+}
static int
iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
@@ -3210,9 +3390,11 @@
struct mbuf *n;
struct ip *ip = NULL;
struct tcphdr *th = NULL;
- int minthlen;
+ int minthlen = pi->ipi_ehdrlen + sizeof(*ip);
- minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
+ if ((m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) == 0)
+ minthlen += sizeof(*th);
+ minthlen = min(m->m_pkthdr.len, minthlen);
if (__predict_false(m->m_len < minthlen)) {
/*
* if this code bloat is causing too much of a hit
@@ -3334,6 +3516,9 @@
}
*mp = m;
+ if ((m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) != 0)
+ return iflib_parse_inner_header(txq, pi, mp);
+
return (0);
}
@@ -4273,7 +4458,8 @@
#define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \
- IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG)
+ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG | \
+ IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO)
static int
iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
@@ -4515,6 +4701,50 @@
CTX_UNLOCK(ctx);
}
+static void
+iflib_vxlan_register(void *arg, if_t ifp, sa_family_t family, uint16_t port)
+{
+ if_ctx_t ctx = arg;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+
+ if (iflib_in_detach(ctx))
+ return;
+
+ /* Check if interface has VXLAN offloads enabled */
+ if (!(if_getcapenable(ctx->ifc_ifp) &
+ (IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO)))
+ return;
+
+ CTX_LOCK(ctx);
+ /* Driver may need to stop traffic before enabling VXLAN offload */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_stop(ctx);
+ IFDI_VXLAN_REGISTER(ctx, port);
+ /* Re-init to load the changes, if required */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_init_locked(ctx);
+ CTX_UNLOCK(ctx);
+}
+
+static void
+iflib_vxlan_unregister(void *arg, if_t ifp, sa_family_t family, uint16_t port)
+{
+ if_ctx_t ctx = arg;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+
+ CTX_LOCK(ctx);
+ /* Driver may need all tagged packets to be flushed */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_stop(ctx);
+ IFDI_VXLAN_UNREGISTER(ctx, port);
+ /* Re-init to load the changes, if required */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_init_locked(ctx);
+ CTX_UNLOCK(ctx);
+}
+
static void
iflib_led_func(void *arg, int onoff)
{
@@ -5458,7 +5688,7 @@
if_shared_ctx_t sctx = ctx->ifc_sctx;
/* Unregister VLAN event handlers early */
- iflib_unregister_vlan_handlers(ctx);
+ iflib_unregister_event_handlers(ctx);
if ((sctx->isc_flags & IFLIB_PSEUDO) &&
(sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) {
@@ -5518,8 +5748,8 @@
ctx->ifc_flags |= IFC_IN_DETACH;
STATE_UNLOCK(ctx);
- /* Unregister VLAN handlers before calling iflib_stop() */
- iflib_unregister_vlan_handlers(ctx);
+ /* Unregister VLAN and VXLAN handlers before calling iflib_stop() */
+ iflib_unregister_event_handlers(ctx);
iflib_netmap_detach(ifp);
ether_ifdetach(ifp);
@@ -5835,6 +6065,12 @@
ctx->ifc_vlan_detach_event =
EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
EVENTHANDLER_PRI_FIRST);
+ ctx->ifc_vxlan_attach_event =
+ EVENTHANDLER_REGISTER(vxlan_start, iflib_vxlan_register, ctx,
+ EVENTHANDLER_PRI_FIRST);
+ ctx->ifc_vxlan_detach_event =
+ EVENTHANDLER_REGISTER(vxlan_stop, iflib_vxlan_unregister, ctx,
+ EVENTHANDLER_PRI_FIRST);
if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) {
ctx->ifc_mediap = &ctx->ifc_media;
@@ -5845,7 +6081,7 @@
}
static void
-iflib_unregister_vlan_handlers(if_ctx_t ctx)
+iflib_unregister_event_handlers(if_ctx_t ctx)
{
/* Unregister VLAN events */
if (ctx->ifc_vlan_attach_event != NULL) {
@@ -5857,6 +6093,15 @@
ctx->ifc_vlan_detach_event = NULL;
}
+ /* Unregister VXLAN events */
+ if (ctx->ifc_vxlan_attach_event != NULL) {
+ EVENTHANDLER_DEREGISTER(vxlan_start, ctx->ifc_vxlan_attach_event);
+ ctx->ifc_vxlan_attach_event = NULL;
+ }
+ if (ctx->ifc_vxlan_detach_event != NULL) {
+ EVENTHANDLER_DEREGISTER(vxlan_stop, ctx->ifc_vxlan_detach_event);
+ ctx->ifc_vxlan_detach_event = NULL;
+ }
}
static void
@@ -5867,8 +6112,8 @@
/* Remove all media */
ifmedia_removeall(&ctx->ifc_media);
- /* Ensure that VLAN event handlers are unregistered */
- iflib_unregister_vlan_handlers(ctx);
+ /* Ensure that VLAN and VXLAN event handlers are unregistered */
+ iflib_unregister_event_handlers(ctx);
/* Release kobject reference */
kobj_delete((kobj_t) ctx, NULL);

File Metadata

Mime Type
text/plain
Expires
Fri, Jan 30, 8:34 PM (14 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28104081
Default Alt Text
D30691.id90559.diff (32 KB)

Event Timeline