Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F143391257
D30691.id90559.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
32 KB
Referenced Files
None
Subscribers
None
D30691.id90559.diff
View Options
Index: sys/dev/ixl/if_ixl.c
===================================================================
--- sys/dev/ixl/if_ixl.c
+++ sys/dev/ixl/if_ixl.c
@@ -118,6 +118,8 @@
static void ixl_if_timer(if_ctx_t ctx, uint16_t qid);
static void ixl_if_vlan_register(if_ctx_t ctx, u16 vtag);
static void ixl_if_vlan_unregister(if_ctx_t ctx, u16 vtag);
+static void ixl_if_vxlan_register(if_ctx_t ctx, u16 port);
+static void ixl_if_vxlan_unregister(if_ctx_t ctx, u16 port);
static uint64_t ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt);
static int ixl_if_i2c_req(if_ctx_t ctx, struct ifi2creq *req);
static int ixl_if_priv_ioctl(if_ctx_t ctx, u_long command, caddr_t data);
@@ -190,6 +192,8 @@
DEVMETHOD(ifdi_timer, ixl_if_timer),
DEVMETHOD(ifdi_vlan_register, ixl_if_vlan_register),
DEVMETHOD(ifdi_vlan_unregister, ixl_if_vlan_unregister),
+ DEVMETHOD(ifdi_vxlan_register, ixl_if_vxlan_register),
+ DEVMETHOD(ifdi_vxlan_unregister, ixl_if_vxlan_unregister),
DEVMETHOD(ifdi_get_counter, ixl_if_get_counter),
DEVMETHOD(ifdi_i2c_req, ixl_if_i2c_req),
DEVMETHOD(ifdi_priv_ioctl, ixl_if_priv_ioctl),
@@ -726,6 +730,9 @@
/* Add protocol filters to list */
ixl_init_filters(vsi);
+ /* Initialize udp_ports bitmap for VXLAN offloads */
+ memset(&pf->udp_ports, 0, sizeof(pf->udp_ports));
+
/* Init queue allocation manager */
error = ixl_pf_qmgr_init(&pf->qmgr, hw->func_caps.num_tx_qp);
if (error) {
@@ -996,6 +1003,9 @@
/* Re-add configure filters to HW */
ixl_reconfigure_filters(vsi);
+ /* Sync all UDP filters */
+ ixl_sync_udp_filters(pf, true);
+
/* Configure promiscuous mode */
ixl_if_promisc_set(ctx, if_getflags(ifp));
@@ -1424,6 +1434,9 @@
ixl_process_adminq(pf, &pending);
ixl_update_link_status(pf);
+ if (IXL_PF_HAS_PENDING_UDP_FILTER_SYNC(pf))
+ ixl_sync_udp_filters(pf, false);
+
/*
* If there are still messages to process, reschedule ourselves.
* Otherwise, re-enable our interrupt and go to sleep.
@@ -1729,6 +1742,73 @@
}
}
+static void
+ixl_if_vxlan_register(if_ctx_t ctx, u16 port)
+{
+ struct ixl_pf *pf = iflib_get_softc(ctx);
+ int idx;
+
+ /* Check if port already exists */
+ idx = ixl_get_udp_port_idx(pf, port);
+ if (idx != -1) {
+ device_printf(pf->dev, "port %d already offloaded\n", port);
+ return;
+ }
+
+ /* Now check if there is space to add the new port */
+ idx = ixl_get_udp_port_idx(pf, 0);
+ if (idx == -1) {
+ device_printf(pf->dev,
+ "maximum number of offloaded UDP ports reached, not adding port %d\n",
+ port);
+ return;
+ }
+
+ pf->udp_ports[idx].port = port;
+ pf->udp_ports[idx].filter_index = IXL_UDP_PORT_INDEX_UNUSED;
+ pf->udp_ports[idx].is_marked_for_deletion = FALSE;
+ pf->pending_udp_bitmap |= BIT_ULL(idx);
+
+ atomic_set_32(&pf->state, IXL_PF_STATE_UDP_FILTER_SYNC_PENDING);
+
+ if (if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
+ iflib_admin_intr_deferred(ctx);
+}
+
+static void
+ixl_if_vxlan_unregister(if_ctx_t ctx, u16 port)
+{
+ struct ixl_pf *pf = iflib_get_softc(ctx);
+ int idx;
+
+ /* Check if port already exists */
+ idx = ixl_get_udp_port_idx(pf, port);
+ if (idx == -1) {
+ device_printf(pf->dev,
+ "UDP port %d was not found, not deleting\n", port);
+ return;
+ }
+
+ /* If port exists, set the value to 0. When ixl_if_vxlan_register looks for
+ * an empty entry for a new tunnel, it looks for entries with port set to 0.
+ * Also, mark current entry for deletion and make the deletion pending.
+ */
+ pf->udp_ports[idx].port = 0;
+ pf->udp_ports[idx].is_marked_for_deletion = TRUE;
+
+ /* Toggle pending bit instead of setting it. This way if we are
+ * deleting a port that has yet to be added we just clear the pending
+ * bit and don't have to worry about it.
+ */
+ pf->pending_udp_bitmap ^= BIT_ULL(idx);
+
+ atomic_set_32(&pf->state, IXL_PF_STATE_UDP_FILTER_SYNC_PENDING);
+
+ if (if_getdrvflags(iflib_get_ifp(ctx)) & IFF_DRV_RUNNING)
+ iflib_admin_intr_deferred(ctx);
+}
+
+
static uint64_t
ixl_if_get_counter(if_ctx_t ctx, ift_counter cnt)
{
@@ -1839,6 +1919,7 @@
{
switch (event) {
case IFLIB_RESTART_VLAN_CONFIG:
+ case IFLIB_RESTART_VXLAN_CONFIG:
default:
return (false);
}
Index: sys/dev/ixl/ixl.h
===================================================================
--- sys/dev/ixl/ixl.h
+++ sys/dev/ixl/ixl.h
@@ -199,7 +199,12 @@
#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP)
#define CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6|CSUM_SCTP_IPV6)
-#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6|CSUM_TSO)
+#define CSUM_INNER_IPV4 (CSUM_INNER_IP|CSUM_INNER_IP_TCP|CSUM_INNER_IP_UDP)
+#define CSUM_INNER_IPV6 (CSUM_INNER_IP6_TCP|CSUM_INNER_IP6_UDP)
+#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6|CSUM_TSO| \
+ CSUM_ENCAP_VXLAN|CSUM_INNER_IPV4|CSUM_INNER_IPV6| \
+ CSUM_INNER_IP_TSO|CSUM_INNER_IP6_TSO)
+#define IXL_NEEDS_CTXD(flags) (flags & (CSUM_TSO | CSUM_ENCAP_VXLAN))
/* Misc flags for ixl_vsi.flags */
#define IXL_FLAGS_KEEP_TSO4 (1 << 0)
@@ -261,7 +266,8 @@
IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | \
IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO | \
IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM | \
- IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LRO)
+ IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU | IFCAP_LRO | \
+ IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO)
#define IXL_CSUM_TCP \
(CSUM_IP_TCP|CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP6_TCP)
@@ -271,6 +277,9 @@
(CSUM_IP_SCTP|CSUM_IP6_SCTP)
#define IXL_CSUM_IPV4 \
(CSUM_IP|CSUM_IP_TSO)
+#define IXL_CSUM_TSO \
+ (CSUM_IP_TSO | CSUM_IP6_TSO | \
+ CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO)
/* Pre-11 counter(9) compatibility */
#if __FreeBSD_version >= 1100036
@@ -404,6 +413,7 @@
/* Stats */
u64 irqs;
u64 tso;
+ u64 tx_vxlan;
};
struct ixl_rx_queue {
Index: sys/dev/ixl/ixl_pf.h
===================================================================
--- sys/dev/ixl/ixl_pf.h
+++ sys/dev/ixl/ixl_pf.h
@@ -89,6 +89,7 @@
IXL_PF_STATE_FW_LLDP_DISABLED = (1 << 9),
IXL_PF_STATE_EEE_ENABLED = (1 << 10),
IXL_PF_STATE_LINK_ACTIVE_ON_DOWN = (1 << 11),
+ IXL_PF_STATE_UDP_FILTER_SYNC_PENDING = (1 << 12),
};
#define IXL_PF_IN_RECOVERY_MODE(pf) \
@@ -97,6 +98,19 @@
#define IXL_PF_IS_RESETTING(pf) \
((atomic_load_acq_32(&pf->state) & IXL_PF_STATE_RESETTING) != 0)
+#define IXL_PF_HAS_PENDING_UDP_FILTER_SYNC(pf) \
+ ((atomic_load_acq_32(&pf->state) & \
+ IXL_PF_STATE_UDP_FILTER_SYNC_PENDING) != 0)
+
+#define IXL_UDP_PORT_INDEX_UNUSED 255
+struct ixl_udp_port_config {
+ /* AdminQ command interface expects port number in Host byte order */
+ u16 port;
+ u8 filter_index;
+ bool is_marked_for_deletion;
+};
+
+
struct ixl_vf {
struct ixl_vsi vsi;
u32 vf_flags;
@@ -172,6 +186,10 @@
int num_vfs;
uint16_t veb_seid;
int vc_debug_lvl;
+
+ /* VXLAN */
+ struct ixl_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
+ u16 pending_udp_bitmap;
};
/*
@@ -438,4 +456,7 @@
int ixl_attach_get_link_status(struct ixl_pf *);
int ixl_sysctl_set_flowcntl(SYSCTL_HANDLER_ARGS);
+int ixl_get_udp_port_idx(struct ixl_pf *, u16);
+void ixl_sync_udp_filters(struct ixl_pf *, bool all);
+
#endif /* _IXL_PF_H_ */
Index: sys/dev/ixl/ixl_pf_main.c
===================================================================
--- sys/dev/ixl/ixl_pf_main.c
+++ sys/dev/ixl/ixl_pf_main.c
@@ -1031,6 +1031,81 @@
ixl_set_rss_hlut(pf);
}
+int
+ixl_get_udp_port_idx(struct ixl_pf *pf, u16 port)
+{
+ u8 i;
+
+ for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; ++i) {
+ /* Do not report ports with pending deletions as
+ * being available.
+ */
+ if (!port && (pf->pending_udp_bitmap & BIT_ULL(i)))
+ continue;
+ if (pf->udp_ports[i].is_marked_for_deletion == TRUE)
+ continue;
+ if (pf->udp_ports[i].port == port)
+ return i;
+ }
+
+ return -1;
+}
+
+void
+ixl_sync_udp_filters(struct ixl_pf *pf, bool all)
+{
+ struct ixl_udp_port_config *udp_port;
+ struct i40e_hw *hw = &pf->hw;
+ enum i40e_status_code status;
+ bool is_marked_for_deletion;
+ u8 i, filter_index;
+ u16 port;
+
+ for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; ++i) {
+ if (all || pf->pending_udp_bitmap & BIT_ULL(i)) {
+ status = I40E_SUCCESS;
+
+ udp_port = &pf->udp_ports[i];
+ pf->pending_udp_bitmap &= ~BIT_ULL(i);
+
+ port = udp_port->port;
+ is_marked_for_deletion = udp_port->is_marked_for_deletion;
+ filter_index = udp_port->filter_index;
+
+ if (!is_marked_for_deletion && port > 0) {
+ status = i40e_aq_add_udp_tunnel(hw, port,
+ I40E_AQC_TUNNEL_TYPE_VXLAN, &filter_index, NULL);
+ } else if (filter_index != IXL_UDP_PORT_INDEX_UNUSED)
+ status = i40e_aq_del_udp_tunnel(hw, filter_index, NULL);
+
+ if (status) {
+ device_printf(pf->dev,
+ "VXLAN %s port %d, index %d failed, err %s aq_err %s\n",
+ is_marked_for_deletion ? "delete" : "add",
+ port, filter_index, i40e_stat_str(&pf->hw, status),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+ if (is_marked_for_deletion == FALSE) {
+ /* failed to add, just reset port,
+ * drop pending bit for any deletion
+ */
+ udp_port->port = 0;
+ pf->pending_udp_bitmap &= ~BIT_ULL(i);
+ }
+ } else {
+ if (is_marked_for_deletion == FALSE) {
+ /* record filter index on success */
+ udp_port->filter_index = filter_index;
+ }
+ /* clear the deletion flag */
+ udp_port->is_marked_for_deletion = FALSE;
+ }
+ }
+ }
+
+ atomic_clear_32(&pf->state, IXL_PF_STATE_UDP_FILTER_SYNC_PENDING);
+}
+
/*
* In some firmware versions there is default MAC/VLAN filter
* configured which interferes with filters managed by driver.
Index: sys/dev/ixl/ixl_txrx.c
===================================================================
--- sys/dev/ixl/ixl_txrx.c
+++ sys/dev/ixl/ixl_txrx.c
@@ -272,13 +272,18 @@
}
}
-/**********************************************************************
- *
- * Setup context for hardware segmentation offload (TSO)
+#if defined(INET6) || defined(INET)
+/**
+ * Setup context descriptor for TSO or VXLAN Offload
+ * @txr: TX ring which handles transmission
+ * @pi: information extracted from packet headers
*
- **********************************************************************/
+ * Configure TX descriptor with information extracted
+ * from a packet header required for HW to calculate
+ * requested checksum and perform TCP segmentation.
+ */
static int
-ixl_tso_setup(struct tx_ring *txr, if_pkt_info_t pi)
+ixl_ctxd_setup(struct tx_ring *txr, if_pkt_info_t pi)
{
if_softc_ctx_t scctx;
struct i40e_tx_context_desc *TXD;
@@ -288,43 +293,74 @@
idx = pi->ipi_pidx;
TXD = (struct i40e_tx_context_desc *) &txr->tx_base[idx];
- total_hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
- tsolen = pi->ipi_len - total_hdr_len;
scctx = txr->que->vsi->shared;
+ type_cmd_tso_mss =
+ (u64)I40E_TX_DESC_DTYPE_CONTEXT << I40E_TXD_CTX_QW1_DTYPE_SHIFT;
+
type = I40E_TX_DESC_DTYPE_CONTEXT;
- cmd = I40E_TX_CTX_DESC_TSO;
- /*
- * TSO MSS must not be less than 64; this prevents a
- * BAD_LSO_MSS MDD event when the MSS is too small.
- */
- if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
- txr->mss_too_small++;
- pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
+ if (pi->ipi_csum_flags & IXL_CSUM_TSO) {
+ cmd = I40E_TX_CTX_DESC_TSO;
+ /*
+ * TSO MSS must not be less than 64; this prevents a
+ * BAD_LSO_MSS MDD event when the MSS is too small.
+ */
+ if (pi->ipi_tso_segsz < IXL_MIN_TSO_MSS) {
+ txr->mss_too_small++;
+ pi->ipi_tso_segsz = IXL_MIN_TSO_MSS;
+ }
+ mss = pi->ipi_tso_segsz;
+
+ total_hdr_len = pi->ipi_ehdrlen +
+ pi->ipi_ip_hlen + pi->ipi_tcp_hlen +
+ pi->ipi_outer_ip_hlen + pi->ipi_tun_hlen;
+ tsolen = pi->ipi_len - total_hdr_len;
+
+ /* Check for BAD_LSO_MSS MDD event (mss too large) */
+ MPASS(mss <= IXL_MAX_TSO_MSS);
+ /* Check for NO_HEAD MDD event (header lengths are 0) */
+ MPASS(pi->ipi_ehdrlen != 0);
+ MPASS(pi->ipi_ip_hlen != 0);
+ /* Partial check for BAD_LSO_LEN MDD event */
+ MPASS(tsolen != 0);
+ /* Partial check for WRONG_SIZE MDD event (during TSO) */
+ MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
+
+ type_cmd_tso_mss |=
+ ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+ ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+ ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
+ txr->que->tso++;
}
- mss = pi->ipi_tso_segsz;
-
- /* Check for BAD_LS0_MSS MDD event (mss too large) */
- MPASS(mss <= IXL_MAX_TSO_MSS);
- /* Check for NO_HEAD MDD event (header lengths are 0) */
- MPASS(pi->ipi_ehdrlen != 0);
- MPASS(pi->ipi_ip_hlen != 0);
- /* Partial check for BAD_LSO_LEN MDD event */
- MPASS(tsolen != 0);
- /* Partial check for WRONG_SIZE MDD event (during TSO) */
- MPASS(total_hdr_len + mss <= IXL_MAX_FRAME);
-
- type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
- ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
- ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
- ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
- TXD->tunneling_params = htole32(0);
- txr->que->tso++;
+ if (pi->ipi_csum_flags & CSUM_ENCAP_VXLAN) {
+ u32 tun_params = I40E_TXD_CTX_UDP_TUNNELING;
+ switch (pi->ipi_outer_etype) {
+ case ETHERTYPE_IP:
+ if (pi->ipi_csum_flags & CSUM_INNER_IP_TSO)
+ tun_params |= I40E_TX_CTX_EXT_IP_IPV4;
+ else
+ tun_params |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+ break;
+ case ETHERTYPE_IPV6:
+ tun_params |= I40E_TX_CTX_EXT_IP_IPV6;
+ break;
+ default:
+ break;
+ }
+ tun_params |=
+ (pi->ipi_outer_ip_hlen >> 2) << I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
+ (pi->ipi_tun_hlen >> 1) << I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+ TXD->tunneling_params = htole32(tun_params);
+ txr->que->tx_vxlan++;
+ } else
+ TXD->tunneling_params = htole32(0);
+
return ((idx + 1) & (scctx->isc_ntxd[0]-1));
}
+#endif
/*********************************************************************
*
@@ -355,12 +391,12 @@
/* Set up the TSO/CSUM offload */
if (pi->ipi_csum_flags & CSUM_OFFLOAD) {
- /* Set up the TSO context descriptor if required */
- if (pi->ipi_csum_flags & CSUM_TSO) {
+ /* Set up the context descriptor if required */
+ if (IXL_NEEDS_CTXD(pi->ipi_csum_flags)) {
/* Prevent MAX_BUFF MDD event (for TSO) */
if (ixl_tso_detect_sparse(segs, nsegs, pi))
return (EFBIG);
- i = ixl_tso_setup(txr, pi);
+ i = ixl_ctxd_setup(txr, pi);
}
ixl_tx_setup_offload(que, pi, &cmd, &off);
}
@@ -730,13 +766,17 @@
return (0);
}
-/*********************************************************************
- *
- * Verify that the hardware indicated that the checksum is valid.
- * Inform the stack about the status of checksum so that stack
- * doesn't spend time verifying the checksum.
+/**
+ * ixl_rx_checksum - Verify that the hardware indicated that the checksum is valid or not
+ * @ri: iflib RXD info
+ * @status: RX descriptor status data
+ * @error: RX descriptor error data
+ * @ptype: packet type
*
- *********************************************************************/
+ * Determine whether the hardware indicated that RX checksums were verified
+ * and are valid. Inform the stack about the status of checksum so that stack
+ * doesn't spend time verifying them.
+ */
static u8
ixl_rx_checksum(if_rxd_info_t ri, u32 status, u32 error, u8 ptype)
{
@@ -750,6 +790,10 @@
decoded = decode_rx_desc_ptype(ptype);
+ /* Cannot proceed if packet type is unknown or not an IP packet */
+ if (decoded.known == 0 || decoded.outer_ip != I40E_RX_PTYPE_OUTER_IP)
+ return (0);
+
/* IPv6 with extension headers likely have bad csum */
if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
@@ -760,21 +804,68 @@
}
}
- ri->iri_csum_flags |= CSUM_L3_CALC;
-
- /* IPv4 checksum error */
- if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
- return (1);
-
- ri->iri_csum_flags |= CSUM_L3_VALID;
- ri->iri_csum_flags |= CSUM_L4_CALC;
-
- /* L4 checksum error */
- if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
- return (1);
+ switch (decoded.tunnel_type) {
+ case I40E_RX_PTYPE_TUNNEL_NONE:
+ /* L3 checksum is calculated only for IPv4 packets */
+ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
+ ri->iri_csum_flags |= CSUM_L3_CALC;
+ /* IPv4 checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_L3_VALID;
+ }
- ri->iri_csum_flags |= CSUM_L4_VALID;
- ri->iri_csum_data |= htons(0xffff);
+ switch (decoded.inner_prot) {
+ case I40E_RX_PTYPE_INNER_PROT_UDP:
+ case I40E_RX_PTYPE_INNER_PROT_TCP:
+ case I40E_RX_PTYPE_INNER_PROT_SCTP:
+ ri->iri_csum_flags |= CSUM_L4_CALC;
+ /* L4 checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_L4_VALID;
+ ri->iri_csum_data |= htons(0xffff);
+ break;
+ default:
+ break;
+ }
+ break;
+ case I40E_RX_PTYPE_TUNNEL_IP_GRENAT:
+ case I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC:
+ case I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN:
+ /* L3 checksum of outer IPv4 packets */
+ if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
+ ri->iri_csum_flags = CSUM_L3_CALC;
+ /* IP checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_L3_VALID;
+ }
+ /* L3 checksum of most inner IPv4 packets */
+ if (decoded.tunnel_end_prot == I40E_RX_PTYPE_TUNNEL_END_IPV4) {
+ ri->iri_csum_flags = CSUM_INNER_L3_CALC;
+ /* IP checksum error */
+ if (error & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_INNER_L3_VALID;
+ }
+ switch (decoded.inner_prot) {
+ case I40E_RX_PTYPE_INNER_PROT_UDP:
+ case I40E_RX_PTYPE_INNER_PROT_TCP:
+ case I40E_RX_PTYPE_INNER_PROT_SCTP:
+ ri->iri_csum_flags |= CSUM_INNER_L4_CALC;
+ if (error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
+ return (1);
+ ri->iri_csum_flags |= CSUM_INNER_L4_VALID;
+ ri->iri_csum_data |= htons(0xffff);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
return (0);
}
@@ -950,6 +1041,9 @@
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso",
CTLFLAG_RD, &(tx_que->tso),
"TSO");
+ SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_vxlan",
+ CTLFLAG_RD, &(tx_que->tx_vxlan),
+ "VXLAN HW Offload");
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small",
CTLFLAG_RD, &(txr->mss_too_small),
"TSO sends with an MSS less than 64");
Index: sys/net/ifdi_if.m
===================================================================
--- sys/net/ifdi_if.m
+++ sys/net/ifdi_if.m
@@ -97,6 +97,11 @@
{
}
+ static void
+ null_vxlan_register_op(if_ctx_t _ctx __unused, uint16_t port __unused)
+ {
+ }
+
static int
null_q_setup(if_ctx_t _ctx __unused, uint32_t _qid __unused)
{
@@ -458,6 +463,16 @@
uint16_t _vtag;
} DEFAULT null_vlan_register_op;
+METHOD void vxlan_register {
+ if_ctx_t _ctx;
+ uint16_t _port;
+} DEFAULT null_vxlan_register_op;
+
+METHOD void vxlan_unregister {
+ if_ctx_t _ctx;
+ uint16_t _port;
+} DEFAULT null_vxlan_register_op;
+
METHOD int sysctl_int_delay {
if_ctx_t _sctx;
if_int_delay_info_t _iidi;
Index: sys/net/iflib.h
===================================================================
--- sys/net/iflib.h
+++ sys/net/iflib.h
@@ -131,7 +131,11 @@
uint8_t ipi_mflags; /* packet mbuf flags */
uint32_t ipi_tcp_seq; /* tcp seqno */
- uint32_t __spare0__;
+
+ /* Tunneled packets offload handling */
+ uint16_t ipi_outer_etype; /* outer ethernet type */
+ uint8_t ipi_outer_ip_hlen; /* outer ip header length */
+ uint8_t ipi_tun_hlen; /* tunnel headers length */
} *if_pkt_info_t;
typedef struct if_irq {
@@ -410,6 +414,7 @@
*/
enum iflib_restart_event {
IFLIB_RESTART_VLAN_CONFIG,
+ IFLIB_RESTART_VXLAN_CONFIG,
};
/*
Index: sys/net/iflib.c
===================================================================
--- sys/net/iflib.c
+++ sys/net/iflib.c
@@ -56,6 +56,7 @@
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_media.h>
+#include <net/if_vxlan.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/mp_ring.h>
@@ -71,6 +72,7 @@
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <netinet/ip_var.h>
#include <netinet6/ip6_var.h>
@@ -211,6 +213,8 @@
#define isc_legacy_intr ifc_txrx.ift_legacy_intr
eventhandler_tag ifc_vlan_attach_event;
eventhandler_tag ifc_vlan_detach_event;
+ eventhandler_tag ifc_vxlan_attach_event;
+ eventhandler_tag ifc_vxlan_detach_event;
struct ether_addr ifc_mac;
};
@@ -708,7 +712,7 @@
#endif
static int iflib_register(if_ctx_t);
static void iflib_deregister(if_ctx_t);
-static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
+static void iflib_unregister_event_handlers(if_ctx_t ctx);
static uint16_t iflib_get_mbuf_size_for(unsigned int size);
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
@@ -2464,6 +2468,7 @@
iflib_txq_t txq;
iflib_rxq_t rxq;
int i, j, tx_ip_csum_flags, tx_ip6_csum_flags;
+ int vxlan_hwcsum_flags, vxlan_hwtso_flags;
if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
IFDI_INTR_DISABLE(ctx);
@@ -2476,6 +2481,12 @@
tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP);
tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP);
+ /* CSUM_ENCAP_VXLAN has to be set if any of VXLAN offloads is enabled */
+ vxlan_hwcsum_flags = scctx->isc_tx_csum_flags & (CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP |
+ CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_ENCAP_VXLAN);
+ vxlan_hwtso_flags = scctx->isc_tx_csum_flags & (CSUM_INNER_IP6_TSO | CSUM_INNER_IP_TSO |
+ CSUM_ENCAP_VXLAN);
/* Set hardware offload abilities */
if_clearhwassist(ifp);
if (if_getcapenable(ifp) & IFCAP_TXCSUM)
@@ -2486,6 +2497,10 @@
if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
if (if_getcapenable(ifp) & IFCAP_TSO6)
if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
+ if (if_getcapenable(ifp) & IFCAP_VXLAN_HWCSUM)
+ if_sethwassistbits(ifp, vxlan_hwcsum_flags, 0);
+ if (if_getcapenable(ifp) & IFCAP_VXLAN_HWTSO)
+ if_sethwassistbits(ifp, vxlan_hwtso_flags, 0);
for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
CALLOUT_LOCK(txq);
@@ -3157,13 +3172,178 @@
pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag);
printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n",
pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto);
+ printf("pi outer_etype: %d outer_ip_len: %d tun_len: %d\n",
+ pi->ipi_outer_etype, pi->ipi_outer_ip_hlen, pi->ipi_tun_hlen);
}
#endif
#define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO)
#define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO))
+#define IS_TX_INNER_OFFLOAD4(pi) ((pi)->ipi_csum_flags & \
+ (CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO))
+#define IS_INNER_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_INNER_IP_TSO)
#define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
#define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
+#define IS_TX_INNER_OFFLOAD6(pi) ((pi)->ipi_csum_flags & \
+ (CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO))
+#define IS_INNER_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_INNER_IP6_TSO)
+
+static int
+iflib_parse_inner_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
+{
+ if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
+ struct ether_vlan_header *eh;
+ struct ip *outer_ip;
+ int ehdrlen;
+ struct mbuf *m;
+ size_t off;
+
+ if (pi->ipi_ipproto != IPPROTO_UDP)
+ return (ENXIO);
+
+ m = *mp;
+ /*
+ * Save outer frame info and reuse etype and ip_hlen for inner frame.
+ */
+ pi->ipi_outer_etype = pi->ipi_etype;
+ pi->ipi_outer_ip_hlen = pi->ipi_ip_hlen;
+ pi->ipi_tun_hlen = sizeof(struct udphdr) + sizeof(struct vxlan_header);
+
+ /* size of outer frame header */
+ off = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tun_hlen;
+ outer_ip = (struct ip *)((caddr_t)m->m_data + pi->ipi_ehdrlen);
+
+ /* For VXLAN first mbuf usually contains only outer frame headers */
+ if (m->m_len == off) {
+ m = m->m_next;
+ off = 0;
+ }
+
+ if (__predict_false((size_t)m->m_len < off + sizeof(*eh)))
+ return (ENOMEM);
+
+ eh = (struct ether_vlan_header *)((caddr_t)m->m_data + off);
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ pi->ipi_etype = ntohs(eh->evl_proto);
+ ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ pi->ipi_etype = ntohs(eh->evl_encap_proto);
+ ehdrlen = ETHER_HDR_LEN;
+ }
+ pi->ipi_tun_hlen += ehdrlen;
+
+ switch (pi->ipi_etype) {
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip = NULL;
+ struct tcphdr *th = NULL;
+ int minhlen = off + ehdrlen + sizeof(*ip);
+
+ if (m->m_pkthdr.csum_flags & (CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO))
+ minhlen += sizeof(*th);
+ minhlen = min(m->m_pkthdr.len, minhlen);
+ if (__predict_false(m->m_len < minhlen)) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, minhlen)) == NULL))
+ return (ENOMEM);
+ }
+ ip = (struct ip *)(m->m_data + ehdrlen);
+ if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
+ th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+
+ pi->ipi_ip_hlen = ip->ip_hl << 2;
+ pi->ipi_ipproto = ip->ip_p;
+
+ /* TCP checksum offload may require TCP header length */
+ if (IS_TX_INNER_OFFLOAD4(pi)) {
+ if (__predict_false(ip->ip_p != IPPROTO_TCP))
+ return (ENXIO);
+
+ if (__predict_false(th == NULL)) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
+ return (ENOMEM);
+ th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+
+ if (IS_INNER_TSO4(pi)) {
+ /*
+ * TSO always requires hardware checksum offload.
+ */
+ pi->ipi_csum_flags |= (CSUM_INNER_IP_TCP | CSUM_INNER_IP);
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
+ pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
+ if (sctx->isc_flags & IFLIB_TSO_INIT_IP) {
+ ip->ip_sum = 0;
+ ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
+ }
+ }
+ }
+ if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_INNER_IP)) {
+ ip->ip_sum = 0;
+ outer_ip->ip_sum = 0;
+ }
+ break;
+ }
+#endif
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
+ struct tcphdr *th;
+ pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
+
+ if (__predict_false(m->m_len < ehdrlen + sizeof(struct ip6_hdr))) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, ehdrlen + sizeof(struct ip6_hdr))) == NULL))
+ return (ENOMEM);
+ }
+ th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen);
+
+ /* XXX-BZ this will go badly in case of ext hdrs. */
+ pi->ipi_ipproto = ip6->ip6_nxt;
+ pi->ipi_flags |= IPI_TX_IPV6;
+
+ /* TCP checksum offload may require TCP header length */
+ if (IS_TX_INNER_OFFLOAD6(pi)) {
+ if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
+ return (ENXIO);
+
+ if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
+ txq->ift_pullups++;
+ if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
+ return (ENOMEM);
+ }
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+
+ if (IS_INNER_TSO6(pi)) {
+ /*
+ * TSO always requires hardware checksum offload.
+ */
+ pi->ipi_csum_flags |= CSUM_INNER_IP6_TCP;
+ th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
+ pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
+ pi->ipi_ip_hlen = 0;
+ break;
+ }
+ *mp = m;
+
+ return (0);
+}
static int
iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
@@ -3210,9 +3390,11 @@
struct mbuf *n;
struct ip *ip = NULL;
struct tcphdr *th = NULL;
- int minthlen;
+ int minthlen = pi->ipi_ehdrlen + sizeof(*ip);
- minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
+ if ((m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) == 0)
+ minthlen += sizeof(*th);
+ minthlen = min(m->m_pkthdr.len, minthlen);
if (__predict_false(m->m_len < minthlen)) {
/*
* if this code bloat is causing too much of a hit
@@ -3334,6 +3516,9 @@
}
*mp = m;
+ if ((m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) != 0)
+ return iflib_parse_inner_header(txq, pi, mp);
+
return (0);
}
@@ -4273,7 +4458,8 @@
#define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \
- IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG)
+ IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG | \
+ IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO)
static int
iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
@@ -4515,6 +4701,50 @@
CTX_UNLOCK(ctx);
}
+static void
+iflib_vxlan_register(void *arg, if_t ifp, sa_family_t family, uint16_t port)
+{
+ if_ctx_t ctx = arg;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+
+ if (iflib_in_detach(ctx))
+ return;
+
+ /* Check if interface has VXLAN offloads enabled */
+ if (!(if_getcapenable(ctx->ifc_ifp) &
+ (IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO)))
+ return;
+
+ CTX_LOCK(ctx);
+ /* Driver may need to stop traffic before enabling VXLAN offload */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_stop(ctx);
+ IFDI_VXLAN_REGISTER(ctx, port);
+ /* Re-init to load the changes, if required */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_init_locked(ctx);
+ CTX_UNLOCK(ctx);
+}
+
+static void
+iflib_vxlan_unregister(void *arg, if_t ifp, sa_family_t family, uint16_t port)
+{
+ if_ctx_t ctx = arg;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+
+ CTX_LOCK(ctx);
+ /* Driver may need all tagged packets to be flushed */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_stop(ctx);
+ IFDI_VXLAN_UNREGISTER(ctx, port);
+ /* Re-init to load the changes, if required */
+ if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VXLAN_CONFIG))
+ iflib_init_locked(ctx);
+ CTX_UNLOCK(ctx);
+}
+
static void
iflib_led_func(void *arg, int onoff)
{
@@ -5458,7 +5688,7 @@
if_shared_ctx_t sctx = ctx->ifc_sctx;
/* Unregister VLAN event handlers early */
- iflib_unregister_vlan_handlers(ctx);
+ iflib_unregister_event_handlers(ctx);
if ((sctx->isc_flags & IFLIB_PSEUDO) &&
(sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) {
@@ -5518,8 +5748,8 @@
ctx->ifc_flags |= IFC_IN_DETACH;
STATE_UNLOCK(ctx);
- /* Unregister VLAN handlers before calling iflib_stop() */
- iflib_unregister_vlan_handlers(ctx);
+ /* Unregister VLAN and VXLAN handlers before calling iflib_stop() */
+ iflib_unregister_event_handlers(ctx);
iflib_netmap_detach(ifp);
ether_ifdetach(ifp);
@@ -5835,6 +6065,12 @@
ctx->ifc_vlan_detach_event =
EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
EVENTHANDLER_PRI_FIRST);
+ ctx->ifc_vxlan_attach_event =
+ EVENTHANDLER_REGISTER(vxlan_start, iflib_vxlan_register, ctx,
+ EVENTHANDLER_PRI_FIRST);
+ ctx->ifc_vxlan_detach_event =
+ EVENTHANDLER_REGISTER(vxlan_stop, iflib_vxlan_unregister, ctx,
+ EVENTHANDLER_PRI_FIRST);
if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) {
ctx->ifc_mediap = &ctx->ifc_media;
@@ -5845,7 +6081,7 @@
}
static void
-iflib_unregister_vlan_handlers(if_ctx_t ctx)
+iflib_unregister_event_handlers(if_ctx_t ctx)
{
/* Unregister VLAN events */
if (ctx->ifc_vlan_attach_event != NULL) {
@@ -5857,6 +6093,15 @@
ctx->ifc_vlan_detach_event = NULL;
}
+ /* Unregister VxLAN events */
+ if (ctx->ifc_vxlan_attach_event != NULL) {
+ EVENTHANDLER_DEREGISTER(vxlan_start, ctx->ifc_vxlan_attach_event);
+ ctx->ifc_vxlan_attach_event = NULL;
+ }
+ if (ctx->ifc_vxlan_detach_event != NULL) {
+ EVENTHANDLER_DEREGISTER(vxlan_stop, ctx->ifc_vxlan_detach_event);
+ ctx->ifc_vxlan_detach_event = NULL;
+ }
}
static void
@@ -5867,8 +6112,8 @@
/* Remove all media */
ifmedia_removeall(&ctx->ifc_media);
- /* Ensure that VLAN event handlers are unregistered */
- iflib_unregister_vlan_handlers(ctx);
+ /* Ensure that VLAN and VXLAN event handlers are unregistered */
+ iflib_unregister_event_handlers(ctx);
/* Release kobject reference */
kobj_delete((kobj_t) ctx, NULL);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jan 30, 8:34 PM (14 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28104081
Default Alt Text
D30691.id90559.diff (32 KB)
Attached To
Mode
D30691: ixl(4): Add VxLAN HW offloads support
Attached
Detach File
Event Timeline
Log In to Comment