Index: head/sys/dev/mlx5/device.h
===================================================================
--- head/sys/dev/mlx5/device.h
+++ head/sys/dev/mlx5/device.h
@@ -619,6 +619,8 @@
 	u8		op_own;
 };
 
+#define	MLX5_CQE_TSTMP_PTP	(1ULL << 63)
+
 static inline bool get_cqe_lro_timestamp_valid(struct mlx5_cqe64 *cqe)
 {
 	return (cqe->lro_tcppsh_abort_dupack >> 7) & 1;
Index: head/sys/dev/mlx5/mlx5_en/en.h
===================================================================
--- head/sys/dev/mlx5/mlx5_en/en.h
+++ head/sys/dev/mlx5/mlx5_en/en.h
@@ -650,6 +650,24 @@
 	struct mlx5e_flow_table inner_rss;
 };
 
+/* Low-order bits dropped from clock deltas before they are scaled. */
+#define	MLX5E_TSTMP_PREC 10
+
+/*
+ * One calibration sample pairing the system uptime (base_*, in
+ * nanoseconds) with the adapter internal timer (clbr_hw_*) at the
+ * current and previous calibration.  clbr_gen is the generation of the
+ * sample; zero marks a retired sample which the receive path must not
+ * use.
+ */
+struct mlx5e_clbr_point {
+	uint64_t base_curr;
+	uint64_t base_prev;
+	uint64_t clbr_hw_prev;
+	uint64_t clbr_hw_curr;
+	u_int clbr_gen;
+};
+
 struct mlx5e_priv {
 	/* priv data path fields - start */
 	int	order_base_2_num_channels;
@@ -704,6 +722,12 @@
 	int	media_active_last;
 
 	struct callout watchdog;
+
+	struct callout tstmp_clbr;
+	int	clbr_done;
+	int	clbr_curr;
+	struct mlx5e_clbr_point clbr_points[2];
+	u_int	clbr_gen;
 };
 
 #define	MLX5E_NET_IP_ALIGN 2
Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
===================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -154,6 +154,8 @@
 
 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
 
+static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters");
+
 static void
 mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
@@ -637,6 +639,127 @@
 	mtx_unlock(&priv->async_events_mtx);
 }
 
+static void mlx5e_calibration_callout(void *arg);
+static int mlx5e_calibration_duration = 20;
+static int mlx5e_fast_calibration = 1;
+static int mlx5e_normal_calibration = 30;
+
+static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
+    "MLX5 timestamp calibration parameters");
+
+SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
+    &mlx5e_calibration_duration, 0,
+    "Duration of initial calibration");
+SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
+    &mlx5e_fast_calibration, 0,
+    "Recalibration interval during initial calibration");
+SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
+    &mlx5e_normal_calibration, 0,
+    "Recalibration interval during normal operations");
+
+/*
+ * Ignites the calibration process.  The first sample is taken
+ * synchronously; afterwards the callout is re-armed with the fast
+ * interval until clbr_done reaches the initial calibration duration,
+ * and with the normal interval from then on.  Intervals are in seconds.
+ */
+static void
+mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
+{
+
+	if (priv->clbr_done == 0)
+		mlx5e_calibration_callout(priv);
+	else
+		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
+		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
+		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
+		    priv);
+}
+
+/*
+ * Converts the timespec into a single 64bit count.  Despite the name,
+ * the result is in nanoseconds.
+ */
+static uint64_t
+mlx5e_timespec2usec(const struct timespec *ts)
+{
+
+	return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
+}
+
+/*
+ * Reads the 64bit adapter internal timer.  The high word is sampled
+ * before and after the low word, and the read is retried if it changed
+ * in between, so that both halves belong to the same timer value.
+ */
+static uint64_t
+mlx5e_hw_clock(struct mlx5e_priv *priv)
+{
+	struct mlx5_init_seg *iseg;
+	uint32_t hw_h, hw_h1, hw_l;
+
+	iseg = priv->mdev->iseg;
+	do {
+		hw_h = ioread32be(&iseg->internal_timer_h);
+		hw_l = ioread32be(&iseg->internal_timer_l);
+		hw_h1 = ioread32be(&iseg->internal_timer_h);
+	} while (hw_h1 != hw_h);
+	return (((uint64_t)hw_h << 32) | hw_l);
+}
+
+/*
+ * The calibration callout, it runs either in the context of the
+ * thread which enables calibration, or in callout.  It takes the
+ * snapshot of system and adapter clocks, then advances the pointers to
+ * the calibration point to allow rx path to read the consistent data
+ * lockless.
+ */
+static void
+mlx5e_calibration_callout(void *arg)
+{
+	struct mlx5e_priv *priv;
+	struct mlx5e_clbr_point *next, *curr;
+	struct timespec ts;
+	int clbr_curr_next;
+
+	priv = arg;
+	curr = &priv->clbr_points[priv->clbr_curr];
+	clbr_curr_next = priv->clbr_curr + 1;
+	if (clbr_curr_next >= nitems(priv->clbr_points))
+		clbr_curr_next = 0;
+	next = &priv->clbr_points[clbr_curr_next];
+
+	next->base_prev = curr->base_curr;
+	next->clbr_hw_prev = curr->clbr_hw_curr;
+
+	next->clbr_hw_curr = mlx5e_hw_clock(priv);
+	/*
+	 * The rx path divides by the adapter clock delta of the
+	 * published point, i.e. clbr_hw_curr - clbr_hw_prev of next,
+	 * and clbr_hw_prev of next is clbr_hw_curr of curr.  Refuse to
+	 * publish the point if that delta is zero after scaling.
+	 */
+	if (((next->clbr_hw_curr - curr->clbr_hw_curr) >> MLX5E_TSTMP_PREC) ==
+	    0) {
+		if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx, "
+		    "disabling\n", next->clbr_hw_curr, curr->clbr_hw_curr);
+		priv->clbr_done = 0;
+		return;
+	}
+
+	nanouptime(&ts);
+	next->base_curr = mlx5e_timespec2usec(&ts);
+
+	curr->clbr_gen = 0;
+	atomic_thread_fence_rel();
+	priv->clbr_curr = clbr_curr_next;
+	atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
+
+	if (priv->clbr_done < mlx5e_calibration_duration)
+		priv->clbr_done++;
+	mlx5e_reset_calibration_callout(priv);
+}
+
 static const char *mlx5e_rq_stats_desc[] = {
 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
 };
@@ -2693,6 +2816,16 @@
 				mlx5e_open_locked(ifp);
 			}
 		}
+		if (mask & IFCAP_HWRXTSTMP) {
+			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
+			if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
+				if (priv->clbr_done == 0)
+					mlx5e_reset_calibration_callout(priv);
+			} else {
+				callout_drain(&priv->tstmp_clbr);
+				priv->clbr_done = 0;
+			}
+		}
 out:
 		PRIV_UNLOCK(priv);
 		break;
@@ -3198,7 +3331,7 @@
 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
 	ifp->if_capabilities |= IFCAP_LRO;
 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
-	ifp->if_capabilities |= IFCAP_HWSTATS;
+	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
 
 	/* set TSO limits so that we don't have to drop TX packets */
 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
@@ -3347,6 +3480,13 @@
 	mlx5e_update_stats(priv);
 	mtx_unlock(&priv->async_events_mtx);
 
+	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
+	    OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
+	    &priv->clbr_done, 0,
+	    "RX timestamps calibration state");
+	callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
+	mlx5e_reset_calibration_callout(priv);
+
 	return (priv);
 
 err_dealloc_transport_domain:
@@ -3390,6 +3530,8 @@
 
 	/* stop watchdog timer */
 	callout_drain(&priv->watchdog);
+
+	callout_drain(&priv->tstmp_clbr);
 
 	if (priv->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
===================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
@@ -179,13 +179,50 @@
 	/* TODO: handle tcp checksum */
 }
 
+/*
+ * Converts the adapter timestamp from the completion entry into system
+ * uptime by linear interpolation between the two clock snapshots of the
+ * current calibration point.  The point is read without locks; the
+ * computation is redone if the point generation is zero or has changed
+ * while it was being read.
+ */
+static uint64_t
+mlx5e_mbuf_tstmp(struct mlx5e_priv *priv, uint64_t hw_tstmp)
+{
+	struct mlx5e_clbr_point *cp;
+	uint64_t a1, a2, res;
+	u_int gen;
+
+	do {
+		cp = &priv->clbr_points[priv->clbr_curr];
+		gen = atomic_load_acq_int(&cp->clbr_gen);
+		a1 = (hw_tstmp - cp->clbr_hw_prev) >> MLX5E_TSTMP_PREC;
+		a2 = (cp->base_curr - cp->base_prev) >> MLX5E_TSTMP_PREC;
+		res = (a1 * a2) << MLX5E_TSTMP_PREC;
+
+		/*
+		 * Divisor cannot be zero because calibration callback
+		 * checks for the condition and disables timestamping
+		 * if clock halted.
+		 */
+		res /= (cp->clbr_hw_curr - cp->clbr_hw_prev) >>
+		    MLX5E_TSTMP_PREC;
+
+		res += cp->base_prev;
+		atomic_thread_fence_acq();
+	} while (gen == 0 || gen != cp->clbr_gen);
+	return (res);
+}
+
 static inline void
 mlx5e_build_rx_mbuf(struct mlx5_cqe64 *cqe,
     struct mlx5e_rq *rq, struct mbuf *mb,
     u32 cqe_bcnt)
 {
 	struct ifnet *ifp = rq->ifp;
+	struct mlx5e_channel *c;
 	int lro_num_seg;	/* HW LRO session aggregated packets counter */
+	uint64_t tstmp;
 
 	lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
 	if (lro_num_seg > 1) {
@@ -249,6 +286,21 @@
 	if (cqe_has_vlan(cqe)) {
 		mb->m_pkthdr.ether_vtag = be16_to_cpu(cqe->vlan_info);
 		mb->m_flags |= M_VLANTAG;
+	}
+
+	c = container_of(rq, struct mlx5e_channel, rq);
+	if (c->priv->clbr_done >= 2) {
+		tstmp = mlx5e_mbuf_tstmp(c->priv, be64_to_cpu(cqe->timestamp));
+		if ((tstmp & MLX5_CQE_TSTMP_PTP) != 0) {
+			/*
+			 * Timestamp was taken on the packet entrance,
+			 * instead of the cqe generation.
+			 */
+			tstmp &= ~MLX5_CQE_TSTMP_PTP;
+			mb->m_flags |= M_TSTMP_HPREC;
+		}
+		mb->m_pkthdr.rcv_tstmp = tstmp;
+		mb->m_flags |= M_TSTMP;
 	}
 }
 