Index: share/man/man4/sfxge.4 =================================================================== --- share/man/man4/sfxge.4 +++ share/man/man4/sfxge.4 @@ -93,10 +93,18 @@ .It Va hw.sfxge.tx_dpl_get_max The maximum length of the deferred packet .Dq get-list -for queued transmit -packets, used only if the transmit queue lock can be acquired. +for queued transmit packets (TCP and non-TCP), used only if the transmit +queue lock can be acquired. If a packet is dropped, the -.Va tx_early_drops +.Va tx_get_overflow +counter is incremented and the local sender receives ENOBUFS. +The value must be greater than 0. +.It Va hw.sfxge.tx_dpl_get_non_tcp_max +The maximum number of non-TCP packets in the deferred packet +.Dq get-list , +used only if the transmit queue lock can be acquired. +If a packet is dropped, the +.Va tx_get_non_tcp_overflow counter is incremented and the local sender receives ENOBUFS. The value must be greater than 0. .It Va hw.sfxge.tx_dpl_put_max @@ -105,7 +113,7 @@ for queued transmit packets, used if the transmit queue lock cannot be acquired. If a packet is dropped, the -.Va tx_early_drops +.Va tx_put_overflow counter is incremented and the local sender receives ENOBUFS. The value must be greater than or equal to 0. 
.El Index: sys/dev/sfxge/sfxge_port.c =================================================================== --- sys/dev/sfxge/sfxge_port.c +++ sys/dev/sfxge/sfxge_port.c @@ -48,7 +48,7 @@ unsigned int count; int rc; - mtx_lock(&port->lock); + mtx_assert(&port->lock, MA_OWNED); if (port->init_state != SFXGE_PORT_STARTED) { rc = 0; @@ -82,7 +82,6 @@ rc = ETIMEDOUT; out: - mtx_unlock(&port->lock); return (rc); } @@ -93,12 +92,16 @@ unsigned int id = arg2; int rc; + mtx_lock(&sc->port.lock); if ((rc = sfxge_mac_stat_update(sc)) != 0) - return (rc); + goto out; - return (SYSCTL_OUT(req, - (uint64_t *)sc->port.mac_stats.decode_buf + id, - sizeof(uint64_t))); + rc = SYSCTL_OUT(req, + (uint64_t *)sc->port.mac_stats.decode_buf + id, + sizeof(uint64_t)); +out: + mtx_unlock(&sc->port.lock); + return (rc); } static void @@ -453,7 +456,7 @@ unsigned int count; int rc; - mtx_lock(&port->lock); + mtx_assert(&port->lock, MA_OWNED); if (port->init_state != SFXGE_PORT_STARTED) { rc = 0; @@ -487,7 +490,6 @@ rc = ETIMEDOUT; out: - mtx_unlock(&port->lock); return (rc); } @@ -498,12 +500,16 @@ unsigned int id = arg2; int rc; + mtx_lock(&sc->port.lock); if ((rc = sfxge_phy_stat_update(sc)) != 0) - return (rc); + goto out; - return (SYSCTL_OUT(req, - (uint32_t *)sc->port.phy_stats.decode_buf + id, - sizeof(uint32_t))); + rc = SYSCTL_OUT(req, + (uint32_t *)sc->port.phy_stats.decode_buf + id, + sizeof(uint32_t)); +out: + mtx_unlock(&sc->port.lock); + return (rc); } static void Index: sys/dev/sfxge/sfxge_tx.h =================================================================== --- sys/dev/sfxge/sfxge_tx.h +++ sys/dev/sfxge/sfxge_tx.h @@ -75,21 +75,29 @@ enum sfxge_tx_buf_flags flags; }; -#define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT 1024 -#define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 64 +#define SFXGE_TX_DPL_GET_PKT_LIMIT_DEFAULT (64 * 1024) +#define SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT 1024 +#define SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT 64 /* * Deferred packet list. 
*/ struct sfxge_tx_dpl { - unsigned int std_get_max; /* Maximum number of packets + unsigned int std_get_max; /* Maximum number of packets * in get list */ - unsigned int std_put_max; /* Maximum number of packets + unsigned int std_get_non_tcp_max; /* Maximum number + * of non-TCP packets + * in get list */ + unsigned int std_put_max; /* Maximum number of packets * in put list */ - uintptr_t std_put; /* Head of put list. */ - struct mbuf *std_get; /* Head of get list. */ - struct mbuf **std_getp; /* Tail of get list. */ - unsigned int std_get_count; /* Packets in get list. */ + uintptr_t std_put; /* Head of put list. */ + struct mbuf *std_get; /* Head of get list. */ + struct mbuf **std_getp; /* Tail of get list. */ + unsigned int std_get_count; /* Packets in get list. */ + unsigned int std_get_non_tcp_count; /* Non-TCP packets + * in get list */ + unsigned int std_get_hiwat; /* Packets in get list + * high watermark */ }; @@ -166,7 +174,10 @@ unsigned long tso_long_headers; unsigned long collapses; unsigned long drops; - unsigned long early_drops; + unsigned long get_overflow; + unsigned long get_non_tcp_overflow; + unsigned long put_overflow; + unsigned long netdown_drops; /* The following fields change more often, and are used mostly * on the completion path Index: sys/dev/sfxge/sfxge_tx.c =================================================================== --- sys/dev/sfxge/sfxge_tx.c +++ sys/dev/sfxge/sfxge_tx.c @@ -85,14 +85,23 @@ TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_MAX, &sfxge_tx_dpl_get_max); SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_max, CTLFLAG_RDTUN, &sfxge_tx_dpl_get_max, 0, - "Maximum number of packets in deferred packet get-list"); + "Maximum number of any packets in deferred packet get-list"); + +#define SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX \ + SFXGE_PARAM(tx_dpl_get_non_tcp_max) +static int sfxge_tx_dpl_get_non_tcp_max = + SFXGE_TX_DPL_GET_NON_TCP_PKT_LIMIT_DEFAULT; +TUNABLE_INT(SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, &sfxge_tx_dpl_get_non_tcp_max); 
+SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_get_non_tcp_max, CTLFLAG_RDTUN, + &sfxge_tx_dpl_get_non_tcp_max, 0, + "Maximum number of non-TCP packets in deferred packet get-list"); #define SFXGE_PARAM_TX_DPL_PUT_MAX SFXGE_PARAM(tx_dpl_put_max) static int sfxge_tx_dpl_put_max = SFXGE_TX_DPL_PUT_PKT_LIMIT_DEFAULT; TUNABLE_INT(SFXGE_PARAM_TX_DPL_PUT_MAX, &sfxge_tx_dpl_put_max); SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_dpl_put_max, CTLFLAG_RDTUN, &sfxge_tx_dpl_put_max, 0, - "Maximum number of packets in deferred packet put-list"); + "Maximum number of any packets in deferred packet put-list"); #endif @@ -152,6 +161,15 @@ #ifdef SFXGE_HAVE_MQ +static inline unsigned int +sfxge_is_mbuf_non_tcp(struct mbuf *mbuf) +{ + /* Absence of TCP checksum flags does not mean that it is non-TCP + * but it should be true if user wants to achieve high throughput. + */ + return (!(mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))); +} + /* * Reorder the put list and append it to the get list. */ @@ -163,6 +181,7 @@ volatile uintptr_t *putp; uintptr_t put; unsigned int count; + unsigned int non_tcp_count; mtx_assert(&txq->lock, MA_OWNED); @@ -181,9 +200,11 @@ get_next = NULL; count = 0; + non_tcp_count = 0; do { struct mbuf *put_next; + non_tcp_count += sfxge_is_mbuf_non_tcp(mbuf); put_next = mbuf->m_nextpkt; mbuf->m_nextpkt = get_next; get_next = mbuf; @@ -197,6 +218,7 @@ *stdp->std_getp = get_next; stdp->std_getp = get_tailp; stdp->std_get_count += count; + stdp->std_get_non_tcp_count += non_tcp_count; } #endif /* SFXGE_HAVE_MQ */ @@ -387,6 +409,7 @@ struct sfxge_tx_dpl *stdp; struct mbuf *mbuf, *next; unsigned int count; + unsigned int non_tcp_count; unsigned int pushed; int rc; @@ -401,6 +424,10 @@ mbuf = stdp->std_get; count = stdp->std_get_count; + non_tcp_count = stdp->std_get_non_tcp_count; + + if (count > stdp->std_get_hiwat) + stdp->std_get_hiwat = count; while (count != 0) { KASSERT(mbuf != NULL, ("mbuf == NULL")); @@ -415,6 +442,7 @@ rc = sfxge_tx_queue_mbuf(txq, mbuf); --count; 
+ non_tcp_count -= sfxge_is_mbuf_non_tcp(mbuf); mbuf = next; if (rc != 0) continue; @@ -431,12 +459,16 @@ if (count == 0) { KASSERT(mbuf == NULL, ("mbuf != NULL")); + KASSERT(non_tcp_count == 0, + ("inconsistent TCP/non-TCP detection")); stdp->std_get = NULL; stdp->std_get_count = 0; + stdp->std_get_non_tcp_count = 0; stdp->std_getp = &stdp->std_get; } else { stdp->std_get = mbuf; stdp->std_get_count = count; + stdp->std_get_non_tcp_count = non_tcp_count; } if (txq->added != pushed) @@ -496,8 +528,18 @@ sfxge_tx_qdpl_swizzle(txq); - if (stdp->std_get_count >= stdp->std_get_max) + if (stdp->std_get_count >= stdp->std_get_max) { + txq->get_overflow++; return (ENOBUFS); + } + if (sfxge_is_mbuf_non_tcp(mbuf)) { + if (stdp->std_get_non_tcp_count >= + stdp->std_get_non_tcp_max) { + txq->get_non_tcp_overflow++; + return (ENOBUFS); + } + stdp->std_get_non_tcp_count++; + } *(stdp->std_getp) = mbuf; stdp->std_getp = &mbuf->m_nextpkt; @@ -518,8 +560,10 @@ old_len = mp->m_pkthdr.csum_data; } else old_len = 0; - if (old_len >= stdp->std_put_max) + if (old_len >= stdp->std_put_max) { + atomic_add_long(&txq->put_overflow, 1); return (ENOBUFS); + } mbuf->m_pkthdr.csum_data = old_len + 1; mbuf->m_nextpkt = (void *)old; } while (atomic_cmpset_ptr(putp, old, new) == 0); @@ -540,6 +584,7 @@ if (!SFXGE_LINK_UP(txq->sc)) { rc = ENETDOWN; + atomic_add_long(&txq->netdown_drops, 1); goto fail; } @@ -577,7 +622,6 @@ fail: m_freem(m); - atomic_add_long(&txq->early_drops, 1); return (rc); } @@ -596,6 +640,7 @@ } stdp->std_get = NULL; stdp->std_get_count = 0; + stdp->std_get_non_tcp_count = 0; stdp->std_getp = &stdp->std_get; mtx_unlock(&txq->lock); @@ -1411,6 +1456,13 @@ rc = EINVAL; goto fail_tx_dpl_get_max; } + if (sfxge_tx_dpl_get_non_tcp_max <= 0) { + log(LOG_ERR, "%s=%d must be greater than 0", + SFXGE_PARAM_TX_DPL_GET_NON_TCP_MAX, + sfxge_tx_dpl_get_non_tcp_max); + rc = EINVAL; + goto fail_tx_dpl_get_max; + } if (sfxge_tx_dpl_put_max < 0) { log(LOG_ERR, "%s=%d must be greater or equal 
to 0", SFXGE_PARAM_TX_DPL_PUT_MAX, sfxge_tx_dpl_put_max); @@ -1422,6 +1474,7 @@ stdp = &txq->dpl; stdp->std_put_max = sfxge_tx_dpl_put_max; stdp->std_get_max = sfxge_tx_dpl_get_max; + stdp->std_get_non_tcp_max = sfxge_tx_dpl_get_non_tcp_max; stdp->std_getp = &stdp->std_get; mtx_init(&txq->lock, "txq", NULL, MTX_DEF); @@ -1430,6 +1483,14 @@ SYSCTL_CHILDREN(txq_node), OID_AUTO, "dpl_get_count", CTLFLAG_RD | CTLFLAG_STATS, &stdp->std_get_count, 0, ""); + SYSCTL_ADD_UINT(device_get_sysctl_ctx(sc->dev), + SYSCTL_CHILDREN(txq_node), OID_AUTO, + "dpl_get_non_tcp_count", CTLFLAG_RD | CTLFLAG_STATS, + &stdp->std_get_non_tcp_count, 0, ""); + SYSCTL_ADD_UINT(device_get_sysctl_ctx(sc->dev), + SYSCTL_CHILDREN(txq_node), OID_AUTO, + "dpl_get_hiwat", CTLFLAG_RD | CTLFLAG_STATS, + &stdp->std_get_hiwat, 0, ""); #endif txq->type = type; @@ -1467,7 +1528,10 @@ SFXGE_TX_STAT(tso_long_headers, tso_long_headers), SFXGE_TX_STAT(tx_collapses, collapses), SFXGE_TX_STAT(tx_drops, drops), - SFXGE_TX_STAT(tx_early_drops, early_drops), + SFXGE_TX_STAT(tx_get_overflow, get_overflow), + SFXGE_TX_STAT(tx_get_non_tcp_overflow, get_non_tcp_overflow), + SFXGE_TX_STAT(tx_put_overflow, put_overflow), + SFXGE_TX_STAT(tx_netdown_drops, netdown_drops), }; static int