Index: sys/dev/netmap/ixgbe_netmap.h
===================================================================
--- sys/dev/netmap/ixgbe_netmap.h
+++ sys/dev/netmap/ixgbe_netmap.h
@@ -189,7 +189,7 @@
 	/* device-specific */
 	struct adapter *adapter = ifp->if_softc;
 	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
-	int reclaim_tx;
+	int reclaim_tx, report;
 
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 			BUS_DMASYNC_POSTREAD);
@@ -236,7 +236,7 @@
 		__builtin_prefetch(&ring->slot[nm_i]);
 		__builtin_prefetch(&txr->tx_buffers[nic_i]);
 
-		for (n = 0; nm_i != head; n++) {
+		while (nm_i != head) {
 			struct netmap_slot *slot = &ring->slot[nm_i];
 			u_int len = slot->len;
 			uint64_t paddr;
@@ -245,9 +245,9 @@
 			/* device-specific */
 			union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
 			struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
-			int flags = (slot->flags & NS_REPORT ||
-				nic_i == 0 || nic_i == report_frequency) ?
-				IXGBE_TXD_CMD_RS : 0;
+			unsigned int hw_flags = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT |
+				IXGBE_ADVTXD_DCMD_IFCS;
+			u_int totlen = len;
 
 			/* prefetch for next round */
 			__builtin_prefetch(&ring->slot[nm_i + 1]);
@@ -255,6 +255,67 @@
 
 			NM_CHECK_ADDR_LEN(na, addr, len);
 
+			report = (slot->flags & NS_REPORT) ||
+				nic_i == 0 ||
+				nic_i == report_frequency;
+
+			if (slot->flags & NS_MOREFRAG) {
+				/* There is some duplicated code here, but
+				 * mixing everything up in the outer loop makes
+				 * things less transparent, and it also adds
+				 * unnecessary instructions in the fast path
+				 */
+				union ixgbe_adv_tx_desc *first = curr;
+
+				first->read.buffer_addr = htole64(paddr);
+				first->read.cmd_type_len = htole32(len | hw_flags);
+				bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);
+				/* avoid setting the FCS flag in the
+				 * descriptors after the first, for safety
+				 */
+				hw_flags &= ~IXGBE_ADVTXD_DCMD_IFCS;
+				for (;;) {
+					nm_i = nm_next(nm_i, lim);
+					nic_i = nm_next(nic_i, lim);
+					/* remember that we have to ask for a
+					 * report each time we move past half a
+					 * ring
+					 */
+					report |= nic_i == 0 ||
+						nic_i == report_frequency;
+					if (nm_i == head) {
+						/* We do not accept incomplete packets. */
+						return EINVAL;
+					}
+					slot = &ring->slot[nm_i];
+					len = slot->len;
+					addr = PNMB(na, slot, &paddr);
+					NM_CHECK_ADDR_LEN(na, addr, len);
+					curr = &txr->tx_base[nic_i];
+					txbuf = &txr->tx_buffers[nic_i];
+					totlen += len;
+					if (!(slot->flags & NS_MOREFRAG))
+						break;
+					if (slot->flags & NS_BUF_CHANGED) {
+						/* buffer has changed, reload map */
+						netmap_reload_map(na, txr->txtag, txbuf->map, addr);
+					}
+					curr->read.buffer_addr = htole64(paddr);
+					curr->read.olinfo_status = 0;
+					curr->read.cmd_type_len = htole32(len | hw_flags);
+					bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);
+				}
+				first->read.olinfo_status =
+					htole32(totlen << IXGBE_ADVTXD_PAYLEN_SHIFT);
+				totlen = 0;
+			}
+
+			/* curr/txbuf now always refer to the last descriptor of a packet
+			 * (which is also the first for single-slot packets)
+			 *
+			 * EOP and RS must be set only in this descriptor.
+			 */
+			hw_flags |= IXGBE_TXD_CMD_EOP | (report ? IXGBE_TXD_CMD_RS : 0);
 			if (slot->flags & NS_BUF_CHANGED) {
 				/* buffer has changed, reload map */
 				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
@@ -262,15 +323,12 @@
 			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
 
 			/* Fill the slot in the NIC ring. */
-			/* Use legacy descriptor, they are faster? */
 			curr->read.buffer_addr = htole64(paddr);
-			curr->read.olinfo_status = 0;
-			curr->read.cmd_type_len = htole32(len | flags |
-				IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
+			curr->read.olinfo_status = htole32(totlen << IXGBE_ADVTXD_PAYLEN_SHIFT);
+			curr->read.cmd_type_len = htole32(len | hw_flags);
 
 			/* make sure changes to the buffer are synced */
-			bus_dmamap_sync(txr->txtag, txbuf->map,
-				BUS_DMASYNC_PREWRITE);
+			bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);
 
 			nm_i = nm_next(nm_i, lim);
 			nic_i = nm_next(nic_i, lim);
@@ -303,8 +361,7 @@
 		 * This enables interrupt moderation on the tx
 		 * side though it might reduce throughput.
 		 */
-		struct ixgbe_legacy_tx_desc *txd =
-		    (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+		union ixgbe_adv_tx_desc *txd = txr->tx_base;
 
 		nic_i = txr->next_to_clean + report_frequency;
 		if (nic_i > lim)
@@ -313,7 +370,7 @@
 		nic_i = (nic_i < kring->nkr_num_slots / 4 ||
 			nic_i >= kring->nkr_num_slots*3/4) ?
 			0 : report_frequency;
-		reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD;	// XXX cpu_to_le32 ?
+		reclaim_tx = le32toh(txd[nic_i].wb.status) & IXGBE_TXD_STAT_DD;
 	}
 	if (reclaim_tx) {
 		/*
@@ -397,6 +454,7 @@
 	 */
 	if (netmap_no_pendintr || force_update) {
 		int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 0 : 4;
+		u_int new_hwtail = (u_int)-1;
 
 		nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
 		nm_i = netmap_idx_n2k(kring, nic_i);
@@ -404,15 +462,20 @@
 		for (n = 0; ; n++) {
 			union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
 			uint32_t staterr = le32toh(curr->wb.upper.status_error);
+			int complete;
 
 			if ((staterr & IXGBE_RXD_STAT_DD) == 0)
 				break;
 			ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
-			ring->slot[nm_i].flags = 0;
+			complete = staterr & IXGBE_RXD_STAT_EOP;
+			ring->slot[nm_i].flags = complete ? 0 : NS_MOREFRAG;
 			bus_dmamap_sync(rxr->ptag,
 			    rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
 			nm_i = nm_next(nm_i, lim);
 			nic_i = nm_next(nic_i, lim);
+			if (complete) {
+				new_hwtail = nm_i;
+			}
 		}
 		if (n) { /* update the state variables */
 			if (netmap_no_pendintr && !force_update) {
@@ -421,7 +484,11 @@
 				ix_rx_miss ++;
 				ix_rx_miss_bufs += n;
 			}
 			rxr->next_to_check = nic_i;
-			kring->nr_hwtail = nm_i;
+			if (new_hwtail != (u_int)-1) {
+				/* Update nr_hwtail only if we saw a complete
+				 * packet in the previous loop. */
+				kring->nr_hwtail = new_hwtail;
+			}
 		}
 		kring->nr_kflags &= ~NKR_PENDINTR;
@@ -478,7 +545,19 @@
 		return netmap_ring_reinit(kring);
 }
 
+static int
+ixgbe_netmap_config(struct netmap_adapter *na, struct nm_config_info *info)
+{
+	/* NOTE(review): original hunk used 'adapter' without declaring it
+	 * (compile error); derive it from na as the rest of the file does. */
+	struct adapter *adapter = na->ifp->if_softc;
+
+	info->num_tx_descs = adapter->num_tx_desc;
+	info->num_rx_descs = adapter->num_rx_desc;
+	info->num_tx_rings = info->num_rx_rings = adapter->num_queues;
+	info->rx_buf_maxsize = adapter->rx_mbuf_sz;
+	return 0;
+}
+
 
 /*
  * The attach routine, called near the end of ixgbe_attach(),
@@ -494,14 +575,16 @@
 
 	bzero(&na, sizeof(na));
 
 	na.ifp = adapter->ifp;
-	na.na_flags = NAF_BDG_MAYSLEEP;
+	na.na_flags = NAF_BDG_MAYSLEEP | NAF_MOREFRAG;
 	na.num_tx_desc = adapter->num_tx_desc;
 	na.num_rx_desc = adapter->num_rx_desc;
+	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
+	na.rx_buf_maxsize = adapter->rx_mbuf_sz;
 	na.nm_txsync = ixgbe_netmap_txsync;
 	na.nm_rxsync = ixgbe_netmap_rxsync;
 	na.nm_register = ixgbe_netmap_reg;
-	na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
 	na.nm_intr = ixgbe_netmap_intr;
+	na.nm_config = ixgbe_netmap_config;
 	netmap_attach(&na);
 }