Index: sys/dev/e1000/if_em.h =================================================================== --- sys/dev/e1000/if_em.h +++ sys/dev/e1000/if_em.h @@ -266,7 +266,7 @@ #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) -#define EM_MAX_SCATTER 32 +#define EM_MAX_SCATTER 64 #define EM_VFTA_SIZE 128 #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ Index: sys/dev/e1000/if_em.c =================================================================== --- sys/dev/e1000/if_em.c +++ sys/dev/e1000/if_em.c @@ -273,7 +273,7 @@ static void em_register_vlan(void *, if_t, u16); static void em_unregister_vlan(void *, if_t, u16); static void em_setup_vlan_hw_support(struct adapter *); -static int em_xmit(struct tx_ring *, struct mbuf **); +static int __noinline em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); @@ -434,6 +434,8 @@ #include #endif /* DEV_NETMAP */ +int pcie_max_read_req; + /********************************************************************* * Device identification routine * @@ -542,6 +544,10 @@ goto err_pci; } + pci_set_max_read_req(dev, 4096); + pcie_max_read_req = pci_get_max_read_req(dev); + device_printf(dev, "max_read_req (%d)\n", pcie_max_read_req); + /* ** For ICH8 and family we need to ** map the flash memory, and this @@ -1653,15 +1659,20 @@ ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); - if (reg_icr & E1000_ICR_RXO) + if (reg_icr & E1000_ICR_RXO) { + device_printf(adapter->dev, "RXO\n"); adapter->rx_overruns++; + } if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); - } else + device_printf(adapter->dev, "RXSEQ/LSC\n"); + } else { E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); + device_printf(adapter->dev, "no idea %u\n", reg_icr); + } /* ** Because we must read the ICR for this interrupt ** it may clear other causes using autoclear, for @@ -1860,7 +1871,7 @@ * return 0 on success, positive on failure **********************************************************************/ -static int +static int __noinline em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; @@ -1872,13 +1883,13 @@ struct ether_header *eh; struct ip *ip = NULL; struct tcphdr *tp = NULL; - u32 txd_upper = 0, txd_lower = 0, txd_used = 0; + u32 txd_upper = 0, txd_lower = 0; int ip_off, poff; int nsegs, i, j, first, last = 0; - int error, do_tso, tso_desc = 0, remap = 1; + int error, do_tso, remap = 1; m_head = *m_headp; - do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0); + do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); ip_off = poff = 0; /* @@ -1896,6 +1907,7 @@ * which also has similiar restrictions. */ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { +#if 0 if (do_tso || (m_head->m_next != NULL && m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { if (M_WRITABLE(*m_headp) == 0) { @@ -1908,80 +1920,84 @@ *m_headp = m_head; } } +#endif /* * XXX * Assume IPv4, we don't have TSO/checksum offload support * for IPv6 yet. */ ip_off = sizeof(struct ether_header); - m_head = m_pullup(m_head, ip_off); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); + if (m_head->m_len < ip_off) { + m_head = m_pullup(m_head, ip_off); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); - m_head = m_pullup(m_head, ip_off); + if (m_head->m_len < ip_off) { + m_head = m_pullup(m_head, ip_off); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } + } + if (m_head->m_len < ip_off + sizeof(struct ip)) { + m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } - m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); - if (do_tso) { - m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); + + if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { + if (m_head->m_len < poff + sizeof(struct tcphdr)) { + m_head = m_pullup(m_head, poff + + sizeof(struct tcphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); - /* - * TSO workaround: - * pull 4 more bytes of data into it. - */ - m_head = m_pullup(m_head, poff + (tp->th_off << 2) + 4); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); + if (m_head->m_len < poff + (tp->th_off << 2)) { + m_head = m_pullup(m_head, poff + + (tp->th_off << 2)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); - ip->ip_len = 0; - ip->ip_sum = 0; - /* - * The pseudo TCP checksum does not include TCP payload - * length so driver should recompute the checksum here - * what hardware expect to see. This is adherence of - * Microsoft's Large Send specification. - */ tp = (struct tcphdr *)(mtod(m_head, char *) + poff); - tp->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - } else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { - m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); + if (do_tso) { + ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + + (ip->ip_hl << 2) + + (tp->th_off << 2)); + ip->ip_sum = 0; + /* + * The pseudo TCP checksum does not include TCP + * payload length so driver should recompute + * the checksum here what hardware expect to + * see. This is adherence of Microsoft's Large + * Send specification. + */ + tp->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } - tp = (struct tcphdr *)(mtod(m_head, char *) + poff); - m_head = m_pullup(m_head, poff + (tp->th_off << 2)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); - } - ip = (struct ip *)(mtod(m_head, char *) + ip_off); - tp = (struct tcphdr *)(mtod(m_head, char *) + poff); } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { - m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); - if (m_head == NULL) { - *m_headp = NULL; - return (ENOBUFS); + if (m_head->m_len < poff + sizeof(struct udphdr)) { + m_head = m_pullup(m_head, poff + + sizeof(struct udphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } @@ -2029,9 +2045,6 @@ /* Try it again, but only once */ remap = 0; goto retry; - } else if (error == ENOMEM) { - adapter->no_tx_dma_setup++; - return (error); } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); @@ -2039,6 +2052,7 @@ return (error); } +#if 0 /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and @@ -2046,12 +2060,11 @@ * sentinel descriptor to prevent premature writeback. */ if ((do_tso == 0) && (txr->tx_tso == TRUE)) { - if (nsegs == 1) - tso_desc = TRUE; txr->tx_tso = FALSE; } +#endif - if (nsegs > (txr->tx_avail - 2)) { + if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); @@ -2062,8 +2075,6 @@ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); - /* we need to make a final sentinel transmit desc */ - tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); @@ -2086,6 +2097,10 @@ ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; + if (j == 0 && seg_len >= (3 * (pcie_max_read_req - 4))) + device_printf(adapter->dev, "whoops, seg len(%lu)\n", seg_len); + +#if 0 /* ** TSO Workaround: ** If this is the last descriptor, we want to @@ -2095,42 +2110,45 @@ seg_len -= 4; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = - htole32(txd_upper); + adapter->txd_cmd | txd_lower | seg_len); + ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ ++txd_used; /* using an extra txd */ ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; - ctxd->buffer_addr = - htole64(seg_addr + seg_len); + + ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | 4); - ctxd->upper.data = - htole32(txd_upper); + adapter->txd_cmd | txd_lower | 4); + ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { +#endif ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( - adapter->txd_cmd | txd_lower | seg_len); - ctxd->upper.data = - htole32(txd_upper); + adapter->txd_cmd | txd_lower | seg_len); + ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; +#if 0 } +#endif tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; + + txr->next_avail_desc = i; + txr->tx_avail--; } - txr->next_avail_desc = i; - txr->tx_avail -= nsegs; +#if 0 if (tso_desc) /* TSO used an extra for sentinel */ txr->tx_avail -= txd_used; +#endif tx_buffer->m_head = m_head; /* @@ -2301,13 +2319,19 @@ ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { - if (txr->busy == EM_TX_HUNG) + if (txr->busy == EM_TX_HUNG) { + device_printf(adapter->dev, "Declaring HUNG\n"); goto hung; - if (txr->busy >= EM_TX_MAXTRIES) + } + if (txr->busy >= EM_TX_MAXTRIES) { + device_printf(adapter->dev, "Asserting HUNG\n"); txr->busy = EM_TX_HUNG; + } /* Schedule a TX tasklet if needed */ - if (txr->tx_avail <= EM_MAX_SCATTER) + if (txr->tx_avail <= EM_MAX_SCATTER) { + device_printf(adapter->dev, "firing off a queue task\n"); taskqueue_enqueue(txr->tq, &txr->tx_task); + } } callout_reset(&adapter->timer, hz, em_local_timer, adapter); @@ -3033,6 +3057,11 @@ if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, em_ioctl); if_setgetcounterfn(ifp, em_get_counter); + /* TSO parameters */ + ifp->if_hw_tsomax = EM_TSO_SIZE; + ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER; + ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; + #ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ if_settransmitfn(ifp, em_mq_start); @@ -3875,6 +3904,9 @@ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ + if (!(adapter->txd_cmd | E1000_TXD_CMD_IFCS)) + device_printf(adapter->dev, "IFCS NOT SET\n"); + tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; @@ -3883,7 +3915,9 @@ txr->tx_avail--; txr->next_avail_desc = cur; +#if 0 txr->tx_tso = TRUE; +#endif } @@ -4891,8 +4925,8 @@ u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { - E1000_WRITE_REG(hw, EM_EIAC, EM_MSIX_MASK); - ims_mask |= EM_MSIX_MASK; + E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); + ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } @@ -5984,6 +6018,45 @@ #endif #ifdef DDB +DB_COMMAND(em_read_icr, em_ddb_read_icr) +{ + devclass_t dc; + int max_em; + + dc = devclass_find("em"); + max_em = devclass_get_maxunit(dc); + + for (int index = 0; index < (max_em - 1); index++) { + device_t dev; + dev = devclass_get_device(dc, index); + if (device_get_driver(dev) == &em_driver) { + struct adapter *adapter = device_get_softc(dev); + u32 reg_icr; + reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); + device_printf(adapter->dev, "Current ICR(0x%80x)\n", + reg_icr); + } + } +} + +DB_COMMAND(em_set_ims, em_ddb_set_ims) +{ + devclass_t dc; + int max_em; + + dc = devclass_find("em"); + max_em = devclass_get_maxunit(dc); + + for (int index = 0; index < (max_em - 1); index++) { + device_t dev; + dev = devclass_get_device(dc, index); + if (device_get_driver(dev) == &em_driver) { + struct adapter *adapter = device_get_softc(dev); + E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); + } + } +} + DB_COMMAND(em_reset_dev, em_ddb_reset_dev) { devclass_t dc;